From 1e8aab9222df291ee61c4030eb499efa75b80a02 Mon Sep 17 00:00:00 2001 From: LegendaryLHL Date: Sat, 14 Jun 2025 12:46:23 +0800 Subject: [PATCH 1/4] Single alloc for dataset --- CMakeLists.txt | 2 +- include/cneuron/cneuron.h | 35 ++++--- src/data.c | 192 ++++++++++++++------------------------ src/main.c | 26 +++--- src/network.c | 23 ++--- test/data.cpp | 78 ++++++---------- test/network.cpp | 16 ++-- test/test_utils.cpp | 74 ++++++--------- test/test_utils.h | 2 + 9 files changed, 180 insertions(+), 268 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c828b6728..e8df94ea9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -46,7 +46,6 @@ function(apply_target target) target_include_directories(${target} PRIVATE ${CMAKE_SOURCE_DIR}/external) target_include_directories(${target} PRIVATE ${CMAKE_SOURCE_DIR}/src) target_compile_options(${target} PRIVATE ${WARNING_FLAGS}) - target_link_libraries(libcneuron PRIVATE ${BLAS_LIBRARIES} m) if(CMAKE_BUILD_TYPE STREQUAL "Debug") target_compile_options(${target} PRIVATE ${ASAN_FLAGS} -g) @@ -61,6 +60,7 @@ endfunction() # libcneuron library add_library(libcneuron ${SRC_FILES}) apply_target(libcneuron) +target_link_libraries(libcneuron PRIVATE ${BLAS_LIBRARIES} m) # Main executable add_executable(${PROJECT_NAME} src/main.c) diff --git a/include/cneuron/cneuron.h b/include/cneuron/cneuron.h index 5ea530423..a77539f7b 100644 --- a/include/cneuron/cneuron.h +++ b/include/cneuron/cneuron.h @@ -8,8 +8,8 @@ * @brief Represents a single data element with its inputs and expected output index. */ typedef struct { - float *inputs; /**< Pointer to an array of input values. */ size_t expected_index; /**< Index of the expected output label. */ + float *inputs; /**< Pointer to an array of input values. */ } data; /** @@ -18,39 +18,44 @@ typedef struct { typedef struct { size_t length; /**< Number of data elements in the dataset. */ size_t inputs_length; /**< Number of input values per data element. 
*/ - data **datas; /**< Array of pointers containing the datas */ + data *datas; /**< Array containing the datas */ } dataset; /** - * @brief Reads a dataset from the specified file. + * @brief Allocate and setup a data * - * @param filename Path to the file containing the dataset. - * @return Pointer to the loaded 'dataset' structure, or NULL if an error occurs. + * @param inputs_length number of inputs of the data + * + * @return Pointer to the newly allocated data */ -dataset *get_dataset(const char *filename); +data *alloc_data(size_t inputs_length); /** - * @brief Frees all memory associated with a 'dataset' structure and its contents. + * @brief Allocate and setup a dataset + * + * @param dataset_length number of data of the dataset + * @param inputs_length number of inputs of the data * - * @param dataset Pointer to the dataset to be freed. + * @return Pointer to the newly allocated dataset */ -void free_dataset(dataset *dataset); +dataset *alloc_dataset(size_t dataset_length, size_t inputs_length); /** - * @brief Frees all memory associated with a 'data' structure and its conetents. + * @brief Reads a dataset from the specified file. * - * @param data Pointer to the data element to be freed. + * @param filename Path to the file containing the dataset. + * @return Pointer to the loaded 'dataset' structure, or NULL if an error occurs. */ -void free_data(data *data); +dataset *get_dataset(const char *filename); /** - * @brief Creates a copy of a 'data' structure. + * @brief Copy a 'data' structure. * * @param source_data Pointer to the original data element to copy. + * @param target_data Pointer to the destination data element to perform deep copy. * @param inputs_length Number of input values in the data element. - * @return Pointer to the newly created copy of the 'data' structure. 
*/ -data *get_data_copy(const data *source_data, size_t inputs_length); +void copy_data(data *target_data, const data *source_data, size_t inputs_length); /** * @brief Creates allocate new dataset and select random copy of data from a source dataset. diff --git a/src/data.c b/src/data.c index 088e38d2e..e411e7101 100644 --- a/src/data.c +++ b/src/data.c @@ -9,7 +9,27 @@ #include "cneuron/cneuron.h" #include "rand.h" -#define BACKGROUND_VALUE 0.0f +data *alloc_data(size_t inputs_length) { + data *new_data = malloc(sizeof(data) + sizeof(float) * inputs_length); + new_data->inputs = (float *)(new_data + 1); + + return new_data; +} + +dataset *alloc_dataset(size_t dataset_length, size_t inputs_length) { + dataset *new_dataset = malloc(sizeof(dataset) + (sizeof(data) + sizeof(float) * inputs_length) * dataset_length); + if (!new_dataset) return NULL; + new_dataset->datas = (data *)(new_dataset + 1); + new_dataset->length = dataset_length; + new_dataset->inputs_length = inputs_length; + + float *inputs_block = (float *)(new_dataset->datas + dataset_length); + for (size_t i = 0; i < dataset_length; i++) { + new_dataset->datas[i].inputs = inputs_block + i * inputs_length; + } + + return new_dataset; +} dataset *get_dataset(const char *filename) { assert(filename); @@ -20,201 +40,139 @@ dataset *get_dataset(const char *filename) { return NULL; } - dataset *read_dataset = malloc(sizeof(dataset)); - if (!read_dataset) { - fclose(file); - return NULL; - } - - if (fread(&read_dataset->length, sizeof(uint64_t), 1, file) != 1) { + size_t dataset_length = 0; + if (fread(&dataset_length, sizeof(uint64_t), 1, file) != 1) { fprintf(stderr, "Failed to read dataset length from %s\n", filename); - free(read_dataset); fclose(file); return NULL; } - read_dataset->datas = calloc(read_dataset->length, sizeof(data *)); - if (!read_dataset->datas) { - free(read_dataset); + size_t inputs_length = 0; + if (fread(&inputs_length, sizeof(uint64_t), 1, file) != 1) { + fprintf(stderr, "Failed to 
read inputs_length from %s\n", filename); fclose(file); return NULL; } - if (fread(&read_dataset->inputs_length, sizeof(uint64_t), 1, file) != 1) { - fprintf(stderr, "Failed to read inputs_length from %s\n", filename); - free(read_dataset); + dataset *read_dataset = alloc_dataset(dataset_length, inputs_length); + if (!read_dataset) { + fprintf(stderr, "Failed to allocate for dataset %s: %s\n", filename, strerror(errno)); fclose(file); return NULL; } - for (size_t i = 0; i < read_dataset->length; i++) { - data *read_data = malloc(sizeof(data)); - if (!read_data) { - goto cleanup; - } - - read_data->inputs = malloc(sizeof(float) * read_dataset->inputs_length); - if (!read_data->inputs) { - free(read_data); - goto cleanup; - } - + for (size_t i = 0; i < dataset_length; i++) { + data *read_data = &read_dataset->datas[i]; size_t read_inputs = fread(read_data->inputs, sizeof(float), read_dataset->inputs_length, file); if (read_inputs != read_dataset->inputs_length) { fprintf(stderr, "Invalid inputs_length from %s. Expected: %zu. 
But found: %zu\n", filename, read_dataset->inputs_length, read_inputs); - free_data(read_data); - goto cleanup; + free(read_dataset); + fclose(file); + return NULL; } if (fread(&(read_data->expected_index), sizeof(uint64_t), 1, file) != 1) { fprintf(stderr, "Failed to read expected_index from %s\n", filename); - free_data(read_data); - goto cleanup; + free(read_dataset); + fclose(file); + return NULL; } - - read_dataset->datas[i] = read_data; } fclose(file); return read_dataset; - -cleanup: - free_dataset(read_dataset); - fclose(file); - return NULL; } -void free_dataset(dataset *dataset) { - if (!dataset) { - return; - } - - for (size_t i = 0; i < dataset->length; i++) { - free_data(dataset->datas[i]); - } - free(dataset->datas); - free(dataset); -} - -void free_data(data *data) { - if (!data) { - return; - } - - free(data->inputs); - free(data); -} - -data *get_data_copy(const data *source_data, size_t inputs_length) { - assert(source_data); - assert(source_data->inputs); +void copy_data(data *target_data, const data *source_data, size_t inputs_length) { assert(inputs_length > 0); - data *copy = malloc(sizeof(data)); - if (!copy) { - return NULL; - } - - copy->expected_index = source_data->expected_index; - size_t inputs_size = sizeof(float) * inputs_length; - copy->inputs = malloc(inputs_size); - if (!copy->inputs) { - free(copy); - return NULL; - } - - memcpy(copy->inputs, source_data->inputs, inputs_size); - return copy; + target_data->expected_index = source_data->expected_index; + memcpy(target_data->inputs, source_data->inputs, inputs_size); } dataset *get_random_dataset_sample(const dataset *source_dataset, size_t amount) { - dataset *new_dataset = malloc(sizeof(dataset)); - new_dataset->inputs_length = source_dataset->inputs_length; + assert(source_dataset); + dataset *new_dataset = alloc_dataset(amount, source_dataset->inputs_length); + if (!new_dataset) { + return NULL; + } + new_dataset->datas = (data *)(new_dataset + 1); new_dataset->length = amount; 
- new_dataset->datas = malloc(sizeof(data) * amount); + new_dataset->inputs_length = source_dataset->inputs_length; for (size_t i = 0; i < amount; i++) { - new_dataset->datas[i] = get_data_copy(source_dataset->datas[randnum_u32(source_dataset->length, 0)], source_dataset->inputs_length); + data *random_data = &source_dataset->datas[randnum_u32(source_dataset->length, 0)]; + copy_data(&new_dataset->datas[i], random_data, source_dataset->inputs_length); } return new_dataset; } void rotate_data(data *data, int width, int height, float angle) { - assert(data); - assert(data->inputs); assert(width > 0 && height > 0); float rad = angle * M_PI / 180.0f; - float cos_angle = cos(rad); - float sin_angle = sin(rad); - float *new_inputs = malloc(sizeof(float) * width * height); + float cos_angle = cosf(rad); + float sin_angle = sinf(rad); + float *new_inputs = calloc(width * height, sizeof(float)); if (!new_inputs) { return; } + int center_x = floorf(width / 2.0f); + int center_y = floorf(height / 2.0f); for (int y = 0; y < height; y++) { for (int x = 0; x < width; x++) { - int center_x = floor(width / 2.0f); - int center_y = floor(height / 2.0f); - int src_x = round((x - center_x) * cos_angle + (y - center_y) * sin_angle + center_x); - int src_y = round((y - center_y) * cos_angle - (x - center_x) * sin_angle + center_y); + int src_x = roundf((x - center_x) * cos_angle + (y - center_y) * sin_angle + center_x); + int src_y = roundf((y - center_y) * cos_angle - (x - center_x) * sin_angle + center_y); if (src_x >= 0 && src_x < width && src_y >= 0 && src_y < height) { new_inputs[y * width + x] = data->inputs[src_y * width + src_x]; - } else { - new_inputs[y * width + x] = BACKGROUND_VALUE; } } } - free(data->inputs); - data->inputs = new_inputs; + memcpy(data->inputs, new_inputs, sizeof(float) * width * height); + free(new_inputs); } void scale_data(data *data, int width, int height, float scale) { - assert(data); - assert(data->inputs); assert(width > 0 && height > 0); - int 
scale_width = round(width * scale); - int scale_height = round(height * scale); - float *new_inputs = malloc(sizeof(float) * width * height); + int scale_width = roundf(width * scale); + int scale_height = roundf(height * scale); + float *new_inputs = calloc(width * height, sizeof(float)); if (!new_inputs) { return; } - int offset_x = round((scale_width - width) / 2.0f); - int offset_y = round((scale_height - height) / 2.0f); + int offset_x = roundf((scale_width - width) / 2.0f); + int offset_y = roundf((scale_height - height) / 2.0f); for (int y = 0; y < height; y++) { for (int x = 0; x < width; x++) { int scaled_x = x + offset_x; int scaled_y = y + offset_y; - int src_x = round(scaled_x / scale); - int src_y = round(scaled_y / scale); + int src_x = roundf(scaled_x / scale); + int src_y = roundf(scaled_y / scale); if (src_x >= 0 && src_x < width && src_y >= 0 && src_y < height) { new_inputs[y * width + x] = data->inputs[src_y * width + src_x]; - } else { - new_inputs[y * width + x] = BACKGROUND_VALUE; } } } - free(data->inputs); - data->inputs = new_inputs; + memcpy(data->inputs, new_inputs, sizeof(float) * width * height); + free(new_inputs); } void offset_data(data *data, int width, int height, float offset_x, float offset_y) { - assert(data); - assert(data->inputs); assert(width > 0 && height > 0); - float *new_inputs = malloc(sizeof(float) * width * height); + float *new_inputs = calloc(width * height, sizeof(float)); if (!new_inputs) { return; } @@ -224,22 +182,18 @@ void offset_data(data *data, int width, int height, float offset_x, float offset float new_x = x - offset_x; float new_y = y - offset_y; - int src_x = round(new_x); - int src_y = round(new_y); + int src_x = roundf(new_x); + int src_y = roundf(new_y); if (src_x >= 0 && src_x < width && src_y >= 0 && src_y < height) { new_inputs[y * width + x] = data->inputs[src_y * width + src_x]; - } else { - new_inputs[y * width + x] = BACKGROUND_VALUE; } } } - free(data->inputs); - data->inputs = new_inputs; + 
memcpy(data->inputs, new_inputs, sizeof(float) * width * height); + free(new_inputs); } void noise_data(data *data, size_t inputs_length, float noise_factor, float probability) { - assert(data); - assert(data->inputs); assert(inputs_length > 0); for (size_t i = 0; i < inputs_length; i++) { @@ -253,8 +207,6 @@ void noise_data(data *data, size_t inputs_length, float noise_factor, float prob } float output_expected(size_t index, const data *data) { - assert(data); - if (index == data->expected_index) { return 1.0f; } else { diff --git a/src/main.c b/src/main.c index 65679bf1c..d0aa7f942 100644 --- a/src/main.c +++ b/src/main.c @@ -38,7 +38,7 @@ typedef struct { dataset *dataset_generator(generator_args *args) { dataset *batch_dataset = get_random_dataset_sample(args->train_dataset, args->batch_size); for (size_t i = 0; i < batch_dataset->length; i++) { - data *data = batch_dataset->datas[i]; + data *data = &batch_dataset->datas[i]; rotate_data(data, IMAGE_SIZE, IMAGE_SIZE, randf(10.0f, -5.0f)); scale_data(data, IMAGE_SIZE, IMAGE_SIZE, randf(1.2f, -0.1f)); offset_data(data, IMAGE_SIZE, IMAGE_SIZE, randf(6.0f, -3.0f), randf(6.0f, -3.0f)); @@ -67,18 +67,18 @@ void train(neural_network *nn, dataset *train_dataset, dataset *test_dataset, fl #ifdef USE_THREADING pthread_create(&thread, NULL, (void *(*)(void *))dataset_generator, &args); mini_batch_gd(nn, learn_rate, batch_dataset); - free_dataset(batch_dataset); + free(batch_dataset); void *result = NULL; pthread_join(thread, &result); batch_dataset = (dataset *)result; #else mini_batch_gd(nn, learn_rate, batch_dataset); - free_dataset(batch_dataset); + free(batch_dataset); batch_dataset = dataset_generator(&args); #endif } // Last dataset not used - free_dataset(batch_dataset); + free(batch_dataset); } dataset *get_mnist(bool is_test) { @@ -103,20 +103,18 @@ dataset *get_mnist(bool is_test) { for (size_t i = 0; i < 10; i++) { total_length += datasets[i]->length; } + size_t inputs_length = datasets[0]->inputs_length; - 
dataset *mnist_dataset = malloc(sizeof(dataset)); - mnist_dataset->datas = malloc(sizeof(data *) * total_length); - mnist_dataset->length = total_length; - mnist_dataset->inputs_length = IMAGE_SIZE * IMAGE_SIZE; + dataset *mnist_dataset = alloc_dataset(total_length, inputs_length); size_t curr_count = 0; for (size_t i = 0; i < 10; i++) { for (size_t j = 0; j < datasets[i]->length; j++) { - mnist_dataset->datas[curr_count] = datasets[i]->datas[j]; + data *curr_data = &mnist_dataset->datas[curr_count]; + copy_data(curr_data, &datasets[i]->datas[j], inputs_length); curr_count++; } - free(datasets[i]->datas); free(datasets[i]); } @@ -142,9 +140,9 @@ int main(int argc, char **argv) { // Parameters float learn_rate = 1.5f; size_t batch_size = 30; - int learn_amount = 48000000; + int learn_amount = 4800000; int batch_amount = learn_amount / batch_size; - int log_amount = 200; // Log once reached a number of batch + int log_amount = 1000; // Log once reached a number of batch char cmd[100]; FILE *fp; @@ -223,8 +221,8 @@ int main(int argc, char **argv) { continue; } } - free_dataset(train_dataset); - free_dataset(test_dataset); + free(train_dataset); + free(test_dataset); free_neural_network(nn); free(layer_lengths); return 0; diff --git a/src/network.c b/src/network.c index dabe45b7e..d6ff1f7e2 100644 --- a/src/network.c +++ b/src/network.c @@ -17,8 +17,7 @@ layer *get_layer(size_t length, size_t prev_length) { layer *new_layer = calloc(1, sizeof(layer)); - if (!new_layer) - return NULL; + if (!new_layer) return NULL; new_layer->length = length; @@ -63,8 +62,7 @@ neural_network *get_neural_network(size_t layer_length, const size_t *layer_leng assert(layer_lengths); neural_network *nn = malloc(sizeof(neural_network)); - if (!nn) - return NULL; + if (!nn) return NULL; // Use calloc for freeing when error nn->layers = calloc(layer_length, sizeof(layer)); @@ -94,8 +92,7 @@ neural_network *get_neural_network(size_t layer_length, const size_t *layer_leng } void free_layer(layer 
*layer) { - if (!layer) - return; + if (!layer) return; free(layer->weighted_input); free(layer->output); @@ -106,8 +103,7 @@ void free_layer(layer *layer) { } void free_neural_network(neural_network *nn) { - if (!nn) - return; + if (!nn) return; for (size_t i = 0; i < nn->length; i++) free_layer(nn->layers[i]); @@ -157,8 +153,7 @@ void print_activation_percentages(neural_network *nn) { layer *output_layer = nn->layers[nn->length - 1]; float *percentages = malloc(sizeof(float) * output_layer->length); - if (!percentages) - return; + if (!percentages) return; size_t *indices = malloc(sizeof(size_t) * output_layer->length); if (!indices) { @@ -206,7 +201,7 @@ float cost(neural_network *nn, const dataset *test_dataset, size_t num_test) { layer *output_layer = nn->layers[nn->length - 1]; for (size_t i = 0; i < num_test; i++) { - data *test_data = test_dataset->datas[randnum_u32(test_dataset->length, 0)]; + data *test_data = &test_dataset->datas[randnum_u32(test_dataset->length, 0)]; compute_network(nn, test_data->inputs); for (size_t j = 0; j < output_layer->length; j++) { float output = output_layer->output[j]; @@ -322,7 +317,7 @@ void *thread_worker(void *arg) { } for (size_t i = 0; i < args->data_batch->length; i++) { - data *data = args->data_batch->datas[i]; + data *data = &args->data_batch->datas[i]; compute_network(nn, data->inputs); for (size_t j = 0; j < nn->length; j++) { @@ -463,13 +458,13 @@ float test_network_percent(neural_network *nn, const dataset *test_dataset) { int correct = 0; for (size_t i = 0; i < test_dataset->length; i++) { - compute_network(nn, test_dataset->datas[i]->inputs); + compute_network(nn, test_dataset->datas[i].inputs); size_t max = 0; for (size_t j = 0; j < nn->layers[nn->length - 1]->length; j++) { if (softmax(nn, j) > softmax(nn, max)) max = j; } - if (max == test_dataset->datas[i]->expected_index) { + if (max == test_dataset->datas[i].expected_index) { correct++; } } diff --git a/test/data.cpp b/test/data.cpp index 
abe560d7f..b31e6ec95 100644 --- a/test/data.cpp +++ b/test/data.cpp @@ -4,20 +4,6 @@ extern "C" { #include "cneuron/cneuron.h" } -TEST(DataTest, CreateData) { - data *test_data = (data *)malloc(sizeof(data)); - - size_t inputs_length = 10; - test_data->inputs = (float *)malloc(sizeof(float) * inputs_length); - EXPECT_NE(test_data, nullptr); - EXPECT_NE(test_data->inputs, nullptr); - for (size_t i = 0; i < inputs_length; i++) { - test_data->inputs[i] = static_cast(i); - } - free_data(test_data); - // No crash -} - TEST(DataTest, GetDatasetFileNotFound) { dataset *test_dataset = get_dataset("non_existent_file.dat"); ASSERT_EQ(test_dataset, nullptr); @@ -30,42 +16,34 @@ TEST(DataTest, GetDatasetValidFile) { ASSERT_GT(test_dataset->inputs_length, 0); ASSERT_NE(test_dataset, nullptr); - ASSERT_NE(test_dataset->datas[0], nullptr); - ASSERT_NE(test_dataset->datas[0]->inputs, nullptr); + ASSERT_NE(test_dataset->datas[0].inputs, nullptr); - free_dataset(test_dataset); + free(test_dataset); } TEST(DataTest, FreeDataset) { dataset *test_dataset = get_dataset("data/mnist/test/0.dat"); - free_dataset(test_dataset); - // No crash -} - -TEST(DataTest, FreeData) { - data *test_data = (data *)malloc(sizeof(data)); - test_data->inputs = (float *)malloc(sizeof(float) * 10); - - free_data(test_data); + free(test_dataset); // No crash } TEST(DataTest, CopyData) { dataset *test_dataset = get_dataset("data/mnist/test/0.dat"); + data *data_copy = alloc_data(test_dataset->inputs_length); - data *data_copy = get_data_copy(test_dataset->datas[0], test_dataset->inputs_length); + copy_data(data_copy, &test_dataset->datas[0], test_dataset->inputs_length); ASSERT_NE(data_copy, nullptr); ASSERT_NE(data_copy->inputs, nullptr); for (size_t i = 0; i < test_dataset->inputs_length; i++) { - ASSERT_FLOAT_EQ(data_copy->inputs[i], test_dataset->datas[0]->inputs[i]); + ASSERT_FLOAT_EQ(data_copy->inputs[i], test_dataset->datas[0].inputs[i]); } - ASSERT_FLOAT_EQ(data_copy->expected_index, 
test_dataset->datas[0]->expected_index); + ASSERT_FLOAT_EQ(data_copy->expected_index, test_dataset->datas[0].expected_index); - free_data(data_copy); - free_dataset(test_dataset); + free(data_copy); + free(test_dataset); } TEST(DataTest, RandomSampleDataset) { @@ -74,17 +52,15 @@ TEST(DataTest, RandomSampleDataset) { dataset *dataset_sample = get_random_dataset_sample(test_dataset, test_dataset->length - 1); ASSERT_NE(dataset_sample, nullptr); ASSERT_NE(dataset_sample->datas, nullptr); - ASSERT_NE(dataset_sample->datas[0]->inputs, nullptr); - free_dataset(dataset_sample); - free_dataset(test_dataset); + free(dataset_sample); + free(test_dataset); } TEST(DataTest, RotateData) { - data *test_data = (data *)malloc(sizeof(data)); - size_t inputs_length = 9; - test_data->inputs = (float *)malloc(sizeof(float) * inputs_length); + data *test_data = alloc_data(inputs_length); + for (size_t i = 0; i < inputs_length; i++) { test_data->inputs[i] = static_cast(i) + 1.0f; } @@ -94,14 +70,13 @@ TEST(DataTest, RotateData) { ASSERT_FLOAT_EQ(test_data->inputs[2], 1.0f); ASSERT_FLOAT_EQ(test_data->inputs[4], 5.0f); - free_data(test_data); + free(test_data); } TEST(DataTest, ScaleData) { - data *test_data = (data *)malloc(sizeof(data)); - size_t inputs_length = 9; - test_data->inputs = (float *)malloc(sizeof(float) * inputs_length); + data *test_data = alloc_data(inputs_length); + for (size_t i = 0; i < inputs_length; i++) { test_data->inputs[i] = i + 1.0f; } @@ -111,14 +86,13 @@ TEST(DataTest, ScaleData) { ASSERT_FLOAT_EQ(test_data->inputs[2], 6.0f); ASSERT_FLOAT_EQ(test_data->inputs[8], 9.0f); - free_data(test_data); + free(test_data); } TEST(DataTest, OffsetData) { - data *test_data = (data *)malloc(sizeof(data)); - size_t inputs_length = 9; - test_data->inputs = (float *)malloc(sizeof(float) * inputs_length); + data *test_data = alloc_data(inputs_length); + for (size_t i = 0; i < inputs_length; i++) { test_data->inputs[i] = i + 1.0f; } @@ -129,18 +103,18 @@ TEST(DataTest, 
OffsetData) { ASSERT_FLOAT_EQ(test_data->inputs[6], 0.0f); ASSERT_FLOAT_EQ(test_data->inputs[8], 5.0f); - free_data(test_data); + free(test_data); } TEST(DataTest, NoiseData) { - data *test_data = (data *)malloc(sizeof(data)); - size_t inputs_length = 9; - test_data->inputs = (float *)malloc(sizeof(float) * inputs_length); + data *test_data = alloc_data(inputs_length); + data *data_copy = alloc_data(inputs_length); + for (size_t i = 0; i < inputs_length; i++) { test_data->inputs[i] = i + 1.0f; } - data *data_copy = get_data_copy(test_data, inputs_length); + copy_data(data_copy, test_data, inputs_length); bool same = true; noise_data(test_data, inputs_length, 1.0f, 1.0f); @@ -152,8 +126,8 @@ TEST(DataTest, NoiseData) { } ASSERT_FALSE(same); - free_data(test_data); - free_data(data_copy); + free(test_data); + free(data_copy); } TEST(DataTest, OutputExpected) { diff --git a/test/network.cpp b/test/network.cpp index 965b344c8..047632c21 100644 --- a/test/network.cpp +++ b/test/network.cpp @@ -134,7 +134,7 @@ TEST(NetworkTest, StochasticGDSingleLayer) { for (size_t i = 0; i < 50000; i++) { for (size_t j = 0; j < test_dataset->length; j++) { - stochastic_gd(nn, 0.03f, test_dataset->datas[randnum_u32(test_dataset->length, 0)]); + stochastic_gd(nn, 0.03f, &test_dataset->datas[randnum_u32(test_dataset->length, 0)]); } if (i % 10000 == 0) { printf("Single layer learn cost: %f\n", cost(nn, test_dataset, test_dataset->length)); @@ -144,7 +144,7 @@ TEST(NetworkTest, StochasticGDSingleLayer) { ASSERT_LE(cost(nn, test_dataset, test_dataset->length), 0.2f); ASSERT_GE(test_network_percent(nn, test_dataset), 90.0f); - free_dataset(test_dataset); + free(test_dataset); free_neural_network(nn); free(layer_lengths); } @@ -161,7 +161,7 @@ TEST(NetworkTest, StochasticGDTests) { for (size_t i = 0; i < 500000; i++) { for (size_t j = 0; j < test_dataset->length; j++) { - stochastic_gd(nn, 0.001f, test_dataset->datas[randnum_u32(test_dataset->length, 0)]); + stochastic_gd(nn, 0.001f, 
&test_dataset->datas[randnum_u32(test_dataset->length, 0)]); } if (i % 100000 == 0) { printf("Stochastic Multi layer learn cost: %f\n", cost(nn, test_dataset, test_dataset->length)); @@ -182,7 +182,7 @@ TEST(NetworkTest, StochasticGDTests) { for (size_t i = 0; i < 50000; i++) { for (size_t j = 0; j < test_dataset->length; j++) { - stochastic_gd(nn, 0.03f, test_dataset->datas[randnum_u32(test_dataset->length, 0)]); + stochastic_gd(nn, 0.03f, &test_dataset->datas[randnum_u32(test_dataset->length, 0)]); } if (i % 10000 == 0) { printf("Stochastic Non-linearly separable learn cost: %f\n", cost(nn, test_dataset, test_dataset->length)); @@ -194,7 +194,7 @@ TEST(NetworkTest, StochasticGDTests) { free_neural_network(nn); free(layer_lengths); - free_dataset(test_dataset); + free(test_dataset); } TEST(NetworkTest, MiniBatchGDTests) { @@ -210,7 +210,7 @@ TEST(NetworkTest, MiniBatchGDTests) { for (size_t i = 0; i < 2000000; i++) { dataset *batch_dataset = get_random_dataset_sample(test_dataset, test_dataset->length); mini_batch_gd(nn, 0.001f, batch_dataset); - free_dataset(batch_dataset); + free(batch_dataset); if (i % 200000 == 0) { printf("Mini Batch Multi layer learn cost: %f\n", cost(nn, test_dataset, test_dataset->length)); } @@ -231,7 +231,7 @@ TEST(NetworkTest, MiniBatchGDTests) { for (size_t i = 0; i < 100000; i++) { dataset *batch_dataset = get_random_dataset_sample(test_dataset, test_dataset->length); mini_batch_gd(nn, 0.001f, batch_dataset); - free_dataset(batch_dataset); + free(batch_dataset); if (i % 20000 == 0) { printf("Mini Batch Non-linearly separable learn cost: %f\n", cost(nn, test_dataset, test_dataset->length)); } @@ -242,5 +242,5 @@ TEST(NetworkTest, MiniBatchGDTests) { free_neural_network(nn); free(layer_lengths); - free_dataset(test_dataset); + free(test_dataset); } diff --git a/test/test_utils.cpp b/test/test_utils.cpp index 5513f015d..b99ce6b5c 100644 --- a/test/test_utils.cpp +++ b/test/test_utils.cpp @@ -12,66 +12,52 @@ float sigmoid(float val, bool 
is_deravative) { dataset *get_xor() { // Create data - dataset *test_dataset = (dataset *)malloc(sizeof(dataset)); - test_dataset->length = 4; - data **datas = (data **)malloc(sizeof(data *) * test_dataset->length); - test_dataset->datas = datas; - test_dataset->inputs_length = 2; - - for (size_t i = 0; i < test_dataset->length; i++) { - test_dataset->datas[i] = (data *)malloc(sizeof(data)); - test_dataset->datas[i]->inputs = (float *)malloc(sizeof(float) * test_dataset->inputs_length); - } + size_t dataset_length = 4; + size_t inputs_length = 2; + dataset *test_dataset = alloc_dataset(dataset_length, inputs_length); // XOR gate - test_dataset->datas[0]->inputs[0] = 1.0f; - test_dataset->datas[0]->inputs[1] = 1.0f; - test_dataset->datas[0]->expected_index = 0; + test_dataset->datas[0].inputs[0] = 1.0f; + test_dataset->datas[0].inputs[1] = 1.0f; + test_dataset->datas[0].expected_index = 0; - test_dataset->datas[1]->inputs[0] = 0.0f; - test_dataset->datas[1]->inputs[1] = 0.0f; - test_dataset->datas[1]->expected_index = 0; + test_dataset->datas[1].inputs[0] = 0.0f; + test_dataset->datas[1].inputs[1] = 0.0f; + test_dataset->datas[1].expected_index = 0; - test_dataset->datas[2]->inputs[0] = 0.0f; - test_dataset->datas[2]->inputs[1] = 1.0f; - test_dataset->datas[2]->expected_index = 1; + test_dataset->datas[2].inputs[0] = 0.0f; + test_dataset->datas[2].inputs[1] = 1.0f; + test_dataset->datas[2].expected_index = 1; - test_dataset->datas[3]->inputs[0] = 1.0f; - test_dataset->datas[3]->inputs[1] = 0.0f; - test_dataset->datas[3]->expected_index = 1; + test_dataset->datas[3].inputs[0] = 1.0f; + test_dataset->datas[3].inputs[1] = 0.0f; + test_dataset->datas[3].expected_index = 1; return test_dataset; } dataset *get_or() { // Create data - dataset *test_dataset = (dataset *)malloc(sizeof(dataset)); - test_dataset->length = 4; - data **datas = (data **)malloc(sizeof(data *) * test_dataset->length); - test_dataset->datas = datas; - test_dataset->inputs_length = 2; - - for (size_t 
i = 0; i < test_dataset->length; i++) { - test_dataset->datas[i] = (data *)malloc(sizeof(data)); - test_dataset->datas[i]->inputs = (float *)malloc(sizeof(float) * test_dataset->inputs_length); - } + size_t dataset_length = 4; + size_t inputs_length = 2; + dataset *test_dataset = alloc_dataset(dataset_length, inputs_length); // OR gate - test_dataset->datas[1]->inputs[0] = 0.0f; - test_dataset->datas[1]->inputs[1] = 0.0f; - test_dataset->datas[1]->expected_index = 0; + test_dataset->datas[1].inputs[0] = 0.0f; + test_dataset->datas[1].inputs[1] = 0.0f; + test_dataset->datas[1].expected_index = 0; - test_dataset->datas[0]->inputs[0] = 1.0f; - test_dataset->datas[0]->inputs[1] = 1.0f; - test_dataset->datas[0]->expected_index = 1; + test_dataset->datas[0].inputs[0] = 1.0f; + test_dataset->datas[0].inputs[1] = 1.0f; + test_dataset->datas[0].expected_index = 1; - test_dataset->datas[2]->inputs[0] = 0.0f; - test_dataset->datas[2]->inputs[1] = 1.0f; - test_dataset->datas[2]->expected_index = 1; + test_dataset->datas[2].inputs[0] = 0.0f; + test_dataset->datas[2].inputs[1] = 1.0f; + test_dataset->datas[2].expected_index = 1; - test_dataset->datas[3]->inputs[0] = 1.0f; - test_dataset->datas[3]->inputs[1] = 0.0f; - test_dataset->datas[3]->expected_index = 1; + test_dataset->datas[3].inputs[0] = 1.0f; + test_dataset->datas[3].inputs[1] = 0.0f; + test_dataset->datas[3].expected_index = 1; return test_dataset; } diff --git a/test/test_utils.h b/test/test_utils.h index 3c310aef2..6fe5d87c1 100644 --- a/test/test_utils.h +++ b/test/test_utils.h @@ -1,7 +1,9 @@ #ifndef TEST_UTILS_H #define TEST_UTILS_H +extern "C" { #include "cneuron/cneuron.h" +} float sigmoid(float val, bool is_deravative); From 5beb9083fb9795a97248e053fc203b3f9acb53b5 Mon Sep 17 00:00:00 2001 From: LegendaryLHL Date: Sat, 14 Jun 2025 16:53:48 +0800 Subject: [PATCH 2/4] Removed linked list like pattern --- include/cneuron/cneuron.h | 14 ++++++-------- src/network.c | 26 ++++++++++---------------- test/network.cpp 
| 2 -- 3 files changed, 16 insertions(+), 26 deletions(-) diff --git a/include/cneuron/cneuron.h b/include/cneuron/cneuron.h index a77539f7b..5c6675e78 100644 --- a/include/cneuron/cneuron.h +++ b/include/cneuron/cneuron.h @@ -142,14 +142,12 @@ void hadamard_product(const float *a, const float *b, float *c, size_t length); * @brief Represents a single layer in a neural network. */ typedef struct layer { - float *delta; /**< Error delta for backpropagation. */ - float *weighted_input; /**< Weighted input values for the layer. */ - float *weights; /**< Weights of the layer in column-major format. */ - float *bias; /**< Bias values for the layer. */ - float *output; /**< Output values from the layer. */ - struct layer *prev_layer; /**< Pointer to the previous layer in the network. */ - struct layer *next_layer; /**< Pointer to the next layer in the network. */ - size_t length; /**< Number of neurons in this layer. */ + float *delta; /**< Error delta for backpropagation. */ + float *weighted_input; /**< Weighted input values for the layer. */ + float *weights; /**< Weights of the layer in column-major format. */ + float *bias; /**< Bias values for the layer. */ + float *output; /**< Output values from the layer. */ + size_t length; /**< Number of neurons in this layer. */ } layer; /** diff --git a/src/network.c b/src/network.c index d6ff1f7e2..bdaa2ec77 100644 --- a/src/network.c +++ b/src/network.c @@ -82,11 +82,6 @@ neural_network *get_neural_network(size_t layer_length, const size_t *layer_leng } } - for (size_t i = 0; i < layer_length; i++) { - nn->layers[i]->prev_layer = (i == 0) ? NULL : nn->layers[i - 1]; - nn->layers[i]->next_layer = (i == layer_length - 1) ? 
NULL : nn->layers[i + 1]; - } - nn->activation_function = activation_function; return nn; } @@ -115,18 +110,17 @@ void free_neural_network(neural_network *nn) { void compute_network(neural_network *nn, const float *inputs) { assert(nn && inputs); - layer *curr = nn->layers[0]; - while (curr != NULL) { - layer *prev = curr->prev_layer; - if (prev == NULL) { + for (size_t i = 0; i < nn->length; i++) { + layer *curr = nn->layers[i]; + if (i == 0) { cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, curr->length, 1, nn->inputs_length, 1.0f, curr->weights, curr->length, inputs, nn->inputs_length, 0.0f, curr->weighted_input, curr->length); } else { + layer *prev = nn->layers[i - 1]; cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, curr->length, 1, prev->length, 1.0f, curr->weights, curr->length, prev->output, prev->length, 0.0f, curr->weighted_input, curr->length); } cblas_saxpy(curr->length, 1.0f, curr->bias, 1, curr->weighted_input, 1); vector_apply_activation(curr->weighted_input, curr->output, curr->length, nn->activation_function, false); - curr = curr->next_layer; } } @@ -223,8 +217,6 @@ void layer_learn(neural_network *nn, size_t layer_index, float learn_rate, const assert(nn && data); layer *curr_layer = nn->layers[layer_index]; - layer *prev_layer = curr_layer->prev_layer; - layer *next_layer = curr_layer->next_layer; // f'(Z_i) in weighted_input vector_apply_activation(curr_layer->weighted_input, curr_layer->weighted_input, curr_layer->length, nn->activation_function, true); @@ -233,6 +225,7 @@ void layer_learn(neural_network *nn, size_t layer_index, float learn_rate, const curr_layer->output[data->expected_index] -= 1.0f; } else { // W^T_{i+1}δ_{i+1} in output + layer *next_layer = nn->layers[layer_index + 1]; cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans, curr_layer->length, 1, next_layer->length, 1.0f, next_layer->weights, next_layer->length, next_layer->delta, next_layer->length, 0.0f, curr_layer->output, curr_layer->length); } @@ -244,6 
+237,7 @@ void layer_learn(neural_network *nn, size_t layer_index, float learn_rate, const cblas_sger(CblasColMajor, curr_layer->length, nn->inputs_length, 1.0f, curr_layer->delta, 1, data->inputs, 1, weight_gradient, curr_layer->length); cblas_saxpy(curr_layer->length * nn->inputs_length, -learn_rate, weight_gradient, 1, curr_layer->weights, 1); } else { + layer *prev_layer = nn->layers[layer_index - 1]; weight_gradient = calloc(curr_layer->length * prev_layer->length, sizeof(float)); cblas_sger(CblasColMajor, curr_layer->length, prev_layer->length, 1.0f, curr_layer->delta, 1, prev_layer->output, 1, weight_gradient, curr_layer->length); cblas_saxpy(curr_layer->length * prev_layer->length, -learn_rate, weight_gradient, 1, curr_layer->weights, 1); @@ -259,8 +253,6 @@ void layer_learn_collect_gradient(neural_network *nn, float *layer_weights_gradi assert(nn && layer_weights_gradients && layer_bias_gradients && data); layer *curr_layer = nn->layers[layer_index]; - layer *prev_layer = curr_layer->prev_layer; - layer *next_layer = curr_layer->next_layer; // f'(Z_i) in weighted_input vector_apply_activation(curr_layer->weighted_input, curr_layer->weighted_input, curr_layer->length, nn->activation_function, true); @@ -269,6 +261,7 @@ void layer_learn_collect_gradient(neural_network *nn, float *layer_weights_gradi curr_layer->output[data->expected_index] -= 1.0f; } else { // W^T_{i+1}δ_{i+1} in output + layer *next_layer = nn->layers[layer_index + 1]; cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans, curr_layer->length, 1, next_layer->length, 1.0f, next_layer->weights, next_layer->length, next_layer->delta, next_layer->length, 0.0f, curr_layer->output, curr_layer->length); } @@ -277,6 +270,7 @@ void layer_learn_collect_gradient(neural_network *nn, float *layer_weights_gradi if (layer_index == 0) { cblas_sger(CblasColMajor, curr_layer->length, nn->inputs_length, 1.0f, curr_layer->delta, 1, data->inputs, 1, layer_weights_gradients, curr_layer->length); } else { + layer 
*prev_layer = nn->layers[layer_index - 1]; cblas_sger(CblasColMajor, curr_layer->length, prev_layer->length, 1.0f, curr_layer->delta, 1, prev_layer->output, 1, layer_weights_gradients, curr_layer->length); } @@ -384,7 +378,7 @@ bool save_network(const char *filename, neural_network *nn) { } for (size_t i = 0; i < nn->length; i++) { - size_t weights_length = nn->layers[i]->length * ((i == 0) ? nn->inputs_length : nn->layers[i]->prev_layer->length); + size_t weights_length = nn->layers[i]->length * ((i == 0) ? nn->inputs_length : nn->layers[i - 1]->length); if (fwrite(&(nn->layers[i]->length), sizeof(uint64_t), 1, file) != 1 || fwrite(nn->layers[i]->weights, sizeof(float), weights_length, file) != weights_length || fwrite(nn->layers[i]->bias, sizeof(float), nn->layers[i]->length, file) != nn->layers[i]->length) { fprintf(stderr, "Failed to write layer %zu data to '%s'\n", i, filename); @@ -437,7 +431,7 @@ bool load_network(const char *filename, neural_network *nn) { goto cleanup; } - size_t weights_length = nn->layers[i]->length * ((i == 0) ? nn->inputs_length : nn->layers[i]->prev_layer->length); + size_t weights_length = nn->layers[i]->length * ((i == 0) ? nn->inputs_length : nn->layers[i - 1]->length); if (fread(nn->layers[i]->weights, sizeof(float), weights_length, file) != weights_length || fread(nn->layers[i]->bias, sizeof(float), nn->layers[i]->length, file) != nn->layers[i]->length) { fprintf(stderr, "Failed to read layer %zu data from '%s'\n", i, filename); goto cleanup; diff --git a/test/network.cpp b/test/network.cpp index 047632c21..cd6d33a74 100644 --- a/test/network.cpp +++ b/test/network.cpp @@ -54,8 +54,6 @@ TEST(NetworkTest, GetNeuralNetwork) { for (size_t i = 0; i < layer_length; i++) { ASSERT_NE(nn->layers[i], nullptr); ASSERT_EQ(nn->layers[i]->length, layer_lengths[i]); - ASSERT_EQ(nn->layers[i]->prev_layer, (i == 0) ? nullptr : nn->layers[i - 1]); - ASSERT_EQ(nn->layers[i]->next_layer, (i == layer_length - 1) ? 
nullptr : nn->layers[i + 1]); } free_neural_network(nn); From 960ac837794dbcef6adb0aa4a6e882c33df78874 Mon Sep 17 00:00:00 2001 From: LegendaryLHL Date: Sat, 14 Jun 2025 18:28:39 +0800 Subject: [PATCH 3/4] Use one alloc for network --- include/cneuron/cneuron.h | 45 +++++------ src/main.c | 14 ++-- src/network.c | 154 ++++++++++++++------------------------ test/network.cpp | 57 ++++---------- 4 files changed, 95 insertions(+), 175 deletions(-) diff --git a/include/cneuron/cneuron.h b/include/cneuron/cneuron.h index 5c6675e78..073a9b302 100644 --- a/include/cneuron/cneuron.h +++ b/include/cneuron/cneuron.h @@ -24,19 +24,19 @@ typedef struct { /** * @brief Allocate and setup a data * - * @return inputs_length number of input of the data + * @param inputs_length Number of input of the data * - * @data newly allocated data + * @return Newly allocated data */ data *alloc_data(size_t inputs_length); /** * @brief Allocate and setup a dataset * - * @return dataset_length number of data of the dataset - * @return inputs_length number of input of the data + * @param dataset_length Number of data of the dataset + * @param inputs_length Number of input of the data * - * @data newly allocated dataset + * @return newly allocated dataset */ dataset *alloc_dataset(size_t dataset_length, size_t inputs_length); @@ -154,45 +154,34 @@ typedef struct layer { * @brief Represents a neural network with multiple layers. */ typedef struct { - layer **layers; /**< Array of pointers to layers in the network. */ + layer *layers; /**< Array of struct to layers in the network. */ size_t length; /**< Number of layers in the network. */ size_t inputs_length; /**< Number of inputs to the network. */ float (*activation_function)(float, bool); /**< Pointer to the activation function used in the network. */ } neural_network; /** - * @brief Allocates and initializes a new layer. + * @brief Allocate and setup a neural_network * - * @param length Number of neurons in this layer. 
- * @param prev_length Number of neurons in the previous layer. - * @return Pointer to the newly created layer. + * @param network_length Number of layers in the network. + * @param layers_length Array specifying the number of neurons in each layer. + * @param inputs_length Number of inputs to the network. + * + * @return Newly allocated data */ -layer *get_layer(size_t length, size_t prev_length); +neural_network *alloc_neural_network(size_t network_length, const size_t *layers_length, size_t inputs_length); /** * @brief Allocates and initializes a new neural network. * - * @param layer_length Number of layers in the network. - * @param layer_lengths Array specifying the number of neurons in each layer. + * @param network_length Number of layers in the network. + * @param layers_length Array specifying the number of neurons in each layer. * @param inputs_length Number of inputs to the network. * @param activation_function Activation function to be used in the network. - * @return Pointer to the newly created neural network. - */ -neural_network *get_neural_network(size_t layer_length, const size_t *layer_lengths, size_t inputs_length, float (*activation_function)(float, bool)); - -/** - * @brief Frees all memory associated with a 'layer' structure and its conetents. * - * @param layer Pointer to the layer to be freed. - */ -void free_layer(layer *layer); - -/** - * @brief Frees all memory associated with a 'neural_network' structure and its conetents. - * - * @param nn Pointer to the neural network to be freed. + * @return Pointer to the newly created neural network. */ -void free_neural_network(neural_network *nn); +neural_network *get_neural_network(size_t network_length, const size_t *layers_length, size_t inputs_length, float (*activation_function)(float, bool)); /** * @brief Computes the output of the neural network for the given inputs. 
diff --git a/src/main.c b/src/main.c index d0aa7f942..c9b9fc943 100644 --- a/src/main.c +++ b/src/main.c @@ -130,12 +130,12 @@ int main(int argc, char **argv) { dataset *train_dataset = get_mnist(false); dataset *test_dataset = get_mnist(true); size_t network_length = 3; - size_t *layer_lengths = malloc(sizeof(size_t) * network_length); - layer_lengths[0] = 100; - layer_lengths[1] = 16; - layer_lengths[2] = 10; + size_t *layers_length = malloc(sizeof(size_t) * network_length); + layers_length[0] = 100; + layers_length[1] = 16; + layers_length[2] = 10; - neural_network *nn = get_neural_network(network_length, layer_lengths, train_dataset->inputs_length, &sigmoid); + neural_network *nn = get_neural_network(network_length, layers_length, train_dataset->inputs_length, &sigmoid); // Parameters float learn_rate = 1.5f; @@ -223,7 +223,7 @@ int main(int argc, char **argv) { } free(train_dataset); free(test_dataset); - free_neural_network(nn); - free(layer_lengths); + free(nn); + free(layers_length); return 0; } diff --git a/src/network.c b/src/network.c index bdaa2ec77..27d32dd49 100644 --- a/src/network.c +++ b/src/network.c @@ -15,70 +15,47 @@ #include "cneuron/cneuron.h" #include "rand.h" -layer *get_layer(size_t length, size_t prev_length) { - layer *new_layer = calloc(1, sizeof(layer)); - if (!new_layer) return NULL; - - new_layer->length = length; - - new_layer->weights = malloc(sizeof(float) * length * prev_length); - if (!new_layer->weights) { - free_layer(new_layer); - return NULL; - } - - for (size_t i = 0; i < length * prev_length; i++) { - new_layer->weights[i] = randf(2.0f, -1.0f); - } - - new_layer->delta = calloc(length, sizeof(float)); - if (!new_layer->delta) { - free_layer(new_layer); - return NULL; +neural_network *alloc_neural_network(size_t network_length, const size_t *layers_length, size_t inputs_length) { + size_t total_float = 0; + for (size_t i = 0; i < network_length; i++) { + size_t prev_length = (i == 0) ? 
inputs_length : layers_length[i - 1]; + total_float += layers_length[i] * 4 + layers_length[i] * prev_length; } + neural_network *nn = calloc(1, sizeof(neural_network) + sizeof(layer) * network_length + sizeof(float) * total_float); + if (!nn) return NULL; - new_layer->bias = calloc(length, sizeof(float)); - if (!new_layer->bias) { - free_layer(new_layer); - return NULL; - } + nn->inputs_length = inputs_length; + nn->length = network_length; - new_layer->output = calloc(length, sizeof(float)); - if (!new_layer->output) { - free_layer(new_layer); - return NULL; - } + nn->layers = (layer *)(nn + 1); + float *float_pointing = (float *)(nn->layers + network_length); + for (size_t i = 0; i < network_length; ++i) { + nn->layers[i].length = layers_length[i]; - new_layer->weighted_input = calloc(length, sizeof(float)); - if (!new_layer->output) { - free_layer(new_layer); - return NULL; + size_t prev_length = (i == 0) ? inputs_length : layers_length[i - 1]; + layer *curr_layer = &nn->layers[i]; + curr_layer->delta = float_pointing; + curr_layer->weighted_input = float_pointing + layers_length[i]; + curr_layer->bias = float_pointing + 2 * layers_length[i]; + curr_layer->output = float_pointing + 3 * layers_length[i]; + curr_layer->weights = float_pointing + 4 * layers_length[i]; + float_pointing += 4 * layers_length[i] + layers_length[i] * prev_length; } - return new_layer; + return nn; } -neural_network *get_neural_network(size_t layer_length, const size_t *layer_lengths, size_t inputs_length, float (*activation_function)(float, bool)) { - assert(layer_lengths); +neural_network *get_neural_network(size_t network_length, const size_t *layers_length, size_t inputs_length, float (*activation_function)(float, bool)) { + assert(layers_length); - neural_network *nn = malloc(sizeof(neural_network)); + neural_network *nn = alloc_neural_network(network_length, layers_length, inputs_length); if (!nn) return NULL; - // Use calloc for freeing when error - nn->layers = 
calloc(layer_length, sizeof(layer)); - if (!nn->layers) { - free(nn); - return NULL; - } - - nn->length = layer_length; - nn->inputs_length = inputs_length; - - for (size_t i = 0; i < layer_length; i++) { - nn->layers[i] = get_layer(layer_lengths[i], (i == 0) ? inputs_length : layer_lengths[i - 1]); - if (!nn->layers[i]) { - free_neural_network(nn); - return NULL; + for (size_t i = 0; i < network_length; i++) { + size_t prev_length = (i == 0) ? inputs_length : layers_length[i - 1]; + for (size_t j = 0; j < layers_length[i] * prev_length; j++) { + // Initialise weights to -1.0f - 1.0f + nn->layers[i].weights[j] = randf(2.0f, -1.0f); } } @@ -86,36 +63,15 @@ neural_network *get_neural_network(size_t layer_length, const size_t *layer_leng return nn; } -void free_layer(layer *layer) { - if (!layer) return; - - free(layer->weighted_input); - free(layer->output); - free(layer->bias); - free(layer->delta); - free(layer->weights); - free(layer); -} - -void free_neural_network(neural_network *nn) { - if (!nn) return; - - for (size_t i = 0; i < nn->length; i++) - free_layer(nn->layers[i]); - - free(nn->layers); - free(nn); -} - void compute_network(neural_network *nn, const float *inputs) { assert(nn && inputs); for (size_t i = 0; i < nn->length; i++) { - layer *curr = nn->layers[i]; + layer *curr = &nn->layers[i]; if (i == 0) { cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, curr->length, 1, nn->inputs_length, 1.0f, curr->weights, curr->length, inputs, nn->inputs_length, 0.0f, curr->weighted_input, curr->length); } else { - layer *prev = nn->layers[i - 1]; + layer *prev = &nn->layers[i - 1]; cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, curr->length, 1, prev->length, 1.0f, curr->weights, curr->length, prev->output, prev->length, 0.0f, curr->weighted_input, curr->length); } @@ -130,7 +86,7 @@ float softmax(neural_network *nn, size_t neuron_index) { float sum = 0.0f; float max_output = -INFINITY; - layer *output_layer = nn->layers[nn->length - 1]; + layer 
*output_layer = &nn->layers[nn->length - 1]; for (size_t i = 0; i < output_layer->length; i++) { if (output_layer->output[i] > max_output) max_output = output_layer->output[i]; @@ -145,7 +101,7 @@ float softmax(neural_network *nn, size_t neuron_index) { void print_activation_percentages(neural_network *nn) { assert(nn); - layer *output_layer = nn->layers[nn->length - 1]; + layer *output_layer = &nn->layers[nn->length - 1]; float *percentages = malloc(sizeof(float) * output_layer->length); if (!percentages) return; @@ -193,7 +149,7 @@ float cost(neural_network *nn, const dataset *test_dataset, size_t num_test) { float cost = 0.0f; - layer *output_layer = nn->layers[nn->length - 1]; + layer *output_layer = &nn->layers[nn->length - 1]; for (size_t i = 0; i < num_test; i++) { data *test_data = &test_dataset->datas[randnum_u32(test_dataset->length, 0)]; compute_network(nn, test_data->inputs); @@ -208,7 +164,7 @@ float cost(neural_network *nn, const dataset *test_dataset, size_t num_test) { void print_result(neural_network *nn) { assert(nn); - layer *output_layer = nn->layers[nn->length - 1]; + layer *output_layer = &nn->layers[nn->length - 1]; for (size_t i = 0; i < output_layer->length; i++) printf("%f ", output_layer->output[i]); } @@ -216,7 +172,7 @@ void print_result(neural_network *nn) { void layer_learn(neural_network *nn, size_t layer_index, float learn_rate, const data *data) { assert(nn && data); - layer *curr_layer = nn->layers[layer_index]; + layer *curr_layer = &nn->layers[layer_index]; // f'(Z_i) in weighted_input vector_apply_activation(curr_layer->weighted_input, curr_layer->weighted_input, curr_layer->length, nn->activation_function, true); @@ -225,7 +181,7 @@ void layer_learn(neural_network *nn, size_t layer_index, float learn_rate, const curr_layer->output[data->expected_index] -= 1.0f; } else { // W^T_{i+1}δ_{i+1} in output - layer *next_layer = nn->layers[layer_index + 1]; + layer *next_layer = &nn->layers[layer_index + 1]; cblas_sgemm(CblasColMajor, 
CblasTrans, CblasNoTrans, curr_layer->length, 1, next_layer->length, 1.0f, next_layer->weights, next_layer->length, next_layer->delta, next_layer->length, 0.0f, curr_layer->output, curr_layer->length); } @@ -237,7 +193,7 @@ void layer_learn(neural_network *nn, size_t layer_index, float learn_rate, const cblas_sger(CblasColMajor, curr_layer->length, nn->inputs_length, 1.0f, curr_layer->delta, 1, data->inputs, 1, weight_gradient, curr_layer->length); cblas_saxpy(curr_layer->length * nn->inputs_length, -learn_rate, weight_gradient, 1, curr_layer->weights, 1); } else { - layer *prev_layer = nn->layers[layer_index - 1]; + layer *prev_layer = &nn->layers[layer_index - 1]; weight_gradient = calloc(curr_layer->length * prev_layer->length, sizeof(float)); cblas_sger(CblasColMajor, curr_layer->length, prev_layer->length, 1.0f, curr_layer->delta, 1, prev_layer->output, 1, weight_gradient, curr_layer->length); cblas_saxpy(curr_layer->length * prev_layer->length, -learn_rate, weight_gradient, 1, curr_layer->weights, 1); @@ -252,7 +208,7 @@ void layer_learn(neural_network *nn, size_t layer_index, float learn_rate, const void layer_learn_collect_gradient(neural_network *nn, float *layer_weights_gradients, float *layer_bias_gradients, size_t layer_index, const data *data) { assert(nn && layer_weights_gradients && layer_bias_gradients && data); - layer *curr_layer = nn->layers[layer_index]; + layer *curr_layer = &nn->layers[layer_index]; // f'(Z_i) in weighted_input vector_apply_activation(curr_layer->weighted_input, curr_layer->weighted_input, curr_layer->length, nn->activation_function, true); @@ -261,7 +217,7 @@ void layer_learn_collect_gradient(neural_network *nn, float *layer_weights_gradi curr_layer->output[data->expected_index] -= 1.0f; } else { // W^T_{i+1}δ_{i+1} in output - layer *next_layer = nn->layers[layer_index + 1]; + layer *next_layer = &nn->layers[layer_index + 1]; cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans, curr_layer->length, 1, next_layer->length, 
1.0f, next_layer->weights, next_layer->length, next_layer->delta, next_layer->length, 0.0f, curr_layer->output, curr_layer->length); } @@ -270,7 +226,7 @@ void layer_learn_collect_gradient(neural_network *nn, float *layer_weights_gradi if (layer_index == 0) { cblas_sger(CblasColMajor, curr_layer->length, nn->inputs_length, 1.0f, curr_layer->delta, 1, data->inputs, 1, layer_weights_gradients, curr_layer->length); } else { - layer *prev_layer = nn->layers[layer_index - 1]; + layer *prev_layer = &nn->layers[layer_index - 1]; cblas_sger(CblasColMajor, curr_layer->length, prev_layer->length, 1.0f, curr_layer->delta, 1, prev_layer->output, 1, layer_weights_gradients, curr_layer->length); } @@ -305,9 +261,9 @@ void *thread_worker(void *arg) { float **bias_gradients = args->bias_gradients; for (size_t i = 0; i < nn->length; i++) { - size_t weights_size = nn->layers[i]->length * ((i == 0) ? nn->inputs_length : nn->layers[i - 1]->length); + size_t weights_size = nn->layers[i].length * ((i == 0) ? nn->inputs_length : nn->layers[i - 1].length); weights_gradients[i] = calloc(weights_size, sizeof(float)); - bias_gradients[i] = calloc(nn->layers[i]->length, sizeof(float)); + bias_gradients[i] = calloc(nn->layers[i].length, sizeof(float)); } for (size_t i = 0; i < args->data_batch->length; i++) { @@ -342,14 +298,14 @@ void mini_batch_gd(neural_network *nn, float learn_rate, const dataset *data_bat #endif for (size_t i = 0; i < nn->length; i++) { - size_t weights_size = nn->layers[i]->length * ((i == 0) ? nn->inputs_length : nn->layers[i - 1]->length); + size_t weights_size = nn->layers[i].length * ((i == 0) ? 
nn->inputs_length : nn->layers[i - 1].length); for (size_t j = 0; j < weights_size; j++) { - nn->layers[i]->weights[j] -= weights_gradients[i][j] / data_batch->length * learn_rate; + nn->layers[i].weights[j] -= weights_gradients[i][j] / data_batch->length * learn_rate; } - for (size_t j = 0; j < nn->layers[i]->length; j++) { - nn->layers[i]->bias[j] -= (bias_gradients[i][j] / data_batch->length) * learn_rate; + for (size_t j = 0; j < nn->layers[i].length; j++) { + nn->layers[i].bias[j] -= (bias_gradients[i][j] / data_batch->length) * learn_rate; } } @@ -378,9 +334,9 @@ bool save_network(const char *filename, neural_network *nn) { } for (size_t i = 0; i < nn->length; i++) { - size_t weights_length = nn->layers[i]->length * ((i == 0) ? nn->inputs_length : nn->layers[i - 1]->length); + size_t weights_length = nn->layers[i].length * ((i == 0) ? nn->inputs_length : nn->layers[i - 1].length); - if (fwrite(&(nn->layers[i]->length), sizeof(uint64_t), 1, file) != 1 || fwrite(nn->layers[i]->weights, sizeof(float), weights_length, file) != weights_length || fwrite(nn->layers[i]->bias, sizeof(float), nn->layers[i]->length, file) != nn->layers[i]->length) { + if (fwrite(&(nn->layers[i].length), sizeof(uint64_t), 1, file) != 1 || fwrite(nn->layers[i].weights, sizeof(float), weights_length, file) != weights_length || fwrite(nn->layers[i].bias, sizeof(float), nn->layers[i].length, file) != nn->layers[i].length) { fprintf(stderr, "Failed to write layer %zu data to '%s'\n", i, filename); fclose(file); return false; @@ -426,13 +382,13 @@ bool load_network(const char *filename, neural_network *nn) { fprintf(stderr, "Failed to read layer_length from %s\n", filename); goto cleanup; } - if (layer_length != nn->layers[i]->length) { - fprintf(stderr, "Invalid layer length. Expected: %zu. But found: %llu\n", nn->layers[i]->length, (unsigned long long)layer_length); + if (layer_length != nn->layers[i].length) { + fprintf(stderr, "Invalid layer length. Expected: %zu. 
But found: %llu\n", nn->layers[i].length, (unsigned long long)layer_length); goto cleanup; } - size_t weights_length = nn->layers[i]->length * ((i == 0) ? nn->inputs_length : nn->layers[i - 1]->length); - if (fread(nn->layers[i]->weights, sizeof(float), weights_length, file) != weights_length || fread(nn->layers[i]->bias, sizeof(float), nn->layers[i]->length, file) != nn->layers[i]->length) { + size_t weights_length = nn->layers[i].length * ((i == 0) ? nn->inputs_length : nn->layers[i - 1].length); + if (fread(nn->layers[i].weights, sizeof(float), weights_length, file) != weights_length || fread(nn->layers[i].bias, sizeof(float), nn->layers[i].length, file) != nn->layers[i].length) { fprintf(stderr, "Failed to read layer %zu data from '%s'\n", i, filename); goto cleanup; } @@ -454,7 +410,7 @@ float test_network_percent(neural_network *nn, const dataset *test_dataset) { for (size_t i = 0; i < test_dataset->length; i++) { compute_network(nn, test_dataset->datas[i].inputs); size_t max = 0; - for (size_t j = 0; j < nn->layers[nn->length - 1]->length; j++) { + for (size_t j = 0; j < nn->layers[nn->length - 1].length; j++) { if (softmax(nn, j) > softmax(nn, max)) max = j; } diff --git a/test/network.cpp b/test/network.cpp index cd6d33a74..6096da114 100644 --- a/test/network.cpp +++ b/test/network.cpp @@ -21,22 +21,6 @@ TEST(NetworkTest, RandomFloat) { ASSERT_FALSE(same); } -TEST(NetworkTest, GetLayer) { - size_t layer_length = 3; - layer *test_layer = get_layer(layer_length, 5); - - ASSERT_NE(test_layer, nullptr); - ASSERT_NE(test_layer->delta, nullptr); - ASSERT_NE(test_layer->weighted_input, nullptr); - ASSERT_NE(test_layer->weights, nullptr); - ASSERT_NE(test_layer->bias, nullptr); - ASSERT_NE(test_layer->output, nullptr); - - ASSERT_EQ(test_layer->length, layer_length); - - free_layer(test_layer); -} - TEST(NetworkTest, GetNeuralNetwork) { size_t layer_length = 3; size_t *layer_lengths = (size_t *)malloc(sizeof(size_t) * layer_length); @@ -52,11 +36,10 @@ 
TEST(NetworkTest, GetNeuralNetwork) { ASSERT_EQ(nn->activation_function, &sigmoid); ASSERT_NE(nn->layers, nullptr); for (size_t i = 0; i < layer_length; i++) { - ASSERT_NE(nn->layers[i], nullptr); - ASSERT_EQ(nn->layers[i]->length, layer_lengths[i]); + ASSERT_EQ(nn->layers[i].length, layer_lengths[i]); } - free_neural_network(nn); + free(nn); free(layer_lengths); } @@ -66,19 +49,11 @@ TEST(NetworkTest, FreeDataset) { layer_lengths[0] = 2; neural_network *nn = get_neural_network(layer_length, layer_lengths, 2, nullptr); - free_neural_network(nn); + free(nn); // No crash free(layer_lengths); } -TEST(NetworkTest, FreeLayer) { - size_t layer_length = 2; - layer *test_layer = get_layer(layer_length, 3); - - free_layer(test_layer); - // No crash -} - TEST(NetworkTest, ComputeNetwork) { size_t layer_length = 1; size_t inputs_length = 1; @@ -89,15 +64,15 @@ TEST(NetworkTest, ComputeNetwork) { float *inputs = (float *)malloc(sizeof(float) * inputs_length); inputs[0] = 0.2f; - nn->layers[0]->weights[0] = 0.5f; - nn->layers[0]->bias[0] = 0.3f; + nn->layers[0].weights[0] = 0.5f; + nn->layers[0].bias[0] = 0.3f; compute_network(nn, inputs); - ASSERT_FLOAT_EQ(nn->layers[0]->output[0], 0.59868766f); + ASSERT_FLOAT_EQ(nn->layers[0].output[0], 0.59868766f); free(inputs); - free_neural_network(nn); + free(nn); free(layer_lengths); } @@ -108,15 +83,15 @@ TEST(NetworkTest, Softmax) { layer_lengths[0] = 3; neural_network *nn = get_neural_network(layer_length, layer_lengths, inputs_length, &sigmoid); - nn->layers[0]->output[0] = 0.2f; - nn->layers[0]->output[1] = 0.3f; - nn->layers[0]->output[2] = 0.5f; + nn->layers[0].output[0] = 0.2f; + nn->layers[0].output[1] = 0.3f; + nn->layers[0].output[2] = 0.5f; ASSERT_FLOAT_EQ(softmax(nn, 0), 0.28943311f); ASSERT_FLOAT_EQ(softmax(nn, 1), 0.31987305f); ASSERT_FLOAT_EQ(softmax(nn, 2), 0.39069383f); - free_neural_network(nn); + free(nn); free(layer_lengths); } @@ -143,7 +118,7 @@ TEST(NetworkTest, StochasticGDSingleLayer) { 
ASSERT_GE(test_network_percent(nn, test_dataset), 90.0f); free(test_dataset); - free_neural_network(nn); + free(nn); free(layer_lengths); } @@ -169,7 +144,7 @@ TEST(NetworkTest, StochasticGDTests) { ASSERT_LE(cost(nn, test_dataset, test_dataset->length), 0.2f); ASSERT_GE(test_network_percent(nn, test_dataset), 90.0f); - free_neural_network(nn); + free(nn); free(layer_lengths); // Non-linearly separable test @@ -190,7 +165,7 @@ TEST(NetworkTest, StochasticGDTests) { ASSERT_GE(cost(nn, test_dataset, test_dataset->length), 0.2f); ASSERT_LE(test_network_percent(nn, test_dataset), 90.0f); - free_neural_network(nn); + free(nn); free(layer_lengths); free(test_dataset); } @@ -217,7 +192,7 @@ TEST(NetworkTest, MiniBatchGDTests) { ASSERT_LE(cost(nn, test_dataset, test_dataset->length), 0.2f); ASSERT_GE(test_network_percent(nn, test_dataset), 90.0f); - free_neural_network(nn); + free(nn); free(layer_lengths); // Non-linearly separable test @@ -238,7 +213,7 @@ TEST(NetworkTest, MiniBatchGDTests) { ASSERT_GE(cost(nn, test_dataset, test_dataset->length), 0.2f); ASSERT_LE(test_network_percent(nn, test_dataset), 90.0f); - free_neural_network(nn); + free(nn); free(layer_lengths); free(test_dataset); } From 0dea7b59e1b0e4387aefc70c31da5b1c6c0b35f1 Mon Sep 17 00:00:00 2001 From: Lee Hoon Lim Date: Mon, 16 Jun 2025 11:52:24 +0800 Subject: [PATCH 4/4] Added undefined sanitize --- CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e8df94ea9..643d3c751 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,7 +31,7 @@ endif() file(GLOB_RECURSE SRC_FILES src/*.c EXCLUDE "src/main.c") file(GLOB_RECURSE TEST_FILES test/*.cpp) -set(ASAN_FLAGS -fsanitize=address -fno-omit-frame-pointer) +set(SANITIZE_FLAGS -fsanitize=address -fsanitize=undefined -fno-omit-frame-pointer) set(WARNING_FLAGS -Wall -Wextra -Wpedantic) set(RELEASE_FLAGS -O3 -DNDEBUG -march=native) set(PROFILE_FLAG -O3 -p -g) @@ -48,8 +48,8 @@ 
function(apply_target target) target_compile_options(${target} PRIVATE ${WARNING_FLAGS}) if(CMAKE_BUILD_TYPE STREQUAL "Debug") - target_compile_options(${target} PRIVATE ${ASAN_FLAGS} -g) - target_link_options(${target} PRIVATE ${ASAN_FLAGS}) + target_compile_options(${target} PRIVATE ${SANITIZE_FLAGS} -g) + target_link_options(${target} PRIVATE ${SANITIZE_FLAGS}) elseif(CMAKE_BUILD_TYPE STREQUAL "Profile") target_compile_options(${target} PRIVATE ${PROFILE_FLAG}) elseif(CMAKE_BUILD_TYPE STREQUAL "Release")