neural-net-cpp/Layer.cpp at master · emily-emily/neural-net-cpp · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <stdexcept>
#include <utility>

#include "NeuralNetwork.h"

// Builds a layer with nIn inputs and nOut outputs using activation function f.
//
// Weights are drawn with rand() from [-1, 1] and then divided by sqrt(nIn);
// biases, gradient accumulators and the per-run scratch buffers are
// value-initialised by the Vector/Matrix constructors.
//
// The sequence of rand() calls (one per weight, input-major order) is the same
// as before, so results for a given srand() seed are unchanged.
NeuralNetwork::Layer::Layer(int nIn, int nOut, ActivationFunction f)
    : numNodesIn(nIn)
    , numNodesOut(nOut)
    , activationFunctionID(f)
    , activationFunction(activationFunctions[f])
    , activationDerivative(activationDerivatives[f]) {
  // loop-invariant scale factor applied to every initial weight
  const double scale = std::sqrt(static_cast<double>(numNodesIn));

  weights = Matrix(numNodesIn);
  costGradientW = Matrix(numNodesIn);
  for (int in = 0; in < numNodesIn; in++) {
    weights[in] = Vector(numNodesOut);
    costGradientW[in] = Vector(numNodesOut);
    for (int out = 0; out < numNodesOut; out++) {
      // uniform value in [-1, 1], shrunk by 1/sqrt(numNodesIn)
      weights[in][out] = (std::rand() / double(RAND_MAX) * 2 - 1) / scale;
    }
  }

  biases = Vector(numNodesOut);
  costGradientB = Vector(numNodesOut);
  // inputs is indexed by input node (see runLayer/updateGradients), so it must
  // hold numNodesIn entries, not numNodesOut.
  inputs = Vector(numNodesIn);
  weightedInputs = Vector(numNodesOut);
  activations = Vector(numNodesOut);
}

// Restores a layer from the JSON object produced by toJSON().
//
// Reads the keys "nodesIn", "nodesOut", "activation", "weights" and "biases".
// Throws std::invalid_argument when the stored weights/biases do not have the
// shape announced by "nodesIn"/"nodesOut": weights must be nodesIn rows of
// nodesOut values and biases must hold nodesOut values. Without this check a
// malformed file would lead to out-of-bounds indexing in runLayer().
NeuralNetwork::Layer::Layer(json data) {
  // extract data from json
  numNodesIn = data["nodesIn"];
  numNodesOut = data["nodesOut"];
  activationFunctionID = data["activation"];
  activationFunction = activationFunctions[activationFunctionID];
  activationDerivative = activationDerivatives[activationFunctionID];
  weights = data["weights"];
  biases = data["biases"].get<std::vector<double>>();

  // validate shapes before anything is allocated from the announced sizes;
  // a negative count wraps to a huge value here and is rejected as a mismatch
  const std::size_t expectedIn = static_cast<std::size_t>(numNodesIn);
  const std::size_t expectedOut = static_cast<std::size_t>(numNodesOut);
  if (weights.size() != expectedIn) {
    throw std::invalid_argument("Layer: number of weight rows does not match nodesIn");
  }
  for (const auto& row : weights) {
    if (row.size() != expectedOut) {
      throw std::invalid_argument("Layer: weight row length does not match nodesOut");
    }
  }
  if (biases.size() != expectedOut) {
    throw std::invalid_argument("Layer: number of biases does not match nodesOut");
  }

  // allocate remaining empty parameters
  costGradientW = Matrix(numNodesIn);
  for (int in = 0; in < numNodesIn; in++) {
    costGradientW[in] = Vector(numNodesOut);
  }
  costGradientB = Vector(numNodesOut);
  // inputs is indexed by input node (see runLayer/updateGradients), so it must
  // hold numNodesIn entries, not numNodesOut.
  inputs = Vector(numNodesIn);
  weightedInputs = Vector(numNodesOut);
  activations = Vector(numNodesOut);
}

// Debug helper: hands the layer's weight matrix to printMatrix().
void NeuralNetwork::Layer::printWeights() {
  printMatrix(weights);
}

// Debug helper: hands the layer's bias vector to printVector().
void NeuralNetwork::Layer::printBiases() {
  printVector(biases);
}

// Debug helper: hands the accumulated weight gradients to printMatrix().
void NeuralNetwork::Layer::printGradientsW() {
  printMatrix(costGradientW);
}

// Debug helper: hands the accumulated bias gradients to printVector().
void NeuralNetwork::Layer::printGradientsB() {
  printVector(costGradientB);
}

// Serialises the persistent state of the layer into a JSON object with the
// keys "nodesIn", "nodesOut", "activation", "weights" and "biases".
// Gradient accumulators and per-run buffers are not written.
json NeuralNetwork::Layer::toJSON() {
  json serialized = {
      {"nodesIn", numNodesIn},
      {"nodesOut", numNodesOut},
      {"activation", activationFunctionID},
      {"weights", weights},
      {"biases", biases},
  };
  return serialized;
}

// Forward pass for this layer.
//
// Stores layerInputs in `inputs`, then for every output node computes
//   weightedInputs[out] = biases[out] + sum_in inputs[in] * weights[in][out]
// and activations[out] = activationFunction(weightedInputs[out]).
// Returns a copy of the activations.
//
// Precondition (not checked): layerInputs holds at least numNodesIn values.
Vector NeuralNetwork::Layer::runLayer(Vector layerInputs) {
  // the parameter is already our own copy, so take it over instead of
  // copying it a second time
  inputs = std::move(layerInputs);

  for (int out = 0; out < numNodesOut; out++) {
    // calculate weighted input, starting from the bias
    double sum = biases[out];
    for (int in = 0; in < numNodesIn; in++) {
      sum += inputs[in] * weights[in][out];
    }
    weightedInputs[out] = sum;

    // apply activation function
    activations[out] = activationFunction(sum);
  }

  return activations;
}

// Adds one sample's contribution to the gradient accumulators.
//
// For each output node o, nodeValues[o] is added to costGradientB[o], and
// inputs[i] * nodeValues[o] is added to costGradientW[i][o] for every input
// node i. `inputs` is whatever the most recent runLayer() call stored.
void NeuralNetwork::Layer::updateGradients(Vector nodeValues) {
  for (int o = 0; o < numNodesOut; ++o) {
    const double nodeValue = nodeValues[o];

    // partial derivative of cost wrt bias
    costGradientB[o] += nodeValue;

    // partial derivative of cost wrt each weight feeding this node
    for (int i = 0; i < numNodesIn; ++i) {
      costGradientW[i][o] += inputs[i] * nodeValue;
    }
  }
}

// Performs one gradient-descent step: every bias and weight is decreased by
// its accumulated gradient multiplied by learnRate. The accumulators
// themselves are left untouched.
void NeuralNetwork::Layer::applyGradients(double learnRate) {
  // biases
  for (int o = 0; o < numNodesOut; ++o) {
    biases[o] -= costGradientB[o] * learnRate;
  }

  // weights, one input row at a time
  for (int i = 0; i < numNodesIn; ++i) {
    for (int o = 0; o < numNodesOut; ++o) {
      weights[i][o] -= costGradientW[i][o] * learnRate;
    }
  }
}

// Replaces both gradient accumulators with freshly constructed containers:
// costGradientB with numNodesOut entries and costGradientW with numNodesIn
// rows of numNodesOut entries.
void NeuralNetwork::Layer::resetGradients() {
  costGradientB = Vector(numNodesOut);

  costGradientW = Matrix(numNodesIn);
  for (auto& row : costGradientW) {
    row = Vector(numNodesOut);
  }
}

// Computes the node values for this layer when it is the output layer.
//
// For each index i of `expected` the result is
//   nodeCostDerivative(activations[i], expected[i])
//     * activationDerivative(weightedInputs[i]).
// The returned vector has the same length as `expected`; activations and
// weightedInputs come from the most recent runLayer() call.
Vector NeuralNetwork::Layer::calculateOutputLayerNodeValues(Vector expected) {
  const int count = expected.size();
  Vector result(count);

  for (int node = 0; node < count; ++node) {
    // derivative of cost wrt activation
    const double costSlope = nodeCostDerivative(activations[node], expected[node]);
    // derivative of activation wrt weighted input
    const double activationSlope = activationDerivative(weightedInputs[node]);
    result[node] = costSlope * activationSlope;
  }

  return result;
}

// Computes the node values for this layer when it is a hidden layer.
//
// oldLayer is the layer whose node values (oldNodeValues) are already known;
// its weights are indexed as [node of this layer][node of oldLayer]. For each
// of this layer's numNodesOut nodes the result is
//   (sum over oldNode of oldLayer.weights[node][oldNode] * oldNodeValues[oldNode])
//     * activationDerivative(weightedInputs[node]).
//
// NOTE(review): oldLayer is received by value, so every call copies that
// layer; changing this requires touching the declaration as well.
Vector NeuralNetwork::Layer::calculateHiddenLayerNodeValues(Layer oldLayer, Vector oldNodeValues) {
  const int oldCount = static_cast<int>(oldNodeValues.size());
  Vector result(numNodesOut);

  for (int node = 0; node < numNodesOut; ++node) {
    // accumulate (d weighted input / d input) * old node value
    double accumulated = 0;
    for (int oldNode = 0; oldNode < oldCount; ++oldNode) {
      accumulated += oldLayer.weights[node][oldNode] * oldNodeValues[oldNode];
    }
    result[node] = accumulated * activationDerivative(weightedInputs[node]);
  }

  return result;
}