Output layer approaches 0 - Neural Network
I'm building an animal picture recognition neural network from scratch, and for the last couple of days I've been working on an algorithm for updating the weights with backpropagation.
The input layer consists of arrays of length 3, with one float for each of the R, G and B values of a pixel. These values are propagated through the network separately per channel and then summed to get the output layer.
Using the formulas from this article https://hmkcode.com/ai/backpropagation-step-by-step/ , I came up with the system below. (The program lets the user choose a batch size of random sample images to work with; that's why the errors are calculated repeatedly and then averaged before the weights are modified. There are currently two image classes: bull and fox.)
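For reference, the formulas from that article boil down to plain gradient descent on the squared error; in my shorthand, for a single weight w:

E = (target - out)^2 / 2
w = w - learningRate * dE/dw

so each weight should move against the gradient of the error.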
Here's the code. The backpropagation part is highlighted with a comment.
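To make the shapes easier to follow, here's a quick summary of the arrays as they're indexed below (W and H are the image width and height; F and S are just my shorthand for the sizes of the two hidden layers):

// inputLayer:               float[W * H, 3]    - one row per pixel, one column per channel
// inputLayerWeights:        float[W * H, F, 3]
// firstHiddenLayer:         float[F, 3]
// firstHiddenLayerWeights:  float[F, S, 3]
// secondHiddenLayer:        float[S, 3]
// secondHiddenLayerWeights: float[S, 2, 3]     - 2 output neurons (bull, fox)
// outputLayer:              float[2]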
// Pick a random batch of training images (class first, then an image within that class)
for (int i = 0; i < trainingBatchSize; i++)
{
    int imageClass = maths.Random(0, trainingImages.Count);
    int imageIndex = maths.Random(0, trainingImages[imageClass].Count);
    images[i] = trainingImages[imageClass][imageIndex];
    imageClasses[i] = imageClass;
}
#region Neural Network
// Batch-averaged errors, used for the weight updates after the image loop
float[] outputLayerErrorsAvg = new float[outputLayer.Length];
float[,] secondHiddenLayerErrorsAvg = new float[secondHiddenLayer.GetLength(0), 3];
float[,] firstHiddenLayerErrorsAvg = new float[firstHiddenLayer.GetLength(0), 3];
for (int currentImage = 0; currentImage < trainingBatchSize; currentImage++)
{
    int pixels = images[currentImage].Width * images[currentImage].Height;
    #region Forward Propagation
    // Load the image into the input layer: one row per pixel, one column per color channel
    int index = 0;
    for (int x = 0; x < images[currentImage].Width; x++)
    {
        for (int y = 0; y < images[currentImage].Height; y++)
        {
            inputLayer[index, 0] = images[currentImage].GetPixel(x, y).R / 255f;
            inputLayer[index, 1] = images[currentImage].GetPixel(x, y).G / 255f;
            inputLayer[index, 2] = images[currentImage].GetPixel(x, y).B / 255f;
            index++;
        }
    }
    // First hidden layer: weighted sum of the inputs per channel, then ReLU
    for (int i = 0; i < firstHiddenLayer.GetLength(0); i++)
    {
        for (int color = 0; color < 3; color++)
        {
            float sum = 0;
            for (int input = 0; input < inputLayer.GetLength(0); input++)
            {
                sum += inputLayer[input, color] * inputLayerWeights[input, i, color];
            }
            firstHiddenLayer[i, color] = ActivationFunction(sum + firstHiddenLayerBiases[i, color], "ReLU");
        }
    }
    // Second hidden layer, same scheme
    for (int i = 0; i < secondHiddenLayer.GetLength(0); i++)
    {
        for (int color = 0; color < 3; color++)
        {
            float sum = 0;
            for (int input = 0; input < firstHiddenLayer.GetLength(0); input++)
            {
                sum += firstHiddenLayer[input, color] * firstHiddenLayerWeights[input, i, color];
            }
            secondHiddenLayer[i, color] = ActivationFunction(sum + secondHiddenLayerBiases[i, color], "ReLU");
        }
    }
    // Output layer: this is where the three channels are summed together, then Sigmoid
    for (int i = 0; i < outputLayer.Length; i++)
    {
        float sum = 0;
        for (int color = 0; color < 3; color++)
        {
            for (int input = 0; input < secondHiddenLayer.GetLength(0); input++)
            {
                sum += secondHiddenLayer[input, color] * secondHiddenLayerWeights[input, i, color];
            }
        }
        outputLayer[i] = ActivationFunction(sum + outputLayerBiases[i], "Sigmoid");
    }
    #endregion
    // START OF BACKPROPAGATION ALGORITHM
    #region Backpropagate Errors
    // Squared error of each output against the one-hot target for this image's class
    float[,] outputLayerErrors = new float[outputLayer.Length, trainingBatchSize];
    for (int i = 0; i < outputLayer.Length; i++)
    {
        outputLayerErrors[i, currentImage] = outputLayer[i] - (imageClasses[currentImage] == i ? 1 : 0);
        outputLayerErrors[i, currentImage] *= outputLayerErrors[i, currentImage];
        outputLayerErrors[i, currentImage] /= 2;
    }
    // Push the errors back through the weights, layer by layer
    float[,,] secondHiddenLayerErrors = new float[secondHiddenLayer.GetLength(0), 3, trainingBatchSize];
    for (int i = 0; i < secondHiddenLayer.GetLength(0); i++)
    {
        for (int j = 0; j < outputLayer.Length; j++)
        {
            for (int color = 0; color < 3; color++)
            {
                secondHiddenLayerErrors[i, color, currentImage] += outputLayerErrors[j, currentImage] * secondHiddenLayerWeights[i, j, color];
            }
        }
    }
    float[,,] firstHiddenLayerErrors = new float[firstHiddenLayer.GetLength(0), 3, trainingBatchSize];
    for (int i = 0; i < firstHiddenLayer.GetLength(0); i++)
    {
        for (int j = 0; j < secondHiddenLayer.GetLength(0); j++)
        {
            for (int color = 0; color < 3; color++)
            {
                firstHiddenLayerErrors[i, color, currentImage] += secondHiddenLayerErrors[j, color, currentImage] * firstHiddenLayerWeights[i, j, color];
            }
        }
    }
    // Average the errors over the batch
    for (int i = 0; i < outputLayerErrorsAvg.Length; i++)
    {
        float sum = 0;
        for (int j = 0; j < trainingBatchSize; j++)
        {
            sum += outputLayerErrors[i, j];
        }
        outputLayerErrorsAvg[i] = sum / trainingBatchSize;
    }
    for (int i = 0; i < secondHiddenLayerErrorsAvg.GetLength(0); i++)
    {
        for (int color = 0; color < secondHiddenLayerErrorsAvg.GetLength(1); color++)
        {
            float sum = 0;
            for (int j = 0; j < trainingBatchSize; j++)
            {
                sum += secondHiddenLayerErrors[i, color, j];
            }
            secondHiddenLayerErrorsAvg[i, color] = sum / trainingBatchSize;
        }
    }
    for (int i = 0; i < firstHiddenLayerErrorsAvg.GetLength(0); i++)
    {
        for (int color = 0; color < firstHiddenLayerErrorsAvg.GetLength(1); color++)
        {
            float sum = 0;
            for (int j = 0; j < trainingBatchSize; j++)
            {
                sum += firstHiddenLayerErrors[i, color, j];
            }
            firstHiddenLayerErrorsAvg[i, color] = sum / trainingBatchSize;
        }
    }
    #endregion
}
#region Backpropagation
// Second-hidden-to-output weights: step against the averaged error,
// then clip to [-weightsThreshold, weightsThreshold]
for (int i = 0; i < secondHiddenLayerWeights.GetLength(0); i++)
{
    for (int j = 0; j < secondHiddenLayerWeights.GetLength(1); j++)
    {
        for (int color = 0; color < 3; color++)
        {
            secondHiddenLayerWeights[i, j, color] -= learningRate * secondHiddenLayer[i, color] * outputLayerErrorsAvg[j];
            if (secondHiddenLayerWeights[i, j, color] > weightsThreshold)
            {
                secondHiddenLayerWeights[i, j, color] = weightsThreshold;
            }
            else if (secondHiddenLayerWeights[i, j, color] < -weightsThreshold)
            {
                secondHiddenLayerWeights[i, j, color] = -weightsThreshold;
            }
        }
    }
}
for (int i = 0; i < outputLayerBiases.GetLength(0); i++)
{
    outputLayerBiases[i] -= learningRate * outputLayerErrorsAvg[i];
}
// First-hidden-to-second-hidden weights, same update-and-clip scheme
for (int i = 0; i < firstHiddenLayerWeights.GetLength(0); i++)
{
    for (int j = 0; j < firstHiddenLayerWeights.GetLength(1); j++)
    {
        for (int color = 0; color < 3; color++)
        {
            firstHiddenLayerWeights[i, j, color] -= learningRate * firstHiddenLayer[i, color] * secondHiddenLayerErrorsAvg[j, color];
            if (firstHiddenLayerWeights[i, j, color] > weightsThreshold)
            {
                firstHiddenLayerWeights[i, j, color] = weightsThreshold;
            }
            else if (firstHiddenLayerWeights[i, j, color] < -weightsThreshold)
            {
                firstHiddenLayerWeights[i, j, color] = -weightsThreshold;
            }
        }
    }
}
for (int i = 0; i < secondHiddenLayerBiases.GetLength(0); i++)
{
    for (int color = 0; color < 3; color++)
    {
        secondHiddenLayerBiases[i, color] -= learningRate * secondHiddenLayerErrorsAvg[i, color];
    }
}
// Input-to-first-hidden weights and first hidden biases
for (int i = 0; i < inputLayerWeights.GetLength(0); i++)
{
    for (int j = 0; j < inputLayerWeights.GetLength(1); j++)
    {
        for (int color = 0; color < 3; color++)
        {
            inputLayerWeights[i, j, color] -= learningRate * inputLayer[i, color] * firstHiddenLayerErrorsAvg[j, color];
            if (inputLayerWeights[i, j, color] > weightsThreshold)
            {
                inputLayerWeights[i, j, color] = weightsThreshold;
            }
            else if (inputLayerWeights[i, j, color] < -weightsThreshold)
            {
                inputLayerWeights[i, j, color] = -weightsThreshold;
            }
        }
    }
}
for (int i = 0; i < firstHiddenLayerBiases.GetLength(0); i++)
{
    for (int color = 0; color < 3; color++)
    {
        firstHiddenLayerBiases[i, color] -= learningRate * firstHiddenLayerErrorsAvg[i, color];
    }
}
#endregion
#endregion
The "weightsThreshold" variable is used to prevent the weights from skyrocketing to infinity or negative infinity before eventually becoming NaN (which used to happen until I changed the algotithm a bit: now it's not a problem anymore, but I prefer to keep this threshold to avoid any complications) and it is set to 5.
The problem I have is that after hundreds of iterations of this code, the outputLayer is always [0, 0] or numbers very close to that (on the order of E-30), and the loss is always 0.025. The loss function is the average of the output layer errors, and one of them is always wrong when both outputs are 0, because the target for the correct class is 1.
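To be explicit about how I compute that loss value, it's roughly this (a simplified sketch, not my exact logging code):

// Loss = mean of the batch-averaged output errors
float loss = 0;
for (int i = 0; i < outputLayerErrorsAvg.Length; i++)
{
    loss += outputLayerErrorsAvg[i];
}
loss /= outputLayerErrorsAvg.Length;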
Is there anything wrong with my code/algorithm?
Source: Stack Overflow, licensed under CC BY-SA 3.0.