I am a beginner trying to build a neural network with one hidden layer that classifies images into 12 different classes. When I run the cell that calls my gradient-descent function to start training the model, it produces no output at all, and the notebook simply moves on to the next cell.
def initialize_parameters(hidden_units, input_size=640 * 480 * 3, num_classes=12):
    """Randomly initialize the weights and biases of a one-hidden-layer network.

    Parameters
    ----------
    hidden_units : int
        Number of units in the hidden layer.
    input_size : int, optional
        Number of input features per example. Defaults to a flattened
        640x480 RGB image (previously hard-coded).
    num_classes : int, optional
        Number of output classes. Defaults to 12 (previously hard-coded).

    Returns
    -------
    (w1, b1, w2, b2) : tuple of ndarray
        w1: (hidden_units, input_size), b1: (hidden_units, 1),
        w2: (num_classes, hidden_units), b2: (num_classes, 1).

    Notes
    -----
    Weights are scaled by 0.01 to keep initial activations small; biases
    start at zero. With the default input_size, w1 alone holds
    hidden_units * 921600 floats — for 500 hidden units that is ~3.7 GB,
    which can exhaust RAM and kill a notebook kernel with no error shown.
    """
    w1 = np.random.randn(hidden_units, input_size) * 0.01
    b1 = np.zeros((hidden_units, 1))
    w2 = np.random.randn(num_classes, hidden_units) * 0.01
    b2 = np.zeros((num_classes, 1))
    return w1, b1, w2, b2
def ReLU(Z):
    """Element-wise rectified linear unit: clamp every negative entry to zero."""
    return np.clip(Z, 0, None)
def softmax(Z):
    """Column-wise softmax of the logits Z, numerically stabilized.

    Parameters
    ----------
    Z : ndarray of shape (classes, samples)
        Raw scores; each column is one example.

    Returns
    -------
    ndarray of the same shape where each column sums to 1.

    Notes
    -----
    The original version computed np.exp(Z) directly, which overflows to
    inf (and then yields NaN after division) once any logit exceeds ~709.
    Subtracting the per-column maximum leaves the result mathematically
    unchanged — softmax is invariant to adding a constant per column —
    but keeps every exponent <= 0, so exp never overflows.
    """
    shifted = Z - np.max(Z, axis=0, keepdims=True)
    expZ = np.exp(shifted)
    return expZ / np.sum(expZ, axis=0, keepdims=True)
def forward_propagation(w1, b1, w2, b2, X):
    """Run one forward pass through the two-layer network.

    X has one example per column; returns the pre-activations and
    activations of both layers as (z1, a1, z2, a2).
    """
    # Hidden layer: affine transform followed by ReLU.
    z1 = w1 @ X + b1
    a1 = ReLU(z1)
    # Output layer: affine transform followed by softmax over classes.
    z2 = w2 @ a1 + b2
    a2 = softmax(z2)
    return z1, a1, z2, a2
def onehotencoding(Y, num_classes=None):
    """One-hot encode integer labels into a (classes, samples) matrix.

    Parameters
    ----------
    Y : ndarray of int
        1-D array of class labels.
    num_classes : int, optional
        Total number of classes. When omitted, falls back to the original
        behavior of inferring it as Y.max() + 1.

    Returns
    -------
    ndarray of shape (num_classes, Y.size) with a single 1 per column.

    Notes
    -----
    Inferring the class count from the batch (Y.max() + 1) is a bug
    waiting to happen: a batch that lacks the highest-numbered labels
    produces a matrix with too few rows, so `a2 - one_hot_Y` in
    backpropagation fails with a shape mismatch. Callers training a
    12-class network should pass num_classes=12 explicitly.
    """
    if num_classes is None:
        num_classes = Y.max() + 1
    one_hot_Y = np.zeros((Y.size, num_classes))
    one_hot_Y[np.arange(Y.size), Y] = 1
    return one_hot_Y.T
def derivativeReLU(Z):
    """Derivative of ReLU as a boolean mask: True (1) where Z is positive, else False (0)."""
    return np.greater(Z, 0)
def back_propagation(w2, a1, z1, a2, X, Y):
    """Compute gradients of the cross-entropy loss for both layers.

    Uses the cached forward-pass values (z1, a1, a2) and the labels Y;
    returns (dw1, db1, dw2, db2), each averaged over the batch.
    """
    batch_size = Y.size
    targets = onehotencoding(Y)
    # Softmax + cross-entropy gives the simple output error a2 - targets.
    dz2 = a2 - targets
    dw2 = np.dot(dz2, a1.T) / batch_size
    db2 = np.sum(dz2, axis=1, keepdims=True) / batch_size
    # Propagate the error through w2, gated by the ReLU derivative.
    dz1 = np.dot(w2.T, dz2) * derivativeReLU(z1)
    dw1 = np.dot(dz1, X.T) / batch_size
    db1 = np.sum(dz1, axis=1, keepdims=True) / batch_size
    return dw1, db1, dw2, db2
def update_parameters(w1, b1, w2, b2, dw1, db1, dw2, db2, alpha):
    """Apply one gradient-descent step: subtract alpha times each gradient.

    Returns the updated (w1, b1, w2, b2); inputs are not modified in place.
    """
    stepped = [
        param - alpha * grad
        for param, grad in zip((w1, b1, w2, b2), (dw1, db1, dw2, db2))
    ]
    w1, b1, w2, b2 = stepped
    return w1, b1, w2, b2
The first cell ends here; it is followed by this next block of code.
def get_predictions(a2):
    """Return, for each column (example) of a2, the index of the most probable class."""
    return a2.argmax(axis=0)
def get_accuracy(predictions, Y):
    """Return the fraction of predictions that match the true labels Y."""
    return np.mean(predictions == Y)
def gradient_descent(X, Y, hidden_units, iterations, alpha):
    """Train the two-layer network with full-batch gradient descent.

    Parameters
    ----------
    X : ndarray of shape (features, samples)
        Training inputs, one example per column.
    Y : ndarray of int, shape (samples,)
        Integer class labels.
    hidden_units : int
        Size of the hidden layer.
    iterations : int
        Number of full-batch gradient-descent steps.
    alpha : float
        Learning rate.

    Returns
    -------
    (w1, b1, w2, b2) : the trained parameters.

    Notes
    -----
    The prints are flushed explicitly: inside a long-running loop, stdout
    is often buffered (notably in notebook kernels), so without
    flush=True the cell can appear to produce no output even though
    training is running. If the cell finishes instantly with no output,
    also check that initialize_parameters did not exhaust memory —
    hidden_units=500 with 640x480x3 inputs allocates a ~3.7 GB w1 and can
    kill the kernel silently.
    """
    w1, b1, w2, b2 = initialize_parameters(hidden_units)
    for i in range(iterations):
        z1, a1, z2, a2 = forward_propagation(w1, b1, w2, b2, X)
        dw1, db1, dw2, db2 = back_propagation(w2, a1, z1, a2, X, Y)
        w1, b1, w2, b2 = update_parameters(w1, b1, w2, b2, dw1, db1, dw2, db2, alpha)
        if i % 10 == 0:
            # Accuracy is computed from a2, i.e. the activations from
            # *before* this iteration's parameter update.
            predictions = get_predictions(a2)
            accuracy = get_accuracy(predictions, Y)
            print("Iteration: ", i, flush=True)
            print("Accuracy: ", accuracy, flush=True)
    return w1, b1, w2, b2
This is the call to the gradient-descent function that starts the training.
# Train a 500-hidden-unit network on X_train/Y_train (defined in an earlier cell).
# NOTE(review): with 640x480x3 inputs, 500 hidden units means w1 alone is
# ~3.7 GB (500 x 921600 float64) — a likely cause of the kernel dying
# silently with no output; confirm available RAM or downscale the images.
w1, b1, w2, b2 = gradient_descent(X_train, Y_train, 500, iterations=1000, alpha=0.1)