Learn how neural networks learn through backpropagation
Backpropagation is the algorithm neural networks use to learn. It computes the gradient of the loss with respect to each weight by applying the chain rule backward through the network.

**Steps:**

1. Forward pass: compute predictions
2. Compute the loss
3. Backward pass: compute the gradients
4. Update the weights using the gradients
Implement a 2-layer network:
import numpy as np
class NeuralNetwork:
    """Minimal fully-connected network with one hidden layer.

    Both layers use sigmoid activations and are trained with full-batch
    gradient descent. Weights are stored as (in_features, out_features)
    matrices so that the forward pass is ``X @ W + b``.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Initialize weights and biases.

        Small random weights break symmetry between hidden units; the 0.01
        scale keeps the sigmoids near their linear region at the start of
        training. Biases start at zero.
        """
        self.W1 = np.random.randn(input_size, hidden_size) * 0.01
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = np.random.randn(hidden_size, output_size) * 0.01
        self.b2 = np.zeros((1, output_size))

    def sigmoid(self, z):
        """Element-wise logistic function 1 / (1 + e^-z).

        ``z`` is clipped to [-500, 500] to avoid overflow in ``np.exp``.
        """
        return 1 / (1 + np.exp(-np.clip(z, -500, 500)))

    def sigmoid_derivative(self, z):
        """Derivative of the sigmoid, expressed in terms of the activation.

        NOTE: despite the parameter name, callers pass the already-activated
        value ``a = sigmoid(z)``; then sigmoid'(z) = a * (1 - a).
        """
        return z * (1 - z)

    def forward(self, X):
        """Run a forward pass and return the output activations.

        Caches ``z1, a1, z2, a2`` on the instance for use by ``backward``.
        X has shape (m, input_size); the return value has shape
        (m, output_size) with entries in (0, 1).
        """
        # Hidden layer
        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = self.sigmoid(self.z1)
        # Output layer
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.a2 = self.sigmoid(self.z2)
        return self.a2

    def backward(self, X, y, output, learning_rate=0.1):
        """Backpropagate gradients and update weights in place.

        Must be called after ``forward`` (it reads the cached ``a1``).
        ``output`` is the value returned by ``forward(X)``.
        """
        m = X.shape[0]
        # Output layer gradients.
        # NOTE(review): ``output - y`` is the exact dL/dz2 for sigmoid +
        # binary cross-entropy; the loss reported in train() is MSE, so this
        # is the standard tutorial simplification that drops the sigmoid'
        # factor at the output layer. It still points downhill and works here.
        dz2 = output - y
        dW2 = (1/m) * np.dot(self.a1.T, dz2)
        db2 = (1/m) * np.sum(dz2, axis=0, keepdims=True)
        # Hidden layer gradients: chain rule through W2 and the hidden sigmoid
        # (sigmoid_derivative takes the activation a1, not z1 — see its docstring).
        dz1 = np.dot(dz2, self.W2.T) * self.sigmoid_derivative(self.a1)
        dW1 = (1/m) * np.dot(X.T, dz1)
        db1 = (1/m) * np.sum(dz1, axis=0, keepdims=True)
        # Gradient-descent update.
        self.W2 -= learning_rate * dW2
        self.b2 -= learning_rate * db2
        self.W1 -= learning_rate * dW1
        self.b1 -= learning_rate * db1

    def train(self, X, y, epochs=1000, learning_rate=0.1):
        """Train with full-batch gradient descent for ``epochs`` iterations.

        ``learning_rate`` is forwarded to ``backward`` (default 0.1 matches
        the previous hard-coded behavior). Prints the MSE loss every 100
        epochs; the loss shown is computed from the predictions *before*
        that epoch's weight update.
        """
        for epoch in range(epochs):
            # Forward pass
            output = self.forward(X)
            # Backward pass
            self.backward(X, y, output, learning_rate)
            if epoch % 100 == 0:
                loss = np.mean(np.square(output - y))
                print(f"Epoch {epoch}, Loss: {loss:.4f}")
# XOR problem: the classic dataset that is not linearly separable,
# so it requires a hidden layer to solve.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[0], [1], [1], [0]])

# Train network (4 hidden units are plenty for XOR).
nn = NeuralNetwork(input_size=2, hidden_size=4, output_size=1)
nn.train(X, y, epochs=1000)

# Test: after training, predictions should be close to the targets.
print("\nPredictions:")
predictions = nn.forward(X)
for i, xi in enumerate(X):
    print(f"Input: {xi} -> Predicted: {predictions[i][0]:.4f}, Actual: {y[i][0]}")

# Example output (exact values vary with the random weight initialization):
#   Epoch 0, Loss: 0.2508
#   Epoch 100, Loss: 0.2490
#   Epoch 200, Loss: 0.2419
#   Epoch 300, Loss: 0.2184
#   Epoch 400, Loss: 0.1481
#   Epoch 500, Loss: 0.0621
#   Epoch 600, Loss: 0.0267
#   Epoch 700, Loss: 0.0140
#   Epoch 800, Loss: 0.0087
#   Epoch 900, Loss: 0.0061
#
#   Predictions:
#   Input: [0 0] -> Predicted: 0.0324, Actual: 0
#   Input: [0 1] -> Predicted: 0.9629, Actual: 1
#   Input: [1 0] -> Predicted: 0.9631, Actual: 1
#   Input: [1 1] -> Predicted: 0.0391, Actual: 0