Build and train multi-layer deep networks
Deep networks have multiple hidden layers, allowing them to learn hierarchical representations. Each layer learns increasingly abstract features.

**Challenges:**
- Vanishing/exploding gradients
- Overfitting
- Computational cost

**Solutions:**
- Better initialization (Xavier, He)
- Batch normalization
- Dropout regularization
- Advanced optimizers (Adam, RMSprop)
Build a flexible deep network:
import numpy as np
class DeepNeuralNetwork:
    """Fully-connected feed-forward network for binary classification.

    Hidden layers use ReLU; the output layer uses a sigmoid. Data uses the
    row-vector convention: inputs are shaped (m, n_features). Parameters are
    stored in ``self.parameters`` as ``W1..WL`` / ``b1..bL``.
    """

    def __init__(self, layer_dims):
        """Initialize weights and biases.

        layer_dims: list [n_input, n_hidden1, ..., n_output]; e.g.
        [2, 16, 8, 4, 1] builds a 2 -> 16 -> 8 -> 4 -> 1 network.
        """
        self.parameters = {}
        self.L = len(layer_dims) - 1  # number of weight layers
        # He initialization: scale std by sqrt(2 / fan_in), suited to ReLU.
        for l in range(1, len(layer_dims)):
            self.parameters[f'W{l}'] = np.random.randn(
                layer_dims[l-1], layer_dims[l]) * np.sqrt(2 / layer_dims[l-1])
            self.parameters[f'b{l}'] = np.zeros((1, layer_dims[l]))

    def relu(self, Z):
        """Element-wise ReLU: max(0, Z)."""
        return np.maximum(0, Z)

    def relu_derivative(self, Z):
        """ReLU derivative: 1 where Z > 0, else 0 (subgradient at 0 is 0)."""
        return (Z > 0).astype(float)

    def sigmoid(self, Z):
        """Logistic function; Z is clipped so exp() cannot overflow."""
        return 1 / (1 + np.exp(-np.clip(Z, -500, 500)))

    def forward_propagation(self, X):
        """Run a forward pass and cache per-layer Z/A for backprop.

        X: array of shape (m, n_input). Returns sigmoid activations of
        shape (m, n_output).
        """
        self.cache = {'A0': X}
        A = X
        # Hidden layers with ReLU
        for l in range(1, self.L):
            Z = np.dot(A, self.parameters[f'W{l}']) + self.parameters[f'b{l}']
            A = self.relu(Z)
            self.cache[f'Z{l}'] = Z
            self.cache[f'A{l}'] = A
        # Output layer with sigmoid
        Z = np.dot(A, self.parameters[f'W{self.L}']) + self.parameters[f'b{self.L}']
        A = self.sigmoid(Z)
        self.cache[f'Z{self.L}'] = Z
        self.cache[f'A{self.L}'] = A
        return A

    def backward_propagation(self, X, y, output):
        """Compute gradients of the cross-entropy loss w.r.t. all parameters.

        Returns a dict with dW1..dWL and db1..dbL, matching the shapes of
        the corresponding parameters.
        """
        m = X.shape[0]
        grads = {}
        # Output layer: sigmoid + binary cross-entropy simplifies to A - y.
        dZ = output - y
        grads[f'dW{self.L}'] = (1/m) * np.dot(self.cache[f'A{self.L-1}'].T, dZ)
        grads[f'db{self.L}'] = (1/m) * np.sum(dZ, axis=0, keepdims=True)
        # Hidden layers, walked from L-1 down to 1.
        for l in range(self.L-1, 0, -1):
            dA = np.dot(dZ, self.parameters[f'W{l+1}'].T)
            dZ = dA * self.relu_derivative(self.cache[f'Z{l}'])
            grads[f'dW{l}'] = (1/m) * np.dot(self.cache[f'A{l-1}'].T, dZ)
            grads[f'db{l}'] = (1/m) * np.sum(dZ, axis=0, keepdims=True)
        return grads

    def update_parameters(self, grads, learning_rate):
        """Apply one vanilla gradient-descent step to every W and b."""
        for l in range(1, self.L + 1):
            self.parameters[f'W{l}'] -= learning_rate * grads[f'dW{l}']
            self.parameters[f'b{l}'] -= learning_rate * grads[f'db{l}']

    def train(self, X, y, epochs=1000, learning_rate=0.01):
        """Train with full-batch gradient descent, logging every 100 epochs.

        X: (m, n_input) inputs; y: (m, 1) binary labels (0/1).
        """
        for epoch in range(epochs):
            output = self.forward_propagation(X)
            grads = self.backward_propagation(X, y, output)
            self.update_parameters(grads, learning_rate)
            if epoch % 100 == 0:
                # FIX: clip predictions away from exactly 0/1 before the
                # logs — once the model saturates, float outputs can hit
                # 0.0 or 1.0 and log() would emit warnings and nan/inf.
                p = np.clip(output, 1e-12, 1 - 1e-12)
                loss = -np.mean(y * np.log(p) + (1-y) * np.log(1-p))
                accuracy = np.mean((output > 0.5) == y)
                print(f"Epoch {epoch}: Loss = {loss:.4f}, Accuracy = {accuracy:.4f}")
# Build a two-class spiral dataset: K interleaved arms, N points each,
# radius growing from 0 to 1 with a little angular noise.
np.random.seed(42)
N = 100  # points per class
D = 2    # input dimensionality
K = 2    # number of classes
X = np.zeros((N * K, D))
y = np.zeros((N * K, 1))
for cls in range(K):
    idx = slice(N * cls, N * (cls + 1))
    radius = np.linspace(0.0, 1, N)
    theta = np.linspace(cls * 4, (cls + 1) * 4, N) + np.random.randn(N) * 0.2
    X[idx] = np.c_[radius * np.sin(theta), radius * np.cos(theta)]
    y[idx] = cls

# Build and fit a deep network: 2 -> 16 -> 8 -> 4 -> 1
model = DeepNeuralNetwork([2, 16, 8, 4, 1])
model.train(X, y, epochs=1000, learning_rate=0.5)

# Evaluate on the training set.
predictions = model.forward_propagation(X)
accuracy = np.mean((predictions > 0.5) == y)
print(f"\nFinal Accuracy: {accuracy * 100:.2f}%")

# Sample output:
# Epoch 0: Loss = 0.6942, Accuracy = 0.5050
# Epoch 100: Loss = 0.4823, Accuracy = 0.8250
# Epoch 200: Loss = 0.3154, Accuracy = 0.9100
# Epoch 300: Loss = 0.2187, Accuracy = 0.9450
# Epoch 400: Loss = 0.1623, Accuracy = 0.9650
# Epoch 500: Loss = 0.1269, Accuracy = 0.9750
# Epoch 600: Loss = 0.1031, Accuracy = 0.9800
# Epoch 700: Loss = 0.0862, Accuracy = 0.9850
# Epoch 800: Loss = 0.0736, Accuracy = 0.9850
# Epoch 900: Loss = 0.0638, Accuracy = 0.9900
#
# Final Accuracy: 99.00%