Build and train multi-layer deep networks
Deep networks have multiple hidden layers, allowing them to learn hierarchical representations. Each layer learns increasingly abstract features.

**Challenges:**
- Vanishing/exploding gradients
- Overfitting
- Computational cost

**Solutions:**
- Better initialization (Xavier, He)
- Batch normalization
- Dropout regularization
- Advanced optimizers (Adam, RMSprop)
Build a flexible deep network:
import numpy as np
class DeepNeuralNetwork:
    """Fully-connected feed-forward network for binary classification.

    Hidden layers use ReLU; the output layer uses a sigmoid. Data uses the
    row-vector convention: inputs are shaped (m, n_features). Parameters are
    stored in ``self.parameters`` as ``W1..WL`` / ``b1..bL``.
    """

    def __init__(self, layer_dims):
        """Initialize weights and biases.

        layer_dims: list [n_input, n_hidden1, ..., n_output]; e.g.
        [2, 16, 8, 4, 1] builds a 2 -> 16 -> 8 -> 4 -> 1 network.
        """
        self.parameters = {}
        self.L = len(layer_dims) - 1  # number of weight layers
        # He initialization: scale std by sqrt(2 / fan_in), suited to ReLU.
        for l in range(1, len(layer_dims)):
            self.parameters[f'W{l}'] = np.random.randn(
                layer_dims[l-1], layer_dims[l]) * np.sqrt(2 / layer_dims[l-1])
            self.parameters[f'b{l}'] = np.zeros((1, layer_dims[l]))

    def relu(self, Z):
        """Element-wise ReLU: max(0, Z)."""
        return np.maximum(0, Z)

    def relu_derivative(self, Z):
        """ReLU derivative: 1 where Z > 0, else 0 (subgradient at 0 is 0)."""
        return (Z > 0).astype(float)

    def sigmoid(self, Z):
        """Logistic function; Z is clipped so exp() cannot overflow."""
        return 1 / (1 + np.exp(-np.clip(Z, -500, 500)))

    def forward_propagation(self, X):
        """Run a forward pass and cache per-layer Z/A for backprop.

        X: array of shape (m, n_input). Returns sigmoid activations of
        shape (m, n_output).
        """
        self.cache = {'A0': X}
        A = X
        # Hidden layers with ReLU
        for l in range(1, self.L):
            Z = np.dot(A, self.parameters[f'W{l}']) + self.parameters[f'b{l}']
            A = self.relu(Z)
            self.cache[f'Z{l}'] = Z
            self.cache[f'A{l}'] = A
        # Output layer with sigmoid
        Z = np.dot(A, self.parameters[f'W{self.L}']) + self.parameters[f'b{self.L}']
        A = self.sigmoid(Z)
        self.cache[f'Z{self.L}'] = Z
        self.cache[f'A{self.L}'] = A
        return A

    def backward_propagation(self, X, y, output):
        """Compute gradients of the cross-entropy loss w.r.t. all parameters.

        Returns a dict with dW1..dWL and db1..dbL, matching the shapes of
        the corresponding parameters.
        """
        m = X.shape[0]
        grads = {}
        # Output layer: sigmoid + binary cross-entropy simplifies to A - y.
        dZ = output - y
        grads[f'dW{self.L}'] = (1/m) * np.dot(self.cache[f'A{self.L-1}'].T, dZ)
        grads[f'db{self.L}'] = (1/m) * np.sum(dZ, axis=0, keepdims=True)
        # Hidden layers, walked from L-1 down to 1.
        for l in range(self.L-1, 0, -1):
            dA = np.dot(dZ, self.parameters[f'W{l+1}'].T)
            dZ = dA * self.relu_derivative(self.cache[f'Z{l}'])
            grads[f'dW{l}'] = (1/m) * np.dot(self.cache[f'A{l-1}'].T, dZ)
            grads[f'db{l}'] = (1/m) * np.sum(dZ, axis=0, keepdims=True)
        return grads

    def update_parameters(self, grads, learning_rate):
        """Apply one vanilla gradient-descent step to every W and b."""
        for l in range(1, self.L + 1):
            self.parameters[f'W{l}'] -= learning_rate * grads[f'dW{l}']
            self.parameters[f'b{l}'] -= learning_rate * grads[f'db{l}']

    def train(self, X, y, epochs=1000, learning_rate=0.01):
        """Train with full-batch gradient descent, logging every 100 epochs.

        X: (m, n_input) inputs; y: (m, 1) binary labels (0/1).
        """
        for epoch in range(epochs):
            output = self.forward_propagation(X)
            grads = self.backward_propagation(X, y, output)
            self.update_parameters(grads, learning_rate)
            if epoch % 100 == 0:
                # FIX: clip predictions away from exactly 0/1 before the
                # logs — once the model saturates, float outputs can hit
                # 0.0 or 1.0 and log() would emit warnings and nan/inf.
                p = np.clip(output, 1e-12, 1 - 1e-12)
                loss = -np.mean(y * np.log(p) + (1-y) * np.log(1-p))
                accuracy = np.mean((output > 0.5) == y)
                print(f"Epoch {epoch}: Loss = {loss:.4f}, Accuracy = {accuracy:.4f}")
# Build a two-class spiral dataset: K interleaved arms, N points each,
# radius growing from 0 to 1 with a little angular noise.
np.random.seed(42)
N = 100  # points per class
D = 2    # input dimensionality
K = 2    # number of classes
X = np.zeros((N * K, D))
y = np.zeros((N * K, 1))
for cls in range(K):
    idx = slice(N * cls, N * (cls + 1))
    radius = np.linspace(0.0, 1, N)
    theta = np.linspace(cls * 4, (cls + 1) * 4, N) + np.random.randn(N) * 0.2
    X[idx] = np.c_[radius * np.sin(theta), radius * np.cos(theta)]
    y[idx] = cls

# Build and fit a deep network: 2 -> 16 -> 8 -> 4 -> 1
model = DeepNeuralNetwork([2, 16, 8, 4, 1])
model.train(X, y, epochs=1000, learning_rate=0.5)

# Evaluate on the training set.
predictions = model.forward_propagation(X)
accuracy = np.mean((predictions > 0.5) == y)
print(f"\nFinal Accuracy: {accuracy * 100:.2f}%")

# Sample output:
# Epoch 0: Loss = 0.6942, Accuracy = 0.5050
# Epoch 100: Loss = 0.4823, Accuracy = 0.8250
# Epoch 200: Loss = 0.3154, Accuracy = 0.9100
# Epoch 300: Loss = 0.2187, Accuracy = 0.9450
# Epoch 400: Loss = 0.1623, Accuracy = 0.9650
# Epoch 500: Loss = 0.1269, Accuracy = 0.9750
# Epoch 600: Loss = 0.1031, Accuracy = 0.9800
# Epoch 700: Loss = 0.0862, Accuracy = 0.9850
# Epoch 800: Loss = 0.0736, Accuracy = 0.9850
# Epoch 900: Loss = 0.0638, Accuracy = 0.9900
#
# Final Accuracy: 99.00%