Automate model design and hyperparameter tuning
AutoML automates the end-to-end process of applying machine learning. **Key Components:** 1. **Data Preprocessing**: Feature engineering, scaling, encoding 2. **Model Selection**: Choose best algorithm 3. **Hyperparameter Tuning**: Optimize model parameters 4. **Neural Architecture Search (NAS)**: Design network architectures 5. **Ensemble Methods**: Combine multiple models **Why AutoML?** - Saves time and expertise - Explores larger search space - Often finds better solutions than manual tuning - Democratizes ML (non-experts can use) **When to Use:** - Rapid prototyping - Baseline model creation - Limited ML expertise - Large hyperparameter space **Limitations:** - Computationally expensive - May not match domain-specific manual design - Less interpretable - Can overfit to validation set
Use Optuna to automatically tune the hyperparameters of a PyTorch classifier:
import optuna
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader
# Build a synthetic binary-classification problem and hold out 20% for validation.
X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Wrap the numpy arrays as torch tensors: float32 features, int64 class labels
# (CrossEntropyLoss expects long-typed targets).
X_train_t = torch.tensor(X_train, dtype=torch.float32)
y_train_t = torch.tensor(y_train, dtype=torch.long)
X_val_t = torch.tensor(X_val, dtype=torch.float32)
y_val_t = torch.tensor(y_val, dtype=torch.long)
def create_model(trial):
    """Build an MLP classifier and its optimizer from a trial's sampled hyperparameters.

    Args:
        trial: Optuna trial used to sample the search space.

    Returns:
        (model, optimizer): an ``nn.Sequential`` over 20 input features
        producing 2 class logits, and an Adam optimizer bound to it.
    """
    # Search space: depth, width, regularization strength, learning rate.
    n_layers = trial.suggest_int('n_layers', 1, 3)
    hidden_dim = trial.suggest_int('hidden_dim', 32, 256, step=32)
    dropout = trial.suggest_float('dropout', 0.1, 0.5)
    # suggest_loguniform is deprecated (removed in Optuna 4.x);
    # suggest_float(..., log=True) samples the same log-uniform distribution.
    lr = trial.suggest_float('lr', 1e-5, 1e-1, log=True)

    # Stack n_layers of Linear -> ReLU -> Dropout, then a 2-class head.
    layers = []
    input_dim = 20  # matches n_features of the generated dataset
    for _ in range(n_layers):
        layers.append(nn.Linear(input_dim, hidden_dim))
        layers.append(nn.ReLU())
        layers.append(nn.Dropout(dropout))
        input_dim = hidden_dim
    layers.append(nn.Linear(input_dim, 2))

    model = nn.Sequential(*layers)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    return model, optimizer
def objective(trial):
    """Train one candidate model and return its validation accuracy (maximized)."""
    model, optimizer = create_model(trial)
    criterion = nn.CrossEntropyLoss()

    # Batch size is part of the search space too.
    batch_size = trial.suggest_int('batch_size', 16, 128, step=16)
    loader = DataLoader(TensorDataset(X_train_t, y_train_t),
                        batch_size=batch_size, shuffle=True)

    # Fixed-length training budget per trial.
    n_epochs = 20
    for _ in range(n_epochs):
        model.train()
        for batch_x, batch_y in loader:
            optimizer.zero_grad()
            loss = criterion(model(batch_x), batch_y)
            loss.backward()
            optimizer.step()

    # Score on the held-out split without tracking gradients.
    model.eval()
    with torch.no_grad():
        logits = model(X_val_t)
        preds = logits.argmax(dim=1)
    return (preds == y_val_t).float().mean().item()
# --- Run the Optuna study -------------------------------------------------
# NOTE: the console transcript that was pasted after the last line of this
# section has been removed — it was not code and broke the file's syntax.
print("Starting hyperparameter optimization...")
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20, show_progress_bar=True)

# Report the best trial found.
print(f"\nOptimization completed!")
print(f"Best validation accuracy: {study.best_value:.4f}")
print(f"\nBest hyperparameters:")
for key, value in study.best_params.items():
    print(f" {key}: {value}")

# Summary of the search history.
print(f"\nOptimization trials: {len(study.trials)}")
print(f"Best trial: #{study.best_trial.number}")

# Rank hyperparameters by Optuna's estimated influence on the objective.
importance = optuna.importance.get_param_importances(study)
print(f"\nHyperparameter importance:")
for param, imp in sorted(importance.items(), key=lambda x: x[1], reverse=True):
    print(f" {param}: {imp:.3f}")
# Example run: best accuracy ~0.895 after 20 trials, with lr typically the
# most important hyperparameter, followed by hidden_dim.
Use differentiable architecture search (DARTS-style) to automatically design neural network architectures:
import torch
import torch.nn as nn
class SearchSpace:
    """Catalog of candidate NAS operations.

    Each entry maps an operation name to a factory that, given a channel
    count C, returns an nn.Module preserving both spatial size and channels.
    Key order matters: blocks index into it to name their selected op.
    """

    OPERATIONS = dict(
        conv3x3=lambda C: nn.Conv2d(C, C, 3, padding=1),
        conv5x5=lambda C: nn.Conv2d(C, C, 5, padding=2),
        maxpool=lambda C: nn.MaxPool2d(3, stride=1, padding=1),
        avgpool=lambda C: nn.AvgPool2d(3, stride=1, padding=1),
        identity=lambda C: nn.Identity(),
        sep_conv3x3=lambda C: nn.Sequential(
            nn.Conv2d(C, C, 3, padding=1, groups=C),  # depthwise
            nn.Conv2d(C, C, 1),                       # pointwise
        ),
    )
class SearchableBlock(nn.Module):
    """Mixes every candidate operation via learnable logits (DARTS-style relaxation)."""

    def __init__(self, channels):
        super().__init__()
        self.channels = channels
        # One instantiated module per candidate, in OPERATIONS key order.
        candidates = [factory(channels) for factory in SearchSpace.OPERATIONS.values()]
        self.operations = nn.ModuleList(candidates)
        # One learnable logit per candidate; softmax turns them into mixing weights.
        self.arch_params = nn.Parameter(torch.randn(len(self.operations)))

    def forward(self, x):
        # Continuous relaxation: softmax-weighted sum over all candidate ops.
        mix = torch.softmax(self.arch_params, dim=0)
        out = None
        for weight, op in zip(mix, self.operations):
            term = weight * op(x)
            out = term if out is None else out + term
        return out

    def get_selected_operation(self):
        """Return the name of the candidate with the largest architecture logit."""
        best = self.arch_params.argmax().item()
        return list(SearchSpace.OPERATIONS)[best]
class NASNetwork(nn.Module):
    """Small CNN whose per-block operation is discovered via learnable weights."""

    def __init__(self, num_blocks=3, channels=64):
        super().__init__()
        # Fixed stem lifts 3-channel input to the working channel width.
        self.stem = nn.Conv2d(3, channels, 3, padding=1)
        # A chain of searchable blocks, each mixing candidate operations.
        self.blocks = nn.ModuleList(
            SearchableBlock(channels) for _ in range(num_blocks)
        )
        # Global average pool, flatten, then a 10-class linear head.
        self.classifier = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Linear(channels, 10),
        )

    def forward(self, x):
        out = self.stem(x)
        for block in self.blocks:
            out = block(out)
        return self.classifier(out)

    def get_architecture(self):
        """List the currently dominant operation for each block."""
        return [
            f"Block {i}: {block.get_selected_operation()}"
            for i, block in enumerate(self.blocks)
        ]
# --- Demonstrate the searchable network -----------------------------------
# NOTE: the console transcript that was pasted after the last line of this
# section has been removed — it was not code and broke the file's syntax.
model = NASNetwork(num_blocks=4, channels=32)
print("NAS Network initialized")
print(f"Total parameters: {sum(p.numel() for p in model.parameters()):,}")

# Count only the architecture (operation-selection) parameters.
arch_params = sum(p.numel() for name, p in model.named_parameters()
                  if 'arch_params' in name)
print(f"Architecture parameters: {arch_params}")

# Sanity-check a forward pass on a CIFAR-sized dummy batch.
dummy_input = torch.randn(2, 3, 32, 32)
with torch.no_grad():
    output = model(dummy_input)
print(f"\nInput shape: {dummy_input.shape}")
print(f"Output shape: {output.shape}")

# Show which operation currently dominates each block (random at init).
print(f"\nDiscovered architecture:")
for block_arch in model.get_architecture():
    print(f" {block_arch}")

# Outline of a typical differentiable NAS workflow.
print(f"\n--- NAS Process ---")
print(f"1. Initialize: Random architecture weights")
print(f"2. Train: Optimize both model weights and architecture weights")
print(f"3. Extract: Select operations with highest weights")
print(f"4. Retrain: Train final architecture from scratch")

# A few well-known NAS approaches for further reading.
print(f"\n--- NAS Methods ---")
nas_methods = {
    "DARTS": "Differentiable architecture search",
    "ENAS": "Efficient neural architecture search",
    "NAS-RL": "Reinforcement learning-based NAS",
    "ProxylessNAS": "Memory-efficient NAS",
    "EfficientNet": "Compound scaling NAS",
}
for method, description in nas_methods.items():
    print(f" {method}: {description}")
# Example run: ~127K total parameters, 24 architecture parameters,
# output shape torch.Size([2, 10]) for the [2, 3, 32, 32] input.
**Popular AutoML Libraries:** | Tool | Best For | Key Features | |------|----------|--------------| | **Auto-sklearn** | Scikit-learn models | Automatic ensemble, meta-learning | | **TPOT** | Pipeline optimization | Genetic programming | | **H2O AutoML** | Enterprise ML | Scalable, leaderboard | | **AutoKeras** | Deep learning | Keras-based, NAS | | **Ray Tune** | Hyperparameter tuning | Distributed, scalable | | **Optuna** | Flexible tuning | Define-by-run, pruning | **Cloud AutoML Services:** - **Google Cloud AutoML**: Vision, NLP, Tables - **Azure AutoML**: Classification, regression, forecasting - **AWS SageMaker Autopilot**: Full ML pipeline **Best Practices:** 1. **Start Simple**: Use AutoML for baselines 2. **Set Constraints**: Limit search time/resources 3. **Domain Knowledge**: Guide search space 4. **Validate Carefully**: Avoid overfitting to validation 5. **Interpret Results**: Understand what was learned **When NOT to Use AutoML:** - Domain-specific architectures (e.g., protein folding) - Real-time constraints (NAS is slow) - Need full control/interpretability