Deploy ML models to production environments
Moving from Jupyter notebooks to production requires careful planning and infrastructure. **Deployment Pipeline:** 1. **Model Training**: Train and validate model 2. **Model Serialization**: Save model to disk 3. **API Development**: Create endpoint to serve predictions 4. **Containerization**: Package model and dependencies 5. **Deployment**: Deploy to cloud/server 6. **Monitoring**: Track performance and errors **Deployment Patterns:** **Batch Prediction** - Process large datasets offline - Scheduled jobs (daily, hourly) - Use case: Recommendation systems, fraud detection **Real-time Prediction** - Low latency (<100ms) - REST API or gRPC - Use case: Chatbots, real-time pricing **Edge Deployment** - Run on device (mobile, IoT) - Reduced latency, privacy - Use case: Face recognition, voice assistants **Streaming** - Process data streams - Kafka, Kinesis - Use case: Anomaly detection, monitoring
Save and load trained models:
import torch
import torch.nn as nn
import pickle
import joblib
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import make_classification
import json
import os
# 1. PyTorch Model Serialization
class SimpleNN(nn.Module):
    """Small feed-forward classifier: Linear -> ReLU -> Linear."""

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # Assemble the layer stack explicitly, then wrap it in a Sequential.
        layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim),
        ]
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Run a batch of inputs through the stacked layers."""
        return self.network(x)
# Create and save PyTorch model
model = SimpleNN(10, 64, 2)
model_path = "pytorch_model.pth"

# Save entire model (pickles the class itself; fragile across code changes)
torch.save(model, model_path)
print(f"ā Saved complete PyTorch model to {model_path}")

# Save state dict (recommended: weights only, decoupled from the class definition)
torch.save(model.state_dict(), "pytorch_state_dict.pth")
print(f"ā Saved PyTorch state dict")

# Load model. weights_only=False is required on PyTorch >= 2.6, where the
# default flipped to weights_only=True and un-pickling a full nn.Module fails.
# This is only safe because we wrote the file ourselves just above — never
# load untrusted checkpoints with weights_only=False.
loaded_model = torch.load(model_path, weights_only=False)
loaded_model.eval()
print(f"ā Loaded PyTorch model")
# 2. Scikit-learn Model Serialization
# Train a small classifier on synthetic data so there is something to persist.
features, labels = make_classification(n_samples=1000, n_features=10, random_state=42)
sklearn_model = LogisticRegression()
sklearn_model.fit(features, labels)

# Save with joblib (preferred for sklearn)
joblib.dump(sklearn_model, "sklearn_model.joblib")
print(f"\nā Saved sklearn model with joblib")

# Save with pickle
with open("sklearn_model.pkl", "wb") as fh:
    pickle.dump(sklearn_model, fh)
print(f"ā Saved sklearn model with pickle")

# Load model
restored_model = joblib.load("sklearn_model.joblib")
print(f"ā Loaded sklearn model")
# 3. Save Model Metadata
# Everything a consumer needs to reproduce/validate the model's inputs.
metadata = {
    "model_type": "LogisticRegression",
    "features": [f"feature_{i}" for i in range(10)],
    "training_date": "2026-01-18",
    "accuracy": 0.95,
    "version": "1.0.0",
    "preprocessing": {
        "scaler": "StandardScaler",
        "missing_value_strategy": "mean_imputation",
    },
}

# Persist as a JSON sidecar next to the serialized model.
with open("model_metadata.json", "w") as fh:
    json.dump(metadata, fh, indent=2)
print(f"\nā Saved model metadata")
# 4. Model Versioning Best Practices
def save_versioned_model(model, version, base_dir="models", metrics=None):
    """Persist *model* plus a JSON metadata sidecar under a versioned filename.

    Args:
        model: Any joblib-serializable estimator.
        version: Version string used in the file names (e.g. "1.2.0").
        base_dir: Directory for the artifacts; created if missing.
        metrics: Optional dict of evaluation metrics to record. Defaults to
            the placeholder values the original hard-coded, so existing
            callers see unchanged output.

    Returns:
        (model_path, metadata_path) tuple of the files written.
    """
    from datetime import datetime, timezone

    os.makedirs(base_dir, exist_ok=True)
    # Save model
    model_path = f"{base_dir}/model_v{version}.joblib"
    joblib.dump(model, model_path)
    # Save metadata. The timestamp is captured at save time instead of the
    # hard-coded literal the original used, so the record is actually useful.
    metadata = {
        "version": version,
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "model_path": model_path,
        "metrics": metrics if metrics is not None else {"accuracy": 0.95, "f1": 0.93},
    }
    metadata_path = f"{base_dir}/model_v{version}_metadata.json"
    with open(metadata_path, "w") as f:
        json.dump(metadata, f, indent=2)
    return model_path, metadata_path
# Persist the trained sklearn model under an explicit version tag and report
# where the artifacts landed (one print call, identical stdout bytes).
model_path, meta_path = save_versioned_model(sklearn_model, "1.2.0")
print(f"\nā Saved versioned model:\n Model: {model_path}\n Metadata: {meta_path}")
# 5. Model Comparison
# NOTE: the original line had a pasted program-output transcript fused onto the
# final print statement, which made the file invalid Python; it is removed here.
print(f"\n--- Serialization Methods ---")
# One-line summary per serialization format, keyed by display name.
formats = {
    "PyTorch (.pth)": "Complete model or state dict",
    "Joblib": "Fast, sklearn optimized",
    "Pickle": "General Python, less efficient",
    "ONNX": "Cross-framework, production",
    "TorchScript": "PyTorch production deployment",
    "TensorFlow SavedModel": "TF serving format"
}
for format_name, description in formats.items():
    print(f" {format_name}: {description}")
print(f"\nā Model serialization complete!")
**Major Cloud Platforms:** **AWS SageMaker** - Managed ML service - Built-in algorithms and frameworks - Auto-scaling endpoints - Model monitoring **Google Cloud AI Platform** - Vertex AI for MLOps - TensorFlow optimization - AutoML capabilities - BigQuery ML integration **Azure ML** - Designer for no-code ML - MLflow integration - Kubernetes deployment - Real-time and batch inference **Deployment Strategies:** **Canary Deployment** - Route small % traffic to new model - Monitor performance - Gradually increase traffic - Rollback if issues **Blue-Green Deployment** - Two identical environments - Switch traffic between them - Instant rollback capability - Zero downtime **Shadow Deployment** - New model receives traffic copy - Predictions not served to users - Compare with production model - Safe testing
Deploy PyTorch models with TorchServe:
# Model handler for TorchServe
import torch
import json
import logging
from ts.torch_handler.base_handler import BaseHandler
class CustomModelHandler(BaseHandler):
    """
    Custom handler for TorchServe.

    Implements the four-stage serving lifecycle:
    initialize -> preprocess -> inference -> postprocess.
    """

    def initialize(self, context):
        """Load model and set up.

        Args:
            context: TorchServe context carrying the manifest and system
                properties (including the extracted model directory).
        """
        self.manifest = context.manifest
        properties = context.system_properties
        model_dir = properties.get("model_dir")
        # Prefer GPU when available; inputs are moved to this device in preprocess.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model_path = f"{model_dir}/model.pth"
        # weights_only=False is required on PyTorch >= 2.6 (the default flipped
        # to True) to un-pickle a full nn.Module. The .mar archive is content we
        # packaged ourselves, so full unpickling is acceptable here.
        self.model = torch.load(model_path, map_location=self.device, weights_only=False)
        self.model.eval()
        # Load sidecar metadata (e.g. model version) shipped inside the archive.
        with open(f"{model_dir}/metadata.json", "r") as f:
            self.metadata = json.load(f)
        self.initialized = True
        logging.info(f"Model loaded successfully from {model_dir}")

    def preprocess(self, data):
        """
        Transform raw input into model input.

        Args:
            data: List of per-request dicts from TorchServe; each may carry the
                payload under "body" (possibly still JSON-encoded bytes).

        Returns:
            A single batched float tensor on self.device.
        """
        processed = []
        for row in data:
            # Assume input is JSON with 'features' key
            input_data = row.get("body", row)
            if isinstance(input_data, (bytes, bytearray)):
                input_data = json.loads(input_data)
            # Extract features
            features = input_data.get("features", [])
            # Convert to tensor; unsqueeze adds the batch dimension per request.
            tensor = torch.FloatTensor(features).unsqueeze(0)
            processed.append(tensor)
        # Concatenate per-request rows into one batch for a single forward pass.
        return torch.cat(processed, dim=0).to(self.device)

    def inference(self, model_input):
        """
        Run inference on preprocessed data, returning class probabilities.
        """
        with torch.no_grad():
            predictions = self.model(model_input)
            probabilities = torch.softmax(predictions, dim=1)
        return probabilities

    def postprocess(self, inference_output):
        """
        Transform model output to the per-request response format.

        Returns:
            A list (one entry per batched request) of dicts with the predicted
            class, its confidence, the full probability vector, and the model
            version recorded in the archive metadata.
        """
        results = []
        for probs in inference_output:
            # torch.max over the class dim yields (confidence, argmax index).
            confidence, predicted_class = torch.max(probs, dim=0)
            result = {
                "prediction": int(predicted_class),
                "confidence": float(confidence),
                "probabilities": probs.cpu().numpy().tolist(),
                "model_version": self.metadata.get("version", "unknown")
            }
            results.append(result)
        return results
# Example deployment configuration (config.properties)
# NOTE: the original final line had a pasted program-output transcript fused
# onto the last print statement, which made the file invalid Python; removed.
config_properties = """
inference_address=http://0.0.0.0:8080
management_address=http://0.0.0.0:8081
metrics_address=http://0.0.0.0:8082
number_of_netty_threads=32
job_queue_size=1000
model_store=/models
model_snapshot={"name":"startup.cfg","modelCount":1,"models":{"custom_model":{"1.0":{"defaultVersion":true,"marName":"custom_model.mar","minWorkers":1,"maxWorkers":4}}}}
"""
print("TorchServe Model Handler")
print("=" * 50)
print(f"\nHandler Methods:")
print(f" 1. initialize(): Load model and setup")
print(f" 2. preprocess(): Transform input data")
print(f" 3. inference(): Run model prediction")
print(f" 4. postprocess(): Format response")
print(f"\nTorchServe Commands:")
print(f" # Create model archive")
print(f" torch-model-archiver --model-name custom_model \\")
print(f" --version 1.0 \\")
print(f" --model-file model.py \\")
print(f" --serialized-file model.pth \\")
print(f" --handler custom_handler.py")
print(f"\n # Start TorchServe")
print(f" torchserve --start \\")
print(f" --model-store /models \\")
print(f" --models custom_model=custom_model.mar")
print(f"\n # Make prediction")
print(f" curl -X POST http://localhost:8080/predictions/custom_model \\")
print(f" -H 'Content-Type: application/json' \\")
print(f" -d '{{\"features\": [1.0, 2.0, 3.0]}}'")
print(f"\nConfiguration:")
print(f" - Inference port: 8080")
print(f" - Management port: 8081")
print(f" - Metrics port: 8082")
print(f" - Workers: 1-4 (auto-scaling)")
print(f"\nā TorchServe deployment ready!")
==================================================
Handler Methods:
1. initialize(): Load model and setup
2. preprocess(): Transform input data
3. inference(): Run model prediction
4. postprocess(): Format response
TorchServe Commands:
# Create model archive
torch-model-archiver --model-name custom_model \
--version 1.0 \
--model-file model.py \
--serialized-file model.pth \
--handler custom_handler.py
# Start TorchServe
torchserve --start \
--model-store /models \
--models custom_model=custom_model.mar
# Make prediction
curl -X POST http://localhost:8080/predictions/custom_model \
-H 'Content-Type: application/json' \
-d '{"features": [1.0, 2.0, 3.0]}'
Configuration:
- Inference port: 8080
- Management port: 8081
- Metrics port: 8082
- Workers: 1-4 (auto-scaling)
✓ TorchServe deployment ready!