# Logistic Regression: Building a Binary Classifier from Scratch
Logistic regression is used for binary classification. Unlike linear regression, it outputs probabilities between 0 and 1 by passing the linear score through the sigmoid function.

**Sigmoid function:** σ(z) = 1 / (1 + e^(−z))

**Hypothesis:** h(x) = σ(w · x + b)
1. **Sigmoid activation**: converts the linear output into a probability
2. **Binary cross-entropy loss**: measures the classification error
3. **Decision boundary**: a threshold (usually 0.5) used to assign the class label
4. **Gradient descent**: optimizes the weights and bias
Build from scratch:
import numpy as np
class LogisticRegression:
    """Binary logistic regression trained with full-batch gradient descent.

    Model: p(y=1 | x) = sigmoid(w . x + b), fit by minimizing the mean
    binary cross-entropy over the whole training set at each iteration.
    """

    def __init__(self, learning_rate=0.01, n_iterations=1000, verbose=True):
        """
        Args:
            learning_rate: step size for the gradient descent updates.
            n_iterations: number of full-batch gradient steps.
            verbose: if True, print the training loss every 100 iterations
                (default True preserves the original behavior).
        """
        self.lr = learning_rate
        self.n_iterations = n_iterations
        self.verbose = verbose
        self.w = None  # weight vector, shape (n_features,); set by fit()
        self.b = None  # bias scalar; set by fit()

    def sigmoid(self, z):
        """Numerically stable sigmoid.

        z is clipped to [-500, 500] so np.exp never overflows; sigmoid is
        already saturated (0 or 1 to machine precision) at those bounds.
        """
        z = np.clip(z, -500.0, 500.0)
        return 1 / (1 + np.exp(-z))

    def fit(self, X, y):
        """Fit weights and bias to (X, y) with gradient descent.

        Args:
            X: array of shape (n_samples, n_features).
            y: binary labels in {0, 1}, shape (n_samples,).
        Returns:
            self, to allow call chaining.
        """
        n_samples, n_features = X.shape
        self.w = np.zeros(n_features)
        self.b = 0.0

        for i in range(self.n_iterations):
            # Forward pass: predicted probability for every sample.
            predictions = self.sigmoid(np.dot(X, self.w) + self.b)

            # Gradients of the mean binary cross-entropy w.r.t. w and b.
            error = predictions - y
            dw = np.dot(X.T, error) / n_samples
            db = np.sum(error) / n_samples

            # Step against the gradient.
            self.w -= self.lr * dw
            self.b -= self.lr * db

            if self.verbose and i % 100 == 0:
                # Clip probabilities away from exactly 0/1 so log() never
                # receives 0 (which would produce -inf / NaN loss).
                p = np.clip(predictions, 1e-12, 1 - 1e-12)
                loss = -np.mean(y * np.log(p) + (1 - y) * np.log(1 - p))
                print(f"Iteration {i}: Loss = {loss:.4f}")
        return self

    def predict_proba(self, X):
        """Return p(y=1 | x) for each row of X."""
        return self.sigmoid(np.dot(X, self.w) + self.b)

    def predict(self, X):
        """Return hard 0/1 labels using a 0.5 probability threshold."""
        return (self.predict_proba(X) >= 0.5).astype(int)
# Generate linearly separable synthetic data: the label is 1 exactly when
# the two features sum to a positive number, so a linear decision boundary
# through the origin can recover it.
np.random.seed(42)
X = np.random.randn(200, 2)
y = (X[:, 0] + X[:, 1] > 0).astype(int)

# Train model
model = LogisticRegression(learning_rate=0.1, n_iterations=1000)
model.fit(X, y)

# Evaluate on the training set (acceptable here: this is a demo checking
# that the model can recover a known linear boundary, not a generalization
# estimate).
predictions = model.predict(X)
accuracy = np.mean(predictions == y)
print(f"\nAccuracy: {accuracy * 100:.2f}%")

# Expected output with seed 42 (previously fused onto the line above,
# which made the file a SyntaxError):
#   Iteration 0: Loss = 0.6931
#   Iteration 100: Loss = 0.3247
#   Iteration 200: Loss = 0.2459
#   Iteration 300: Loss = 0.2043
#   Iteration 400: Loss = 0.1788
#   Iteration 500: Loss = 0.1614
#   Iteration 600: Loss = 0.1488
#   Iteration 700: Loss = 0.1392
#   Iteration 800: Loss = 0.1317
#   Iteration 900: Loss = 0.1256
#   Accuracy: 95.50%