feat: initial release of GIS classification project with strategy-based classifier selection
commit af365cfe68

14 changed files with 1115 additions and 0 deletions

src/strategies/classifiers.py (new file, 250 lines)

@@ -0,0 +1,250 @@
"""Built-in classification strategies."""
|
||||
|
||||
from typing import Any
|
||||
import numpy as np
|
||||
from scipy.stats import multivariate_normal
|
||||
from sklearn.ensemble import RandomForestClassifier
|
||||
from sklearn.svm import SVC
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
from .base import ClassificationStrategy
|
||||
|
||||
|
||||
class RandomForestStrategy(ClassificationStrategy):
|
||||
"""Random Forest classification strategy."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
n_estimators: int = 100,
|
||||
max_depth: int | None = None,
|
||||
random_state: int = 42,
|
||||
**kwargs
|
||||
):
|
||||
self.n_estimators = n_estimators
|
||||
self.max_depth = max_depth
|
||||
self.random_state = random_state
|
||||
self._clf = RandomForestClassifier(
|
||||
n_estimators=n_estimators,
|
||||
max_depth=max_depth,
|
||||
random_state=random_state,
|
||||
**kwargs
|
||||
)
|
||||
|
||||
def train(self, X: np.ndarray, y: np.ndarray) -> None:
|
||||
self._clf.fit(X, y)
|
||||
|
||||
def predict(self, X: np.ndarray) -> np.ndarray:
|
||||
return self._clf.predict(X)
|
||||
|
||||
def predict_proba(self, X: np.ndarray) -> np.ndarray:
|
||||
return self._clf.predict_proba(X)
|
||||
|
||||
def get_params(self) -> dict[str, Any]:
|
||||
return {
|
||||
"n_estimators": self.n_estimators,
|
||||
"max_depth": self.max_depth,
|
||||
"random_state": self.random_state,
|
||||
}
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return "RandomForest"
|
||||
|
||||
|
||||
class SVMStrategy(ClassificationStrategy):
|
||||
"""Support Vector Machine classification strategy."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
kernel: str = "rbf",
|
||||
C: float = 1.0,
|
||||
gamma: str = "scale",
|
||||
random_state: int = 42,
|
||||
**kwargs
|
||||
):
|
||||
self.kernel = kernel
|
||||
self.C = C
|
||||
self.gamma = gamma
|
||||
self.random_state = random_state
|
||||
self._clf = SVC(
|
||||
kernel=kernel,
|
||||
C=C,
|
||||
gamma=gamma,
|
||||
random_state=random_state,
|
||||
probability=True,
|
||||
**kwargs
|
||||
)
|
||||
|
||||
def train(self, X: np.ndarray, y: np.ndarray) -> None:
|
||||
self._clf.fit(X, y)
|
||||
|
||||
def predict(self, X: np.ndarray) -> np.ndarray:
|
||||
return self._clf.predict(X)
|
||||
|
||||
def predict_proba(self, X: np.ndarray) -> np.ndarray:
|
||||
return self._clf.predict_proba(X)
|
||||
|
||||
def get_params(self) -> dict[str, Any]:
|
||||
return {
|
||||
"kernel": self.kernel,
|
||||
"C": self.C,
|
||||
"gamma": self.gamma,
|
||||
"random_state": self.random_state,
|
||||
}
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return "SVM"
|
||||
|
||||
|
||||
class LogisticRegressionStrategy(ClassificationStrategy):
|
||||
"""Logistic Regression classification strategy."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
penalty: str = "l2",
|
||||
C: float = 1.0,
|
||||
max_iter: int = 1000,
|
||||
random_state: int = 42,
|
||||
**kwargs
|
||||
):
|
||||
self.penalty = penalty
|
||||
self.C = C
|
||||
self.max_iter = max_iter
|
||||
self.random_state = random_state
|
||||
self._clf = LogisticRegression(
|
||||
penalty=penalty,
|
||||
C=C,
|
||||
max_iter=max_iter,
|
||||
random_state=random_state,
|
||||
**kwargs
|
||||
)
|
||||
|
||||
def train(self, X: np.ndarray, y: np.ndarray) -> None:
|
||||
self._clf.fit(X, y)
|
||||
|
||||
def predict(self, X: np.ndarray) -> np.ndarray:
|
||||
return self._clf.predict(X)
|
||||
|
||||
def predict_proba(self, X: np.ndarray) -> np.ndarray:
|
||||
return self._clf.predict_proba(X)
|
||||
|
||||
def get_params(self) -> dict[str, Any]:
|
||||
return {
|
||||
"penalty": self.penalty,
|
||||
"C": self.C,
|
||||
"max_iter": self.max_iter,
|
||||
"random_state": self.random_state,
|
||||
}
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return "LogisticRegression"
|
||||
|
||||
|
||||
class MLEStrategy(ClassificationStrategy):
|
||||
"""Maximum Likelihood Estimation classification strategy.
|
||||
|
||||
Assumes each class follows a multivariate normal distribution.
|
||||
Classic algorithm for GIS/remote sensing classification.
|
||||
"""
|
||||
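
    # Decision rule used below: with per-class mean mu_c, covariance Sigma_c and
    # prior P(c) estimated in train(), each sample x is scored by the Gaussian
    # log-density plus the log-prior,
    #
    #     g_c(x) = -0.5 * (d * ln(2*pi) + ln|Sigma_c|
    #                      + (x - mu_c)^T Sigma_c^{-1} (x - mu_c)) + ln P(c),
    #
    # and predict() assigns the class with the largest g_c(x).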

    def __init__(self, reg_covar: float = 1e-6):
        """Initialize MLE classifier.

        Args:
            reg_covar: Regularization for covariance matrix stability.
        """
        self.reg_covar = reg_covar
        self._means: dict[Any, np.ndarray] = {}
        self._covs: dict[Any, np.ndarray] = {}
        self._priors: dict[Any, float] = {}
        self._classes: np.ndarray | None = None

    def train(self, X: np.ndarray, y: np.ndarray) -> None:
        """Estimate mean, covariance and prior for each class."""
        self._classes = np.unique(y)
        self._means = {}
        self._covs = {}
        self._priors = {}

        n_samples = len(y)

        for cls in self._classes:
            X_cls = X[y == cls]

            # Prior probability
            self._priors[cls] = len(X_cls) / n_samples

            # Mean vector
            self._means[cls] = np.mean(X_cls, axis=0)

            # Covariance matrix with regularization
            cov = np.cov(X_cls, rowvar=False)
            if cov.ndim == 0:
                cov = np.array([[cov]])
            cov += np.eye(cov.shape[0]) * self.reg_covar
            self._covs[cls] = cov

    def _compute_log_likelihood(self, X: np.ndarray, cls: Any) -> np.ndarray:
        """Compute the class-conditional log-likelihood plus log-prior."""
        mean = self._means[cls]
        cov = self._covs[cls]
        prior = self._priors[cls]

        try:
            rv = multivariate_normal(mean=mean, cov=cov, allow_singular=True)
            log_likelihood = rv.logpdf(X)
        except Exception:
            # Fallback: compute manually
            diff = X - mean
            try:
                cov_inv = np.linalg.inv(cov)
            except np.linalg.LinAlgError:
                cov_inv = np.linalg.pinv(cov)

            mahalanobis = np.sum(diff @ cov_inv * diff, axis=1)
            log_det = np.linalg.slogdet(cov)[1]
            log_likelihood = -0.5 * (X.shape[1] * np.log(2 * np.pi) + log_det + mahalanobis)

        return log_likelihood + np.log(prior)

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Predict class with maximum likelihood."""
        if self._classes is None:
            raise RuntimeError("Classifier not trained")

        # Compute log-likelihoods for all classes
        log_likelihoods = np.zeros((X.shape[0], len(self._classes)))

        for i, cls in enumerate(self._classes):
            log_likelihoods[:, i] = self._compute_log_likelihood(X, cls)

        # Return class with maximum likelihood
        return self._classes[np.argmax(log_likelihoods, axis=1)]

    def predict_proba(self, X: np.ndarray) -> np.ndarray:
        """Predict class probabilities using softmax of log-likelihoods."""
        if self._classes is None:
            raise RuntimeError("Classifier not trained")

        # Compute log-likelihoods
        log_likelihoods = np.zeros((X.shape[0], len(self._classes)))

        for i, cls in enumerate(self._classes):
            log_likelihoods[:, i] = self._compute_log_likelihood(X, cls)

        # Convert to probabilities via softmax (shift by the row max for numerical stability)
        log_likelihoods -= np.max(log_likelihoods, axis=1, keepdims=True)
        exp_ll = np.exp(log_likelihoods)
        probabilities = exp_ll / np.sum(exp_ll, axis=1, keepdims=True)

        return probabilities

    def get_params(self) -> dict[str, Any]:
        return {
            "reg_covar": self.reg_covar,
        }

    @property
    def name(self) -> str:
        return "MLE"
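
The `ClassificationStrategy` base class is imported from `src/strategies/base.py`, which is not part of this hunk. Judging from the methods every strategy above implements, its contract is presumably close to the sketch below; the ABC structure and signatures here are an assumption, not the file's actual contents:

# Assumed interface, reconstructed from the strategies in classifiers.py;
# see src/strategies/base.py in the commit for the real definition.
from abc import ABC, abstractmethod
from typing import Any

import numpy as np


class ClassificationStrategy(ABC):
    """Common interface each classification strategy implements."""

    @abstractmethod
    def train(self, X: np.ndarray, y: np.ndarray) -> None: ...

    @abstractmethod
    def predict(self, X: np.ndarray) -> np.ndarray: ...

    @abstractmethod
    def predict_proba(self, X: np.ndarray) -> np.ndarray: ...

    @abstractmethod
    def get_params(self) -> dict[str, Any]: ...

    @property
    @abstractmethod
    def name(self) -> str: ...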
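
A minimal usage sketch of the strategies above, assuming the package is importable as `src.strategies.classifiers`; the toy dataset and printed metrics are illustrative only:

import numpy as np

from src.strategies.classifiers import MLEStrategy, RandomForestStrategy

# Illustrative two-class dataset with 4 features per sample (e.g. spectral bands).
rng = np.random.default_rng(0)
X = np.vstack([rng.normal(0.0, 1.0, (50, 4)), rng.normal(3.0, 1.0, (50, 4))])
y = np.array([0] * 50 + [1] * 50)

for strategy in (RandomForestStrategy(n_estimators=50), MLEStrategy()):
    strategy.train(X, y)                       # fit on the labelled samples
    accuracy = (strategy.predict(X) == y).mean()
    proba = strategy.predict_proba(X)          # shape: (n_samples, n_classes)
    print(f"{strategy.name}: params={strategy.get_params()}, "
          f"train accuracy={accuracy:.2f}, proba shape={proba.shape}")

Because every strategy exposes the same `train`/`predict`/`predict_proba` surface, the selector mentioned in the commit message can swap classifiers without changing calling code.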