"""Built-in classification strategies.""" from typing import Any import numpy as np from scipy.stats import multivariate_normal from sklearn.ensemble import RandomForestClassifier from sklearn.svm import SVC from sklearn.linear_model import LogisticRegression from .base import ClassificationStrategy class RandomForestStrategy(ClassificationStrategy): """Random Forest classification strategy.""" def __init__( self, n_estimators: int = 100, max_depth: int | None = None, random_state: int = 42, **kwargs ): self.n_estimators = n_estimators self.max_depth = max_depth self.random_state = random_state self._clf = RandomForestClassifier( n_estimators=n_estimators, max_depth=max_depth, random_state=random_state, **kwargs ) def train(self, X: np.ndarray, y: np.ndarray) -> None: self._clf.fit(X, y) def predict(self, X: np.ndarray) -> np.ndarray: return self._clf.predict(X) def predict_proba(self, X: np.ndarray) -> np.ndarray: return self._clf.predict_proba(X) def get_params(self) -> dict[str, Any]: return { "n_estimators": self.n_estimators, "max_depth": self.max_depth, "random_state": self.random_state, } @property def name(self) -> str: return "RandomForest" class SVMStrategy(ClassificationStrategy): """Support Vector Machine classification strategy.""" def __init__( self, kernel: str = "rbf", C: float = 1.0, gamma: str = "scale", random_state: int = 42, **kwargs ): self.kernel = kernel self.C = C self.gamma = gamma self.random_state = random_state self._clf = SVC( kernel=kernel, C=C, gamma=gamma, random_state=random_state, probability=True, **kwargs ) def train(self, X: np.ndarray, y: np.ndarray) -> None: self._clf.fit(X, y) def predict(self, X: np.ndarray) -> np.ndarray: return self._clf.predict(X) def predict_proba(self, X: np.ndarray) -> np.ndarray: return self._clf.predict_proba(X) def get_params(self) -> dict[str, Any]: return { "kernel": self.kernel, "C": self.C, "gamma": self.gamma, "random_state": self.random_state, } @property def name(self) -> str: return "SVM" 
class LogisticRegressionStrategy(ClassificationStrategy):
    """Logistic Regression classification strategy."""

    def __init__(
        self,
        penalty: str = "l2",
        C: float = 1.0,
        max_iter: int = 1000,
        random_state: int = 42,
        **kwargs,
    ):
        """Initialize the Logistic Regression classifier.

        Args:
            penalty: Regularization norm (e.g. "l2").
            C: Inverse regularization strength.
            max_iter: Maximum solver iterations.
            random_state: Seed for reproducible results.
            **kwargs: Extra keyword arguments forwarded to
                ``LogisticRegression``.
        """
        self.penalty = penalty
        self.C = C
        self.max_iter = max_iter
        self.random_state = random_state
        self._clf = LogisticRegression(
            penalty=penalty,
            C=C,
            max_iter=max_iter,
            random_state=random_state,
            **kwargs,
        )

    def train(self, X: np.ndarray, y: np.ndarray) -> None:
        """Fit the model on features ``X`` and labels ``y``."""
        self._clf.fit(X, y)

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Return predicted class labels for ``X``."""
        return self._clf.predict(X)

    def predict_proba(self, X: np.ndarray) -> np.ndarray:
        """Return per-class probability estimates for ``X``."""
        return self._clf.predict_proba(X)

    def get_params(self) -> dict[str, Any]:
        """Return the constructor parameters of this strategy."""
        return {
            "penalty": self.penalty,
            "C": self.C,
            "max_iter": self.max_iter,
            "random_state": self.random_state,
        }

    @property
    def name(self) -> str:
        return "LogisticRegression"


class MLEStrategy(ClassificationStrategy):
    """Maximum Likelihood Estimation classification strategy.

    Assumes each class follows a multivariate normal distribution.
    Classic algorithm for GIS/remote sensing classification.
    """

    def __init__(self, reg_covar: float = 1e-6):
        """Initialize MLE classifier.

        Args:
            reg_covar: Regularization added to the covariance diagonal
                for numerical stability.
        """
        self.reg_covar = reg_covar
        self._means: dict[Any, np.ndarray] = {}
        self._covs: dict[Any, np.ndarray] = {}
        self._priors: dict[Any, float] = {}
        self._classes: np.ndarray | None = None

    def train(self, X: np.ndarray, y: np.ndarray) -> None:
        """Estimate mean, covariance and prior for each class.

        Args:
            X: Feature matrix of shape (n_samples, n_features) —
                assumed 2-D; TODO confirm callers never pass 1-D input.
            y: Label vector of shape (n_samples,).
        """
        self._classes = np.unique(y)
        self._means = {}
        self._covs = {}
        self._priors = {}
        n_samples = len(y)
        for cls in self._classes:
            X_cls = X[y == cls]
            # Prior probability = class frequency in the training set.
            self._priors[cls] = len(X_cls) / n_samples
            # Per-class mean vector.
            self._means[cls] = np.mean(X_cls, axis=0)
            # Covariance matrix. np.cov yields NaN for a single-sample
            # class (ddof=1 divides by zero), which would poison every
            # later prediction — fall back to a zero matrix so the
            # reg_covar ridge below yields a valid isotropic covariance.
            if len(X_cls) < 2:
                cov = np.zeros((X_cls.shape[1], X_cls.shape[1]))
            else:
                cov = np.cov(X_cls, rowvar=False)
                if cov.ndim == 0:
                    # Single-feature input: np.cov returns a scalar.
                    cov = np.array([[cov]])
            # Ridge regularization keeps the matrix invertible.
            cov = cov + np.eye(cov.shape[0]) * self.reg_covar
            self._covs[cls] = cov

    def _compute_log_likelihood(self, X: np.ndarray, cls: Any) -> np.ndarray:
        """Return log p(X | cls) + log p(cls) for each row of ``X``."""
        mean = self._means[cls]
        cov = self._covs[cls]
        prior = self._priors[cls]
        try:
            rv = multivariate_normal(mean=mean, cov=cov, allow_singular=True)
            log_likelihood = rv.logpdf(X)
        except Exception:
            # Fallback: evaluate the Gaussian log-density manually.
            diff = X - mean
            try:
                cov_inv = np.linalg.inv(cov)
            except np.linalg.LinAlgError:
                # Singular covariance: use the pseudo-inverse.
                cov_inv = np.linalg.pinv(cov)
            mahalanobis = np.sum(diff @ cov_inv * diff, axis=1)
            log_det = np.linalg.slogdet(cov)[1]
            log_likelihood = -0.5 * (
                X.shape[1] * np.log(2 * np.pi) + log_det + mahalanobis
            )
        return log_likelihood + np.log(prior)

    def _joint_log_likelihood(self, X: np.ndarray) -> np.ndarray:
        """Return an (n_samples, n_classes) matrix of log joint densities.

        Shared by predict() and predict_proba(); raises if untrained.
        """
        if self._classes is None:
            raise RuntimeError("Classifier not trained")
        log_likelihoods = np.zeros((X.shape[0], len(self._classes)))
        for i, cls in enumerate(self._classes):
            log_likelihoods[:, i] = self._compute_log_likelihood(X, cls)
        return log_likelihoods

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Predict the class with maximum posterior likelihood per row.

        Raises:
            RuntimeError: If called before train().
        """
        log_likelihoods = self._joint_log_likelihood(X)
        return self._classes[np.argmax(log_likelihoods, axis=1)]

    def predict_proba(self, X: np.ndarray) -> np.ndarray:
        """Predict class probabilities via softmax of log-likelihoods.

        Raises:
            RuntimeError: If called before train().
        """
        log_likelihoods = self._joint_log_likelihood(X)
        # Stable softmax: subtract the row-wise max before exponentiating.
        log_likelihoods -= np.max(log_likelihoods, axis=1, keepdims=True)
        exp_ll = np.exp(log_likelihoods)
        return exp_ll / np.sum(exp_ll, axis=1, keepdims=True)

    def get_params(self) -> dict[str, Any]:
        """Return the constructor parameters of this strategy."""
        return {
            "reg_covar": self.reg_covar,
        }

    @property
    def name(self) -> str:
        return "MLE"