"""Principal Component Analysis Base Classes""" # Author: Alexandre Gramfort # Olivier Grisel # Mathieu Blondel # Denis A. Engemann # Kyle Kastner # # License: BSD 3 clause import numpy as np from scipy import linalg from ..base import BaseEstimator, TransformerMixin from ..utils.validation import check_is_fitted from abc import ABCMeta, abstractmethod class _BasePCA(TransformerMixin, BaseEstimator, metaclass=ABCMeta): """Base class for PCA methods. Warning: This class should not be used directly. Use derived classes instead. """ def get_covariance(self): """Compute data covariance with the generative model. ``cov = components_.T * S**2 * components_ + sigma2 * eye(n_features)`` where S**2 contains the explained variances, and sigma2 contains the noise variances. Returns ------- cov : array of shape=(n_features, n_features) Estimated covariance of data. """ components_ = self.components_ exp_var = self.explained_variance_ if self.whiten: components_ = components_ * np.sqrt(exp_var[:, np.newaxis]) exp_var_diff = np.maximum(exp_var - self.noise_variance_, 0.0) cov = np.dot(components_.T * exp_var_diff, components_) cov.flat[:: len(cov) + 1] += self.noise_variance_ # modify diag inplace return cov def get_precision(self): """Compute data precision matrix with the generative model. Equals the inverse of the covariance but computed with the matrix inversion lemma for efficiency. Returns ------- precision : array, shape=(n_features, n_features) Estimated precision of data. """ n_features = self.components_.shape[1] # handle corner cases first if self.n_components_ == 0: return np.eye(n_features) / self.noise_variance_ if self.n_components_ == n_features: return linalg.inv(self.get_covariance()) # Get precision using matrix inversion lemma components_ = self.components_ exp_var = self.explained_variance_ if self.whiten: components_ = components_ * np.sqrt(exp_var[:, np.newaxis]) exp_var_diff = np.maximum(exp_var - self.noise_variance_, 0.0) precision = np.dot(components_, components_.T) / self.noise_variance_ precision.flat[:: len(precision) + 1] += 1.0 / exp_var_diff precision = np.dot(components_.T, np.dot(linalg.inv(precision), components_)) precision /= -(self.noise_variance_ ** 2) precision.flat[:: len(precision) + 1] += 1.0 / self.noise_variance_ return precision @abstractmethod def fit(self, X, y=None): """Placeholder for fit. Subclasses should implement this method! Fit the model with X. Parameters ---------- X : array-like of shape (n_samples, n_features) Training data, where `n_samples` is the number of samples and `n_features` is the number of features. Returns ------- self : object Returns the instance itself. """ def transform(self, X): """Apply dimensionality reduction to X. X is projected on the first principal components previously extracted from a training set. Parameters ---------- X : array-like of shape (n_samples, n_features) New data, where `n_samples` is the number of samples and `n_features` is the number of features. Returns ------- X_new : array-like of shape (n_samples, n_components) Projection of X in the first principal components, where `n_samples` is the number of samples and `n_components` is the number of the components. """ check_is_fitted(self) X = self._validate_data(X, dtype=[np.float64, np.float32], reset=False) if self.mean_ is not None: X = X - self.mean_ X_transformed = np.dot(X, self.components_.T) if self.whiten: X_transformed /= np.sqrt(self.explained_variance_) return X_transformed def inverse_transform(self, X): """Transform data back to its original space. In other words, return an input `X_original` whose transform would be X. Parameters ---------- X : array-like of shape (n_samples, n_components) New data, where `n_samples` is the number of samples and `n_components` is the number of components. Returns ------- X_original array-like of shape (n_samples, n_features) Original data, where `n_samples` is the number of samples and `n_features` is the number of features. Notes ----- If whitening is enabled, inverse_transform will compute the exact inverse operation, which includes reversing whitening. """ if self.whiten: return ( np.dot( X, np.sqrt(self.explained_variance_[:, np.newaxis]) * self.components_, ) + self.mean_ ) else: return np.dot(X, self.components_) + self.mean_