Module transact.pv_computation
PVComputation
-
Non-linear dimensionality reduction by kernel PCA and alignment of resulting kernel PCs.
@author: Soufiane Mourragui
Example
::
import numpy as np
from transact.PVComputation import PVComputation
# Generate data
n_source = 100
n_target = 200
n_features = 500
X_source = np.random.normal(size=(n_source, n_features))
y_source = X_source.dot(np.random.normal(size=(n_features)))
X_target = np.random.normal(size=(n_target, n_features))
# Create a PVComputation instance
principal_vectors = PVComputation(
    kernel='rbf',
    kernel_params={'gamma': 1/np.sqrt(n_features)},
    n_components={'source': 20, 'target': 40},
    n_jobs=1
)
# Compute the principal vectors
principal_vectors.fit(
    X_source,
    X_target,
    n_pv=10
)
::
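Once fitted, the principal vectors can be used to project data, and the alignment can be inspected through the canonical angles. A minimal sketch continuing the example above (all names as defined there):
::
# Project source and target data on both sets of principal vectors
X_source_projected = principal_vectors.transform(X_source)
X_target_projected = principal_vectors.transform(X_target)
# Each value is an array of shape (n_samples, n_pv), e.g. (n_source, 10) here
print(X_source_projected['source'].shape)
# Canonical angles (in radians) between source and target kernel principal components
print(principal_vectors.canonical_angles)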
References
[1] Golub and Van Loan, Matrix Computations, 2013.
[2] Mourragui et al., 2021, Predicting clinical drug response from model systems by non-linear subspace-based transfer learning, bioRxiv.
Source code
""" <h3>PVComputation</h3>: Non-linear dimensionality reduction by kernel PCA and alignment of resulting kernel PCs.
@author: Soufiane Mourragui
Example
-------
Notes
-------
::
import numpy as np
from transact.PVComputation import PVComputation
# Generate data
n_source = 100
n_target = 200
n_features = 500
X_source = np.random.normal(size=(n_source, n_features))
y_source = X_source.dot(np.random.normal(size=(n_features)))
X_target = np.random.normal(size=(n_target, n_features))
# Create a TRANSACT instance
principal_vectors = PVComputation(
kernel='rbf',
kernel_params={'gamma':1/np.sqrt(n_features)},
n_components={'source': 20, 'target':40},
n_jobs=1,
verbose=1
)
# Compute consensus features
clf.fit(
X_source,
X_target,
n_pv=10,
step=100,
with_interpolation=True
)
::
References
-------
[1] Golub and van Loan, Matrix Computations, 2013.
[2] Mourragui et al 2021, Predicting clinical drug response from model systems by non-linear subspace-based transfer
learning, Biorxiv.
"""
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import kernel_metrics
from sklearn.decomposition import KernelPCA
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Ridge, ElasticNet
from transact.matrix_operations import _sqrt_matrix, _center_kernel, _right_center_kernel, _left_center_kernel
from transact.kernel_computer import KernelComputer
class PVComputation:
"""
PVComputation handles the dimensionality reduction and alignment of the learned manifolds.
<br/><br/>
This class handles the following tasks and sub-routines:
<ul>
<li> Kernel PCA decomposition on source and target independently.
<li> Kernel principal components comparison.
<li> Computation of Principal Vectors (PVs).
</ul>
"""
def __init__(self, kernel, kernel_params={}, n_components=None, n_pv=None, n_jobs=1):
"""
Parameters
----------
kernel : str
Name of the kernel to be used in the algorithm. Has to be compliant with
<a href="https://scikit-learn.org/stable/modules/generated/sklearn.metrics.pairwise.kernel_metrics.html#sklearn.metrics.pairwise.kernel_metrics">
scikit-learn kernel</a>, e.g., "rbf", "polynomial", "laplacian", "linear", ...
kernel_params : dict, default to None
Parameters of the kernel (degree for polynomial kernel, gamma for RBF).
Naming has to be compliant with scikit-learn, e.g., {"gamma": 0.0005}.
n_components : int or dict, default to None
Number of components for kernel PCA.
<br/> If int, then indicates the same number of components for source and target.
<br/> If dict, then must be of the form {'source':int, 'target':int}.
n_pv : int, default to None
Number of principal vectors.
n_jobs : int, default to 1
Number of concurrent threads to use for tasks that can be parallelized.
"""
self.gamma_coef = None
self.alpha_coef = None
self.beta_coef = None
self.canonical_angles = None
self.kernel = kernel
self.kernel_ = kernel_metrics()[kernel]
self.kernel_params_ = kernel_params
self.kernel_values_ = KernelComputer(self.kernel, self.kernel_params_)
# Put n_components in dictionary format.
self.n_components = n_components
if type(self.n_components) == int:
self.n_components = {
s:self.n_components for s in ['source', 'target']
}
self.n_pv = n_pv
self.n_jobs = n_jobs
def fit(self, source_data, target_data, method='two-stage', n_components=None, n_pv=None):
"""
Computes the kernel principal vectors between source and target data.
Parameters
-------
source_data: numpy.ndarray, shape (n_samples, n_genes)
Source data
target_data: numpy.ndarray, shape (n_samples, n_genes)
Target data
method: str, default to "two-stage"
Method used for computing the kernel PVs, either "two-stage" (first kernel PCA, then
alignment), or "direct" (direct minimization).
<br/>
<b>NOT IMPLEMENTED:</b> The one-shot computation of the PVs has not been implemented.
n_components: int, default to None
Number of components taken into the decomposition.
n_pv: int, default to None
Number of Principal Vectors. If not set here or in __init__, the maximum number of PVs will be computed.
Returned Values
-------
self : PVComputation
Fitted instance.
"""
# Compute kernel matrices
self.kernel_values_.fit(source_data, target_data, center=True)
if method == 'two-stage':
self._two_stage_computation(n_components, n_pv)
elif method == 'direct':
self._direct_computation(n_components)
return self
def transform(self, X, right_center=False):
"""
Project data X on source and target kernel principal vectors
Parameters
-------
X: numpy.ndarray, shape (n_samples, n_genes)
Data to project
right_center: Boolean, default to False
Whether data should be implicitly mean centered
Returned Values
-------
Dictionary with 'source' and 'target' as keys, and projected arrays as values.
"""
X_projected = {}
for t in ['source', 'target']:
X_projected[t] = self._project_PV_from_data(X, t, right_center)
return X_projected
def fit_transform(self, source_data, target_data, method='two-stage', n_components=None, n_pv=None):
"""
Computes the kernel principal vectors between source and target data.
Parameters
-------
source_data: numpy.ndarray, shape (n_samples, n_genes)
Source data
target_data: numpy.ndarray, shape (n_samples, n_genes)
Target data
method: str, default to "two-stage"
Method used for computing the kernel PVs, either "two-stage" (first kernel PCA, then
alignment), or "direct" (direct minimization).
<br/>
<b>NOT IMPLEMENTED:</b> The one-shot computation of the PVs has not been implemented.
n_components: int or dictionary, default to None
Number of components taken into account for PCA. Can be int (if same number of components
for source or target) or dictionary with {'source': int, 'target':int} indicating the
number of source and target principal components.
n_pv: int, default to None
Number of Principal Vectors. If not set here or in __init__, the maximum number of PVs will be computed.
Returned Values
-------
source_projected: dictionary
Source data projected on the source and target principal vectors, with 'source' and 'target' as keys.
target_projected: dictionary
Target data projected on the source and target principal vectors, with 'source' and 'target' as keys.
"""
self.fit(source_data, target_data, method, n_components, n_pv)
source_projected = {
'source': self._project_PV_from_data(source_data, 'source'),
'target': self._project_PV_from_data(source_data, 'target')
}
target_projected = {
'source': self._project_PV_from_data(target_data, 'source'),
'target': self._project_PV_from_data(target_data, 'target')
}
return source_projected, target_projected
def _two_stage_computation(self, n_components=None, n_pv=None):
self.n_components = n_components or self.n_components
if self.n_components is None or type(self.n_components) == int:
self.n_components = {
s:self.n_components for s in ['source', 'target']
}
self.n_pv = n_pv or (self.n_pv or min(self.n_components.values()))
## First step: Kernel PCA
self._dim_reduction()
## Second step: Align based on cosine similarity
self._align_principal_components()
def _dim_reduction(self):
self.dim_reduc_clf_ = {}
self.alpha_coef = {}
# Independent processing of source and target
for t in ['source', 'target']:
# Reduce dimensionality using kernelPCA.
self.dim_reduc_clf_[t] = KernelPCA(self.n_components[t],
kernel=self.kernel,
n_jobs=self.n_jobs,
**self.kernel_params_)
self.dim_reduc_clf_[t].fit(self.kernel_values_.data[t])
# Save kernel PCA coefficients
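# Dividing the kernel PCA dual coefficients (alphas_) by sqrt(eigenvalue) yields
# coefficients of unit-norm principal directions in feature space.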
self.alpha_coef[t] = self.dim_reduc_clf_[t].alphas_ / np.sqrt(self.dim_reduc_clf_[t].lambdas_)
def _align_principal_components(self):
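# Cosine similarities between source and target kernel principal components:
# M = alpha_s^T K_st alpha_t. Its SVD, M = B_s diag(cos(theta)) B_t^T, gives the
# rotations aligning the two sets of components; the singular values are the
# cosines of the canonical angles between the source and target kPCA subspaces.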
self.cosine_similarity_ = self.alpha_coef['source'].T.dot(self.kernel_values_.k_st).dot(self.alpha_coef['target'])
beta_s, theta, beta_t = np.linalg.svd(self.cosine_similarity_)
self.beta_coef = {}
self.beta_coef['source'] = beta_s
self.beta_coef['target'] = beta_t.T # np.linalg.svd returns V transposed, hence the .T
# Computation of gamma coefficients
self.gamma_coef = {}
for t in ['source', 'target']:
self.gamma_coef[t] = self.beta_coef[t].T.dot(self.alpha_coef[t].T)
self.gamma_coef[t] = self.gamma_coef[t][:self.n_pv]
# Canonical angles
self.canonical_angles = np.arccos(theta[:self.n_pv])
def _direct_computation(self, n_components=None):
raise NotImplementedError('Direct computation of PVs has not been implemented.')
def _project_PV_from_data(self, X, t, right_center=False):
"""
Project data X on the source or target kernel principal vectors
Parameters
-------
X: numpy.ndarray, shape (n_samples, n_genes)
Data to project
t: str
Type, either 'source' or 'target'
right_center: Boolean, default to False
Whether data should be implicitly mean centered
Returned Values
-------
Projected array of shape (n_samples, n_pv)
"""
K = self.kernel_(self.kernel_values_.data[t], X, **self.kernel_params_)
K = _left_center_kernel(K)
if right_center:
K = _right_center_kernel(K)
return self._project_PV_from_kernel(K,t)
def _project_PV_from_kernel(self, K, t):
"""
Project the kernel matrix K on the source or target kernel principal vectors
Parameters
-------
K: numpy.ndarray, shape (n_samples_t, n_samples_new)
Kernel matrix between the data of type t and a new dataset.
Source (or target) samples in the rows (same order as given to the algorithm),
new dataset samples in the columns.
t: str
Type, either 'source' or 'target'
Returned Values
-------
Projected array of shape (n_samples_new, n_pv)
"""
return self.gamma_coef[t].dot(K).T
Classes
class PVComputation (kernel, kernel_params={}, n_components=None, n_pv=None, n_jobs=1)
-
PVComputation handles the dimensionality reduction and alignment of the learned manifolds.
This class handles the following tasks and sub-routines:
- Kernel PCA decomposition on source and target independently.
- Kernel principal components comparison.
- Computation of Principal Vectors (PVs).
Parameters
kernel : str
    Name of the kernel to be used in the algorithm. Has to be compliant with scikit-learn kernels, e.g., "rbf", "polynomial", "laplacian", "linear", ...
kernel_params : dict, default to None
    Parameters of the kernel (degree for polynomial kernel, gamma for RBF). Naming has to be compliant with scikit-learn, e.g., {"gamma": 0.0005}.
n_components : int or dict, default to None
    Number of components for kernel PCA.
    If int, the same number of components is used for source and target.
    If dict, must be of the form {'source': int, 'target': int}.
n_pv : int, default to None
    Number of principal vectors.
n_jobs : int, default to 1
    Number of concurrent threads to use for tasks that can be parallelized.
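When the same number of components is desired for source and target, an int can be passed instead of a dict. A minimal sketch (the RBF gamma below is an arbitrary illustrative value):
::
from transact.PVComputation import PVComputation
pv = PVComputation(
    kernel='rbf',
    kernel_params={'gamma': 1e-3},
    n_components=30,  # expanded internally to {'source': 30, 'target': 30}
    n_pv=15,
    n_jobs=1
)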
Methods
def fit(self, source_data, target_data, method='two-stage', n_components=None, n_pv=None)
-
Computes the kernel principal vectors between source and target data.
Parameters
source_data : numpy.ndarray, shape (n_samples, n_genes)
    Source data.
target_data : numpy.ndarray, shape (n_samples, n_genes)
    Target data.
method : str, default to "two-stage"
    Method used for computing the kernel PVs, either "two-stage" (first kernel PCA, then alignment) or "direct" (direct minimization).
    NOT IMPLEMENTED: the one-shot ("direct") computation of the PVs has not been implemented.
n_components : int, default to None
    Number of components taken into the decomposition.
n_pv : int, default to None
    Number of Principal Vectors. If not set here or in __init__, the maximum number of PVs will be computed.
Returned Values
self : PVComputation
    Fitted instance.
def fit_transform(self, source_data, target_data, method='two-stage', n_components=None, n_pv=None)
-
Computes the kernel principal vectors between source and target data.
Parameters
source_data : numpy.ndarray, shape (n_samples, n_genes)
    Source data.
target_data : numpy.ndarray, shape (n_samples, n_genes)
    Target data.
method : str, default to "two-stage"
    Method used for computing the kernel PVs, either "two-stage" (first kernel PCA, then alignment) or "direct" (direct minimization).
    NOT IMPLEMENTED: the one-shot ("direct") computation of the PVs has not been implemented.
n_components : int or dict, default to None
    Number of components taken into account for kernel PCA. Can be an int (same number of components for source and target) or a dict of the form {'source': int, 'target': int} giving the number of source and target principal components.
n_pv : int, default to None
    Number of Principal Vectors. If not set here or in __init__, the maximum number of PVs will be computed.
Returned Values
source_projected : dictionary
    Source data projected on the source and target principal vectors, with 'source' and 'target' as keys.
target_projected : dictionary
    Target data projected on the source and target principal vectors, with 'source' and 'target' as keys.
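Both returned dictionaries are indexed by 'source' and 'target'; for instance, source_projected['target'] contains the source data projected on the target principal vectors. A minimal sketch, reusing X_source, X_target and principal_vectors from the module-level example:
::
source_projected, target_projected = principal_vectors.fit_transform(X_source, X_target, n_pv=10)
print(source_projected['target'].shape)  # (n_source, n_pv)
print(target_projected['source'].shape)  # (n_target, n_pv)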
def transform(self, X, right_center=False)
-
Project data X on source and target kernel principal vectors
Parameters
X : numpy.ndarray, shape (n_samples, n_genes)
    Data to project
right_center : Boolean, default to False
    Whether data should be implicitly mean centered
Returned Values
    Dictionary with 'source' and 'target' as keys, and projected arrays as values.
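A minimal sketch, projecting a held-out dataset (a hypothetical X_new with the same features as the training data) after fitting:
::
projections = principal_vectors.transform(X_new, right_center=True)
X_new_on_source_pv = projections['source']  # shape (n_new_samples, n_pv)
X_new_on_target_pv = projections['target']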