@@ -39,7 +39,9 @@ def calculate_kernel(data, k=15, a=10, alpha_decay=True, knn_dist='euclidean',
3939 Parameters
4040 ----------
4141 data : array-like [n_samples, n_dimensions]
42- 2 dimensional input data array with n cells and p dimensions
42+ 2 dimensional input data array with n cells and p dimensions. If
43+ `knn_dist` is 'precomputed', `data` should be an n_samples x n_samples
44+ distance matrix
4345
4446 k : int, optional, default: 15
4547 used to set epsilon while autotuning kernel bandwidth
@@ -51,9 +53,10 @@ def calculate_kernel(data, k=15, a=10, alpha_decay=True, knn_dist='euclidean',
5153 If true, use the alpha decaying kernel
5254
5355 knn_dist : string, optional, default: 'euclidean'
54- recommended values: 'euclidean' and 'cosine'
55- Any metric from scipy.spatial.distance can be used
56- distance metric for building kNN graph
56+ recommended values: 'euclidean', 'cosine', 'precomputed'
57+ Any metric from `scipy.spatial.distance` can be used
58+ distance metric for building kNN graph. If 'precomputed',
59+ `data` should be an n_samples x n_samples distance matrix
5760
5861 verbose : boolean, optional, default: True
5962 If true, print status messages
@@ -79,9 +82,7 @@ def calculate_kernel(data, k=15, a=10, alpha_decay=True, knn_dist='euclidean',
7982 kernel : array-like [n_samples, n_samples]
8083 kernel matrix built from the input data
8184 """
82- precomputed = isinstance (knn_dist , list ) or \
83- isinstance (knn_dist , np .ndarray )
84- if not precomputed and ndim < data .shape [1 ]:
85+ if knn_dist != 'precomputed' and ndim < data .shape [1 ]:
8586 if verbose :
8687 print ("Calculating PCA..." )
8788 start = time .time ()
@@ -102,10 +103,10 @@ def calculate_kernel(data, k=15, a=10, alpha_decay=True, knn_dist='euclidean',
102103 # kernel includes self as connection but not in k
103104 # actually search for k+1 neighbors including self
104105 k = k + 1
105- if alpha_decay :
106+ if alpha_decay and a is not None :
106107 try :
107- if precomputed :
108- pdx = knn_dist
108+ if knn_dist == ' precomputed' :
109+ pdx = data
109110 else :
110111 pdx = squareform (pdist (data , metric = knn_dist ))
111112 knn_dist = np .partition (pdx , k , axis = 1 )[:, :k ]
@@ -118,7 +119,8 @@ def calculate_kernel(data, k=15, a=10, alpha_decay=True, knn_dist='euclidean',
118119 'Try removing duplicates.' )
119120 kernel = np .exp (- 1 * (pdx ** a )) # not really Gaussian kernel
120121 else :
121- if precomputed :
122+ if knn_dist == 'precomputed' :
123+ # we already have pairwise distances
122124 pdx = knn_dist
123125 knn_idx = np .argpartition (pdx , k , axis = 1 )[:, :k ]
124126 ind_ptr = np .arange (knn_idx .shape [0 ] + 1 ) * knn_idx .shape [1 ]
@@ -184,7 +186,8 @@ def calculate_landmark_operator(kernel, n_landmark=2000,
184186 n_components = n_svd ,
185187 random_state = random_state )
186188 if verbose :
187- print ("SVD complete in {:.2f} seconds" .format (time .time () - start ))
189+ print ("Calculated SVD in {:.2f} seconds" .format (
190+ time .time () - start ))
188191 start = time .time ()
189192 print ("Calculating Kmeans..." )
190193 kmeans = MiniBatchKMeans (n_landmark ,
@@ -194,7 +197,8 @@ def calculate_landmark_operator(kernel, n_landmark=2000,
194197 clusters = kmeans .fit_predict (np .matmul (U , np .diagflat (S )))
195198 landmarks = np .unique (clusters )
196199 if verbose :
197- print ("Complete in " , time .time () - start )
200+ print ("Calculated Kmeans in {:.2f} seconds" .format (
201+ time .time () - start ))
198202
199203 # transition matrices
200204 if is_sparse :
@@ -226,7 +230,9 @@ def calculate_operator(data, k=15, a=10, alpha_decay=True, n_landmark=2000,
226230 Parameters
227231 ----------
228232 data : array-like [n_samples, n_dimensions]
229- 2 dimensional input data array with n cells and p dimensions
233+ 2 dimensional input data array with n cells and p dimensions. If
234+ `knn_dist` is 'precomputed', `data` should be an n_samples x n_samples
235+ distance or affinity matrix
230236
231237 k : int, optional, default: 15
232238 used to set epsilon while autotuning kernel bandwidth
@@ -241,9 +247,11 @@ def calculate_operator(data, k=15, a=10, alpha_decay=True, n_landmark=2000,
241247 number of landmarks to use in fast PHATE
242248
243249 knn_dist : string, optional, default: 'euclidean'
244- recommended values: 'euclidean' and 'cosine'
245- Any metric from scipy.spatial.distance can be used
246- distance metric for building kNN graph
250+ recommended values: 'euclidean', 'cosine', 'precomputed'
251+ Any metric from `scipy.spatial.distance` can be used
252+ distance metric for building kNN graph. If 'precomputed',
253+ `data` should be an n_samples x n_samples distance or
254+ affinity matrix
247255
248256 diff_op : array-like, optional shape=[n_samples, n_samples], default: None
249257 Precomputed diffusion operator
@@ -296,12 +304,26 @@ def calculate_operator(data, k=15, a=10, alpha_decay=True, n_landmark=2000,
296304 if diff_op is None :
297305 if verbose :
298306 print ("Building kNN graph and diffusion operator..." )
299- kernel = calculate_kernel (data , a = a , k = k , knn_dist = knn_dist ,
300- ndim = n_pca ,
301- alpha_decay = alpha_decay ,
302- random_state = random_state ,
303- n_jobs = n_jobs ,
304- verbose = verbose )
307+ if knn_dist == 'precomputed' and np .all (np .diagonal (data ) != 0 ):
308+ print ("Using precomputed affinity matrix..." )
309+ kernel = data
310+ else :
311+ if knn_dist == 'precomputed' :
312+ if np .all (np .diagonal (data ) == 0 ):
313+ print ("Using precomputed distance matrix..." )
314+ else :
315+ raise ValueError (
316+ "Cannot determine precomputed data type. "
317+ "Precomputed affinity matrices should have "
318+ "only non-zero entries on the diagonal, and"
319+ " precomputed distance matrices should have"
320+ " only zero entries on the diagonal." )
321+ kernel = calculate_kernel (data , a = a , k = k , knn_dist = knn_dist ,
322+ ndim = n_pca ,
323+ alpha_decay = alpha_decay ,
324+ random_state = random_state ,
325+ n_jobs = n_jobs ,
326+ verbose = verbose )
305327 diff_op , landmark_transitions = calculate_landmark_operator (
306328 kernel , n_landmark = n_landmark ,
307329 random_state = random_state , verbose = verbose )
@@ -382,17 +404,32 @@ def embed_mds(diff_op, t=30, n_components=2, diff_potential=None,
382404
383405 X = np .linalg .matrix_power (diff_op , t ) # diffused diffusion operator
384406
385- if potential_method == 'log' :
407+ if potential_method == 'log' : # or potential_method == 1:
386408 # handling small values
387409 # X[X <= np.finfo(float).eps] = np.finfo(
388410 # float).eps
389- X = X + 1e-3
411+ X = X + 1e-7
390412 diff_potential = - 1 * np .log (X ) # diffusion potential
391413 elif potential_method == 'sqrt' :
392414 diff_potential = np .sqrt (X ) # diffusion potential
393- else :
394- raise ValueError ("Allowable 'potential_method' values: 'log' or "
395- "'sqrt'. '%s' was passed." % (potential_method ))
415+ else : # if isinstance(potential_method, str):
416+ raise ValueError (
417+ "Allowable 'potential_method' values: 'log' or "
418+ "'sqrt'. '{}' was passed." .format (potential_method ))
419+ # else:
420+ # # gamma
421+ # print("Warning: gamma potential is not stable."
422+ # " Recommended values: 'log' or 'sqrt'")
423+ # if potential_method > 1 or potential_method < -1:
424+ # raise ValueError(
425+ # "Allowable 'potential_method' values between -1 and 1"
426+ # " inclusive. '{}' was passed.".format(potential_method))
427+ # elif potential_method != -1:
428+ # diff_potential = 2 / (1 - potential_method) * \
429+ # np.power(X, ((1 - potential_method) / 2))
430+ # else:
431+ # # gamma = -1 is just MDS on DM
432+ # diff_potential = X
396433
397434 if verbose :
398435 print ("Calculated diffusion potential in "
@@ -425,9 +462,9 @@ class PHATE(BaseEstimator):
425462 """PHATE operator which performs dimensionality reduction.
426463
427464 Potential of Heat-diffusion for Affinity-based Trajectory Embedding
428- (PHATE).[1]_ Embeds high dimensional single-cell data into two or three
465+ (PHATE) embeds high dimensional single-cell data into two or three
429466 dimensions for visualization of biological progressions as described
430- in .
467+ in Moon et al, 2017 [1]_ .
431468
432469 Parameters
433470 ----------
@@ -466,9 +503,11 @@ class PHATE(BaseEstimator):
466503 log(n_samples) time.
467504
468505 knn_dist : string, optional, default: 'euclidean'
469- recommended values: 'euclidean' and 'cosine'
506+ recommended values: 'euclidean', 'cosine', 'precomputed'
470507 Any metric from `scipy.spatial.distance` can be used
471- distance metric for building kNN graph
508+ distance metric for building kNN graph. If 'precomputed',
509+ `data` should be an n_samples x n_samples distance or
510+ affinity matrix
472511
473512 mds_dist : string, optional, default: 'euclidean'
474513 recommended values: 'euclidean' and 'cosine'
@@ -628,7 +667,9 @@ def fit(self, X):
628667 X : array, shape=[n_samples, n_features]
629668 input data with `n_samples` samples and `n_dimensions`
630669 dimensions. Accepted data types: `numpy.ndarray`,
631- `scipy.sparse.spmatrix`, `pd.DataFrame`, `anndata.AnnData`
670+ `scipy.sparse.spmatrix`, `pd.DataFrame`, `anndata.AnnData`. If
671+ `knn_dist` is 'precomputed', `X` should be an n_samples x
672+ n_samples distance or affinity matrix
632673
633674 Returns
634675 -------
@@ -673,7 +714,9 @@ def transform(self, X=None, t_max=100, plot_optimal_t=False, ax=None):
673714 dimensions. Not required, since PHATE does not currently embed
674715 cells not given in the input matrix to `PHATE.fit()`.
675716 Accepted data types: `numpy.ndarray`,
676- `scipy.sparse.spmatrix`, `pd.DataFrame`, `anndata.AnnData`.
717+ `scipy.sparse.spmatrix`, `pd.DataFrame`, `anndata.AnnData`. If
718+ `knn_dist` is 'precomputed', `X` should be an n_samples x
719+ n_samples distance or affinity matrix
677720
678721 t_max : int, optional, default: 100
679722 maximum t to test if `t` is set to 'auto'
@@ -733,7 +776,9 @@ def fit_transform(self, X, **kwargs):
733776 X : array, shape=[n_samples, n_features]
734777 input data with `n_samples` samples and `n_dimensions`
735778 dimensions. Accepted data types: `numpy.ndarray`,
736- `scipy.sparse.spmatrix`, `pd.DataFrame`, `anndata.AnnData`
779+ `scipy.sparse.spmatrix`, `pd.DataFrame`, `anndata.AnnData`. If
780+ `knn_dist` is 'precomputed', `X` should be an n_samples x
781+ n_samples distance or affinity matrix
737782
738783 kwargs : further arguments for `PHATE.transform()`
739784 Keyword arguments as specified in :func:`~phate.PHATE.transform`
0 commit comments