Skip to content

Commit ffb3c24

Browse files
authored
Merge pull request #24 from KrishnaswamyLab/dev
Upgrade to version 0.2.4: accepts affinity and distance matrices
2 parents 9804d9d + d9eebe0 commit ffb3c24

File tree

6 files changed

+224
-170
lines changed

6 files changed

+224
-170
lines changed

Python/phate/phate.py

Lines changed: 80 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,9 @@ def calculate_kernel(data, k=15, a=10, alpha_decay=True, knn_dist='euclidean',
3939
Parameters
4040
----------
4141
data : array-like [n_samples, n_dimensions]
42-
2 dimensional input data array with n cells and p dimensions
42+
2 dimensional input data array with n cells and p dimensions. If
43+
`knn_dist` is 'precomputed', `data` should be an n_samples x n_samples
44+
distance matrix
4345
4446
k : int, optional, default: 15
4547
used to set epsilon while autotuning kernel bandwidth
@@ -51,9 +53,10 @@ def calculate_kernel(data, k=15, a=10, alpha_decay=True, knn_dist='euclidean',
5153
If true, use the alpha decaying kernel
5254
5355
knn_dist : string, optional, default: 'euclidean'
54-
recommended values: 'euclidean' and 'cosine'
55-
Any metric from scipy.spatial.distance can be used
56-
distance metric for building kNN graph
56+
recommended values: 'euclidean', 'cosine', 'precomputed'
57+
Any metric from `scipy.spatial.distance` can be used
58+
distance metric for building kNN graph. If 'precomputed',
59+
`data` should be an n_samples x n_samples distance matrix
5760
5861
verbose : boolean, optional, default: True
5962
If true, print status messages
@@ -79,9 +82,7 @@ def calculate_kernel(data, k=15, a=10, alpha_decay=True, knn_dist='euclidean',
7982
kernel : array-like [n_samples, n_samples]
8083
kernel matrix built from the input data
8184
"""
82-
precomputed = isinstance(knn_dist, list) or \
83-
isinstance(knn_dist, np.ndarray)
84-
if not precomputed and ndim < data.shape[1]:
85+
if knn_dist != 'precomputed' and ndim < data.shape[1]:
8586
if verbose:
8687
print("Calculating PCA...")
8788
start = time.time()
@@ -102,10 +103,10 @@ def calculate_kernel(data, k=15, a=10, alpha_decay=True, knn_dist='euclidean',
102103
# kernel includes self as connection but not in k
103104
# actually search for k+1 neighbors including self
104105
k = k + 1
105-
if alpha_decay:
106+
if alpha_decay and a is not None:
106107
try:
107-
if precomputed:
108-
pdx = knn_dist
108+
if knn_dist == 'precomputed':
109+
pdx = data
109110
else:
110111
pdx = squareform(pdist(data, metric=knn_dist))
111112
knn_dist = np.partition(pdx, k, axis=1)[:, :k]
@@ -118,7 +119,8 @@ def calculate_kernel(data, k=15, a=10, alpha_decay=True, knn_dist='euclidean',
118119
'Try removing duplicates.')
119120
kernel = np.exp(-1 * (pdx ** a)) # not really Gaussian kernel
120121
else:
121-
if precomputed:
122+
if knn_dist == 'precomputed':
123+
# we already have pairwise distances
122124
pdx = knn_dist
123125
knn_idx = np.argpartition(pdx, k, axis=1)[:, :k]
124126
ind_ptr = np.arange(knn_idx.shape[0] + 1) * knn_idx.shape[1]
@@ -184,7 +186,8 @@ def calculate_landmark_operator(kernel, n_landmark=2000,
184186
n_components=n_svd,
185187
random_state=random_state)
186188
if verbose:
187-
print("SVD complete in {:.2f} seconds".format(time.time() - start))
189+
print("Calculated SVD in {:.2f} seconds".format(
190+
time.time() - start))
188191
start = time.time()
189192
print("Calculating Kmeans...")
190193
kmeans = MiniBatchKMeans(n_landmark,
@@ -194,7 +197,8 @@ def calculate_landmark_operator(kernel, n_landmark=2000,
194197
clusters = kmeans.fit_predict(np.matmul(U, np.diagflat(S)))
195198
landmarks = np.unique(clusters)
196199
if verbose:
197-
print("Complete in ", time.time() - start)
200+
print("Calculated Kmeans in {:.2f} seconds".format(
201+
time.time() - start))
198202

199203
# transition matrices
200204
if is_sparse:
@@ -226,7 +230,9 @@ def calculate_operator(data, k=15, a=10, alpha_decay=True, n_landmark=2000,
226230
Parameters
227231
----------
228232
data : array-like [n_samples, n_dimensions]
229-
2 dimensional input data array with n cells and p dimensions
233+
2 dimensional input data array with n cells and p dimensions. If
234+
`knn_dist` is 'precomputed', `data` should be an n_samples x n_samples
235+
distance or affinity matrix
230236
231237
k : int, optional, default: 15
232238
used to set epsilon while autotuning kernel bandwidth
@@ -241,9 +247,11 @@ def calculate_operator(data, k=15, a=10, alpha_decay=True, n_landmark=2000,
241247
number of landmarks to use in fast PHATE
242248
243249
knn_dist : string, optional, default: 'euclidean'
244-
recommended values: 'euclidean' and 'cosine'
245-
Any metric from scipy.spatial.distance can be used
246-
distance metric for building kNN graph
250+
recommended values: 'euclidean', 'cosine', 'precomputed'
251+
Any metric from `scipy.spatial.distance` can be used
252+
distance metric for building kNN graph. If 'precomputed',
253+
`data` should be an n_samples x n_samples distance or
254+
affinity matrix
247255
248256
diff_op : array-like, optional shape=[n_samples, n_samples], default: None
249257
Precomputed diffusion operator
@@ -296,12 +304,26 @@ def calculate_operator(data, k=15, a=10, alpha_decay=True, n_landmark=2000,
296304
if diff_op is None:
297305
if verbose:
298306
print("Building kNN graph and diffusion operator...")
299-
kernel = calculate_kernel(data, a=a, k=k, knn_dist=knn_dist,
300-
ndim=n_pca,
301-
alpha_decay=alpha_decay,
302-
random_state=random_state,
303-
n_jobs=n_jobs,
304-
verbose=verbose)
307+
if knn_dist == 'precomputed' and np.all(np.diagonal(data) != 0):
308+
print("Using precomputed affinity matrix...")
309+
kernel = data
310+
else:
311+
if knn_dist == 'precomputed':
312+
if np.all(np.diagonal(data) == 0):
313+
print("Using precomputed distance matrix...")
314+
else:
315+
raise ValueError(
316+
"Cannot determine precomputed data type. "
317+
"Precomputed affinity matrices should have "
318+
"only non-zero entries on the diagonal, and"
319+
" precomputed distance matrices should have"
320+
" only zero entries on the diagonal.")
321+
kernel = calculate_kernel(data, a=a, k=k, knn_dist=knn_dist,
322+
ndim=n_pca,
323+
alpha_decay=alpha_decay,
324+
random_state=random_state,
325+
n_jobs=n_jobs,
326+
verbose=verbose)
305327
diff_op, landmark_transitions = calculate_landmark_operator(
306328
kernel, n_landmark=n_landmark,
307329
random_state=random_state, verbose=verbose)
@@ -382,17 +404,32 @@ def embed_mds(diff_op, t=30, n_components=2, diff_potential=None,
382404

383405
X = np.linalg.matrix_power(diff_op, t) # diffused diffusion operator
384406

385-
if potential_method == 'log':
407+
if potential_method == 'log': # or potential_method == 1:
386408
# handling small values
387409
# X[X <= np.finfo(float).eps] = np.finfo(
388410
# float).eps
389-
X = X + 1e-3
411+
X = X + 1e-7
390412
diff_potential = -1 * np.log(X) # diffusion potential
391413
elif potential_method == 'sqrt':
392414
diff_potential = np.sqrt(X) # diffusion potential
393-
else:
394-
raise ValueError("Allowable 'potential_method' values: 'log' or "
395-
"'sqrt'. '%s' was passed." % (potential_method))
415+
else: # if isinstance(potential_method, str):
416+
raise ValueError(
417+
"Allowable 'potential_method' values: 'log' or "
418+
"'sqrt'. '{}' was passed.".format(potential_method))
419+
# else:
420+
# # gamma
421+
# print("Warning: gamma potential is not stable."
422+
# " Recommended values: 'log' or 'sqrt'")
423+
# if potential_method > 1 or potential_method < -1:
424+
# raise ValueError(
425+
# "Allowable 'potential_method' values between -1 and 1"
426+
# " inclusive. '{}' was passed.".format(potential_method))
427+
# elif potential_method != -1:
428+
# diff_potential = 2 / (1 - potential_method) * \
429+
# np.power(X, ((1 - potential_method) / 2))
430+
# else:
431+
# # gamma = -1 is just MDS on DM
432+
# diff_potential = X
396433

397434
if verbose:
398435
print("Calculated diffusion potential in "
@@ -425,9 +462,9 @@ class PHATE(BaseEstimator):
425462
"""PHATE operator which performs dimensionality reduction.
426463
427464
Potential of Heat-diffusion for Affinity-based Trajectory Embedding
428-
(PHATE).[1]_ Embeds high dimensional single-cell data into two or three
465+
(PHATE) embeds high dimensional single-cell data into two or three
429466
dimensions for visualization of biological progressions as described
430-
in .
467+
in Moon et al, 2017 [1]_.
431468
432469
Parameters
433470
----------
@@ -466,9 +503,11 @@ class PHATE(BaseEstimator):
466503
log(n_samples) time.
467504
468505
knn_dist : string, optional, default: 'euclidean'
469-
recommended values: 'euclidean' and 'cosine'
506+
recommended values: 'euclidean', 'cosine', 'precomputed'
470507
Any metric from `scipy.spatial.distance` can be used
471-
distance metric for building kNN graph
508+
distance metric for building kNN graph. If 'precomputed',
509+
`data` should be an n_samples x n_samples distance or
510+
affinity matrix
472511
473512
mds_dist : string, optional, default: 'euclidean'
474513
recommended values: 'euclidean' and 'cosine'
@@ -628,7 +667,9 @@ def fit(self, X):
628667
X : array, shape=[n_samples, n_features]
629668
input data with `n_samples` samples and `n_dimensions`
630669
dimensions. Accepted data types: `numpy.ndarray`,
631-
`scipy.sparse.spmatrix`, `pd.DataFrame`, `anndata.AnnData`
670+
`scipy.sparse.spmatrix`, `pd.DataFrame`, `anndata.AnnData`. If
671+
`knn_dist` is 'precomputed', `X` should be an n_samples x
672+
n_samples distance or affinity matrix
632673
633674
Returns
634675
-------
@@ -673,7 +714,9 @@ def transform(self, X=None, t_max=100, plot_optimal_t=False, ax=None):
673714
dimensions. Not required, since PHATE does not currently embed
674715
cells not given in the input matrix to `PHATE.fit()`.
675716
Accepted data types: `numpy.ndarray`,
676-
`scipy.sparse.spmatrix`, `pd.DataFrame`, `anndata.AnnData`.
717+
`scipy.sparse.spmatrix`, `pd.DataFrame`, `anndata.AnnData`. If
718+
`knn_dist` is 'precomputed', `X` should be an n_samples x
719+
n_samples distance or affinity matrix
677720
678721
t_max : int, optional, default: 100
679722
maximum t to test if `t` is set to 'auto'
@@ -733,7 +776,9 @@ def fit_transform(self, X, **kwargs):
733776
X : array, shape=[n_samples, n_features]
734777
input data with `n_samples` samples and `n_dimensions`
735778
dimensions. Accepted data types: `numpy.ndarray`,
736-
`scipy.sparse.spmatrix`, `pd.DataFrame`, `anndata.AnnData`
779+
`scipy.sparse.spmatrix`, `pd.DataFrame`, `anndata.AnnData`. If
780+
`knn_dist` is 'precomputed', `X` should be an n_samples x
781+
n_samples distance or affinity matrix
737782
738783
kwargs : further arguments for `PHATE.transform()`
739784
Keyword arguments as specified in :func:`~phate.PHATE.transform`

Python/phate/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.2.3"
1+
__version__ = "0.2.4"

0 commit comments

Comments
 (0)