-
Notifications
You must be signed in to change notification settings - Fork 106
Open
Description
========================================================================================= FAILURES =========================================================================================
_________________________________________________________________________ test_bitpacked_nn_descent_query_accuracy _________________________________________________________________________
nn_data = array([[0.52111531, 0.77647716, 0.93834037, 0.66185582, 0.19981062],
[0.43632302, 0.16532886, 0.67949223, 0.587... 0. , 0. , 0. , 0. ],
[0. , 0. , 0. , 0. , 0. ]])
def test_bitpacked_nn_descent_query_accuracy(nn_data):
bitpacked_data = (nn_data * 256).astype(np.uint8)
unpacked_data = np.zeros(
(bitpacked_data.shape[0], bitpacked_data.shape[1] * 8), dtype=np.float32
)
for i in range(unpacked_data.shape[0]):
for j in range(unpacked_data.shape[1]):
unpacked_data[i, j] = (bitpacked_data[i, j // 8] & (1 << (j % 8))) > 0
> nnd = NNDescent(
bitpacked_data[200:], "bit_jaccard", n_neighbors=50, random_state=None
)
pynndescent/tests/test_pynndescent_.py:207:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pynndescent.pynndescent_.NNDescent object at 0x24f3964bdc10>
data = array([[ 91, 127, 169, 1, 27],
[ 51, 148, 22, 235, 2],
[170, 219, 25, 239, 176],
...,
[ 44, 244, 81, 85, 237],
[ 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0]], dtype=uint8)
metric = 'bit_jaccard', metric_kwds = {}, n_neighbors = 50, n_trees = 10, leaf_size = None, pruning_degree_multiplier = 1.5, diversify_prob = 1.0, n_search_trees = 1, tree_init = True
init_graph = None, init_dist = None, random_state = None, low_memory = True, max_candidates = None, max_rptree_depth = 200, n_iters = 10, delta = 0.001, n_jobs = None, compressed = False
parallel_batch_queries = False, verbose = False
def __init__(
self,
data,
metric="euclidean",
metric_kwds=None,
n_neighbors=30,
n_trees=None,
leaf_size=None,
pruning_degree_multiplier=1.5,
diversify_prob=1.0,
n_search_trees=1,
tree_init=True,
init_graph=None,
init_dist=None,
random_state=None,
low_memory=True,
max_candidates=None,
max_rptree_depth=200,
n_iters=None,
delta=0.001,
n_jobs=None,
compressed=False,
parallel_batch_queries=False,
verbose=False,
):
if n_trees is None:
n_trees = 5 + int(round((data.shape[0]) ** 0.25))
n_trees = min(32, n_trees) # Only so many trees are useful
if n_iters is None:
n_iters = max(5, int(round(np.log2(data.shape[0]))))
self.n_trees = n_trees
self.n_trees_after_update = max(1, int(np.round(self.n_trees / 3)))
self.n_neighbors = n_neighbors
self.metric = metric
self.metric_kwds = metric_kwds
self.leaf_size = leaf_size
self.prune_degree_multiplier = pruning_degree_multiplier
self.diversify_prob = diversify_prob
self.n_search_trees = n_search_trees
self.max_rptree_depth = max_rptree_depth
self.max_candidates = max_candidates
self.low_memory = low_memory
self.n_iters = n_iters
self.delta = delta
self.dim = data.shape[1]
self.n_jobs = n_jobs
self.compressed = compressed
self.parallel_batch_queries = parallel_batch_queries
self.verbose = verbose
if getattr(data, "dtype", None) == np.float32 and (
issparse(data) or is_c_contiguous(data)
):
copy_on_normalize = True
else:
copy_on_normalize = False
if metric in ("bit_hamming", "bit_jaccard"):
data = check_array(data, dtype=np.uint8, order="C")
self._input_dtype = np.uint8
else:
data = check_array(data, dtype=np.float32, accept_sparse="csr", order="C")
self._input_dtype = np.float32
self._raw_data = data
if not tree_init or n_trees == 0 or init_graph is not None:
self.tree_init = False
else:
self.tree_init = True
metric_kwds = metric_kwds or {}
self._dist_args = tuple(metric_kwds.values())
self.random_state = random_state
current_random_state = check_random_state(self.random_state)
self._distance_correction = None
if callable(metric):
_distance_func = metric
elif metric in pynnd_dist.named_distances:
if metric in pynnd_dist.fast_distance_alternatives:
_distance_func = pynnd_dist.fast_distance_alternatives[metric]["dist"]
self._distance_correction = pynnd_dist.fast_distance_alternatives[
metric
]["correction"]
else:
_distance_func = pynnd_dist.named_distances[metric]
else:
raise ValueError("Metric is neither callable, " + "nor a recognised string")
# Create a partial function for distances with arguments
if len(self._dist_args) > 0:
dist_args = self._dist_args
@numba.njit()
def _partial_dist_func(x, y):
return _distance_func(x, y, *dist_args)
self._distance_func = _partial_dist_func
else:
self._distance_func = _distance_func
if metric in (
"cosine",
"dot",
"correlation",
"dice",
"jaccard",
"hellinger",
"hamming",
"bit_hamming",
"bit_jaccard",
):
self._angular_trees = True
if metric in ("bit_hamming", "bit_jaccard"):
self._bit_trees = True
else:
self._bit_trees = False
else:
self._angular_trees = False
self._bit_trees = False
if metric == "dot":
data = normalize(data, norm="l2", copy=copy_on_normalize)
self._raw_data = data
self.rng_state = current_random_state.randint(INT32_MIN, INT32_MAX, 3).astype(
np.int64
)
self.search_rng_state = current_random_state.randint(
INT32_MIN, INT32_MAX, 3
).astype(np.int64)
# Warm up the rng state
for i in range(10):
_ = tau_rand_int(self.search_rng_state)
if self.tree_init:
if verbose:
print(ts(), "Building RP forest with", str(n_trees), "trees")
self._rp_forest = make_forest(
data,
n_neighbors,
n_trees,
leaf_size,
self.rng_state,
current_random_state,
self.n_jobs,
self._angular_trees,
self._bit_trees,
max_depth=self.max_rptree_depth,
)
leaf_array = rptree_leaf_array(self._rp_forest)
else:
self._rp_forest = None
leaf_array = np.array([[-1]])
if self.max_candidates is None:
effective_max_candidates = min(60, self.n_neighbors)
else:
effective_max_candidates = self.max_candidates
# Set threading constraints
self._original_num_threads = numba.get_num_threads()
if self.n_jobs != -1 and self.n_jobs is not None:
numba.set_num_threads(self.n_jobs)
if isspmatrix_csr(self._raw_data):
self._is_sparse = True
if not self._raw_data.has_sorted_indices:
self._raw_data.sort_indices()
if metric in sparse.sparse_named_distances:
if metric in sparse.sparse_fast_distance_alternatives:
_distance_func = sparse.sparse_fast_distance_alternatives[metric][
"dist"
]
self._distance_correction = (
sparse.sparse_fast_distance_alternatives[metric]["correction"]
)
else:
_distance_func = sparse.sparse_named_distances[metric]
elif callable(metric):
_distance_func = metric
else:
raise ValueError(
"Metric {} not supported for sparse data".format(metric)
)
if metric in sparse.sparse_need_n_features:
metric_kwds["n_features"] = self._raw_data.shape[1]
self._dist_args = tuple(metric_kwds.values())
# Create a partial function for distances with arguments
if len(self._dist_args) > 0:
dist_args = self._dist_args
@numba.njit()
def _partial_dist_func(ind1, data1, ind2, data2):
return _distance_func(ind1, data1, ind2, data2, *dist_args)
self._distance_func = _partial_dist_func
else:
self._distance_func = _distance_func
if init_graph is None:
_init_graph = EMPTY_GRAPH
else:
if init_graph.shape[0] != self._raw_data.shape[0]:
raise ValueError("Init graph size does not match dataset size!")
_init_graph = make_heap(init_graph.shape[0], self.n_neighbors)
_init_graph = sparse_initalize_heap_from_graph_indices(
_init_graph,
init_graph,
self._raw_data.indptr,
self._raw_data.indices,
self._raw_data.data,
self._distance_func,
)
if verbose:
print(ts(), "metric NN descent for", str(n_iters), "iterations")
self._neighbor_graph = sparse_nnd.nn_descent(
self._raw_data.indices,
self._raw_data.indptr,
self._raw_data.data,
self.n_neighbors,
self.rng_state,
max_candidates=effective_max_candidates,
dist=self._distance_func,
n_iters=self.n_iters,
delta=self.delta,
rp_tree_init=True,
leaf_array=leaf_array,
init_graph=_init_graph,
low_memory=self.low_memory,
verbose=verbose,
)
else:
self._is_sparse = False
if init_graph is None:
_init_graph = EMPTY_GRAPH
else:
if init_graph.shape[0] != self._raw_data.shape[0]:
raise ValueError("Init graph size does not match dataset size!")
_init_graph = make_heap(init_graph.shape[0], self.n_neighbors)
if init_dist is None:
_init_graph = initalize_heap_from_graph_indices(
_init_graph, init_graph, data, self._distance_func
)
elif init_graph.shape != init_dist.shape:
raise ValueError(
"The shapes of init graph and init distances do not match!"
)
else:
_init_graph = initalize_heap_from_graph_indices_and_distances(
_init_graph, init_graph, init_dist
)
if verbose:
print(ts(), "NN descent for", str(n_iters), "iterations")
> self._neighbor_graph = nn_descent(
self._raw_data,
self.n_neighbors,
self.rng_state,
effective_max_candidates,
self._distance_func,
self.n_iters,
self.delta,
low_memory=self.low_memory,
rp_tree_init=True,
init_graph=_init_graph,
leaf_array=leaf_array,
verbose=verbose,
)
E ZeroDivisionError: division by zero
pynndescent/pynndescent_.py:946: ZeroDivisionError
===================================================================================== warnings summary =====================================================================================
pynndescent/tests/test_distances.py::test_bit_jaccard
pynndescent/tests/test_pynndescent_.py::test_bitpacked_nn_descent_neighbor_accuracy
/usr/local/lib/python3.9/site-packages/sklearn/metrics/pairwise.py:2317: DataConversionWarning: Data was converted to boolean for metric jaccard
warnings.warn(msg, DataConversionWarning)
pynndescent/tests/test_pynndescent_.py::test_bitpacked_nn_descent_neighbor_accuracy
/usr/ports/math/py-pynndescent/work-py39/pynndescent-0.5.12/pynndescent/pynndescent_.py:962: UserWarning: Failed to correctly find n_neighbors for some samples. Results may be less than ideal. Try re-running with different parameters.
warn(
-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
================================================================================= short test summary info ==================================================================================
SKIPPED [1] pynndescent/tests/test_distances.py:245: incorrect function in scipy<1.8
============================================================= 1 failed, 145 passed, 1 skipped, 3 warnings in 575.60s (0:09:35) =============================================================
*** Error code 1
pynndescent version: 0.5.12
Python version: 3.9
OS: FreeBSD 14.0
Metadata
Assignees
Labels
No labels