From a73e17579c8892e94ccbc592a28e27d13ca54442 Mon Sep 17 00:00:00 2001 From: Jaewook Lee Date: Fri, 26 Jul 2024 16:59:25 +0200 Subject: [PATCH 1/8] update pre-commit config and pyproject based build --- .pre-commit-config.yaml | 38 +- CMakeLists.txt | 62 +- napf/_version.py | 2 +- napf/base.py | 6 +- pyproject.toml | 41 +- setup.py | 70 - src/python/CMakeLists.txt | 21 + tests/test_init_and_query.py | 2 +- third_party/nanoflann.hpp | 4349 +++++++++++++++++----------------- 9 files changed, 2218 insertions(+), 2373 deletions(-) delete mode 100644 setup.py create mode 100644 src/python/CMakeLists.txt diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7f13648..67c4fe4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,11 +6,9 @@ # use default options for ci ci: - autoupdate_schedule: "monthly" + autoupdate_schedule: "weekly" submodules: false -exclude: "third_party/nanoflann.hpp" - repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: "v4.6.0" @@ -28,31 +26,31 @@ repos: - id: requirements-txt-fixer - id: trailing-whitespace -- repo: https://github.com/asottile/pyupgrade - rev: "v3.15.2" - hooks: - - id: pyupgrade - args: [--py36-plus] - -- repo: https://github.com/PyCQA/isort - rev: "5.13.2" - hooks: - - id: isort - - repo: https://github.com/psf/black rev: "24.4.2" hooks: - id: black - args: [--line-length=79] + additional_dependencies: [tomli] -- repo: https://github.com/PyCQA/flake8 - rev: 7.0.0 +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.5.4 hooks: - - id: flake8 - args: [--extend-ignore=E203] + - id: ruff + args: [--fix, --exit-non-zero-on-fix] - repo: https://github.com/pre-commit/mirrors-clang-format - rev: v18.1.4 + rev: v18.1.8 hooks: - id: clang-format types_or: [c++] + +- repo: https://github.com/cheshirekow/cmake-format-precommit + rev: "v0.6.13" + hooks: + - id: cmake-format + +- repo: https://github.com/keewis/blackdoc + rev: v0.3.9 + hooks: + - id: blackdoc + additional_dependencies: [tomli] diff --git a/CMakeLists.txt b/CMakeLists.txt index 4f81784..3ca8559 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,11 @@ cmake_minimum_required(VERSION 3.12) -project(napf VERSION 0.0.0 LANGUAGES CXX) +project( + napf + VERSION 0.1.0 + LANGUAGES CXX) if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) + endif() # options @@ -29,22 +33,19 @@ add_library(napf INTERFACE) add_library(napf::napf ALIAS napf) # basic path -target_include_directories(napf - INTERFACE - $ - $) +target_include_directories( + napf INTERFACE $ + $) -# try to use installed nanoflann. -# else, include the one in the third_party +# try to use installed nanoflann. else, include the one in the third_party find_package(nanoflann QUIET) if(nanoflann_FOUND AND nanoflann_VERSION VERSION_GREATER_EQUAL "1.5.0") message("nanoflann found - napf will link to nanoflann found in system.") target_link_libraries(napf INTERFACE nanoflann::nanoflann) else() set(CXX_HEADERS ${CXX_HEADERS} third_party/nanoflann.hpp) - target_include_directories(napf - INTERFACE - $) + target_include_directories( + napf INTERFACE $) endif() target_compile_features(napf INTERFACE cxx_std_11) @@ -64,33 +65,22 @@ endif(SPLINEPY_EXT) # configure config files include(CMakePackageConfigHelpers) -write_basic_package_version_file( - "${version_config}" COMPATIBILITY SameMajorVersion -) -configure_package_config_file( - "cmake/config.cmake.in" - "${project_config}" - INSTALL_DESTINATION "${cfg_dest}" -) +write_basic_package_version_file("${version_config}" + COMPATIBILITY SameMajorVersion) +configure_package_config_file("cmake/config.cmake.in" "${project_config}" + INSTALL_DESTINATION "${cfg_dest}") install( - TARGETS napf - EXPORT "${TARGETS_EXPORT_NAME}" - LIBRARY DESTINATION ${lib_dest} - ARCHIVE DESTINATION ${lib_dest} - INCLUDES DESTINATION "${incl_dest}" -) -install( - FILES "${project_config}" "${version_config}" - DESTINATION "${cfg_dest}" -) + TARGETS napf + EXPORT "${TARGETS_EXPORT_NAME}" + LIBRARY DESTINATION ${lib_dest} + ARCHIVE DESTINATION ${lib_dest} + INCLUDES + DESTINATION "${incl_dest}") +install(FILES "${project_config}" "${version_config}" DESTINATION "${cfg_dest}") install( - EXPORT "${TARGETS_EXPORT_NAME}" - NAMESPACE "${namespace}" - DESTINATION "${cfg_dest}" -) -install( - FILES ${CXX_HEADERS} - DESTINATION ${incl_dest} -) + EXPORT "${TARGETS_EXPORT_NAME}" + NAMESPACE "${namespace}" + DESTINATION "${cfg_dest}") +install(FILES ${CXX_HEADERS} DESTINATION ${incl_dest}) diff --git a/napf/_version.py b/napf/_version.py index 00ad904..9f7a875 100644 --- a/napf/_version.py +++ b/napf/_version.py @@ -1 +1 @@ -version = "0.0.8" +version = "0.1.0" diff --git a/napf/base.py b/napf/base.py index 235502e..cc11efb 100644 --- a/napf/base.py +++ b/napf/base.py @@ -252,7 +252,7 @@ def knn_search(self, queries, kneighbors, nthread=None): Parameters ----------- - queires: (m, d) np.ndarray + queries: (m, d) np.ndarray Data type will be casted to the same type as `tree_data`. kneighbors: int nthread: int @@ -405,7 +405,7 @@ def radii_search(self, queries, radii, return_sorted, nthread=None): # input size check if len(queries) != len(radii): raise ValueError( - f"Input size mismatch between queires ({len(queries)}) " + f"Input size mismatch between queries ({len(queries)}) " f" and radii ({len(radii)})." "They should be the same." ) @@ -447,7 +447,7 @@ def unique_data_and_inverse( Same as kdt.tree_data[unique_ids]. unique_ids: np.ndarray Indices of unique entries from tree data. - First occurance is considered unique. + First occurrence is considered unique. inverse_ids: np.ndarray Indices to reconstruct original tree_data with unique_data. kdt.tree_data == unique_data[inverse_ids] diff --git a/pyproject.toml b/pyproject.toml index bef86f9..3b109c4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,18 +1,45 @@ [build-system] requires = [ - "setuptools>=42", - "wheel", - "pybind11", + "scikit-build-core", + "pybind11>=2.12", ] +build-backend = "scikit_build_core.build" -build-backend = "setuptools.build_meta" +[project] +name = "napf" +authors = [ + {name="Jaewook Lee", email="jaewooklee042@gmail.com"} +] +license = {file = "LICENSE"} +description = "nanoflann python bindings for kdtree with multithreaded queries" +urls = {Homepage = "https://github.com/tataratat/napf"} +classifiers=[ + "Development Status :: 4 - Beta", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python", + "Natural Language :: English", + "Topic :: Scientific/Engineering", +] +dynamic = ["version"] + +[tool.scikit-build.metadata.version] +provider = "scikit_build_core.metadata.regex" +input = "napf/_version.py" [tool.cibuildwheel] test-command = "python {project}/tests/test_init_and_query.py" -[tool.cibuildwheel.macos] -archs = ["x86_64", "arm64"] -test-skip = ["*x86_64"] + +[project.readme] +file = "README.md" +content-type = "text/markdown" [tool.cibuildwheel.windows] skip = "pp*" + +[tool.black] +line-length = 79 +exclude = "third_party" + +[tool.blackdoc] +line-length = 75 diff --git a/setup.py b/setup.py deleted file mode 100644 index 1efcb64..0000000 --- a/setup.py +++ /dev/null @@ -1,70 +0,0 @@ -import platform - -from pybind11.setup_helpers import Pybind11Extension, build_ext -from setuptools import setup - -with open("README.md") as f: - readme = f.read() - -with open("napf/_version.py") as f: - version = eval(f.read().strip().split("=")[-1]) - -flags = {} -if platform.system().startswith("Windows"): - flags["extra_compile_args"] = ["/O2"] -else: - flags["extra_compile_args"] = ["-O3"] - flags["libraries"] = ["stdc++"] - -ext_modules = [ - Pybind11Extension( - "napf._napf", - [ - "src/python/classes/radius_search_result_vectors.cpp", - "src/python/classes/int_trees.cpp", - "src/python/classes/long_trees.cpp", - "src/python/classes/float_trees.cpp", - "src/python/classes/double_trees.cpp", - "src/python/napf.cpp", - ], - include_dirs=["third_party"], - cxx_std=11, - **flags - ) -] - -setup( - name="napf", - version=version, - description="nanoflann python bindings for kdtree.", - long_description=readme, - long_description_content_type="text/markdown", - author="Jaewook Lee", - author_email="jaewooklee042@gmail.com", - url="https://github.com/tataratat/napf", - ext_modules=ext_modules, - cmdclass={"build_ext": build_ext}, - packages=["napf"], - package_data={ # didn't do much. so added MANIFEST.in - "src": ["*.hpp", "python/*.hpp"], - "third_party": ["*.hpp"], - }, - classifiers=[ - "Development Status :: 2 - Pre-Alpha", - "License :: OSI Approved :: MIT License", - "Programming Language :: Python", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Natural Language :: English", - "Topic :: Scientific/Engineering", - ], - install_requires=[ - "numpy", - ], - zip_safe=False, - license="MIT", -) diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt new file mode 100644 index 0000000..a2aa959 --- /dev/null +++ b/src/python/CMakeLists.txt @@ -0,0 +1,21 @@ +set(NAPF_SOURCES + classes/radius_search_result_vectors.cpp classes/int_trees.cpp + classes/long_trees.cpp classes/float_trees.cpp classes/double_trees.cpp + napf.cpp) + +find_package(Python REQUIRED COMPONENTS Interpreter Development.Module) +find_package(pybind11 CONFIG REQUIRED) + +python_add_library(_napf MODULE ${NAPF_SOURCES}) +target_link_libraries(_napf PRIVATE pybind11::headers) +target_compile_definitions(_napf PRIVATE $<$>:NDEBUG) +if(CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") + target_compile_options(funi PRIVATE $<$>:-O3>) +elseif(CMAKE_CXX_COMPILER_ID MATCHES "MSCV") + target_compile_options(funi PRIVATE $<$>:/O2>) +endif() + +install( + TARGETS _napf + DESTINATION napf + COMPONENT PythonModule) diff --git a/tests/test_init_and_query.py b/tests/test_init_and_query.py index c811ee0..25a31cb 100644 --- a/tests/test_init_and_query.py +++ b/tests/test_init_and_query.py @@ -92,7 +92,7 @@ def test_func(dim, data_t, metric): assert ids.shape[1] == nn assert np.isclose(dists.sum(), 0) - # with five, only first two queires are zero and last three are + # with five, only first two queries are zero and last three are # dummy values nn = 5 ids, dists = kdt.rknn_search(random_data, 1e-10, 5) diff --git a/third_party/nanoflann.hpp b/third_party/nanoflann.hpp index 80995d6..b2bb12d 100644 --- a/third_party/nanoflann.hpp +++ b/third_party/nanoflann.hpp @@ -48,13 +48,13 @@ #include #include #include -#include // for abs() +#include // for abs() #include -#include // for abs() -#include // std::reference_wrapper +#include // for abs() +#include // std::reference_wrapper #include #include -#include // std::numeric_limits +#include // std::numeric_limits #include #include #include @@ -64,8 +64,9 @@ #define NANOFLANN_VERSION 0x155 // Avoid conflicting declaration of min/max macros in Windows headers -#if !defined(NOMINMAX) && \ - (defined(_WIN32) || defined(_WIN32_) || defined(WIN32) || defined(_WIN64)) +#if !defined(NOMINMAX) \ + && (defined(_WIN32) || defined(_WIN32_) || defined(WIN32) \ + || defined(_WIN64)) #define NOMINMAX #ifdef max #undef max @@ -77,261 +78,230 @@ #undef None #endif -namespace nanoflann -{ +namespace nanoflann { /** @addtogroup nanoflann_grp nanoflann C++ library for KD-trees * @{ */ /** the PI constant (required to avoid MSVC missing symbols) */ -template -T pi_const() -{ - return static_cast(3.14159265358979323846); +template +T pi_const() { + return static_cast(3.14159265358979323846); } /** * Traits if object is resizable and assignable (typically has a resize | assign * method) */ -template -struct has_resize : std::false_type -{ -}; +template +struct has_resize : std::false_type {}; -template -struct has_resize().resize(1), 0)> - : std::true_type -{ -}; +template +struct has_resize().resize(1), 0)> + : std::true_type {}; -template -struct has_assign : std::false_type -{ -}; +template +struct has_assign : std::false_type {}; -template -struct has_assign().assign(1, 0), 0)> - : std::true_type -{ -}; +template +struct has_assign().assign(1, 0), 0)> + : std::true_type {}; /** * Free function to resize a resizable object */ -template -inline typename std::enable_if::value, void>::type resize( - Container& c, const size_t nElements) -{ - c.resize(nElements); +template +inline typename std::enable_if::value, void>::type +resize(Container& c, const size_t nElements) { + c.resize(nElements); } /** * Free function that has no effects on non resizable containers (e.g. * std::array) It raises an exception if the expected size does not match */ -template +template inline typename std::enable_if::value, void>::type - resize(Container& c, const size_t nElements) -{ - if (nElements != c.size()) - throw std::logic_error("Try to change the size of a std::array."); +resize(Container& c, const size_t nElements) { + if (nElements != c.size()) + throw std::logic_error("Try to change the size of a std::array."); } /** * Free function to assign to a container */ -template -inline typename std::enable_if::value, void>::type assign( - Container& c, const size_t nElements, const T& value) -{ - c.assign(nElements, value); +template +inline typename std::enable_if::value, void>::type +assign(Container& c, const size_t nElements, const T& value) { + c.assign(nElements, value); } /** * Free function to assign to a std::array */ -template +template inline typename std::enable_if::value, void>::type - assign(Container& c, const size_t nElements, const T& value) -{ - for (size_t i = 0; i < nElements; i++) c[i] = value; +assign(Container& c, const size_t nElements, const T& value) { + for (size_t i = 0; i < nElements; i++) + c[i] = value; } /** @addtogroup result_sets_grp Result set classes * @{ */ /** Result set for KNN searches (N-closest neighbors) */ -template < - typename _DistanceType, typename _IndexType = size_t, - typename _CountType = size_t> -class KNNResultSet -{ - public: - using DistanceType = _DistanceType; - using IndexType = _IndexType; - using CountType = _CountType; - - private: - IndexType* indices; - DistanceType* dists; - CountType capacity; - CountType count; - - public: - explicit KNNResultSet(CountType capacity_) - : indices(nullptr), dists(nullptr), capacity(capacity_), count(0) - { - } - - void init(IndexType* indices_, DistanceType* dists_) - { - indices = indices_; - dists = dists_; - count = 0; - if (capacity) - dists[capacity - 1] = (std::numeric_limits::max)(); - } - - CountType size() const { return count; } - bool empty() const { return count == 0; } - bool full() const { return count == capacity; } - - /** - * Called during search to add an element matching the criteria. - * @return true if the search should be continued, false if the results are - * sufficient - */ - bool addPoint(DistanceType dist, IndexType index) - { - CountType i; - for (i = count; i > 0; --i) - { - /** If defined and two points have the same distance, the one with - * the lowest-index will be returned first. */ +template +class KNNResultSet { +public: + using DistanceType = _DistanceType; + using IndexType = _IndexType; + using CountType = _CountType; + +private: + IndexType* indices; + DistanceType* dists; + CountType capacity; + CountType count; + +public: + explicit KNNResultSet(CountType capacity_) + : indices(nullptr), + dists(nullptr), + capacity(capacity_), + count(0) {} + + void init(IndexType* indices_, DistanceType* dists_) { + indices = indices_; + dists = dists_; + count = 0; + if (capacity) + dists[capacity - 1] = (std::numeric_limits::max)(); + } + + CountType size() const { return count; } + bool empty() const { return count == 0; } + bool full() const { return count == capacity; } + + /** + * Called during search to add an element matching the criteria. + * @return true if the search should be continued, false if the results are + * sufficient + */ + bool addPoint(DistanceType dist, IndexType index) { + CountType i; + for (i = count; i > 0; --i) { + /** If defined and two points have the same distance, the one with + * the lowest-index will be returned first. */ #ifdef NANOFLANN_FIRST_MATCH - if ((dists[i - 1] > dist) || - ((dist == dists[i - 1]) && (indices[i - 1] > index))) - { + if ((dists[i - 1] > dist) + || ((dist == dists[i - 1]) && (indices[i - 1] > index))) { #else - if (dists[i - 1] > dist) - { + if (dists[i - 1] > dist) { #endif - if (i < capacity) - { - dists[i] = dists[i - 1]; - indices[i] = indices[i - 1]; - } - } - else - break; + if (i < capacity) { + dists[i] = dists[i - 1]; + indices[i] = indices[i - 1]; } - if (i < capacity) - { - dists[i] = dist; - indices[i] = index; - } - if (count < capacity) count++; - - // tell caller that the search shall continue - return true; + } else + break; } - - DistanceType worstDist() const { return dists[capacity - 1]; } -}; - -/** Result set for RKNN searches (N-closest neighbors with a maximum radius) */ -template < - typename _DistanceType, typename _IndexType = size_t, - typename _CountType = size_t> -class RKNNResultSet -{ - public: - using DistanceType = _DistanceType; - using IndexType = _IndexType; - using CountType = _CountType; - - private: - IndexType* indices; - DistanceType* dists; - CountType capacity; - CountType count; - DistanceType maximumSearchDistanceSquared; - - public: - explicit RKNNResultSet( - CountType capacity_, DistanceType maximumSearchDistanceSquared_) - : indices(nullptr), - dists(nullptr), - capacity(capacity_), - count(0), - maximumSearchDistanceSquared(maximumSearchDistanceSquared_) - { + if (i < capacity) { + dists[i] = dist; + indices[i] = index; } + if (count < capacity) + count++; - void init(IndexType* indices_, DistanceType* dists_) - { - indices = indices_; - dists = dists_; - count = 0; - if (capacity) dists[capacity - 1] = maximumSearchDistanceSquared; - } + // tell caller that the search shall continue + return true; + } - CountType size() const { return count; } - bool empty() const { return count == 0; } - bool full() const { return count == capacity; } + DistanceType worstDist() const { return dists[capacity - 1]; } +}; - /** - * Called during search to add an element matching the criteria. - * @return true if the search should be continued, false if the results are - * sufficient - */ - bool addPoint(DistanceType dist, IndexType index) - { - CountType i; - for (i = count; i > 0; --i) - { - /** If defined and two points have the same distance, the one with - * the lowest-index will be returned first. */ +/** Result set for RKNN searches (N-closest neighbors with a maximum radius) */ +template +class RKNNResultSet { +public: + using DistanceType = _DistanceType; + using IndexType = _IndexType; + using CountType = _CountType; + +private: + IndexType* indices; + DistanceType* dists; + CountType capacity; + CountType count; + DistanceType maximumSearchDistanceSquared; + +public: + explicit RKNNResultSet(CountType capacity_, + DistanceType maximumSearchDistanceSquared_) + : indices(nullptr), + dists(nullptr), + capacity(capacity_), + count(0), + maximumSearchDistanceSquared(maximumSearchDistanceSquared_) {} + + void init(IndexType* indices_, DistanceType* dists_) { + indices = indices_; + dists = dists_; + count = 0; + if (capacity) + dists[capacity - 1] = maximumSearchDistanceSquared; + } + + CountType size() const { return count; } + bool empty() const { return count == 0; } + bool full() const { return count == capacity; } + + /** + * Called during search to add an element matching the criteria. + * @return true if the search should be continued, false if the results are + * sufficient + */ + bool addPoint(DistanceType dist, IndexType index) { + CountType i; + for (i = count; i > 0; --i) { + /** If defined and two points have the same distance, the one with + * the lowest-index will be returned first. */ #ifdef NANOFLANN_FIRST_MATCH - if ((dists[i - 1] > dist) || - ((dist == dists[i - 1]) && (indices[i - 1] > index))) - { + if ((dists[i - 1] > dist) + || ((dist == dists[i - 1]) && (indices[i - 1] > index))) { #else - if (dists[i - 1] > dist) - { + if (dists[i - 1] > dist) { #endif - if (i < capacity) - { - dists[i] = dists[i - 1]; - indices[i] = indices[i - 1]; - } - } - else - break; + if (i < capacity) { + dists[i] = dists[i - 1]; + indices[i] = indices[i - 1]; } - if (i < capacity) - { - dists[i] = dist; - indices[i] = index; - } - if (count < capacity) count++; - - // tell caller that the search shall continue - return true; + } else + break; } + if (i < capacity) { + dists[i] = dist; + indices[i] = index; + } + if (count < capacity) + count++; + + // tell caller that the search shall continue + return true; + } - DistanceType worstDist() const { return dists[capacity - 1]; } + DistanceType worstDist() const { return dists[capacity - 1]; } }; /** operator "<" for std::sort() */ -struct IndexDist_Sorter -{ - /** PairType will be typically: ResultItem */ - template - bool operator()(const PairType& p1, const PairType& p2) const - { - return p1.second < p2.second; - } +struct IndexDist_Sorter { + /** PairType will be typically: ResultItem */ + template + bool operator()(const PairType& p1, const PairType& p2) const { + return p1.second < p2.second; + } }; /** @@ -342,119 +312,109 @@ struct IndexDist_Sorter * languages. * See: https://github.com/jlblancoc/nanoflann/issues/166 */ -template -struct ResultItem -{ - ResultItem() = default; - ResultItem(const IndexType index, const DistanceType distance) - : first(index), second(distance) - { - } - - IndexType first; //!< Index of the sample in the dataset - DistanceType second; //!< Distance from sample to query point +template +struct ResultItem { + ResultItem() = default; + ResultItem(const IndexType index, const DistanceType distance) + : first(index), + second(distance) {} + + IndexType first; //!< Index of the sample in the dataset + DistanceType second; //!< Distance from sample to query point }; /** * A result-set class used when performing a radius based search. */ -template -class RadiusResultSet -{ - public: - using DistanceType = _DistanceType; - using IndexType = _IndexType; - - public: - const DistanceType radius; - - std::vector>& m_indices_dists; - - explicit RadiusResultSet( - DistanceType radius_, - std::vector>& indices_dists) - : radius(radius_), m_indices_dists(indices_dists) - { - init(); - } - - void init() { clear(); } - void clear() { m_indices_dists.clear(); } - - size_t size() const { return m_indices_dists.size(); } - size_t empty() const { return m_indices_dists.empty(); } - - bool full() const { return true; } - - /** - * Called during search to add an element matching the criteria. - * @return true if the search should be continued, false if the results are - * sufficient - */ - bool addPoint(DistanceType dist, IndexType index) - { - if (dist < radius) m_indices_dists.emplace_back(index, dist); - return true; - } - - DistanceType worstDist() const { return radius; } - - /** - * Find the worst result (farthest neighbor) without copying or sorting - * Pre-conditions: size() > 0 - */ - ResultItem worst_item() const - { - if (m_indices_dists.empty()) - throw std::runtime_error( - "Cannot invoke RadiusResultSet::worst_item() on " - "an empty list of results."); - auto it = std::max_element( - m_indices_dists.begin(), m_indices_dists.end(), IndexDist_Sorter()); - return *it; - } +template +class RadiusResultSet { +public: + using DistanceType = _DistanceType; + using IndexType = _IndexType; + +public: + const DistanceType radius; + + std::vector>& m_indices_dists; + + explicit RadiusResultSet( + DistanceType radius_, + std::vector>& indices_dists) + : radius(radius_), + m_indices_dists(indices_dists) { + init(); + } + + void init() { clear(); } + void clear() { m_indices_dists.clear(); } + + size_t size() const { return m_indices_dists.size(); } + size_t empty() const { return m_indices_dists.empty(); } + + bool full() const { return true; } + + /** + * Called during search to add an element matching the criteria. + * @return true if the search should be continued, false if the results are + * sufficient + */ + bool addPoint(DistanceType dist, IndexType index) { + if (dist < radius) + m_indices_dists.emplace_back(index, dist); + return true; + } + + DistanceType worstDist() const { return radius; } + + /** + * Find the worst result (farthest neighbor) without copying or sorting + * Pre-conditions: size() > 0 + */ + ResultItem worst_item() const { + if (m_indices_dists.empty()) + throw std::runtime_error("Cannot invoke RadiusResultSet::worst_item() on " + "an empty list of results."); + auto it = std::max_element(m_indices_dists.begin(), + m_indices_dists.end(), + IndexDist_Sorter()); + return *it; + } }; /** @} */ /** @addtogroup loadsave_grp Load/save auxiliary functions * @{ */ -template -void save_value(std::ostream& stream, const T& value) -{ - stream.write(reinterpret_cast(&value), sizeof(T)); +template +void save_value(std::ostream& stream, const T& value) { + stream.write(reinterpret_cast(&value), sizeof(T)); } -template -void save_value(std::ostream& stream, const std::vector& value) -{ - size_t size = value.size(); - stream.write(reinterpret_cast(&size), sizeof(size_t)); - stream.write(reinterpret_cast(value.data()), sizeof(T) * size); +template +void save_value(std::ostream& stream, const std::vector& value) { + size_t size = value.size(); + stream.write(reinterpret_cast(&size), sizeof(size_t)); + stream.write(reinterpret_cast(value.data()), sizeof(T) * size); } -template -void load_value(std::istream& stream, T& value) -{ - stream.read(reinterpret_cast(&value), sizeof(T)); +template +void load_value(std::istream& stream, T& value) { + stream.read(reinterpret_cast(&value), sizeof(T)); } -template -void load_value(std::istream& stream, std::vector& value) -{ - size_t size; - stream.read(reinterpret_cast(&size), sizeof(size_t)); - value.resize(size); - stream.read(reinterpret_cast(value.data()), sizeof(T) * size); +template +void load_value(std::istream& stream, std::vector& value) { + size_t size; + stream.read(reinterpret_cast(&size), sizeof(size_t)); + value.resize(size); + stream.read(reinterpret_cast(value.data()), sizeof(T) * size); } /** @} */ /** @addtogroup metric_grp Metric (distance) classes * @{ */ -struct Metric -{ -}; +struct Metric {}; /** Manhattan distance functor (generic version, optimized for * high-dimensionality data sets). Corresponding distance traits: @@ -466,56 +426,55 @@ struct Metric * \tparam IndexType Type of the arguments with which the data can be * accessed (e.g. float, double, int64_t, T*) */ -template < - class T, class DataSource, typename _DistanceType = T, - typename IndexType = uint32_t> -struct L1_Adaptor -{ - using ElementType = T; - using DistanceType = _DistanceType; - - const DataSource& data_source; - - L1_Adaptor(const DataSource& _data_source) : data_source(_data_source) {} - - DistanceType evalMetric( - const T* a, const IndexType b_idx, size_t size, - DistanceType worst_dist = -1) const - { - DistanceType result = DistanceType(); - const T* last = a + size; - const T* lastgroup = last - 3; - size_t d = 0; - - /* Process 4 items with each loop for efficiency. */ - while (a < lastgroup) - { - const DistanceType diff0 = - std::abs(a[0] - data_source.kdtree_get_pt(b_idx, d++)); - const DistanceType diff1 = - std::abs(a[1] - data_source.kdtree_get_pt(b_idx, d++)); - const DistanceType diff2 = - std::abs(a[2] - data_source.kdtree_get_pt(b_idx, d++)); - const DistanceType diff3 = - std::abs(a[3] - data_source.kdtree_get_pt(b_idx, d++)); - result += diff0 + diff1 + diff2 + diff3; - a += 4; - if ((worst_dist > 0) && (result > worst_dist)) { return result; } - } - /* Process last 0-3 components. Not needed for standard vector lengths. - */ - while (a < last) - { - result += std::abs(*a++ - data_source.kdtree_get_pt(b_idx, d++)); - } +template +struct L1_Adaptor { + using ElementType = T; + using DistanceType = _DistanceType; + + const DataSource& data_source; + + L1_Adaptor(const DataSource& _data_source) : data_source(_data_source) {} + + DistanceType evalMetric(const T* a, + const IndexType b_idx, + size_t size, + DistanceType worst_dist = -1) const { + DistanceType result = DistanceType(); + const T* last = a + size; + const T* lastgroup = last - 3; + size_t d = 0; + + /* Process 4 items with each loop for efficiency. */ + while (a < lastgroup) { + const DistanceType diff0 = + std::abs(a[0] - data_source.kdtree_get_pt(b_idx, d++)); + const DistanceType diff1 = + std::abs(a[1] - data_source.kdtree_get_pt(b_idx, d++)); + const DistanceType diff2 = + std::abs(a[2] - data_source.kdtree_get_pt(b_idx, d++)); + const DistanceType diff3 = + std::abs(a[3] - data_source.kdtree_get_pt(b_idx, d++)); + result += diff0 + diff1 + diff2 + diff3; + a += 4; + if ((worst_dist > 0) && (result > worst_dist)) { return result; + } } - - template - DistanceType accum_dist(const U a, const V b, const size_t) const - { - return std::abs(a - b); + /* Process last 0-3 components. Not needed for standard vector lengths. + */ + while (a < last) { + result += std::abs(*a++ - data_source.kdtree_get_pt(b_idx, d++)); } + return result; + } + + template + DistanceType accum_dist(const U a, const V b, const size_t) const { + return std::abs(a - b); + } }; /** **Squared** Euclidean distance functor (generic version, optimized for @@ -528,59 +487,52 @@ struct L1_Adaptor * \tparam IndexType Type of the arguments with which the data can be * accessed (e.g. float, double, int64_t, T*) */ -template < - class T, class DataSource, typename _DistanceType = T, - typename IndexType = uint32_t> -struct L2_Adaptor -{ - using ElementType = T; - using DistanceType = _DistanceType; - - const DataSource& data_source; - - L2_Adaptor(const DataSource& _data_source) : data_source(_data_source) {} - - DistanceType evalMetric( - const T* a, const IndexType b_idx, size_t size, - DistanceType worst_dist = -1) const - { - DistanceType result = DistanceType(); - const T* last = a + size; - const T* lastgroup = last - 3; - size_t d = 0; - - /* Process 4 items with each loop for efficiency. */ - while (a < lastgroup) - { - const DistanceType diff0 = - a[0] - data_source.kdtree_get_pt(b_idx, d++); - const DistanceType diff1 = - a[1] - data_source.kdtree_get_pt(b_idx, d++); - const DistanceType diff2 = - a[2] - data_source.kdtree_get_pt(b_idx, d++); - const DistanceType diff3 = - a[3] - data_source.kdtree_get_pt(b_idx, d++); - result += - diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3; - a += 4; - if ((worst_dist > 0) && (result > worst_dist)) { return result; } - } - /* Process last 0-3 components. Not needed for standard vector lengths. - */ - while (a < last) - { - const DistanceType diff0 = - *a++ - data_source.kdtree_get_pt(b_idx, d++); - result += diff0 * diff0; - } +template +struct L2_Adaptor { + using ElementType = T; + using DistanceType = _DistanceType; + + const DataSource& data_source; + + L2_Adaptor(const DataSource& _data_source) : data_source(_data_source) {} + + DistanceType evalMetric(const T* a, + const IndexType b_idx, + size_t size, + DistanceType worst_dist = -1) const { + DistanceType result = DistanceType(); + const T* last = a + size; + const T* lastgroup = last - 3; + size_t d = 0; + + /* Process 4 items with each loop for efficiency. */ + while (a < lastgroup) { + const DistanceType diff0 = a[0] - data_source.kdtree_get_pt(b_idx, d++); + const DistanceType diff1 = a[1] - data_source.kdtree_get_pt(b_idx, d++); + const DistanceType diff2 = a[2] - data_source.kdtree_get_pt(b_idx, d++); + const DistanceType diff3 = a[3] - data_source.kdtree_get_pt(b_idx, d++); + result += diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3; + a += 4; + if ((worst_dist > 0) && (result > worst_dist)) { return result; + } } - - template - DistanceType accum_dist(const U a, const V b, const size_t) const - { - return (a - b) * (a - b); + /* Process last 0-3 components. Not needed for standard vector lengths. + */ + while (a < last) { + const DistanceType diff0 = *a++ - data_source.kdtree_get_pt(b_idx, d++); + result += diff0 * diff0; } + return result; + } + + template + DistanceType accum_dist(const U a, const V b, const size_t) const { + return (a - b) * (a - b); + } }; /** **Squared** Euclidean (L2) distance functor (suitable for low-dimensionality @@ -593,39 +545,33 @@ struct L2_Adaptor * \tparam IndexType Type of the arguments with which the data can be * accessed (e.g. float, double, int64_t, T*) */ -template < - class T, class DataSource, typename _DistanceType = T, - typename IndexType = uint32_t> -struct L2_Simple_Adaptor -{ - using ElementType = T; - using DistanceType = _DistanceType; - - const DataSource& data_source; - - L2_Simple_Adaptor(const DataSource& _data_source) - : data_source(_data_source) - { - } - - DistanceType evalMetric( - const T* a, const IndexType b_idx, size_t size) const - { - DistanceType result = DistanceType(); - for (size_t i = 0; i < size; ++i) - { - const DistanceType diff = - a[i] - data_source.kdtree_get_pt(b_idx, i); - result += diff * diff; - } - return result; - } - - template - DistanceType accum_dist(const U a, const V b, const size_t) const - { - return (a - b) * (a - b); - } +template +struct L2_Simple_Adaptor { + using ElementType = T; + using DistanceType = _DistanceType; + + const DataSource& data_source; + + L2_Simple_Adaptor(const DataSource& _data_source) + : data_source(_data_source) {} + + DistanceType + evalMetric(const T* a, const IndexType b_idx, size_t size) const { + DistanceType result = DistanceType(); + for (size_t i = 0; i < size; ++i) { + const DistanceType diff = a[i] - data_source.kdtree_get_pt(b_idx, i); + result += diff * diff; + } + return result; + } + + template + DistanceType accum_dist(const U a, const V b, const size_t) const { + return (a - b) * (a - b); + } }; /** SO2 distance functor @@ -638,39 +584,38 @@ struct L2_Simple_Adaptor * \tparam IndexType Type of the arguments with which the data can be * accessed (e.g. float, double, int64_t, T*) */ -template < - class T, class DataSource, typename _DistanceType = T, - typename IndexType = uint32_t> -struct SO2_Adaptor -{ - using ElementType = T; - using DistanceType = _DistanceType; - - const DataSource& data_source; - - SO2_Adaptor(const DataSource& _data_source) : data_source(_data_source) {} - - DistanceType evalMetric( - const T* a, const IndexType b_idx, size_t size) const - { - return accum_dist( - a[size - 1], data_source.kdtree_get_pt(b_idx, size - 1), size - 1); - } - - /** Note: this assumes that input angles are already in the range [-pi,pi] - */ - template - DistanceType accum_dist(const U a, const V b, const size_t) const - { - DistanceType result = DistanceType(); - DistanceType PI = pi_const(); - result = b - a; - if (result > PI) - result -= 2 * PI; - else if (result < -PI) - result += 2 * PI; - return result; - } +template +struct SO2_Adaptor { + using ElementType = T; + using DistanceType = _DistanceType; + + const DataSource& data_source; + + SO2_Adaptor(const DataSource& _data_source) : data_source(_data_source) {} + + DistanceType + evalMetric(const T* a, const IndexType b_idx, size_t size) const { + return accum_dist(a[size - 1], + data_source.kdtree_get_pt(b_idx, size - 1), + size - 1); + } + + /** Note: this assumes that input angles are already in the range [-pi,pi] + */ + template + DistanceType accum_dist(const U a, const V b, const size_t) const { + DistanceType result = DistanceType(); + DistanceType PI = pi_const(); + result = b - a; + if (result > PI) + result -= 2 * PI; + else if (result < -PI) + result += 2 * PI; + return result; + } }; /** SO3 distance functor (Uses L2_Simple) @@ -683,81 +628,66 @@ struct SO2_Adaptor * \tparam IndexType Type of the arguments with which the data can be * accessed (e.g. float, double, int64_t, T*) */ -template < - class T, class DataSource, typename _DistanceType = T, - typename IndexType = uint32_t> -struct SO3_Adaptor -{ - using ElementType = T; - using DistanceType = _DistanceType; - - L2_Simple_Adaptor - distance_L2_Simple; - - SO3_Adaptor(const DataSource& _data_source) - : distance_L2_Simple(_data_source) - { - } - - DistanceType evalMetric( - const T* a, const IndexType b_idx, size_t size) const - { - return distance_L2_Simple.evalMetric(a, b_idx, size); - } - - template - DistanceType accum_dist(const U a, const V b, const size_t idx) const - { - return distance_L2_Simple.accum_dist(a, b, idx); - } +template +struct SO3_Adaptor { + using ElementType = T; + using DistanceType = _DistanceType; + + L2_Simple_Adaptor distance_L2_Simple; + + SO3_Adaptor(const DataSource& _data_source) + : distance_L2_Simple(_data_source) {} + + DistanceType + evalMetric(const T* a, const IndexType b_idx, size_t size) const { + return distance_L2_Simple.evalMetric(a, b_idx, size); + } + + template + DistanceType accum_dist(const U a, const V b, const size_t idx) const { + return distance_L2_Simple.accum_dist(a, b, idx); + } }; /** Metaprogramming helper traits class for the L1 (Manhattan) metric */ -struct metric_L1 : public Metric -{ - template - struct traits - { - using distance_t = L1_Adaptor; - }; +struct metric_L1 : public Metric { + template + struct traits { + using distance_t = L1_Adaptor; + }; }; /** Metaprogramming helper traits class for the L2 (Euclidean) **squared** * distance metric */ -struct metric_L2 : public Metric -{ - template - struct traits - { - using distance_t = L2_Adaptor; - }; +struct metric_L2 : public Metric { + template + struct traits { + using distance_t = L2_Adaptor; + }; }; /** Metaprogramming helper traits class for the L2_simple (Euclidean) * **squared** distance metric */ -struct metric_L2_Simple : public Metric -{ - template - struct traits - { - using distance_t = L2_Simple_Adaptor; - }; +struct metric_L2_Simple : public Metric { + template + struct traits { + using distance_t = L2_Simple_Adaptor; + }; }; /** Metaprogramming helper traits class for the SO3_InnerProdQuat metric */ -struct metric_SO2 : public Metric -{ - template - struct traits - { - using distance_t = SO2_Adaptor; - }; +struct metric_SO2 : public Metric { + template + struct traits { + using distance_t = SO2_Adaptor; + }; }; /** Metaprogramming helper traits class for the SO3_InnerProdQuat metric */ -struct metric_SO3 : public Metric -{ - template - struct traits - { - using distance_t = SO3_Adaptor; - }; +struct metric_SO3 : public Metric { + template + struct traits { + using distance_t = SO3_Adaptor; + }; }; /** @} */ @@ -765,50 +695,43 @@ struct metric_SO3 : public Metric /** @addtogroup param_grp Parameter structs * @{ */ -enum class KDTreeSingleIndexAdaptorFlags -{ - None = 0, - SkipInitialBuildIndex = 1 +enum class KDTreeSingleIndexAdaptorFlags { + None = 0, + SkipInitialBuildIndex = 1 }; -inline std::underlying_type::type operator&( - KDTreeSingleIndexAdaptorFlags lhs, KDTreeSingleIndexAdaptorFlags rhs) -{ - using underlying = - typename std::underlying_type::type; - return static_cast(lhs) & static_cast(rhs); +inline std::underlying_type::type +operator&(KDTreeSingleIndexAdaptorFlags lhs, + KDTreeSingleIndexAdaptorFlags rhs) { + using underlying = + typename std::underlying_type::type; + return static_cast(lhs) & static_cast(rhs); } /** Parameters (see README.md) */ -struct KDTreeSingleIndexAdaptorParams -{ - KDTreeSingleIndexAdaptorParams( - size_t _leaf_max_size = 10, - KDTreeSingleIndexAdaptorFlags _flags = - KDTreeSingleIndexAdaptorFlags::None, - unsigned int _n_thread_build = 1) - : leaf_max_size(_leaf_max_size), - flags(_flags), - n_thread_build(_n_thread_build) - { - } - - size_t leaf_max_size; - KDTreeSingleIndexAdaptorFlags flags; - unsigned int n_thread_build; +struct KDTreeSingleIndexAdaptorParams { + KDTreeSingleIndexAdaptorParams(size_t _leaf_max_size = 10, + KDTreeSingleIndexAdaptorFlags _flags = + KDTreeSingleIndexAdaptorFlags::None, + unsigned int _n_thread_build = 1) + : leaf_max_size(_leaf_max_size), + flags(_flags), + n_thread_build(_n_thread_build) {} + + size_t leaf_max_size; + KDTreeSingleIndexAdaptorFlags flags; + unsigned int n_thread_build; }; /** Search options for KDTreeSingleIndexAdaptor::findNeighbors() */ -struct SearchParameters -{ - SearchParameters(float eps_ = 0, bool sorted_ = true) - : eps(eps_), sorted(sorted_) - { - } - - float eps; //!< search for eps-approximate neighbours (default: 0) - bool sorted; //!< only for radius search, require neighbours sorted by - //!< distance (default: true) +struct SearchParameters { + SearchParameters(float eps_ = 0, bool sorted_ = true) + : eps(eps_), + sorted(sorted_) {} + + float eps; //!< search for eps-approximate neighbours (default: 0) + bool sorted; //!< only for radius search, require neighbours sorted by + //!< distance (default: true) }; /** @} */ @@ -829,118 +752,110 @@ struct SearchParameters * is no need to track down all the objects to free them. * */ -class PooledAllocator -{ - static constexpr size_t WORDSIZE = 16; // WORDSIZE must >= 8 - static constexpr size_t BLOCKSIZE = 8192; - - /* We maintain memory alignment to word boundaries by requiring that all - allocations be in multiples of the machine wordsize. */ - /* Size of machine word in bytes. Must be power of 2. */ - /* Minimum number of bytes requested at a time from the system. Must be - * multiple of WORDSIZE. */ - - using Size = size_t; - - Size remaining_ = 0; //!< Number of bytes left in current block of storage - void* base_ = nullptr; //!< Pointer to base of current block of storage - void* loc_ = nullptr; //!< Current location in block to next allocate - - void internal_init() - { - remaining_ = 0; - base_ = nullptr; - usedMemory = 0; - wastedMemory = 0; - } - - public: - Size usedMemory = 0; - Size wastedMemory = 0; - - /** - Default constructor. Initializes a new pool. - */ - PooledAllocator() { internal_init(); } - - /** - * Destructor. Frees all the memory allocated in this pool. - */ - ~PooledAllocator() { free_all(); } - - /** Frees all allocated memory chunks */ - void free_all() - { - while (base_ != nullptr) - { - // Get pointer to prev block - void* prev = *(static_cast(base_)); - ::free(base_); - base_ = prev; - } - internal_init(); - } - - /** - * Returns a pointer to a piece of new memory of the given size in bytes - * allocated from the pool. +class PooledAllocator { + static constexpr size_t WORDSIZE = 16; // WORDSIZE must >= 8 + static constexpr size_t BLOCKSIZE = 8192; + + /* We maintain memory alignment to word boundaries by requiring that all + allocations be in multiples of the machine wordsize. */ + /* Size of machine word in bytes. Must be power of 2. */ + /* Minimum number of bytes requested at a time from the system. Must be + * multiple of WORDSIZE. */ + + using Size = size_t; + + Size remaining_ = 0; //!< Number of bytes left in current block of storage + void* base_ = nullptr; //!< Pointer to base of current block of storage + void* loc_ = nullptr; //!< Current location in block to next allocate + + void internal_init() { + remaining_ = 0; + base_ = nullptr; + usedMemory = 0; + wastedMemory = 0; + } + +public: + Size usedMemory = 0; + Size wastedMemory = 0; + + /** + Default constructor. Initializes a new pool. + */ + PooledAllocator() { internal_init(); } + + /** + * Destructor. Frees all the memory allocated in this pool. + */ + ~PooledAllocator() { free_all(); } + + /** Frees all allocated memory chunks */ + void free_all() { + while (base_ != nullptr) { + // Get pointer to prev block + void* prev = *(static_cast(base_)); + ::free(base_); + base_ = prev; + } + internal_init(); + } + + /** + * Returns a pointer to a piece of new memory of the given size in bytes + * allocated from the pool. + */ + void* malloc(const size_t req_size) { + /* Round size up to a multiple of wordsize. The following expression + only works for WORDSIZE that is a power of 2, by masking last bits + of incremented size to zero. */ - void* malloc(const size_t req_size) - { - /* Round size up to a multiple of wordsize. The following expression - only works for WORDSIZE that is a power of 2, by masking last bits - of incremented size to zero. - */ - const Size size = (req_size + (WORDSIZE - 1)) & ~(WORDSIZE - 1); - - /* Check whether a new block must be allocated. Note that the first - word of a block is reserved for a pointer to the previous block. - */ - if (size > remaining_) - { - wastedMemory += remaining_; - - /* Allocate new storage. */ - const Size blocksize = - size > BLOCKSIZE ? size + WORDSIZE : BLOCKSIZE + WORDSIZE; - - // use the standard C malloc to allocate memory - void* m = ::malloc(blocksize); - if (!m) - { - fprintf(stderr, "Failed to allocate memory.\n"); - throw std::bad_alloc(); - } - - /* Fill first word of new block with pointer to previous block. */ - static_cast(m)[0] = base_; - base_ = m; - - remaining_ = blocksize - WORDSIZE; - loc_ = static_cast(m) + WORDSIZE; - } - void* rloc = loc_; - loc_ = static_cast(loc_) + size; - remaining_ -= size; + const Size size = (req_size + (WORDSIZE - 1)) & ~(WORDSIZE - 1); - usedMemory += size; - - return rloc; - } - - /** - * Allocates (using this pool) a generic type T. - * - * Params: - * count = number of instances to allocate. - * Returns: pointer (of type T*) to memory buffer + /* Check whether a new block must be allocated. Note that the first + word of a block is reserved for a pointer to the previous block. */ - template - T* allocate(const size_t count = 1) - { - T* mem = static_cast(this->malloc(sizeof(T) * count)); - return mem; - } + if (size > remaining_) { + wastedMemory += remaining_; + + /* Allocate new storage. */ + const Size blocksize = + size > BLOCKSIZE ? size + WORDSIZE : BLOCKSIZE + WORDSIZE; + + // use the standard C malloc to allocate memory + void* m = ::malloc(blocksize); + if (!m) { + fprintf(stderr, "Failed to allocate memory.\n"); + throw std::bad_alloc(); + } + + /* Fill first word of new block with pointer to previous block. */ + static_cast(m)[0] = base_; + base_ = m; + + remaining_ = blocksize - WORDSIZE; + loc_ = static_cast(m) + WORDSIZE; + } + void* rloc = loc_; + loc_ = static_cast(loc_) + size; + remaining_ -= size; + + usedMemory += size; + + return rloc; + } + + /** + * Allocates (using this pool) a generic type T. + * + * Params: + * count = number of instances to allocate. + * Returns: pointer (of type T*) to memory buffer + */ + template + T* allocate(const size_t count = 1) { + T* mem = static_cast(this->malloc(sizeof(T) * count)); + return mem; + } }; /** @} */ @@ -950,16 +865,14 @@ class PooledAllocator /** Used to declare fixed-size arrays when DIM>0, dynamically-allocated vectors * when DIM=-1. Fixed size version for a generic DIM: */ -template -struct array_or_vector -{ - using type = std::array; +template +struct array_or_vector { + using type = std::array; }; /** Dynamic size version */ -template -struct array_or_vector<-1, T> -{ - using type = std::vector; +template +struct array_or_vector<-1, T> { + using type = std::vector; }; /** @} */ @@ -978,478 +891,474 @@ struct array_or_vector<-1, T> * \tparam IndexType Type of the arguments with which the data can be * accessed (e.g. float, double, int64_t, T*) */ -template < - class Derived, typename Distance, class DatasetAdaptor, int32_t DIM = -1, - typename IndexType = uint32_t> -class KDTreeBaseClass -{ - public: - /** Frees the previously-built index. Automatically called within - * buildIndex(). */ - void freeIndex(Derived& obj) - { - obj.pool_.free_all(); - obj.root_node_ = nullptr; - obj.size_at_index_build_ = 0; - } - - using ElementType = typename Distance::ElementType; - using DistanceType = typename Distance::DistanceType; - - /** - * Array of indices to vectors in the dataset_. - */ - std::vector vAcc_; - - using Offset = typename decltype(vAcc_)::size_type; - using Size = typename decltype(vAcc_)::size_type; - using Dimension = int32_t; - - /*--------------------------- - * Internal Data Structures - * --------------------------*/ - struct Node - { - /** Union used because a node can be either a LEAF node or a non-leaf - * node, so both data fields are never used simultaneously */ - union - { - struct leaf - { - Offset left, right; //!< Indices of points in leaf node - } lr; - struct nonleaf - { - Dimension divfeat; //!< Dimension used for subdivision. - /// The values used for subdivision. - DistanceType divlow, divhigh; - } sub; - } node_type; - - /** Child nodes (both=nullptr mean its a leaf node) */ - Node *child1 = nullptr, *child2 = nullptr; - }; - - using NodePtr = Node*; - using NodeConstPtr = const Node*; - - struct Interval - { - ElementType low, high; - }; - - NodePtr root_node_ = nullptr; - - Size leaf_max_size_ = 0; - - /// Number of thread for concurrent tree build - Size n_thread_build_ = 1; - /// Number of current points in the dataset - Size size_ = 0; - /// Number of points in the dataset when the index was built - Size size_at_index_build_ = 0; - Dimension dim_ = 0; //!< Dimensionality of each data point - - /** Define "BoundingBox" as a fixed-size or variable-size container - * depending on "DIM" */ - using BoundingBox = typename array_or_vector::type; - - /** Define "distance_vector_t" as a fixed-size or variable-size container - * depending on "DIM" */ - using distance_vector_t = typename array_or_vector::type; - - /** The KD-tree used to find neighbours */ - BoundingBox root_bbox_; - - /** - * Pooled memory allocator. - * - * Using a pooled memory allocator is more efficient - * than allocating memory directly when there is a large - * number small of memory allocations. - */ - PooledAllocator pool_; - - /** Returns number of points in dataset */ - Size size(const Derived& obj) const { return obj.size_; } - - /** Returns the length of each point in the dataset */ - Size veclen(const Derived& obj) { return DIM > 0 ? DIM : obj.dim; } - - /// Helper accessor to the dataset points: - ElementType dataset_get( - const Derived& obj, IndexType element, Dimension component) const - { - return obj.dataset_.kdtree_get_pt(element, component); - } - - /** - * Computes the inde memory usage - * Returns: memory used by the index - */ - Size usedMemory(Derived& obj) - { - return obj.pool_.usedMemory + obj.pool_.wastedMemory + - obj.dataset_.kdtree_get_point_count() * - sizeof(IndexType); // pool memory and vind array memory - } - - void computeMinMax( - const Derived& obj, Offset ind, Size count, Dimension element, - ElementType& min_elem, ElementType& max_elem) - { - min_elem = dataset_get(obj, vAcc_[ind], element); - max_elem = min_elem; - for (Offset i = 1; i < count; ++i) - { - ElementType val = dataset_get(obj, vAcc_[ind + i], element); - if (val < min_elem) min_elem = val; - if (val > max_elem) max_elem = val; - } - } - - /** - * Create a tree node that subdivides the list of vecs from vind[first] - * to vind[last]. The routine is called recursively on each sublist. - * - * @param left index of the first vector - * @param right index of the last vector - */ - NodePtr divideTree( - Derived& obj, const Offset left, const Offset right, BoundingBox& bbox) - { - NodePtr node = obj.pool_.template allocate(); // allocate memory - const auto dims = (DIM > 0 ? DIM : obj.dim_); - - /* If too few exemplars remain, then make this a leaf node. */ - if ((right - left) <= static_cast(obj.leaf_max_size_)) - { - node->child1 = node->child2 = nullptr; /* Mark as leaf node. */ - node->node_type.lr.left = left; - node->node_type.lr.right = right; - - // compute bounding-box of leaf points - for (Dimension i = 0; i < dims; ++i) - { - bbox[i].low = dataset_get(obj, obj.vAcc_[left], i); - bbox[i].high = dataset_get(obj, obj.vAcc_[left], i); - } - for (Offset k = left + 1; k < right; ++k) - { - for (Dimension i = 0; i < dims; ++i) - { - const auto val = dataset_get(obj, obj.vAcc_[k], i); - if (bbox[i].low > val) bbox[i].low = val; - if (bbox[i].high < val) bbox[i].high = val; - } - } - } - else - { - Offset idx; - Dimension cutfeat; - DistanceType cutval; - middleSplit_(obj, left, right - left, idx, cutfeat, cutval, bbox); - - node->node_type.sub.divfeat = cutfeat; - - BoundingBox left_bbox(bbox); - left_bbox[cutfeat].high = cutval; - node->child1 = this->divideTree(obj, left, left + idx, left_bbox); - - BoundingBox right_bbox(bbox); - right_bbox[cutfeat].low = cutval; - node->child2 = this->divideTree(obj, left + idx, right, right_bbox); - - node->node_type.sub.divlow = left_bbox[cutfeat].high; - node->node_type.sub.divhigh = right_bbox[cutfeat].low; - - for (Dimension i = 0; i < dims; ++i) - { - bbox[i].low = std::min(left_bbox[i].low, right_bbox[i].low); - bbox[i].high = std::max(left_bbox[i].high, right_bbox[i].high); - } - } - - return node; - } - - /** - * Create a tree node that subdivides the list of vecs from vind[first] to - * vind[last] concurrently. The routine is called recursively on each - * sublist. - * - * @param left index of the first vector - * @param right index of the last vector - * @param thread_count count of std::async threads - * @param mutex mutex for mempool allocation - */ - NodePtr divideTreeConcurrent( - Derived& obj, const Offset left, const Offset right, BoundingBox& bbox, - std::atomic& thread_count, std::mutex& mutex) - { - std::unique_lock lock(mutex); - NodePtr node = obj.pool_.template allocate(); // allocate memory - lock.unlock(); - - const auto dims = (DIM > 0 ? DIM : obj.dim_); - - /* If too few exemplars remain, then make this a leaf node. */ - if ((right - left) <= static_cast(obj.leaf_max_size_)) - { - node->child1 = node->child2 = nullptr; /* Mark as leaf node. */ - node->node_type.lr.left = left; - node->node_type.lr.right = right; - - // compute bounding-box of leaf points - for (Dimension i = 0; i < dims; ++i) - { - bbox[i].low = dataset_get(obj, obj.vAcc_[left], i); - bbox[i].high = dataset_get(obj, obj.vAcc_[left], i); - } - for (Offset k = left + 1; k < right; ++k) - { - for (Dimension i = 0; i < dims; ++i) - { - const auto val = dataset_get(obj, obj.vAcc_[k], i); - if (bbox[i].low > val) bbox[i].low = val; - if (bbox[i].high < val) bbox[i].high = val; - } - } - } - else - { - Offset idx; - Dimension cutfeat; - DistanceType cutval; - middleSplit_(obj, left, right - left, idx, cutfeat, cutval, bbox); - - node->node_type.sub.divfeat = cutfeat; - - std::future right_future; - - BoundingBox right_bbox(bbox); - right_bbox[cutfeat].low = cutval; - if (++thread_count < n_thread_build_) - { - // Concurrent right sub-tree - right_future = std::async( - std::launch::async, &KDTreeBaseClass::divideTreeConcurrent, - this, std::ref(obj), left + idx, right, - std::ref(right_bbox), std::ref(thread_count), - std::ref(mutex)); - } - else - { - --thread_count; - } - - BoundingBox left_bbox(bbox); - left_bbox[cutfeat].high = cutval; - node->child1 = this->divideTreeConcurrent( - obj, left, left + idx, left_bbox, thread_count, mutex); - - if (right_future.valid()) - { - // Block and wait for concurrent right sub-tree - node->child2 = right_future.get(); - --thread_count; - } - else - { - node->child2 = this->divideTreeConcurrent( - obj, left + idx, right, right_bbox, thread_count, mutex); - } - - node->node_type.sub.divlow = left_bbox[cutfeat].high; - node->node_type.sub.divhigh = right_bbox[cutfeat].low; - - for (Dimension i = 0; i < dims; ++i) - { - bbox[i].low = std::min(left_bbox[i].low, right_bbox[i].low); - bbox[i].high = std::max(left_bbox[i].high, right_bbox[i].high); - } +template +class KDTreeBaseClass { +public: + /** Frees the previously-built index. Automatically called within + * buildIndex(). */ + void freeIndex(Derived& obj) { + obj.pool_.free_all(); + obj.root_node_ = nullptr; + obj.size_at_index_build_ = 0; + } + + using ElementType = typename Distance::ElementType; + using DistanceType = typename Distance::DistanceType; + + /** + * Array of indices to vectors in the dataset_. + */ + std::vector vAcc_; + + using Offset = typename decltype(vAcc_)::size_type; + using Size = typename decltype(vAcc_)::size_type; + using Dimension = int32_t; + + /*--------------------------- + * Internal Data Structures + * --------------------------*/ + struct Node { + /** Union used because a node can be either a LEAF node or a non-leaf + * node, so both data fields are never used simultaneously */ + union { + struct leaf { + Offset left, right; //!< Indices of points in leaf node + } lr; + struct nonleaf { + Dimension divfeat; //!< Dimension used for subdivision. + /// The values used for subdivision. + DistanceType divlow, divhigh; + } sub; + } node_type; + + /** Child nodes (both=nullptr mean its a leaf node) */ + Node *child1 = nullptr, *child2 = nullptr; + }; + + using NodePtr = Node*; + using NodeConstPtr = const Node*; + + struct Interval { + ElementType low, high; + }; + + NodePtr root_node_ = nullptr; + + Size leaf_max_size_ = 0; + + /// Number of thread for concurrent tree build + Size n_thread_build_ = 1; + /// Number of current points in the dataset + Size size_ = 0; + /// Number of points in the dataset when the index was built + Size size_at_index_build_ = 0; + Dimension dim_ = 0; //!< Dimensionality of each data point + + /** Define "BoundingBox" as a fixed-size or variable-size container + * depending on "DIM" */ + using BoundingBox = typename array_or_vector::type; + + /** Define "distance_vector_t" as a fixed-size or variable-size container + * depending on "DIM" */ + using distance_vector_t = typename array_or_vector::type; + + /** The KD-tree used to find neighbours */ + BoundingBox root_bbox_; + + /** + * Pooled memory allocator. + * + * Using a pooled memory allocator is more efficient + * than allocating memory directly when there is a large + * number small of memory allocations. + */ + PooledAllocator pool_; + + /** Returns number of points in dataset */ + Size size(const Derived& obj) const { return obj.size_; } + + /** Returns the length of each point in the dataset */ + Size veclen(const Derived& obj) { return DIM > 0 ? DIM : obj.dim; } + + /// Helper accessor to the dataset points: + ElementType dataset_get(const Derived& obj, + IndexType element, + Dimension component) const { + return obj.dataset_.kdtree_get_pt(element, component); + } + + /** + * Computes the inde memory usage + * Returns: memory used by the index + */ + Size usedMemory(Derived& obj) { + return obj.pool_.usedMemory + obj.pool_.wastedMemory + + obj.dataset_.kdtree_get_point_count() + * sizeof(IndexType); // pool memory and vind array memory + } + + void computeMinMax(const Derived& obj, + Offset ind, + Size count, + Dimension element, + ElementType& min_elem, + ElementType& max_elem) { + min_elem = dataset_get(obj, vAcc_[ind], element); + max_elem = min_elem; + for (Offset i = 1; i < count; ++i) { + ElementType val = dataset_get(obj, vAcc_[ind + i], element); + if (val < min_elem) + min_elem = val; + if (val > max_elem) + max_elem = val; + } + } + + /** + * Create a tree node that subdivides the list of vecs from vind[first] + * to vind[last]. The routine is called recursively on each sublist. + * + * @param left index of the first vector + * @param right index of the last vector + */ + NodePtr divideTree(Derived& obj, + const Offset left, + const Offset right, + BoundingBox& bbox) { + NodePtr node = obj.pool_.template allocate(); // allocate memory + const auto dims = (DIM > 0 ? DIM : obj.dim_); + + /* If too few exemplars remain, then make this a leaf node. */ + if ((right - left) <= static_cast(obj.leaf_max_size_)) { + node->child1 = node->child2 = nullptr; /* Mark as leaf node. */ + node->node_type.lr.left = left; + node->node_type.lr.right = right; + + // compute bounding-box of leaf points + for (Dimension i = 0; i < dims; ++i) { + bbox[i].low = dataset_get(obj, obj.vAcc_[left], i); + bbox[i].high = dataset_get(obj, obj.vAcc_[left], i); + } + for (Offset k = left + 1; k < right; ++k) { + for (Dimension i = 0; i < dims; ++i) { + const auto val = dataset_get(obj, obj.vAcc_[k], i); + if (bbox[i].low > val) + bbox[i].low = val; + if (bbox[i].high < val) + bbox[i].high = val; } - - return node; - } - - void middleSplit_( - const Derived& obj, const Offset ind, const Size count, Offset& index, - Dimension& cutfeat, DistanceType& cutval, const BoundingBox& bbox) - { - const auto dims = (DIM > 0 ? DIM : obj.dim_); - const auto EPS = static_cast(0.00001); - ElementType max_span = bbox[0].high - bbox[0].low; - for (Dimension i = 1; i < dims; ++i) - { - ElementType span = bbox[i].high - bbox[i].low; - if (span > max_span) { max_span = span; } + } + } else { + Offset idx; + Dimension cutfeat; + DistanceType cutval; + middleSplit_(obj, left, right - left, idx, cutfeat, cutval, bbox); + + node->node_type.sub.divfeat = cutfeat; + + BoundingBox left_bbox(bbox); + left_bbox[cutfeat].high = cutval; + node->child1 = this->divideTree(obj, left, left + idx, left_bbox); + + BoundingBox right_bbox(bbox); + right_bbox[cutfeat].low = cutval; + node->child2 = this->divideTree(obj, left + idx, right, right_bbox); + + node->node_type.sub.divlow = left_bbox[cutfeat].high; + node->node_type.sub.divhigh = right_bbox[cutfeat].low; + + for (Dimension i = 0; i < dims; ++i) { + bbox[i].low = std::min(left_bbox[i].low, right_bbox[i].low); + bbox[i].high = std::max(left_bbox[i].high, right_bbox[i].high); + } + } + + return node; + } + + /** + * Create a tree node that subdivides the list of vecs from vind[first] to + * vind[last] concurrently. The routine is called recursively on each + * sublist. + * + * @param left index of the first vector + * @param right index of the last vector + * @param thread_count count of std::async threads + * @param mutex mutex for mempool allocation + */ + NodePtr divideTreeConcurrent(Derived& obj, + const Offset left, + const Offset right, + BoundingBox& bbox, + std::atomic& thread_count, + std::mutex& mutex) { + std::unique_lock lock(mutex); + NodePtr node = obj.pool_.template allocate(); // allocate memory + lock.unlock(); + + const auto dims = (DIM > 0 ? DIM : obj.dim_); + + /* If too few exemplars remain, then make this a leaf node. */ + if ((right - left) <= static_cast(obj.leaf_max_size_)) { + node->child1 = node->child2 = nullptr; /* Mark as leaf node. */ + node->node_type.lr.left = left; + node->node_type.lr.right = right; + + // compute bounding-box of leaf points + for (Dimension i = 0; i < dims; ++i) { + bbox[i].low = dataset_get(obj, obj.vAcc_[left], i); + bbox[i].high = dataset_get(obj, obj.vAcc_[left], i); + } + for (Offset k = left + 1; k < right; ++k) { + for (Dimension i = 0; i < dims; ++i) { + const auto val = dataset_get(obj, obj.vAcc_[k], i); + if (bbox[i].low > val) + bbox[i].low = val; + if (bbox[i].high < val) + bbox[i].high = val; } - ElementType max_spread = -1; - cutfeat = 0; - ElementType min_elem = 0, max_elem = 0; - for (Dimension i = 0; i < dims; ++i) - { - ElementType span = bbox[i].high - bbox[i].low; - if (span > (1 - EPS) * max_span) - { - ElementType min_elem_, max_elem_; - computeMinMax(obj, ind, count, i, min_elem_, max_elem_); - ElementType spread = max_elem_ - min_elem_; - if (spread > max_spread) - { - cutfeat = i; - max_spread = spread; - min_elem = min_elem_; - max_elem = max_elem_; - } - } + } + } else { + Offset idx; + Dimension cutfeat; + DistanceType cutval; + middleSplit_(obj, left, right - left, idx, cutfeat, cutval, bbox); + + node->node_type.sub.divfeat = cutfeat; + + std::future right_future; + + BoundingBox right_bbox(bbox); + right_bbox[cutfeat].low = cutval; + if (++thread_count < n_thread_build_) { + // Concurrent right sub-tree + right_future = std::async(std::launch::async, + &KDTreeBaseClass::divideTreeConcurrent, + this, + std::ref(obj), + left + idx, + right, + std::ref(right_bbox), + std::ref(thread_count), + std::ref(mutex)); + } else { + --thread_count; + } + + BoundingBox left_bbox(bbox); + left_bbox[cutfeat].high = cutval; + node->child1 = this->divideTreeConcurrent(obj, + left, + left + idx, + left_bbox, + thread_count, + mutex); + + if (right_future.valid()) { + // Block and wait for concurrent right sub-tree + node->child2 = right_future.get(); + --thread_count; + } else { + node->child2 = this->divideTreeConcurrent(obj, + left + idx, + right, + right_bbox, + thread_count, + mutex); + } + + node->node_type.sub.divlow = left_bbox[cutfeat].high; + node->node_type.sub.divhigh = right_bbox[cutfeat].low; + + for (Dimension i = 0; i < dims; ++i) { + bbox[i].low = std::min(left_bbox[i].low, right_bbox[i].low); + bbox[i].high = std::max(left_bbox[i].high, right_bbox[i].high); + } + } + + return node; + } + + void middleSplit_(const Derived& obj, + const Offset ind, + const Size count, + Offset& index, + Dimension& cutfeat, + DistanceType& cutval, + const BoundingBox& bbox) { + const auto dims = (DIM > 0 ? DIM : obj.dim_); + const auto EPS = static_cast(0.00001); + ElementType max_span = bbox[0].high - bbox[0].low; + for (Dimension i = 1; i < dims; ++i) { + ElementType span = bbox[i].high - bbox[i].low; + if (span > max_span) { + max_span = span; + } + } + ElementType max_spread = -1; + cutfeat = 0; + ElementType min_elem = 0, max_elem = 0; + for (Dimension i = 0; i < dims; ++i) { + ElementType span = bbox[i].high - bbox[i].low; + if (span > (1 - EPS) * max_span) { + ElementType min_elem_, max_elem_; + computeMinMax(obj, ind, count, i, min_elem_, max_elem_); + ElementType spread = max_elem_ - min_elem_; + if (spread > max_spread) { + cutfeat = i; + max_spread = spread; + min_elem = min_elem_; + max_elem = max_elem_; } - // split in the middle - DistanceType split_val = (bbox[cutfeat].low + bbox[cutfeat].high) / 2; - - if (split_val < min_elem) - cutval = min_elem; - else if (split_val > max_elem) - cutval = max_elem; - else - cutval = split_val; - - Offset lim1, lim2; - planeSplit(obj, ind, count, cutfeat, cutval, lim1, lim2); - - if (lim1 > count / 2) - index = lim1; - else if (lim2 < count / 2) - index = lim2; - else - index = count / 2; - } - - /** - * Subdivide the list of points by a plane perpendicular on the axis - * corresponding to the 'cutfeat' dimension at 'cutval' position. - * - * On return: - * dataset[ind[0..lim1-1]][cutfeat]cutval + } + } + // split in the middle + DistanceType split_val = (bbox[cutfeat].low + bbox[cutfeat].high) / 2; + + if (split_val < min_elem) + cutval = min_elem; + else if (split_val > max_elem) + cutval = max_elem; + else + cutval = split_val; + + Offset lim1, lim2; + planeSplit(obj, ind, count, cutfeat, cutval, lim1, lim2); + + if (lim1 > count / 2) + index = lim1; + else if (lim2 < count / 2) + index = lim2; + else + index = count / 2; + } + + /** + * Subdivide the list of points by a plane perpendicular on the axis + * corresponding to the 'cutfeat' dimension at 'cutval' position. + * + * On return: + * dataset[ind[0..lim1-1]][cutfeat]cutval + */ + void planeSplit(const Derived& obj, + const Offset ind, + const Size count, + const Dimension cutfeat, + const DistanceType& cutval, + Offset& lim1, + Offset& lim2) { + /* Move vector indices for left subtree to front of list. */ + Offset left = 0; + Offset right = count - 1; + for (;;) { + while (left <= right + && dataset_get(obj, vAcc_[ind + left], cutfeat) < cutval) + ++left; + while (right && left <= right + && dataset_get(obj, vAcc_[ind + right], cutfeat) >= cutval) + --right; + if (left > right || !right) + break; // "!right" was added to support unsigned Index types + std::swap(vAcc_[ind + left], vAcc_[ind + right]); + ++left; + --right; + } + /* If either list is empty, it means that all remaining features + * are identical. Split in the middle to maintain a balanced tree. */ - void planeSplit( - const Derived& obj, const Offset ind, const Size count, - const Dimension cutfeat, const DistanceType& cutval, Offset& lim1, - Offset& lim2) - { - /* Move vector indices for left subtree to front of list. */ - Offset left = 0; - Offset right = count - 1; - for (;;) - { - while (left <= right && - dataset_get(obj, vAcc_[ind + left], cutfeat) < cutval) - ++left; - while (right && left <= right && - dataset_get(obj, vAcc_[ind + right], cutfeat) >= cutval) - --right; - if (left > right || !right) - break; // "!right" was added to support unsigned Index types - std::swap(vAcc_[ind + left], vAcc_[ind + right]); - ++left; - --right; - } - /* If either list is empty, it means that all remaining features - * are identical. Split in the middle to maintain a balanced tree. - */ - lim1 = left; - right = count - 1; - for (;;) - { - while (left <= right && - dataset_get(obj, vAcc_[ind + left], cutfeat) <= cutval) - ++left; - while (right && left <= right && - dataset_get(obj, vAcc_[ind + right], cutfeat) > cutval) - --right; - if (left > right || !right) - break; // "!right" was added to support unsigned Index types - std::swap(vAcc_[ind + left], vAcc_[ind + right]); - ++left; - --right; - } - lim2 = left; - } - - DistanceType computeInitialDistances( - const Derived& obj, const ElementType* vec, - distance_vector_t& dists) const - { - assert(vec); - DistanceType dist = DistanceType(); - - for (Dimension i = 0; i < (DIM > 0 ? DIM : obj.dim_); ++i) - { - if (vec[i] < obj.root_bbox_[i].low) - { - dists[i] = - obj.distance_.accum_dist(vec[i], obj.root_bbox_[i].low, i); - dist += dists[i]; - } - if (vec[i] > obj.root_bbox_[i].high) - { - dists[i] = - obj.distance_.accum_dist(vec[i], obj.root_bbox_[i].high, i); - dist += dists[i]; - } - } - return dist; - } - - static void save_tree( - const Derived& obj, std::ostream& stream, const NodeConstPtr tree) - { - save_value(stream, *tree); - if (tree->child1 != nullptr) { save_tree(obj, stream, tree->child1); } - if (tree->child2 != nullptr) { save_tree(obj, stream, tree->child2); } - } - - static void load_tree(Derived& obj, std::istream& stream, NodePtr& tree) - { - tree = obj.pool_.template allocate(); - load_value(stream, *tree); - if (tree->child1 != nullptr) { load_tree(obj, stream, tree->child1); } - if (tree->child2 != nullptr) { load_tree(obj, stream, tree->child2); } - } - - /** Stores the index in a binary file. - * IMPORTANT NOTE: The set of data points is NOT stored in the file, so - * when loading the index object it must be constructed associated to the - * same source of data points used while building it. See the example: - * examples/saveload_example.cpp \sa loadIndex */ - void saveIndex(const Derived& obj, std::ostream& stream) const - { - save_value(stream, obj.size_); - save_value(stream, obj.dim_); - save_value(stream, obj.root_bbox_); - save_value(stream, obj.leaf_max_size_); - save_value(stream, obj.vAcc_); - if (obj.root_node_) save_tree(obj, stream, obj.root_node_); - } - - /** Loads a previous index from a binary file. - * IMPORTANT NOTE: The set of data points is NOT stored in the file, so - * the index object must be constructed associated to the same source of - * data points used while building the index. See the example: - * examples/saveload_example.cpp \sa loadIndex */ - void loadIndex(Derived& obj, std::istream& stream) - { - load_value(stream, obj.size_); - load_value(stream, obj.dim_); - load_value(stream, obj.root_bbox_); - load_value(stream, obj.leaf_max_size_); - load_value(stream, obj.vAcc_); - load_tree(obj, stream, obj.root_node_); - } + lim1 = left; + right = count - 1; + for (;;) { + while (left <= right + && dataset_get(obj, vAcc_[ind + left], cutfeat) <= cutval) + ++left; + while (right && left <= right + && dataset_get(obj, vAcc_[ind + right], cutfeat) > cutval) + --right; + if (left > right || !right) + break; // "!right" was added to support unsigned Index types + std::swap(vAcc_[ind + left], vAcc_[ind + right]); + ++left; + --right; + } + lim2 = left; + } + + DistanceType computeInitialDistances(const Derived& obj, + const ElementType* vec, + distance_vector_t& dists) const { + assert(vec); + DistanceType dist = DistanceType(); + + for (Dimension i = 0; i < (DIM > 0 ? DIM : obj.dim_); ++i) { + if (vec[i] < obj.root_bbox_[i].low) { + dists[i] = obj.distance_.accum_dist(vec[i], obj.root_bbox_[i].low, i); + dist += dists[i]; + } + if (vec[i] > obj.root_bbox_[i].high) { + dists[i] = obj.distance_.accum_dist(vec[i], obj.root_bbox_[i].high, i); + dist += dists[i]; + } + } + return dist; + } + + static void + save_tree(const Derived& obj, std::ostream& stream, const NodeConstPtr tree) { + save_value(stream, *tree); + if (tree->child1 != nullptr) { + save_tree(obj, stream, tree->child1); + } + if (tree->child2 != nullptr) { + save_tree(obj, stream, tree->child2); + } + } + + static void load_tree(Derived& obj, std::istream& stream, NodePtr& tree) { + tree = obj.pool_.template allocate(); + load_value(stream, *tree); + if (tree->child1 != nullptr) { + load_tree(obj, stream, tree->child1); + } + if (tree->child2 != nullptr) { + load_tree(obj, stream, tree->child2); + } + } + + /** Stores the index in a binary file. + * IMPORTANT NOTE: The set of data points is NOT stored in the file, so + * when loading the index object it must be constructed associated to the + * same source of data points used while building it. See the example: + * examples/saveload_example.cpp \sa loadIndex */ + void saveIndex(const Derived& obj, std::ostream& stream) const { + save_value(stream, obj.size_); + save_value(stream, obj.dim_); + save_value(stream, obj.root_bbox_); + save_value(stream, obj.leaf_max_size_); + save_value(stream, obj.vAcc_); + if (obj.root_node_) + save_tree(obj, stream, obj.root_node_); + } + + /** Loads a previous index from a binary file. + * IMPORTANT NOTE: The set of data points is NOT stored in the file, so + * the index object must be constructed associated to the same source of + * data points used while building the index. See the example: + * examples/saveload_example.cpp \sa loadIndex */ + void loadIndex(Derived& obj, std::istream& stream) { + load_value(stream, obj.size_); + load_value(stream, obj.dim_); + load_value(stream, obj.root_bbox_); + load_value(stream, obj.leaf_max_size_); + load_value(stream, obj.vAcc_); + load_tree(obj, stream, obj.root_node_); + } }; /** @addtogroup kdtrees_grp KD-tree classes and adaptors @@ -1464,7 +1373,7 @@ class KDTreeBaseClass * non-virtual, inlined methods): * * \code - * // Must return the number of data poins + * // Must return the number of data points * size_t kdtree_get_point_count() const { ... } * * @@ -1493,443 +1402,426 @@ class KDTreeBaseClass * Dimensionality of data points (e.g. 3 for 3D points) \tparam IndexType Will * be typically size_t or int */ -template < - typename Distance, class DatasetAdaptor, int32_t DIM = -1, - typename IndexType = uint32_t> +template class KDTreeSingleIndexAdaptor : public KDTreeBaseClass< KDTreeSingleIndexAdaptor, - Distance, DatasetAdaptor, DIM, IndexType> -{ - public: - /** Deleted copy constructor*/ - explicit KDTreeSingleIndexAdaptor( - const KDTreeSingleIndexAdaptor< - Distance, DatasetAdaptor, DIM, IndexType>&) = delete; - - /** The data source used by this index */ - const DatasetAdaptor& dataset_; - - const KDTreeSingleIndexAdaptorParams indexParams; - - Distance distance_; - - using Base = typename nanoflann::KDTreeBaseClass< - nanoflann::KDTreeSingleIndexAdaptor< - Distance, DatasetAdaptor, DIM, IndexType>, - Distance, DatasetAdaptor, DIM, IndexType>; - - using Offset = typename Base::Offset; - using Size = typename Base::Size; - using Dimension = typename Base::Dimension; - - using ElementType = typename Base::ElementType; - using DistanceType = typename Base::DistanceType; - - using Node = typename Base::Node; - using NodePtr = Node*; - - using Interval = typename Base::Interval; - - /** Define "BoundingBox" as a fixed-size or variable-size container - * depending on "DIM" */ - using BoundingBox = typename Base::BoundingBox; - - /** Define "distance_vector_t" as a fixed-size or variable-size container - * depending on "DIM" */ - using distance_vector_t = typename Base::distance_vector_t; - - /** - * KDTree constructor - * - * Refer to docs in README.md or online in - * https://github.com/jlblancoc/nanoflann - * - * The KD-Tree point dimension (the length of each point in the datase, e.g. - * 3 for 3D points) is determined by means of: - * - The \a DIM template parameter if >0 (highest priority) - * - Otherwise, the \a dimensionality parameter of this constructor. - * - * @param inputData Dataset with the input features. Its lifetime must be - * equal or longer than that of the instance of this class. - * @param params Basically, the maximum leaf node size - * - * Note that there is a variable number of optional additional parameters - * which will be forwarded to the metric class constructor. Refer to example - * `examples/pointcloud_custom_metric.cpp` for a use case. - * - */ - template - explicit KDTreeSingleIndexAdaptor( - const Dimension dimensionality, const DatasetAdaptor& inputData, - const KDTreeSingleIndexAdaptorParams& params, Args&&... args) - : dataset_(inputData), - indexParams(params), - distance_(inputData, std::forward(args)...) - { - init(dimensionality, params); - } - - explicit KDTreeSingleIndexAdaptor( - const Dimension dimensionality, const DatasetAdaptor& inputData, - const KDTreeSingleIndexAdaptorParams& params = {}) - : dataset_(inputData), indexParams(params), distance_(inputData) - { - init(dimensionality, params); - } - - private: - void init( - const Dimension dimensionality, - const KDTreeSingleIndexAdaptorParams& params) - { - Base::size_ = dataset_.kdtree_get_point_count(); - Base::size_at_index_build_ = Base::size_; - Base::dim_ = dimensionality; - if (DIM > 0) Base::dim_ = DIM; - Base::leaf_max_size_ = params.leaf_max_size; - if (params.n_thread_build > 0) - { - Base::n_thread_build_ = params.n_thread_build; - } - else - { - Base::n_thread_build_ = - std::max(std::thread::hardware_concurrency(), 1u); - } - - if (!(params.flags & - KDTreeSingleIndexAdaptorFlags::SkipInitialBuildIndex)) - { - // Build KD-tree: - buildIndex(); - } - } - - public: - /** - * Builds the index - */ - void buildIndex() - { - Base::size_ = dataset_.kdtree_get_point_count(); - Base::size_at_index_build_ = Base::size_; - init_vind(); - this->freeIndex(*this); - Base::size_at_index_build_ = Base::size_; - if (Base::size_ == 0) return; - computeBoundingBox(Base::root_bbox_); - // construct the tree - if (Base::n_thread_build_ == 1) - { - Base::root_node_ = - this->divideTree(*this, 0, Base::size_, Base::root_bbox_); - } - else - { + Distance, + DatasetAdaptor, + DIM, + IndexType> { +public: + /** Deleted copy constructor*/ + explicit KDTreeSingleIndexAdaptor( + const KDTreeSingleIndexAdaptor&) = delete; + + /** The data source used by this index */ + const DatasetAdaptor& dataset_; + + const KDTreeSingleIndexAdaptorParams indexParams; + + Distance distance_; + + using Base = typename nanoflann::KDTreeBaseClass< + nanoflann:: + KDTreeSingleIndexAdaptor, + Distance, + DatasetAdaptor, + DIM, + IndexType>; + + using Offset = typename Base::Offset; + using Size = typename Base::Size; + using Dimension = typename Base::Dimension; + + using ElementType = typename Base::ElementType; + using DistanceType = typename Base::DistanceType; + + using Node = typename Base::Node; + using NodePtr = Node*; + + using Interval = typename Base::Interval; + + /** Define "BoundingBox" as a fixed-size or variable-size container + * depending on "DIM" */ + using BoundingBox = typename Base::BoundingBox; + + /** Define "distance_vector_t" as a fixed-size or variable-size container + * depending on "DIM" */ + using distance_vector_t = typename Base::distance_vector_t; + + /** + * KDTree constructor + * + * Refer to docs in README.md or online in + * https://github.com/jlblancoc/nanoflann + * + * The KD-Tree point dimension (the length of each point in the datase, e.g. + * 3 for 3D points) is determined by means of: + * - The \a DIM template parameter if >0 (highest priority) + * - Otherwise, the \a dimensionality parameter of this constructor. + * + * @param inputData Dataset with the input features. Its lifetime must be + * equal or longer than that of the instance of this class. + * @param params Basically, the maximum leaf node size + * + * Note that there is a variable number of optional additional parameters + * which will be forwarded to the metric class constructor. Refer to example + * `examples/pointcloud_custom_metric.cpp` for a use case. + * + */ + template + explicit KDTreeSingleIndexAdaptor( + const Dimension dimensionality, + const DatasetAdaptor& inputData, + const KDTreeSingleIndexAdaptorParams& params, + Args&&... args) + : dataset_(inputData), + indexParams(params), + distance_(inputData, std::forward(args)...) { + init(dimensionality, params); + } + + explicit KDTreeSingleIndexAdaptor( + const Dimension dimensionality, + const DatasetAdaptor& inputData, + const KDTreeSingleIndexAdaptorParams& params = {}) + : dataset_(inputData), + indexParams(params), + distance_(inputData) { + init(dimensionality, params); + } + +private: + void init(const Dimension dimensionality, + const KDTreeSingleIndexAdaptorParams& params) { + Base::size_ = dataset_.kdtree_get_point_count(); + Base::size_at_index_build_ = Base::size_; + Base::dim_ = dimensionality; + if (DIM > 0) + Base::dim_ = DIM; + Base::leaf_max_size_ = params.leaf_max_size; + if (params.n_thread_build > 0) { + Base::n_thread_build_ = params.n_thread_build; + } else { + Base::n_thread_build_ = std::max(std::thread::hardware_concurrency(), 1u); + } + + if (!(params.flags + & KDTreeSingleIndexAdaptorFlags::SkipInitialBuildIndex)) { + // Build KD-tree: + buildIndex(); + } + } + +public: + /** + * Builds the index + */ + void buildIndex() { + Base::size_ = dataset_.kdtree_get_point_count(); + Base::size_at_index_build_ = Base::size_; + init_vind(); + this->freeIndex(*this); + Base::size_at_index_build_ = Base::size_; + if (Base::size_ == 0) + return; + computeBoundingBox(Base::root_bbox_); + // construct the tree + if (Base::n_thread_build_ == 1) { + Base::root_node_ = + this->divideTree(*this, 0, Base::size_, Base::root_bbox_); + } else { #ifndef NANOFLANN_NO_THREADS - std::atomic thread_count(0u); - std::mutex mutex; - Base::root_node_ = this->divideTreeConcurrent( - *this, 0, Base::size_, Base::root_bbox_, thread_count, mutex); -#else /* NANOFLANN_NO_THREADS */ - throw std::runtime_error("Multithreading is disabled"); + std::atomic thread_count(0u); + std::mutex mutex; + Base::root_node_ = this->divideTreeConcurrent(*this, + 0, + Base::size_, + Base::root_bbox_, + thread_count, + mutex); +#else /* NANOFLANN_NO_THREADS */ + throw std::runtime_error("Multithreading is disabled"); #endif /* NANOFLANN_NO_THREADS */ - } - } - - /** \name Query methods - * @{ */ - - /** - * Find set of nearest neighbors to vec[0:dim-1]. Their indices are stored - * inside the result object. - * - * Params: - * result = the result object in which the indices of the - * nearest-neighbors are stored vec = the vector for which to search the - * nearest neighbors - * - * \tparam RESULTSET Should be any ResultSet - * \return True if the requested neighbors could be found. - * \sa knnSearch, radiusSearch - * - * \note If L2 norms are used, all returned distances are actually squared - * distances. - */ - template - bool findNeighbors( - RESULTSET& result, const ElementType* vec, - const SearchParameters& searchParams = {}) const - { - assert(vec); - if (this->size(*this) == 0) return false; - if (!Base::root_node_) - throw std::runtime_error( - "[nanoflann] findNeighbors() called before building the " - "index."); - float epsError = 1 + searchParams.eps; - - // fixed or variable-sized container (depending on DIM) - distance_vector_t dists; - // Fill it with zeros. - auto zero = static_cast(0); - assign(dists, (DIM > 0 ? DIM : Base::dim_), zero); - DistanceType dist = this->computeInitialDistances(*this, vec, dists); - searchLevel(result, vec, Base::root_node_, dist, dists, epsError); - return result.full(); - } - - /** - * Find the "num_closest" nearest neighbors to the \a query_point[0:dim-1]. - * Their indices and distances are stored in the provided pointers to - * array/vector. - * - * \sa radiusSearch, findNeighbors - * \return Number `N` of valid points in the result set. - * - * \note If L2 norms are used, all returned distances are actually squared - * distances. - * - * \note Only the first `N` entries in `out_indices` and `out_distances` - * will be valid. Return is less than `num_closest` only if the - * number of elements in the tree is less than `num_closest`. - */ - Size knnSearch( - const ElementType* query_point, const Size num_closest, - IndexType* out_indices, DistanceType* out_distances) const - { - nanoflann::KNNResultSet resultSet(num_closest); - resultSet.init(out_indices, out_distances); - findNeighbors(resultSet, query_point); - return resultSet.size(); - } - - /** - * Find all the neighbors to \a query_point[0:dim-1] within a maximum - * radius. The output is given as a vector of pairs, of which the first - * element is a point index and the second the corresponding distance. - * Previous contents of \a IndicesDists are cleared. - * - * If searchParams.sorted==true, the output list is sorted by ascending - * distances. - * - * For a better performance, it is advisable to do a .reserve() on the - * vector if you have any wild guess about the number of expected matches. - * - * \sa knnSearch, findNeighbors, radiusSearchCustomCallback - * \return The number of points within the given radius (i.e. indices.size() - * or dists.size() ) - * - * \note If L2 norms are used, search radius and all returned distances - * are actually squared distances. - */ - Size radiusSearch( - const ElementType* query_point, const DistanceType& radius, - std::vector>& IndicesDists, - const SearchParameters& searchParams = {}) const - { - RadiusResultSet resultSet( - radius, IndicesDists); - const Size nFound = - radiusSearchCustomCallback(query_point, resultSet, searchParams); - if (searchParams.sorted) - std::sort( - IndicesDists.begin(), IndicesDists.end(), IndexDist_Sorter()); - return nFound; - } - - /** - * Just like radiusSearch() but with a custom callback class for each point - * found in the radius of the query. See the source of RadiusResultSet<> as - * a start point for your own classes. \sa radiusSearch - */ - template - Size radiusSearchCustomCallback( - const ElementType* query_point, SEARCH_CALLBACK& resultSet, - const SearchParameters& searchParams = {}) const - { - findNeighbors(resultSet, query_point, searchParams); - return resultSet.size(); - } - - /** - * Find the first N neighbors to \a query_point[0:dim-1] within a maximum - * radius. The output is given as a vector of pairs, of which the first - * element is a point index and the second the corresponding distance. - * Previous contents of \a IndicesDists are cleared. - * - * \sa radiusSearch, findNeighbors - * \return Number `N` of valid points in the result set. - * - * \note If L2 norms are used, all returned distances are actually squared - * distances. - * - * \note Only the first `N` entries in `out_indices` and `out_distances` - * will be valid. Return is less than `num_closest` only if the - * number of elements in the tree is less than `num_closest`. - */ - Size rknnSearch( - const ElementType* query_point, const Size num_closest, - IndexType* out_indices, DistanceType* out_distances, - const DistanceType& radius) const - { - nanoflann::RKNNResultSet resultSet( - num_closest, radius); - resultSet.init(out_indices, out_distances); - findNeighbors(resultSet, query_point); - return resultSet.size(); - } - - /** @} */ - - public: - /** Make sure the auxiliary list \a vind has the same size than the current - * dataset, and re-generate if size has changed. */ - void init_vind() - { - // Create a permutable array of indices to the input vectors. - Base::size_ = dataset_.kdtree_get_point_count(); - if (Base::vAcc_.size() != Base::size_) Base::vAcc_.resize(Base::size_); - for (Size i = 0; i < Base::size_; i++) Base::vAcc_[i] = i; - } - - void computeBoundingBox(BoundingBox& bbox) - { - const auto dims = (DIM > 0 ? DIM : Base::dim_); - resize(bbox, dims); - if (dataset_.kdtree_get_bbox(bbox)) - { - // Done! It was implemented in derived class - } - else - { - const Size N = dataset_.kdtree_get_point_count(); - if (!N) - throw std::runtime_error( - "[nanoflann] computeBoundingBox() called but " - "no data points found."); - for (Dimension i = 0; i < dims; ++i) - { - bbox[i].low = bbox[i].high = - this->dataset_get(*this, Base::vAcc_[0], i); - } - for (Offset k = 1; k < N; ++k) - { - for (Dimension i = 0; i < dims; ++i) - { - const auto val = - this->dataset_get(*this, Base::vAcc_[k], i); - if (val < bbox[i].low) bbox[i].low = val; - if (val > bbox[i].high) bbox[i].high = val; - } - } - } } - - /** - * Performs an exact search in the tree starting from a node. - * \tparam RESULTSET Should be any ResultSet - * \return true if the search should be continued, false if the results are - * sufficient - */ - template - bool searchLevel( - RESULTSET& result_set, const ElementType* vec, const NodePtr node, - DistanceType mindist, distance_vector_t& dists, - const float epsError) const - { - /* If this is a leaf node, then do check and return. */ - if ((node->child1 == nullptr) && (node->child2 == nullptr)) - { - DistanceType worst_dist = result_set.worstDist(); - for (Offset i = node->node_type.lr.left; - i < node->node_type.lr.right; ++i) - { - const IndexType accessor = Base::vAcc_[i]; // reorder... : i; - DistanceType dist = distance_.evalMetric( - vec, accessor, (DIM > 0 ? DIM : Base::dim_)); - if (dist < worst_dist) - { - if (!result_set.addPoint(dist, Base::vAcc_[i])) - { - // the resultset doesn't want to receive any more - // points, we're done searching! - return false; - } - } - } - return true; - } - - /* Which child branch should be taken first? */ - Dimension idx = node->node_type.sub.divfeat; - ElementType val = vec[idx]; - DistanceType diff1 = val - node->node_type.sub.divlow; - DistanceType diff2 = val - node->node_type.sub.divhigh; - - NodePtr bestChild; - NodePtr otherChild; - DistanceType cut_dist; - if ((diff1 + diff2) < 0) - { - bestChild = node->child1; - otherChild = node->child2; - cut_dist = - distance_.accum_dist(val, node->node_type.sub.divhigh, idx); - } - else - { - bestChild = node->child2; - otherChild = node->child1; - cut_dist = - distance_.accum_dist(val, node->node_type.sub.divlow, idx); + } + + /** \name Query methods + * @{ */ + + /** + * Find set of nearest neighbors to vec[0:dim-1]. Their indices are stored + * inside the result object. + * + * Params: + * result = the result object in which the indices of the + * nearest-neighbors are stored vec = the vector for which to search the + * nearest neighbors + * + * \tparam RESULTSET Should be any ResultSet + * \return True if the requested neighbors could be found. + * \sa knnSearch, radiusSearch + * + * \note If L2 norms are used, all returned distances are actually squared + * distances. + */ + template + bool findNeighbors(RESULTSET& result, + const ElementType* vec, + const SearchParameters& searchParams = {}) const { + assert(vec); + if (this->size(*this) == 0) + return false; + if (!Base::root_node_) + throw std::runtime_error( + "[nanoflann] findNeighbors() called before building the " + "index."); + float epsError = 1 + searchParams.eps; + + // fixed or variable-sized container (depending on DIM) + distance_vector_t dists; + // Fill it with zeros. + auto zero = static_cast(0); + assign(dists, (DIM > 0 ? DIM : Base::dim_), zero); + DistanceType dist = this->computeInitialDistances(*this, vec, dists); + searchLevel(result, vec, Base::root_node_, dist, dists, epsError); + return result.full(); + } + + /** + * Find the "num_closest" nearest neighbors to the \a query_point[0:dim-1]. + * Their indices and distances are stored in the provided pointers to + * array/vector. + * + * \sa radiusSearch, findNeighbors + * \return Number `N` of valid points in the result set. + * + * \note If L2 norms are used, all returned distances are actually squared + * distances. + * + * \note Only the first `N` entries in `out_indices` and `out_distances` + * will be valid. Return is less than `num_closest` only if the + * number of elements in the tree is less than `num_closest`. + */ + Size knnSearch(const ElementType* query_point, + const Size num_closest, + IndexType* out_indices, + DistanceType* out_distances) const { + nanoflann::KNNResultSet resultSet(num_closest); + resultSet.init(out_indices, out_distances); + findNeighbors(resultSet, query_point); + return resultSet.size(); + } + + /** + * Find all the neighbors to \a query_point[0:dim-1] within a maximum + * radius. The output is given as a vector of pairs, of which the first + * element is a point index and the second the corresponding distance. + * Previous contents of \a IndicesDists are cleared. + * + * If searchParams.sorted==true, the output list is sorted by ascending + * distances. + * + * For a better performance, it is advisable to do a .reserve() on the + * vector if you have any wild guess about the number of expected matches. + * + * \sa knnSearch, findNeighbors, radiusSearchCustomCallback + * \return The number of points within the given radius (i.e. indices.size() + * or dists.size() ) + * + * \note If L2 norms are used, search radius and all returned distances + * are actually squared distances. + */ + Size + radiusSearch(const ElementType* query_point, + const DistanceType& radius, + std::vector>& IndicesDists, + const SearchParameters& searchParams = {}) const { + RadiusResultSet resultSet(radius, IndicesDists); + const Size nFound = + radiusSearchCustomCallback(query_point, resultSet, searchParams); + if (searchParams.sorted) + std::sort(IndicesDists.begin(), IndicesDists.end(), IndexDist_Sorter()); + return nFound; + } + + /** + * Just like radiusSearch() but with a custom callback class for each point + * found in the radius of the query. See the source of RadiusResultSet<> as + * a start point for your own classes. \sa radiusSearch + */ + template + Size + radiusSearchCustomCallback(const ElementType* query_point, + SEARCH_CALLBACK& resultSet, + const SearchParameters& searchParams = {}) const { + findNeighbors(resultSet, query_point, searchParams); + return resultSet.size(); + } + + /** + * Find the first N neighbors to \a query_point[0:dim-1] within a maximum + * radius. The output is given as a vector of pairs, of which the first + * element is a point index and the second the corresponding distance. + * Previous contents of \a IndicesDists are cleared. + * + * \sa radiusSearch, findNeighbors + * \return Number `N` of valid points in the result set. + * + * \note If L2 norms are used, all returned distances are actually squared + * distances. + * + * \note Only the first `N` entries in `out_indices` and `out_distances` + * will be valid. Return is less than `num_closest` only if the + * number of elements in the tree is less than `num_closest`. + */ + Size rknnSearch(const ElementType* query_point, + const Size num_closest, + IndexType* out_indices, + DistanceType* out_distances, + const DistanceType& radius) const { + nanoflann::RKNNResultSet resultSet(num_closest, + radius); + resultSet.init(out_indices, out_distances); + findNeighbors(resultSet, query_point); + return resultSet.size(); + } + + /** @} */ + +public: + /** Make sure the auxiliary list \a vind has the same size than the current + * dataset, and re-generate if size has changed. */ + void init_vind() { + // Create a permutable array of indices to the input vectors. + Base::size_ = dataset_.kdtree_get_point_count(); + if (Base::vAcc_.size() != Base::size_) + Base::vAcc_.resize(Base::size_); + for (Size i = 0; i < Base::size_; i++) + Base::vAcc_[i] = i; + } + + void computeBoundingBox(BoundingBox& bbox) { + const auto dims = (DIM > 0 ? DIM : Base::dim_); + resize(bbox, dims); + if (dataset_.kdtree_get_bbox(bbox)) { + // Done! It was implemented in derived class + } else { + const Size N = dataset_.kdtree_get_point_count(); + if (!N) + throw std::runtime_error("[nanoflann] computeBoundingBox() called but " + "no data points found."); + for (Dimension i = 0; i < dims; ++i) { + bbox[i].low = bbox[i].high = + this->dataset_get(*this, Base::vAcc_[0], i); + } + for (Offset k = 1; k < N; ++k) { + for (Dimension i = 0; i < dims; ++i) { + const auto val = this->dataset_get(*this, Base::vAcc_[k], i); + if (val < bbox[i].low) + bbox[i].low = val; + if (val > bbox[i].high) + bbox[i].high = val; } - - /* Call recursively to search next level down. */ - if (!searchLevel(result_set, vec, bestChild, mindist, dists, epsError)) - { - // the resultset doesn't want to receive any more points, we're done - // searching! + } + } + } + + /** + * Performs an exact search in the tree starting from a node. + * \tparam RESULTSET Should be any ResultSet + * \return true if the search should be continued, false if the results are + * sufficient + */ + template + bool searchLevel(RESULTSET& result_set, + const ElementType* vec, + const NodePtr node, + DistanceType mindist, + distance_vector_t& dists, + const float epsError) const { + /* If this is a leaf node, then do check and return. */ + if ((node->child1 == nullptr) && (node->child2 == nullptr)) { + DistanceType worst_dist = result_set.worstDist(); + for (Offset i = node->node_type.lr.left; i < node->node_type.lr.right; + ++i) { + const IndexType accessor = Base::vAcc_[i]; // reorder... : i; + DistanceType dist = + distance_.evalMetric(vec, accessor, (DIM > 0 ? DIM : Base::dim_)); + if (dist < worst_dist) { + if (!result_set.addPoint(dist, Base::vAcc_[i])) { + // the resultset doesn't want to receive any more + // points, we're done searching! return false; + } } - - DistanceType dst = dists[idx]; - mindist = mindist + cut_dist - dst; - dists[idx] = cut_dist; - if (mindist * epsError <= result_set.worstDist()) - { - if (!searchLevel( - result_set, vec, otherChild, mindist, dists, epsError)) - { - // the resultset doesn't want to receive any more points, we're - // done searching! - return false; - } - } - dists[idx] = dst; - return true; + } + return true; + } + + /* Which child branch should be taken first? */ + Dimension idx = node->node_type.sub.divfeat; + ElementType val = vec[idx]; + DistanceType diff1 = val - node->node_type.sub.divlow; + DistanceType diff2 = val - node->node_type.sub.divhigh; + + NodePtr bestChild; + NodePtr otherChild; + DistanceType cut_dist; + if ((diff1 + diff2) < 0) { + bestChild = node->child1; + otherChild = node->child2; + cut_dist = distance_.accum_dist(val, node->node_type.sub.divhigh, idx); + } else { + bestChild = node->child2; + otherChild = node->child1; + cut_dist = distance_.accum_dist(val, node->node_type.sub.divlow, idx); + } + + /* Call recursively to search next level down. */ + if (!searchLevel(result_set, vec, bestChild, mindist, dists, epsError)) { + // the resultset doesn't want to receive any more points, we're done + // searching! + return false; + } + + DistanceType dst = dists[idx]; + mindist = mindist + cut_dist - dst; + dists[idx] = cut_dist; + if (mindist * epsError <= result_set.worstDist()) { + if (!searchLevel(result_set, vec, otherChild, mindist, dists, epsError)) { + // the resultset doesn't want to receive any more points, we're + // done searching! + return false; + } } + dists[idx] = dst; + return true; + } - public: - /** Stores the index in a binary file. - * IMPORTANT NOTE: The set of data points is NOT stored in the file, so - * when loading the index object it must be constructed associated to the - * same source of data points used while building it. See the example: - * examples/saveload_example.cpp \sa loadIndex */ - void saveIndex(std::ostream& stream) const - { - Base::saveIndex(*this, stream); - } +public: + /** Stores the index in a binary file. + * IMPORTANT NOTE: The set of data points is NOT stored in the file, so + * when loading the index object it must be constructed associated to the + * same source of data points used while building it. See the example: + * examples/saveload_example.cpp \sa loadIndex */ + void saveIndex(std::ostream& stream) const { Base::saveIndex(*this, stream); } - /** Loads a previous index from a binary file. - * IMPORTANT NOTE: The set of data points is NOT stored in the file, so - * the index object must be constructed associated to the same source of - * data points used while building the index. See the example: - * examples/saveload_example.cpp \sa loadIndex */ - void loadIndex(std::istream& stream) { Base::loadIndex(*this, stream); } + /** Loads a previous index from a binary file. + * IMPORTANT NOTE: The set of data points is NOT stored in the file, so + * the index object must be constructed associated to the same source of + * data points used while building the index. See the example: + * examples/saveload_example.cpp \sa loadIndex */ + void loadIndex(std::istream& stream) { Base::loadIndex(*this, stream); } -}; // class KDTree +}; // class KDTree /** kd-tree dynamic index * @@ -1940,7 +1832,7 @@ class KDTreeSingleIndexAdaptor * non-virtual, inlined methods): * * \code - * // Must return the number of data poins + * // Must return the number of data points * size_t kdtree_get_point_count() const { ... } * * // Must return the dim'th component of the idx'th point in the class: @@ -1968,385 +1860,375 @@ class KDTreeSingleIndexAdaptor * \tparam IndexType Type of the arguments with which the data can be * accessed (e.g. float, double, int64_t, T*) */ -template < - typename Distance, class DatasetAdaptor, int32_t DIM = -1, - typename IndexType = uint32_t> +template class KDTreeSingleIndexDynamicAdaptor_ - : public KDTreeBaseClass< - KDTreeSingleIndexDynamicAdaptor_< - Distance, DatasetAdaptor, DIM, IndexType>, - Distance, DatasetAdaptor, DIM, IndexType> -{ - public: - /** - * The dataset used by this index - */ - const DatasetAdaptor& dataset_; //!< The source of our data - - KDTreeSingleIndexAdaptorParams index_params_; - - std::vector& treeIndex_; - - Distance distance_; - - using Base = typename nanoflann::KDTreeBaseClass< - nanoflann::KDTreeSingleIndexDynamicAdaptor_< - Distance, DatasetAdaptor, DIM, IndexType>, - Distance, DatasetAdaptor, DIM, IndexType>; - - using ElementType = typename Base::ElementType; - using DistanceType = typename Base::DistanceType; - - using Offset = typename Base::Offset; - using Size = typename Base::Size; - using Dimension = typename Base::Dimension; - - using Node = typename Base::Node; - using NodePtr = Node*; - - using Interval = typename Base::Interval; - /** Define "BoundingBox" as a fixed-size or variable-size container - * depending on "DIM" */ - using BoundingBox = typename Base::BoundingBox; - - /** Define "distance_vector_t" as a fixed-size or variable-size container - * depending on "DIM" */ - using distance_vector_t = typename Base::distance_vector_t; - - /** - * KDTree constructor - * - * Refer to docs in README.md or online in - * https://github.com/jlblancoc/nanoflann - * - * The KD-Tree point dimension (the length of each point in the datase, e.g. - * 3 for 3D points) is determined by means of: - * - The \a DIM template parameter if >0 (highest priority) - * - Otherwise, the \a dimensionality parameter of this constructor. - * - * @param inputData Dataset with the input features. Its lifetime must be - * equal or longer than that of the instance of this class. - * @param params Basically, the maximum leaf node size - */ - KDTreeSingleIndexDynamicAdaptor_( - const Dimension dimensionality, const DatasetAdaptor& inputData, - std::vector& treeIndex, - const KDTreeSingleIndexAdaptorParams& params = - KDTreeSingleIndexAdaptorParams()) - : dataset_(inputData), - index_params_(params), - treeIndex_(treeIndex), - distance_(inputData) - { - Base::size_ = 0; - Base::size_at_index_build_ = 0; - for (auto& v : Base::root_bbox_) v = {}; - Base::dim_ = dimensionality; - if (DIM > 0) Base::dim_ = DIM; - Base::leaf_max_size_ = params.leaf_max_size; - if (params.n_thread_build > 0) - { - Base::n_thread_build_ = params.n_thread_build; - } - else - { - Base::n_thread_build_ = - std::max(std::thread::hardware_concurrency(), 1u); - } - } - - /** Explicitly default the copy constructor */ - KDTreeSingleIndexDynamicAdaptor_( - const KDTreeSingleIndexDynamicAdaptor_& rhs) = default; - - /** Assignment operator definiton */ - KDTreeSingleIndexDynamicAdaptor_ operator=( - const KDTreeSingleIndexDynamicAdaptor_& rhs) - { - KDTreeSingleIndexDynamicAdaptor_ tmp(rhs); - std::swap(Base::vAcc_, tmp.Base::vAcc_); - std::swap(Base::leaf_max_size_, tmp.Base::leaf_max_size_); - std::swap(index_params_, tmp.index_params_); - std::swap(treeIndex_, tmp.treeIndex_); - std::swap(Base::size_, tmp.Base::size_); - std::swap(Base::size_at_index_build_, tmp.Base::size_at_index_build_); - std::swap(Base::root_node_, tmp.Base::root_node_); - std::swap(Base::root_bbox_, tmp.Base::root_bbox_); - std::swap(Base::pool_, tmp.Base::pool_); - return *this; - } - - /** - * Builds the index - */ - void buildIndex() - { - Base::size_ = Base::vAcc_.size(); - this->freeIndex(*this); - Base::size_at_index_build_ = Base::size_; - if (Base::size_ == 0) return; - computeBoundingBox(Base::root_bbox_); - // construct the tree - if (Base::n_thread_build_ == 1) - { - Base::root_node_ = - this->divideTree(*this, 0, Base::size_, Base::root_bbox_); - } - else - { + : public KDTreeBaseClass, + Distance, + DatasetAdaptor, + DIM, + IndexType> { +public: + /** + * The dataset used by this index + */ + const DatasetAdaptor& dataset_; //!< The source of our data + + KDTreeSingleIndexAdaptorParams index_params_; + + std::vector& treeIndex_; + + Distance distance_; + + using Base = typename nanoflann::KDTreeBaseClass< + nanoflann::KDTreeSingleIndexDynamicAdaptor_, + Distance, + DatasetAdaptor, + DIM, + IndexType>; + + using ElementType = typename Base::ElementType; + using DistanceType = typename Base::DistanceType; + + using Offset = typename Base::Offset; + using Size = typename Base::Size; + using Dimension = typename Base::Dimension; + + using Node = typename Base::Node; + using NodePtr = Node*; + + using Interval = typename Base::Interval; + /** Define "BoundingBox" as a fixed-size or variable-size container + * depending on "DIM" */ + using BoundingBox = typename Base::BoundingBox; + + /** Define "distance_vector_t" as a fixed-size or variable-size container + * depending on "DIM" */ + using distance_vector_t = typename Base::distance_vector_t; + + /** + * KDTree constructor + * + * Refer to docs in README.md or online in + * https://github.com/jlblancoc/nanoflann + * + * The KD-Tree point dimension (the length of each point in the datase, e.g. + * 3 for 3D points) is determined by means of: + * - The \a DIM template parameter if >0 (highest priority) + * - Otherwise, the \a dimensionality parameter of this constructor. + * + * @param inputData Dataset with the input features. Its lifetime must be + * equal or longer than that of the instance of this class. + * @param params Basically, the maximum leaf node size + */ + KDTreeSingleIndexDynamicAdaptor_( + const Dimension dimensionality, + const DatasetAdaptor& inputData, + std::vector& treeIndex, + const KDTreeSingleIndexAdaptorParams& params = + KDTreeSingleIndexAdaptorParams()) + : dataset_(inputData), + index_params_(params), + treeIndex_(treeIndex), + distance_(inputData) { + Base::size_ = 0; + Base::size_at_index_build_ = 0; + for (auto& v : Base::root_bbox_) + v = {}; + Base::dim_ = dimensionality; + if (DIM > 0) + Base::dim_ = DIM; + Base::leaf_max_size_ = params.leaf_max_size; + if (params.n_thread_build > 0) { + Base::n_thread_build_ = params.n_thread_build; + } else { + Base::n_thread_build_ = std::max(std::thread::hardware_concurrency(), 1u); + } + } + + /** Explicitly default the copy constructor */ + KDTreeSingleIndexDynamicAdaptor_( + const KDTreeSingleIndexDynamicAdaptor_& rhs) = default; + + /** Assignment operator definition */ + KDTreeSingleIndexDynamicAdaptor_ + operator=(const KDTreeSingleIndexDynamicAdaptor_& rhs) { + KDTreeSingleIndexDynamicAdaptor_ tmp(rhs); + std::swap(Base::vAcc_, tmp.Base::vAcc_); + std::swap(Base::leaf_max_size_, tmp.Base::leaf_max_size_); + std::swap(index_params_, tmp.index_params_); + std::swap(treeIndex_, tmp.treeIndex_); + std::swap(Base::size_, tmp.Base::size_); + std::swap(Base::size_at_index_build_, tmp.Base::size_at_index_build_); + std::swap(Base::root_node_, tmp.Base::root_node_); + std::swap(Base::root_bbox_, tmp.Base::root_bbox_); + std::swap(Base::pool_, tmp.Base::pool_); + return *this; + } + + /** + * Builds the index + */ + void buildIndex() { + Base::size_ = Base::vAcc_.size(); + this->freeIndex(*this); + Base::size_at_index_build_ = Base::size_; + if (Base::size_ == 0) + return; + computeBoundingBox(Base::root_bbox_); + // construct the tree + if (Base::n_thread_build_ == 1) { + Base::root_node_ = + this->divideTree(*this, 0, Base::size_, Base::root_bbox_); + } else { #ifndef NANOFLANN_NO_THREADS - std::atomic thread_count(0u); - std::mutex mutex; - Base::root_node_ = this->divideTreeConcurrent( - *this, 0, Base::size_, Base::root_bbox_, thread_count, mutex); -#else /* NANOFLANN_NO_THREADS */ - throw std::runtime_error("Multithreading is disabled"); + std::atomic thread_count(0u); + std::mutex mutex; + Base::root_node_ = this->divideTreeConcurrent(*this, + 0, + Base::size_, + Base::root_bbox_, + thread_count, + mutex); +#else /* NANOFLANN_NO_THREADS */ + throw std::runtime_error("Multithreading is disabled"); #endif /* NANOFLANN_NO_THREADS */ - } - } - - /** \name Query methods - * @{ */ - - /** - * Find set of nearest neighbors to vec[0:dim-1]. Their indices are stored - * inside the result object. - * This is the core search function, all others are wrappers around this - * one. - * - * \param result The result object in which the indices of the - * nearest-neighbors are stored. - * \param vec The vector of the query point for which to search the - * nearest neighbors. - * \param searchParams Optional parameters for the search. - * - * \tparam RESULTSET Should be any ResultSet - * \return True if the requested neighbors could be found. - * - * \sa knnSearch(), radiusSearch(), radiusSearchCustomCallback() - * - * \note If L2 norms are used, all returned distances are actually squared - * distances. - */ - template - bool findNeighbors( - RESULTSET& result, const ElementType* vec, - const SearchParameters& searchParams = {}) const - { - assert(vec); - if (this->size(*this) == 0) return false; - if (!Base::root_node_) return false; - float epsError = 1 + searchParams.eps; - - // fixed or variable-sized container (depending on DIM) - distance_vector_t dists; - // Fill it with zeros. - assign( - dists, (DIM > 0 ? DIM : Base::dim_), - static_cast(0)); - DistanceType dist = this->computeInitialDistances(*this, vec, dists); - searchLevel(result, vec, Base::root_node_, dist, dists, epsError); - return result.full(); } - - /** - * Find the "num_closest" nearest neighbors to the \a query_point[0:dim-1]. - * Their indices are stored inside the result object. \sa radiusSearch, - * findNeighbors - * \return Number `N` of valid points in - * the result set. - * - * \note If L2 norms are used, all returned distances are actually squared - * distances. - * - * \note Only the first `N` entries in `out_indices` and `out_distances` - * will be valid. Return may be less than `num_closest` only if the - * number of elements in the tree is less than `num_closest`. - */ - Size knnSearch( - const ElementType* query_point, const Size num_closest, - IndexType* out_indices, DistanceType* out_distances, - const SearchParameters& searchParams = {}) const - { - nanoflann::KNNResultSet resultSet(num_closest); - resultSet.init(out_indices, out_distances); - findNeighbors(resultSet, query_point, searchParams); - return resultSet.size(); - } - - /** - * Find all the neighbors to \a query_point[0:dim-1] within a maximum - * radius. The output is given as a vector of pairs, of which the first - * element is a point index and the second the corresponding distance. - * Previous contents of \a IndicesDists are cleared. - * - * If searchParams.sorted==true, the output list is sorted by ascending - * distances. - * - * For a better performance, it is advisable to do a .reserve() on the - * vector if you have any wild guess about the number of expected matches. - * - * \sa knnSearch, findNeighbors, radiusSearchCustomCallback - * \return The number of points within the given radius (i.e. indices.size() - * or dists.size() ) - * - * \note If L2 norms are used, search radius and all returned distances - * are actually squared distances. - */ - Size radiusSearch( - const ElementType* query_point, const DistanceType& radius, - std::vector>& IndicesDists, - const SearchParameters& searchParams = {}) const - { - RadiusResultSet resultSet( - radius, IndicesDists); - const size_t nFound = - radiusSearchCustomCallback(query_point, resultSet, searchParams); - if (searchParams.sorted) - std::sort( - IndicesDists.begin(), IndicesDists.end(), IndexDist_Sorter()); - return nFound; - } - - /** - * Just like radiusSearch() but with a custom callback class for each point - * found in the radius of the query. See the source of RadiusResultSet<> as - * a start point for your own classes. \sa radiusSearch - */ - template - Size radiusSearchCustomCallback( - const ElementType* query_point, SEARCH_CALLBACK& resultSet, - const SearchParameters& searchParams = {}) const - { - findNeighbors(resultSet, query_point, searchParams); - return resultSet.size(); - } - - /** @} */ - - public: - void computeBoundingBox(BoundingBox& bbox) - { - const auto dims = (DIM > 0 ? DIM : Base::dim_); - resize(bbox, dims); - - if (dataset_.kdtree_get_bbox(bbox)) - { - // Done! It was implemented in derived class - } - else - { - const Size N = Base::size_; - if (!N) - throw std::runtime_error( - "[nanoflann] computeBoundingBox() called but " - "no data points found."); - for (Dimension i = 0; i < dims; ++i) - { - bbox[i].low = bbox[i].high = - this->dataset_get(*this, Base::vAcc_[0], i); - } - for (Offset k = 1; k < N; ++k) - { - for (Dimension i = 0; i < dims; ++i) - { - const auto val = - this->dataset_get(*this, Base::vAcc_[k], i); - if (val < bbox[i].low) bbox[i].low = val; - if (val > bbox[i].high) bbox[i].high = val; - } - } - } - } - - /** - * Performs an exact search in the tree starting from a node. - * \tparam RESULTSET Should be any ResultSet - */ - template - void searchLevel( - RESULTSET& result_set, const ElementType* vec, const NodePtr node, - DistanceType mindist, distance_vector_t& dists, - const float epsError) const - { - /* If this is a leaf node, then do check and return. */ - if ((node->child1 == nullptr) && (node->child2 == nullptr)) - { - DistanceType worst_dist = result_set.worstDist(); - for (Offset i = node->node_type.lr.left; - i < node->node_type.lr.right; ++i) - { - const IndexType index = Base::vAcc_[i]; // reorder... : i; - if (treeIndex_[index] == -1) continue; - DistanceType dist = distance_.evalMetric( - vec, index, (DIM > 0 ? DIM : Base::dim_)); - if (dist < worst_dist) - { - if (!result_set.addPoint( - static_cast(dist), - static_cast( - Base::vAcc_[i]))) - { - // the resultset doesn't want to receive any more - // points, we're done searching! - return; // false; - } - } - } - return; - } - - /* Which child branch should be taken first? */ - Dimension idx = node->node_type.sub.divfeat; - ElementType val = vec[idx]; - DistanceType diff1 = val - node->node_type.sub.divlow; - DistanceType diff2 = val - node->node_type.sub.divhigh; - - NodePtr bestChild; - NodePtr otherChild; - DistanceType cut_dist; - if ((diff1 + diff2) < 0) - { - bestChild = node->child1; - otherChild = node->child2; - cut_dist = - distance_.accum_dist(val, node->node_type.sub.divhigh, idx); - } - else - { - bestChild = node->child2; - otherChild = node->child1; - cut_dist = - distance_.accum_dist(val, node->node_type.sub.divlow, idx); + } + + /** \name Query methods + * @{ */ + + /** + * Find set of nearest neighbors to vec[0:dim-1]. Their indices are stored + * inside the result object. + * This is the core search function, all others are wrappers around this + * one. + * + * \param result The result object in which the indices of the + * nearest-neighbors are stored. + * \param vec The vector of the query point for which to search the + * nearest neighbors. + * \param searchParams Optional parameters for the search. + * + * \tparam RESULTSET Should be any ResultSet + * \return True if the requested neighbors could be found. + * + * \sa knnSearch(), radiusSearch(), radiusSearchCustomCallback() + * + * \note If L2 norms are used, all returned distances are actually squared + * distances. + */ + template + bool findNeighbors(RESULTSET& result, + const ElementType* vec, + const SearchParameters& searchParams = {}) const { + assert(vec); + if (this->size(*this) == 0) + return false; + if (!Base::root_node_) + return false; + float epsError = 1 + searchParams.eps; + + // fixed or variable-sized container (depending on DIM) + distance_vector_t dists; + // Fill it with zeros. + assign(dists, + (DIM > 0 ? DIM : Base::dim_), + static_cast(0)); + DistanceType dist = this->computeInitialDistances(*this, vec, dists); + searchLevel(result, vec, Base::root_node_, dist, dists, epsError); + return result.full(); + } + + /** + * Find the "num_closest" nearest neighbors to the \a query_point[0:dim-1]. + * Their indices are stored inside the result object. \sa radiusSearch, + * findNeighbors + * \return Number `N` of valid points in + * the result set. + * + * \note If L2 norms are used, all returned distances are actually squared + * distances. + * + * \note Only the first `N` entries in `out_indices` and `out_distances` + * will be valid. Return may be less than `num_closest` only if the + * number of elements in the tree is less than `num_closest`. + */ + Size knnSearch(const ElementType* query_point, + const Size num_closest, + IndexType* out_indices, + DistanceType* out_distances, + const SearchParameters& searchParams = {}) const { + nanoflann::KNNResultSet resultSet(num_closest); + resultSet.init(out_indices, out_distances); + findNeighbors(resultSet, query_point, searchParams); + return resultSet.size(); + } + + /** + * Find all the neighbors to \a query_point[0:dim-1] within a maximum + * radius. The output is given as a vector of pairs, of which the first + * element is a point index and the second the corresponding distance. + * Previous contents of \a IndicesDists are cleared. + * + * If searchParams.sorted==true, the output list is sorted by ascending + * distances. + * + * For a better performance, it is advisable to do a .reserve() on the + * vector if you have any wild guess about the number of expected matches. + * + * \sa knnSearch, findNeighbors, radiusSearchCustomCallback + * \return The number of points within the given radius (i.e. indices.size() + * or dists.size() ) + * + * \note If L2 norms are used, search radius and all returned distances + * are actually squared distances. + */ + Size + radiusSearch(const ElementType* query_point, + const DistanceType& radius, + std::vector>& IndicesDists, + const SearchParameters& searchParams = {}) const { + RadiusResultSet resultSet(radius, IndicesDists); + const size_t nFound = + radiusSearchCustomCallback(query_point, resultSet, searchParams); + if (searchParams.sorted) + std::sort(IndicesDists.begin(), IndicesDists.end(), IndexDist_Sorter()); + return nFound; + } + + /** + * Just like radiusSearch() but with a custom callback class for each point + * found in the radius of the query. See the source of RadiusResultSet<> as + * a start point for your own classes. \sa radiusSearch + */ + template + Size + radiusSearchCustomCallback(const ElementType* query_point, + SEARCH_CALLBACK& resultSet, + const SearchParameters& searchParams = {}) const { + findNeighbors(resultSet, query_point, searchParams); + return resultSet.size(); + } + + /** @} */ + +public: + void computeBoundingBox(BoundingBox& bbox) { + const auto dims = (DIM > 0 ? DIM : Base::dim_); + resize(bbox, dims); + + if (dataset_.kdtree_get_bbox(bbox)) { + // Done! It was implemented in derived class + } else { + const Size N = Base::size_; + if (!N) + throw std::runtime_error("[nanoflann] computeBoundingBox() called but " + "no data points found."); + for (Dimension i = 0; i < dims; ++i) { + bbox[i].low = bbox[i].high = + this->dataset_get(*this, Base::vAcc_[0], i); + } + for (Offset k = 1; k < N; ++k) { + for (Dimension i = 0; i < dims; ++i) { + const auto val = this->dataset_get(*this, Base::vAcc_[k], i); + if (val < bbox[i].low) + bbox[i].low = val; + if (val > bbox[i].high) + bbox[i].high = val; } - - /* Call recursively to search next level down. */ - searchLevel(result_set, vec, bestChild, mindist, dists, epsError); - - DistanceType dst = dists[idx]; - mindist = mindist + cut_dist - dst; - dists[idx] = cut_dist; - if (mindist * epsError <= result_set.worstDist()) - { - searchLevel(result_set, vec, otherChild, mindist, dists, epsError); + } + } + } + + /** + * Performs an exact search in the tree starting from a node. + * \tparam RESULTSET Should be any ResultSet + */ + template + void searchLevel(RESULTSET& result_set, + const ElementType* vec, + const NodePtr node, + DistanceType mindist, + distance_vector_t& dists, + const float epsError) const { + /* If this is a leaf node, then do check and return. */ + if ((node->child1 == nullptr) && (node->child2 == nullptr)) { + DistanceType worst_dist = result_set.worstDist(); + for (Offset i = node->node_type.lr.left; i < node->node_type.lr.right; + ++i) { + const IndexType index = Base::vAcc_[i]; // reorder... : i; + if (treeIndex_[index] == -1) + continue; + DistanceType dist = + distance_.evalMetric(vec, index, (DIM > 0 ? DIM : Base::dim_)); + if (dist < worst_dist) { + if (!result_set.addPoint( + static_cast(dist), + static_cast(Base::vAcc_[i]))) { + // the resultset doesn't want to receive any more + // points, we're done searching! + return; // false; + } } - dists[idx] = dst; - } - - public: - /** Stores the index in a binary file. - * IMPORTANT NOTE: The set of data points is NOT stored in the file, so - * when loading the index object it must be constructed associated to the - * same source of data points used while building it. See the example: - * examples/saveload_example.cpp \sa loadIndex */ - void saveIndex(std::ostream& stream) { saveIndex(*this, stream); } - - /** Loads a previous index from a binary file. - * IMPORTANT NOTE: The set of data points is NOT stored in the file, so - * the index object must be constructed associated to the same source of - * data points used while building the index. See the example: - * examples/saveload_example.cpp \sa loadIndex */ - void loadIndex(std::istream& stream) { loadIndex(*this, stream); } + } + return; + } + + /* Which child branch should be taken first? */ + Dimension idx = node->node_type.sub.divfeat; + ElementType val = vec[idx]; + DistanceType diff1 = val - node->node_type.sub.divlow; + DistanceType diff2 = val - node->node_type.sub.divhigh; + + NodePtr bestChild; + NodePtr otherChild; + DistanceType cut_dist; + if ((diff1 + diff2) < 0) { + bestChild = node->child1; + otherChild = node->child2; + cut_dist = distance_.accum_dist(val, node->node_type.sub.divhigh, idx); + } else { + bestChild = node->child2; + otherChild = node->child1; + cut_dist = distance_.accum_dist(val, node->node_type.sub.divlow, idx); + } + + /* Call recursively to search next level down. */ + searchLevel(result_set, vec, bestChild, mindist, dists, epsError); + + DistanceType dst = dists[idx]; + mindist = mindist + cut_dist - dst; + dists[idx] = cut_dist; + if (mindist * epsError <= result_set.worstDist()) { + searchLevel(result_set, vec, otherChild, mindist, dists, epsError); + } + dists[idx] = dst; + } + +public: + /** Stores the index in a binary file. + * IMPORTANT NOTE: The set of data points is NOT stored in the file, so + * when loading the index object it must be constructed associated to the + * same source of data points used while building it. See the example: + * examples/saveload_example.cpp \sa loadIndex */ + void saveIndex(std::ostream& stream) { saveIndex(*this, stream); } + + /** Loads a previous index from a binary file. + * IMPORTANT NOTE: The set of data points is NOT stored in the file, so + * the index object must be constructed associated to the same source of + * data points used while building the index. See the example: + * examples/saveload_example.cpp \sa loadIndex */ + void loadIndex(std::istream& stream) { loadIndex(*this, stream); } }; /** kd-tree dynaimic index @@ -2363,194 +2245,194 @@ class KDTreeSingleIndexDynamicAdaptor_ * Dimensionality of data points (e.g. 3 for 3D points) \tparam IndexType * Will be typically size_t or int */ -template < - typename Distance, class DatasetAdaptor, int32_t DIM = -1, - typename IndexType = uint32_t> -class KDTreeSingleIndexDynamicAdaptor -{ - public: - using ElementType = typename Distance::ElementType; - using DistanceType = typename Distance::DistanceType; - - using Offset = typename KDTreeSingleIndexDynamicAdaptor_< - Distance, DatasetAdaptor, DIM>::Offset; - using Size = typename KDTreeSingleIndexDynamicAdaptor_< - Distance, DatasetAdaptor, DIM>::Size; - using Dimension = typename KDTreeSingleIndexDynamicAdaptor_< - Distance, DatasetAdaptor, DIM>::Dimension; - - protected: - Size leaf_max_size_; - Size treeCount_; - Size pointCount_; - - /** - * The dataset used by this index - */ - const DatasetAdaptor& dataset_; //!< The source of our data - - /** treeIndex[idx] is the index of tree in which point at idx is stored. - * treeIndex[idx]=-1 means that point has been removed. */ - std::vector treeIndex_; - std::unordered_set removedPoints_; - - KDTreeSingleIndexAdaptorParams index_params_; - - Dimension dim_; //!< Dimensionality of each data point - - using index_container_t = KDTreeSingleIndexDynamicAdaptor_< - Distance, DatasetAdaptor, DIM, IndexType>; - std::vector index_; - - public: - /** Get a const ref to the internal list of indices; the number of indices - * is adapted dynamically as the dataset grows in size. */ - const std::vector& getAllIndices() const - { - return index_; - } - - private: - /** finds position of least significant unset bit */ - int First0Bit(IndexType num) - { - int pos = 0; - while (num & 1) - { - num = num >> 1; - pos++; +template +class KDTreeSingleIndexDynamicAdaptor { +public: + using ElementType = typename Distance::ElementType; + using DistanceType = typename Distance::DistanceType; + + using Offset = typename KDTreeSingleIndexDynamicAdaptor_::Offset; + using Size = typename KDTreeSingleIndexDynamicAdaptor_::Size; + using Dimension = typename KDTreeSingleIndexDynamicAdaptor_::Dimension; + +protected: + Size leaf_max_size_; + Size treeCount_; + Size pointCount_; + + /** + * The dataset used by this index + */ + const DatasetAdaptor& dataset_; //!< The source of our data + + /** treeIndex[idx] is the index of tree in which point at idx is stored. + * treeIndex[idx]=-1 means that point has been removed. */ + std::vector treeIndex_; + std::unordered_set removedPoints_; + + KDTreeSingleIndexAdaptorParams index_params_; + + Dimension dim_; //!< Dimensionality of each data point + + using index_container_t = KDTreeSingleIndexDynamicAdaptor_; + std::vector index_; + +public: + /** Get a const ref to the internal list of indices; the number of indices + * is adapted dynamically as the dataset grows in size. */ + const std::vector& getAllIndices() const { return index_; } + +private: + /** finds position of least significant unset bit */ + int First0Bit(IndexType num) { + int pos = 0; + while (num & 1) { + num = num >> 1; + pos++; + } + return pos; + } + + /** Creates multiple empty trees to handle dynamic support */ + void init() { + using my_kd_tree_t = KDTreeSingleIndexDynamicAdaptor_; + std::vector index( + treeCount_, + my_kd_tree_t(dim_ /*dim*/, dataset_, treeIndex_, index_params_)); + index_ = index; + } + +public: + Distance distance_; + + /** + * KDTree constructor + * + * Refer to docs in README.md or online in + * https://github.com/jlblancoc/nanoflann + * + * The KD-Tree point dimension (the length of each point in the datase, e.g. + * 3 for 3D points) is determined by means of: + * - The \a DIM template parameter if >0 (highest priority) + * - Otherwise, the \a dimensionality parameter of this constructor. + * + * @param inputData Dataset with the input features. Its lifetime must be + * equal or longer than that of the instance of this class. + * @param params Basically, the maximum leaf node size + */ + explicit KDTreeSingleIndexDynamicAdaptor( + const int dimensionality, + const DatasetAdaptor& inputData, + const KDTreeSingleIndexAdaptorParams& params = + KDTreeSingleIndexAdaptorParams(), + const size_t maximumPointCount = 1000000000U) + : dataset_(inputData), + index_params_(params), + distance_(inputData) { + treeCount_ = static_cast(std::log2(maximumPointCount)) + 1; + pointCount_ = 0U; + dim_ = dimensionality; + treeIndex_.clear(); + if (DIM > 0) + dim_ = DIM; + leaf_max_size_ = params.leaf_max_size; + init(); + const size_t num_initial_points = dataset_.kdtree_get_point_count(); + if (num_initial_points > 0) { + addPoints(0, num_initial_points - 1); + } + } + + /** Deleted copy constructor*/ + explicit KDTreeSingleIndexDynamicAdaptor( + const KDTreeSingleIndexDynamicAdaptor&) = delete; + + /** Add points to the set, Inserts all points from [start, end] */ + void addPoints(IndexType start, IndexType end) { + const Size count = end - start + 1; + int maxIndex = 0; + treeIndex_.resize(treeIndex_.size() + count); + for (IndexType idx = start; idx <= end; idx++) { + const int pos = First0Bit(pointCount_); + maxIndex = std::max(pos, maxIndex); + treeIndex_[pointCount_] = pos; + + const auto it = removedPoints_.find(idx); + if (it != removedPoints_.end()) { + removedPoints_.erase(it); + treeIndex_[idx] = pos; + } + + for (int i = 0; i < pos; i++) { + for (int j = 0; j < static_cast(index_[i].vAcc_.size()); j++) { + index_[pos].vAcc_.push_back(index_[i].vAcc_[j]); + if (treeIndex_[index_[i].vAcc_[j]] != -1) + treeIndex_[index_[i].vAcc_[j]] = pos; } - return pos; - } - - /** Creates multiple empty trees to handle dynamic support */ - void init() - { - using my_kd_tree_t = KDTreeSingleIndexDynamicAdaptor_< - Distance, DatasetAdaptor, DIM, IndexType>; - std::vector index( - treeCount_, - my_kd_tree_t(dim_ /*dim*/, dataset_, treeIndex_, index_params_)); - index_ = index; - } - - public: - Distance distance_; - - /** - * KDTree constructor - * - * Refer to docs in README.md or online in - * https://github.com/jlblancoc/nanoflann - * - * The KD-Tree point dimension (the length of each point in the datase, e.g. - * 3 for 3D points) is determined by means of: - * - The \a DIM template parameter if >0 (highest priority) - * - Otherwise, the \a dimensionality parameter of this constructor. - * - * @param inputData Dataset with the input features. Its lifetime must be - * equal or longer than that of the instance of this class. - * @param params Basically, the maximum leaf node size - */ - explicit KDTreeSingleIndexDynamicAdaptor( - const int dimensionality, const DatasetAdaptor& inputData, - const KDTreeSingleIndexAdaptorParams& params = - KDTreeSingleIndexAdaptorParams(), - const size_t maximumPointCount = 1000000000U) - : dataset_(inputData), index_params_(params), distance_(inputData) - { - treeCount_ = static_cast(std::log2(maximumPointCount)) + 1; - pointCount_ = 0U; - dim_ = dimensionality; - treeIndex_.clear(); - if (DIM > 0) dim_ = DIM; - leaf_max_size_ = params.leaf_max_size; - init(); - const size_t num_initial_points = dataset_.kdtree_get_point_count(); - if (num_initial_points > 0) { addPoints(0, num_initial_points - 1); } - } - - /** Deleted copy constructor*/ - explicit KDTreeSingleIndexDynamicAdaptor( - const KDTreeSingleIndexDynamicAdaptor< - Distance, DatasetAdaptor, DIM, IndexType>&) = delete; - - /** Add points to the set, Inserts all points from [start, end] */ - void addPoints(IndexType start, IndexType end) - { - const Size count = end - start + 1; - int maxIndex = 0; - treeIndex_.resize(treeIndex_.size() + count); - for (IndexType idx = start; idx <= end; idx++) - { - const int pos = First0Bit(pointCount_); - maxIndex = std::max(pos, maxIndex); - treeIndex_[pointCount_] = pos; - - const auto it = removedPoints_.find(idx); - if (it != removedPoints_.end()) - { - removedPoints_.erase(it); - treeIndex_[idx] = pos; - } - - for (int i = 0; i < pos; i++) - { - for (int j = 0; j < static_cast(index_[i].vAcc_.size()); - j++) - { - index_[pos].vAcc_.push_back(index_[i].vAcc_[j]); - if (treeIndex_[index_[i].vAcc_[j]] != -1) - treeIndex_[index_[i].vAcc_[j]] = pos; - } - index_[i].vAcc_.clear(); - } - index_[pos].vAcc_.push_back(idx); - pointCount_++; - } - - for (int i = 0; i <= maxIndex; ++i) - { - index_[i].freeIndex(index_[i]); - if (!index_[i].vAcc_.empty()) index_[i].buildIndex(); - } - } - - /** Remove a point from the set (Lazy Deletion) */ - void removePoint(size_t idx) - { - if (idx >= pointCount_) return; - removedPoints_.insert(idx); - treeIndex_[idx] = -1; - } - - /** - * Find set of nearest neighbors to vec[0:dim-1]. Their indices are stored - * inside the result object. - * - * Params: - * result = the result object in which the indices of the - * nearest-neighbors are stored vec = the vector for which to search the - * nearest neighbors - * - * \tparam RESULTSET Should be any ResultSet - * \return True if the requested neighbors could be found. - * \sa knnSearch, radiusSearch - * - * \note If L2 norms are used, all returned distances are actually squared - * distances. - */ - template - bool findNeighbors( - RESULTSET& result, const ElementType* vec, - const SearchParameters& searchParams = {}) const - { - for (size_t i = 0; i < treeCount_; i++) - { - index_[i].findNeighbors(result, &vec[0], searchParams); - } - return result.full(); - } + index_[i].vAcc_.clear(); + } + index_[pos].vAcc_.push_back(idx); + pointCount_++; + } + + for (int i = 0; i <= maxIndex; ++i) { + index_[i].freeIndex(index_[i]); + if (!index_[i].vAcc_.empty()) + index_[i].buildIndex(); + } + } + + /** Remove a point from the set (Lazy Deletion) */ + void removePoint(size_t idx) { + if (idx >= pointCount_) + return; + removedPoints_.insert(idx); + treeIndex_[idx] = -1; + } + + /** + * Find set of nearest neighbors to vec[0:dim-1]. Their indices are stored + * inside the result object. + * + * Params: + * result = the result object in which the indices of the + * nearest-neighbors are stored vec = the vector for which to search the + * nearest neighbors + * + * \tparam RESULTSET Should be any ResultSet + * \return True if the requested neighbors could be found. + * \sa knnSearch, radiusSearch + * + * \note If L2 norms are used, all returned distances are actually squared + * distances. + */ + template + bool findNeighbors(RESULTSET& result, + const ElementType* vec, + const SearchParameters& searchParams = {}) const { + for (size_t i = 0; i < treeCount_; i++) { + index_[i].findNeighbors(result, &vec[0], searchParams); + } + return result.full(); + } }; /** An L2-metric KD-tree adaptor for working with data directly stored in an @@ -2578,116 +2460,113 @@ class KDTreeSingleIndexDynamicAdaptor * points, if set to false the columns of the matrix are used as the * points. */ -template < - class MatrixType, int32_t DIM = -1, class Distance = nanoflann::metric_L2, - bool row_major = true> -struct KDTreeEigenMatrixAdaptor -{ - using self_t = - KDTreeEigenMatrixAdaptor; - using num_t = typename MatrixType::Scalar; - using IndexType = typename MatrixType::Index; - using metric_t = typename Distance::template traits< - num_t, self_t, IndexType>::distance_t; - - using index_t = KDTreeSingleIndexAdaptor< - metric_t, self_t, - row_major ? MatrixType::ColsAtCompileTime - : MatrixType::RowsAtCompileTime, - IndexType>; - - index_t* index_; //! The kd-tree index for the user to call its methods as - //! usual with any other FLANN index. - - using Offset = typename index_t::Offset; - using Size = typename index_t::Size; - using Dimension = typename index_t::Dimension; - - /// Constructor: takes a const ref to the matrix object with the data points - explicit KDTreeEigenMatrixAdaptor( - const Dimension dimensionality, - const std::reference_wrapper& mat, - const int leaf_max_size = 10) - : m_data_matrix(mat) - { - const auto dims = row_major ? mat.get().cols() : mat.get().rows(); - if (static_cast(dims) != dimensionality) - throw std::runtime_error( - "Error: 'dimensionality' must match column count in data " - "matrix"); - if (DIM > 0 && static_cast(dims) != DIM) - throw std::runtime_error( - "Data set dimensionality does not match the 'DIM' template " - "argument"); - index_ = new index_t( - dims, *this /* adaptor */, - nanoflann::KDTreeSingleIndexAdaptorParams(leaf_max_size)); - } - - public: - /** Deleted copy constructor */ - KDTreeEigenMatrixAdaptor(const self_t&) = delete; - - ~KDTreeEigenMatrixAdaptor() { delete index_; } - - const std::reference_wrapper m_data_matrix; - - /** Query for the \a num_closest closest points to a given point (entered as - * query_point[0:dim-1]). Note that this is a short-cut method for - * index->findNeighbors(). The user can also call index->... methods as - * desired. - * - * \note If L2 norms are used, all returned distances are actually squared - * distances. - */ - void query( - const num_t* query_point, const Size num_closest, - IndexType* out_indices, num_t* out_distances) const - { - nanoflann::KNNResultSet resultSet(num_closest); - resultSet.init(out_indices, out_distances); - index_->findNeighbors(resultSet, query_point); - } - - /** @name Interface expected by KDTreeSingleIndexAdaptor - * @{ */ - - const self_t& derived() const { return *this; } - self_t& derived() { return *this; } - - // Must return the number of data points - Size kdtree_get_point_count() const - { - if (row_major) - return m_data_matrix.get().rows(); - else - return m_data_matrix.get().cols(); - } - - // Returns the dim'th component of the idx'th point in the class: - num_t kdtree_get_pt(const IndexType idx, size_t dim) const - { - if (row_major) - return m_data_matrix.get().coeff(idx, IndexType(dim)); - else - return m_data_matrix.get().coeff(IndexType(dim), idx); - } - - // Optional bounding-box computation: return false to default to a standard - // bbox computation loop. - // Return true if the BBOX was already computed by the class and returned - // in "bb" so it can be avoided to redo it again. Look at bb.size() to - // find out the expected dimensionality (e.g. 2 or 3 for point clouds) - template - bool kdtree_get_bbox(BBOX& /*bb*/) const - { - return false; - } - - /** @} */ - -}; // end of KDTreeEigenMatrixAdaptor +template +struct KDTreeEigenMatrixAdaptor { + using self_t = KDTreeEigenMatrixAdaptor; + using num_t = typename MatrixType::Scalar; + using IndexType = typename MatrixType::Index; + using metric_t = + typename Distance::template traits::distance_t; + + using index_t = + KDTreeSingleIndexAdaptor; + + index_t* index_; //! The kd-tree index for the user to call its methods as + //! usual with any other FLANN index. + + using Offset = typename index_t::Offset; + using Size = typename index_t::Size; + using Dimension = typename index_t::Dimension; + + /// Constructor: takes a const ref to the matrix object with the data points + explicit KDTreeEigenMatrixAdaptor( + const Dimension dimensionality, + const std::reference_wrapper& mat, + const int leaf_max_size = 10) + : m_data_matrix(mat) { + const auto dims = row_major ? mat.get().cols() : mat.get().rows(); + if (static_cast(dims) != dimensionality) + throw std::runtime_error( + "Error: 'dimensionality' must match column count in data " + "matrix"); + if (DIM > 0 && static_cast(dims) != DIM) + throw std::runtime_error( + "Data set dimensionality does not match the 'DIM' template " + "argument"); + index_ = + new index_t(dims, + *this /* adaptor */, + nanoflann::KDTreeSingleIndexAdaptorParams(leaf_max_size)); + } + +public: + /** Deleted copy constructor */ + KDTreeEigenMatrixAdaptor(const self_t&) = delete; + + ~KDTreeEigenMatrixAdaptor() { delete index_; } + + const std::reference_wrapper m_data_matrix; + + /** Query for the \a num_closest closest points to a given point (entered as + * query_point[0:dim-1]). Note that this is a short-cut method for + * index->findNeighbors(). The user can also call index->... methods as + * desired. + * + * \note If L2 norms are used, all returned distances are actually squared + * distances. + */ + void query(const num_t* query_point, + const Size num_closest, + IndexType* out_indices, + num_t* out_distances) const { + nanoflann::KNNResultSet resultSet(num_closest); + resultSet.init(out_indices, out_distances); + index_->findNeighbors(resultSet, query_point); + } + + /** @name Interface expected by KDTreeSingleIndexAdaptor + * @{ */ + + const self_t& derived() const { return *this; } + self_t& derived() { return *this; } + + // Must return the number of data points + Size kdtree_get_point_count() const { + if (row_major) + return m_data_matrix.get().rows(); + else + return m_data_matrix.get().cols(); + } + + // Returns the dim'th component of the idx'th point in the class: + num_t kdtree_get_pt(const IndexType idx, size_t dim) const { + if (row_major) + return m_data_matrix.get().coeff(idx, IndexType(dim)); + else + return m_data_matrix.get().coeff(IndexType(dim), idx); + } + + // Optional bounding-box computation: return false to default to a standard + // bbox computation loop. + // Return true if the BBOX was already computed by the class and returned + // in "bb" so it can be avoided to redo it again. Look at bb.size() to + // find out the expected dimensionality (e.g. 2 or 3 for point clouds) + template + bool kdtree_get_bbox(BBOX& /*bb*/) const { + return false; + } + + /** @} */ + +}; // end of KDTreeEigenMatrixAdaptor /** @} */ -/** @} */ // end of grouping -} // namespace nanoflann +/** @} */ // end of grouping +} // namespace nanoflann From c78ad187a4ef449256d91455bc3636f83df5a665 Mon Sep 17 00:00:00 2001 From: Jaewook Lee Date: Fri, 26 Jul 2024 17:02:56 +0200 Subject: [PATCH 2/8] update workflow --- .github/workflows/main.yml | 49 +++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 699643a..0cb2d36 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -6,26 +6,33 @@ on: pull_request: jobs: - macos_wheel: - runs-on: macos-latest - strategy: - matrix: - arch: [x86_64, arm64] - cw_build: ["pp*", "cp*"] - exclude: - - arch: arm64 - cw_build: "pp*" + macos_wheel_m: + runs-on: macos-14 steps: - uses: actions/checkout@v3 with: submodules: recursive - name: build wheels - uses: pypa/cibuildwheel@v2.18 + uses: pypa/cibuildwheel@v2.19 env: - CIBW_ARCHS: ${{ matrix.arch }} - CIBW_BUILD: ${{ matrix.cw_build }} - CIBW_TEST_SKIP: "*-macosx_x86_64" + CIBW_ARCHS: "arm64" + + - uses: actions/upload-artifact@v3 + with: + path: ./wheelhouse/*.whl + + macos_wheel_intel: + runs-on: macos-13 + + steps: + - uses: actions/checkout@v3 + with: + submodules: recursive + - name: build wheels + uses: pypa/cibuildwheel@v2.19 + env: + CIBW_ARCHS: "x86_64" - uses: actions/upload-artifact@v3 with: @@ -45,11 +52,10 @@ jobs: with: submodules: recursive - name: build wheels - uses: pypa/cibuildwheel@v2.18 + uses: pypa/cibuildwheel@v2.19 env: CIBW_ARCHS: ${{ matrix.arch }} CIBW_BUILD: ${{ matrix.cw_build }} - CIBW_SKIP: "pp39-many*" - uses: actions/upload-artifact@v3 with: @@ -60,7 +66,7 @@ jobs: strategy: matrix: arch: [aarch64, ppc64le] - cw_build: ["cp36*many*", "cp37*many*", "cp38*many*", "cp39*many*", "cp310*many*", "cp311*many*", "cp312*many*", "pp37*many*", "pp38*many*", "pp39*many*"] + cw_build: ["cp37*many*", "cp38*many*", "cp39*many*", "cp310*many*", "cp311*many*", "cp312*many*", "pp37*many*", "pp38*many*", "pp39*many*"] exclude: - arch: ppc64le cw_build: "pp37*many*" @@ -81,7 +87,7 @@ jobs: platforms: arm64, ppc64le - name: build wheels - uses: pypa/cibuildwheel@v2.18 + uses: pypa/cibuildwheel@v2.19 env: CIBW_ARCHS: ${{ matrix.arch }} CIBW_BUILD: ${{ matrix.cw_build }} @@ -104,9 +110,10 @@ jobs: with: submodules: recursive - name: build wheels - uses: pypa/cibuildwheel@v2.18 + uses: pypa/cibuildwheel@v2.19 env: CIBW_ARCHS: ${{ matrix.arch }} + CIBW_SKIP: "pp*" - uses: actions/upload-artifact@v3 @@ -129,10 +136,8 @@ jobs: path: ./dist/* upload_pypi: - needs: [macos_wheel, linux_wheel, windows_wheel, source_dist] + needs: [macos_wheel_m, macos_wheel_intel, linux_wheel, windows_wheel, source_dist] runs-on: ubuntu-latest - permissions: - id-token: write # try to publish only if this is a push to stable branch if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }} steps: @@ -143,4 +148,4 @@ jobs: - uses: pypa/gh-action-pypi-publish@release/v1 with: - skip-existing: true + skip_existing: true From ffa9d83a57c3274c076e8073a9aec294440d5a91 Mon Sep 17 00:00:00 2001 From: Jaewook Lee Date: Fri, 26 Jul 2024 17:04:51 +0200 Subject: [PATCH 3/8] update nanoflann --- third_party/nanoflann | 2 +- third_party/nanoflann.hpp | 100 ++++++++++++++++++++++---------------- 2 files changed, 58 insertions(+), 44 deletions(-) diff --git a/third_party/nanoflann b/third_party/nanoflann index 3900193..e68fd0f 160000 --- a/third_party/nanoflann +++ b/third_party/nanoflann @@ -1 +1 @@ -Subproject commit 3900193f7e200c9e152aa4b240d7eca47a830a80 +Subproject commit e68fd0f36e00af65fcb13c0fdbd275725f50527e diff --git a/third_party/nanoflann.hpp b/third_party/nanoflann.hpp index b2bb12d..2e51d75 100644 --- a/third_party/nanoflann.hpp +++ b/third_party/nanoflann.hpp @@ -145,6 +145,34 @@ assign(Container& c, const size_t nElements, const T& value) { c[i] = value; } +/** operator "<" for std::sort() */ +struct IndexDist_Sorter { + /** PairType will be typically: ResultItem */ + template + bool operator()(const PairType& p1, const PairType& p2) const { + return p1.second < p2.second; + } +}; + +/** + * Each result element in RadiusResultSet. Note that distances and indices + * are named `first` and `second` to keep backward-compatibility with the + * `std::pair<>` type used in the past. In contrast, this structure is ensured + * to be `std::is_standard_layout` so it can be used in wrappers to other + * languages. + * See: https://github.com/jlblancoc/nanoflann/issues/166 + */ +template +struct ResultItem { + ResultItem() = default; + ResultItem(const IndexType index, const DistanceType distance) + : first(index), + second(distance) {} + + IndexType first; //!< Index of the sample in the dataset + DistanceType second; //!< Distance from sample to query point +}; + /** @addtogroup result_sets_grp Result set classes * @{ */ @@ -218,6 +246,10 @@ class KNNResultSet { } DistanceType worstDist() const { return dists[capacity - 1]; } + + void sort() { + // already sorted + } }; /** Result set for RKNN searches (N-closest neighbors with a maximum radius) */ @@ -293,36 +325,12 @@ class RKNNResultSet { } DistanceType worstDist() const { return dists[capacity - 1]; } -}; -/** operator "<" for std::sort() */ -struct IndexDist_Sorter { - /** PairType will be typically: ResultItem */ - template - bool operator()(const PairType& p1, const PairType& p2) const { - return p1.second < p2.second; + void sort() { + // already sorted } }; -/** - * Each result element in RadiusResultSet. Note that distances and indices - * are named `first` and `second` to keep backward-compatibility with the - * `std::pair<>` type used in the past. In contrast, this structure is ensured - * to be `std::is_standard_layout` so it can be used in wrappers to other - * languages. - * See: https://github.com/jlblancoc/nanoflann/issues/166 - */ -template -struct ResultItem { - ResultItem() = default; - ResultItem(const IndexType index, const DistanceType distance) - : first(index), - second(distance) {} - - IndexType first; //!< Index of the sample in the dataset - DistanceType second; //!< Distance from sample to query point -}; - /** * A result-set class used when performing a radius based search. */ @@ -379,6 +387,12 @@ class RadiusResultSet { IndexDist_Sorter()); return *it; } + + void sort() { + std::sort(m_indices_dists.begin(), + m_indices_dists.end(), + IndexDist_Sorter()); + } }; /** @} */ @@ -895,7 +909,7 @@ template + typename index_t = uint32_t> class KDTreeBaseClass { public: /** Frees the previously-built index. Automatically called within @@ -908,6 +922,7 @@ class KDTreeBaseClass { using ElementType = typename Distance::ElementType; using DistanceType = typename Distance::DistanceType; + using IndexType = index_t; /** * Array of indices to vectors in the dataset_. @@ -1373,7 +1388,7 @@ class KDTreeBaseClass { * non-virtual, inlined methods): * * \code - * // Must return the number of data points + * // Must return the number of data poins * size_t kdtree_get_point_count() const { ... } * * @@ -1405,21 +1420,19 @@ class KDTreeBaseClass { template + typename index_t = uint32_t> class KDTreeSingleIndexAdaptor : public KDTreeBaseClass< - KDTreeSingleIndexAdaptor, + KDTreeSingleIndexAdaptor, Distance, DatasetAdaptor, DIM, - IndexType> { + index_t> { public: /** Deleted copy constructor*/ explicit KDTreeSingleIndexAdaptor( - const KDTreeSingleIndexAdaptor&) = delete; + const KDTreeSingleIndexAdaptor&) = + delete; /** The data source used by this index */ const DatasetAdaptor& dataset_; @@ -1430,11 +1443,11 @@ class KDTreeSingleIndexAdaptor using Base = typename nanoflann::KDTreeBaseClass< nanoflann:: - KDTreeSingleIndexAdaptor, + KDTreeSingleIndexAdaptor, Distance, DatasetAdaptor, DIM, - IndexType>; + index_t>; using Offset = typename Base::Offset; using Size = typename Base::Size; @@ -1442,6 +1455,7 @@ class KDTreeSingleIndexAdaptor using ElementType = typename Base::ElementType; using DistanceType = typename Base::DistanceType; + using IndexType = typename Base::IndexType; using Node = typename Base::Node; using NodePtr = Node*; @@ -1592,6 +1606,10 @@ class KDTreeSingleIndexAdaptor assign(dists, (DIM > 0 ? DIM : Base::dim_), zero); DistanceType dist = this->computeInitialDistances(*this, vec, dists); searchLevel(result, vec, Base::root_node_, dist, dists, epsError); + + if (searchParams.sorted) + result.sort(); + return result.full(); } @@ -1647,8 +1665,6 @@ class KDTreeSingleIndexAdaptor RadiusResultSet resultSet(radius, IndicesDists); const Size nFound = radiusSearchCustomCallback(query_point, resultSet, searchParams); - if (searchParams.sorted) - std::sort(IndicesDists.begin(), IndicesDists.end(), IndexDist_Sorter()); return nFound; } @@ -1832,7 +1848,7 @@ class KDTreeSingleIndexAdaptor * non-virtual, inlined methods): * * \code - * // Must return the number of data points + * // Must return the number of data poins * size_t kdtree_get_point_count() const { ... } * * // Must return the dim'th component of the idx'th point in the class: @@ -1958,7 +1974,7 @@ class KDTreeSingleIndexDynamicAdaptor_ KDTreeSingleIndexDynamicAdaptor_( const KDTreeSingleIndexDynamicAdaptor_& rhs) = default; - /** Assignment operator definition */ + /** Assignment operator definiton */ KDTreeSingleIndexDynamicAdaptor_ operator=(const KDTreeSingleIndexDynamicAdaptor_& rhs) { KDTreeSingleIndexDynamicAdaptor_ tmp(rhs); @@ -2101,8 +2117,6 @@ class KDTreeSingleIndexDynamicAdaptor_ RadiusResultSet resultSet(radius, IndicesDists); const size_t nFound = radiusSearchCustomCallback(query_point, resultSet, searchParams); - if (searchParams.sorted) - std::sort(IndicesDists.begin(), IndicesDists.end(), IndexDist_Sorter()); return nFound; } From e0b326bc3d84476271df0b5da9181041cd79aabc Mon Sep 17 00:00:00 2001 From: Jaewook Lee Date: Fri, 26 Jul 2024 17:06:25 +0200 Subject: [PATCH 4/8] add dep and mininum version --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 3b109c4..b49d60a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,9 @@ classifiers=[ "Natural Language :: English", "Topic :: Scientific/Engineering", ] +dependencies = ["numpy"] dynamic = ["version"] +requires-python = ">=3.7" [tool.scikit-build.metadata.version] provider = "scikit_build_core.metadata.regex" From 0ebf179e184a2beeb7d862128bf8c68d590c1778 Mon Sep 17 00:00:00 2001 From: Jaewook Lee Date: Fri, 26 Jul 2024 17:13:31 +0200 Subject: [PATCH 5/8] use normal install --- .github/workflows/docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index c07e3b5..5004d04 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -18,7 +18,7 @@ jobs: python-version: "3.10" - name: build napf linux - run: pip install -e. + run: pip install . - name: test run: python3 tests/test_init_and_query.py From 6e97e0bbfa5b7ebdecf0dc424f487db9193fd151 Mon Sep 17 00:00:00 2001 From: Jaewook Lee Date: Fri, 26 Jul 2024 17:24:25 +0200 Subject: [PATCH 6/8] fix typos and add src/python --- CMakeLists.txt | 20 ++++---------------- src/python/CMakeLists.txt | 8 ++++---- 2 files changed, 8 insertions(+), 20 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3ca8559..535c1fb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,9 +9,7 @@ if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) endif() # options -option(BUILD_EXAMPLES "build examples" ON) -option(BUILD_FORTRAN_MODULE "build fortran module" ON) -option(SPLINEPY_EXT "add splinepy extension" ON) +option(NAPF_BUILD_PYTHON "build python module" ON) # config set(exe_dest "bin") @@ -49,19 +47,9 @@ else() endif() target_compile_features(napf INTERFACE cxx_std_11) -if(BUILD_FORTRAN_MODULE) - message("*** building additional fortran module ***") - message("*** ------ NOT IMPLEMENTED ----------- ***") -endif(BUILD_FORTRAN_MODULE) - -if(BUILD_EXAMPLES) - message("*** building examples ***") - message("*** ------ NOT IMPLEMENTED ----------- ***") -endif(BUILD_EXAMPLES) - -if(SPLINEPY_EXT) - target_compile_definitions(napf INTERFACE -DSPLINEPYEXT) -endif(SPLINEPY_EXT) +if(NAPF_BUILD_PYTHON) + add_subdirectory(src/python) +endif() # configure config files include(CMakePackageConfigHelpers) diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index a2aa959..bbdf10d 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -7,12 +7,12 @@ find_package(Python REQUIRED COMPONENTS Interpreter Development.Module) find_package(pybind11 CONFIG REQUIRED) python_add_library(_napf MODULE ${NAPF_SOURCES}) -target_link_libraries(_napf PRIVATE pybind11::headers) -target_compile_definitions(_napf PRIVATE $<$>:NDEBUG) +target_link_libraries(_napf PRIVATE pybind11::headers napf) +target_compile_definitions(_napf PRIVATE $<$>:NDEBUG>) if(CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") - target_compile_options(funi PRIVATE $<$>:-O3>) + target_compile_options(_napf PRIVATE $<$>:-O3>) elseif(CMAKE_CXX_COMPILER_ID MATCHES "MSCV") - target_compile_options(funi PRIVATE $<$>:/O2>) + target_compile_options(_napf PRIVATE $<$>:/O2>) endif() install( From 3360429a9d671453f695942d1cbd6109d384a1d6 Mon Sep 17 00:00:00 2001 From: Jaewook Lee Date: Fri, 26 Jul 2024 17:39:23 +0200 Subject: [PATCH 7/8] rm pp and musllinux build --- .github/workflows/main.yml | 20 ++++---------------- pyproject.toml | 6 ++++++ 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 0cb2d36..1fbb924 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -41,11 +41,6 @@ jobs: linux_wheel: runs-on: ubuntu-latest - strategy: - matrix: - arch: [x86_64] - cw_build: ["cp*manylinux*", "pp*manylinux*", "*musllinux*"] - steps: - uses: actions/checkout@v3 @@ -54,8 +49,8 @@ jobs: - name: build wheels uses: pypa/cibuildwheel@v2.19 env: - CIBW_ARCHS: ${{ matrix.arch }} - CIBW_BUILD: ${{ matrix.cw_build }} + CIBW_ARCHS: x86_64 + CIBW_BUILD: "cp*manylinux*" - uses: actions/upload-artifact@v3 with: @@ -66,14 +61,7 @@ jobs: strategy: matrix: arch: [aarch64, ppc64le] - cw_build: ["cp37*many*", "cp38*many*", "cp39*many*", "cp310*many*", "cp311*many*", "cp312*many*", "pp37*many*", "pp38*many*", "pp39*many*"] - exclude: - - arch: ppc64le - cw_build: "pp37*many*" - - arch: ppc64le - cw_build: "pp38*many*" - - arch: ppc64le - cw_build: "pp39*many*" + cw_build: ["cp37*many*", "cp38*many*", "cp39*many*", "cp310*many*", "cp311*many*", "cp312*many*"] steps: - uses: actions/checkout@v3 @@ -91,7 +79,7 @@ jobs: env: CIBW_ARCHS: ${{ matrix.arch }} CIBW_BUILD: ${{ matrix.cw_build }} - CIBW_TEST_SKIP: "*-*linux_{aarch64,ppc64le,s390x}" + CIBW_TEST_SKIP: "*-*linux_{aarch64,ppc64le}" - uses: actions/upload-artifact@v3 with: diff --git a/pyproject.toml b/pyproject.toml index b49d60a..bf2c0ed 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,12 @@ test-command = "python {project}/tests/test_init_and_query.py" file = "README.md" content-type = "text/markdown" +[tool.cibuildwheel.macos] +skip = "pp*" + +[tool.cibuildwheel.linux] +skip = "pp*" + [tool.cibuildwheel.windows] skip = "pp*" From 4469b737fc441e7cdf1c8470dba1501960ee5c7d Mon Sep 17 00:00:00 2001 From: Jaewook Lee Date: Fri, 26 Jul 2024 17:47:48 +0200 Subject: [PATCH 8/8] update readme --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 03f825a..55cd0c1 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,10 @@ -# napf +

+ +**napf - [nanoflann](https://github.com/jlblancoc/nanoflann) wrappers for python and maybe fortran** + [![main](https://github.com/tataratat/napf/actions/workflows/main.yml/badge.svg)](https://github.com/tataratat/napf/actions/workflows/main.yml) [![PyPI version](https://badge.fury.io/py/napf.svg)](https://badge.fury.io/py/napf) -[nanoflann](https://github.com/jlblancoc/nanoflann) wrappers for python and maybe fortran. ## python As `nanoflann` offers template classes, separate classes are implemented in `napf` for each ___{datatype, distance metric}___. All the search functions are equipped with multi-thread execution. Uses [numpy.ndarray](https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html) for data input and output.