Skip to content
Merged
11 changes: 6 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ cmake_minimum_required(VERSION 3.22)
option(MV_UMAP_USE_OPENMP "Use OpenMP - by default ON" ON)
option(MV_UMAP_USE_AVX "Use AVX if available - by default OFF" OFF)
option(MV_UMAP_UNIT_TESTS "Create unit tests - by default OFF" OFF)
option(MV_UNITY_BUILD "Combine target source files into batches for faster compilation" OFF)

# Set DOWNLOAD_EXTRACT_TIMESTAMP option to the time of the extraction, added in 3.24
if(POLICY CMP0135)
Expand All @@ -21,7 +22,6 @@ PROJECT(${UMAPANALYSIS}
# CMake Options
# -----------------------------------------------------------------------------
set(CMAKE_INCLUDE_CURRENT_DIR ON)
set(CMAKE_AUTOMOC ON)

if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /DWIN32 /EHsc /MP /permissive- /Zc:__cplusplus")
Expand Down Expand Up @@ -85,7 +85,6 @@ set(UMAPANALYSIS_DISTANCES
src/util/hnsw_space_corr.h
src/util/knncolle_matrix_parallel.h
src/util/knncolle_hnsw_parallel.h
src/util/knncolle_find_nearest_neighbors.h
)

set(UMAPANALYSIS_AUX
Expand Down Expand Up @@ -115,15 +114,17 @@ add_library(${UMAPANALYSIS} SHARED ${UMAPANALYSIS_SOURCES} ${UMAPANALYSIS_AUX})
# Include ManiVault headers, including system data plugins
target_include_directories(${UMAPANALYSIS} PRIVATE "${ManiVault_INCLUDE_DIR}")

# Include own sources for cross-platform include consistency
# (the path is used directly; wrapping it in a second ${...} would look up a
# variable named after the path instead of passing the directory itself)
target_include_directories(${UMAPANALYSIS} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/src")

# -----------------------------------------------------------------------------
# Target properties
# -----------------------------------------------------------------------------
# Request C++20
target_compile_features(${UMAPANALYSIS} PRIVATE cxx_std_20)

# AUTOMOC is always enabled for the plugin target
set_property(TARGET ${UMAPANALYSIS} PROPERTY AUTOMOC ON)
# Unity builds follow the MV_UNITY_BUILD option
set_property(TARGET ${UMAPANALYSIS} PROPERTY UNITY_BUILD ${MV_UNITY_BUILD})

# /bigobj is only handed to the MSVC compiler (needed for Eigen)
target_compile_options(${UMAPANALYSIS} PRIVATE "$<$<CXX_COMPILER_ID:MSVC>:/bigobj>")
Expand Down
10 changes: 5 additions & 5 deletions cmake/GetExternalDependencies.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@ if(NOT libscran_umappp_FOUND AND NOT TARGET libscran::umappp)

# the patches enable us to use the local dependency versions
fetch_content_url(aarand "https://github.com/LTLA/aarand/archive/refs/tags/v1.1.0.zip")
fetch_content_url(subpar "https://github.com/LTLA/subpar/archive/refs/tags/v0.4.1.zip")
fetch_content_url(sanisizer "https://github.com/LTLA/sanisizer/archive/refs/tags/v0.2.1.zip")
fetch_content_url(subpar "https://github.com/LTLA/subpar/archive/refs/tags/v0.5.0.zip")
fetch_content_url(sanisizer "https://github.com/LTLA/sanisizer/archive/refs/tags/v0.2.2.zip")
fetch_cpm_repo_patch(knncolle "https://github.com/knncolle/knncolle.git" v3.1.0 "knncolle.patch") # depends on subpar
fetch_cpm_repo_patch(irlba "https://github.com/LTLA/CppIrlba.git" v3.0.1 "irlba.patch") # depends on eigen, aarand and subpar
fetch_cpm_repo_patch(umappp "https://github.com/libscran/umappp.git" v3.2.0 "umappp.patch") # depends on aarand, subpar, CppIrlba and knncolle
fetch_cpm_repo_patch(irlba "https://github.com/LTLA/CppIrlba.git" v3.1.0 "irlba.patch") # depends on eigen, aarand and subpar
fetch_cpm_repo_patch(umappp "https://github.com/libscran/umappp.git" v3.3.2 "umappp.patch") # depends on aarand, subpar, CppIrlba and knncolle
else()
message(STATUS "UMAPAnalysisPlugin: Using external Umappp")
endif()
Expand All @@ -37,7 +37,7 @@ endif()

find_package(knncolle_knncolle_hnsw CONFIG QUIET)
if(NOT knncolle_knncolle_hnsw_FOUND AND NOT TARGET knncolle::knncolle_hnsw)
fetch_cpm_repo_patch(knncolle_hnsw "https://github.com/knncolle/knncolle_hnsw.git" v0.3.0 "knncolle_hnsw.patch")
fetch_cpm_repo_patch(knncolle_hnsw "https://github.com/knncolle/knncolle_hnsw.git" v0.3.2 "knncolle_hnsw.patch")
endif()

find_package(Annoy CONFIG QUIET)
Expand Down
6 changes: 3 additions & 3 deletions cmake/irlba.patch
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
target_compile_features(irlba INTERFACE cxx_std_17)

-# Dependencies
-option(IRLBA_FETCH_EXTERN "Automatically fetch CppIrlba's dependencies." ON)
-option(IRLBA_FETCH_EXTERN "Automatically fetch irlba's dependencies." ON)
-if(IRLBA_FETCH_EXTERN)
- add_subdirectory(extern)
-else()
Expand All @@ -23,9 +23,9 @@
-
-# Tests
-if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME)
- option(IRLBA_TESTS "Build CppIrlba's test suite." ON)
- option(IRLBA_TESTS "Build irlba's test suite." ON)
-else()
- option(IRLBA_TESTS "Build CppIrlba's test suite." OFF)
- option(IRLBA_TESTS "Build irlba's test suite." OFF)
-endif()
-if(IRLBA_TESTS)
- include(CTest)
Expand Down
13 changes: 1 addition & 12 deletions cmake/knncolle_hnsw.patch
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
- add_subdirectory(extern)
-else()
- find_package(knncolle_knncolle 3.1.0 CONFIG REQUIRED)
- find_package(ltla_sanisizer 0.1.5 CONFIG REQUIRED)
- find_package(ltla_sanisizer 0.2.0 CONFIG REQUIRED)
- find_package(hnswlib CONFIG REQUIRED)
-endif()
-
Expand Down Expand Up @@ -57,14 +57,3 @@
-install(FILES "${CMAKE_CURRENT_BINARY_DIR}/knncolle_knncolle_hnswConfig.cmake"
- "${CMAKE_CURRENT_BINARY_DIR}/knncolle_knncolle_hnswConfigVersion.cmake"
- DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/knncolle_knncolle_hnsw)
--- a/include/knncolle_hnsw/Hnsw.hpp
+++ b/include/knncolle_hnsw/Hnsw.hpp
@@ -296,7 +296,7 @@ public:

// Dear God, make saveIndex() const.
auto index_ptr = const_cast<hnswlib::HierarchicalNSW<HnswData_>*>(&my_index);
- index_ptr->saveIndex(dir / "INDEX");
+ index_ptr->saveIndex((dir / "INDEX").string());
}

HnswPrebuilt(const std::filesystem::path& dir) :
32 changes: 16 additions & 16 deletions cmake/umappp.patch
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@
- add_subdirectory(extern)
-else()
- find_package(ltla_aarand 1.1.0 CONFIG REQUIRED)
- find_package(ltla_irlba 3.0.0 CONFIG REQUIRED)
- find_package(ltla_irlba 3.1.0 CONFIG REQUIRED)
- find_package(Eigen3 5.0.0 CONFIG REQUIRED)
- find_package(ltla_subpar 0.4.0 CONFIG REQUIRED)
- find_package(ltla_sanisizer 0.1.3 CONFIG REQUIRED)
- find_package(knncolle_knncolle 3.0.0 CONFIG REQUIRED)
- find_package(ltla_subpar 0.5.0 CONFIG REQUIRED)
- find_package(ltla_sanisizer 0.2.0 CONFIG REQUIRED)
- find_package(knncolle_knncolle 3.1.0 CONFIG REQUIRED)
-endif()
-
target_link_libraries(umappp INTERFACE ltla::aarand ltla::irlba ltla::subpar ltla::sanisizer knncolle::knncolle)
Expand Down Expand Up @@ -66,17 +66,17 @@ index d098cf5..baba947 100644
@PACKAGE_INIT@

include(CMakeFindDependencyMacro)
-find_dependency(ltla_aarand 1.1.0 CONFIG REQUIRED)
-find_dependency(ltla_sanisizer 0.1.3 CONFIG REQUIRED)
-find_dependency(ltla_subpar 0.4.0 CONFIG REQUIRED)
-find_dependency(ltla_irlba 3.0.0 CONFIG REQUIRED)
-find_dependency(Eigen3 5.0.0 CONFIG REQUIRED)
-find_dependency(knncolle_knncolle 3.0.0 CONFIG REQUIRED)
+find_dependency(ltla_aarand CONFIG REQUIRED)
+find_dependency(ltla_sanisizer CONFIG REQUIRED)
+find_dependency(ltla_subpar CONFIG REQUIRED)
+find_dependency(ltla_irlba CONFIG REQUIRED)
+find_dependency(Eigen3 CONFIG REQUIRED)
+find_dependency(knncolle_knncolle CONFIG REQUIRED)
-find_dependency(ltla_aarand 1.1.0 CONFIG)
-find_dependency(ltla_irlba 3.1.0 CONFIG)
-find_dependency(Eigen3 5.0.0 CONFIG)
-find_dependency(ltla_subpar 0.5.0 CONFIG)
-find_dependency(ltla_sanisizer 0.2.0 CONFIG)
-find_dependency(knncolle_knncolle 3.1.0 CONFIG)
+find_dependency(ltla_aarand CONFIG)
+find_dependency(ltla_sanisizer CONFIG)
+find_dependency(ltla_subpar CONFIG)
+find_dependency(ltla_irlba CONFIG)
+find_dependency(Eigen3 CONFIG)
+find_dependency(knncolle_knncolle CONFIG)

include("${CMAKE_CURRENT_LIST_DIR}/libscran_umapppTargets.cmake")
4 changes: 2 additions & 2 deletions src/AdvancedSettingsAction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@ AdvancedSettingsAction::AdvancedSettingsAction(QObject* parent) :
_local_connectivity.initialize(0, 10, _advParameters.local_connectivity, 2);
_bandwidth.initialize(0, 10, _advParameters.bandwidth, 2);
_mix_ratio.initialize(0, 1, _advParameters.mix_ratio, 2);
_spread.initialize(0.01, 10, _advParameters.spread, 2);
_min_dist.initialize(0.001, 10, _advParameters.min_dist, 4);
_spread.initialize(0.01f, 10, _advParameters.spread, 2);
_min_dist.initialize(0.001f, 10, _advParameters.min_dist, 4);
_a.initialize(0, 10, _advParameters.a, 4);
_b.initialize(0, 5, _advParameters.b, 4);
_repulsion_strength.initialize(0, 10, _advParameters.repulsion_strength, 2);
Expand Down
12 changes: 7 additions & 5 deletions src/UMAPAnalysisPlugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
#include "util/hnsw_space_corr.h"
#include "util/knncolle_matrix_parallel.h"
#include "util/knncolle_hnsw_parallel.h"
#include "util/knncolle_find_nearest_neighbors.h"

#pragma warning(disable:4267) // umappp internal: conversion warning
#include <umappp/initialize.hpp>
Expand Down Expand Up @@ -68,8 +67,10 @@ namespace

norm = 1.0f / (std::sqrt(norm) + 1e-30f);

const std::int64_t data_size = static_cast<std::int64_t>(data.size());

// 'parallel for' distributes the loop iterations over the thread team; a bare
// 'parallel' would let every thread run the complete loop and rescale each
// element several times (and race on the writes)
#pragma omp parallel for
for (std::int64_t i = 0; i < data.size(); i++)
for (std::int64_t i = 0; i < data_size; i++)
data[i] *= norm;
}

Expand Down Expand Up @@ -350,7 +351,7 @@ void UMAPWorker::compute()
num_threads_knn = num_threads_available;
}

if (num_threads_layout) {
if (parallel_layout) {
num_threads_layout = num_threads_available;
}
}
Expand Down Expand Up @@ -422,7 +423,7 @@ void UMAPWorker::compute()
}

qDebug() << "UMAP: querying knn in searcher: " << numNeighbors << " neighbors";
nearestNeighbors = knncolle::find_nearest_neighbors_custom<integer_t, scalar_t, scalar_t>(*searcher, numNeighbors, num_threads_knn);
nearestNeighbors = knncolle::find_nearest_neighbors<integer_t, scalar_t, scalar_t>(*searcher, numNeighbors, num_threads_knn);
qDebug() << "UMAP: finished knn";

}
Expand Down Expand Up @@ -478,7 +479,8 @@ void UMAPWorker::compute()
opt.initialize_seed = advancedSettings.seed;

if (parallel_layout) {
opt.parallel_optimization = true;
opt.num_threads_optimize = num_threads_layout;
opt.num_threads_spectral = num_threads_layout;
opt.num_threads = num_threads_layout;
}

Expand Down
42 changes: 0 additions & 42 deletions src/util/knncolle_find_nearest_neighbors.h

This file was deleted.

82 changes: 7 additions & 75 deletions src/util/knncolle_hnsw_parallel.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,84 +7,14 @@
#include <atomic>
#include <thread>
#include <type_traits>
#include <mutex>
#include <vector>
#include <exception>
#include <queue>
#include <utility>

/*
* Source: https://github.com/nmslib/nmslib/blob/v2.1.1/similarity_search/include/thread_pool.h#L62
* Apache License Version 2.0, Main developers: Bilegsaikhan Naidan, Leonid Boytsov, Yury Malkov, Ben Frederickson, David Novak
*/
namespace hnswlib {
    /*
     * Replacement for the OpenMP '#pragma omp parallel for' directive.
     * Only handles a subset of functionality (no reductions etc).
     *
     * Calls fn(id, threadId) once for every id from start (inclusive) to
     * end (EXCLUSIVE). threadId identifies the worker executing the call and
     * is always smaller than the requested numThreads.
     *
     * numThreads == 0 selects std::thread::hardware_concurrency() workers.
     * If a call to fn throws, the remaining ids are abandoned and the last
     * captured exception is rethrown on the calling thread once all workers
     * have been joined.
     */
    template<class Function>
    inline void ParallelFor(size_t start, size_t end, size_t numThreads, Function fn) {
        // Nothing to do for an empty (or inverted) range
        if (start >= end) {
            return;
        }

        if (numThreads == 0) {
            numThreads = std::thread::hardware_concurrency();

            // hardware_concurrency() is allowed to return 0 when the value is
            // not computable; without a fallback no worker would be started
            // and the whole range would silently be skipped
            if (numThreads == 0) {
                numThreads = 1;
            }
        }

        // More workers than ids would only spawn threads that exit immediately
        const size_t numIds = end - start;
        if (numThreads > numIds) {
            numThreads = numIds;
        }

        if (numThreads == 1) {
            for (size_t id = start; id < end; id++) {
                fn(id, 0);
            }
            return;
        }

        std::vector<std::thread> threads;
        threads.reserve(numThreads);
        std::atomic<size_t> current(start);

        // keep track of exceptions in threads
        // https://stackoverflow.com/a/32428427/1713196
        std::exception_ptr lastException = nullptr;
        std::mutex lastExceptMutex;

        for (size_t threadId = 0; threadId < numThreads; ++threadId) {
            threads.push_back(std::thread([&, threadId] {
                while (true) {
                    const size_t id = current.fetch_add(1);

                    if (id >= end) {
                        break;
                    }

                    try {
                        fn(id, threadId);
                    }
                    catch (...) {
                        std::unique_lock<std::mutex> lastExcepLock(lastExceptMutex);
                        lastException = std::current_exception();
                        /*
                         * This will work even when current is the largest value that
                         * size_t can fit, because fetch_add returns the previous value
                         * before the increment (what will result in overflow
                         * and produce 0 instead of current + 1).
                         */
                        current = end;
                        break;
                    }
                }
            }));
        }

        for (auto& thread : threads) {
            thread.join();
        }

        if (lastException) {
            std::rethrow_exception(lastException);
        }
    }

} // namespace hnswlib


/**
* Source: https://github.com/knncolle/knncolle_hnsw/blob/v0.2.1/include/knncolle_hnsw/knncolle_hnsw.hpp
* MIT License, Main developer: Aaron Lun
* Changes: parallelize adding points to hnsw search index
*/
namespace knncolle_hnsw {

Expand Down Expand Up @@ -284,10 +214,11 @@ namespace knncolle_hnsw {
auto ptr = work_par->get(0);
my_index.addPoint(ptr, 0);
const unsigned num_threads = std::thread::hardware_concurrency();
hnswlib::ParallelFor(1, my_obs, num_threads, [&](size_t i, size_t threadId) {
// Point 0 is added sequentially before this loop, so the parallel part
// starts at 1 and does not add label 0 a second time
#pragma omp parallel for num_threads(num_threads) schedule(dynamic, 1)
for (Index_ i = 1; i < my_obs; ++i) {
auto ptr = work_par->get(i);
my_index.addPoint(ptr, i);
});
}
}

}
Expand All @@ -307,12 +238,13 @@ namespace knncolle_hnsw {
std::copy_n(ptr, my_dim, incoming.begin());
my_index.addPoint(incoming.data(), 0);
const unsigned num_threads = std::thread::hardware_concurrency();
hnswlib::ParallelFor(1, my_obs, num_threads, [&](size_t i, size_t threadId) {
// Point 0 is added sequentially before this loop, so the parallel part
// starts at 1 and does not add label 0 a second time
#pragma omp parallel for num_threads(num_threads) schedule(dynamic, 1)
for (Index_ i = 1; i < my_obs; ++i) {
std::vector<HnswData_> incoming(my_dim);
auto ptr = work_par->get(i);
std::copy_n(ptr, my_dim, incoming.begin());
my_index.addPoint(incoming.data(), i);
});
}
}

}
Expand Down
1 change: 1 addition & 0 deletions src/util/knncolle_matrix_parallel.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
/**
* Source: https://github.com/knncolle/knncolle/blob/v3.0.1/include/knncolle/Matrix.hpp
* MIT License, Main developer: Aaron Lun
* Changes: add get(std::size_t point) to SimpleMatrixExtractor
*/
namespace knncolle {

Expand Down
Loading
Loading