diff --git a/.gitmodules b/.gitmodules index 940e1e27..c5be00cf 100644 --- a/.gitmodules +++ b/.gitmodules @@ -46,3 +46,9 @@ [submodule "discreture"] path = discreture url = https://github.com/mraggi/discreture/ +[submodule "diskmat"] + path = diskmat + url = https://github.com/dnbaker/diskmat +[submodule "cpp-btree"] + path = cpp-btree + url = https://github.com/Kronuz/cpp-btree diff --git a/.travis.yml b/.travis.yml index 6f56f655..3aee190e 100755 --- a/.travis.yml +++ b/.travis.yml @@ -31,6 +31,7 @@ script: - ./jsdhashdbg - ./fgcinctestdbg - ./geomedtestdbg + - ./sparsepriortestdbg notifications: slack: jhu-genomics:BbHYSks7DhOolq80IYf6m9oe#libsketch rooms: diff --git a/Makefile b/Makefile index 12f7e20f..cbad73b3 100644 --- a/Makefile +++ b/Makefile @@ -32,7 +32,7 @@ WARNINGS+=-Wall -Wextra -Wpointer-arith -Wformat -Wunused-variable -Wno-attribut OPT?=O3 LDFLAGS+=$(LIBS) -lz $(LINKS) EXTRA?= -DEFINES+= -DBLAZE_RANDOM_NUMBER_GENERATOR='wy::WyHash' +DEFINES+= #-DBLAZE_RANDOM_NUMBER_GENERATOR='wy::WyHash' CXXFLAGS+=-$(OPT) -std=$(STD) -march=native $(WARNINGS) $(INCLUDE) $(DEFINES) $(BLAS_LINKING_FLAGS) \ -DBOOST_NO_AUTO_PTR @@ -63,7 +63,7 @@ LINKS += -ltbb endif TESTS=tbmdbg coreset_testdbg bztestdbg btestdbg osm2dimacsdbg dmlsearchdbg diskmattestdbg graphtestdbg jvtestdbg kmpptestdbg tbasdbg \ - jsdtestdbg jsdkmeanstestdbg jsdhashdbg fgcinctestdbg geomedtestdbg oracle_thorup_ddbg + jsdtestdbg jsdkmeanstestdbg jsdhashdbg fgcinctestdbg geomedtestdbg oracle_thorup_ddbg sparsepriortestdbg clust: kzclustexpdbg kzclustexp kzclustexpf @@ -78,7 +78,9 @@ CXXFLAGS += $(EXTRA) CXXFLAGS += $(LDFLAGS) -%dbg: src/%.cpp $(wildcard include/minocore/*.h) +HEADERS=$(shell find include -name '*.h') + +%dbg: src/%.cpp $(HEADERS) $(CXX) $(CXXFLAGS) $< -o $@ -pthread printlibs: @@ -91,7 +93,7 @@ graphrun: src/graphtest.cpp $(wildcard include/minocore/*.h) dmlrun: src/dmlsearch.cpp $(wildcard include/minocore/*.h) $(CXX) $(CXXFLAGS) $< -o $@ -DNDEBUG $(OMP_STR) -%: src/%.cpp $(wildcard include/minocore/*.h) +%: src/%.cpp $(HEADERS) $(CXX) $(CXXFLAGS) $< -o $@ -DNDEBUG $(OMP_STR) -O3 alphaest: src/alphaest.cpp $(wildcard include/minocore/*.h) diff --git a/cpp-btree b/cpp-btree new file mode 160000 index 00000000..405ecf43 --- /dev/null +++ b/cpp-btree @@ -0,0 +1 @@ +Subproject commit 405ecf43729be4b65d35962f244ed94028edd585 diff --git a/diskmat b/diskmat new file mode 160000 index 00000000..5e30f828 --- /dev/null +++ b/diskmat @@ -0,0 +1 @@ +Subproject commit 5e30f828ff2cdae5304eff712ff9b316d1109a15 diff --git a/exp/generate_bregman_data.py b/exp/generate_bregman_data.py new file mode 100644 index 00000000..4820e988 --- /dev/null +++ b/exp/generate_bregman_data.py @@ -0,0 +1,56 @@ +import numpy as np +import sys +import argparse + +try: + from cytoolz import frequencies as Counter +except ImportError: + from collections import Counter +np.random.seed(0) + +ap = argparse.ArgumentParser() +ap.add_argument("--num-clusters", type=int, help="Number of clusters.", default=10) +ap.add_argument("--num-rows", type=int, help="Number of rows.", default=5000) +ap.add_argument("--num-dim", type=int, help="Number of dimensions.", default=50) +ap.add_argument("--set-noise", type=float, default=1.) +ap.add_argument("--set-data-variance", type=float, default=5.) 
+ap.add_argument("--outfile", type=str, default="randombregman.out") +ap.add_argument("--sample-coverage", type=int, default=1000) +ap = ap.parse_args() + +num_clusters = ap.num_clusters +num_dim = ap.num_dim +num_rows = ap.num_rows + +assert num_rows % num_clusters == 0, "num rows must be divisible by number of clusters" + +# Normalize +centers = np.abs(np.random.standard_cauchy(size=(num_clusters, num_dim)) * ap.set_data_variance) + +centers = (1. / np.sum(centers, axis=1))[:,np.newaxis] * centers + +datapoints = [] +for i in range(num_clusters): + for j in range(num_rows // num_clusters): + # Generate a number of samples, and then sample them. + nsamp = np.random.poisson(ap.sample_coverage) + row = centers[i] + np.random.standard_normal(size=(num_dim,)) + row = np.abs(row) + row /= np.sum(row) + selections = Counter(np.random.choice(len(row), p=row, size=(nsamp,))[:]) + samples = np.zeros((num_dim,)) + for k, v in selections.items(): + samples[k] = v + datapoints.append(samples) + +datapoints = np.vstack(datapoints) + +ordering = np.arange(0, num_rows, dtype=np.uint32) +np.random.shuffle(ordering) +with open(ap.outfile, "w") as ofp: + ofp.write("%d/%d/%d\n" % (num_rows, num_dim, num_clusters)) + for index in ordering: + ofp.write(" ".join(map(str, datapoints[index,:])) + "\n") +with open(ap.outfile + ".labels.txt", "w") as f: + f.write("\n".join(str(ordering[i] // (num_rows // num_clusters)) for i in range(num_rows))) + f.write("\n") diff --git a/exp/generate_kmeans_data.py b/exp/generate_kmeans_data.py new file mode 100644 index 00000000..d5d9d21f --- /dev/null +++ b/exp/generate_kmeans_data.py @@ -0,0 +1,30 @@ +import numpy as np +import sys +np.random.seed(0) + +num_clusters = 10 +num_dim = 50 +num_rows = 5000 +assert num_rows % num_clusters == 0 + +centers = np.abs(np.random.standard_normal(size=(num_clusters, num_dim)) * 5.) 
+ +points = np.vstack([np.random.standard_normal(size=(num_rows // num_clusters, num_dim)) * 1 + centers[i,:][np.newaxis, :] + for i in range(num_clusters)]) + +ordering = np.arange(0, num_rows, dtype=np.uint32) +np.random.shuffle(ordering) +if sys.argv[1:]: + ofp = open(sys.argv[1], "w") + labels = sys.argv[1] + ".labels.txt" +else: + ofp = open("random.out", "w") + labels = "random.out.labels.txt" +ofp.write("%d/%d/%d\n" % (num_rows, num_dim, num_clusters)) +for index in ordering: + ofp.write(" ".join(map(str, points[index,:])) + "\n") +with open(labels, "w") as f: + f.write("\n".join(str(ordering[i] // (num_rows // num_clusters)) for i in range(num_rows))) + f.write("\n") + +if ofp != sys.stdout: ofp.close() diff --git a/include/minocore/clustering.h b/include/minocore/clustering.h new file mode 100644 index 00000000..cdc68d59 --- /dev/null +++ b/include/minocore/clustering.h @@ -0,0 +1,8 @@ +#ifndef MINOCORE_CLUSTERING_HEADERS_H__ +#define MINOCORE_CLUSTERING_HEADERS_H__ + +#include "minocore/clustering/dispatch.h" +#include "minocore/clustering/traits.h" +#include "minocore/clustering/sampling.h" + +#endif /* MINOCORE_CLUSTERING_HEADERS_H__ */ diff --git a/include/minocore/clustering/centroid.h b/include/minocore/clustering/centroid.h new file mode 100644 index 00000000..b0e66183 --- /dev/null +++ b/include/minocore/clustering/centroid.h @@ -0,0 +1,161 @@ +#ifndef MINOCORE_CLUSTERING_CENTROID_H__ +#define MINOCORE_CLUSTERING_CENTROID_H__ +#include "minocore/dist.h" +#include "minocore/util/blaze_adaptor.h" +#include "minocore/optim/kmedian.h" + +namespace minocore { namespace clustering { + +struct CentroidPolicy { + template + static void perform_average(blaze::DenseVector &ret, const Range &r, const RowSums &rs, + const VT2 *wc = static_cast(nullptr), + dist::DissimilarityMeasure measure=static_cast(-1)) + { + using FT = blz::ElementType_t; + PREC_REQ(measure != static_cast(-1), "Must define dissimilarity measure"); + if(measure == dist::TOTAL_VARIATION_DISTANCE) { + PRETTY_SAY << "TVD: performing " << (wc ? static_cast("weighted"): static_cast("unweighted")) << "L1 median on *normalized* categorical distributions.\n"; + if(wc) + coresets::l1_median(r, ret, wc->data()); + else + coresets::l1_median(r, ret); + } + else if(measure == dist::L1) { + std::conditional_t, + blz::CompressedMatrix >, + blz::DynamicMatrix > + > cm = r % blz::expand(trans(rs), r.columns()); + PRETTY_SAY << "L1: performing " << (wc ? static_cast("weighted"): static_cast("unweighted")) << "L1 median on *unnormalized* categorical distributions, IE absolute count data.\n"; + if(wc) + coresets::l1_median(cm, ret, wc->data()); + else + coresets::l1_median(cm, ret); + } else if(measure == dist::LLR || measure == dist::UWLLR || measure == dist::OLLR) { + PRETTY_SAY << "LLR test\n"; + FT total_sum_inv; + if(wc) { + total_sum_inv = 1. / blz::dot(rs, *wc); + ~ret = blaze::sum(r % blz::expand(trans(*wc * rs), r.columns())) * total_sum_inv; + } else { + total_sum_inv = 1. / blaze::sum(rs); + ~ret = blaze::sum(r % blz::expand(trans(rs), r.columns())) * total_sum_inv; + } + } else if(wc) { + PRETTY_SAY << "Weighted, anything but L1 or LLR" << dist::detail::prob2str(measure) << '\n'; + assert((~(*wc)).size() == r.rows()); + assert(blz::expand(~(*wc), r.columns()).rows() == r.rows()); + assert(blz::expand(~(*wc), r.columns()).columns() == r.columns()); + auto wsuminv = 1. 
/ blaze::sum(*wc); + if(!dist::detail::is_probability(measure)) { // e.g., take mean of unscaled values + auto mat2schur = blz::expand(~(*wc) * rs, r.columns()); + PRETTY_SAY << "NOTPROB r dims: " << r.rows() << "/" << r.columns() << '\n'; + PRETTY_SAY << "NOTPROB mat2schur dims: " << mat2schur.rows() << "/" << mat2schur.columns() << '\n'; + ~ret = blaze::sum(r % blz::expand(~(*wc) * rs, r.columns())) * wsuminv; + } else { // Else take mean of scaled values + auto mat2schur = blz::expand(~(*wc), r.columns()); + PRETTY_SAY << "PROB r dims: " << r.rows() << "/" << r.columns() << '\n'; + PRETTY_SAY << "PROB mat2schur dims: " << mat2schur.rows() << "/" << mat2schur.columns() << '\n'; + ~ret = blaze::sum(r % blz::expand(~(*wc), r.columns())) * wsuminv; + assert(blaze::max(~ret) < 1. || !std::fprintf(stderr, "max in ret: %g for a probability distribution.", blaze::max(~ret))); + } + } else { + PRETTY_SAY << "Unweighted, anything but L1 or LLR" << dist::detail::prob2str(measure) << '\n'; + if(dist::detail::is_probability(measure)) { + // Weighted average for all +#ifndef NDEBUG + auto expansion = blz::expand(trans(rs), r.columns()); + PRETTY_SAY << "PROB r dims: " << r.rows() << "/" << r.columns() << '\n'; + PRETTY_SAY << "NOTPROB expansion dims: " << expansion.rows() << "/" << expansion.columns() << '\n'; +#endif + ~ret = blaze::sum(r % blz::expand(trans(rs), r.columns())) * (1. / (blaze::sum(rs) * r.rows())); + } else ~ret = blz::mean(r % blz::expand(trans(rs), r.columns())); + } + } + template + static void __perform_increment(FT neww, FT cw, Row &ret, const Src &dat, FT row_sum, dist::DissimilarityMeasure measure) + { + if(measure == dist::L1 || measure == dist::TOTAL_VARIATION_DISTANCE) + throw std::invalid_argument("__perform_increment is only for linearly-calculated means, not l1 median"); + if(cw == 0.) 
{ + if(dist::detail::is_probability(measure)) + ret = dat; + else + ret = dat * row_sum; + } else { + auto div = neww / (neww + cw); + if(dist::detail::is_probability(measure)) { + ret += (dat - ret) * div; + } else if(measure == dist::LLR || measure == dist::UWLLR) { + ret += (dat * row_sum) * neww; + // Add up total sum and subtract later + // since there are three weighting factors here: + // First, partial assignment + // Then point-wise weights (both of which are in neww) + // Then, for LLR/UWLLR, there's weighting by the row-sums + } else { + // Maintain running mean for full vector value + ret += (dat * row_sum - ret) * div; + } + } + } + + template> > + static void perform_soft_assignment(const blz::DenseMatrix &assignments, + const RowSums &rs, + OMP_ONLY(std::mutex *mutptr,) + const MatType &data, CenterCon &newcon, + const VT2 *wc = static_cast(nullptr), + dist::DissimilarityMeasure measure=static_cast(-1)) + { + using FT = blz::ElementType_t; + PREC_REQ(measure != static_cast(-1), "Must define dissimilarity measure"); + if(measure == dist::L1 || measure == dist::TOTAL_VARIATION_DISTANCE) { + OMP_PFOR + for(unsigned j = 0; j < newcon.size(); ++j) { + blz::DynamicVector newweights; + { + auto col = trans(column(assignments, j)); + if(wc) newweights = col * *wc; + else newweights = col; + } + if(measure == dist::L1) { + std::conditional_t, + blz::DynamicMatrix, blz::CompressedMatrix> + scaled_data = data % blz::expand(rs, data.columns()); + coresets::l1_median(scaled_data, newcon[j], newweights.data()); + } else { // TVD + coresets::l1_median(data, newcon[j], newweights.data()); + } + } + } else { + blz::DynamicVector summed_contribs(newcon.size(), 0.); + OMP_PFOR + for(size_t i = 0; i < data.rows(); ++i) { + auto item_weight = wc ? wc->operator[](i): static_cast(1.); + const auto row_sum = rs[i]; + auto asn(row(assignments, i, blz::unchecked)); + for(size_t j = 0; j < newcon.size(); ++j) { + auto &cw = summed_contribs[j]; + if(auto asnw = asn[j]; asnw > 0.) { + auto neww = item_weight * asnw; + OMP_ONLY(if(mutptr) mutptr[j].lock();) + __perform_increment(neww, cw, newcon[j], row(data, i, blz::unchecked), row_sum, measure); + OMP_ONLY(if(mutptr) mutptr[j].unlock();) + OMP_ATOMIC + cw += neww; + } + } + } + if(measure == dist::LLR || measure == dist::UWLLR || measure == dist::OLLR) { + OMP_PFOR + for(auto i = 0u; i < newcon.size(); ++i) + newcon[i] *= 1. 
/ blz::dot(column(assignments, i), rs); + } + } + } +}; // CentroidPolicy + +} } // namespace minocore::clustering + +#endif /* MINOCORE_CLUSTERING_CENTROID_H__ */ diff --git a/include/minocore/clustering/dispatch.h b/include/minocore/clustering/dispatch.h new file mode 100644 index 00000000..ef00c42b --- /dev/null +++ b/include/minocore/clustering/dispatch.h @@ -0,0 +1,574 @@ +#ifndef FGC_CLUSTERING_DISPATCH_H__ +#define FGC_CLUSTERING_DISPATCH_H__ +#include "minocore/dist.h" +#include "minocore/optim/jv_solver.h" +#include "minocore/optim/lsearch.h" +#include "minocore/optim/oracle_thorup.h" +#include "minocore/util/exception.h" +#include "minocore/clustering/traits.h" +#include "minocore/clustering/sampling.h" +#include "minocore/clustering/centroid.h" + +#include "boost/iterator/zip_iterator.hpp" +#include "diskmat/diskmat.h" + +namespace minocore { + +namespace clustering { + +using dist::DissimilarityMeasure; +using blaze::ElementType_t; +using diskmat::PolymorphicMat; +using boost::make_zip_iterator; + +template +bool use_packed_distmat(const T &app) { + if constexpr(jsd::is_dissimilarity_applicator_v) { + return dist::detail::is_symmetric(app.get_measure()); + } + return true; +} + +template +auto perform_cluster_metric_kmedian(const OracleType &app, size_t np, Traits traits) +{ + MetricSelectionResult ret; + + std::unique_ptr> distmatp; + std::unique_ptr> full_distmatp; + if(traits.compute_full) { + if(use_packed_distmat(app)) { + distmatp.reset(new dm::DistanceMatrix(np)); + for(size_t i = 0; i < np; ++i) { + auto [ptr, extent] = distmatp->row_span(i); + const auto offset = i + 1; + OMP_PFOR_DYN + for(size_t j = 0; j < extent; ++j) + ptr[j] = app(i, j + offset); + } + } else { + full_distmatp.reset(new PolymorphicMat(np, np)); + for(size_t i = 0; i < np; ++i) { + auto r = row(full_distmatp->operator~(), i, blaze::unchecked); + OMP_PFOR_DYN + for(size_t j = 0; j < np; ++j) { + r[j] = app(i, j); + } + } + } + } + auto fill_distance_mat = [&](const auto &lu) { + auto &retdm = std::get<3>(ret); + retdm.resize(std::get<0>(ret).size(), np); + for(size_t i = 0; i < std::get<0>(ret).size(); ++i) { + const auto cid = std::get<0>(ret)[i]; + auto rowptr = row(retdm, i); + OMP_PFOR + for(size_t j = 0; j < np; ++j) { + rowptr[j] = (unlikely(j == cid) ? 
static_cast(0.): FT(lu(cid, j))); + } + } + }; + if(traits.sampling == THORUP_SAMPLING) { + auto sample_and_fill = [&](const auto &x) { + std::tie(std::get<0>(ret), std::get<1>(ret), std::get<2>(ret)) + = iterated_oracle_thorup_d( + x, np, traits.k, traits.thorup_iter, traits.thorup_sub_iter, traits.weights, traits.thorup_npermult, 3, 0.5, traits.seed); + fill_distance_mat(x); + }; + if(distmatp) { + sample_and_fill(*distmatp); + } else if(full_distmatp) { + sample_and_fill(~*full_distmatp); + } else { + auto caching_app = make_row_caching_oracle_wrapper< + shared::flat_hash_map, /*is_symmetric=*/ true, /*is_threadsafe=*/true + >(app, np); + sample_and_fill(caching_app); + } + } else switch(traits.sampling) { + case D2_SAMPLING: { + ret = select_d2(app, np, traits); + break; + } + case UNIFORM_SAMPLING: { + ret = select_uniform_random(app, np, traits); + break; + } + case GREEDY_SAMPLING: { + ret = select_greedy(app, np, traits); + break; + } + case DEFAULT_SAMPLING: default: { + char buf[128]; + auto l = std::sprintf(buf, "Unrecognized sampling: %d\n", (int)DEFAULT_SAMPLING); + throw std::invalid_argument(std::string(buf, l)); + } + fill_distance_mat(app); + } + auto &costmat = ret.facility_cost_matrix(); + std::vector center_sol; + switch(traits.metric_solver) { + case JAIN_VAZIRANI_FL: case JV_PLUS_LOCAL_SEARCH: { + auto jvs = jv::make_jv_solver(costmat); + auto [c_centers, c_assignments] = jvs.kmedian(traits.k, traits.max_jv_rounds); + if(traits.metric_solver == JAIN_VAZIRANI_FL) { + center_sol = std::move(c_centers); + break; + } + // JV_PLUS_LOCAL_SEARCH + auto lsearcher = minocore::make_kmed_lsearcher(costmat, traits.k, traits.eps, traits.seed); + lsearcher.lazy_eval_ = 2; + lsearcher.assign_centers(c_centers.begin(), c_centers.end()); + lsearcher.run(); + center_sol.assign(lsearcher.sol_.begin(), lsearcher.sol_.end()); + break; + } + case LOCAL_SEARCH: { + auto lsearcher = minocore::make_kmed_lsearcher(costmat, traits.k, traits.eps, traits.seed); + lsearcher.lazy_eval_ = 2; + lsearcher.run(); + center_sol.assign(lsearcher.sol_.begin(), lsearcher.sol_.end()); + break; + } + default: throw std::invalid_argument("Unrecognized metric solver strategy"); + } + std::transform(center_sol.begin(), center_sol.end(), center_sol.begin(), + [&sel=ret.selected()](auto x) {return sel[x];}); + blaze::DynamicVector asn(np, center_sol.front()); + blaze::DynamicVector costs = trans(row(costmat, center_sol.front())); + for(unsigned ci = 1; ci < center_sol.size(); ++ci) { + auto r = row(costmat, ci); + OMP_PFOR + for(size_t i = 0; i < np; ++i) { + if(auto newv = r[i]; newv < costs[i]) + costs[i] = newv, asn[i] = center_sol[ci]; + } + } + shared::sort(center_sol.begin(), center_sol.end()); + return std::make_tuple(center_sol, asn, costs); +} + +enum LloydLoopResult { + FINISHED, + REACHED_MAX_ROUNDS, + UNFINISHED +}; + +template, + typename CostType> +LloydLoopResult perform_lloyd_loop(CentersType ¢ers, Assignments &assignments, + const jsd::DissimilarityApplicator &app, + unsigned k, CostType &retcost, uint64_t seed=0, const WFT *weights=static_cast(nullptr), + size_t max_iter=100, double eps=1e-4) +{ + if constexpr(asn_method == HARD) { + if(retcost.size() != app.size()) retcost.resize(app.size()); + } else { + // asn_method == SOFT || asn_method == SOFT_HARMONIC_MEAN + retcost.resize(app.size(), k); + } + assert(retcost.size() == app.size() || !std::fprintf(stderr, "retcost size: %zu. 
app size: %zu\n", retcost.size(), app.size())); + if(co != EXTRINSIC) throw std::invalid_argument("Must be extrinsic for Lloyd's"); + using FT = ElementType_t; + auto &mat = app.data(); + const size_t npoints = app.size(); + CentersType centers_cpy(centers), centers_cache; + MINOCORE_REQUIRE(centers.size() == k, "Must have the correct number of centers"); + const auto measure = app.get_measure(); + if(dist::detail::needs_logs(measure) || dist::detail::needs_sqrt(measure)) + centers_cache.resize(k); + FT current_cost = std::numeric_limits::max(), first_cost = current_cost; + //PRETTY_SAY << "Beginning\n"; + LloydLoopResult ret = UNFINISHED; + wy::WyRand rng(seed); + size_t iternum = 0; + // HEY DANIEL WHEN YOU GET BACK HERE + // You are removing the center_distance + // and instead calculating the objective function + // and terminating when the change in objective function is less than eps * first cost. + using cv_t = blaze::CustomVector; + std::unique_ptr weight_cv; + if(weights) { + weight_cv.reset(new cv_t(const_cast(weights), npoints)); + } + auto getcache = [&] (size_t j) { + decltype(¢ers_cache[j]) ret = nullptr; + if(centers_cache.size()) ret = ¢ers_cache[j]; + return ret; + }; + assert(centers_cache.empty() || getcache(0) == nullptr); + auto getcost = [&]() { + if constexpr(asn_method == HARD) { + return weight_cv ? blz::sum(retcost * *weight_cv): blz::sum(retcost); + } else { +#ifndef NDEBUG + // Ensure that the assignments are as expected. + for(size_t i = 0; i < assignments.rows(); ++i) { + auto r(row(assignments, i)); + auto cr(row(retcost, i)); + auto maxi = std::max_element(r.begin(), r.end()) - r.begin(); + auto mini = std::min_element(cr.begin(), cr.end()) - cr.begin(); + //std::cerr << "mini: " << mini << '\n'; + //std::cerr << "maxi: " << maxi << '\n'; + assert(std::abs(blaze::sum(r) - 1.) < 1e-4); + assert(maxi == mini || r[maxi] == r[mini] || cr[mini] == cr[maxi] + || &(std::cerr << r << '\n' << cr << '\n') == nullptr); + } +#endif + if(weight_cv) { + auto ew = blaze::expand(*weight_cv, app.data().columns()); + std::fprintf(stderr, "expanded weight shape: %zu/%zu. asn: %zu/%zu\n", ew.rows(), ew.columns(), assignments.rows(), assignments.columns()); + return blaze::sum(assignments % retcost % ew); + } else { + return blaze::sum(assignments % retcost); + } + } + }; + auto check = [&]() { + ++iternum; + if(first_cost == std::numeric_limits::max()) first_cost = getcost(); + else { + FT itercost = getcost(); + if(current_cost == std::numeric_limits::max()) { + current_cost = itercost; + assert(current_cost != std::numeric_limits::max()); + } else { + if(std::abs(itercost - current_cost) < eps * first_cost) { // consider taking sign into account here + PRETTY_SAY << "Itercost: " << itercost << " vs current " << current_cost << " with diff " << std::abs(itercost - current_cost) + << "compared to first cost of " << first_cost << " with eps = " << eps << ".\n"; + return FINISHED; + } + if(iternum == max_iter) + return REACHED_MAX_ROUNDS; + } + current_cost = itercost; + } + PRETTY_SAY << "iternum: " << iternum << '\n'; + return UNFINISHED; + }; + auto soft_assignments = [&]() { + if constexpr(asn_method != HARD) { + OMP_PFOR + for(size_t i = 0; i < npoints; ++i) { + auto row = blaze::row(retcost, i BLAZE_CHECK_DEBUG); + for(unsigned j = 0; j < centers.size(); ++j) { + row[j] = app(i, centers[j], getcache(j), measure); + } + auto asnrow = blaze::row(assignments, i BLAZE_CHECK_DEBUG); + if constexpr(asn_method == SOFT_HARMONIC_MEAN) { + asnrow = 1. 
/ row; + } else { + auto mv = blaze::min(row); + assert(mv >= 0.); + asnrow = blaze::exp(-row + mv); + assert(blaze::min(asnrow) >= 0.); + } + asnrow *= 1. / blaze::sum(asnrow); + assert(blaze::min(asnrow) >= 0.); + PRETTY_SAY << "row " << row << " yields " << asnrow << " with max " << blz::max(asnrow) << ", min " << blz::min(asnrow) <<'\n'; + } + } + }; + if constexpr(asn_method == HARD) { + std::vector<std::vector<size_t>> assigned(k); + OMP_ONLY(std::unique_ptr<std::mutex[]> mutexes(new std::mutex[k]);) + for(;;) { + // Do it forever + if(centers_cache.size()) { + PRETTY_SAY << "Setting centers cache for measure " << dist::detail::prob2str(measure) << '\n'; + for(unsigned i = 0; i < k; ++i) + dist::detail::set_cache(centers[i], centers_cache[i], measure); + } + for(auto &i: assigned) i.clear(); + OMP_PFOR + for(size_t i = 0; i < npoints; ++i) { + auto dist = app(i, centers[0], getcache(0), measure); + unsigned asn = 0; + for(unsigned j = 1; j < k; ++j) { + auto newdist = app(i, centers[j], getcache(j), measure); + if(newdist < dist) { + asn = j; + dist = newdist; + } + } + retcost[i] = dist; + assignments[i] = asn; + { + OMP_ONLY(std::unique_lock lock(mutexes[asn]);) + assigned[asn].push_back(i); + } + } + // Check termination condition + if(auto rc = check(); rc != UNFINISHED) { + ret = rc; + goto end; + } + blaze::SmallArray centers_to_restart; + for(unsigned i = 0; i < k; ++i) + if(assigned[i].empty()) + centers_to_restart.pushBack(i); + if(auto restartn = centers_to_restart.size()) { + // Use D^2 sampling to start a new cluster + // And then restart the loop + assert(retcost.size() == npoints); + retcost = std::numeric_limits<FT>::max(); + OMP_PFOR + for(size_t i = 0; i < npoints; ++i) { + for(size_t j = 0; j < k; ++j) { + if(assigned[j].empty()) continue; + auto fc = app(i, centers[j], getcache(j), measure); + if(fc < retcost[i]) retcost[i] = fc; + } + } + blaze::DynamicVector csum(npoints); + std::uniform_real_distribution<FT> urd; + for(size_t i = 0; i < restartn;) { + std::partial_sum(retcost.data(), retcost.data() + retcost.size(), csum.data()); + auto newp = std::lower_bound(csum.data(), csum.data() + csum.size(), urd(rng) * csum[csum.size() - 1]) + - csum.data(); + centers[centers_to_restart[i]] = row(app.data(), newp, blaze::unchecked); + if(++i != restartn) { + OMP_PFOR + for(size_t i = 0; i < npoints; ++i) + retcost[i] = std::min(retcost[i], app(i, newp)); + } + } + continue; // Reassign, re-center, and re-compute + } + // Make centers + for(size_t i = 0; i < centers_cpy.size(); ++i) { + auto &cref = centers_cpy[i]; + auto &assigned_ids = assigned[i]; + shared::sort(assigned_ids.begin(), assigned_ids.end()); // Better access pattern + auto aidptr = assigned_ids.data(); + const size_t nid = assigned_ids.size(); + auto rowsel = rows(mat, aidptr, nid); + auto sumsel = blaze::elements(app.row_sums(), aidptr, nid); + if(weight_cv) { + auto wsel = blaze::elements(*weight_cv, aidptr, nid); + CentroidPolicy::perform_average(cref, rowsel, sumsel, &wsel, measure); + } else { + CentroidPolicy::perform_average(cref, rowsel, sumsel, + static_cast(nullptr), measure + ); + //PRETTY_SAY << "Center " << i << " is " << cref << '\n'; + PRETTY_SAY << "Difference between previous center and new center is " << blz::sqrL2Dist(cref, centers[i]) << '\n'; + } + } + // Set the returned values to be the last iteration's.
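+ // (centers_cpy holds the centroids just recomputed from this round's hard assignments via CentroidPolicy::perform_average.)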
+ centers = centers_cpy; + } + } else { + if(assignments.rows() != npoints || assignments.columns() != centers.size()) { + assignments.resize(npoints, centers.size()); + } + std::unique_ptr mutexes; + OMP_ONLY(mutexes.reset(new std::mutex[centers.size()]);) + for(;;) { + if(centers_cache.size()) { + for(size_t i = 0; i < centers.size(); ++i) + dist::detail::set_cache(centers[i], centers_cache[i], measure); + } + for(auto &c: centers_cpy) c = static_cast(0); + soft_assignments(); + assert(blz::sum(assignments) - assignments.rows() < 1e-3 * assignments.rows()); + for(size_t i = 0; i < centers.size(); ++i) + if(blaze::sum(blaze::column(assignments, i)) == 0.) + throw TODOError("TODO: reassignment for support goes to 0"); + // Check termination condition + if(auto rc = check(); rc != UNFINISHED) { + ret = rc; + goto end; + } + // Now points have been assigned, and we now perform center assignment + CentroidPolicy::perform_soft_assignment( + assignments, app.row_sums(), + OMP_ONLY(mutexes.get(),) + app.data(), centers_cpy, weight_cv.get(), measure + ); + } + std::swap(centers_cpy, centers); + } + end: { + if(centers_cache.size()) { + for(size_t i = 0; i < centers.size(); ++i) + dist::detail::set_cache(centers[i], centers_cache[i], measure); + } + soft_assignments(); + } + DBG_ONLY(if(ret == FINISHED) PRETTY_SAY << "Completed Lloyd's loop in " << iternum << " iterations\n";) + return ret; +} + + + +template +void update_defaults_with_measure(ClusteringTraits &ct, dist::DissimilarityMeasure measure) { + if(ct.opt == DEFAULT_OPT) { + switch(measure) { + case dist::L2: + case dist::SQRL2: + case dist::L1: case dist::TVD: + case dist::COSINE_DISTANCE: + case dist::PROBABILITY_COSINE_DISTANCE: + case dist::LLR: case dist::UWLLR: + case dist::HELLINGER: case dist::BHATTACHARYYA_DISTANCE: case dist::BHATTACHARYYA_METRIC: + ct.opt = EXPECTATION_MAXIMIZATION; break; + /* + * Bregman Divergences, LLR, cosine distance use the (weighted) mean of each + * point, in either soft or hard clustering. + * TVD and L1 use the feature-wise median. + * Scores are either calculated with softmax distance or harmonic softmax + */ + case dist::ORACLE_METRIC: case dist::ORACLE_PSEUDOMETRIC: case dist::WASSERSTEIN: + /* otherwise, use metric kmedian */ + ct.opt = METRIC_KMEDIAN; break; + default: + if(dist::detail::is_bregman(measure)) { + ct.opt = EXPECTATION_MAXIMIZATION; + break; + } + } + } + if(ct.approx == DEFAULT_APPROX) { + if(ct.opt == EXPECTATION_MAXIMIZATION) ct.approx = BICRITERIA; + else ct.approx = CONSTANT_FACTOR; + } + if(ct.sampling == DEFAULT_SAMPLING) { + ct.sampling = ct.opt == EXPECTATION_MAXIMIZATION + ? 
D2_SAMPLING: THORUP_SAMPLING; + } +} + + +template +auto perform_clustering(const jsd::DissimilarityApplicator &app, size_t npoints, unsigned k, + const ElementType_t *weights=nullptr, + CenterSamplingType csample=DEFAULT_SAMPLING, + OptimizationMethod opt=DEFAULT_OPT, + ApproximateSolutionType approx=DEFAULT_APPROX, + uint64_t seed=0, + size_t max_iter=100, double eps=1e-4) +{ + MINOCORE_REQUIRE(npoints == app.size(), "assumption"); + using FT = typename MatrixType::ElementType; + + // Setup clustering traits + auto ct = make_clustering_traits(npoints, k, + csample, opt, approx, weights, seed, max_iter, eps); + using ct_t = decltype(ct); + auto measure = app.get_measure(); + update_defaults_with_measure(ct, measure); + + // and helpers + typename ct_t::centers_t centers; + centers.reserve(k); + typename ct_t::assignments_t assignments; + typename ct_t::costs_t costs; + if constexpr(asn_method == HARD) { + assignments.resize(app.size()); + } else { + assignments.resize(app.size(), k); + } + PRETTY_SAY << "Assignments sized.\n"; + + + auto set_metric_return_values = [&](const auto &ret) { + MINOCORE_REQUIRE(asn_method == HARD, "Not supported: soft extrinsic clustering"); + auto &[cc, asn, retcosts] = ret; + centers.resize(cc.size()); + if constexpr(co == EXTRINSIC) { + OMP_PFOR + for(size_t i = 0; i < cc.size(); ++i) { + centers[i] = row(app.data(), cc[i], blaze::unchecked); + } + } else std::copy(cc.begin(), cc.end(), centers.begin()); // INTRINSIC + if constexpr(asn_method == HARD) { + assignments.resize(asn.size()); + std::copy(asn.begin(), asn.end(), assignments.begin()); + costs.resize(retcosts.size()); + std::copy(retcosts.begin(), retcosts.end(), costs.begin()); + } + }; + + // Delegate to solvers and set-up return values + if(dist::detail::satisfies_d2(measure) || measure == dist::L1 || measure == dist::TOTAL_VARIATION_DISTANCE || co == EXTRINSIC) { + auto [initcenters, initasn, initcosts] = jsd::make_kmeanspp(app, ct.k, ct.seed, ct.weights); + assert(initcenters.size() == k); + if(co == INTRINSIC || opt == METRIC_KMEDIAN) { + PRETTY_SAY << "Performing metric clustering\n"; + // Do graph metric calculation + MINOCORE_REQUIRE(asn_method == HARD, "Can't do soft metric k-median"); + auto metric_ret = perform_cluster_metric_kmedian(detail::make_aa(app), app.size(), ct); + set_metric_return_values(metric_ret); + } else { + PRETTY_SAY << "Setting centers with D2\n"; + for(const auto id: initcenters) + centers.emplace_back(row(app.data(), id)); + assert(centers.size() == k); + PRETTY_SAY << "Beginning lloyd loop\n"; + // Perform EM + if(auto ret = perform_lloyd_loop(centers, assignments, app, k, costs, ct.seed, ct.weights, max_iter, eps)) + std::fprintf(stderr, "lloyd loop ret: %s\n", ret == REACHED_MAX_ROUNDS ? 
"max rounds": "unfinished"); + } + } else if(dist::detail::satisfies_metric(measure) || dist::detail::satisfies_rho_metric(measure)) { + MINOCORE_REQUIRE(asn_method == HARD, "Can't do soft metric k-median"); + auto metric_ret = perform_cluster_metric_kmedian(detail::make_aa(app), app.size(), ct); + set_metric_return_values(metric_ret); + } else { + throw NotImplementedError("Unsupported: asymmetric measures not supporting D2 sampling"); + } + return std::make_tuple(std::move(centers), std::move(assignments), std::move(costs)); +} // perform_clustering + +// Make # points optional +template +auto perform_clustering(const jsd::DissimilarityApplicator &app, unsigned k, + const ElementType_t *weights=nullptr, + CenterSamplingType csample=DEFAULT_SAMPLING, + OptimizationMethod opt=DEFAULT_OPT, + ApproximateSolutionType approx=DEFAULT_APPROX, + uint64_t seed=0, + size_t max_iter=100, double eps=1e-4) +{ + return perform_clustering(app, app.size(), k, weights, csample, opt, approx, seed, max_iter, eps); +} + +template +auto perform_clustering(const OracleType &app, size_t npoints, unsigned k, + const FT *weights=nullptr, + CenterSamplingType csample=DEFAULT_SAMPLING, + OptimizationMethod opt=DEFAULT_OPT, + ApproximateSolutionType approx=DEFAULT_APPROX, + uint64_t seed=0, + size_t max_iter=100, double eps=ClusteringTraits::DEFAULT_EPS) +{ + // Setup + if(opt == DEFAULT_OPT) opt = METRIC_KMEDIAN; + if(approx == DEFAULT_APPROX) approx = CONSTANT_FACTOR; + if(csample == DEFAULT_SAMPLING) csample = THORUP_SAMPLING; + MINOCORE_REQUIRE(opt == METRIC_KMEDIAN, "No other method supported for metric clustering"); + auto clustering_traits = make_clustering_traits(npoints, k, + csample, opt, approx, weights, seed, max_iter, eps); + using ct_t = decltype(clustering_traits); + + // Cluster + auto [cc, asn, retcosts] = perform_cluster_metric_kmedian(app, npoints, clustering_traits); + + // Return + typename ct_t::centers_t centers(cc.size()); + typename ct_t::assignments_t assignments(asn.size()); + typename ct_t::costs_t costs(retcosts.size()); + std::copy(cc.begin(), cc.end(), centers.begin()); + std::copy(asn.begin(), asn.end(), assignments.begin()); + std::copy(retcosts.begin(), retcosts.end(), costs.begin()); + return std::make_tuple(std::move(centers), std::move(assignments), std::move(costs)); +} + + +} // namespace clustering + +} // namespace minocore + +#endif /* FGC_CLUSTERING_DISPATCH_H__ */ diff --git a/include/minocore/clustering/sampling.h b/include/minocore/clustering/sampling.h new file mode 100644 index 00000000..5a4c8c6a --- /dev/null +++ b/include/minocore/clustering/sampling.h @@ -0,0 +1,140 @@ +#ifndef CLUSTERING_SAMPLING_H__ +#define CLUSTERING_SAMPLING_H__ +#include "minocore/clustering/traits.h" +#include "minocore/optim/oracle_thorup.h" + +namespace minocore { + +namespace clustering { + +template +struct MetricSelectionResult: public std::tuple, blz::DV, std::vector, blz::DM > { + auto &selected() {return std::get<0>(*this);} + const auto &selected() const {return std::get<0>(*this);} + auto &costs() {return std::get<1>(*this);} + const auto &costs() const {return std::get<1>(*this);} + auto &assignments() {return std::get<2>(*this);} + const auto &assignments() const {return std::get<2>(*this);} + auto &facility_cost_matrix() {return std::get<3>(*this);} + const auto &facility_cost_matrix() const {return std::get<3>(*this);} +}; + + + +template +MetricSelectionResult +select_uniform_random(const OracleType &oracle, size_t np, ClusteringTraits opts) +{ + assert(opts.k != (unsigned)-1); 
+ MetricSelectionResult ret; + size_t nsamp = std::min(size_t(std::ceil(opts.k * opts.approx_mul)), np); + std::vector selected; + std::mt19937_64 rng(opts.seed); + schism::Schismatic modder(np); + blz::DV costs(np, std::numeric_limits::max()); + std::vector assignments(np); + shared::flat_hash_set sel; + do { + IT next; + do next = modder.mod(rng()); + while(sel.find(next) != sel.end()); + OMP_PFOR + for(size_t i = 0; i < np; ++i) { + if(costs[i] == 0.) continue; + auto c = oracle(next, i); + if(c < costs[i]) + costs[i] = c, assignments[i] = selected.size(); + } + sel.insert(next); + selected.push_back(next); + } while(selected.size() < nsamp); + std::get<0>(ret) = std::move(selected); + std::get<1>(ret) = std::move(costs); + std::get<2>(ret) = std::move(assignments); + return ret; +} + +template +MetricSelectionResult +select_greedy(const OracleType &oracle, size_t np, ClusteringTraits opts) +{ + assert(opts.k != (unsigned)-1); + MetricSelectionResult ret; + size_t nsamp = std::min(size_t(std::ceil(opts.k * opts.approx_mul)), np); + blz::DV costs(np); + IT next = std::mt19937_64(opts.seed)() % np; + std::vector selected{next}, assignments(np, next); + costs[next] = 0.; + OMP_PFOR + for(size_t i = 0; i < np; ++i) { + if(unlikely(i == next)) continue; + costs[i] = oracle(i, next); + } + + while(selected.size() < nsamp) { + next = std::max_element(costs.data(), costs.data() + costs.size()) - costs.data(); + costs[next] = 0.; + assignments[next] = next; + OMP_PFOR + for(size_t i = 0; i < np; ++i) { + if(unlikely(i == next) || costs[i] == 0.) continue; + if(auto newcost = oracle(i, next); newcost < costs[i]) { + costs[i] = newcost; + assignments[i] = next; + } + } + selected.push_back(next); + } + std::get<0>(ret) = std::move(selected); + std::get<1>(ret) = std::move(costs); + std::get<2>(ret) = std::move(assignments); + return ret; +} + +template +MetricSelectionResult +select_d2(const OracleType &oracle, size_t np, ClusteringTraits opts) { + MetricSelectionResult ret; + size_t nsamp = std::min(size_t(std::ceil(opts.k * opts.approx_mul)), np); + blz::DV costs(np); + std::mt19937_64 mt(opts.seed); + IT next = mt() % np; + std::vector selected{next}, assignments(np, next); + costs[next] = 0.; + OMP_PFOR + for(size_t i = 0; i < np; ++i) { + if(unlikely(i == next)) continue; + costs[i] = oracle(i, next); + } + auto cdf = std::make_unique(np); + FT *const cdfbeg = cdf.get(), *const cdfend = cdfbeg + np; + do { + std::partial_sum(costs.data(), costs.data() + costs.size(), cdfbeg); + std::uniform_real_distribution dist; + IT id; + do { + id = std::lower_bound(cdfbeg, cdfend, cdfend[-1] * dist(mt)) - cdfbeg; + } while(std::find(selected.begin(), selected.end(), id) != selected.end()); + selected.push_back(id); + costs[id] = 0.; + assignments[id] = id; + OMP_PFOR + for(IT i = 0; i < np; ++i) { + if(costs[i] == 0.) 
continue; + if(auto newcost = oracle(i, id); newcost < costs[i]) { + costs[i] = newcost; + assignments[i] = id; + } + } + } while(selected.size() < nsamp); + std::get<0>(ret) = std::move(selected); + std::get<1>(ret) = std::move(costs); + std::get<2>(ret) = std::move(assignments); + return ret; +} + +} // namespace clustering + +} // namespace minocore + +#endif diff --git a/include/minocore/clustering/traits.h b/include/minocore/clustering/traits.h new file mode 100644 index 00000000..74c1ec35 --- /dev/null +++ b/include/minocore/clustering/traits.h @@ -0,0 +1,184 @@ +#ifndef FGC_CLUSTERING_TRAITS_H__ +#define FGC_CLUSTERING_TRAITS_H__ + +namespace minocore { +namespace clustering { + +using ClusteringEnumType = std::size_t; +using ce_t = ClusteringEnumType; + +/* + * + * Problem classification. These are: + * 1. Assignment + * IE, is the assignment to cluster centers hard or soft? + * 2. Center Origination + * Are cluster centers Intrinsic or Extrinsic? (IE, are centers selected from input points or not?) + * 3. Type of approximate solution. + * If using coresets, which algorithm is used for an approximate solution? + * BICRITERIA (for alpha-beta approximations, where a constant factor approximation with more than alpha centers is allowed) + * CONSTANT_FACTOR (for the exact number of centers but with a constant factor approximation) + * HEURISTIC (for a good enough solution, which we will treat as one of the above even though it isn't) + * 4. Center sampling type. + * When selecting centers to use as candidates in search, use: + * Thorup sampling + * Uniform sampling + * D2/cost sampling + * 5. Optimization technique + * Metric k-median: use metric clustering techniques, such as Jain-Vazirani or local search + * Expectation Maximization: Lloyd's algorithm or a variant + * Gradient descent (will require autograd or similar) + * Exhaustive search: combinatorial approximation + * Black box: plugging into CPLEX or Gurobi + */ + +static constexpr ce_t UNSET = ce_t(-1); + +enum Assignment: ce_t { + HARD = 0, + /* Assignment(x) = argmin_{c \in C}[d(c, x)] + * + */ + SOFT = 1, + // Assignment(X, c) = \frac{c}{\sum_{c' \in C}[d(c', x)]} + SOFT_HARMONIC_MEAN = 2, + /* Assignment(X, c) = \frac{e^{d(c, x)}}{sum_{c' \in C}[e^d(c', x)]} + * = softmax(d(C, x)) + */ +}; +enum CenterOrigination: ce_t { + INTRINSIC = 0, + EXTRINSIC = 1 +}; + +enum ApproximateSolutionType: ce_t { + BICRITERIA = 0, + CONSTANT_FACTOR = 1, + HEURISTIC = 2, + RSVD = 3, + DEFAULT_APPROX = UNSET +}; +enum CenterSamplingType: ce_t { + THORUP_SAMPLING, + D2_SAMPLING, + UNIFORM_SAMPLING, + GREEDY_SAMPLING, + DEFAULT_SAMPLING = UNSET, + COST_SAMPLING = D2_SAMPLING, +}; +enum OptimizationMethod: ce_t { + METRIC_KMEDIAN, + EXPECTATION_MAXIMIZATION, + BLACK_BOX, + GRADIENT_DESCENT, + EXHAUSTIVE_SEARCH, + DEFAULT_OPT = UNSET +}; + +enum MetricKMedianSolverMethod: ce_t { + JAIN_VAZIRANI_FL, + LOCAL_SEARCH, + JV_PLUS_LOCAL_SEARCH, + DEFAULT_SOLVER = UNSET +}; + + + +template +using assignment_fmt_t = std::conditional_t, + blaze::DynamicMatrix + >; + +template +struct ClusteringTraits { + static constexpr Assignment asn_method = asn; + static constexpr CenterOrigination center_origin = co; + ApproximateSolutionType approx = static_cast(UNSET); + CenterSamplingType sampling = static_cast(UNSET); + OptimizationMethod opt = static_cast(UNSET); + MetricKMedianSolverMethod metric_solver = JV_PLUS_LOCAL_SEARCH; + + static constexpr FT DEFAULT_EPS = 1e-6; + +// Settings + FT thorup_npermult = 7; + FT approx_mul = 50; + FT eps = DEFAULT_EPS; + 
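// Thorup-sampling knobs: thorup_iter/thorup_sub_iter bound the outer and inner rounds of iterated_oracle_thorup_d, while approx_mul (above) scales how many candidate centers the D2/uniform/greedy samplers draw, i.e. ceil(k * approx_mul). + // Hypothetical usage sketch (template-parameter order <FT, IT, asn, co> assumed, not confirmed by this diff): ClusteringTraits<float, uint32_t, HARD, EXTRINSIC> ct; ct.k = 25; ct.sampling = THORUP_SAMPLING; +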
unsigned thorup_iter = 4; + unsigned thorup_sub_iter = 10; + unsigned max_jv_rounds = 100; + unsigned max_lloyd_iter = 1000; + unsigned k = -1; + size_t npoints = 0; + + bool compute_full = true; + uint64_t seed = 13; + + const FT *weights = nullptr; + + static_assert(std::is_floating_point_v<FT>, "FT must be floating"); + static_assert(std::is_integral_v<IT>, "IT must be integral and support required index ranges"); + using cost_t = FT; + using index_t = IT; + + // If hard, one cost per point + // If soft, one cost per point per center + // Assignment fractions are generated as-needed (for the case of softmax) + // For this reason, matrix forms are stored as + // row = point, column = center + using costs_t = std::conditional_t, + blz::DynamicMatrix>; + // If hard assignment, then assignments are managed + using assignments_t = assignment_fmt_t; + using centers_t = std::conditional_t, + std::vector> + >; + // Thorup +}; + + +template +ClusteringTraits make_clustering_traits( + size_t npoints, unsigned k, + CenterSamplingType csample=DEFAULT_SAMPLING, OptimizationMethod opt=DEFAULT_OPT, + ApproximateSolutionType approx=DEFAULT_APPROX, const FT *weights=nullptr, uint64_t seed=0, + size_t max_iter=100, double eps=ClusteringTraits::DEFAULT_EPS) { + ClusteringTraits ret; + ret.k = k; + ret.seed = seed; + ret.max_jv_rounds = ret.max_lloyd_iter = max_iter; + ret.eps = eps; + ret.opt = opt; + ret.sampling = csample; + ret.approx = approx; + ret.weights = weights; + ret.npoints = npoints; + return ret; +} + + +namespace detail { + +template +struct ApplicatorAdaptor { + const jsd::DissimilarityApplicator &mat_; + ApplicatorAdaptor(const jsd::DissimilarityApplicator &mat): mat_(mat) {} + decltype(auto) operator()(size_t i, size_t j) const { + return mat_(i, j); + } + auto get_measure() const {return mat_.get_measure();} +}; +template +auto make_aa(const jsd::DissimilarityApplicator &mat) { + return ApplicatorAdaptor(mat); +} + +} // namespace detail + +} // clustering +} // minocore + +#endif /* FGC_CLUSTERING_TRAITS_H__ */ diff --git a/include/minocore/coreset/coreset.h b/include/minocore/coreset/coreset.h index ffe183b4..4031c018 100644 --- a/include/minocore/coreset/coreset.h +++ b/include/minocore/coreset/coreset.h @@ -3,9 +3,11 @@ #define FGC_CORESETS_H__ #include #include +#include #include "alias_sampler/alias_sampler.h" -#include "minocore/util/blaze_adaptor.h" #include "minocore/util/shared.h" +#include "blaze/math/CustomVector.h" +#include "blaze/math/DynamicVector.h" #include #ifdef _OPENMP # include @@ -188,8 +190,8 @@ struct CoresetSampler { using CoresetType = IndexCoreset; std::unique_ptr sampler_; std::unique_ptr probs_; - std::unique_ptr> weights_; - std::unique_ptr> fl_bicriteria_points_; // Used only by FL + std::unique_ptr> weights_; + std::unique_ptr> fl_bicriteria_points_; // Used only by FL std::unique_ptr fl_asn_; size_t np_; size_t k_; @@ -272,7 +274,7 @@ struct CoresetSampler { gzread(fp, &weights_present, sizeof(weights_present)); if(weights_present) { assert(weights_present == 137); - weights_.reset(new blz::DV(n)); + weights_.reset(new blaze::DynamicVector(n)); gzread(fp, weights_->data(), sizeof(FT) * n); } sampler_.reset(new Sampler(probs_.get(), probs_.get() + n, seed_)); @@ -288,7 +290,7 @@ struct CoresetSampler { ::read(fd, &weights_present, sizeof(weights_present)); if(weights_present) { assert(weights_present == 137); - weights_.reset(new blz::DV(n)); + weights_.reset(new blaze::DynamicVector(n)); ::read(fd, weights_->data(), sizeof(FT) * n); } sampler_.reset(new
Sampler(probs_.get(), probs_.get() + n, seed_)); @@ -364,7 +366,7 @@ struct CoresetSampler { if(!k) k = ncenters; k_ = k; if(weights) { - weights_.reset(new blz::DV(np_)); + weights_.reset(new blaze::DynamicVector(np_)); std::memcpy(weights_->data(), weights, sizeof(FT) * np_); } else weights_.release(); if(sens == LUCIC_FAULKNER_KRAUSE_FELDMAN) { @@ -394,7 +396,7 @@ struct CoresetSampler { weights_ ? blaze::dot(*weights_, cv) : blaze::sum(cv); probs_.reset(new FT[np_]); - blz::CustomVector sensitivies(probs_.get(), np_); + blaze::CustomVector sensitivies(probs_.get(), np_); std::vector center_counts(ncenters); OMP_PFOR for(size_t i = 0; i < np_; ++i) { @@ -407,7 +409,7 @@ struct CoresetSampler { sensitivies = cv * (1. / total_cost); } // sensitivities = weights * costs / total_cost - blz::DV ccinv(ncenters); + blaze::DynamicVector ccinv(ncenters); for(unsigned i = 0; i < ncenters; ++i) ccinv[i] = 1. / center_counts[i]; OMP_PFOR @@ -430,7 +432,7 @@ struct CoresetSampler { blaze::CustomVector(fl_asn_.get(), np_) = blaze::CustomVector(asn, np_); if(bicriteria_centers) { - if(!fl_bicriteria_points_) fl_bicriteria_points_.reset(new blz::DV(b_)); + if(!fl_bicriteria_points_) fl_bicriteria_points_.reset(new blaze::DynamicVector(b_)); else fl_bicriteria_points_->resize(b_); *fl_bicriteria_points_ = blaze::CustomVector(bicriteria_centers, b_); } @@ -447,8 +449,8 @@ struct CoresetSampler { probs_[i] = getweight(i) * (costs[i]) * total_cost_inv; } } else { - blaze::CustomVector probv(const_cast(probs_.get()), np_); - probv = blz::ceil(CFT(np_) * total_cost_inv * cv) + 1.; + blaze::CustomVector probv(const_cast(probs_.get()), np_); + probv = blaze::ceil(FT(np_) * total_cost_inv * cv) + 1.; } sampler_.reset(new Sampler(probs_.get(), probs_.get() + np_, seed)); } @@ -460,8 +462,8 @@ struct CoresetSampler { const double alpha = 16 * std::log(k_) + 32., alpha2 = 2. * alpha; //auto center_counts = std::make_unique(ncenters); - blz::DV weight_sums(ncenters, FT(0)); - blz::DV cost_sums(ncenters, FT(0)); + blaze::DynamicVector weight_sums(ncenters, FT(0)); + blaze::DynamicVector cost_sums(ncenters, FT(0)); double total_costs(0.); OMP_PRAGMA("omp parallel for reduction(+:total_costs)") @@ -480,10 +482,10 @@ struct CoresetSampler { cost_sums[asn] += pointcost; total_costs += w * costs[i]; } - double weight_sum = blz::sum(weight_sums); + double weight_sum = blaze::sum(weight_sums); total_costs /= weight_sum; const double tcinv = alpha / total_costs; - blz::DV sens(np_); + blaze::DynamicVector sens(np_); for(size_t i = 0; i < ncenters; ++i) { cost_sums[i] = alpha2 * cost_sums[i] / (weight_sums[i] * total_costs) + 4 * weight_sum / weight_sums[i]; } @@ -535,6 +537,45 @@ struct CoresetSampler { auto getweight(size_t ind) const { return weights_ ? weights_->operator[](ind): static_cast(1.); } + struct importance_compare { + bool operator()(const std::pair lh, const std::pair rh) const { + return lh.second > rh.second; + } + }; + struct importance_queue: public std::priority_queue, + std::vector>, + importance_compare> + { + auto &getc() {return this->c;} + const auto &getc() const {return this->c;} + }; + IndexCoreset top_outliers(const size_t n) { + importance_queue topk; + std::pair cpoint; + for(size_t i = 0; i < size(); ++i) { + FT pi = probs_[i]; + if(topk.size() < n) { + cpoint = {IT(i), pi}; + topk.push(cpoint); + continue; + } + if(topk.top().second < pi) { + topk.pop(); + cpoint = {IT(i), pi}; + topk.push(cpoint); + } + } + auto container = std::move(topk.getc()); + // Put the most expensive items in front. 
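+ // importance_compare orders by descending probability, so after this sort container.front() holds the highest-sensitivity (most expensive) point.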
+ shared::sort(container.begin(), container.end(), importance_compare()); + IndexCoreset ret(n); + const double dn = n; + for(unsigned i = 0; i < n; ++i) { + auto ind = container[i].first; + ret.indices_[i] = ind; + ret.weights_[i] = getweight(ind) / (dn * container[i].second); + } + return ret; + } IndexCoreset sample(const size_t n, uint64_t seed=0, double eps=0.1) { if(unlikely(!sampler_.get())) throw std::runtime_error("Sampler not constructed"); if(seed) sampler_->seed(seed); diff --git a/include/minocore/coreset/gmm.h b/include/minocore/coreset/gmm.h index a460f1e0..54b3deed 100644 --- a/include/minocore/coreset/gmm.h +++ b/include/minocore/coreset/gmm.h @@ -9,9 +9,9 @@ template struct GMM { // Related: Laplacian unsigned k_; - blz::DynamicMatrix mu_; - blz::DynamicMatrix pi_; - blz::DynamicMatrix pm_; // precision matrix + blaze::DynamicMatrix mu_; + blaze::DynamicMatrix pi_; + blaze::DynamicMatrix pm_; // precision matrix std::vector cached_det_; static constexpr double m_pi = 3.14159265358979323846; diff --git a/include/minocore/coreset/kcenter.h b/include/minocore/coreset/kcenter.h new file mode 100644 index 00000000..0557bfba --- /dev/null +++ b/include/minocore/coreset/kcenter.h @@ -0,0 +1,308 @@ +#ifndef FGC_KCENTER_CORESET_H__ +#define FGC_KCENTER_CORESET_H__ +#include "minocore/optim/kcenter.h" + +namespace minocore { +namespace coresets { +namespace outliers { + +/* +// All algorithms in this namespace are from: +// Greedy Strategy Works for k-Center Clustering with Outliers and Coreset Construction +// Hu Ding, Haikuo Yu, Zixiu Wang +*/ + +template<typename FT, typename IT, typename Container=std::vector<std::pair<FT, IT>>, + typename Cmp=std::greater<>> +struct fpq: public std::priority_queue<std::pair<FT, IT>, Container, Cmp> { + // priority queue providing access to the underlying container with getc(), + // a reserve function, and a comparator defaulting to std::greater<> for farthest points. + using super = std::priority_queue<std::pair<FT, IT>, Container, Cmp>; + using value_type = std::pair<FT, IT>; + + IT size_; + fpq(IT size=0): size_(size) {reserve(size);} + fpq(const fpq &o) = default; + void reserve(size_t n) {this->c.reserve(n);} + auto &getc() {return this->c;} + const auto &getc() const {return this->c;} + void update(const fpq &o) { + for(const auto v: o.getc()) + add(v); + } + void add(const value_type v) { + if(this->size() < size_) this->push(v); + else if(v > this->top()) { + this->pop(); + this->push(v); + } + } + void add(FT val, IT index) { + if(this->size() < size_) { + this->push(value_type(val, index)); + } else if(val > this->top().first) { + this->pop(); + this->push(value_type(val, index)); + } + } +}; + + + +template +struct bicriteria_result_t: public std::tuple, IVec, std::vector>, double> { + using super = std::tuple, IVec, std::vector>, double>; + template<typename... Args> + bicriteria_result_t(Args &&...args): super(std::forward<Args>(args)...)
{} + auto ¢ers() {return std::get<0>(*this);} + auto &assignments() {return std::get<1>(*this);} + // alias + auto &labels() {return assignments();} + auto &outliers() {return std::get<2>(*this);} + double outlier_threshold() const {return std::get<3>(*this);} + size_t num_centers() const {return centers().size();} +}; + +/* +// Algorithm 1 from the above DYW paper +// Z = # outliers +// \mu = quality of coreset +// size of coreset: 2z + O((2/\mu)^p k) +// \gamma = z / n +*/ + +template, + typename IT=std::uint32_t, typename RNG, typename Norm=sqrL2Norm> +bicriteria_result_t +kcenter_bicriteria(Iter first, Iter end, RNG &rng, size_t, double eps, + double gamma=0.001, size_t t = 100, double eta=0.01, + const Norm &norm=Norm()) +{ + auto dm = make_index_dm(first, norm); + // Step 1: constants + assert(end > first); + size_t np = end - first; + const size_t z = std::ceil(gamma * np); + std::fprintf(stderr, "z: %zu\n", z); + size_t farthestchunksize = std::ceil((1 + eps) * z), + samplechunksize = std::ceil(std::log(1./eta) / (1 - gamma)); + IVec ret; + IVec labels(np); + ret.reserve(samplechunksize); + std::vector distances(np); + // randomly select 'log(1/eta) / (1 - eps)' vertices from X and add them to E. + while(ret.size() < samplechunksize) { + // Assuming that this is relatively small and we can take bad asymptotic complexity + auto newv = rng() % np; + if(std::find(ret.begin(), ret.end(), newv) == ret.end()) + push_back(ret, newv); + } + assert(flat_hash_set(ret.begin(), ret.end()).size() == ret.size()); + if(samplechunksize > 100) { + std::fprintf(stderr, "Warning: with samplechunksize %zu, it may end up taking a decent amount of time. Consider swapping this in for a hash set.", samplechunksize); + } + if(samplechunksize > farthestchunksize) { + std::fprintf(stderr, "samplecc is %zu (> fcs %zu). changing gcs to scc + z (%zu)\n", samplechunksize, farthestchunksize, samplechunksize + z); + farthestchunksize = samplechunksize + z; + } + fpq pq(farthestchunksize); + const auto fv = ret[0]; + labels[fv] = fv; + distances[fv] = 0.; + // Fill the priority queue from the first set +#ifdef _OPENMP + #pragma omp declare reduction (merge : fpq : omp_out.update(omp_in)) initializer(omp_priv(omp_orig)) + #pragma omp parallel for reduction(merge: pq) +#endif + for(IT i = 0; i < np; ++i) { + double dist = dm(fv, i); + double newdist; + IT label = 0; // This label is an index into the ret vector, rather than the actual index + for(size_t j = 1, e = ret.size(); j < e; ++j) { + if((newdist = dm(i, ret[j])) < dist) { + label = j; + dist = newdist; + } + } + distances[i] = dist; + labels[i] = ret[label]; + pq.add(dist, i); + } + IVec random_samples(samplechunksize); + // modulo without a div/mod instruction, much faster + schism::Schismatic div(farthestchunksize); // pq size + assert(samplechunksize >= 1.); + for(size_t j = 0;j < t;++j) { + //std::fprintf(stderr, "j: %zu/%zu\n", j, t); + // Sample 'samplechunksize' points from pq into random_samples. 
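+ // (The queue currently holds the ceil((1 + eps) * z) farthest points; the indices drawn below are positions in that queue, remapped to dataset ids by the std::transform that follows.)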
+ // Sample them + size_t rsi = 0; + IT *rsp = random_samples.data(); + do { + IT index = div.mod(rng()); + // (Without replacement) + if(std::find(rsp, rsp + rsi, index) == rsp + rsi) + rsp[rsi++] = index; + } while(rsi < samplechunksize); + // random_samples now contains indexes *into pq* + assert(pq.getc().data()); + std::transform(rsp, rsp + rsi, rsp, + [pqi=pq.getc().data()](auto x) { + return pqi[x].second; + }); + for(size_t i = 0; i < rsi; ++i) + assert(rsp[i] < np); + // random_samples now contains indexes *into original dataset* + + // Insert into solution + for(auto it = rsp, e = rsp + rsi; it < e; ++it) { + if(std::find(ret.begin(), ret.end(), *it) != ret.end()) continue; + distances[*it] = 0.; + labels[*it] = *it; + ret.pushBack(*it); + } + + // compare each point against all of the new points + pq.getc().clear(); // empty priority queue + // Fill priority queue +#ifdef _OPENMP + #pragma omp declare reduction (merge : fpq : omp_out.update(omp_in)) initializer(omp_priv(omp_orig)) + #pragma omp parallel for reduction(merge: pq) +#endif + for(size_t i = 0; i < np; ++i) { + double dist = distances[i]; + if(dist == 0.) continue; + double newdist; + IT label = labels[i]; + for(size_t j = 0; j < rsi; ++j) { + if((newdist = dm(i, rsp[j])) < dist) + dist = newdist, label = rsp[j]; + } + distances[i] = dist; + labels[i] = label; + pq.add(dist, i); + } + } + const double minmaxdist = pq.top().first; + bicriteria_result_t bicret; + assert(flat_hash_set(ret.begin(), ret.end()).size() == ret.size()); + bicret.centers() = std::move(ret); + bicret.labels() = std::move(labels); + bicret.outliers() = std::move(pq.getc()); + std::fprintf(stderr, "outliers size: %zu\n", bicret.outliers().size()); + std::get<3>(bicret) = minmaxdist; + return bicret; + // center ids, label assignments for all points besides outliers, outliers, and the distance of the closest excluded point +} // kcenter_bicriteria + +/* +// Algorithm 2 from the above DYW paper +// Z = # outliers +// \gamma = z / n +*/ + +template, + typename IT=std::uint32_t, typename RNG, typename Norm=L2Norm> +std::vector<IT> +kcenter_greedy_2approx_outliers(Iter first, Iter end, RNG &rng, size_t k, double eps, + double gamma=0.001, + const Norm &norm=Norm()) +{ + auto dm = make_index_dm(first, norm); + const size_t np = end - first; + const size_t z = std::ceil(gamma * np); + size_t farthestchunksize = std::ceil((1. + eps) * z); + fpq pq(farthestchunksize); + //pq.reserve(farthestchunksize + 1); + std::vector<IT> ret; + std::vector<FT> distances(np, std::numeric_limits<FT>::max()); + ret.reserve(k); + auto newc = rng() % np; + ret.push_back(newc); + do { + //const auto &newel = first[newc]; + // Fill pq +#ifdef _OPENMP + #pragma omp declare reduction (merge : fpq : omp_out.update(omp_in)) initializer(omp_priv(omp_orig)) + #pragma omp parallel for reduction(merge: pq) +#endif + for(IT i = 0; i < np; ++i) { + double dist = distances[i]; + if(dist == 0.)
continue; + double newdist; + if((newdist = dm(i, newc)) < dist) + dist = newdist; + distances[i] = dist; + pq.add(dist, i); + } + + // Sample point + newc = pq.getc()[rng() % farthestchunksize].second; + assert(newc < np); + ret.push_back(newc); + pq.getc().clear(); + } while(ret.size() < k); + return ret; +}// kcenter_greedy_2approx_outliers + +// Algorithm 3 (coreset construction) +template, + typename IT=std::uint32_t, typename RNG, typename Norm=L2Norm> +coresets::IndexCoreset +kcenter_coreset_outliers(Iter first, Iter end, RNG &rng, size_t k, double eps=0.1, double mu=.5, + double rho=1.5, + double gamma=0.001, double eta=0.01, const Norm &norm=Norm()) { + // rho is 'D' for R^D (http://www.wisdom.weizmann.ac.il/~robi/teaching/2014b-SeminarGeometryAlgorithms/lecture1.pdf) + // in Euclidean space, as worst-case, but usually better in real data with structure. + assert(mu > 0. && mu <= 1.); + const size_t np = end - first; + size_t L = std::ceil(std::pow(2. / mu, rho) * k); + size_t nrounds = std::ceil((L + std::sqrt(L)) / (1. - eta)); + auto bic = kcenter_bicriteria(first, end, rng, k, eps, + gamma, nrounds, eta, norm); + double rtilde = bic.outlier_threshold(); + std::fprintf(stderr, "outlier threshold: %f\n", rtilde); + auto ¢ers = bic.centers(); + auto &labels = bic.labels(); + auto &outliers = bic.outliers(); +#ifndef NDEBUG + for(const auto c: centers) + assert(c < np); + for(const auto label: labels) + assert(labels[label] == label); +#endif + //std::vector counts(centers.size()); + coresets::flat_hash_map counts; + counts.reserve(centers.size()); + size_t i = 0; + SK_UNROLL_8 + do ++counts[labels[i++]]; while(i < np); + coresets::IndexCoreset ret(centers.size() + outliers.size()); + std::fprintf(stderr, "ret size: %zu. centers size: %zu. counts size %zu. 
outliers size: %zu\n", ret.size(), centers.size(), counts.size(), outliers.size()); + for(i = 0; i < outliers.size(); ++i) { + assert(outliers[i].second < np); + ret.indices_[i] = outliers[i].second; + ret.weights_[i] = 1.; + } + for(const auto &pair: counts) { + assert(pair.first < np); + ret.weights_[i] = pair.second; + ret.indices_[i] = pair.first; + ++i; + } + assert(i == ret.size()); + for(size_t i = 0; i < ret.indices_.size(); ++i) { + assert(ret.indices_[i] < np); + } + return ret; +} +} // namespace outliers +using outliers::kcenter_coreset_outliers; +using outliers::kcenter_greedy_2approx_outliers; +} // namespace coresets +using coresets::outliers::kcenter_greedy_2approx_outliers; + +} // namespace minocore + +#endif /* FGC_KCENTER_CORESET_H__ */ + diff --git a/include/minocore/coreset/matrix_coreset.h b/include/minocore/coreset/matrix_coreset.h index 05d5040b..f023033e 100644 --- a/include/minocore/coreset/matrix_coreset.h +++ b/include/minocore/coreset/matrix_coreset.h @@ -7,7 +7,7 @@ namespace coresets { template struct MatrixCoreset { MatrixType mat_; - blz::DynamicVector weights_; + blaze::DynamicVector weights_; bool rowwise_; MatrixCoreset &merge(const MatrixCoreset &o) { if(rowwise_ != o.rowwise_) throw std::runtime_error("Can't merge coresets of differing rowwiseness"); diff --git a/include/minocore/dist.h b/include/minocore/dist.h index dd68ee93..c402983a 100644 --- a/include/minocore/dist.h +++ b/include/minocore/dist.h @@ -2,4 +2,5 @@ #define FGC_DISTANCE_HEADERS_ #include #include +#include #endif diff --git a/include/minocore/dist/applicator.h b/include/minocore/dist/applicator.h index 1c5a5203..64bc332c 100644 --- a/include/minocore/dist/applicator.h +++ b/include/minocore/dist/applicator.h @@ -1,5 +1,6 @@ #ifndef FGC_JSD_H__ #define FGC_JSD_H__ +#include "minocore/util/exception.h" #include "minocore/coreset.h" #include "minocore/dist/distance.h" #include "distmat/distmat.h" @@ -16,11 +17,13 @@ namespace jsd { using namespace blz; using namespace blz::distance; + template -class ProbDivApplicator { +class DissimilarityApplicator { //using opposite_type = typename base_type::OppositeType; MatrixType &data_; using VecT = blaze::DynamicVector ? 
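+ // (The cache vector's transpose flag is chosen to match the matrix's storage order.)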
blaze::rowVector: blaze::columnVector>; + using matrix_type = MatrixType; VecT row_sums_; std::unique_ptr logdata_; std::unique_ptr sqrdata_; @@ -34,20 +37,22 @@ class ProbDivApplicator { public: using FT = typename MatrixType::ElementType; using MT = MatrixType; - using This = ProbDivApplicator; - using ConstThis = const ProbDivApplicator; + using This = DissimilarityApplicator; + using ConstThis = const DissimilarityApplicator; - const ProbDivType measure_; + const DissimilarityMeasure measure_; const MatrixType &data() const {return data_;} + const VecT &row_sums() const {return row_sums_;} size_t size() const {return data_.rows();} template> - ProbDivApplicator(MatrixType &ref, - ProbDivType measure=JSM, + DissimilarityApplicator(MatrixType &ref, + DissimilarityMeasure measure=JSM, Prior prior=NONE, const PriorContainer *c=nullptr): data_(ref), logdata_(nullptr), measure_(measure) { prep(prior, c); + MINOCORE_REQUIRE(dist::detail::is_valid_measure(measure_), "measure_ must be valid"); } /* * Sets distance matrix, under measure_ (if not provided) @@ -56,13 +61,13 @@ class ProbDivApplicator { template void set_distance_matrix(MatType &m, bool symmetrize=false) const {set_distance_matrix(m, measure_, symmetrize);} - template + template void set_distance_matrix(MatType &m, bool symmetrize=false) const { using blaze::sqrt; const size_t nr = m.rows(); assert(nr == m.columns()); assert(nr == data_.rows()); - static constexpr ProbDivType actual_measure = + static constexpr DissimilarityMeasure actual_measure = measure == JSM ? JSD : measure == COSINE_DISTANCE ? COSINE_SIMILARITY : measure == PROBABILITY_COSINE_DISTANCE ? PROBABILITY_COSINE_SIMILARITY @@ -81,19 +86,19 @@ class ProbDivApplicator { } if constexpr(measure == JSM) { if constexpr(blaze::IsDenseMatrix_v || blaze::IsSparseMatrix_v) { - m = blz::sqrt(m); + m = blaze::sqrt(m); } else if constexpr(dm::is_distance_matrix_v) { blaze::CustomVector cv(const_cast(m.data()), m.size()); - cv = blz::sqrt(cv); + cv = blaze::sqrt(cv); } else { std::transform(m.begin(), m.end(), m.begin(), [](auto x) {return std::sqrt(x);}); } } else if constexpr(measure == COSINE_DISTANCE || measure == PROBABILITY_COSINE_DISTANCE) { if constexpr(blaze::IsDenseMatrix_v || blaze::IsSparseMatrix_v) { - m = blz::acos(m) * PI_INV; + m = blaze::acos(m) * PI_INV; } else if constexpr(dm::is_distance_matrix_v) { blaze::CustomVector cv(const_cast(m.data()), m.size()); - cv = blz::acos(cv) * PI_INV; + cv = blaze::acos(cv) * PI_INV; } else { std::transform(m.begin(), m.end(), m.begin(), [](auto x) {return std::acos(x) * PI_INV;}); } @@ -128,7 +133,7 @@ class ProbDivApplicator { } } // set_distance_matrix template - void set_distance_matrix(MatType &m, ProbDivType measure, bool symmetrize=false) const { + void set_distance_matrix(MatType &m, DissimilarityMeasure measure, bool symmetrize=false) const { switch(measure) { case TOTAL_VARIATION_DISTANCE: set_distance_matrix(m, symmetrize); break; case L1: set_distance_matrix(m, symmetrize); break; @@ -156,7 +161,8 @@ class ProbDivApplicator { case COSINE_SIMILARITY: set_distance_matrix(m, symmetrize); break; case PROBABILITY_COSINE_SIMILARITY: set_distance_matrix(m, symmetrize); break; - default: throw std::invalid_argument(std::string("unknown dissimilarity measure: ") + std::to_string(int(measure)) + blz::detail::prob2str(measure)); + case ORACLE_METRIC: case ORACLE_PSEUDOMETRIC: std::fprintf(stderr, "These are placeholders and should not be called."); throw std::invalid_argument("Placeholders"); + default: throw 
std::invalid_argument(std::string("unknown dissimilarity measure: ") + std::to_string(int(measure)) + dist::detail::prob2str(measure)); } } template @@ -164,42 +170,59 @@ class ProbDivApplicator { return make_distance_matrix(measure_, symmetrize); } template - blaze::DynamicMatrix make_distance_matrix(ProbDivType measure, bool symmetrize=false) const { + blaze::DynamicMatrix make_distance_matrix(DissimilarityMeasure measure, bool symmetrize=false) const { blaze::DynamicMatrix ret(data_.rows(), data_.rows()); set_distance_matrix(ret, measure, symmetrize); return ret; } auto cosine_similarity(size_t i, size_t j) const { - return blz::dot(weighted_row(i), weighted_row(j)) * l2norm_cache_->operator[](i) * l2norm_cache_->operator[](j); + return blaze::dot(weighted_row(i), weighted_row(j)) * l2norm_cache_->operator[](i) * l2norm_cache_->operator[](j); + } + template>> + auto cosine_similarity(size_t j, const OT &o) const { + return blaze::dot(o, weighted_row(j)) / blaze::l2Norm(o) * l2norm_cache_->operator[](j); + } + template>> + auto cosine_similarity(const OT &o, size_t j) const { + return blaze::dot(o, weighted_row(j)) / blaze::l2Norm(o) * l2norm_cache_->operator[](j); } auto pcosine_similarity(size_t i, size_t j) const { - return blz::dot(row(i), row(j)) * pl2norm_cache_->operator[](i) * pl2norm_cache_->operator[](j); + return blaze::dot(row(i), row(j)) * pl2norm_cache_->operator[](i) * pl2norm_cache_->operator[](j); + } + template>> + auto pcosine_similarity(size_t j, const OT &o) const { + return blaze::dot(o, row(j)) / blaze::l2Norm(o) * pl2norm_cache_->operator[](j); + } + template>> + auto pcosine_similarity(const OT &o, size_t j) const { + return blaze::dot(o, row(j)) / blaze::l2Norm(o) * pl2norm_cache_->operator[](j); } static constexpr FT PI_INV = 1. 
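+ // PI_INV = 1/pi rescales acos() outputs so cosine distances land in [0, 1].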
/ 3.14159265358979323846264338327950288; - auto cosine_distance(size_t i, size_t j) const { - return std::acos(cosine_similarity(i, j)) * PI_INV; + template + auto cosine_distance(Args &&...args) const { + return std::acos(cosine_similarity(std::forward(args)...)) * PI_INV; } - auto pcosine_distance(size_t i, size_t j) const { - return std::acos(cosine_similarity(i, j)) * PI_INV; + template + auto pcosine_distance(Args &&...args) const { + return std::acos(pcosine_similarity(std::forward(args)...)) * PI_INV; } auto dotproduct_distance(size_t i, size_t j) const { - return blz::dot(weighted_row(i), weighted_row(j)) * l2norm_cache_->operator[](i) * l2norm_cache_->operator[](j); + return blaze::dot(weighted_row(i), weighted_row(j)) * l2norm_cache_->operator[](i) * l2norm_cache_->operator[](j); } auto pdotproduct_distance(size_t i, size_t j) const { - return blz::dot(row(i), row(j)) * pl2norm_cache_->operator[](i) * pl2norm_cache_->operator[](j); + return blaze::dot(row(i), row(j)) * pl2norm_cache_->operator[](i) * pl2norm_cache_->operator[](j); } // Accessors decltype(auto) weighted_row(size_t ind) const { - return blz::row(data_, ind BLAZE_CHECK_DEBUG) * row_sums_[ind]; + return blaze::row(data_, ind BLAZE_CHECK_DEBUG) * row_sums_[ind]; } - auto row(size_t ind) const {return blz::row(data_, ind BLAZE_CHECK_DEBUG);} - auto logrow(size_t ind) const {return blz::row(*logdata_, ind BLAZE_CHECK_DEBUG);} - auto sqrtrow(size_t ind) const {return blz::row(*sqrdata_, ind BLAZE_CHECK_DEBUG);} - + auto row(size_t ind) const {return blaze::row(data_, ind BLAZE_CHECK_DEBUG);} + auto logrow(size_t ind) const {return blaze::row(*logdata_, ind BLAZE_CHECK_DEBUG);} + auto sqrtrow(size_t ind) const {return blaze::row(*sqrdata_, ind BLAZE_CHECK_DEBUG);} /* * Distances @@ -207,7 +230,152 @@ class ProbDivApplicator { INLINE auto operator()(size_t i, size_t j) const { return this->operator()(i, j, measure_); } - template + template && !std::is_integral_v>> + INLINE FT operator()(size_t i, OT &o, CacheT *cp=static_cast(nullptr)) const { + return this->call(i, o, cp); + } + template && !std::is_integral_v>> + INLINE FT operator()(OT &o, size_t i, CacheT *cp=static_cast(nullptr)) const { + return this->call(o, i, cp); + } + template && !std::is_integral_v>> + INLINE FT call(OT &o, size_t i, CacheT *cp=static_cast(nullptr)) const { + FT ret; + if constexpr(constexpr_measure == TOTAL_VARIATION_DISTANCE) { + ret = discrete_total_variation_distance(o, row(i)); + } else if constexpr(constexpr_measure == L1) { + ret = l1Norm(weighted_row(i) - o); + } else if constexpr(constexpr_measure == L2) { + ret = l2Norm(weighted_row(i) - o); + } else if constexpr(constexpr_measure == SQRL2) { + ret = blaze::sqrNorm(weighted_row(i) - o); + } else if constexpr(constexpr_measure == JSD) { + if(cp) { + ret = jsd(i, o, *cp); + } else ret = jsd(i, o); + } else if constexpr(constexpr_measure == JSM) { + if(cp) { + ret = jsm(i, o, *cp); + } else ret = jsm(i, o); + } else if constexpr(constexpr_measure == REVERSE_MKL) { + ret = cp ? mkl(i, o, *cp): mkl(i, o); + } else if constexpr(constexpr_measure == MKL) { + ret = cp ? mkl(o, i, *cp): mkl(o, i); + } else if constexpr(constexpr_measure == EMD) { + ret = p_wasserstein(row(i), o); + } else if constexpr(constexpr_measure == WEMD) { + ret = p_wasserstein(weighted_row(i), o); + } else if constexpr(constexpr_measure == REVERSE_POISSON) { + ret = cp ? pkl(i, o, *cp): pkl(i, o); + } else if constexpr(constexpr_measure == POISSON) { + ret = cp ? 
pkl(o, i, *cp): pkl(o, i); + } else if constexpr(constexpr_measure == HELLINGER) { + ret = cp ? blaze::sqrNorm(sqrtrow(i) - *cp) + : blaze::sqrNorm(sqrtrow(i) - blaze::sqrt(o)); + } else if constexpr(constexpr_measure == BHATTACHARYYA_METRIC) { + ret = bhattacharyya_metric(i, o); + } else if constexpr(constexpr_measure == BHATTACHARYYA_DISTANCE) { + ret = bhattacharyya_distance(i, o); + } else if constexpr(constexpr_measure == LLR) { + ret = cp ? llr(i, o, *cp): llr(i, o); + } else if constexpr(constexpr_measure == UWLLR) { + ret = cp ? uwllr(i, o, *cp): uwllr(i, o); + } else if constexpr(constexpr_measure == OLLR) { + throw 1; // Not implemented + } else if constexpr(constexpr_measure == ITAKURA_SAITO) { + ret = itakura_saito(o, i); + } else if constexpr(constexpr_measure == REVERSE_ITAKURA_SAITO) { + ret = itakura_saito(i, o); + } else if constexpr(constexpr_measure == COSINE_DISTANCE) { + ret = cosine_distance(i, o); + } else if constexpr(constexpr_measure == PROBABILITY_COSINE_DISTANCE) { + ret = pcosine_distance(i, o); + } else if constexpr(constexpr_measure == COSINE_SIMILARITY) { + ret = cosine_similarity(i, o); + } else if constexpr(constexpr_measure == PROBABILITY_COSINE_SIMILARITY) { + ret = pcosine_similarity(i, o); + } else { + throw std::runtime_error(std::string("Unknown measure: ") + std::to_string(int(constexpr_measure))); + } + return ret; + } + template && !std::is_integral_v>> + INLINE FT call(size_t i, OT &o, [[maybe_unused]] CacheT *cp=static_cast(nullptr)) const { + FT ret; + if constexpr(constexpr_measure == TOTAL_VARIATION_DISTANCE) { + ret = discrete_total_variation_distance(row(i), o); + } else if constexpr(constexpr_measure == L1) { + ret = l1Norm(weighted_row(i) - o); + } else if constexpr(constexpr_measure == L2) { + ret = l2Norm(weighted_row(i) - o); + } else if constexpr(constexpr_measure == SQRL2) { + assert(i < this->data().rows()); + ret = blaze::sqrNorm(weighted_row(i) - o); + } else if constexpr(constexpr_measure == JSD) { + if(cp) { + ret = jsd(i, o, *cp); + } else ret = jsd(i, o); + } else if constexpr(constexpr_measure == JSM) { + if(cp) { + ret = jsm(i, o, *cp); + } else ret = jsm(i, o); + } else if constexpr(constexpr_measure == REVERSE_MKL) { + if(cp) { + ret = mkl(o, i, *cp); + } else ret = mkl(o, i); + } else if constexpr(constexpr_measure == MKL) { + if(cp) { + ret = mkl(i, o, *cp); + } else ret = mkl(i, o); + } else if constexpr(constexpr_measure == EMD) { + ret = p_wasserstein(row(i), o); + } else if constexpr(constexpr_measure == WEMD) { + ret = p_wasserstein(weighted_row(i), o); + } else if constexpr(constexpr_measure == REVERSE_POISSON) { + ret = cp ? pkl(o, i, *cp): pkl(o, i); + } else if constexpr(constexpr_measure == POISSON) { + ret = cp ? pkl(i, o, *cp): pkl(i, o); + } else if constexpr(constexpr_measure == HELLINGER) { + if(cp) { + ret = blaze::sqrNorm(sqrtrow(i) - *cp); + } else { + ret = blaze::sqrNorm(sqrtrow(i) - blaze::sqrt(o)); + } + } else if constexpr(constexpr_measure == BHATTACHARYYA_METRIC) { + ret = cp ? bhattacharyya_metric(i, o, *cp) + : bhattacharyya_metric(i, o); + } else if constexpr(constexpr_measure == BHATTACHARYYA_DISTANCE) { + ret = cp ? bhattacharyya_distance(i, o, *cp) + : bhattacharyya_distance(i, o); + } else if constexpr(constexpr_measure == LLR) { + ret = cp ? llr(i, o, *cp): llr(i, o); + } else if constexpr(constexpr_measure == UWLLR) { + ret = cp ? uwllr(i, o, *cp): uwllr(i, o); + } else if constexpr(constexpr_measure == OLLR) { + ret = cp ? 
llr(i, o, *cp): llr(i, o);
+            std::cerr << "Note: computing LLR, not OLLR, for this case\n";
+        } else if constexpr(constexpr_measure == ITAKURA_SAITO) {
+            ret = itakura_saito(i, o);
+        } else if constexpr(constexpr_measure == REVERSE_ITAKURA_SAITO) {
+            ret = itakura_saito(o, i);
+        } else if constexpr(constexpr_measure == COSINE_DISTANCE) {
+            ret = cosine_distance(i, o);
+        } else if constexpr(constexpr_measure == PROBABILITY_COSINE_DISTANCE) {
+            ret = pcosine_distance(i, o);
+        } else if constexpr(constexpr_measure == COSINE_SIMILARITY) {
+            ret = cosine_similarity(i, o);
+        } else if constexpr(constexpr_measure == PROBABILITY_COSINE_SIMILARITY) {
+            ret = pcosine_similarity(i, o);
+        } else {
+            throw std::runtime_error(std::string("Unknown measure: ") + std::to_string(int(constexpr_measure)));
+        }
+        return ret;
+    }
    template<DissimilarityMeasure constexpr_measure> INLINE FT call(size_t i, size_t j) const { FT ret; if constexpr(constexpr_measure == TOTAL_VARIATION_DISTANCE) {
@@ -263,7 +431,104 @@ class ProbDivApplicator {
        } return ret; }
-    INLINE FT operator()(size_t i, size_t j, ProbDivType measure) const {
+    template<typename OT, typename CacheT=VecT, typename=std::enable_if_t<!std::is_same_v<OT, size_t> > >
+    INLINE FT operator()(const OT &o, size_t i, const CacheT *cache=static_cast<const CacheT *>(nullptr)) const noexcept {
+        return this->operator()(o, i, cache, measure_);
+    }
+    template<typename OT, typename CacheT=VecT, typename=std::enable_if_t<!std::is_same_v<OT, size_t> > >
+    INLINE FT operator()(const OT &o, size_t i, const CacheT *cache, DissimilarityMeasure measure) const noexcept {
+#ifndef NDEBUG
+        if(unlikely(i >= data_.rows())) {
+            std::cerr << (std::string("Invalid rows selection: ") + std::to_string(i) + '\n');
+            std::exit(1);
+        }
+#endif
+        if(unlikely(measure == static_cast<DissimilarityMeasure>(-1))) {
+            std::cerr << "Unset measure\n";
+            std::exit(1);
+        }
+        //PRETTY_SAY << "Performing with " << (void *)&o << " and row " << i << '\n';
+        FT ret;
+        switch(measure) {
+            case TOTAL_VARIATION_DISTANCE: ret = call<TOTAL_VARIATION_DISTANCE>(o, i); break;
+            case L1: ret = call<L1>(o, i); break;
+            case L2: ret = call<L2>(o, i); break;
+            case SQRL2: ret = call<SQRL2>(o, i); break;
+            case JSD: ret = call<JSD>(o, i); break;
+            case JSM: ret = call<JSM>(o, i); break;
+            case REVERSE_MKL: ret = call<REVERSE_MKL>(o, i, cache); break;
+            case MKL: ret = call<MKL>(o, i, cache); break;
+            case EMD: ret = call<EMD>(o, i); break;
+            case WEMD: ret = call<WEMD>(o, i); break;
+            case REVERSE_POISSON: ret = call<REVERSE_POISSON>(o, i, cache); break;
+            case POISSON: ret = call<POISSON>(o, i, cache); break;
+            case HELLINGER: ret = call<HELLINGER>(o, i, cache); break;
+            case BHATTACHARYYA_METRIC: ret = call<BHATTACHARYYA_METRIC>(o, i); break;
+            case BHATTACHARYYA_DISTANCE: ret = call<BHATTACHARYYA_DISTANCE>(o, i); break;
+            case LLR: ret = call<LLR>(o, i, cache); break;
+            case UWLLR: ret = call<UWLLR>(o, i, cache); break;
+            case OLLR: ret = call<OLLR>(o, i, cache); break;
+            case ITAKURA_SAITO: ret = call<ITAKURA_SAITO>(o, i, cache); break;
+            case REVERSE_ITAKURA_SAITO: ret = call<REVERSE_ITAKURA_SAITO>(o, i, cache); break;
+            case COSINE_DISTANCE: ret = call<COSINE_DISTANCE>(o, i); break;
+            case PROBABILITY_COSINE_DISTANCE: ret = call<PROBABILITY_COSINE_DISTANCE>(o, i); break;
+            case COSINE_SIMILARITY: ret = call<COSINE_SIMILARITY>(o, i); break;
+            case PROBABILITY_COSINE_SIMILARITY: ret = call<PROBABILITY_COSINE_SIMILARITY>(o, i); break;
+            case ORACLE_METRIC: case ORACLE_PSEUDOMETRIC: std::fprintf(stderr, "These are placeholders and should not be called."); return 0.;
+            default: __builtin_unreachable();
+        }
+        return ret;
+    }
+    template<typename OT, typename CacheT=VecT, typename=std::enable_if_t<!std::is_same_v<OT, size_t> > >
+    INLINE FT operator()(size_t i, const OT &o, const CacheT *cache=static_cast<const CacheT *>(nullptr)) const {
+        return this->operator()(i, o, cache, measure_);
+    }
+    template<typename OT, typename CacheT=VecT, typename=std::enable_if_t<!std::is_same_v<OT, size_t> > >
+    INLINE FT operator()(size_t i, const OT &o, const CacheT *cache, DissimilarityMeasure measure) const noexcept {
+        if(unlikely(i >= data_.rows())) {
+            std::cerr << (std::string("Invalid rows selection: ") + std::to_string(i) + '\n');
+            std::exit(1);
+        }
+        if(unlikely(measure == static_cast<DissimilarityMeasure>(-1))) {
+            std::cerr << "Unset measure\n";
+            std::exit(1);
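+            // measure == -1 denotes an unset measure; valid values are enforced at construction via is_valid_measure.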
+        }
+#if 0
+        PRETTY_SAY << "Computing i vs outside o with cache and " << detail::prob2str(measure) << "\n";
+        PRETTY_SAY << "Performing with "
+                   << " row " << i << " and "
+                   << (void *)&o
+                   << '\n';
+#endif
+        FT ret;
+        switch(measure) {
+            case TOTAL_VARIATION_DISTANCE: ret = call<TOTAL_VARIATION_DISTANCE>(i, o); break;
+            case L1: ret = call<L1>(i, o); break;
+            case L2: ret = call<L2>(i, o); break;
+            case SQRL2: ret = call<SQRL2>(i, o); break;
+            case JSD: ret = call<JSD>(i, o); break;
+            case JSM: ret = call<JSM>(i, o); break;
+            case REVERSE_MKL: ret = call<REVERSE_MKL>(i, o, cache); break;
+            case MKL: ret = call<MKL>(i, o, cache); break;
+            case EMD: ret = call<EMD>(i, o); break;
+            case WEMD: ret = call<WEMD>(i, o); break;
+            case REVERSE_POISSON: ret = call<REVERSE_POISSON>(i, o, cache); break;
+            case POISSON: ret = call<POISSON>(i, o, cache); break;
+            case HELLINGER: ret = call<HELLINGER>(i, o, cache); break;
+            case BHATTACHARYYA_METRIC: ret = call<BHATTACHARYYA_METRIC>(i, o); break;
+            case BHATTACHARYYA_DISTANCE: ret = call<BHATTACHARYYA_DISTANCE>(i, o); break;
+            case LLR: ret = call<LLR>(i, o, cache); break;
+            case UWLLR: ret = call<UWLLR>(i, o, cache); break;
+            case OLLR: ret = call<OLLR>(i, o, cache); break;
+            case ITAKURA_SAITO: ret = call<ITAKURA_SAITO>(i, o, cache); break;
+            case REVERSE_ITAKURA_SAITO: ret = call<REVERSE_ITAKURA_SAITO>(i, o, cache); break;
+            case COSINE_DISTANCE: ret = call<COSINE_DISTANCE>(i, o); break;
+            case PROBABILITY_COSINE_DISTANCE: ret = call<PROBABILITY_COSINE_DISTANCE>(i, o); break;
+            case COSINE_SIMILARITY: ret = call<COSINE_SIMILARITY>(i, o); break;
+            case PROBABILITY_COSINE_SIMILARITY: ret = call<PROBABILITY_COSINE_SIMILARITY>(i, o); break;
+            case ORACLE_METRIC: case ORACLE_PSEUDOMETRIC: std::fprintf(stderr, "These are placeholders and should not be called."); return 0.;
+            default: __builtin_unreachable();
+        }
+        return ret;
+    }
+    INLINE FT operator()(size_t i, size_t j, DissimilarityMeasure measure) const noexcept {
        if(unlikely(i >= data_.rows() || j >= data_.rows())) { std::cerr << (std::string("Invalid rows selection: ") + std::to_string(i) + ", " + std::to_string(j) + '\n'); std::exit(1);
@@ -293,12 +558,13 @@ class ProbDivApplicator {
            case PROBABILITY_COSINE_DISTANCE: ret = call<PROBABILITY_COSINE_DISTANCE>(i, j); break;
            case COSINE_SIMILARITY: ret = call<COSINE_SIMILARITY>(i, j); break;
            case PROBABILITY_COSINE_SIMILARITY: ret = call<PROBABILITY_COSINE_SIMILARITY>(i, j); break;
+            case ORACLE_METRIC: case ORACLE_PSEUDOMETRIC: std::fprintf(stderr, "These are placeholders and should not be called."); return 0.;
            default: __builtin_unreachable(); } return ret; }
    template<typename MatType>
-    void operator()(MatType &mat, ProbDivType measure, bool symmetrize=false) {
+    void operator()(MatType &mat, DissimilarityMeasure measure, bool symmetrize=false) {
        set_distance_matrix(mat, measure, symmetrize); }
    template
@@ -319,17 +585,51 @@ class ProbDivApplicator {
            throw std::runtime_error(buf); }
            ret = -std::numeric_limits<FT>::max();
-            throw shared::TODOError("TODO: complete special fast version of this supporting priors at no runtime cost.");
+            throw TODOError("TODO: complete special fast version of this supporting priors at no runtime cost.");
        } else { auto div = row(i) / row(j); ret = blaze::sum(div - blaze::log(div)) - row(i).size(); } return ret; }
+    template<typename OT, typename=std::enable_if_t<!std::is_integral_v<OT>> >
+    auto itakura_saito(size_t i, const OT &o) const {
+        FT ret;
+        if constexpr(IS_SPARSE) {
+            if(!prior_data_) {
+                char
buf[128]; + std::sprintf(buf, "warning: Itakura-Saito cannot be computed to sparse vectors/matrices at %zu/%p\n", i, (void *)&o); + throw std::runtime_error(buf); + } + ret = -std::numeric_limits::max(); + throw TODOError("TODO: complete special fast version of this supporting priors at no runtime cost."); + } else { + auto div = o / row(i); + ret = blaze::sum(div - blaze::log(div)) - o.size(); + } + return ret; + } auto hellinger(size_t i, size_t j) const { return sqrdata_ ? blaze::sqrNorm(sqrtrow(i) - sqrtrow(j)) - : blaze::sqrNorm(blz::sqrt(row(i)) - blz::sqrt(row(j))); + : blaze::sqrNorm(blaze::sqrt(row(i)) - blaze::sqrt(row(j))); } FT jsd(size_t i, size_t j) const { if(!IsSparseMatrix_v || !prior_data_) { @@ -338,98 +638,250 @@ class ProbDivApplicator { FT ret; auto ri = row(i), rj = row(j); //constexpr FT logp5 = -0.693147180559945; // std::log(0.5) - auto s = ri + rj; - ret = jsd_cache_->operator[](i) + jsd_cache_->operator[](j) - blz::dot(s, blaze::neginf2zero(blaze::log(s * 0.5))); -#ifndef NDEBUG - static constexpr typename MatrixType::ElementType threshold - = std::is_same_v - ? 0.: -1e-5; - assert(ret >= threshold || !std::fprintf(stderr, "ret: %g (numerical stability issues)\n", ret)); -#endif - return std::max(ret, static_cast(0.)); - } else { - throw shared::TODOError("TODO: complete special fast version of this supporting priors at no runtime cost."); - return FT(0); + auto s = evaluate(ri + rj); + ret = get_jsdcache(i) + get_jsdcache(j) - blaze::dot(s, blaze::neginf2zero(blaze::log(s * 0.5))); + return std::max(.5 * ret, static_cast(0.)); + } else if constexpr(IS_SPARSE) { + FT ret = get_jsdcache(i) + get_jsdcache(j); + const size_t dim = row(i).size(); + auto lhr = row(i), rhr = row(j); + auto lhit = lhr.begin(), rhit = rhr.begin(); + const auto lhe = lhr.end(), rhe = rhr.end(); + auto lhrsi = 1. / row_sums_[i]; + auto rhrsi = 1. / row_sums_[j]; + if(prior_data_->size() == 1) { + const auto lhrsimul = lhrsi * prior_data_->operator[](0); + const auto rhrsimul = rhrsi * prior_data_->operator[](0); + if(lhit == lhe || rhit == rhe) return static_cast(0); + auto dox = [&](auto x) {ret -= x * std::log(.5 * x);}; + while(lhit != lhe && rhit != rhe) { + if(lhit->index() == rhit->index()) { + dox(lhit->value() + rhit->value()); + ++lhit; ++rhit; + } else if(lhit->index() < rhit->index()) { + dox(lhit->value() + rhrsimul); + ++lhit; + } else { + dox(rhit->value() + lhrsimul); + ++rhit; + } + } + //std::fprintf(stderr, "Finished loop. lhit is end? %d rhit is ind? %d\n", lhit == lhe, rhit == rhe); + for(;lhit != lhe;++lhit) + dox(lhit->value() + rhrsimul); + for(;rhit != rhe;++rhit) + dox(rhit->value() + lhrsimul); + //std::fprintf(stderr, "Handled all lhit\n"); + const FT sump = (lhrsimul + rhrsimul); + ret -= blz::number_shared_zeros(lhr, rhr) * (sump * std::log(.5 * (sump))); + } else { + std::fprintf(stderr, "Fanciest\n"); + // This could later be accelerated, but that kind of caching is more complicated. + auto &pd = *prior_data_; + auto dox = [&](auto x, auto y) {ret -= (x + y) * std::log(.5 * (x + y));}; + auto doxy = [&](auto x) {ret -= x * std::log(.5 * x);}; + size_t first_index = lhit != lhe ? (rhit != rhe ? std::min(lhit->index(), rhit->index()): lhit->index()): rhit != rhe ? 
rhit->index(): dim; + for(size_t i = 0; i < first_index; ++i) + doxy(pd[i] * (lhrsi + rhrsi)); + while(lhit != lhe && rhit != rhe) { + if(lhit->index() == rhit->index()) { + dox(lhit->value(), rhit->value()); + if(++lhit == lhe) break; + if(++rhit == rhe) break; + } else if(lhit->index() < rhit->index()) { + dox(lhit->value(), pd[lhit->index()] * rhrsi); + for(size_t i = lhit->index() + 1; i < rhit->index(); ++i) + dox(pd[i] * lhrsi, pd[i] * rhrsi); + if(++lhit == lhe) break; + } else { + dox(rhit->value(), pd[rhit->index()] * lhrsi); + for(size_t i = rhit->index() + 1; i < lhit->index(); ++i) + doxy(pd[i] * (lhrsi + rhrsi)); + if(++rhit == rhe) break; + } + } + // Remaining entries + while(lhit != lhe) { + dox(lhit->value(), pd[lhit->index()] * rhrsi); + size_t i = lhit->index() + 1; + size_t nextind = (++lhit == lhe) ? dim: lhit->index(); + for(; i < nextind; ++i) + doxy(pd[i] * (lhrsi + rhrsi)); + } + while(rhit != rhe) { + dox(rhit->value(), lhrsi * pd[rhit->index()]); + size_t i = rhit->index() + 1; + size_t nextind = (++rhit == rhe) ? dim: rhit->index(); + for(; i < nextind; ++i) + doxy(pd[i] * (lhrsi + rhrsi)); + } + } + return std::max(ret * .5, static_cast(0.)); } + __builtin_unreachable(); } template>, typename OT2> auto jsd(size_t i, const OT &o, const OT2 &olog) const { - if(IS_SPARSE && prior_data_) throw shared::TODOError("TODO: complete special fast version of this supporting priors at no runtime cost."); + if(IS_SPARSE && blaze::IsSparseVector_v && prior_data_) throw TODOError("TODO: complete special fast version of this supporting priors at no runtime cost."); auto mnlog = evaluate(log(0.5 * (row(i) + o))); - return (blz::dot(row(i), logrow(i) - mnlog) + blz::dot(o, olog - mnlog)); + return (blaze::dot(row(i), logrow(i) - mnlog) + blaze::dot(o, olog - mnlog)); } template>> auto jsd(size_t i, const OT &o) const { - if(IS_SPARSE && prior_data_) throw shared::TODOError("TODO: complete special fast version of this supporting priors at no runtime cost."); - auto olog = evaluate(blaze::neginf2zero(blz::log(o))); + if(IS_SPARSE && blaze::IsSparseVector_v && prior_data_) throw TODOError("TODO: complete special fast version of this supporting priors at no runtime cost."); + auto olog = evaluate(blaze::neginf2zero(blaze::log(o))); return jsd(i, o, olog); } auto mkl(size_t i, size_t j) const { - // Multinomial KL - if(IS_SPARSE && prior_data_) throw shared::TODOError("TODO: complete special fast version of this supporting priors at no runtime cost."); - return get_jsdcache(i) - blz::dot(row(i), logrow(j)); + if constexpr(IS_SPARSE) { + if(prior_data_) { + const auto &pd(*prior_data_); + const bool single_value = pd.size() == 1; + auto lhr = row(i); + const size_t dim = lhr.size(); + auto rhr = row(j); + auto lhit = lhr.begin(), rhit = rhr.begin(); + const auto lhe = lhr.end(), rhe = rhr.end(); + const auto lhrsi = 1. / row_sums_[i]; + const auto rhrsi = 1. 
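+                    // With a sparse matrix and a prior, entries absent from both rows still carry prior mass; the merge loop below accounts for them in bulk.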
/ row_sums_[j]; + FT ret = 0.; + if(single_value) { + size_t i = 0; + const FT inc = pd[0]; + const FT lhinc = inc * lhrsi; + const FT rhinc = inc * rhrsi; + const FT rhincl = std::log(rhinc); + const FT empty_contrib = -lhinc * rhincl; + size_t nz = 0; + for(;;) { + if(lhit != lhe && rhit != rhe) { + size_t cind = std::min(lhit->index(), rhit->index()); + nz += cind - i; + i = cind + 1; + const size_t lhi = lhit->index(); + const size_t rhi = rhit->index(); + if(lhi == rhi) { + ret -= lhit->value() * std::log(rhit->value()); + ++lhit; + ++rhit; + } else if(lhi < rhi) { + ret -= lhit->value() * rhincl; + ++lhit; + } else { + ret -= lhinc * std::log(rhit->value()); + ++rhit; + } + } else if(lhit == lhe) { + if(rhit == rhe) { + nz += dim - i; + i = dim; + break; + } else { + for(;rhit != rhe;++rhit) { + nz += rhit->index() - i; + ret -= lhinc * std::log(rhit->value()); + i = rhit->index() + 1; + } + } + } else if(rhit == rhe) { + for(;lhit != lhe;++lhit) { + nz += lhit->index() - i; + ret -= lhit->value() * rhincl; + i = lhit->index() + 1; + } + } + } + ret += empty_contrib * nz; + } else { // if(single_value) / else + for(;;) { + if(lhit != lhe && rhit != rhe) { + size_t cind = std::min(lhit->index(), rhit->index()); + for(;i < cind;++i) + ret -= lhrsi * pd[i] * std::log(rhrsi * pd[i]); + const size_t lhi = lhit->index(); + const size_t rhi = rhit->index(); + if(lhi == rhi) { + ret -= lhit->value() * std::log(rhit->value()); + ++lhit; + ++rhit; + } else if(lhi < rhi) { + ret -= lhit->value() * std::log(rhrsi * pd[i]); + ++lhit; + } else { + // lh contrib is prior over row sum + ret -= pd[i] * lhrsi * std::log(rhit->value()); + ++rhit; + } + ++i; + } else if(lhit == lhe) { + if(rhit == rhe) { + for(;i < dim;++i) + ret -= lhrsi * pd[i] * std::log(rhrsi * pd[i]); + break; + } else { + for(;rhit != rhe;++rhit) { + for(;i < rhit->index(); ++i) + ret -= lhrsi * pd[i] * std::log(rhrsi * pd[i]); + ret -= lhrsi * pd[i] * std::log(rhit->value()); + ++i; + } + } + } else if(rhit == rhe) { + for(;lhit != lhe;++lhit) { + for(;i < lhit->index(); ++i) + ret -= lhrsi * pd[i] * std::log(rhrsi * pd[i]); + ret -= lhit->value() * std::log(rhrsi * pd[i]); + ++i; + } + } + } + } + return ret + get_jsdcache(i); + } + } + return FT(get_jsdcache(i) - blz::dot(row(i), logrow(j))); } template>> auto mkl(size_t i, const OT &o) const { - // Multinomial KL - if(IS_SPARSE && prior_data_) throw shared::TODOError("TODO: complete special fast version of this supporting priors at no runtime cost."); - return get_jsdcache(i) - blz::dot(row(i), blaze::neginf2zero(blz::log(o))); - } - template>, typename OT2> - auto mkl(size_t i, const OT &, const OT2 &olog) const { - if(IS_SPARSE && prior_data_) throw shared::TODOError("TODO: complete special fast version of this supporting priors at no runtime cost."); - // Multinomial KL - return blz::dot(row(i), logrow(i) - olog); - } - auto pkl(size_t i, size_t j) const { - if(IS_SPARSE && prior_data_) throw shared::TODOError("TODO: complete special fast version of this supporting priors at no runtime cost."); - // Poission KL - return get_jsdcache(i) - blz::dot(row(i), logrow(j)) + blz::sum(row(j) - row(i)); + if(IS_SPARSE && blaze::IsSparseVector_v && prior_data_) throw TODOError("TODO: complete special fast version of this supporting priors at no runtime cost."); + return get_jsdcache(i) - blaze::dot(row(i), blaze::neginf2zero(blaze::log(o))); } template>, typename OT2> - auto pkl(size_t i, const OT &o, const OT2 &olog) const { - if(IS_SPARSE && prior_data_) throw 
shared::TODOError("TODO: complete special fast version of this supporting priors at no runtime cost."); - // Poission KL - return get_jsdcache(i) - blz::dot(row(i), olog) + blz::sum(row(i) - o); + auto mkl(const OT &o, size_t i, const OT2 &olog) const { + if(IS_SPARSE && blaze::IsSparseVector_v && prior_data_) throw TODOError("TODO: complete special fast version of this supporting priors at no runtime cost."); + return blaze::dot(o, olog - logrow(i)); } template>> - auto pkl(size_t i, const OT &o) const { - if(IS_SPARSE && prior_data_) throw shared::TODOError("TODO: complete special fast version of this supporting priors at no runtime cost."); - return pkl(i, o, neginf2zero(blz::log(o))); - } - auto psd(size_t i, size_t j) const { - if(IS_SPARSE && prior_data_) throw shared::TODOError("TODO: complete special fast version of this supporting priors at no runtime cost."); - // Poission JSD - auto mnlog = evaluate(log(.5 * (row(i) + row(j)))); - return (blz::dot(row(i), logrow(i) - mnlog) + blz::dot(row(j), logrow(j) - mnlog)); + auto mkl(const OT &o, size_t i) const { + if(IS_SPARSE && prior_data_) throw TODOError("TODO: complete special fast version of this supporting priors at no runtime cost."); + return blaze::dot(o, blaze::neginf2zero(blaze::log(o)) - logrow(i)); } template>, typename OT2> - auto psd(size_t i, const OT &o, const OT2 &olog) const { - if(IS_SPARSE && prior_data_) throw shared::TODOError("TODO: complete special fast version of this supporting priors at no runtime cost."); - // Poission JSD - auto mnlog = evaluate(log(.5 * (row(i) + o))); - return (blz::dot(row(i), logrow(i) - mnlog) + blz::dot(o, olog - mnlog)); - } - template>> - auto psd(size_t i, const OT &o) const { - if(IS_SPARSE && prior_data_) throw shared::TODOError("TODO: complete special fast version of this supporting priors at no runtime cost."); - return psd(i, o, neginf2zero(blz::log(o))); + auto mkl(size_t i, const OT &, const OT2 &olog) const { + if(IS_SPARSE && prior_data_) throw TODOError("TODO: complete special fast version of this supporting priors at no runtime cost."); + return blaze::dot(row(i), logrow(i) - olog); } + template + auto pkl(Args &&...args) const { return mkl(std::forward(args)...);} + template + auto psd(Args &&...args) const { return jsd(std::forward(args)...);} + template + auto psm(Args &&...args) const { return jsm(std::forward(args)...);} auto bhattacharyya_sim(size_t i, size_t j) const { - if(IS_SPARSE && prior_data_) throw shared::TODOError("TODO: complete special fast version of this supporting priors at no runtime cost."); - return sqrdata_ ? blz::dot(sqrtrow(i), sqrtrow(j)) - : blz::sum(blz::sqrt(row(i) * row(j))); + if(IS_SPARSE && prior_data_) throw TODOError("TODO: complete special fast version of this supporting priors at no runtime cost."); + return sqrdata_ ? blaze::dot(sqrtrow(i), sqrtrow(j)) + : blaze::sum(blaze::sqrt(row(i) * row(j))); } template>, typename OT2> auto bhattacharyya_sim(size_t i, const OT &o, const OT2 &osqrt) const { if(IS_SPARSE && prior_data_) throw std::runtime_error("Failed to calculate. TODO: complete special fast version of this supporting priors at no runtime cost."); - return sqrdata_ ? blz::dot(sqrtrow(i), osqrt) - : blz::sum(blz::sqrt(row(i) * o)); + return sqrdata_ ? blaze::dot(sqrtrow(i), osqrt) + : blaze::sum(blaze::sqrt(row(i) * o)); } template>> auto bhattacharyya_sim(size_t i, const OT &o) const { if(IS_SPARSE && prior_data_) throw std::runtime_error("Failed to calculate. 
TODO: complete special fast version of this supporting priors at no runtime cost."); - return bhattacharyya_sim(i, o, blz::sqrt(o)); + return bhattacharyya_sim(i, o, blaze::sqrt(o)); } template auto bhattacharyya_distance(Args &&...args) const { @@ -438,13 +890,11 @@ class ProbDivApplicator { } template auto bhattacharyya_metric(Args &&...args) const { - throw std::runtime_error("Failed to calculate. TODO: complete special fast version of this supporting priors at no runtime cost."); + if(IS_SPARSE && prior_data_) throw std::runtime_error("Failed to calculate. TODO: complete special fast version of this supporting priors at no runtime cost."); return std::sqrt(1 - bhattacharyya_sim(std::forward(args)...)); } - template - auto psm(Args &&...args) const {return std::sqrt(std::forward(args)...);} auto llr(size_t i, size_t j) const { - if(IS_SPARSE && prior_data_) throw shared::TODOError("TODO: complete special fast version of this supporting priors at no runtime cost."); + if(IS_SPARSE && prior_data_) throw TODOError("TODO: complete special fast version of this supporting priors at no runtime cost."); //blaze::dot(row(i), logrow(i)) * row_sums_[i] //+ //blaze::dot(row(j), logrow(j)) * row_sums_[j] @@ -455,39 +905,51 @@ class ProbDivApplicator { const auto lambda = lhn / (lhn + rhn), m1l = 1. - lambda; auto ret = lhn * get_jsdcache(i) + rhn * get_jsdcache(j) - - blz::dot(weighted_row(i) + weighted_row(j), - neginf2zero(blz::log(lambda * row(i) + m1l * row(j))) + blaze::dot(weighted_row(i) + weighted_row(j), + neginf2zero(blaze::log(lambda * row(i) + m1l * row(j))) ); assert(ret >= -1e-2 * (row_sums_[i] + row_sums_[j]) || !std::fprintf(stderr, "ret: %g\n", ret)); return std::max(ret, 0.); } auto ollr(size_t i, size_t j) const { - if(IS_SPARSE && prior_data_) throw shared::TODOError("TODO: complete special fast version of this supporting priors at no runtime cost."); + if(IS_SPARSE && prior_data_) throw TODOError("TODO: complete special fast version of this supporting priors at no runtime cost."); auto ret = get_jsdcache(i) * row_sums_[i] + get_jsdcache(j) * row_sums_[j] - - blz::dot(weighted_row(i) + weighted_row(j), neginf2zero(blz::log((row(i) + row(j)) * .5))); + - blaze::dot(weighted_row(i) + weighted_row(j), neginf2zero(blaze::log((row(i) + row(j)) * .5))); return std::max(ret, 0.); } auto uwllr(size_t i, size_t j) const { - if(IS_SPARSE && prior_data_) throw shared::TODOError("TODO: complete special fast version of this supporting priors at no runtime cost."); - const auto lhn = row_sums_[i], rhn = row_sums_[j]; - const auto lambda = lhn / (lhn + rhn), m1l = 1. - lambda; - return - std::max( - lambda * get_jsdcache(i) + - m1l * get_jsdcache(j) - - blz::dot(lambda * row(i) + m1l * row(j), - neginf2zero(blz::log( - lambda * row(i) + m1l * row(j)))), - 0.); + if(IS_SPARSE && prior_data_) throw TODOError("TODO: complete special fast version of this supporting priors at no runtime cost."); + else { + const auto lhn = row_sums_[i], rhn = row_sums_[j]; + const auto lambda = lhn / (lhn + rhn), m1l = 1. 
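+            // lambda weights the two rows by their relative total counts; UWLLR drops LLR's overall count scaling.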
- lambda; + return + std::max( + lambda * get_jsdcache(i) + + m1l * get_jsdcache(j) - + blaze::dot(lambda * row(i) + m1l * row(j), + neginf2zero(blaze::log( + lambda * row(i) + m1l * row(j)))), + 0.); + } } template>> auto llr(size_t, const OT &) const { - throw shared::TODOError("llr is not implemented for this."); + throw TODOError("llr is not implemented for this."); return 0.; } template>, typename OT2> auto llr(size_t, const OT &, const OT2 &) const { - throw shared::TODOError("llr is not implemented for this."); + throw TODOError("llr is not implemented for this."); + return 0.; + } + template>> + auto uwllr(size_t, const OT &) const { + throw TODOError("llr is not implemented for this."); + return 0.; + } + template>, typename OT2> + auto uwllr(size_t, const OT &, const OT2 &) const { + throw TODOError("llr is not implemented for this."); return 0.; } template @@ -499,9 +961,11 @@ class ProbDivApplicator { throw std::invalid_argument(std::string("Param for lambda ") + std::to_string(param) + " is out of range."); lambda_ = param; } + auto get_measure() const {return measure_;} private: template> void prep(Prior prior, const Container *c=nullptr) { + std::fprintf(stderr, "beginning prep.\n"); switch(prior) { case NONE: break; @@ -509,15 +973,13 @@ class ProbDivApplicator { if constexpr(!IsSparseMatrix_v) { data_ += static_cast(1); } else { - prior_data_.reset(new VecT(data_.columns())); - (*prior_data_)[0] = static_cast(1); + prior_data_.reset(new VecT({FT(1)})); } break; case GAMMA_BETA: if(c == nullptr) throw std::invalid_argument("Can't do gamma_beta with null pointer"); if constexpr(IsSparseMatrix_v) { - prior_data_.reset(new VecT(data_.columns())); - (*prior_data_)[0] = (*c)[0]; + prior_data_.reset(new VecT({(*c)[0]})); } else if constexpr(IsDenseMatrix_v) { data_ += (*c)[0]; } @@ -525,7 +987,7 @@ class ProbDivApplicator { case FEATURE_SPECIFIC_PRIOR: if(c == nullptr) throw std::invalid_argument("Can't do feature-specific with null pointer"); if constexpr(IsDenseMatrix_v) { - data_ += blz::expand(*c, data_.rows()); + data_ += blaze::expand(*c, data_.rows()); } else if constexpr(IsSparseMatrix_v) { assert(c->size() == data_.columns()); prior_data_.reset(new VecT(data_.columns())); @@ -535,46 +997,96 @@ class ProbDivApplicator { } row_sums_.resize(data_.rows()); { - auto rowsumit = row_sums_.data(); - for(auto r: blz::rowiterator(data_)) { - if constexpr(blz::IsDenseMatrix_v) { + for(size_t i = 0; i < data_.rows(); ++i) { + auto r(row(i)); + FT countsum = blaze::sum(r); + if constexpr(blaze::IsDenseMatrix_v) { if(prior == NONE) { r += 1e-50; - assert(blz::min(r) > 0.); +#ifndef NDEBUG + if(dist::detail::expects_nonnegative(measure_) && blaze::min(r) < 0.) + throw std::invalid_argument(std::string("Measure ") + dist::detail::prob2str(measure_) + " expects nonnegative data"); +#endif + } + } else if constexpr(blaze::IsSparseMatrix_v) { + if(prior_data_) { + bool single_value = prior_data_->size() == 1; + if(prior == DIRICHLET) { + countsum += r.size(); + } else { + MINOCORE_VALIDATE(prior_data_ != nullptr); + countsum += single_value ? r.size() * *prior_data_->begin() + : blaze::sum(*prior_data_); + } + for(auto &item: r) + item.value() += + (*prior_data_)[single_value ? 
size_t(0): item.index()]; } } - const auto countsum = blz::sum(r); r /= countsum; - *rowsumit++ = countsum; + row_sums_[i] = countsum; } } - if(blz::detail::needs_logs(measure_)) { + if(dist::detail::needs_logs(measure_)) { logdata_.reset(new MatrixType(neginf2zero(log(data_)))); } - if(blz::detail::needs_sqrt(measure_)) { - sqrdata_.reset(new MatrixType(blz::sqrt(data_))); + if(dist::detail::needs_sqrt(measure_)) { + sqrdata_.reset(new MatrixType(blaze::sqrt(data_))); } - if(blz::detail::needs_l2_cache(measure_)) { + if(dist::detail::needs_l2_cache(measure_)) { l2norm_cache_.reset(new VecT(data_.rows())); OMP_PFOR for(size_t i = 0; i < data_.rows(); ++i) { - l2norm_cache_->operator[](i) = 1. / blz::l2Norm(weighted_row(i)); + l2norm_cache_->operator[](i) = 1. / blaze::l2Norm(weighted_row(i)); } } - if(blz::detail::needs_probability_l2_cache(measure_)) { + if(dist::detail::needs_probability_l2_cache(measure_)) { pl2norm_cache_.reset(new VecT(data_.rows())); OMP_PFOR for(size_t i = 0; i < data_.rows(); ++i) { - pl2norm_cache_->operator[](i) = 1. / blz::l2Norm(row(i)); + pl2norm_cache_->operator[](i) = 1. / blaze::l2Norm(row(i)); } } if(logdata_) { jsd_cache_.reset(new VecT(data_.rows())); auto &jc = *jsd_cache_; - for(size_t i = 0; i < jc.size(); ++i) { - jc[i] = dot(row(i), logrow(i)); + if constexpr(IS_SPARSE) { + if(prior_data_) { + // Handle sparse priors + MINOCORE_VALIDATE(prior_data_->size() == 1 || prior_data_->size() == data_.columns()); + auto &pd = *prior_data_; + const bool single_value = pd.size() == 1; + for(size_t i = 0; i < data_.rows(); ++i) { + const auto rs = row_sums_[i]; + auto r = row(i); + double contrib = 0.; + auto upcontrib = [&](auto x) {contrib += x * std::log(x);}; + if(single_value) { + FT invp = pd[0] / rs; + size_t number_zero = r.size() - nonZeros(r); + contrib += number_zero * (invp * std::log(invp)); // Empty + for(auto &pair: r) upcontrib(pair.value()); // Non-empty + } else { + size_t i = 0; + auto it = r.begin(); + auto contribute_range = [&](size_t end) { + while(i < end) upcontrib(pd[i++] / rs); + }; + while(it != r.end() && i < r.size()) { + contribute_range(it->index()); + upcontrib(it->value()); + if(++it == r.end()) + contribute_range(r.size()); + } + } + jc[i] = contrib; + } + } } + if(!(IS_SPARSE && prior_data_)) + for(size_t i = 0; i < jc.size(); ++i) + jc[i] = dot(row(i), logrow(i)); } } FT get_jsdcache(size_t index) const { @@ -586,13 +1098,20 @@ class ProbDivApplicator { return get_jsdcache(index) * row_sums_->operator[](index); return (*jsd_cache_)[index] * row_sums_->operator[](index); } -}; // ProbDivApplicator +}; // DissimilarityApplicator + +template +struct is_dissimilarity_applicator: std::false_type {}; +template +struct is_dissimilarity_applicator>: std::true_type {}; +template +static constexpr bool is_dissimilarity_applicator_v = is_dissimilarity_applicator::value; template -struct PairProbDivApplicator { - ProbDivApplicator &pda_; - ProbDivApplicator &pdb_; - PairProbDivApplicator(ProbDivApplicator &lhs, ProbDivApplicator &rhs): pda_(lhs), pdb_(rhs) { +struct PairDissimilarityApplicator { + DissimilarityApplicator &pda_; + DissimilarityApplicator &pdb_; + PairDissimilarityApplicator(DissimilarityApplicator &lhs, DissimilarityApplicator &rhs): pda_(lhs), pdb_(rhs) { if(lhs.measure_ != rhs.measure_) throw std::runtime_error("measures must be the same (for preprocessing reasons)."); } decltype(auto) operator()(size_t i, size_t j) const { @@ -601,22 +1120,22 @@ struct PairProbDivApplicator { }; template -class 
MultinomialJSDApplicator: public ProbDivApplicator { - using super = ProbDivApplicator; +class MultinomialJSDApplicator: public DissimilarityApplicator { + using super = DissimilarityApplicator; template> MultinomialJSDApplicator(MatrixType &ref, Prior prior=NONE, const PriorContainer *c=nullptr): - ProbDivApplicator(ref, JSD, prior, c) {} + DissimilarityApplicator(ref, JSD, prior, c) {} }; template -class MultinomialLLRApplicator: public ProbDivApplicator { - using super = ProbDivApplicator; +class MultinomialLLRApplicator: public DissimilarityApplicator { + using super = DissimilarityApplicator; template> MultinomialLLRApplicator(MatrixType &ref, Prior prior=NONE, const PriorContainer *c=nullptr): - ProbDivApplicator(ref, LLR, prior, c) {} + DissimilarityApplicator(ref, LLR, prior, c) {} }; template @@ -625,13 +1144,13 @@ struct BaseOperand { }; template> -auto make_probdiv_applicator(MatrixType &data, ProbDivType type=JSM, Prior prior=NONE, const PriorContainer *pc=nullptr) { +auto make_probdiv_applicator(MatrixType &data, DissimilarityMeasure type=JSM, Prior prior=NONE, const PriorContainer *pc=nullptr) { #if VERBOSE_AF std::fprintf(stderr, "[%s:%s:%d] Making probdiv applicator with %d/%s as measure, %d/%s as prior, and %s for prior container.\n", - __PRETTY_FUNCTION__, __FILE__, __LINE__, int(type), blz::detail::prob2str(type), int(prior), prior == NONE ? "No prior": prior == DIRICHLET ? "Dirichlet" : prior == GAMMA_BETA ? "Gamma/Beta": "Feature-specific prior", + __PRETTY_FUNCTION__, __FILE__, __LINE__, int(type), dist::detail::prob2str(type), int(prior), prior == NONE ? "No prior": prior == DIRICHLET ? "Dirichlet" : prior == GAMMA_BETA ? "Gamma/Beta": "Feature-specific prior", pc == nullptr ? "No prior container": (std::string("Container of size ") + std::to_string(pc->size())).data()); #endif - return ProbDivApplicator(data, type, prior, pc); + return DissimilarityApplicator(data, type, prior, pc); } template> auto make_jsm_applicator(MatrixType &data, Prior prior=NONE, const PriorContainer *pc=nullptr) { @@ -640,19 +1159,19 @@ auto make_jsm_applicator(MatrixType &data, Prior prior=NONE, const PriorContaine template -auto make_kmc2(const ProbDivApplicator &app, unsigned k, size_t m=2000, uint64_t seed=13) { +auto make_kmc2(const DissimilarityApplicator &app, unsigned k, size_t m=2000, uint64_t seed=13) { wy::WyRand gen(seed); return coresets::kmc2(app, gen, app.size(), k, m); } -template -auto make_kmeanspp(const ProbDivApplicator &app, unsigned k, uint64_t seed=13) { +template> +auto make_kmeanspp(const DissimilarityApplicator &app, unsigned k, uint64_t seed=13, const WFT *weights=nullptr) { wy::WyRand gen(seed); - return coresets::kmeanspp(app, gen, app.size(), k); + return coresets::kmeanspp(app, gen, app.size(), k, weights); } template -auto make_d2_coreset_sampler(const ProbDivApplicator &app, unsigned k, uint64_t seed=13, const WFT *weights=nullptr, coresets::SensitivityMethod sens=cs::LBK) { +auto make_d2_coreset_sampler(const DissimilarityApplicator &app, unsigned k, uint64_t seed=13, const WFT *weights=nullptr, coresets::SensitivityMethod sens=cs::LBK) { auto [centers, asn, costs] = make_kmeanspp(app, k, seed); coresets::CoresetSampler cs; cs.make_sampler(app.size(), centers.size(), costs.data(), asn.data(), weights, @@ -661,7 +1180,7 @@ auto make_d2_coreset_sampler(const ProbDivApplicator &app, unsigned } } // jsd -using jsd::ProbDivApplicator; +using jsd::DissimilarityApplicator; using jsd::make_d2_coreset_sampler; using jsd::make_kmc2; using jsd::make_kmeanspp; diff 
--git a/include/minocore/dist/distance.h b/include/minocore/dist/distance.h index f70ba0d4..99e93a84 100644 --- a/include/minocore/dist/distance.h +++ b/include/minocore/dist/distance.h @@ -11,14 +11,16 @@ #define BOOST_NO_AUTO_PTR 1 #endif -#include "network_simplex/network_simplex_simple.h" #include "boost/iterator/transform_iterator.hpp" namespace blz { inline namespace distance { -enum ProbDivType { + + + +enum DissimilarityMeasure { L1, L2, SQRL2, @@ -49,6 +51,8 @@ enum ProbDivType { PROBABILITY_DOT_PRODUCT_SIMILARITY, EMD, WEMD, // Weighted Earth-mover's distance + ORACLE_METRIC, + ORACLE_PSEUDOMETRIC, WLLR = LLR, // Weighted Log-likelihood Ratio, now equivalent to the LLR TVD = TOTAL_VARIATION_DISTANCE, WASSERSTEIN=EMD, @@ -90,7 +94,8 @@ namespace detail { * For all other distance measures, Jain-Vazirani and/or local search should be run. * */ -static constexpr INLINE bool is_bregman(ProbDivType d) { + +static constexpr INLINE bool is_bregman(DissimilarityMeasure d) { switch(d) { case JSD: case MKL: case POISSON: case ITAKURA_SAITO: case REVERSE_MKL: case REVERSE_POISSON: case REVERSE_ITAKURA_SAITO: return true; @@ -98,10 +103,10 @@ static constexpr INLINE bool is_bregman(ProbDivType d) { } return false; } -static constexpr INLINE bool satisfies_d2(ProbDivType d) { +static constexpr INLINE bool satisfies_d2(DissimilarityMeasure d) { return d == LLR || is_bregman(d) || d == SQRL2; } -static constexpr INLINE bool satisfies_metric(ProbDivType d) { +static constexpr INLINE bool satisfies_metric(DissimilarityMeasure d) { switch(d) { case L1: case L2: @@ -109,16 +114,18 @@ static constexpr INLINE bool satisfies_metric(ProbDivType d) { case BHATTACHARYYA_METRIC: case TOTAL_VARIATION_DISTANCE: case HELLINGER: + case ORACLE_METRIC: return true; default: ; } return false; } -static constexpr INLINE bool satisfies_rho_metric(ProbDivType d) { +static constexpr INLINE bool satisfies_rho_metric(DissimilarityMeasure d) { if(satisfies_metric(d)) return true; switch(d) { case SQRL2: // rho = 2 // These three don't, technically, but using a prior can force it to follow it on real data + case ORACLE_PSEUDOMETRIC: case LLR: case UWLLR: case OLLR: return true; default:; @@ -126,7 +133,7 @@ static constexpr INLINE bool satisfies_rho_metric(ProbDivType d) { return false; } -static constexpr INLINE bool needs_logs(ProbDivType d) { +static constexpr INLINE bool needs_logs(DissimilarityMeasure d) { switch(d) { case JSM: case JSD: case MKL: case POISSON: case LLR: case OLLR: case ITAKURA_SAITO: case REVERSE_MKL: case REVERSE_POISSON: case UWLLR: case REVERSE_ITAKURA_SAITO: return true; @@ -135,19 +142,60 @@ static constexpr INLINE bool needs_logs(ProbDivType d) { return false; } -static constexpr INLINE bool needs_l2_cache(ProbDivType d) { +static constexpr INLINE bool is_probability(DissimilarityMeasure d) { + switch(d) { + case TOTAL_VARIATION_DISTANCE: case BHATTACHARYYA_METRIC: case BHATTACHARYYA_DISTANCE: + case MKL: case POISSON: case REVERSE_MKL: case REVERSE_POISSON: + case PROBABILITY_COSINE_DISTANCE: case PROBABILITY_DOT_PRODUCT_SIMILARITY: + case ITAKURA_SAITO: case REVERSE_ITAKURA_SAITO: + return true; + default: break; + } + return false; +} + +static constexpr INLINE bool needs_l2_cache(DissimilarityMeasure d) { return d == COSINE_DISTANCE; } -static constexpr INLINE bool needs_probability_l2_cache(ProbDivType d) { +static constexpr bool expects_nonnegative(DissimilarityMeasure measure) { + switch(measure) { + case L1: case L2: case SQRL2: + case COSINE_DISTANCE: case COSINE_SIMILARITY: + case 
PROBABILITY_COSINE_DISTANCE: case PROBABILITY_COSINE_SIMILARITY:
+        case DOT_PRODUCT_SIMILARITY:
+        case WEMD: case EMD: case ORACLE_METRIC: case ORACLE_PSEUDOMETRIC: return false;
+
+        default: // Unexpected, but will assume it's required.
+        case JSM: case JSD: case MKL: case POISSON: case HELLINGER: case BHATTACHARYYA_METRIC:
+        case BHATTACHARYYA_DISTANCE: case TOTAL_VARIATION_DISTANCE: case LLR:
+        case REVERSE_MKL: case REVERSE_POISSON: case ITAKURA_SAITO: case REVERSE_ITAKURA_SAITO:
+        case PROBABILITY_DOT_PRODUCT_SIMILARITY:
+            return true;
+
+    }
+}
+
+static constexpr INLINE bool is_dissimilarity(DissimilarityMeasure d) {
+    switch(d) {
+        case DOT_PRODUCT_SIMILARITY: case PROBABILITY_DOT_PRODUCT_SIMILARITY:
+        case COSINE_SIMILARITY: case PROBABILITY_COSINE_SIMILARITY:
+            return false;
+        default: ;
+    }
+    return true;
+}
+
+
+static constexpr INLINE bool needs_probability_l2_cache(DissimilarityMeasure d) {
    return d == PROBABILITY_COSINE_DISTANCE; }
-static constexpr INLINE bool needs_sqrt(ProbDivType d) {
+static constexpr INLINE bool needs_sqrt(DissimilarityMeasure d) {
    return d == HELLINGER || d == BHATTACHARYYA_METRIC || d == BHATTACHARYYA_DISTANCE; }
-static constexpr INLINE bool is_symmetric(ProbDivType d) {
+static constexpr INLINE bool is_symmetric(DissimilarityMeasure d) {
    switch(d) { case L1: case L2: case EMD: case HELLINGER: case BHATTACHARYYA_DISTANCE: case BHATTACHARYYA_METRIC: case JSD: case JSM: case LLR: case UWLLR: case SQRL2: case TOTAL_VARIATION_DISTANCE: case OLLR:
@@ -159,7 +207,22 @@ static constexpr INLINE bool is_symmetric(ProbDivType d) {
    return false; }
-static constexpr INLINE const char *prob2str(ProbDivType d) {
+template
+void set_cache(const blz::Vector &src, blz::Vector &dest, DissimilarityMeasure d) {
+    if(needs_logs(d)) {
+        if(is_probability(d))
+            ~dest = neginf2zero(log(~src));
+        else
+            ~dest = neginf2zero(log(~src / blaze::sum(~src)));
+        return;
+    }
+    if(needs_sqrt(d)) {
+        ~dest = sqrt(~src);
+        return;
+    }
+}
+
+static constexpr INLINE const char *prob2str(DissimilarityMeasure d) {
    switch(d) { case BHATTACHARYYA_DISTANCE: return "BHATTACHARYYA_DISTANCE"; case BHATTACHARYYA_METRIC: return "BHATTACHARYYA_METRIC";
@@ -184,10 +247,12 @@
        case PROBABILITY_COSINE_DISTANCE: return "PROBABILITY_COSINE_DISTANCE"; case COSINE_SIMILARITY: return "COSINE_SIMILARITY"; case PROBABILITY_COSINE_SIMILARITY: return "PROBABILITY_COSINE_SIMILARITY";
+        case ORACLE_METRIC: return "ORACLE_METRIC";
+        case ORACLE_PSEUDOMETRIC: return "ORACLE_PSEUDOMETRIC";
        default: return "INVALID TYPE"; } }
-static constexpr INLINE const char *prob2desc(ProbDivType d) {
+static constexpr INLINE const char *prob2desc(DissimilarityMeasure d) {
    switch(d) { case BHATTACHARYYA_DISTANCE: return "Bhattacharyya distance: -log(dot(sqrt(x) * sqrt(y)))"; case BHATTACHARYYA_METRIC: return "Bhattacharyya metric: sqrt(1 - BhattacharyyaSimilarity(x, y))";
@@ -212,11 +277,13 @@
        case PROBABILITY_COSINE_DISTANCE: return "Cosine distance of the probability vectors: arccos(\\frac{A \\cdot B}{|A|_2 |B|_2}) / pi"; case COSINE_SIMILARITY: return "Cosine similarity: \\frac{A \\cdot B}{|A|_2 |B|_2}"; case PROBABILITY_COSINE_SIMILARITY: return "Cosine similarity of the probability vectors: \\frac{A \\cdot B}{|A|_2 |B|_2}";
+        case ORACLE_METRIC: return "Placeholder for oracle metrics, allowing us to use DissimilarityMeasure in other situations";
+        case ORACLE_PSEUDOMETRIC: return "Placeholder for oracle
pseudometrics"; default: return "INVALID TYPE"; } } static void print_measures() { - std::set measures { + std::set measures { L1, L2, SQRL2, @@ -230,7 +297,7 @@ static void print_measures() { TOTAL_VARIATION_DISTANCE, LLR, OLLR, - EMD, + //EMD, REVERSE_MKL, REVERSE_POISSON, UWLLR, @@ -249,6 +316,20 @@ static void print_measures() { std::fprintf(stderr, "Code: %d. Description: '%s'. Short name: '%s'\n", measure, prob2desc(measure), prob2str(measure)); } } +static constexpr bool is_valid_measure(DissimilarityMeasure measure) { + switch(measure) { + case L1: case L2: case SQRL2: case JSM: case JSD: case MKL: + case POISSON: case HELLINGER: case BHATTACHARYYA_METRIC: + case BHATTACHARYYA_DISTANCE: case TOTAL_VARIATION_DISTANCE: + case LLR: case REVERSE_MKL: case REVERSE_POISSON: case REVERSE_ITAKURA_SAITO: + case ITAKURA_SAITO: case COSINE_DISTANCE: case PROBABILITY_COSINE_DISTANCE: + case DOT_PRODUCT_SIMILARITY: case PROBABILITY_DOT_PRODUCT_SIMILARITY: + case EMD: case WEMD: case ORACLE_METRIC: case ORACLE_PSEUDOMETRIC: + return true; + default: ; + } + return false; +} } // detail @@ -480,117 +561,6 @@ inline auto s2jsd(const blz::Vector &lhs, const blaze::Vector & } -template -CommonType_t, ElementType_t> -network_p_wasserstein(const blz::Vector &x, const blz::Vector &y, double p=1.) -{ - std::fprintf(stderr, "Warning: network_p_wasserstein seems to have a bug. Do not use.\n"); - auto &xref = ~x; - auto &yref = ~y; - const size_t sz = xref.size(); - size_t nl = nonZeros(xref), nr = nonZeros(~y); - using FT = CommonType_t, ElementType_t>; - - using namespace lemon; - using Digraph = lemon::FullBipartiteDigraph; - Digraph di(nl, nr); - NetworkSimplexSimple net(di, true, nl + nr, nl * nr); - DV weights(nl + nr); - DV indices(nl + nr); - size_t i = 0; - for(size_t ii = 0; ii < sz; ++ii) { - if(xref[ii] > 0) - weights[i] = xref[ii], indices[i] = xref[ii], ++i; - } - for(size_t ii = 0; ii < sz; ++ii) { - if(yref[ii] > 0) - weights[i] = -yref[ii], indices[i] = yref[ii], ++i; - } - auto func = [p](auto x, auto y) { - auto ret = x - y; - if(p == 1) ret = std::abs(ret); - else if(p == 2.) ret = ret * ret; - else ret = std::pow(ret, p); - return ret; - }; - net.supplyMap(weights.data(), nl, weights.data() + nl, nr); - { - const auto jptr = &weights[nl]; - for(unsigned i = 0; i < nl; ++i) { - auto arcid = i * nl; - for(unsigned j = 0; j < nl; ++j) { - net.setCost(di.arcFromId(arcid++), func(weights[i], jptr[j])); - } - } - } - int rc = net.run(); - if(rc != (int)net.OPTIMAL) { - std::fprintf(stderr, "[%s:%s:%d] Warning: something went wrong in network simplex. Error code: [%s]\n", __PRETTY_FUNCTION__, __FILE__, __LINE__, - rc == (int)net.INFEASIBLE ? "infeasible" : (int)net.UNBOUNDED ? 
"unbounded" : "unknown"); - } - - FT ret(0); - //OMP_PRAGMA("omp parallel for reduction(+:ret)") - for(size_t i = 0; i < nl; ++i) { - for(size_t j = 0; j < nr; ++j) - ret += net.flow(i * nr + j) * func(weights[i], weights[sz + j]); - } - return ret; -} - -#if 0 -template -CommonType_t, ElementType_t> -network_p_wasserstein(const blz::SparseVector &x, const blz::SparseVector &y, double p=1., size_t maxiter=100) -{ - auto &xref = ~x; - const size_t sz = xref.size(); - size_t nl = nonZeros(xref), nr = nonZeros(~y); - using FT = CommonType_t, ElementType_t>; - - using namespace lemon; - typedef lemon::FullBipartiteDigraph Digraph; - Digraph di(nl, nr); - NetworkSimplexSimple net(di, true, nl + nr, nl * nr, maxiter); - DV weights(nl + nr); - DV indices(nl + nr); - size_t i = 0; - for(const auto &pair: xref) - weights[i] = pair.value(), indices[i] = pair.index(), ++i; - for(const auto &pair: ~y) - weights[i] = -pair.value(), indices[i] = pair.index(), ++i; // negative weight - auto func = [p](auto x, auto y) { - auto ret = x - y; - if(p == 1) ret = std::abs(ret); - else if(p == 2.) ret = ret * ret; - else ret = std::pow(ret, p); - return ret; - }; - net.supplyMap(weights.data(), nl, weights.data() + nl, nr); - { - const auto jptr = &weights[nl]; - for(unsigned i = 0; i < nl; ++i) { - auto arcid = i * nl; - for(unsigned j = 0; j < nl; ++j) { - net.setCost(di.arcFromId(arcid++), func(weights[i], jptr[j])); - } - } - } - int rc = net.run(); - if(rc != (int)net.OPTIMAL) { - std::fprintf(stderr, "[%s:%s:%d] Warning: something went wrong in network simplex. Error code: [%s]\n", __PRETTY_FUNCTION__, __FILE__, __LINE__, - rc == (int)net.INFEASIBLE ? "infeasible" : (int)net.UNBOUNDED ? "unbounded" : "unknown"); - } - FT ret(0); - //OMP_PRAGMA("omp parallel for reduction(+:ret)") - for(size_t i = 0; i < nl; ++i) { - for(size_t j = 0; j < nr; ++j) - ret += net.flow(i * nr + j) * func(weights[i], weights[sz + j]); - } - return ret; -} -#endif - template, ElementType_t>> CT scipy_p_wasserstein(const blz::SparseVector &x, const blz::SparseVector &y, double p=1.) 
{ auto &xr = ~x; @@ -723,4 +693,6 @@ auto witten_poisson_dissimilarity(const blz::Vector &lhs, const blz::Vec } // namespace blz
+namespace dist = blz::distance;
+
 #endif // FGC_DISTANCE_AND_MEANING_H__
diff --git a/include/minocore/dist/knngraph.h b/include/minocore/dist/knngraph.h
new file mode 100644
index 00000000..8d912a56
--- /dev/null
+++ b/include/minocore/dist/knngraph.h
@@ -0,0 +1,286 @@
+#include "minocore/graph.h"
+#include "minocore/util/packed.h"
+#include "minocore/dist/applicator.h"
+#include "minocore/hash/hash.h"
+#include <mutex>
+
+namespace minocore {
+
+template<typename MatrixType, typename IT=uint32_t>
+std::vector<packed::pair<blaze::ElementType_t<MatrixType>, IT>> make_knns(const jsd::DissimilarityApplicator<MatrixType> &app, unsigned k) {
+    using FT = blaze::ElementType_t<MatrixType>;
+    static_assert(std::is_integral_v<IT>, "Sanity");
+    static_assert(std::is_floating_point_v<FT>, "Sanity");
+
+    MINOCORE_REQUIRE(std::numeric_limits<IT>::max() > app.size(), "sanity check");
+    if(k > app.size()) {
+        std::fprintf(stderr, "Note: make_knn_graph was provided k (%u) > # points (%zu).\n", k, app.size());
+        k = app.size();
+    }
+    const size_t np = app.size();
+    const jsd::DissimilarityMeasure measure = app.get_measure();
+    std::vector<packed::pair<FT, IT>> ret(k * np);
+    std::vector<unsigned> in_set(np);
+    const bool measure_is_sym = blz::detail::is_symmetric(measure);
+    const bool measure_is_dist = blz::detail::is_dissimilarity(measure);
+    std::unique_ptr<std::mutex[]> locks;
+    OMP_ONLY(locks.reset(new std::mutex[np]);)
+
+    // Helper functions
+    // Update
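+    // Neighbor lists are stored flat: row i owns ret[i * k, (i + 1) * k).
+    // A row fills linearly; once it holds k candidates it becomes a heap keyed
+    // on distance (max-heap for distance-like measures, min-heap for
+    // similarities), so the current worst neighbor sits at the front and can
+    // be evicted in O(log k).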
+    auto update_fwd = [&](FT d, size_t i, size_t j) {
+        if(in_set[i] < k) {
+            OMP_ONLY(std::lock_guard<std::mutex> lock(locks[i]);)
+            ret[i * k + in_set[i]] = packed::pair<FT, IT>{d, IT(j)};
+            if(++in_set[i] == k) {
+                if(measure_is_dist)
+                    std::make_heap(ret.data() + i * k, ret.data() + (i + 1) * k, std::less<>());
+                else
+                    std::make_heap(ret.data() + i * k, ret.data() + (i + 1) * k, std::greater<>());
+            }
+        } else {
+            auto cmp = [&](auto d) {return measure_is_dist ? (ret[i * k].first > d) : (ret[i * k].first < d);};
+            auto pushpop = [&](auto d) {
+                auto startp = &ret[i * k];
+                auto stopp = startp + k;
+                if(measure_is_dist) std::pop_heap(startp, stopp, std::less<>());
+                else                std::pop_heap(startp, stopp, std::greater<>());
+                ret[(i + 1) * k - 1] = packed::pair<FT, IT>{d, IT(j)};
+                if(measure_is_dist) std::push_heap(startp, stopp, std::less<>());
+                else                std::push_heap(startp, stopp, std::greater<>());
+            };
+            if(cmp(d)) {
+                OMP_ONLY(std::lock_guard<std::mutex> lock(locks[i]);)
+                {
+                    OMP_ONLY(if(cmp(d)))
+                        pushpop(d);
+                }
+            }
+        }
+    };
+
+    // Sort
+    auto perform_sort = [&](auto ptr) {
+        auto end = ptr + k;
+        if(measure_is_dist)
+            shared::sort(ptr, end, std::less<>());
+        else
+            shared::sort(ptr, end, std::greater<>());
+    };
+    if(measure_is_sym) {
+        OMP_PFOR
+        for(size_t i = 0; i < np; ++i) {
+            for(size_t j = i + 1; j < np; ++j) {
+                auto d = app(i, j);
+                update_fwd(d, i, j);
+                update_fwd(d, j, i);
+            }
+            std::fprintf(stderr, "[Symmetric:%s] Completed %zu/%zu\n", blz::detail::prob2str(measure), i + 1, np);
+        }
+        // Row i can still receive updates from other iterations running concurrently,
+        // so sorting happens in a second pass once all pairs have been processed.
+        OMP_PFOR
+        for(size_t i = 0; i < np; ++i)
+            perform_sort(ret.data() + i * k);
+    } else {
+        OMP_PFOR
+        for(size_t i = 0; i < np; ++i) {
+            for(size_t j = 0; j < np; ++j) {
+                update_fwd(app(i, j), i, j);
+            }
+            perform_sort(ret.data() + i * k);
+            std::fprintf(stderr, "[Asymmetric:%s] Completed %zu/%zu\n", blz::detail::prob2str(measure), i + 1, np);
+        }
+    }
+    std::fprintf(stderr, "Created knn graph for k = %u and %zu points\n", k, np);
+    return ret;
+}
+
+template<typename MatrixType, typename Hasher, typename IT=uint32_t>
+std::vector<packed::pair<blaze::ElementType_t<MatrixType>, IT>>
+make_knns_by_lsh(const jsd::DissimilarityApplicator<MatrixType> &app, hash::LSHTable<Hasher, IT> &table, unsigned k, unsigned maxlshcmp=0)
+{
+    if(!maxlshcmp) maxlshcmp = 10 * k;
+    using FT = blaze::ElementType_t<MatrixType>;
+    static_assert(std::is_integral_v<IT>, "Sanity");
+    static_assert(std::is_floating_point_v<FT>, "Sanity");
+
+    MINOCORE_REQUIRE(std::numeric_limits<IT>::max() > app.size(), "sanity check");
+    if(k > app.size()) {
+        std::fprintf(stderr, "Note: make_knn_graph was provided k (%u) > # points (%zu).\n", k, app.size());
+        k = app.size();
+    }
+    const size_t np = app.size();
+    const jsd::DissimilarityMeasure measure = app.get_measure();
+    std::vector<packed::pair<FT, IT>> ret(k * np);
+    std::vector<unsigned> in_set(np);
+    const bool measure_is_sym = blz::detail::is_symmetric(measure);
+    const bool measure_is_dist = blz::detail::is_dissimilarity(measure);
+    std::unique_ptr<std::mutex[]> locks;
+    OMP_ONLY(locks.reset(new std::mutex[np]);)
+    table.add(app.data());
+    table.sort();
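+    // Candidate generation: each point's most frequent colliders in the LSH
+    // table are scored exactly; any point left with fewer than k neighbors
+    // falls back to exhaustive comparison below.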
+
+    auto update_fwd = [&](FT d, size_t i, size_t j) {
+        if(in_set[i] < k) {
+            OMP_ONLY(std::lock_guard<std::mutex> lock(locks[i]);)
+            ret[i * k + in_set[i]] = packed::pair<FT, IT>{d, IT(j)};
+            if(++in_set[i] == k) {
+                if(measure_is_dist)
+                    std::make_heap(ret.data() + i * k, ret.data() + (i + 1) * k, std::less<>());
+                else
+                    std::make_heap(ret.data() + i * k, ret.data() + (i + 1) * k, std::greater<>());
+            }
+        } else {
+            auto cmp = [&](auto d) {return measure_is_dist ? (ret[i * k].first > d) : (ret[i * k].first < d);};
+            auto pushpop = [&](auto d) {
+                auto startp = &ret[i * k];
+                auto stopp = startp + k;
+                if(measure_is_dist) std::pop_heap(startp, stopp, std::less<>());
+                else                std::pop_heap(startp, stopp, std::greater<>());
+                ret[(i + 1) * k - 1] = packed::pair<FT, IT>{d, IT(j)};
+                if(measure_is_dist) std::push_heap(startp, stopp, std::less<>());
+                else                std::push_heap(startp, stopp, std::greater<>());
+            };
+            if(cmp(d)) {
+                OMP_ONLY(std::lock_guard<std::mutex> lock(locks[i]);)
+                {
+                    OMP_ONLY(if(cmp(d)))
+                        pushpop(d);
+                }
+            }
+        }
+    };
+
+    MINOCORE_VALIDATE(maxlshcmp >= k); // at least k candidates must be gathered per point
+    OMP_PFOR
+    for(size_t i = 0; i < np; ++i) {
+        auto tk = table.topk(row(app.data(), i, blaze::unchecked), maxlshcmp);
+        for(const auto &pair: tk) {
+            if(pair.first != i) {
+                auto d = app(i, pair.first);
+                update_fwd(d, i, pair.first);
+                update_fwd(d, pair.first, i);
+            }
+        }
+    }
+    size_t number_exhaustive = 0;
+    for(size_t i = 0; i < np; ++i) {
+        if(in_set[i] >= k) continue;
+        ++number_exhaustive;
+        std::fprintf(stderr, "Warning: LSH table returned < k (%u) neighbors (only %u compared). Performing exhaustive comparisons for item %zu\n",
+                     k, in_set[i], i);
+        OMP_PFOR
+        for(size_t j = 0; j < np; ++j) {
+            // Scan all j, not just j > i: a row j < i that is already full will never revisit the pair (i, j).
+            if(unlikely(j == i)) continue;
+            auto d = app(i, j);
+            update_fwd(d, i, j);
+            update_fwd(d, j, i);
+        }
+    }
+    // Sort
+    OMP_PFOR
+    for(size_t i = 0; i < np; ++i) {
+        auto ptr = ret.data() + i * k;
+        if(measure_is_dist) shared::sort(ptr, ptr + k, std::less<>());
+        else                shared::sort(ptr, ptr + k, std::greater<>());
+    }
+    if(number_exhaustive)
+        std::fprintf(stderr, "Performed quadratic distance comparisons with %zu/%zu items\n",
+                     number_exhaustive, np);
+    std::fprintf(stderr, "Created knn graph for k = %u and %zu points\n", k, np);
+    return ret;
+}
+
+template<typename FT, typename IT>
+auto knns2graph(const std::vector<packed::pair<FT, IT>> &knns, size_t np, bool mutual=true, bool symmetric=true) {
+    MINOCORE_REQUIRE(knns.size() % np == 0, "sanity");
+    MINOCORE_REQUIRE(knns.size(), "nonempty");
+    unsigned k = knns.size() / np;
+    graph::Graph<boost::undirectedS, FT> ret(np);
+    for(size_t i = 0; i < np; ++i) {
+        auto p = &knns[i * k];
+        SK_UNROLL_8
+        for(unsigned j = 0; j < k; ++j) {
+            if(mutual) {
+                if(symmetric) {
+                    if(p[j].first > knns[(p[j].second + 1) * k - 1].first) // j's k-th (worst) neighbor distance
+                        continue;
+                } else {
+                    // More expensive (O(k) vs O(1)), but does not require the assumption of symmetry.
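+                    // i.e., keep (i, j) only if i also appears somewhere in j's own neighbor list: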
+ auto start = knns.data() + p[j].second * k, stop = start + k; + if(std::find_if(start, stop, [i](auto x) {return x.second == i;})== stop) + continue; + } + } + boost::add_edge(i, static_cast(p[j].second), p[j].first, ret); + } + } + return ret; +} + +template +auto make_knn_graph(const jsd::DissimilarityApplicator &app, unsigned k, bool mutual=true) { + return knns2graph(make_knns(app, k), app.size(), mutual, blz::detail::is_symmetric(app.get_measure())); +} + +template +auto knng2mst(const Graph &gr) { + std::vector::edge_descriptor> ret; + ret.reserve(boost::num_vertices(gr) * 1.5); + boost::kruskal_minimum_spanning_tree(gr, std::back_inserter(ret)); + return ret; +} + +#if 0 +template +auto perform_rcc(const jsd::DissimilarityApplicator &app, unsigned k, bool mutual=true, size_t niter=100) { + using FT = blaze::ElementType_t; + auto graph = make_knn_graph(app, k); + auto mst = knng2mst(graph); + using eh_t = std::conditional_t >; + shared::flat_hash_set edges; + auto add_edge = [&](auto edge) { + if constexpr(sizeof(IT) <= 4) { + uint64_t encoded = (uint64_t(boost::source(edge, graph)) << 32) | boost::target(edge, graph); + edges.insert(encoded); + } else { + edges.insert(eh_t(boost::source(edge, graph), boost::target(edge, graph))); + } + }; + for(const auto edge: mst) { + add_edge(edge); + } + for(const auto edge: graph.edges()) { + add_edge(edge); + } + size_t nedges = edges.size(); + std::unique_ptr lhp(new IT[nedges]), rhp(new IT[nedges]); + size_t i = 0; + for(const auto &e: edges) { + lhp[i] = e.first; + rhp[i] = e.second; + ++i; + } + // Free unneeded memory + { shared::flat_hash_set tmp(std::move(edges)); } + const double xi = blaze::norm(app.data()); + blaze::DynamicMatrix U = app.data(); + blaze::DynamicVector lpq(nedges, 1.); + blaze::DynamicVector epsilons = blaze::generate(nedges, [&](auto x) { + return app(lhp[x], rhp[x]); + }); + shared::sort(epsilons.data(), epsilons.data() + nedges); + const int top_samples = std::minimum(250, int(std::ceil(nedges*0.01))); + double delta = blaze::mean(blaze::subvector(epsilons, 0, top_samples)); + double eps = blaze::mean(blaze::subvector(epsilons, 0, int(std::ceil(nedge * 0.01))); + const double mu = 3.0 * std::pow(epsilons[nedges - 1], 2.); + auto calculate_objective = [&]() { + auto dat = .5 * blaze::sum(blaze::pow(app.data() - U), 2.); + return dat; + }; + std::vector obj; + for(size_t iternum = 0; iternum < niter; ++iternum) { + OMP_PFOR + for(size_t i = 0; i < app.data().columns(); ++i) { + lpq[i] = mu / (mu + app(lhp[i], rhp[i])); + } + obj.push_back(calculate_objective()); + } +} +#endif + + +} // minocore diff --git a/include/minocore/graph/graphdist.h b/include/minocore/graph/graphdist.h index 539e5860..0fd95067 100644 --- a/include/minocore/graph/graphdist.h +++ b/include/minocore/graph/graphdist.h @@ -2,10 +2,11 @@ #ifndef FGC_GRAPH_DIST_H__ #define FGC_GRAPH_DIST_H__ #include "minocore/graph/graph.h" -#include "minocore/util/diskmat.h" +#include "diskmat/diskmat.h" #include namespace minocore { +using diskmat::DiskMat; namespace graph { template::vertex_descriptor>> @@ -36,14 +37,10 @@ void fill_graph_distmat(const Graph &x, MatType &mat, const VType *sources=nullp } #endif blaze::DynamicMatrix working_space(nt, boost::num_vertices(x)); -#ifndef USE_BOOST_PARALLEL OMP_PFOR -#endif for(size_t i = 0; i < nrows; ++i) { unsigned rowid = 0; -#if !defined(USE_BOOST_PARALLEL) OMP_ONLY(rowid = omp_get_thread_num();) -#endif auto vtx = all_sources ? 
vertices[i]: (*sources)[i]; auto wrow(row(working_space, rowid BLAZE_CHECK_DEBUG)); boost::dijkstra_shortest_paths(x, vtx, boost::distance_map(&wrow[0])); @@ -55,14 +52,7 @@ void fill_graph_distmat(const Graph &x, MatType &mat, const VType *sources=nullp } } else { assert(ncol == boost::num_vertices(x)); -#ifndef NDEBUG - if(all_sources) { - assert(boost::num_vertices(x) == nrows); - } -#endif -#ifndef USE_BOOST_PARALLEL OMP_PFOR -#endif for(size_t i = 0; i < nrows; ++i) { auto mr = row(~mat, i BLAZE_CHECK_DEBUG); auto vtx = all_sources || sources == nullptr ? vertices[i]: (*sources)[i]; @@ -91,14 +81,14 @@ graph2diskmat(const Graph &x, std::string path, const VType *sources=nullptr, bo template::vertex_descriptor>> -blz::DynamicMatrix +blaze::DynamicMatrix graph2rammat(const Graph &x, std::string, const VType *sources=nullptr, bool only_sources_as_dests=false, bool all_sources=false) { static_assert(std::is_arithmetic::value, "This should be floating point, or at least arithmetic"); using FT = typename Graph::edge_property_type::value_type; size_t nv = sources && only_sources_as_dests ? sources->size(): boost::num_vertices(x); size_t nrows = all_sources || !sources ? boost::num_vertices(x): sources->size(); std::fprintf(stderr, "all sources: %d. nrows: %zu\n", all_sources, nrows); - blz::DynamicMatrix ret(nrows, nv); + blaze::DynamicMatrix ret(nrows, nv); fill_graph_distmat(x, ret, sources, only_sources_as_dests, all_sources); return ret; } diff --git a/include/minocore/hash/hash.h b/include/minocore/hash/hash.h index bf5c644e..4470b99a 100644 --- a/include/minocore/hash/hash.h +++ b/include/minocore/hash/hash.h @@ -77,7 +77,7 @@ struct cms_distribution { } }; -template +template class JSDLSHasher { // See https://papers.nips.cc/paper/9195-locality-sensitive-hashing-for-f-divergences-mutual-information-loss-and-beyond.pdf // for the function. @@ -85,8 +85,8 @@ class JSDLSHasher { // // This relies on a U/H-approximation of the JSD (capacitory discrimination in Topsoe, 2000) // by the Hellinger distance, and uses an LSH for the Hellinger as-is. - blz::DM randproj_; - blz::DV boffsets_; + blaze::DynamicMatrix randproj_; + blaze::DynamicVector boffsets_; LSHasherSettings settings_; public: using ElementType = FT; @@ -96,33 +96,33 @@ class JSDLSHasher { if(seed == 0) seed = nd * nh + r; std::mt19937_64 mt(seed); std::normal_distribution gen; - randproj_ = blz::generate(nh, nd, [&](size_t x, size_t y){ + randproj_ = blaze::generate(nh, nd, [&](size_t x, size_t y){ std::mt19937_64 mt(seed + x + seed * y); return gen(mt); }); randproj_ /= r; - boffsets_ = blz::generate(nh, [&](size_t){return FT(mt()) / mt.max();}); + boffsets_ = blaze::generate(nh, [&](size_t){return FT(mt()) / mt.max();}); assert(settings_.k_ * settings_.l_ == randproj_.rows()); // In case of overflow, I suppose } template - decltype(auto) hash(const blz::Vector &input) const { + decltype(auto) hash(const blaze::Vector &input) const { //std::fprintf(stderr, "Regular input size: %zu. my rows/col:%zu/%zu\n", (~input).size(), randproj_.rows(), randproj_.columns()); - return blz::ceil(randproj_ * blz::sqrt(~input) + boffsets_); + return blaze::ceil(randproj_ * blaze::sqrt(~input) + boffsets_); } template - decltype(auto) hash(const blz::Vector &input) const { + decltype(auto) hash(const blaze::Vector &input) const { //std::fprintf(stderr, "Reversed input size: %zu. 
my rows/col:%zu/%zu\n", (~input).size(), randproj_.rows(), randproj_.columns()); - return blz::ceil(randproj_ * trans(blz::sqrt(~input)) + boffsets_); + return blaze::ceil(randproj_ * trans(blaze::sqrt(~input)) + boffsets_); } template - decltype(auto) hash(const blz::Matrix &input) const { + decltype(auto) hash(const blaze::Matrix &input) const { //std::fprintf(stderr, "Regular input rows/col: %zu/%zu. my rows/col:%zu/%zu\n", (~input).rows(), (~input).columns(), randproj_.rows(), randproj_.columns()); - return trans(blz::ceil(randproj_ * trans(blz::sqrt(~input)) + blz::expand(boffsets_, (~input).rows()))); + return trans(blaze::ceil(randproj_ * trans(blaze::sqrt(~input)) + blaze::expand(boffsets_, (~input).rows()))); } template - decltype(auto) hash(const blz::Matrix &input) const { + decltype(auto) hash(const blaze::Matrix &input) const { //std::fprintf(stderr, "Reversed SO input rows/col: %zu/%zu. my rows/col:%zu/%zu\n", (~input).rows(), (~input).columns(), randproj_.rows(), randproj_.columns()); - return trans(blz::ceil(randproj_ * blz::sqrt(~input) + blz::expand(boffsets_, (~input).columns()))); + return trans(blaze::ceil(randproj_ * blaze::sqrt(~input) + blaze::expand(boffsets_, (~input).columns()))); } const auto &matrix() const {return randproj_;} auto dim() const {return randproj_.columns();} @@ -131,7 +131,8 @@ class JSDLSHasher { auto l() const {return settings_.l_;} const auto &settings() const {return settings_;} }; -template + +template class HellingerLSHasher: public JSDLSHasher { public: template @@ -141,12 +142,8 @@ class HellingerLSHasher: public JSDLSHasher { template class Distribution, typename FT, bool SO, bool use_offsets, typename...Args> class PStableLSHasher { - // See S2JSD-LSH: A Locality-Sensitive Hashing Schema for Probability Distributions - // https://aaai.org/ocs/index.php/AAAI/AAAI17/paper/view/14692 - // for the derivation - // Note that this is an LSH for the JS Metric, not the JSD. - blz::DM randproj_; - blz::DV boffsets_; + blaze::DynamicMatrix randproj_; + blaze::DynamicVector boffsets_; LSHasherSettings settings_; double w_; public: @@ -161,34 +158,34 @@ class PStableLSHasher { auto nd = settings.dim_; if(seed == 0) seed = nd * nh + w + 1. / w; std::mt19937_64 mt(seed); - randproj_ = blz::abs(blz::generate(nh, nd, [&](size_t, size_t){return gen(mt);}) * (1. / w)); + randproj_ = blaze::abs(blaze::generate(nh, nd, [&](size_t, size_t){return gen(mt);}) * (1. / w)); if constexpr(use_offsets) - boffsets_ = blz::generate(nh, [&](size_t){return FT(mt() / 2) / mt.max();}) - 0.5; + boffsets_ = blaze::generate(nh, [&](size_t){return FT(mt() / 2) / mt.max();}) - 0.5; assert(settings_.k_ * settings_.l_ == randproj_.rows()); // In case of overflow, I suppose } template - decltype(auto) hash(const blz::Vector &input) const { - if constexpr(use_offsets) return blz::floor(randproj_ * (~input) + 1.) + boffsets_; - else return blz::floor(randproj_ * (~input)); + decltype(auto) hash(const blaze::Vector &input) const { + if constexpr(use_offsets) return blaze::floor(randproj_ * (~input) + 1. + boffsets_); + else return blaze::floor(randproj_ * (~input)); } template - decltype(auto) hash(const blz::Vector &input) const { - if constexpr(use_offsets) return blz::floor(randproj_ * trans(~input) + 1.) + boffsets_; - else return blz::floor(randproj_ * trans(~input)); + decltype(auto) hash(const blaze::Vector &input) const { + if constexpr(use_offsets) return blaze::floor(randproj_ * trans(~input) + 1. 
+ boffsets_); + else return blaze::floor(randproj_ * trans(~input)); } template - decltype(auto) hash(const blz::Matrix &input) const { + decltype(auto) hash(const blaze::Matrix &input) const { if constexpr(use_offsets) - return trans(blz::floor(randproj_ * trans(~input)) + blz::expand(boffsets_, (~input).rows())); + return trans(blaze::floor(randproj_ * trans(~input) + blaze::expand(boffsets_, (~input).rows()))); else - return trans(blz::floor(randproj_ * trans(~input))); + return trans(blaze::floor(randproj_ * trans(~input))); } template - decltype(auto) hash(const blz::Matrix &input) const { + decltype(auto) hash(const blaze::Matrix &input) const { if constexpr(use_offsets) - return trans(blz::floor(randproj_ * trans(~input)) + blz::expand(boffsets_, (~input).columns())); + return trans(blaze::floor(randproj_ * trans(~input) + blaze::expand(boffsets_, (~input).columns()))); else - return trans(blz::floor(randproj_ * trans(~input))); + return trans(blaze::floor(randproj_ * trans(~input))); } const auto &matrix() const {return randproj_;} auto dim() const {return settings_.dim_;} @@ -198,7 +195,7 @@ class PStableLSHasher { const auto &settings() const {return settings_;} }; -template +template class L2LSHasher: public PStableLSHasher { public: using super = PStableLSHasher; @@ -208,7 +205,7 @@ class L2LSHasher: public PStableLSHasher +template class L1LSHasher: public PStableLSHasher { public: using super = PStableLSHasher; @@ -217,7 +214,7 @@ class L1LSHasher: public PStableLSHasher +template class LpLSHasher: public PStableLSHasher { public: using super = PStableLSHasher; @@ -227,7 +224,7 @@ class LpLSHasher: public PStableLSHasher } }; -template +template class ClippedL1LSHasher: public PStableLSHasher { public: using super = PStableLSHasher; @@ -235,7 +232,7 @@ class ClippedL1LSHasher: public PStableLSHasher(args)...) {} }; -template +template class TVDLSHasher: public L1LSHasher { public: using super = L1LSHasher; @@ -244,14 +241,14 @@ class TVDLSHasher: public L1LSHasher { }; -template +template class S2JSDLSHasher { // See S2JSD-LSH: A Locality-Sensitive Hashing Schema for Probability Distributions // https://aaai.org/ocs/index.php/AAAI/AAAI17/paper/view/14692 // for the derivation // Note that this is an LSH for the JS Metric, not the JSD. - blz::DM randproj_; - blz::DV boffsets_; + blaze::DynamicMatrix randproj_; + blaze::DynamicVector boffsets_; LSHasherSettings settings_; double w_; public: @@ -263,25 +260,25 @@ class S2JSDLSHasher { if(seed == 0) seed = nd * nh + w + 1. / w; std::mt19937_64 mt(seed); std::normal_distribution gen; - randproj_ = blz::abs(blz::generate(nh, nd, [&](size_t, size_t){return gen(mt);}) * (4. / (w * w))); - boffsets_ = blz::generate(nh, [&](size_t){return FT(mt() / 2) / mt.max();}) - 0.5; + randproj_ = blaze::abs(blaze::generate(nh, nd, [&](size_t, size_t){return gen(mt);}) * (4. / (w * w))); + boffsets_ = blaze::generate(nh, [&](size_t){return FT(mt() / 2) / mt.max();}) - 0.5; assert(settings_.k_ * settings_.l_ == randproj_.rows()); // In case of overflow, I suppose } template - decltype(auto) hash(const blz::Vector &input) const { - return blz::floor(blz::sqrt(randproj_ * (~input) + 1.) + boffsets_); + decltype(auto) hash(const blaze::Vector &input) const { + return blaze::floor(blaze::sqrt(randproj_ * (~input) + 1.) + boffsets_); } template - decltype(auto) hash(const blz::Vector &input) const { - return blz::floor(blz::sqrt(randproj_ * trans(~input) + 1.) 
+ boffsets_); + decltype(auto) hash(const blaze::Vector &input) const { + return blaze::floor(blaze::sqrt(randproj_ * trans(~input) + 1.) + boffsets_); } template - decltype(auto) hash(const blz::Matrix &input) const { - return trans(blz::floor(blz::sqrt(randproj_ * trans(~input) + 1.) + blz::expand(boffsets_, (~input).rows()))); + decltype(auto) hash(const blaze::Matrix &input) const { + return trans(blaze::floor(blaze::sqrt(randproj_ * trans(~input) + 1.) + blaze::expand(boffsets_, (~input).rows()))); } template - decltype(auto) hash(const blz::Matrix &input) const { - return trans(blz::floor(blz::sqrt(randproj_ * (trans(~input)) + 1.) + blz::expand(boffsets_, (~input).columns()))); + decltype(auto) hash(const blaze::Matrix &input) const { + return trans(blaze::floor(blaze::sqrt(randproj_ * (trans(~input)) + 1.) + blaze::expand(boffsets_, (~input).columns()))); } const auto &matrix() const {return randproj_;} auto dim() const {return settings_.dim_;} @@ -326,6 +323,7 @@ struct LSHTable { const unsigned nh_; XXHasher xxhasher_; OMP_ONLY(std::unique_ptr mutexes;) + size_t ids_used_ = 0; static constexpr bool SO = Hasher::StorageOrder; @@ -370,8 +368,8 @@ struct LSHTable { return hasher_.hash(q); } template - void add(const blz::Vector &input, IT id) { - auto hv = blz::evaluate(hash(input)); + void add(const blaze::Vector &input, IT id) { + auto hv = blaze::evaluate(hash(input)); if(unlikely(nh_ != hv.size())) { std::fprintf(stderr, "[%s] nh_: %u. hv.size: %zu\n", __PRETTY_FUNCTION__, nh_, hv.size()); std::exit(1); @@ -381,10 +379,11 @@ struct LSHTable { auto hh = xxhasher_(&hv[i * st.k_], sizeof(ElementType) * st.k_); insert(i, hh, id); } + ++ids_used_; } template - void add(const blz::Matrix &input, IT idoffset=0) { - auto hv = blz::evaluate(hash(input)); + void add(const blaze::Matrix &input, IT idoffset=0) { + auto hv = blaze::evaluate(hash(input)); std::fprintf(stderr, "hv shape: %zu/%zu.\n", hv.rows(), hv.columns()); if(nh_ != hv.columns()) { std::fprintf(stderr, "[%s] nh_: %u. 
hv.columns: %zu\n", __PRETTY_FUNCTION__, nh_, hv.columns());
@@ -396,15 +395,41 @@
         }
         const size_t nr = (~input).rows();
         const auto _l = l(), _k = k();
+        OMP_PFOR
         for(unsigned i = 0; i < nr; ++i) {
-            auto r = row(hv, i, blz::unchecked);
+            auto r = row(hv, i, blaze::unchecked);
             for(unsigned j = 0; j < _l; ++j) {
                 insert(j, xxhasher_(&r[j * _k], sizeof(ElementType) * _k), idoffset + i);
             }
         }
+        ids_used_ += nr;
+    }
+    template<typename VT, bool OSO>
+    std::vector<std::pair<IT, unsigned>> topk(const blaze::Vector<VT, OSO> &query, unsigned maxgather=0) const {
+        // TODO: build with a heap
+        if(!maxgather) maxgather = ids_used_;
+        std::vector<std::pair<IT, unsigned>> ret; // (id, number of tables in which it collides with the query)
+        auto hv = evaluate(hash(query));
+        for(unsigned i = 0; i < l(); ++i) {
+            if(auto it = tables_[i].find(xxhasher_(&hv[i * k()], sizeof(ElementType) * k()));
+               it != tables_[i].end())
+            {
+                for(const auto v: it->second) {
+                    auto rit = std::find_if(ret.begin(), ret.end(), [v](auto x) {return x.first == v;});
+                    if(rit == ret.end()) ret.push_back({v, 1u});
+                    else ++rit->second;
+                }
+            }
+        }
+        shared::sort(ret.begin(), ret.end(), [](auto x, auto y) {return x.second > y.second;});
+        if(maxgather < ret.size()) ret.resize(maxgather);
+        return ret;
     }
     template
-    shared::flat_hash_map query(const blz::Vector &query) const {
+    shared::flat_hash_map query(const blaze::Vector &query) const {
         auto hv = evaluate(hash(query));
         shared::flat_hash_map ret;
         for(unsigned i = 0; i < l(); ++i) {
@@ -422,7 +447,7 @@
     }
     template
     std::vector>
-    query(const blz::Matrix &query) const {
+    query(const blaze::Matrix &query) const {
         auto hv = evaluate(hash(query));
         //std::fprintf(stderr, "hv rows: %zu. columns: %zu. nh: %u. input num rows: %zu. input col: %zu\n", hv.rows(), hv.columns(), nh_, (~query).rows(), (~query).columns());
         if(hv.columns() != nh_) throw std::runtime_error("Wrong number of columns");
diff --git a/include/minocore/minocore.h b/include/minocore/minocore.h
index 2d3824f1..ffbbe033 100644
--- a/include/minocore/minocore.h
+++ b/include/minocore/minocore.h
@@ -11,6 +11,8 @@
 #include
+#include
+
 #include
 #endif
diff --git a/include/minocore/optim/graph_thorup.h b/include/minocore/optim/graph_thorup.h
index e4c63392..20553e3a 100644
--- a/include/minocore/optim/graph_thorup.h
+++ b/include/minocore/optim/graph_thorup.h
@@ -265,7 +265,7 @@ std::vector::vertex_descriptor>
 }
 template
-std::pair(), std::declval()))>>,
+std::pair(), std::declval()))>>,
           std::vector>
 get_costs(Graph &x, const Container &container) {
     using edge_cost = std::decay_t()))>;
@@ -274,7 +274,7 @@ get_costs(Graph &x, const Container &container) {
     util::ScopedSyntheticVertex vx(x);
     std::vector assignments(boost::num_vertices(x));
-    blz::DV costs(boost::num_vertices(x));
+    blaze::DynamicVector costs(boost::num_vertices(x));
     std::vector p(boost::num_vertices(x));
     auto synthetic_vertex = vx.get();
@@ -360,9 +360,9 @@ thorup_sample_mincost(Graph &x, unsigned k, uint64_t seed, unsigned num_iter,
 }
 template
-blz::DV histogram_assignments(const Con &c, unsigned ncenters, const VertexContainer &vtces) {
+blaze::DynamicVector histogram_assignments(const Con &c, unsigned ncenters, const VertexContainer &vtces) {
     const size_t n = std::size(vtces);
-    blz::DV ret(ncenters, static_cast(0));
+    blaze::DynamicVector ret(ncenters, static_cast(0));
     OMP_PFOR
     for(size_t i = 0; i < n; ++i) {
         OMP_ATOMIC
diff --git a/include/minocore/optim/jv_solver.h b/include/minocore/optim/jv_solver.h
index f5d9c0c3..337265f8 100644
--- a/include/minocore/optim/jv_solver.h
+++
b/include/minocore/optim/jv_solver.h @@ -6,6 +6,7 @@ #include #include #include +#include "cpp-btree/btree/set.h" namespace minocore { @@ -44,12 +45,12 @@ struct edgetup: public packed::triple { } }; -} +} // namespace jvutil namespace jv { - -template, typename IT=uint32_t> +template, typename IT=uint32_t, + template class SortedSet=btree::set> struct JVSolver { static_assert(std::is_floating_point::value, "FT must be floating-point"); @@ -65,7 +66,7 @@ struct JVSolver { return lhs.first < rhs.first || lhs.second < rhs.second; } }; - struct payment_queue: public std::set { + struct payment_queue: public SortedSet { void push(payment_t payment) { this->insert(payment); } @@ -74,7 +75,7 @@ struct JVSolver { this->insert(start, end); } auto top() const { - if(this->empty()) throw std::runtime_error("Attempting to access an empty structure"); + if(unlikely(this->empty())) throw std::runtime_error("Attempting to access an empty structure"); return *this->begin(); } void pop_top() { @@ -284,7 +285,7 @@ struct JVSolver { if(early_terminate && early_terminate->load()) return; // Assign all unassigned if(open_facilities.empty()) { - blz::DV fac_costs = blaze::sum(distmat); + blaze::DynamicVector fac_costs = blaze::sum(distmat); open_facilities.push_back(std::min_element(fac_costs.begin(), fac_costs.end()) - fac_costs.begin()); } for(const IT cid: unassigned_clients) { @@ -428,7 +429,7 @@ struct JVSolver { JVSolver(const MatrixType &mat, const CostType &cost): JVSolver() { setup(mat, cost); } - JVSolver(const MatrixType &mat): JVSolver(mat, blz::max(mat)) { + JVSolver(const MatrixType &mat): JVSolver(mat, blaze::max(mat)) { } template @@ -587,7 +588,7 @@ struct JVSolver { //DBG_ONLY(std::fprintf(stderr, "Trying to update by removing the next facility. Current in next_paid_ %zu\n", next_paid_.size());) n_open_clients_ = update_facilities(next_fac.second, working_open_facilities_[next_fac.second], time); time = next_fac.first; - if(current_n == next_paid_.size()) // If it wasn't removed + if(current_n == static_cast(next_paid_.size())) // If it wasn't removed next_paid_.pop_top(); //DBG_ONLY(std::fprintf(stderr, "n open: %zu. time: %0.12g. 
Now facilities left to pay: %zu\n", size_t(n_open_clients_), time, next_paid_.size());)
         } else {
@@ -890,7 +891,7 @@
         return std::make_pair(final_open_facilities_, final_open_facility_assignments_);
     }
     IT local_best_to_add() const {
-        blz::DV current_costs = blz::min(blz::rows(*distmatp_, final_open_facilities_.data(), final_open_facilities_.size()));
+        blaze::DynamicVector<FT> current_costs = blaze::min(blaze::rows(*distmatp_, final_open_facilities_.data(), final_open_facilities_.size()));
         FT max_improvement = -std::numeric_limits<FT>::max();
         IT bestind = -1;
         for(size_t i = 0; i < nfac_; ++i) {
@@ -908,7 +909,7 @@
         return bestind;
     }
     IT local_best_to_rm() const {
-        blz::DV current_costs = blz::min(blz::rows(*distmatp_, final_open_facilities_.data(), final_open_facilities_.size()));
+        blaze::DynamicVector<FT> current_costs = blaze::min(blaze::rows(*distmatp_, final_open_facilities_.data(), final_open_facilities_.size()));
         FT min_loss = std::numeric_limits<FT>::max();
         IT bestind = -1;
         std::unique_ptr<IT[]> min_counters(new IT[ncities_]());
@@ -933,6 +934,12 @@
     }
 };
+template<typename MT, typename FT=blaze::ElementType_t<MT>, typename IT=uint32_t>
+auto make_jv_solver(const MT &mat) {
+    return JVSolver<MT, FT, IT>(mat);
+}
+
+
 } // namespace jv
 } // namespace minocore
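A hedged usage sketch (not part of the patch) of the factory above. The cost-matrix shape and the `kmedian`-style entry point are assumptions for illustration, not committed API; only `make_jv_solver` and the facility-cost default visible in this header are taken from the code:

    #include "minocore/optim/jv_solver.h"
    // rows = facilities, columns = cities/clients
    blaze::DynamicMatrix<float> costs(200, 5000);
    randomize(costs); // blaze's built-in randomize, for demonstration data
    auto solver = minocore::jv::make_jv_solver(costs); // payment queues backed by btree::set
    // Facility cost defaults to blaze::max(costs); a k-median-style solve would then
    // look roughly like: auto [facs, assignments] = solver.kmedian(/*k=*/25);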
diff --git a/include/minocore/optim/kcenter.h b/include/minocore/optim/kcenter.h
index 7214e7db..54057899 100644
--- a/include/minocore/optim/kcenter.h
+++ b/include/minocore/optim/kcenter.h
@@ -1,4 +1,5 @@
-#pragma once
+#ifndef FGC_OPTIM_KCENTER_H__
+#define FGC_OPTIM_KCENTER_H__
 #include "minocore/coreset/matrix_coreset.h"
 #include "minocore/util/div.h"
 #include "minocore/util/blaze_adaptor.h"
@@ -29,341 +30,57 @@ kcenter_greedy_2approx(Iter first, Iter end, RNG &rng, size_t k, const Norm &nor
     if(maxdest == 0) maxdest = np;
     std::vector<IT> centers(k);
     std::vector<FT> distances(np, 0.);
+    static constexpr FT startval = std::is_floating_point<FT>::value ? -std::numeric_limits<FT>::max(): std::numeric_limits<FT>::min();
+    std::pair<FT, IT> maxdist(startval, 0);
     VERBOSE_ONLY(std::fprintf(stderr, "[%s] Starting kcenter_greedy_2approx\n", __PRETTY_FUNCTION__);)
-    {
-        auto fc = rng() % maxdest;
-        centers[0] = fc;
-        distances[fc] = 0.;
-        OMP_ELSE(OMP_PFOR, SK_UNROLL_8)
-        for(size_t i = 0; i < maxdest; ++i) {
-            if(unlikely(i == fc)) continue;
-            distances[i] = dm(fc, i);
+    auto newc = rng() % maxdest;
+    centers[0] = newc;
+    distances[newc] = 0.;
+#ifdef _OPENMP
+    #pragma omp declare reduction (max : std::pair<FT, IT> : omp_out = std::max(omp_in, omp_out)) initializer(omp_priv = omp_orig)
+    #pragma omp parallel for reduction(max: maxdist)
+#else
+    SK_UNROLL_8
+#endif
+    for(IT i = 0; i < maxdest; ++i) {
+        if(likely(i != newc)) {
+            auto v = dm(newc, i);
+            distances[i] = v;
+            maxdist = std::max(maxdist, std::make_pair(v, i));
         }
-        assert(distances[fc] == 0.);
     }
-
-    for(size_t ci = 1; ci < k; ++ci) {
-        auto it = std::max_element(distances.begin(), distances.end());
-        VERBOSE_ONLY(std::fprintf(stderr, "maxelement is %zd from start\n", std::distance(distances.begin(), it));)
-        uint64_t newc = it - distances.begin();
-        centers[ci] = newc;
-        distances[newc] = 0.;
-        OMP_PFOR
+    assert(distances[newc] == 0.);
+    if(k == 1) return centers;
+    centers[1] = newc = maxdist.second;
+    distances[newc] = 0.;
+
+    for(size_t ci = 2; ci < k; ++ci) {
+        //auto it = std::max_element(distances.begin(), distances.end());
+        //VERBOSE_ONLY(std::fprintf(stderr, "maxelement is %zd from start\n", std::distance(distances.begin(), it));)
+        maxdist = std::pair<FT, IT>(startval, 0);
+#ifdef _OPENMP
+        #pragma omp declare reduction (max : std::pair<FT, IT> : omp_out = std::max(omp_in, omp_out)) initializer(omp_priv = omp_orig)
+        #pragma omp parallel for reduction(max: maxdist)
+#else
+        SK_UNROLL_8
+#endif
         for(IT i = 0; i < maxdest; ++i) {
             if(unlikely(i == newc)) continue;
             auto &ldist = distances[i];
+            if(!ldist) continue; // already a center (or a duplicate of one)
             const auto dist = dm(newc, i);
             if(dist < ldist) {
                 ldist = dist;
             }
+            maxdist = std::max(maxdist, std::make_pair(ldist, i));
         }
-        assert(std::find_if(distances.begin(), distances.end(), [](auto x) {return std::isnan(x) || std::isinf(x);})
-               == distances.end());
+        centers[ci] = newc = maxdist.second;
+        distances[newc] = 0.;
     }
     return centers;
 } // kcenter_greedy_2approx

-namespace outliers {
-
-/*
-// All algorithms in this namespace are from:
-// Greedy Strategy Works for k-Center Clustering with Outliers and Coreset Construction
-// Hu Ding, Haikuo Yu, Zixiu Wang
-*/
-
-namespace detail {
-template>,
-         typename Cmp=std::greater<>>
-struct fpq: public std::priority_queue, Container, Cmp> {
-    // priority queue providing access to underlying constainer with getc()
-    // , a reserve function and that defaults to std::greater<> for farthest points.
-    using super = std::priority_queue, Container, Cmp>;
-    template
-    fpq(Args &&...args): super(std::forward(args)...) {}
-    void reserve(size_t n) {this->c.reserve(n);}
-    auto &getc() {return this->c;}
-    const auto &getc() const {return this->c;}
-};
-} // detail
-
-
-
-template
-struct bicriteria_result_t: public std::tuple, IVec, std::vector>, double> {
-    using super = std::tuple, IVec, std::vector>, double>;
-    template
-    bicriteria_result_t(Args &&...args): super(std::forward(args)...)
{} - auto ¢ers() {return std::get<0>(*this);} - auto &assignments() {return std::get<1>(*this);} - // alias - auto &labels() {return assignments();} - auto &outliers() {return std::get<2>(*this);} - double outlier_threshold() const {return std::get<3>(*this);} - size_t num_centers() const {return centers().size();} -}; - -/* -// Algorithm 1 from the above DYW paper -// Z = # outliers -// \mu = quality of coreset -// size of coreset: 2z + O((2/\mu)^p k) -// \gamma = z / n -*/ - -template, - typename IT=std::uint32_t, typename RNG, typename Norm=sqrL2Norm> -bicriteria_result_t -kcenter_bicriteria(Iter first, Iter end, RNG &rng, size_t k, double eps, - double gamma=0.001, size_t t = 100, double eta=0.01, - const Norm &norm=Norm()) -{ - std::fprintf(stderr, "Note: the value k (%zu) is not used in this function or the algorithm\n", k); - auto dm = make_index_dm(first, norm); - // Step 1: constants - assert(end > first); - size_t np = end - first; - const size_t z = std::ceil(gamma * np); - std::fprintf(stderr, "z: %zu\n", z); - size_t farthestchunksize = std::ceil((1 + eps) * z), - samplechunksize = std::ceil(std::log(1./eta) / (1 - gamma)); - IVec ret; - IVec labels(np); - ret.reserve(samplechunksize); - std::vector distances(np); - // randomly select 'log(1/eta) / (1 - eps)' vertices from X and add them to E. - while(ret.size() < samplechunksize) { - // Assuming that this is relatively small and we can take bad asymptotic complexity - auto newv = rng() % np; - if(std::find(ret.begin(), ret.end(), newv) == ret.end()) - push_back(ret, newv); - } - assert(flat_hash_set(ret.begin(), ret.end()).size() == ret.size()); - if(samplechunksize > 100) { - std::fprintf(stderr, "Warning: with samplechunksize %zu, it may end up taking a decent amount of time. Consider swapping this in for a hash set.", samplechunksize); - } - if(samplechunksize > farthestchunksize) { - std::fprintf(stderr, "samplecc is %zu (> fcs %zu). changing gcs to scc + z (%zu)\n", samplechunksize, farthestchunksize, samplechunksize + z); - farthestchunksize = samplechunksize + z; - } - detail::fpq pq; - pq.reserve(farthestchunksize + 1); - const auto fv = ret[0]; - labels[fv] = fv; - distances[fv] = 0.; - // Fill the priority queue from the first set - OMP_PFOR - for(size_t i = 0; i < np; ++i) { - double dist = dm(fv, i); - double newdist; - IT label = 0; // This label is an index into the ret vector, rather than the actual index - for(size_t j = 1, e = ret.size(); j < e; ++j) { - if((newdist = dm(i, ret[j])) < dist) { - label = j; - dist = newdist; - } - } - distances[i] = dist; - labels[i] = ret[label]; - if(pq.empty() || dist > pq.top().first) { - const auto p = std::make_pair(dist, i); - OMP_CRITICAL - { - // Check again after getting the lock - if(pq.empty() || dist > pq.top().first) { - pq.push(p); - if(pq.size() > farthestchunksize) - pq.pop(); - } - } - } - } - IVec random_samples(samplechunksize); - // modulo without a div/mod instruction, much faster - schism::Schismatic div(farthestchunksize); // pq size - assert(samplechunksize >= 1.); - for(size_t j = 0;j < t;++j) { - //std::fprintf(stderr, "j: %zu/%zu\n", j, t); - // Sample 'samplechunksize' points from pq into random_samples. 
- // Sample them - size_t rsi = 0; - IT *rsp = random_samples.data(); - do { - IT index = div.mod(rng()); - // (Without replacement) - if(std::find(rsp, rsp + rsi, index)) - rsp[rsi++] = index; - } while(rsi < samplechunksize); - // random_samples now contains indexes *into pq* - assert(pq.getc().data()); - std::transform(rsp, rsp + rsi, rsp, - [pqi=pq.getc().data()](auto x) { - return pqi[x].second; - }); - for(size_t i = 0; i < rsi; ++i) - assert(rsp[i] < np); - // random_samples now contains indexes *into original dataset* - - // Insert into solution -#if 0 - ret.insert(ret.end(), rsp, rsp + rsi); -#else - for(auto it = rsp, e = rsp + rsi; it < e;++it) { - if(std::find(ret.begin(), ret.end(), *it) != ret.end()) continue; - distances[*it] = 0.; - labels[*it] = *it; - ret.pushBack(*it); - } -#endif - - // compare each point against all of the new points - pq.getc().clear(); // empty priority queue - // Fill priority queue - OMP_PFOR - for(size_t i = 0; i < np; ++i) { - double dist = distances[i]; - if(dist == 0.) continue; - double newdist; - IT label = labels[i]; - for(size_t j = 0; j < rsi; ++j) { - if((newdist = dm(i, rsp[j])) < dist) - dist = newdist, label = rsp[j]; - } - distances[i] = dist; - labels[i] = label; - if(pq.empty() || dist > pq.top().first) { - const auto p = std::make_pair(dist, i); - OMP_CRITICAL - { - // Check again after getting the lock in case it's changed - if(pq.empty() || dist > pq.top().first) { - pq.push(p); - if(pq.size() > farthestchunksize) - // TODO: avoid filling it all the way by checking size but it's probably not worth it - pq.pop(); - } - } - } - } - } - const double minmaxdist = pq.top().first; - bicriteria_result_t bicret; - assert(flat_hash_set(ret.begin(), ret.end()).size() == ret.size()); - bicret.centers() = std::move(ret); - bicret.labels() = std::move(labels); - bicret.outliers() = std::move(pq.getc()); - std::fprintf(stderr, "outliers size: %zu\n", bicret.outliers().size()); - std::get<3>(bicret) = minmaxdist; - return bicret; - // center ids, label assignments for all points besides outliers, outliers, and the distance of the closest excluded point -} // kcenter_bicriteria - -/* -// Algorithm 2 from the above DYW paper -// Z = # outliers -// \gamma = z / n -*/ - -template, - typename IT=std::uint32_t, typename RNG, typename Norm=L2Norm> -std::vector -kcenter_greedy_2approx_outliers(Iter first, Iter end, RNG &rng, size_t k, double eps, - double gamma=0.001, - const Norm &norm=Norm()) -{ - auto dm = make_index_dm(first, norm); - const size_t np = end - first; - const size_t z = std::ceil(gamma * np); - size_t farthestchunksize = std::ceil((1. + eps) * z); - detail::fpq pq; - pq.reserve(farthestchunksize + 1); - std::vector ret; - std::vector distances(np, std::numeric_limits::max()); - ret.reserve(k); - auto newc = rng() % np; - ret.push_back(newc); - do { - const auto &newel = first[newc]; - // Fill pq - OMP_PFOR - for(size_t i = 0; i < np; ++i) { - double dist = distances[i]; - if(dist == 0.) 
continue; - double newdist; - if((newdist = dm(i, newc)) < dist) - dist = newdist; - distances[i] = dist; - if(pq.empty() || dist > pq.top().first) { - const auto p = std::make_pair(dist, i); - OMP_CRITICAL - { - if(pq.empty() || dist > pq.top().first) { - pq.push(p); - if(pq.size() > farthestchunksize) pq.pop(); - } - } - } - } - - // Sample point - newc = pq.getc()[rng() % farthestchunksize].second; - assert(newc < np); - ret.push_back(newc); - pq.getc().clear(); - } while(ret.size() < k); - return ret; -}// kcenter_greedy_2approx_outliers -// Algorithm 3 (coreset construction) -template, - typename IT=std::uint32_t, typename RNG, typename Norm=L2Norm> -coresets::IndexCoreset -kcenter_coreset(Iter first, Iter end, RNG &rng, size_t k, double eps=0.1, double mu=.5, - double rho=1.5, - double gamma=0.001, double eta=0.01, const Norm &norm=Norm()) { - // rho is 'D' for R^D (http://www.wisdom.weizmann.ac.il/~robi/teaching/2014b-SeminarGeometryAlgorithms/lecture1.pdf) - // in Euclidean space, as worst-case, but usually better in real data with structure. - assert(mu > 0. && mu <= 1.); - const size_t np = end - first; - size_t L = std::ceil(std::pow(2. / mu, rho) * k); - size_t nrounds = std::ceil((L + std::sqrt(L)) / (1. - eta)); - auto bic = kcenter_bicriteria(first, end, rng, k, eps, - gamma, nrounds, eta, norm); - double rtilde = bic.outlier_threshold(); - std::fprintf(stderr, "outlier threshold: %f\n", rtilde); - auto ¢ers = bic.centers(); - auto &labels = bic.labels(); - auto &outliers = bic.outliers(); -#ifndef NDEBUG - for(const auto c: centers) - assert(c < np); - for(const auto label: labels) - assert(labels[label] == label); -#endif - //std::vector counts(centers.size()); - coresets::flat_hash_map counts; - counts.reserve(centers.size()); - size_t i = 0; - SK_UNROLL_8 - do ++counts[labels[i++]]; while(i < np); - coresets::IndexCoreset ret(centers.size() + outliers.size()); - std::fprintf(stderr, "ret size: %zu. centers size: %zu. counts size %zu. 
outliers size: %zu\n", ret.size(), centers.size(), counts.size(), outliers.size()); - for(i = 0; i < outliers.size(); ++i) { - assert(outliers[i].second < np); - ret.indices_[i] = outliers[i].second; - ret.weights_[i] = 1.; - } - for(const auto &pair: counts) { - assert(pair.first < np); - ret.weights_[i] = pair.second; - ret.indices_[i] = pair.first; - ++i; - } - assert(i == ret.size()); - for(size_t i = 0; i < ret.indices_.size(); ++i) { - assert(ret.indices_[i] < np); - } - return ret; -} -}// namespace outliers - } // coresets } // minocore + +#endif /* FGC_OPTIM_KCENTER_H__ */ diff --git a/include/minocore/optim/kmeans.h b/include/minocore/optim/kmeans.h index 19aec709..a5adc7aa 100644 --- a/include/minocore/optim/kmeans.h +++ b/include/minocore/optim/kmeans.h @@ -8,6 +8,7 @@ #include "minocore/util/oracle.h" #include "minocore/util/timer.h" #include "minocore/util/div.h" +#include "minocore/util/blaze_adaptor.h" namespace minocore { @@ -104,10 +105,10 @@ kmeanspp(Iter first, Iter end, RNG &rng, size_t k, const Norm &norm=Norm(), WFT } template -std::pair, blz::DV> get_oracle_costs(const Oracle &oracle, size_t np, const Sol &sol) +std::pair, blaze::DynamicVector> get_oracle_costs(const Oracle &oracle, size_t np, const Sol &sol) { - blz::DV assignments(np); - blz::DV costs(np, std::numeric_limits::max()); + blaze::DynamicVector assignments(np); + blaze::DynamicVector costs(np, std::numeric_limits::max()); util::Timer t("get oracle costs"); OMP_PFOR for(size_t i = 0; i < np; ++i) { @@ -175,7 +176,6 @@ kmc2(const Oracle &oracle, RNG &rng, size_t np, size_t k, size_t m = 2000) } } } - std::fprintf(stderr, "[kmc2]: %zu/%zu\n", centers.size(), size_t(k)); centers.insert(x); } return std::vector(centers.begin(), centers.end()); diff --git a/include/minocore/optim/kmedian.h b/include/minocore/optim/kmedian.h index 9b5ef3b3..077c3c19 100644 --- a/include/minocore/optim/kmedian.h +++ b/include/minocore/optim/kmedian.h @@ -24,7 +24,7 @@ auto &geomedian(const blz::DenseMatrix &mat, blz::DenseVector & const auto &_mat = ~mat; ~dv = blz::mean(_mat); FT prevcost = std::numeric_limits::max(); - blz::DV costs(_mat.rows(), FT(0)); + blaze::DynamicVector costs(_mat.rows(), FT(0)); size_t iternum = 0; const size_t nr = _mat.rows(); assert((~dv).size() == (~mat).columns()); @@ -47,13 +47,14 @@ auto &geomedian(const blz::DenseMatrix &mat, blz::DenseVector & } template -void l1_unweighted_median(const blz::DenseMatrix &data, blz::DenseVector &ret, bool approx_med=false) { +void l1_unweighted_median(const blz::DenseMatrix &data, blz::DenseVector &ret) { assert((~ret).size() == (~data).columns()); auto &rr(~ret); const auto &dr(~data); const bool odd = dr.rows() % 2; const size_t hlf = dr.rows() / 2; - if(approx_med) { + if(0) { +#if 0 std::fprintf(stderr, "note: Boost approximate median takes more time and is less accurate than exact calculation via sorting.\nNot recommended.\n"); //using acc_tag = boost::accumulators::stats; using acc_tag = boost::accumulators::stats; @@ -63,6 +64,7 @@ void l1_unweighted_median(const blz::DenseMatrix &data, blz::DenseVector for(auto v: column(dr, i)) acc(v); (~ret)[i] = boost::accumulators::median(acc); } +#endif } else { for(size_t i = 0; i < dr.columns(); ++i) { blaze::DynamicVector, blaze::columnVector> tmpind = column(data, i); // Should do fast copying. 
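For reference, a minimal standalone sketch of the exact column-median step that `l1_unweighted_median` performs above (sort a copy of each column and take the middle element); the even-row averaging rule and the concrete types here are illustrative:

    #include <algorithm>
    #include <blaze/Math.h>
    blaze::DynamicMatrix<double> X(7, 3); // rows = points, columns = features
    blaze::randomize(X);
    blaze::DynamicVector<double, blaze::rowVector> med(X.columns());
    const bool odd = X.rows() % 2;
    const size_t hlf = X.rows() / 2;
    for(size_t i = 0; i < X.columns(); ++i) {
        blaze::DynamicVector<double> tmp = column(X, i); // copy, then sort
        std::sort(tmp.begin(), tmp.end());
        med[i] = odd ? tmp[hlf]: (tmp[hlf - 1] + tmp[hlf]) * .5;
    }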
@@ -75,13 +77,14 @@ void l1_unweighted_median(const blz::DenseMatrix &data, blz::DenseVector template, ElementType_t>, typename IT=uint32_t> -static inline void weighted_median(const blz::Matrix &data, blz::DenseVector &ret, const FT *weights, bool approx_med=false) { +static inline void weighted_median(const blz::Matrix &data, blz::DenseVector &ret, const FT *weights) { assert(weights); const size_t nc = (~data).columns(); if((~ret).size() != nc) { (~ret).resize(nc); } - if(approx_med) { + if(0) { +#if 0 //OMP_PFOR for(size_t i = 0; i < nc; ++i) { auto &mat = ~data; @@ -93,6 +96,7 @@ static inline void weighted_median(const blz::Matrix &data, blz::DenseVe } (~ret)[i] = boost::accumulators::median(acc); } +#endif } else { if(unlikely((~data).columns() > ((uint64_t(1) << (sizeof(IT) * CHAR_BIT)) - 1))) throw std::runtime_error("Use a different index type, there are more features than fit in IT"); @@ -129,11 +133,11 @@ static inline void weighted_median(const blz::Matrix &data, blz::DenseVe template, ElementType_t>> -void l1_median(const blz::DenseMatrix &data, blz::DenseVector &ret, const VT3 *weights=static_cast(nullptr), bool approx_med=false) { +void l1_median(const blz::DenseMatrix &data, blz::DenseVector &ret, const VT3 *weights=static_cast(nullptr)) { if(weights) - weighted_median(data, ret, weights, approx_med); + weighted_median(data, ret, weights); else - l1_unweighted_median(data, ret, approx_med); + l1_unweighted_median(data, ret); } diff --git a/include/minocore/optim/lsearch.h b/include/minocore/optim/lsearch.h index 062ec82c..abdbd676 100644 --- a/include/minocore/optim/lsearch.h +++ b/include/minocore/optim/lsearch.h @@ -1,7 +1,7 @@ #pragma once #ifndef FGC_LOCAL_SEARCH_H__ #define FGC_LOCAL_SEARCH_H__ -#include "minocore/util/diskmat.h" +#include "diskmat/diskmat.h" #include "minocore/util/oracle.h" #include "minocore/optim/kcenter.h" #include "pdqsort/pdqsort.h" @@ -33,7 +33,7 @@ struct ExhaustiveSearcher { const size_t nr = mat_.rows(); size_t nchecked = 0; for(auto &&comb: discreture::combinations(nr, k_)) { - const double cost = blz::sum(blz::min(rows(mat_, comb.data(), comb.size()))); + const double cost = blaze::sum(blaze::min(rows(mat_, comb.data(), comb.size()))); ++nchecked; if((nchecked & (nchecked - 1)) == 0) std::fprintf(stderr, "iteration %zu completed\n", nchecked); @@ -59,18 +59,19 @@ struct LocalKMedSearcher { const MatType &mat_; shared::flat_hash_set sol_; - blz::DV assignments_; - blz::DV current_costs_; + blaze::DynamicVector assignments_; + blaze::DynamicVector current_costs_; double current_cost_; double eps_, initial_cost_, init_cost_div_; IType k_; const size_t nr_, nc_; double diffthresh_; - blz::DV ordering_; + blaze::DynamicVector ordering_; uint32_t shuffle_:1; - uint32_t lazy_eval_:2; // Set to 0 to avoid lazy search, 1 to only do local search, and 2 to do lazy search and then use exhaustive + uint32_t lazy_eval_:15; uint32_t max_swap_n_:16; + // if(max_swap_n_ > 1), after exhaustive single-swap optimization, enables multiswap search. // TODO: enable searches for multiswaps. 
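+    // lazy_eval_ == 0: exhaustive search only; == 1: lazy search followed by an
+    // exhaustive pass; > 1: lazy search only (the default; see run()).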
// Constructors @@ -91,7 +92,7 @@ struct LocalKMedSearcher { current_cost_(std::numeric_limits::max()), eps_(eps), k_(k), nr_(mat.rows()), nc_(mat.columns()), - ordering_(mat.rows()), shuffle_(true), lazy_eval_(false), max_swap_n_(1) + ordering_(mat.rows()), shuffle_(true), lazy_eval_(2), max_swap_n_(1) { std::iota(ordering_.begin(), ordering_.end(), 0); static_assert(std::is_integral_voperator[](0))>>, "index container must contain integral values"); @@ -137,7 +138,7 @@ struct LocalKMedSearcher { //std::fprintf(stderr, "subm rows: %zu\n", subm.rows()); std::vector approx{uint32_t(rng() % subm.rows())}; auto first = approx.front(); - blz::DV mincosts = row(subm, first); + blaze::DynamicVector mincosts = row(subm, first); std::vector remaining(subm.rows()); std::iota(remaining.begin(), remaining.end(), 0u); while(approx.size() < std::min(subm.rows(), size_t(k_))) { @@ -217,40 +218,45 @@ struct LocalKMedSearcher { } double evaluate_swap(IType newcenter, IType oldcenter, bool single_threaded=false) const { - blz::SmallArray as(sol_.begin(), sol_.end()); + blaze::SmallArray as(sol_.begin(), sol_.end()); *std::find(as.begin(), as.end(), oldcenter) = newcenter; double cost; if(single_threaded) { - cost = blaze::serial(blz::sum(blz::serial(blz::min(rows(mat_, as))))); - } else cost = blz::sum(blz::min(rows(mat_, as))); + cost = blaze::serial(blaze::sum(blaze::serial(blaze::min(rows(mat_, as))))); + } else cost = blaze::sum(blaze::min(rows(mat_, as))); return current_cost_ - cost; } - template - double evaluate_multiswap(const IType *newcenter, const IType *oldcenter, bool single_threaded=false) const { - blz::SmallArray as(sol_.begin(), sol_.end()); + template + double evaluate_multiswap(const IndexType *newcenter, const IndexType *oldcenter, bool single_threaded=false) const { + blaze::SmallArray as(sol_.begin(), sol_.end()); + shared::sort(as.begin(), as.end()); for(size_t i = 0; i < N; ++i) { *std::find(as.begin(), as.end(), oldcenter[i]) = newcenter[i]; } double cost; if(single_threaded) { - cost = blaze::serial(blz::sum(blz::serial(blz::min(rows(mat_, as))))); + cost = blaze::serial(blaze::sum(blaze::serial(blaze::min(rows(mat_, as))))); } else - cost = blz::sum(blz::min(rows(mat_, as))); + cost = blaze::sum(blaze::min(rows(mat_, as))); return current_cost_ - cost; } template double evaluate_multiswap_rt(const IndexType *newcenter, const IndexType *oldcenter, size_t N, bool single_threaded=false) const { - blz::SmallArray as(sol_.begin(), sol_.end()); + switch(N) { + case 2: return evaluate_multiswap<2>(newcenter, oldcenter, single_threaded); + case 3: return evaluate_multiswap<3>(newcenter, oldcenter, single_threaded); + } + blaze::SmallArray as(sol_.begin(), sol_.end()); for(size_t i = 0; i < N; ++i) { *std::find(as.begin(), as.end(), oldcenter[i]) = newcenter[i]; } shared::sort(as.begin(), as.end()); double cost; if(single_threaded) { - cost = blaze::serial(blz::sum(blz::serial(blz::min(rows(mat_, as))))); + cost = blaze::serial(blaze::sum(blaze::serial(blaze::min(rows(mat_, as))))); } else - cost = blz::sum(blz::min(rows(mat_, as))); + cost = blaze::sum(blaze::min(rows(mat_, as))); return current_cost_ - cost; } @@ -270,11 +276,11 @@ struct LocalKMedSearcher { current_costs_ = row(mat_, *it BLAZE_CHECK_DEBUG); } while(++it != sol_.end()) { - current_costs_ = blz::min(current_costs_, row(mat_, *it BLAZE_CHECK_DEBUG)); + current_costs_ = blaze::min(current_costs_, row(mat_, *it BLAZE_CHECK_DEBUG)); } } - blz::DV newptr = blz::min(rows(mat_, newcenters, N)); - blz::DV oldptr = 
blz::min(rows(mat_, oldcenters, N)); + blaze::DynamicVector newptr = blaze::min(rows(mat_, newcenters, N)); + blaze::DynamicVector oldptr = blaze::min(rows(mat_, oldcenters, N)); double diff = 0.; #ifdef _OPENMP _Pragma("omp parallel for reduction(+:diff)") @@ -285,7 +291,7 @@ struct LocalKMedSearcher { auto sub = ccost - newptr[i]; diff += sub; } else if(ccost == oldptr[i]) { - auto oldbest = blz::min(blz::elements(blz::column(mat_, i), tmp.data(), tmp.size())); + auto oldbest = blaze::min(blaze::elements(blaze::column(mat_, i), tmp.data(), tmp.size())); auto sub = ccost - std::min(oldbest, newptr[i]); diff += sub; } @@ -309,6 +315,7 @@ struct LocalKMedSearcher { std::vector newindices(sol_.begin(), sol_.end()); next: for(const auto oldcenter: sol_) { + newindices.assign(sol_.begin(), sol_.end()); std::swap(*std::find(newindices.begin(), newindices.end(), oldcenter), newindices.back()); if(shuffle_) { wy::WyRand rng(total); @@ -317,7 +324,7 @@ struct LocalKMedSearcher { // Make a vector with the original solution, but replace the old value with the new value for(size_t pi = 0; pi < nr_; ++pi) { auto potential_index = ordering_[pi]; - if(sol_.find(potential_index) != sol_.end()) continue; + if(sol_.find(potential_index) != sol_.end() || potential_index == oldcenter) continue; newindices.back() = potential_index; assert(std::find(newindices.begin(), newindices.end(), oldcenter) == newindices.end()); double val = 0.; @@ -331,26 +338,22 @@ struct LocalKMedSearcher { auto diff = oldcost - newptr[i]; val += diff; } else if(assignments_[i] == oldcenter) { - auto mincost = blz::min(blz::elements(blz::column(mat_, i), newindices.data(), newindices.size())); + auto mincost = blaze::min(blaze::elements(blaze::column(mat_, i), newindices.data(), newindices.size())); auto diff = oldcost - mincost; val += diff; } } -#ifndef NDEBUG - auto v = evaluate_swap(potential_index, oldcenter); - //assert(std::abs(v - val) <= .5 * std::abs(std::max(v, val)) || !std::fprintf(stderr, "Manual: %g. Lazy: %g\n", v, val)); assert(sol_.size() == k_); -#endif // Only calculate exhaustively if the lazy form returns yes. 
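+                // (The inner parentheses matter: `val = evaluate_swap(...)` must bind
+                // before the comparison, or val is assigned the boolean result of the
+                // comparison rather than the swap's improvement.)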
- if(val > diffthresh_ && (val = evaluate_swap(potential_index, oldcenter) > diffthresh_)) { + if(val > diffthresh_ && (val = evaluate_swap(potential_index, oldcenter)) > diffthresh_) { assert(sol_.size() == k_); sol_.erase(oldcenter); sol_.insert(potential_index); assert(sol_.size() == k_); assign(); - //current_cost_ = blz::sum(current_costs_); + //current_cost_ = blaze::sum(current_costs_); ++total; - std::fprintf(stderr, "Swap number %zu updated with delta %g to new cost with cost %0.12g\n", total, val, current_cost_); + std::fprintf(stderr, "Swap number %zu updated with delta %.12g to new cost with cost %0.12g\n", total, val, current_cost_); goto next; } } @@ -371,10 +374,10 @@ struct LocalKMedSearcher { diffthresh_ = diffthresh; next: { - blz::DV csol(sol_.size()); + blaze::DynamicVector csol(sol_.size()); std::copy(sol_.begin(), sol_.end(), csol.data()); - blz::DV swap_in(nc_ - sol_.size()); - blz::DV inargs(nswap), outargs(nswap); + blaze::DynamicVector swap_in(nc_ - sol_.size()); + blaze::DynamicVector inargs(nswap), outargs(nswap); for(auto &&swap_out_comb: discreture::combinations(csol.size(), nswap)) { for(auto &&swap_in_comb: discreture::combinations(swap_in.size(), nswap)) { auto v = evaluate_multiswap_rt(swap_in_comb.data(), swap_out_comb.data(), nswap); @@ -395,9 +398,8 @@ struct LocalKMedSearcher { if(mat_.rows() <= k_) return; if(lazy_eval_) { run_lazy(); - if(lazy_eval_ == 2) + if(lazy_eval_ > 1) return; - // Otherwise, running exhaustive local search after to be sure. } //const double diffthresh = 0.; std::fprintf(stderr, "diffthresh: %f\n", diffthresh); @@ -448,7 +450,7 @@ struct LocalKMedSearcher { for(size_t ci = 0; ci < nr_; ++ci) { if(std::find(wsol.begin(), wsol.end(), ci) != wsol.end()) continue; wsol[si] = ci; - const double cost = blz::sum(blz::min(rows(mat_, wsol))); + const double cost = blaze::sum(blaze::min(rows(mat_, wsol))); if(cost < ccost) { std::fprintf(stderr, "Found a better one: %g vs %g (%g)\n", cost, ccost, ccost - cost); ccost = cost; diff --git a/include/minocore/optim/oracle_thorup.h b/include/minocore/optim/oracle_thorup.h index eb05a266..8c024235 100644 --- a/include/minocore/optim/oracle_thorup.h +++ b/include/minocore/optim/oracle_thorup.h @@ -6,6 +6,7 @@ #include #include "fastiota/fastiota_ho.h" #include "minocore/util/oracle.h" +#include "boost/iterator/transform_iterator.hpp" namespace minocore { @@ -24,10 +25,10 @@ template -std::tuple, blz::DV, std::vector> +std::tuple, blaze::DynamicVector, std::vector> oracle_thorup_d(const Oracle &oracle, size_t npoints, unsigned k, const WFT *weights=static_cast(nullptr), double npermult=21, double nroundmult=3, double eps=0.5, uint64_t seed=1337) { - const FT total_weight = weights ? static_cast(blz::sum(blz::CustomVector((WFT *)weights, npoints))) + const FT total_weight = weights ? 
static_cast(blaze::sum(blaze::CustomVector((WFT *)weights, npoints))) : static_cast(npoints); size_t nperround = npermult * k * std::log(total_weight) / eps; #if VERBOSE_AF @@ -36,7 +37,7 @@ oracle_thorup_d(const Oracle &oracle, size_t npoints, unsigned k, const WFT *wei #endif wy::WyRand rng(seed); - blz::DV mincosts(npoints, std::numeric_limits::max()); // minimum costs per point + blaze::DynamicVector mincosts(npoints, std::numeric_limits::max()); // minimum costs per point std::vector minindices(npoints, IT(-1)); // indices to which points are assigned size_t nr = npoints; // Manually managing count std::unique_ptr R(new IT[npoints]); @@ -56,6 +57,10 @@ oracle_thorup_d(const Oracle &oracle, size_t npoints, unsigned k, const WFT *wei if(!weights && nr <= nperround) { //std::fprintf(stderr, "Adding all\n"); F.insert(F.end(), R.get(), R.get() + nr); + prep_range(R.get(), R.get() + nr, oracle); + // This instructs caching oracles to prepare these rows + // and results in greater efficiency for cases + // where distance computations are expensive. for(auto it = R.get(), eit = R.get() + nr; it < eit; ++it) { auto v = *it; //std::fprintf(stderr, "Adding index %zd/value %u\n", it - R.get(), v); @@ -113,10 +118,11 @@ oracle_thorup_d(const Oracle &oracle, size_t npoints, unsigned k, const WFT *wei } // Update F, R, and mincosts/minindices current_batch.assign(tmp.begin(), tmp.end()); - tmp.clear(); - for(const auto item: current_batch) - F.push_back(R[item]); - shared::sort(current_batch.begin(), current_batch.end(), std::greater<>()); + auto func = [&R](auto x) {return R[x];}; + auto clb = boost::make_transform_iterator(current_batch.begin(), func), + cle = boost::make_transform_iterator(current_batch.end(), func); + F.insert(F.end(), clb, cle); + prep_range(clb, cle, oracle); for(const auto v: current_batch) { auto actual_index = R[v]; minindices[actual_index] = actual_index; @@ -178,7 +184,7 @@ template -std::tuple, blz::DV, std::vector> +std::tuple, blaze::DynamicVector, std::vector> iterated_oracle_thorup_d(const Oracle &oracle, size_t npoints, unsigned k, unsigned num_iter=3, unsigned num_sub_iter=8, const WFT *weights=static_cast(nullptr), double npermult=21, double nroundmult=3, double eps=0.5, uint64_t seed=1337) { @@ -186,23 +192,23 @@ iterated_oracle_thorup_d(const Oracle &oracle, size_t npoints, unsigned k, unsig return weights ? weights[index]: static_cast(1.); }; #if !NDEBUG - const FT total_weight = weights ? blz::sum(blz::CustomVector((WFT *)weights, npoints)) + const FT total_weight = weights ? blaze::sum(blaze::CustomVector((WFT *)weights, npoints)) : WFT(npoints); #endif wy::WyHash rng(seed); - std::tuple, blz::DV, std::vector> ret; + std::tuple, blaze::DynamicVector, std::vector> ret; auto &[centers, costs, bestindices] = ret; // Unpack for named access FT best_cost; // For convenience: a custom vector // which is empty if weights is null and full otherwise. { - std::unique_ptr> wview; - if(weights) wview.reset(new blz::CustomVector(weights, npoints)); + std::unique_ptr> wview; + if(weights) wview.reset(new blaze::CustomVector(weights, npoints)); auto do_thorup_sample = [&]() { return oracle_thorup_d(oracle, npoints, k, weights, npermult, nroundmult, eps, rng()); }; auto get_cost = [&](const auto &x) { - return wview ? blz::dot(x, *wview): blz::sum(x); + return wview ? 
blaze::dot(x, *wview): blaze::sum(x); }; // gather first set of sampled points @@ -233,7 +239,7 @@ iterated_oracle_thorup_d(const Oracle &oracle, size_t npoints, unsigned k, unsig } // Calculate weights for center points - blz::DV center_weights(centers.size(), FT(0)); + blaze::DynamicVector center_weights(centers.size(), FT(0)); shared::flat_hash_map asn2id; asn2id.reserve(centers.size()); for(size_t i = 0; i < centers.size(); asn2id[centers[i]] = i, ++i); OMP_PRAGMA("omp parallel for") @@ -252,8 +258,8 @@ iterated_oracle_thorup_d(const Oracle &oracle, size_t npoints, unsigned k, unsig nofails = false; } } - assert(std::abs(blz::sum(center_weights) - total_weight) < 1e-4 || - !std::fprintf(stderr, "Expected sum %g, found %g\n", total_weight, blz::sum(center_weights))); + assert(std::abs(blaze::sum(center_weights) - total_weight) < 1e-4 || + !std::fprintf(stderr, "Expected sum %g, found %g\n", total_weight, blaze::sum(center_weights))); assert(nofails); #endif shared::flat_hash_map sub_asn2id; @@ -264,7 +270,7 @@ iterated_oracle_thorup_d(const Oracle &oracle, size_t npoints, unsigned k, unsig return oracle_thorup_d(wrapped_oracle, centers.size(), k, center_weights.data(), npermult, nroundmult, eps, rng()); }; auto get_cost = [&](const auto &x) { // Calculates the cost of a set of centers. - return blz::dot(x, center_weights); + return blaze::dot(x, center_weights); // Can this be easily done using the distance from the full without performing all recalculations? }; @@ -296,7 +302,7 @@ iterated_oracle_thorup_d(const Oracle &oracle, size_t npoints, unsigned k, unsig assert(sub_bestindices.size() == center_weights.size()); sub_asn2id.clear(); for(size_t i = 0; i < sub_centers.size(); sub_asn2id[sub_centers[i]] = i, ++i); - blz::DV sub_center_weights(sub_centers.size(), FT(0)); + blaze::DynamicVector sub_center_weights(sub_centers.size(), FT(0)); OMP_PFOR for(size_t i = 0; i < sub_bestindices.size(); ++i) { assert(sub_asn2id.find(sub_bestindices[i]) != sub_asn2id.end()); @@ -307,7 +313,7 @@ iterated_oracle_thorup_d(const Oracle &oracle, size_t npoints, unsigned k, unsig } DBG_ONLY(for(const auto w: sub_center_weights) assert(w > 0.);) - assert(std::abs(blz::sum(sub_center_weights) - total_weight) <= 1.e-4); + assert(std::abs(blaze::sum(sub_center_weights) - total_weight) <= 1.e-4); // Convert back to original coordinates auto transform_func = [&wrapped_oracle](auto x) {return wrapped_oracle.lookup(x);}; diff --git a/include/minocore/util/blaze_adaptor.h b/include/minocore/util/blaze_adaptor.h index 83729f0a..51d455ca 100644 --- a/include/minocore/util/blaze_adaptor.h +++ b/include/minocore/util/blaze_adaptor.h @@ -141,10 +141,25 @@ struct DynamicMatrix: public blaze::DynamicMatrix { struct const_row_iterator: public row_iterator_t {}; struct column_iterator: public column_iterator_t {}; struct const_column_iterator: public column_iterator_t {}; + decltype(auto) operator[](size_t i) const { + if constexpr(SO == blaze::rowMajor) { + return row(*this, i, blaze::unchecked); + } else { + return column(*this, i, blaze::unchecked); + } + } + decltype(auto) operator[](size_t i) { + if constexpr(SO == blaze::rowMajor) { + return row(*this, i, blaze::unchecked); + } else { + return column(*this, i, blaze::unchecked); + } + } template this_type &operator=(Args &&...args) { ((super &)*this).operator=(std::forward(args)...); return *this; } + size_t size() const {return SO == blaze::rowMajor ? 
this->rows(): this->columns();} auto rowiterator() {return RowViewer(*this);} auto rowiterator() const {return ConstRowViewer(*this);} auto columniterator() {return ColumnViewer(*this);} @@ -166,6 +181,21 @@ struct CustomMatrix: public blaze::CustomMatrix { ((super &)*this).operator=(std::forward(args)...); return *this; } + decltype(auto) operator[](size_t i) const { + if constexpr(SO == blaze::rowMajor) { + return row(*this, i, blaze::unchecked); + } else { + return column(*this, i, blaze::unchecked); + } + } + decltype(auto) operator[](size_t i) { + if constexpr(SO == blaze::rowMajor) { + return row(*this, i, blaze::unchecked); + } else { + return column(*this, i, blaze::unchecked); + } + } + size_t size() const {return SO == blaze::rowMajor ? this->rows(): this->columns();} auto rowiterator() {return RowViewer(*this);} auto rowiterator() const {return ConstRowViewer(*this);} auto columniterator() {return ColumnViewer(*this);} @@ -267,6 +297,38 @@ INLINE auto sum(const std::vector &vec) { template INLINE decltype(auto) sum(const OT &x) {return blaze::sum(x);} +template +size_t number_shared_zeros(const blaze::SparseVector &_lhs, const blaze::SparseVector &_rhs) { + auto &lhs = ~_lhs; + auto &rhs = ~_rhs; + assert(lhs.size() == rhs.size()); + //const size_t sz = lhs.size(); + auto lhit = lhs.begin(); + auto rhit = rhs.begin(); + auto lhe = lhs.end(); + auto rhe = rhs.end(); + if(lhit == lhe) return nonZeros(rhs); + if(rhit == rhe) return nonZeros(lhs); + auto getnextindex = [&]() { + size_t r1 = lhit == lhe ? size_t(-1): lhit->index(); + size_t r2 = rhit == rhe ? size_t(-1): rhit->index(); + if(r1 == r2) { + ++lhit; + ++rhit; + } else if(r1 < r2) ++lhit; + else ++rhit; + return std::min(r1, r2); + }; + size_t current_index = getnextindex(); + size_t ret = current_index; + for(size_t nv; (nv = getnextindex()) != size_t(-1);) { + if(nv == current_index) continue; + assert(nv > current_index); + ret += nv - current_index - 1; + current_index = nv; + } + return ret; + } template void fill_helper(blaze::Matrix &mat) { @@ -277,8 +339,8 @@ void fill_helper(blaze::Matrix &mat) { } } -template -void fill_helper(dm::DistanceMatrix &) { +template +void fill_helper(dm::DistanceMatrix &) { std::fprintf(stderr, "[%s] Warning: trying to fill_symmetric_upper_triangular on an unsupported type. 
Doing nothing.\n", __PRETTY_FUNCTION__); } diff --git a/include/minocore/util/csc.h b/include/minocore/util/csc.h index 0f1bf4e2..4742f376 100644 --- a/include/minocore/util/csc.h +++ b/include/minocore/util/csc.h @@ -8,12 +8,14 @@ namespace minocore { +template struct CSCMatrixView { - const uint64_t *const indptr_, *const indices_; - const uint32_t *const data_; + const IndPtrType *const indptr_; + const IndicesType *const indices_; + const DataType *const data_; const uint64_t nnz_; const uint32_t nf_, n_; - CSCMatrixView(const uint64_t *indptr, const uint64_t *indices, const uint32_t *data, + CSCMatrixView(const IndPtrType *indptr, const IndicesType *indices, const DataType *data, uint64_t nnz, uint32_t nfeat, uint32_t nitems): indptr_(indptr), indices_(indices), @@ -34,14 +36,14 @@ struct CSCMatrixView { } }; -template -blz::SM csc2sparse(const CSCMatrixView &mat, bool skip_empty=false) { +template +blz::SM csc2sparse(const CSCMatrixView &mat, bool skip_empty=false) { blz::SM ret(mat.n_, mat.nf_); ret.reserve(mat.nnz_); size_t used_rows = 0, i; for(i = 0; i < mat.n_; ++i) { auto col = mat.column(i); - if(mat.n_ > 1000000 && i % 1000000 == 0) std::fprintf(stderr, "%zu/%u\r", i, mat.n_); + if(mat.n_ > 100000 && i % 10000 == 0) std::fprintf(stderr, "%zu/%u\r", i, mat.n_); if(skip_empty && 0u == col.nnz()) continue; for(auto s = col.start_; s < col.stop_; ++s) { ret.append(used_rows, mat.indices_[s], mat.data_[s]); @@ -52,7 +54,7 @@ blz::SM csc2sparse(const CSCMatrixView &mat, bool skip_empt return ret; } -template +template blz::SM csc2sparse(std::string prefix, bool skip_empty=false) { util::Timer t("csc2sparse load time"); std::string indptrn = prefix + "indptr.file"; @@ -67,9 +69,13 @@ blz::SM csc2sparse(std::string prefix, bool skip_empty=fals std::fclose(ifp); using mmapper = mio::mmap_source; mmapper indptr(indptrn), indices(indicesn), data(datan); - CSCMatrixView matview((const uint64_t *)indptr.data(), (const uint64_t *)indices.data(), - (const uint32_t *)data.data(), indices.size() / (sizeof(uint64_t) / sizeof(indices[0])), - nfeat, nsamples); + CSCMatrixView + matview((const IndPtrType *)indptr.data(), (const IndicesType *)indices.data(), + (const DataType *)data.data(), indices.size() / sizeof(IndicesType), + nfeat, nsamples); + std::fprintf(stderr, "indptr size: %zu\n", indptr.size() / sizeof(IndPtrType)); + std::fprintf(stderr, "indices size: %zu\n", indices.size() / sizeof(IndicesType)); + std::fprintf(stderr, "data size: %zu\n", data.size() / sizeof(DataType)); #ifndef MADV_REMOVE # define MADV_FLAGS (MADV_DONTNEED | MADV_FREE) #else @@ -79,7 +85,7 @@ blz::SM csc2sparse(std::string prefix, bool skip_empty=fals ::madvise((void *)indices.data(), indices.size(), MADV_FLAGS); ::madvise((void *)data.data(), data.size(), MADV_FLAGS); #undef MADV_FLAGS - return csc2sparse(matview, skip_empty); + return csc2sparse(matview, skip_empty); } template diff --git a/include/minocore/util/diskmat.h b/include/minocore/util/diskmat.h deleted file mode 100644 index d517b44e..00000000 --- a/include/minocore/util/diskmat.h +++ /dev/null @@ -1,157 +0,0 @@ -#pragma once -#ifndef DISK_MAT_H__ -#define DISK_MAT_H__ -#include -#include "mio/single_include/mio/mio.hpp" -#include -#include -#include "blaze_adaptor.h" - -namespace minocore { - -template -struct DiskMat { - using This = DiskMat; - size_t nr_, nc_; - using mmapper = mio::mmap_sink; - std::unique_ptr ms_; - std::FILE *fp_; - bool delete_file_; - - - // TODO: - // alignment -- if offset is 0, it's already aligned. 
- // -- otherwise, allocate enough extra so that it is - - static constexpr blaze::AlignmentFlag AF = isAligned ? blaze::aligned: blaze::unaligned; - static constexpr blaze::PaddingFlag PF = isPadded ? blaze::padded: blaze::unpadded; - using MatType = blaze::CustomMatrix; - MatType mat_; - std::string path_; - - DiskMat(const DiskMat &o): DiskMat(o.nr_, o.nc_, nullptr) { - std::memcpy(ms_->data(), o.ms_->data(), o.ms_->size()); - } - DiskMat(DiskMat &&o): path_(o.path_) { - uint8_t *ptr = reinterpret_cast(this), *optr = reinterpret_cast(std::addressof(o)); - std::memset(ptr, 0, sizeof(*this)); - std::swap_ranges(ptr, ptr + sizeof(*this), optr); - std::fprintf(stderr, "[%s at %p] moved diskmat has path %s\n", __PRETTY_FUNCTION__, (void *)this, path_.data() ? path_.data(): "tmpfile"); - } - static constexpr size_t SIMDSIZE = blaze::SIMDTrait::size; - DiskMat(const DiskMat &o, const char *s, size_t offset=0, int delete_file=-1): - DiskMat(o.rows(), o.columns(), s, offset, delete_file >= 0 ? delete_file: o.delete_file_) - { - std::memcpy(ms_->data(), o.ms_->data(), sizeof(VT) * (~*this).spacing() * nr_); -#if VERBOSE_AF - std::fprintf(stderr, "Copied to %s\n", path_.size() ? path_.data(): "tmpfile"); -#endif - } - operator MatType &() {return ~*this;} - operator const MatType &() const {return ~*this;} - DiskMat(size_t nr, size_t nc, const char *s=nullptr, size_t offset=0, bool delete_file=true): - nr_(nr), nc_(nc), - delete_file_(delete_file), - path_(s ? s: "") - { -#if VERBOSE_AF - std::fprintf(stderr, "Opened file at %s to make matrix of size %zu, %zu\n", s ? s: "tmpfile", nr_, nc_); -#endif - if(isAligned && offset % (SIMDSIZE * sizeof(VT))) { - throw std::invalid_argument("offset is not aligned; invalid storage."); - } - const size_t nperrow = isPadded ? size_t(blaze::nextMultiple(nc_, SIMDSIZE)): nc_; - const size_t nb = nr_ * nperrow * sizeof(VT), total_nb = nb + offset; - if((fp_ = s ? std::fopen(s, "a+"): std::tmpfile()) == nullptr) { - char buf[256]; - std::sprintf(buf, "Failed to open file for writing. %s/%d (%s)", ::strerror(errno), errno, s ? s: "tmpfil"); - throw std::system_error(0, std::system_category(), buf); - } - const int fd = ::fileno(fp_); - struct stat st; - int rc; - if((rc = ::fstat(fd, &st))) { - char buf[256]; - std::sprintf(buf, "Failed to fstat fd/fp/path %d/%p/%s", fd, (void *)fp_, path_.data()); - std::fclose(fp_); - fp_ = nullptr; - throw std::system_error(rc, std::system_category(), buf); - } - size_t filesize = st.st_size; - if(filesize < total_nb) { - if((rc = ::ftruncate(fd, total_nb))) throw std::system_error(rc, std::system_category(), "Failed to resize (ftruncate)"); - ::fstat(fd, &st); - } - assert(size_t(st.st_size) >= total_nb); - ms_.reset(new mmapper(fd, offset, nb)); - mat_ = MatType((VT *)ms_->data(), nr, nc, nperrow); - assert(s ? 
(path_.data() && std::strcmp(path_.data(), s)) == 0: path_.empty()); - std::fprintf(stderr, "Spacing: %zu\n", (~*this).spacing()); - } - DiskMat(size_t nr, size_t nc, std::string path, size_t offset=0, bool delete_file=false): DiskMat(nr, nc, path.data(), offset, delete_file) {} - auto operator()(size_t i, size_t j) const {return (~*this)(i, j);} - auto &operator()(size_t i, size_t j) {return (~*this)(i, j);} - ~DiskMat() { - if(fp_) std::fclose(fp_); - if(delete_file_ && path_.size()) { -#if VERBOSE_AF - std::fprintf(stderr, "[%s at %p]path: %s/%p\n", __PRETTY_FUNCTION__, (void *)this, path_.data(), (void *)path_.data()); -#endif - auto rc = std::system((std::string("rm ") + path_).data()); - if(rc) { - std::fprintf(stderr, "Note: file deletion failed with exit status %d and stopsig %d\n", - WEXITSTATUS(rc), WSTOPSIG(rc)); - } - } - } - auto data() const {return mat_.data();} - auto data() {return mat_.data();} - auto spacing() const {return mat_.spacing();} - auto rows() const {return mat_.rows();} - auto columns() const {return mat_.columns();} - MatType &operator~() {return mat_;} - const MatType &operator~() const {return mat_;} -}; // DiskMat - -template -auto row(DiskMat &mat, size_t i, blaze::Check check=blaze::Check()) { - return blaze::row(~mat, i, check); -} -template -auto column(DiskMat &mat, size_t i, blaze::Check check=blaze::Check()) { - return blaze::column(~mat, i, check); -} - -#ifndef DEFAULT_MAX_NRAMBYTES -#define DEFAULT_MAX_NRAMBYTES static_cast(16ull << 30) -#endif - -template -class PolymorphicMat { - using CMType = blaze::CustomMatrix; - using DiskType = DiskMat; - std::unique_ptr diskmat_; - std::unique_ptr> rammat_; - CMType cm_; -public: - static constexpr size_t MAX_BYTES_RAM = max_nbytes; - PolymorphicMat(size_t nr, size_t nc, size_t maxmem=MAX_BYTES_RAM, const char *s=nullptr) { - size_t spacing = blaze::nextMultiple(nc, blaze::SIMDTrait::size); - size_t total_bytes = nr * spacing * sizeof(VT); - VT *ptr; - if(total_bytes > maxmem) { - diskmat_.reset(new DiskType(nr, nc, s)); - ptr = diskmat_->data(); - } else { - rammat_.reset(new blaze::DynamicMatrix(nr, nc)); - ptr = rammat_->data(); - } - cm_ = CMType(ptr, nr, nc, spacing); - } - CMType &operator~() {return cm_;} - const CMType &operator~() const {return cm_;} -}; - -} // minocore - -#endif diff --git a/include/minocore/util/exception.h b/include/minocore/util/exception.h new file mode 100644 index 00000000..b8797dbd --- /dev/null +++ b/include/minocore/util/exception.h @@ -0,0 +1,103 @@ +#ifndef FGC_EXCEPTION_H__ +#define FGC_EXCEPTION_H__ +#include +#include + +namespace minocore { + +inline namespace exception { + +struct TODOError: public std::runtime_error { + template + TODOError(A &&...a): std::runtime_error(std::forward(a)...) {} +}; + +class NotImplementedError: public std::runtime_error { +public: + template + NotImplementedError(Args &&...args): std::runtime_error(std::forward(args)...) 
{} + + NotImplementedError(): std::runtime_error("NotImplemented.") {} +}; + +class UnsatisfiedPreconditionError: public std::runtime_error { +public: + UnsatisfiedPreconditionError(std::string msg): std::runtime_error(std::string("Unsatisfied precondition: ") + msg) {} + + UnsatisfiedPreconditionError(): std::runtime_error("Unsatisfied precondition.") {} +}; + +static int require(bool condition, std::string s, int ec=0) { + if(!condition) { + if(ec) throw std::runtime_error(s + " Error code: " + std::to_string(ec)); + else throw std::runtime_error(s); + } + return ec; +} + +static int validate(bool condition, std::string s, int ec=0) { + if(!condition) { + if(ec) throw std::invalid_argument(s + " Error code: " + std::to_string(ec)); + else throw std::invalid_argument(s); + } + return ec; +} + + +static int precondition_require(bool condition, std::string s, int ec=0) { + if(!condition) { + if(ec) throw UnsatisfiedPreconditionError(s + " Error code: " + std::to_string(ec)); + else throw UnsatisfiedPreconditionError(s); + } + return ec; +} + +class UnsatisfiedPostconditionError: public std::runtime_error { +public: + UnsatisfiedPostconditionError(std::string msg): std::runtime_error(std::string("Unsatisfied postcondition: ") + msg) {} + + UnsatisfiedPostconditionError(): std::runtime_error("Unsatisfied postcondition.") {} +}; + +static int postcondition_require(bool condition, std::string s, int ec=0) { + if(!condition) { + if(ec) throw UnsatisfiedPostconditionError(s + " Error code: " + std::to_string(ec)); + else throw UnsatisfiedPostconditionError(s); + } + return ec; +} + +#ifndef PREC_REQ_EC +#define PREC_REQ_EC(condition, s, ec) \ + ::minocore::exception::precondition_require(condition, std::string(s) + '[' + __FILE__ + '|' + __PRETTY_FUNCTION__ + "|#L" + std::to_string(__LINE__) + "] Failing condition: \"" + #condition + '"', ec) +#endif + +#ifndef PREC_REQ +#define PREC_REQ(condition, s) PREC_REQ_EC(condition, s, 0) +#endif + +#ifndef POST_REQ_EC +#define POST_REQ_EC(condition, s, ec) \ + ::minocore::exception::postcondition_require(condition, std::string(s) + '[' + __FILE__ + '|' + __PRETTY_FUNCTION__ + "|#L" + std::to_string(__LINE__) + "] Failing condition: \"" + #condition + '"', ec) +#endif + +#ifndef POST_REQ +#define POST_REQ(condition, s) POST_REQ_EC(condition, s, 0) +#endif + + +#ifndef MINOCORE_REQUIRE +#define MINOCORE_REQUIRE(condition, s) \ + ::minocore::exception::require(condition, std::string(s) + '[' + __FILE__ + '|' + __PRETTY_FUNCTION__ + "|#L" + std::to_string(__LINE__) + "] Failing condition: \"" + #condition + '"') +#endif + +#ifndef MINOCORE_VALIDATE +#define MINOCORE_VALIDATE(condition) \ + ::minocore::exception::validate(condition, std::string("[") + __FILE__ + '|' + __PRETTY_FUNCTION__ + "|#L" + std::to_string(__LINE__) + "] Failing condition: \"" + #condition + '"') +#endif + +} // inline namespace exception + +} // namespace minocore + +#endif /* FGC_EXCEPTION_H__ */ diff --git a/include/minocore/util/macros.h b/include/minocore/util/macros.h index bc7d0e4d..1f475a61 100644 --- a/include/minocore/util/macros.h +++ b/include/minocore/util/macros.h @@ -135,8 +135,10 @@ #if !NDEBUG # define DBG_ONLY(...) __VA_ARGS__ +# define DBG_ELSE(x, y) x #else # define DBG_ONLY(...)
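// A hypothetical call site for the DBG_ELSE pair defined in this hunk: choose
// a checked expression in debug builds and a cheaper one otherwise, e.g.
//   int second = DBG_ELSE(vec.at(1), vec[1]); // bounds-checked only when !NDEBUG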
+# define DBG_ELSE(x, y) y #endif #if VERBOSE_AF @@ -195,4 +197,17 @@ # endif #endif + +#ifndef NDEBUG +# include +# define PRETTY_SAY std::cerr << '[' << __PRETTY_FUNCTION__ << ':' << __FILE__ << ':' << __LINE__ << ']' +#else + struct CHEVRONEATER { + template + const CHEVRONEATER &operator<<(const T &) const {return *this;} + }; +# define PRETTY_SAY ::CHEVRONEATER{} + +#endif + #endif /* SKETCH_MACROS_H__ */ diff --git a/include/minocore/util/oracle.h b/include/minocore/util/oracle.h index 4af6ef8d..fd5e7426 100644 --- a/include/minocore/util/oracle.h +++ b/include/minocore/util/oracle.h @@ -65,14 +65,14 @@ struct PairKeyType { } static auto rh(Type v) { if constexpr(sizeof(IT) == 4) { - static constexpr Type bitmask = static_cast((uint64_t(1) << 32) - 1); - return v & bitmask; + return v & 0xFFFFFFFFu; } else { return v.second; } } }; + template class Map=std::unordered_map, bool symmetric=true, bool threadsafe=false, typename IT=std::uint32_t> struct CachingOracleWrapper { using output_type = std::decay_t()(0,0))>; @@ -220,6 +220,85 @@ auto make_matrix_m(const Mat &mat) { } +template class Map=std::unordered_map, bool symmetric=true, bool threadsafe=false, typename IT=std::uint32_t, typename FT=float, + bool use_row_vector=true> +struct RowCachingOracleWrapper { + using output_type = std::decay_t()(0,0))>; + using VType = blaze::DynamicVector; + using map_type = Map; + const Oracle &oracle_; + mutable map_type map_; + size_t np_; +private: + mutable std::shared_mutex mut_; + using map_iterator = typename map_type::iterator; + // TODO: use two kinds of locks +public: + RowCachingOracleWrapper(const Oracle &oracle, size_t np, size_t rsvsz=0): oracle_(oracle), np_(np) { + map_.reserve(rsvsz ? rsvsz: np); + } + template + void cache_range(It start, It end) const { + unsigned n = std::distance(start, end); + for(auto i = 0u; i < n; ++i) { + VType tmp(np_); + auto lhi = start[i]; + if(map_.find(lhi) != map_.end()) continue; + OMP_PFOR + for(size_t j = 0; j < np_; ++j) { + auto it = map_.find(j); + tmp[j] = (it == map_.end()) ? 
oracle_(lhi, j): it->second[lhi]; + } + map_.emplace(lhi, std::move(tmp)); + } + } + output_type operator()(IT lh, IT rh) const { + std::shared_lock slock(mut_); + map_iterator it; + if((it = map_.find(lh)) != map_.end()) + return it->second[rh]; + if constexpr(symmetric) { + if((it = map_.find(rh)) != map_.end()) + return it->second[lh]; + } + VType tmp(np_); +#ifdef _OPENMP +# pragma omp parallel for +#endif + for(size_t i = 0; i < np_; ++i) + tmp[i] = oracle_(lh, i); + output_type ret = tmp[rh]; +#ifndef NDEBUG + size_t oldsize = map_.size(); +#endif + if constexpr(threadsafe) { + slock.unlock(); + std::unique_lock ulock(mut_); + if(map_.find(lh) != map_.end()) return ret; + map_.emplace(lh, std::move(tmp)); + } else { + map_.emplace(lh, std::move(tmp)); + } + DBG_ONLY(if(oldsize != map_.size()) std::fprintf(stderr, "New size: %zu\n", map_.size());) + if constexpr(threadsafe) slock.unlock(); + return ret; + } +}; + +template +void prep_range(It, It2, const T &) {} + +template class Map, bool sym, bool ts, typename IT, typename FT, bool use_row_vector> +void prep_range(It start, It2 end, const RowCachingOracleWrapper &x) { + x.cache_range(start, end); +} + +template class Map=std::unordered_map, bool symmetric=true, bool threadsafe=false, typename IT=std::uint32_t, typename FT=float, typename Oracle> +auto make_row_caching_oracle_wrapper(const Oracle &oracle, size_t np, size_t rsvsz=0) { + return RowCachingOracleWrapper(oracle, np, rsvsz); +} + + } // namespace minocore #endif /* FGC_ORACLE_H__ */ diff --git a/include/minocore/util/shared.h b/include/minocore/util/shared.h index 8f932b10..36a83ad7 100644 --- a/include/minocore/util/shared.h +++ b/include/minocore/util/shared.h @@ -49,10 +49,6 @@ INLINE auto checked_posix_write(int fd, const void *buf, ssize_t count) { return ret; } -struct TODOError: public std::runtime_error { - template - TODOError(A &&...a): std::runtime_error(std::forward(a)...) {} -}; struct Deleter { void operator()(const void *x) const { diff --git a/include/minocore/util/sorted.h b/include/minocore/util/sorted.h new file mode 100644 index 00000000..12651aa6 --- /dev/null +++ b/include/minocore/util/sorted.h @@ -0,0 +1,71 @@ +#ifndef SORTED_DQ_H__ +#define SORTED_DQ_H__ +#include +#include +#include +#include +#include + + +namespace sorted { + +// Sorted deque +template class Container, typename T, typename All, typename Cmp=std::less<>, bool upper_insert=true, typename...Args> +class container { + Container data_; + Cmp cmp_; +public: + template + container(CArgs &&...args): data_(std::forward(args)...) { + sort(data_.begin(), data_.end(), cmp_); + } + template + auto lower_bound(const U &item) const { + return std::lower_bound(data_.begin(), data_.end(), item, cmp_); + } + template + auto upper_bound(const U &item) const { + return std::upper_bound(data_.begin(), data_.end(), item, cmp_); + } + auto find(const T &x) const { + return lower_bound(x); + } + auto &con() {return data_;} + auto &con() const {return data_;} + template + auto emplace(EArgs &&...args) { + T x(std::forward(args)...); + auto it = upper_insert ? 
upper_bound(x): lower_bound(x); + data_.insert(it, std::move(x)); + assert(std::is_sorted(data_.begin(), data_.end(), cmp_)); + } + auto erase(const T &x) { + if(auto it = find(x); it != end()) + this->erase(it); + } + T &operator[](size_t i) {return data_[i];} + const T &operator[](size_t i) const {return data_[i];} + auto begin() {return data_.begin();} + auto end() {return data_.end();} + auto begin() const {return data_.begin();} + auto end() const {return data_.end();} + auto cbegin() {return data_.cbegin();} + auto cend() {return data_.cend();} + auto size() const {return data_.size();} + auto pop() {auto ret = std::move(data_.back()); data_.pop_back(); return ret;} + using iterator = typename Container::iterator; + using const_iterator = typename Container::const_iterator; + using value_type = typename Container::value_type; + using pointer = typename Container::pointer; + using const_pointer = typename Container::const_pointer; + using reference = typename Container::reference; + using const_reference = typename Container::const_reference; +}; + +template, typename All=std::allocator> +using vector = container; +template, typename All=std::allocator> +using deque = container; + +} // sorted +#endif diff --git a/include/minocore/util/timer.h b/include/minocore/util/timer.h index 1bc0da53..59919c1c 100644 --- a/include/minocore/util/timer.h +++ b/include/minocore/util/timer.h @@ -11,7 +11,7 @@ namespace util { using hrc = std::chrono::high_resolution_clock; template -static inline uint32_t timediff2ms(std::chrono::time_point start, std::chrono::time_point stop) { +static inline double timediff2ms(std::chrono::time_point start, std::chrono::time_point stop) { if(stop < start) std::swap(stop, start); return std::chrono::duration(stop - start).count(); } diff --git a/include/minocore/utility.h b/include/minocore/utility.h index 9ede33f4..f22abeba 100644 --- a/include/minocore/utility.h +++ b/include/minocore/utility.h @@ -1,13 +1,13 @@ #ifndef FGC_UTILITY_H__ #define FGC_UTILITY_H__ +#include "minocore/util/exception.h" #include "minocore/util/macros.h" #include "minocore/util/shared.h" #include "minocore/util/blaze_adaptor.h" #include "minocore/util/Inf2Zero.h" #include "minocore/util/csc.h" -#include "minocore/util/diskmat.h" #include "minocore/util/div.h" #include "minocore/util/packed.h" diff --git a/include/minocore/wip.h b/include/minocore/wip.h index 84a12a6a..020a67fd 100644 --- a/include/minocore/wip.h +++ b/include/minocore/wip.h @@ -3,7 +3,6 @@ #include "./wip/caratheodory.h" #include "./wip/streaming.h" -#include "./wip/clustering.h" #include "./wip/gen_kmedian.h" #endif diff --git a/include/minocore/wip/clustering.h b/include/minocore/wip/clustering.h deleted file mode 100644 index 9454ab43..00000000 --- a/include/minocore/wip/clustering.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef FGC_CLUSTERING_H__ -#define FGC_CLUSTERING_H__ -#include "minocore/dist.h" - -namespace minocore { - -namespace clustering { - -#if 0 -enum ClusteringAssignmentType: size_t { - HARD - SOFT -}; -#endif - -} - -} // namespace minocore - -#endif /* FGC_CLUSTERING_H__ */ diff --git a/include/minocore/wip/old_clustering.h b/include/minocore/wip/old_clustering.h new file mode 100644 index 00000000..930f9cee --- /dev/null +++ b/include/minocore/wip/old_clustering.h @@ -0,0 +1,652 @@ +#ifndef FGC_CLUSTERING_H__ +#define FGC_CLUSTERING_H__ +#include "minocore/dist.h" +#include "minocore/optim/kmedian.h" +#include "minocore/util/exception.h" +#include "minocore/wip/clustering_traits.h" +#include + +namespace 
minocore { + +namespace clustering { + +using blz::DissimilarityMeasure; + +struct CentroidPolicy { + template *> + static void perform_average(blz::DenseVector &ret, const Range &r, const RowSums &rs, + const blz::Vector *wc = static_cast *>(nullptr), + DissimilarityMeasure measure=static_cast(-1)) + { + using FT = blz::ElementType_t; + if(measure==static_cast(-1)) { + std::fprintf(stderr, "Die\n"); + std::exit(1); + } + if(measure == blz::TOTAL_VARIATION_DISTANCE) { + coresets::l1_median(r, ret, wc); + } + else if(measure == blz::L1) { + using cmtype = + std::conditional_t, + blz::CompressedMatrix >, + blz::DynamicMatrix > + >; + cmtype cm = r * blz::expand(rs, r.columns()); + coresets::l1_median(cm, ret, wc); + } else if(measure == blz::LLR || measure == blz::UWLLR) { + FT total_sum_inv; + if(wc) total_sum_inv = 1. / blz::dot(rs, *wc); + else total_sum_inv = 1. / blz::sum(rs); + if(wc) { + ~ret = blz::sum(r % blz::expand(rs, r.columns())) * total_sum_inv; + } else { + ~ret = blz::sum(r % blz::expand(*wc * rs, r.columns())) * total_sum_inv; + } + } else if(wc) { + assert((~(*wc)).size() == r.rows()); + assert(blz::expand(~(*wc), r.columns()).rows() == r.rows()); + assert(blz::expand(~(*wc), r.columns()).columns() == r.columns()); + auto wsuminv = 1. / blz::sum(*wc); + if(blz::detail::is_probability(measure) { + ~ret = blz::sum(r % blz::expand(~(*wc), r.columns())) * (wsuminv / r.rows()); + } else { + // Otherwise + ~ret = blz::mean(r % blz::expand(~(*wc) * rs, r.columns()) * wsuminv; + } + } else { + if(blz::detail::is_probability(measure) + ~ret = blz::mean(r % blz::expand(rs, r.columns())); + else + ~ret = blz::mean(r); + } + } + template + static void do_inc(FT neww, FT cw, Row &ret, const Src &dat, FT row_sum, DissimilarityMeasure measure) + { + if(measure == blz::L1 || measure == blz::TOTAL_VARIATION_DISTANCE) + throw std::invalid_argument("do_inc is only for linearly-calculated means, not l1 median"); + if(cw == 0.) { + if(blz::detail::is_probability(measure)) + ret = dat; + else + ret = dat * row_sum; + } else { + auto div = neww / (neww + cw); + if(blz::detail::is_probability(measure)) { + ret += (dat - ret) * div; + } else if(measure == blz::LLR || measure == blz::UWLLR) { + ret += (dat * row_sum) * neww; + // Add up total sum and subtract later + // since there are three weighting factors here: + // First, partial assignment + // Then point-wise weights (both of which are in neww) + // Then, for LLR/UWLLR, there's weighting by the row-sums + } else { + // Maintain running mean for full vector value + ret += (dat * row_sum - ret) * div; + } + } + } + + template + static void perform_soft_assignment(const blz::DynamicMatrix &assignments, + const RowSums &rs, + std::mutex *mutptr, + const MatType &data, CenterCon &newcon, + const blz::Vector *wc = static_cast *>(nullptr), + DissimilarityMeasure measure=static_cast(-1)) + { + using FT = ElementType_t; + if(measure==static_cast(-1)) { + std::fprintf(stderr, "Die\n"); + std::exit(1); + } + if(measure == blz::L1 || measure == blz::TOTAL_VARIATION_DISTANCE) { + throw TODOError(); + } else { + blz::DV summed_contribs(newcon.size(), 0.); + for(size_t i = 0; i < data.rows(); ++i) { + auto item_weight = wc ? wc[i]: static_cast(1.); + const auto row_sum = rs[i]; + for(size_t j = 0; j < newcon.size(); ++j) { + auto &cw = summed_contribs[j]; + if(auto asnw = asn[j]; asnw > 0.) 
{ + auto neww = item_weight * asnw; + +#ifdef _OPENMP + if(mutptr) mutptr->lock(); +#endif + do_inc(neww, cw, newcon[j], row(data, i, blz::unchecked), row_sum, measure); +#ifdef _OPENMP + if(mutptr) mutptr->unlock(); +#endif + OMP_ATOMIC + summed_contribs[j] += neww; + } + } + } + if(measure == blz::LLR || measure == blz::UWLLR) { + OMP_PFOR + for(auto i = 0u; i < newcon.size(); ++i) + newcon[i] *= 1. / blz::dot(column(assignments, i), rs); + } + } + } +}; + +template > +void perform_cluster_metric_kmedian(const jsd::DissimilarityApplicator &app, unsigned k, uint64_t seed=0, const WFT *weights=static_cast(nullptr)) +{ + throw NotImplementedError(); +} + +enum LloydLoopResult { + FINISHED, + REACHED_MAX_ROUNDS, + UNFINISHED +}; + +template +LloydLoopResult perform_lloyd_loop(CentersType ¢ers, Assignments &assignments, &const jsd::DissimilarityApplicator &app, unsigned k, uint64_t seed=0, const WFT *weights=static_cast(nullptr), + size_t max_iter=100, double eps=1e-4, LloydLoopResult &ret) +{ + if(co != EXTRINSIC) throw std::invalid_argument("Must be extrinsic for Lloyd's"); + using FT = ElementType_t; + auto &mat = app.data(); + CentersType centers_cpy(centers), centers_cache; + if(blz::detail::needs_logs(app.measure_) || blz::detail::needs_sqrt(app.measure_)) + centers_cache.resize(centers.size()); + double last_distance = std::numeric_limits::max(), first_distance = last_distance, + center_distance; + LloydLoopResult ret = UNFINISHED; + auto get_center_change_distance = [&]() { + center_distance = std::accumulate(centers_cpy.begin(), centers_cpy.end(), 0., + [&](double value, auto ¢er) { + auto ind = std::distance(*centers_cpy.begin(), ¢ers_); + return value + blz::sum(blz::abs(center - centers_[ind])); + } + ); + std::swap(centers_cpy, centers); + if(last_distance == std::numeric_limits::max()) { + last_distance = first_distance = center_distance; + iternum = 1; + } else { + last_distance = center_distance; + if(center_distance / first_distance < eps) + ret = LloydLoopResult::FINISHED; + if(++iternum > max_iter) + ret = LloydLoopResult::REACHED_MAX_ROUNDS; + ret = UNFINISHED; + } + }; + // Next: make a set of std::vectors, then use blaze to compute averages under the policy + // Everything but L1 and TVD use element-wise mean + auto getcache = [&] (size_t j) { + return centers_cache.size() ? 
¢ers_cache[j]: static_cast(nullptr); + }; + if constexpr(asn_method = HARD) { + std::vector> assigned(centers.size()); + OMP_ONLY(std::unique_ptr mutexes(centers.size());) + size_t iternum = 0; + for(;;) { + // Do it forever + if(centers_cache.size()) { + for(size_t i = 0; i < centers.size(); ++i) + set_cache(centers[i], centers_cache[i], app.measure_); + } + for(auto &i: assigned) i.clear(); + OMP_PFOR + for(size_t i = 0; i < app.size(); ++i) { + auto dist = app(i, centers[0], getcache(0)); + unsigned asn = 0; + for(size_t j = 1; j < centers.size(); ++j) { + auto newdist = app(i, centers[j], getcache(j)); + if(newdist < dist) { + asn = j; + dist = newdist; + } + } + assignments[i] = asn; + { + OMP_ONLY(std::unique_lock lock(mutexes[asn]);) + assigned[asn].push_back(i); + } + } + // Make assignments + for(size_t i = 0; i < centers_cpy.size(); ++i) { + auto &cref = centers_cpy[i]; + auto &assigned_ids = assigned[i]; + shared::sort(assigned_ids.begin(), assigned_ids.end()); // Better access pattern + CentroidPolicy::perform_average( + cref, + rows(mat, assigned_ids.data(), assigned_ids.size()), + elements(app.row_sums(), assigned_ids.data(), assigned_ids.size()), + weights, app.measure_ + ); + } + get_center_change_distance(); + if(ret != UNFINISHED) return ret; + } + // Set the returned values to be the last iteration's. + } else { + size_t iternum = 0; + const size_t nc = centers.size(), nr = app.size(); + if(assignments.rows() != app.size() || assignments.columns() != centers.size()) { + assignments.resize(app.size(), centers.size()); + } + std::unique_ptr mutexes; + OMP_ONLY(mutexes.reset(new std::mutex[centers.size()]);) + for(;;) { + if(centers_cache.size()) { + for(size_t i = 0; i < centers.size(); ++i) + set_cache(centers[i], centers_cache[i], app.measure_); + } + for(auto &c: centers_cpy) c = static_cast(0); + OMP_PFOR + for(size_t i = 0; i < nr; ++i) { + auto row = row(assignments, i, BLAZE_CHECK_DEBUG); + for(unsigned j = 0; j < nc; ++j) { + row[j] = app(i, centers[j], getcache(j)); + } + if constexpr(asn_method == SOFT_HARMONIC_MEAN) { + row = 1. / row; + } else { + auto mv = blz::min(row); + row = blz::exp(-row + mv) - mv; + } + row *= 1. / blz::sum(row); + // And then compute its contribution to the mean of the points. + // Use stable running mean calculation + } + // Now points have been assigned, and we now perform center assignment + CentroidPolicy::perform_soft_assignment( + assignments, app.row_sums(), mutexes.get(), app.data(), centers_cpy, weights, app.measure_ + ); + } + get_center_change_distance(); + if(ret != UNFINISHED) return ret; + throw NotImplementedError("Not yet finished"); + } +} + + +template +auto perform_clustering(const jsd::DissimilarityApplicator &app, unsigned k, CenterSamplingType csample=DEFAULT_SAMPLING, + const blz::ElementType_t *weights=nullptr, uint64_t seed=0, OptimizationMethod opt=DEFAULT_OPT) +{ + using FT = typename MatrixType::ElementType; + ClusteringTraits clustering_traits; + clustering_traits.sampling = csample; + typename ClusteringTraits::centers_t centers; + typename ClusteringTraits::assignments_t assigments; + auto measure = app.measure_; + if(opt == DEFAULT_OPT) { + switch(measure) { + case L2: + case SQRL2: + case L1: case TVD: + case COSINE_DISTANCE: + case PROBABILITY_COSINE_DISTANCE: + case LLR: case UWLLR: + case HELLINGER: case BHATTACHARYYA_DISTANCE: + opt = EXPECTATION_MAXIMIZATION; break; + /* + * Bregman Divergences, LLR, cosine distance use the (weighted) mean of each + * point, in either soft or hard clustering. 
+ * TVD and L1 use the feature-wise median. + * Scores are either calculated with softmax distance or harmonic softmax + */ + case ORACLE_METRIC: case ORACLE_PSEUDOMETRIC: case BHATTACHARYYA_METRIC: case WASSERSTEIN: + /* otherwise, use metric kmedian */ + opt = METRIC_KMEDIAN; break; + default: + if(blz::detail::is_bregman(opt)) { + opt = EXPECTATION_MAXIMIZATION; + break; + } + } + } + + + if(blz::detail::satisfies_d2(measure) || measure == blz::L1 || measure == blz::TOTAL_VARIATION_DISTANCE) { + auto [initcenters, initasn, initcosts] = jsd::make_kmeanspp(app, k, seed, weights); + + if constexpr(co == INTRINSIC) { + throw std::invalid_argument("Shouldn't happen"); + } + centers.reserve(k); + std::copy(initasn.begin(), initasn.end(), std::back_inserter(assignments)); + for(const auto id: initcenters) { + centers.emplace_back(row(app.data(), id)); + } + if(co == INTRINSIC || opt == METRIC_KMEDIAN) { + // Do graph metric calculation + perform_cluster_metric_kmedian(app, k, seed, weights); + } else { + // Do Lloyd's loop (``kmeans'' algorithm) + perform_lloyd_loop(centers, assignments, app, k, seed, weights); + } + } else if(blz::detail::is_symmetric(measure)) { + throw std::runtime_error("Not implemented: symmetric measure clustering. This method should perform sampling (governed by the csample variable)" + ", followed by facility location, and finished by local search."); + perform_cluster_metric_kmedian(app, k, seed, weights); + } else { + throw NotImplementedError("Unsupported: asymmetric measures not supporting D2 sampling"); + } +} + +#if 0 +namespace helpers { + +template +class LookupMatrixOracle { + const Mat &mat_; +public: + LookupMatrixOracle(const Mat &mat): mat_(mat) {} + size_t size() const {return mat_.rows();} + template + auto compute_distance(const Sol &x, size_t center_index, size_t point_index) const { + assert(center_index < mat_.rows()); + assert(point_index < mat_.columns()); + return mat_(x[center_index], point_index); + } + void operator[](size_t ) const { + throw std::runtime_error("This should never be called"); + } + auto compute_distance(nullptr_t, size_t center_index, size_t point_index) const { + assert(point_index < mat_.rows()); + assert(center_index < mat_.rows()); + return mat_(center_index, point_index); + } +}; + +template +auto make_lookup_data_oracle(const Mat &mat) { + return LookupMatrixOracle(mat); +} + +template +class ExtrinsicFunctorOracle { + const Mat &mat_; + const Functor &func_; +public: + ExtrinsicFunctorOracle(const Mat &mat, const Functor &func): mat_(mat), func_(func) {} + size_t size() const {return mat_.rows();} + template + auto compute_distance(const Sol &x, size_t center_index, size_t point_index) const { + assert(point_index < mat_.rows()); + assert(center_index < x.size()); + return func_(x[center_index], mat_[point_index]); + } + decltype(auto) operator[](size_t ind) {return mat_[ind];} + decltype(auto) operator[](size_t ind) const {return mat_[ind];} + // This function computes a distance between two points + auto compute_distance(nullptr_t, size_t center_index, size_t point_index) const { + return compute_distance(center_index, point_index); + } + auto compute_distance(size_t center_index, size_t point_index) const { + assert(point_index < size()); + assert(center_index < size()); + return func_(mat_[center_index], mat_[point_index]); + } +}; + +template +auto make_exfunc_oracle(const Mat &mat, const Func &func) { + return ExtrinsicFunctorOracle(mat, func); +} + +} // helpers +using helpers::make_exfunc_oracle; +using 
helpers::make_lookup_data_oracle; + + +template +struct ClusteringSolverBase: public MyClusteringTraits { + + using centers_t = typename MyClusteringTraits::centers_t; + using costs_t = typename MyClusteringTraits::costs_t; + using assignments_t = typename MyClusteringTraits::assignments_t; + using cost_t = typename MyClusteringTraits::cost_t; + using index_t = typename MyClusteringTraits::index_t; + using MyClusteringTraits::asn_method; + using MyClusteringTraits::center_origin; + using MyClusteringTraits::approx; + //using MyClusteringTraits::sampling_method; + using MyClusteringTraits::opt; + + using FT = typename MyClusteringTraits::cost_t; +private: + const DataOracle &data_oracle_; + /* + * DataOracle is the key for interfacing with the data. + * It must provide: + * 1. size() const method listing the number of points. + * 2. compute_distance(const centers_t ¢ers, unsigned center_index, unsigned point_index) + * + * For pre-computed matrices (e.g., metric distance matrix) with rows corresponding to centers, + * and columns corresponding to data points, + * DataOracle might have a mat_ field for the matrix and return + * `mat_(center_index, point_index)`. + * LookupMatrixOracle satisfies this, for instance. + * + * For distance-oracle functions, + * use the ExtrinsicFunctorOracle class. + * + * For instance, if `dm` is a dense matrix of points in row-major format: + * auto oracle = clustering::make_exfunc_oracle(dm, blz::sqrL2Norm()) + * clustering::ClusteringSolverBase solver(oracle, dm.rows(), k); + * + * + * For Applicator-supported functions, this might be + * `applicator_(point_index, centers_[center_index])` + * or have an alternate form that caches logs or sqrts. + */ + size_t np_; + uint32_t k_; + uint32_t points_to_sample_; + DissimilarityMeasure measure_; // What measure of dissimilarity. 
+ // Use ORACLE_METRIC or ORACLE_PSEUDOMETRIC as placeholders for measures + // Not supported by the applicator + + std::unique_ptr c_sol_; + std::unique_ptr c_assignments_; + std::unique_ptr c_costs_; + std::unique_ptr pointwise_costs_; + const FT *weights_; + SensitivityMethod sens_; // Which coreset construction method + + void validate_parameters() { + assert(sens_ != static_cast(-1)); + if(opt == METRIC_KMEDIAN) { + validate(blz::detail::satisfies_metric(measure_) || blz::detail::satisfies_rho_metric(measure_)); + } +#if 0 + if(sampling_method == THORUP_SAMPLING) { + validate(blz::detail::satisfies_metric(measure_) || blz::detail::satisfies_rho_metric(measure_)); + } + if(sampling_method == D2_SAMPLING) { + validate(blz::detail::satisfies_d2(measure_)); + } +#endif + } + + void set_sensitivity_method(SensitivityMethod val=static_cast(-1)) { + bool unset = val == static_cast(-1); + if(unset) { + if(blz::detail::is_bregman(val)) sens_ = LBK; + else if(approx == BICRITERIA) { + sens_ = BFL; + } else if(approx == CONSTANT_FACTOR) { + if(blz::detail::is_bregman(val)) std::fprintf(stderr, "Warning: Bregman approximations are O(log(k)) approximate, not constant.\n"); + sens_ = VX; + } else /*approx == HEURISTIC */ { + sens_ = BFL; + } + } else { + if(val == VX) { + MINOCORE_VALIDATE(approx == CONSTANT_FACTOR || approx == HEURISTIC); + if(blz::detail::is_bregman(val)) std::fprintf(stderr, "Warning: Bregman solutions are O(log(k)) approximate, not constant.\n"); + } else if (val == LUCIC_FAULKNER_KRAUSE_FELDMAN) { + throw NotImplementedError("Not supported currently: GMM coreset sampling"); + } + sens_ = val; + } + } + +public: + void set_assignments_and_costs() { + PREC_REQ(c_sol_.get(), "Complete sol must already have been computed."); + if constexpr(asn_method == HARD) { + if(!c_assignments_) + c_assignments_.reset(new assignments_t(data_oracle_.size())); + else if(c_assignments_->size() != data_oracle_.size()) + c_assignments_->resize(data_oracle_.size()); + if(!c_costs_) + c_costs_.reset(new costs_t(data_oracle_.size())); + else if(c_costs_->size() != data_oracle_.size()) + c_costs_->resize(data_oracle_.size()); + OMP_PFOR + for(size_t i = 0; i < data_oracle_.size(); ++i) { + auto mincost = data_oracle_.compute_distance(*c_sol_, 0, i); + unsigned bestind = 0; + for(size_t j = 1; j < c_sol_->size(); ++j) { + if(auto newcost = data_oracle_.compute_distance(*c_sol_, j, i); newcost < mincost) + mincost = newcost, bestind = j; + } + c_assignments_->operator[](i) = bestind; + c_costs_->operator[](i) = mincost; + } + } else { // Soft or softmax assignments + assert(c_sol_->size() == k_); + if(!c_costs_) { + c_costs_.reset(new costs_t(np_, k_)); + } else if(c_costs_->rows() != np_ || c_costs_->columns() != k_) { + c_costs_->resize(np_, k_); + } + if(!c_assignments_) c_assignments_.reset(new assignments_t(*c_costs_)); + if(c_assignments_->size() != data_oracle_.size()) + c_assignments_->resize(data_oracle_.size()); + OMP_PFOR + for(size_t i = 0; i < data_oracle_.size(); ++i) { + // Compute costs + auto cost_row = row(*c_costs_, i, blaze::unchecked); + cost_row[0] = data_oracle_.compute_distance(*c_sol_, 0, i); + for(size_t j = 1; j < c_sol_->size(); ++j) { + cost_row[j] = data_oracle_.compute_distance(*c_sol_, j, i); + } + // Use costs to make fractional assignments + auto asn_row = row(*c_assignments_, i, blaze::unchecked); + if(asn_method == SOFT) + asn_row = blz::exp(-cost_row + blz::min(cost_row)); + else // SOFT_HARMONIC_MEAN, actually harmonic mean + asn_row = 1. 
/ cost_row; + asn_row /= blz::sum(asn_row); + } + } + } + void approx_sol(uint64_t seed=0) { + if constexpr(opt == BLACK_BOX || opt == GRADIENT_DESCENT || opt == EXHAUSTIVE_SEARCH) + throw NotImplementedError("Optimization under black box, gd or exhaustive search not yet supported"); + if constexpr(asn_method != HARD) + throw NotImplementedError("Not completed yet: SOFT or SOFT_HARMONIC_MEAN clustering"); + else + { + // One optimization technique each for metric (JV + local search) + // and expectation maximization. + if(blz::detail::satisfies_d2(measure_)) { + auto func = [&](size_t i, size_t j) { + return data_oracle_.compute_distance(i, j); + }; + wy::WyRand rng(seed); + auto [initcenters, initasn, initcosts] = coresets::kmeanspp(func, rng, np_, k_); + std::vector> centers; + centers.reserve(k_); + for(const auto id: initcenters) { + centers.emplace_back(data_oracle_[id]); + } + set_centers(std::move(centers)); + set_assignments_and_costs(); + } else { + throw NotImplementedError("Metric K-median needs to have optimizers plugged in."); + } + } + } + auto make_coreset_sampler(uint64_t seed=0) { + PREC_REQ(this->c_costs_.get(), "Current costs must be calculated"); + const cost_t *ptr; + if constexpr(asn_method == HARD) { + // Use the c_costs->data() method. + if(!weights_) + ptr = c_costs_->data(); + else if(pointwise_costs_.get()) ptr = pointwise_costs_.get(); + else { + pointwise_costs_.reset(new cost_t[np_]); + blaze::CustomVector + pv(c_costs_->data(), np_), pc(pointwise_costs_.get(), np_); + const blaze::CustomVector wv(const_cast(weights_), np_); + pc = pv * wv; + ptr = pointwise_costs_.get(); + } + } else { + if(pointwise_costs_.get()) { + ptr = pointwise_costs_.get(); + } else { + pointwise_costs_.reset(new cost_t[np_]); + OMP_PFOR + for(size_t i = 0; i < np_; ++i) + pointwise_costs_[i] = blz::dot(row(*c_assignments_, i, blz::unchecked), + row(*c_costs_, i, blz::unchecked)) * getw(i); + if(weights_) { + blaze::CustomVector + pv(pointwise_costs_.get(), np_); + const blaze::CustomVector wv(const_cast(weights_), np_); + pv *= wv; + } + } + } + coresets::CoresetSampler sampler; + if constexpr(asn_method == HARD) throw NotImplementedError("Coreset sampler supporting fractional assignment not yet available."); + else { + sampler.make_sampler(np_, points_to_sample_, ptr, c_assignments_->data(), weights_, seed, sens_); + } + } + template + void set_centers(const OT ¢ers) { + this->c_sol_.reset(new centers_t(centers.size())); + std::copy(centers.begin(), centers.end(), this->c_sol_->begin()); + } + void set_centers(centers_t &&newcenters) { + this->c_sol_.reset(new centers_t(std::move(newcenters))); + } + ClusteringSolverBase(const DataOracle &data, size_t npoints, unsigned k, + DissimilarityMeasure measure=ORACLE_PSEUDOMETRIC, + blz::distance::SensitivityMethod sens=static_cast(-1), + unsigned points_to_sample=0, const FT *weights=nullptr): + data_oracle_(data), np_(npoints), k_(k), + points_to_sample_(points_to_sample ? 
points_to_sample: k_), + measure_(measure), + weights_(weights) + { + if(points_to_sample_ != k_) std::fprintf(stderr, "note: sampling different number of points"); + set_sensitivity_method(sens); + validate_parameters(); + } + double calculate_cost(const centers_t ¢ers) { + throw NotImplementedError(); + } + const assignments_t &get_assignments(bool recalc=true) { + if(!c_assignments_ || recalc) set_assignments_and_costs(); + return *c_assignments_; + } +}; +#endif + + + +} // namespace clustering + +} // namespace minocore + +#endif /* FGC_CLUSTERING_H__ */ diff --git a/network_simplex/full_bipartitegraph.h b/network_simplex/full_bipartitegraph.h deleted file mode 100644 index 8a3e5242..00000000 --- a/network_simplex/full_bipartitegraph.h +++ /dev/null @@ -1,238 +0,0 @@ -/* -*- mode: C++; indent-tabs-mode: nil; -*- - * - * This file has been adapted by Nicolas Bonneel (2013), - * from full_graph.h from LEMON, a generic C++ optimization library, - * to implement a lightweight fully connected bipartite graph. A previous - * version of this file is used as part of the Displacement Interpolation - * project, - * Web: http://www.cs.ubc.ca/labs/imager/tr/2011/DisplacementInterpolation/ - * - * - **** Original file Copyright Notice : - * Copyright (C) 2003-2010 - * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport - * (Egervary Research Group on Combinatorial Optimization, EGRES). - * - * Permission to use, modify and distribute this software is granted - * provided that this copyright notice appears in all copies. For - * precise terms see the accompanying LICENSE file. - * - * This software is provided "AS IS" with no warranty of any kind, - * express or implied, and with no claim as to its suitability for any - * purpose. - * - */ - -#ifndef LEMON_FULL_BIPARTITE_GRAPH_H -#define LEMON_FULL_BIPARTITE_GRAPH_H - -#include - -///\ingroup graphs -///\file -///\brief FullBipartiteDigraph and FullBipartiteGraph classes. - - -namespace lemon { - - ///This \c \#define creates convenient type definitions for the following - ///types of \c Digraph: \c Node, \c NodeIt, \c Arc, \c ArcIt, \c InArcIt, - ///\c OutArcIt, \c BoolNodeMap, \c IntNodeMap, \c DoubleNodeMap, - ///\c BoolArcMap, \c IntArcMap, \c DoubleArcMap. - /// - ///\note If the graph type is a dependent type, ie. the graph type depend - ///on a template parameter, then use \c TEMPLATE_DIGRAPH_TYPEDEFS() - ///macro. -#define DIGRAPH_TYPEDEFS(Digraph) \ - typedef Digraph::Node Node; \ - typedef Digraph::Arc Arc; \ - - - ///Create convenience typedefs for the digraph types and iterators - - ///\see DIGRAPH_TYPEDEFS - /// - ///\note Use this macro, if the graph type is a dependent type, - ///ie. the graph type depend on a template parameter. 
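// The FullBipartiteDigraphBase removed below packs the arc from source s
// (s < _n1) to target t (t >= _n1) into one integer, s*_n2 + (t - _n1), and
// recovers the endpoints with division and modulus. A self-contained check
// that source()/target() invert arc() under that encoding, on toy sizes:
#include <cassert>
#include <cstdint>
inline void arc_roundtrip_demo() {
    const int n1 = 3, n2 = 4;                 // left/right partition sizes
    for(int s = 0; s < n1; ++s)
        for(int t = n1; t < n1 + n2; ++t) {
            const int64_t arc = int64_t(s) * n2 + (t - n1);
            assert(arc / n2 == s);            // mirrors source(arc)
            assert(arc % n2 + n1 == t);       // mirrors target(arc)
        }
}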
-#define TEMPLATE_DIGRAPH_TYPEDEFS(Digraph) \ - typedef typename Digraph::Node Node; \ - typedef typename Digraph::Arc Arc; \ - - - class FullBipartiteDigraphBase { - public: - - typedef FullBipartiteDigraphBase Digraph; - - //class Node; - typedef int Node; - //class Arc; - typedef int64_t Arc; - - protected: - - int _node_num; - int64_t _arc_num; - - FullBipartiteDigraphBase() {} - - void construct(int n1, int n2) { _node_num = n1+n2; _arc_num = (int64_t)n1 * (int64_t)n2; _n1=n1; _n2=n2;} - - public: - - int _n1, _n2; - - - Node operator()(int ix) const { return Node(ix); } - static int index(const Node& node) { return node; } - - Arc arc(const Node& s, const Node& t) const { - if (s<_n1 && t>=_n1) - return Arc((int64_t)s * (int64_t)_n2 + (int64_t)(t-_n1) ); - else - return Arc(-1); - } - - int nodeNum() const { return _node_num; } - int64_t arcNum() const { return _arc_num; } - - int maxNodeId() const { return _node_num - 1; } - int64_t maxArcId() const { return _arc_num - 1; } - - Node source(Arc arc) const { return arc / _n2; } - Node target(Arc arc) const { return (arc % _n2) + _n1; } - - static int id(Node node) { return node; } - static int64_t id(Arc arc) { return arc; } - - static Node nodeFromId(int id) { return Node(id);} - static Arc arcFromId(int64_t id) { return Arc(id);} - - - Arc findArc(Node s, Node t, Arc prev = -1) const { - return prev == -1 ? arc(s, t) : -1; - } - - void first(Node& node) const { - node = _node_num - 1; - } - - static void next(Node& node) { - --node; - } - - void first(Arc& arc) const { - arc = _arc_num - 1; - } - - static void next(Arc& arc) { - --arc; - } - - void firstOut(Arc& arc, const Node& node) const { - if (node>=_n1) - arc = -1; - else - arc = (node + 1) * _n2 - 1; - } - - void nextOut(Arc& arc) const { - if (arc % _n2 == 0) arc = 0; - --arc; - } - - void firstIn(Arc& arc, const Node& node) const { - if (node<_n1) - arc = -1; - else - arc = _arc_num + node - _node_num; - } - - void nextIn(Arc& arc) const { - arc -= _n2; - if (arc < 0) arc = -1; - } - - }; - - /// \ingroup graphs - /// - /// \brief A directed full graph class. - /// - /// FullBipartiteDigraph is a simple and fast implmenetation of directed full - /// (complete) graphs. It contains an arc from each node to each node - /// (including a loop for each node), therefore the number of arcs - /// is the square of the number of nodes. - /// This class is completely static and it needs constant memory space. - /// Thus you can neither add nor delete nodes or arcs, however - /// the structure can be resized using resize(). - /// - /// This type fully conforms to the \ref concepts::Digraph "Digraph concept". - /// Most of its member functions and nested classes are documented - /// only in the concept class. - /// - /// This class provides constant time counting for nodes and arcs. - /// - /// \note FullBipartiteDigraph and FullBipartiteGraph classes are very similar, - /// but there are two differences. While this class conforms only - /// to the \ref concepts::Digraph "Digraph" concept, FullBipartiteGraph - /// conforms to the \ref concepts::Graph "Graph" concept, - /// moreover FullBipartiteGraph does not contain a loop for each - /// node as this class does. - /// - /// \sa FullBipartiteGraph - class FullBipartiteDigraph : public FullBipartiteDigraphBase { - typedef FullBipartiteDigraphBase Parent; - - public: - - /// \brief Default constructor. - /// - /// Default constructor. The number of nodes and arcs will be zero. 
- FullBipartiteDigraph() { construct(0,0); } - - /// \brief Constructor - /// - /// Constructor. - /// \param n The number of the nodes. - FullBipartiteDigraph(int n1, int n2) { construct(n1, n2); } - - - /// \brief Returns the node with the given index. - /// - /// Returns the node with the given index. Since this structure is - /// completely static, the nodes can be indexed with integers from - /// the range [0..nodeNum()-1]. - /// The index of a node is the same as its ID. - /// \sa index() - Node operator()(int ix) const { return Parent::operator()(ix); } - - /// \brief Returns the index of the given node. - /// - /// Returns the index of the given node. Since this structure is - /// completely static, the nodes can be indexed with integers from - /// the range [0..nodeNum()-1]. - /// The index of a node is the same as its ID. - /// \sa operator()() - static int index(const Node& node) { return Parent::index(node); } - - /// \brief Returns the arc connecting the given nodes. - /// - /// Returns the arc connecting the given nodes. - /*Arc arc(Node u, Node v) const { - return Parent::arc(u, v); - }*/ - - /// \brief Number of nodes. - int nodeNum() const { return Parent::nodeNum(); } - /// \brief Number of arcs. - int64_t arcNum() const { return Parent::arcNum(); } - }; - - - - -} //namespace lemon - - -#endif //LEMON_FULL_GRAPH_H diff --git a/network_simplex/network_simplex_simple.h b/network_simplex/network_simplex_simple.h deleted file mode 100644 index e1c5d996..00000000 --- a/network_simplex/network_simplex_simple.h +++ /dev/null @@ -1,1580 +0,0 @@ -/* -*- mode: C++; indent-tabs-mode: nil; -*- -* -* -* This file has been adapted by Nicolas Bonneel (2013), -* from network_simplex.h from LEMON, a generic C++ optimization library, -* to implement a lightweight network simplex for mass transport, more -* memory efficient than the original file. A previous version of this file -* is used as part of the Displacement Interpolation project, -* Web: http://www.cs.ubc.ca/labs/imager/tr/2011/DisplacementInterpolation/ -* -* Revisions: -* March 2015: added OpenMP parallelization -* March 2017: included Antoine Rolet's trick to make it more robust -* April 2018: IMPORTANT bug fix + uses 64bit integers (slightly slower but less risks of overflows), updated to a newer version of the algo by LEMON, sparse flow by default + minor edits. -* -* -**** Original file Copyright Notice : -* -* Copyright (C) 2003-2010 -* Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport -* (Egervary Research Group on Combinatorial Optimization, EGRES). -* -* Permission to use, modify and distribute this software is granted -* provided that this copyright notice appears in all copies. For -* precise terms see the accompanying LICENSE file. -* -* This software is provided "AS IS" with no warranty of any kind, -* express or implied, and with no claim as to its suitability for any -* purpose. -* -*/ - -#ifndef LEMON_NETWORK_SIMPLEX_SIMPLE_H -#define LEMON_NETWORK_SIMPLEX_SIMPLE_H - - -/// \ingroup min_cost_flow_algs -/// -/// \file -/// \brief Network Simplex algorithm for finding a minimum cost flow. - -// if your compiler has troubles with unorderedmaps, just comment the following line to use a slower std::map instead -#define HASHMAP // now handled with unorderedmaps instead of stdext::hash_map. Should be better supported. 
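A minimal sketch of the fallback this toggle describes, simplified from the DEFAULT_SPARSE_MAP selection defined just below (the `sparse_map` alias is illustrative, not from the original header):

    #include <map>
    #include <unordered_map>

    #define HASHMAP // comment out if <unordered_map> gives your compiler trouble

    #ifdef HASHMAP
    template <typename K, typename V> using sparse_map = std::unordered_map<K, V>;
    #else
    template <typename K, typename V> using sparse_map = std::map<K, V>; // slower, ordered fallback
    #endif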
- -#define SPARSE_FLOW // a sparse flow vector will be 10-15% slower for small problems but uses less memory and becomes faster for large problems (40k total nodes) - -#include -#include -#include -#ifdef HASHMAP -#include -#else -#include -#endif -#ifdef _OPENMP -#include -#endif -#include - - -#include "full_bipartitegraph.h" - -#define INVALIDNODE -1 -#define INVALID (-1) - - -namespace lemon { -#ifndef DEFAULT_SPARSE_MAP - #ifdef HASHMAP -#define DEFAULT_SPARSE_MAP std::unordered_map - #else -#define DEFAULT_SPARSE_MAP std::map - #endif -#endif - - template class Map=DEFAULT_SPARSE_MAP> - class ProxyObject; - - template class Map=DEFAULT_SPARSE_MAP> - class SparseValueVector - { - public: - template - SparseValueVector(Args &&...) // parameter n for compatibility with standard vectors - { - } - - template - void resize(Args &&...) {/* does nothing */} - T operator[](const size_t id) const - { - auto it = data.find(id); - if (it == data.end()) - return 0; - else - return it->second; - } - - ProxyObject operator[](const size_t id) - { - return ProxyObject(this, id); - } - - //private: - Map data; - }; - - template class Map> - class ProxyObject { - public: - ProxyObject(SparseValueVector *v, size_t idx) { _v = v; _idx = idx; }; - ProxyObject & operator=(const T &v) { - // If we get here, we know that operator[] was called to perform a write access, - // so we can insert an item in the vector if needed - if (v != 0) - _v->data[_idx] = v; - return *this; - } - - operator T() { - // If we get here, we know that operator[] was called to perform a read access, - // so we can simply return the existing object - auto it = _v->data.find(_idx); - if (it == _v->data.end()) - return 0; - else - return it->second; - } - - void operator+=(T val) - { - if (val == 0) return; - auto it = _v->data.find(_idx); - if (it == _v->data.end()) - _v->data[_idx] = val; - else - { - T sum = it->second + val; - if (sum == 0) - _v->data.erase(it); - else - it->second = sum; - } - } - void operator-=(T val) - { - if (val == 0) return; - auto it = _v->data.find(_idx); - if (it == _v->data.end()) - _v->data[_idx] = -val; - else - { - T sum = it->second - val; - if (sum == 0) - _v->data.erase(it); - else - it->second = sum; - } - } - - SparseValueVector *_v; - size_t _idx; - }; - - - - /// \addtogroup min_cost_flow_algs - /// @{ - - /// \brief Implementation of the primal Network Simplex algorithm - /// for finding a \ref min_cost_flow "minimum cost flow". - /// - /// \ref NetworkSimplexSimple implements the primal Network Simplex algorithm - /// for finding a \ref min_cost_flow "minimum cost flow" - /// \ref amo93networkflows, \ref dantzig63linearprog, - /// \ref kellyoneill91netsimplex. - /// This algorithm is a highly efficient specialized version of the - /// linear programming simplex method directly for the minimum cost - /// flow problem. - /// - /// In general, %NetworkSimplexSimple is the fastest implementation available - /// in LEMON for this problem. - /// Moreover, it supports both directions of the supply/demand inequality - /// constraints. For more information, see \ref SupplyType. - /// - /// Most of the parameters of the problem (except for the digraph) - /// can be given using separate functions, and the algorithm can be - /// executed using the \ref run() function. If some parameters are not - /// specified, then default values will be used. - /// - /// \tparam GR The digraph type the algorithm runs on. 
- /// \tparam V The number type used for flow amounts, capacity bounds - /// and supply values in the algorithm. By default, it is \c int. - /// \tparam C The number type used for costs and potentials in the - /// algorithm. By default, it is the same as \c V. - /// - /// \warning Both number types must be signed and all input data must - /// be integer. - /// - /// \note %NetworkSimplexSimple provides five different pivot rule - /// implementations, from which the most efficient one is used - /// by default. For more information, see \ref PivotRule. - template class Map=DEFAULT_SPARSE_MAP> - class NetworkSimplexSimple - { - public: - - /// \brief Constructor. - /// - /// The constructor of the class. - /// - /// \param graph The digraph the algorithm runs on. - /// \param arc_mixing Indicate if the arcs have to be stored in a - /// mixed order in the internal data structure. - /// In special cases, it could lead to better overall performance, - /// but it is usually slower. Therefore it is disabled by default. - NetworkSimplexSimple(const GR& graph, bool arc_mixing, int nbnodes, ArcsType nb_arcs, size_t maxiters = 0) : - _graph(graph), //_arc_id(graph), - _arc_mixing(arc_mixing), _init_nb_nodes(nbnodes), _init_nb_arcs(nb_arcs) - { - // Reset data structures - reset(); - max_iter = maxiters; - } - - /// The type of the flow amounts, capacity bounds and supply values - typedef V Value; - /// The type of the arc costs - typedef C Cost; - - public: - - /// \brief Problem type constants for the \c run() function. - /// - /// Enum type containing the problem type constants that can be - /// returned by the \ref run() function of the algorithm. - enum ProblemType { - /// The problem has no feasible solution (flow). - INFEASIBLE, - /// The problem has optimal solution (i.e. it is feasible and - /// bounded), and the algorithm has found optimal flow and node - /// potentials (primal and dual solutions). - OPTIMAL, - /// The objective function of the problem is unbounded, i.e. - /// there is a directed cycle having negative total cost and - /// infinite upper bound. - UNBOUNDED - }; - - /// \brief Constants for selecting the type of the supply constraints. - /// - /// Enum type containing constants for selecting the supply type, - /// i.e. the direction of the inequalities in the supply/demand - /// constraints of the \ref min_cost_flow "minimum cost flow problem". - /// - /// The default supply type is \c GEQ, the \c LEQ type can be - /// selected using \ref supplyType(). - /// The equality form is a special case of both supply types. - enum SupplyType { - /// This option means that there are "greater or equal" - /// supply/demand constraints in the definition of the problem. - GEQ, - /// This option means that there are "less or equal" - /// supply/demand constraints in the definition of the problem. 
- LEQ - }; - - - - private: - size_t max_iter; - TEMPLATE_DIGRAPH_TYPEDEFS(GR); - - typedef std::vector IntVector; - typedef std::vector ArcVector; - typedef std::vector ValueVector; - typedef std::vector CostVector; - // typedef SparseValueVector CostVector; - typedef std::vector BoolVector; - // Note: vector is used instead of vector for efficiency reasons - - // State constants for arcs - enum ArcState { - STATE_UPPER = -1, - STATE_TREE = 0, - STATE_LOWER = 1 - }; - - typedef std::vector StateVector; - // Note: vector is used instead of vector for - // efficiency reasons - - private: - - // Data related to the underlying digraph - const GR &_graph; - int _node_num; - ArcsType _arc_num; - ArcsType _all_arc_num; - ArcsType _search_arc_num; - - // Parameters of the problem - SupplyType _stype; - Value _sum_supply; - - inline int _node_id(int n) const { return _node_num - n - 1; }; - - //IntArcMap _arc_id; - IntVector _source; // keep nodes as integers - IntVector _target; - bool _arc_mixing; - - // Node and arc data - CostVector _cost; - ValueVector _supply; -#ifdef SPARSE_FLOW - SparseValueVector _flow; -#else - ValueVector _flow; -#endif - - CostVector _pi; - - // Data for storing the spanning tree structure - IntVector _parent; - ArcVector _pred; - IntVector _thread; - IntVector _rev_thread; - IntVector _succ_num; - IntVector _last_succ; - IntVector _dirty_revs; - BoolVector _forward; - StateVector _state; - ArcsType _root; - - // Temporary data used in the current pivot iteration - ArcsType in_arc, join, u_in, v_in, u_out, v_out; - ArcsType first, second, right, last; - ArcsType stem, par_stem, new_stem; - Value delta; - - static constexpr Value MAX_VAL = std::numeric_limits::max(); - - ArcsType mixingCoeff; - - public: - - /// \brief Constant for infinite upper bounds (capacities). - /// - /// Constant for infinite upper bounds (capacities). - /// It is \c std::numeric_limits::infinity() if available, - /// \c std::numeric_limits::max() otherwise. - static constexpr Value INF = std::numeric_limits::has_infinity ? std::numeric_limits::infinity() : MAX_VAL; - - private: - - // thank you to DVK and MizardX from StackOverflow for this function! 
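    // sequence(k) maps position k of the original arc enumeration to its slot
    // in the mixed order: arcs are split into subsequences of length
    // subsequence_length (the first num_big_subsequences blocks) or one less,
    // and entries of a subsequence are laid out mixingCoeff apart, so a linear
    // scan of the mixed array visits arcs spread across the whole cost matrix.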
- inline ArcsType sequence(ArcsType k) const { - ArcsType smallv = (k > num_total_big_subsequence_numbers) & 1; - - k -= num_total_big_subsequence_numbers * smallv; - ArcsType subsequence_length2 = subsequence_length - smallv; - ArcsType subsequence_num = (k / subsequence_length2) + num_big_subsequences * smallv; - ArcsType subsequence_offset = (k % subsequence_length2) * mixingCoeff; - - return subsequence_offset + subsequence_num; - } - ArcsType subsequence_length; - ArcsType num_big_subsequences; - ArcsType num_total_big_subsequence_numbers; - - inline ArcsType getArcID(const Arc &arc) const - { - //int n = _arc_num-arc._id-1; - ArcsType n = _arc_num - GR::id(arc) - 1; - - //ArcsType a = mixingCoeff*(n%mixingCoeff) + n/mixingCoeff; - //ArcsType b = _arc_id[arc]; - if (_arc_mixing) - return sequence(n); - else - return n; - } - - // finally unused because too slow - inline ArcsType getSource(const ArcsType arc) const - { - //ArcsType a = _source[arc]; - //return a; - - ArcsType n = _arc_num - arc - 1; - if (_arc_mixing) - n = mixingCoeff*(n%mixingCoeff) + n / mixingCoeff; - - ArcsType b; - if (n >= 0) - b = _node_id(_graph.source(GR::arcFromId(n))); - else - { - n = arc + 1 - _arc_num; - if (n <= _node_num) - b = _node_num; - else - if (n >= _graph._n1) - b = _graph._n1; - else - b = _graph._n1 - n; - } - - return b; - } - - - - // Implementation of the Block Search pivot rule - class BlockSearchPivotRule - { - private: - - // References to the NetworkSimplexSimple class - const IntVector &_source; - const IntVector &_target; - const CostVector &_cost; - const StateVector &_state; - const CostVector &_pi; - ArcsType &_in_arc; - ArcsType _search_arc_num; - - // Pivot rule data - ArcsType _block_size; - ArcsType _next_arc; - NetworkSimplexSimple &_ns; - - public: - - // Constructor - BlockSearchPivotRule(NetworkSimplexSimple &ns) : - _source(ns._source), _target(ns._target), - _cost(ns._cost), _state(ns._state), _pi(ns._pi), - _in_arc(ns.in_arc), _search_arc_num(ns._search_arc_num), - _next_arc(0), _ns(ns) - { - // The main parameters of the pivot rule - const double BLOCK_SIZE_FACTOR = 1; - const ArcsType MIN_BLOCK_SIZE = 10; - - _block_size = std::max(ArcsType(BLOCK_SIZE_FACTOR * std::sqrt(double(_search_arc_num))), MIN_BLOCK_SIZE); - } - - // Find next entering arc - bool findEnteringArc() { - Cost min_val = 0; - -#ifdef _OPENMP - ArcsType N = omp_get_max_threads(); - std::vector minArray(N, 0); - std::vector arcId(N); - ArcsType bs = (ArcsType)ceil(_block_size / (double)N); -#else - static constexpr ArcsType N = 1; - std::array minArray{Cost(0)}; - std::array arcId{0}; -#endif - - for (ArcsType i = 0; i < _search_arc_num; i += _block_size) { - - ArcsType e; - ArcsType j; -#ifdef _OPENMP -#pragma omp parallel - { - int t = omp_get_thread_num(); - -#pragma omp for schedule(static, bs) lastprivate(e) - for (j = 0; j < std::min(i + _block_size, _search_arc_num) - i; j++) { - e = (_next_arc + i + j); if (e >= _search_arc_num) e -= _search_arc_num; - Cost c = _state[e] * (_cost[e] + _pi[_source[e]] - _pi[_target[e]]); - if (c < minArray[t]) { - minArray[t] = c; - arcId[t] = e; - } - } - } - for (int j = 0; j < N; j++) { - if (minArray[j] < min_val) { - min_val = minArray[j]; - _in_arc = arcId[j]; - } - } -#else - { - - for (j = 0; j < std::min(i + _block_size, _search_arc_num) - i; j++) { - e = (_next_arc + i + j); if (e >= _search_arc_num) e -= _search_arc_num; - Cost c = _state[e] * (_cost[e] + _pi[_source[e]] - _pi[_target[e]]); - if (c < minArray[0]) { - minArray[0] = c; - arcId[0] = e; 
- } - } - } - min_val = minArray[0]; - _in_arc = arcId[0]; -#endif - Cost a = std::abs(_pi[_source[_in_arc]]) > std::abs(_pi[_target[_in_arc]]) ? std::abs(_pi[_source[_in_arc]]) : std::abs(_pi[_target[_in_arc]]); - a = a > std::abs(_cost[_in_arc]) ? a : std::abs(_cost[_in_arc]); - if (min_val < -std::numeric_limits::epsilon()*a) { - _next_arc = e; - return true; - } - } - - Cost a = fabs(_pi[_source[_in_arc]]) > fabs(_pi[_target[_in_arc]]) ? fabs(_pi[_source[_in_arc]]) : fabs(_pi[_target[_in_arc]]); - a = a > fabs(_cost[_in_arc]) ? a : fabs(_cost[_in_arc]); - if (min_val >= -std::numeric_limits::epsilon()*a) return false; - - return true; - } - - - // Find next entering arc - /*bool findEnteringArc() { - Cost min_val = 0; - int N = omp_get_max_threads(); - std::vector minArray(N); - std::vector arcId(N); - - ArcsType bs = (ArcsType)ceil(_block_size / (double)N); - for (ArcsType i = 0; i < _search_arc_num; i += _block_size) { - - ArcsType maxJ = std::min(i + _block_size, _search_arc_num) - i; - ArcsType j; -#pragma omp parallel - { - int t = omp_get_thread_num(); - Cost minV = 0; - ArcsType arcStart = _next_arc + i; - ArcsType arc = -1; -#pragma omp for schedule(static, bs) - for (j = 0; j < maxJ; j++) { - ArcsType e = arcStart + j; if (e >= _search_arc_num) e -= _search_arc_num; - Cost c = _state[e] * (_cost[e] + _pi[_source[e]] - _pi[_target[e]]); - if (c < minV) { - minV = c; - arc = e; - } - } - - minArray[t] = minV; - arcId[t] = arc; - } - for (int j = 0; j < N; j++) { - if (minArray[j] < min_val) { - min_val = minArray[j]; - _in_arc = arcId[j]; - } - } - - //FIX by Antoine Rolet to avoid precision issues - Cost a = std::max(std::abs(_cost[_in_arc]), std::max(std::abs(_pi[_source[_in_arc]]), std::abs(_pi[_target[_in_arc]]))); - if (min_val <-std::numeric_limits::epsilon()*a) { - _next_arc = _next_arc + i + maxJ - 1; - if (_next_arc >= _search_arc_num) _next_arc -= _search_arc_num; - return true; - } - } - - if (min_val >= 0) { - return false; - } - - return true; - }*/ - - - /*bool findEnteringArc() { - Cost c, min = 0; - int cnt = _block_size; - int e, min_arc = _next_arc; - for (e = _next_arc; e < _search_arc_num; ++e) { - c = _state[e] * (_cost[e] + _pi[_source[e]] - _pi[_target[e]]); - if (c < min) { - min = c; - min_arc = e; - - } - if (--cnt == 0) { - if (min < 0) break; - cnt = _block_size; - - } - - } - if (min == 0 || cnt > 0) { - for (e = 0; e < _next_arc; ++e) { - c = _state[e] * (_cost[e] + _pi[_source[e]] - _pi[_target[e]]); - if (c < min) { - min = c; - min_arc = e; - - } - if (--cnt == 0) { - if (min < 0) break; - cnt = _block_size; - - } - - } - - } - if (min >= 0) return false; - _in_arc = min_arc; - _next_arc = e; - return true; - }*/ - - - - }; //class BlockSearchPivotRule - - - - public: - - - - int _init_nb_nodes; - ArcsType _init_nb_arcs; - - /// \name Parameters - /// The parameters of the algorithm can be specified using these - /// functions. - - /// @{ - - - /// \brief Set the costs of the arcs. - /// - /// This function sets the costs of the arcs. - /// If it is not used before calling \ref run(), the costs - /// will be set to \c 1 on all arcs. - /// - /// \param map An arc map storing the costs. - /// Its \c Value type must be convertible to the \c Cost type - /// of the algorithm. - /// - /// \return (*this) - template - NetworkSimplexSimple& costMap(const CostMap& map) { - Arc a; _graph.first(a); - for (; a != INVALID; _graph.next(a)) { - _cost[getArcID(a)] = map[a]; - } - return *this; - } - - - /// \brief Set the costs of one arc. 
- /// - /// This function sets the costs of one arcs. - /// Done for memory reasons - /// - /// \param arc An arc. - /// \param arc A cost - /// - /// \return (*this) - template - NetworkSimplexSimple& setCost(const Arc& arc, const Value cost) { - _cost[getArcID(arc)] = cost; - return *this; - } - - - /// \brief Set the supply values of the nodes. - /// - /// This function sets the supply values of the nodes. - /// If neither this function nor \ref stSupply() is used before - /// calling \ref run(), the supply of each node will be set to zero. - /// - /// \param map A node map storing the supply values. - /// Its \c Value type must be convertible to the \c Value type - /// of the algorithm. - /// - /// \return (*this) - template - NetworkSimplexSimple& supplyMap(const SupplyMap& map) { - Node n; _graph.first(n); - for (; n != INVALIDNODE; _graph.next(n)) { - _supply[_node_id(n)] = map[n]; - } - return *this; - } - template - NetworkSimplexSimple& supplyMap(const SupplyMap* map1, int n1, const SupplyMap* map2, int) { - Node n; _graph.first(n); - for (; n != INVALIDNODE; _graph.next(n)) { - if (n - NetworkSimplexSimple& supplyMapAll(SupplyMap val1, int n1, SupplyMap val2, int) { - Node n; _graph.first(n); - for (; n != INVALIDNODE; _graph.next(n)) { - if (n(*this) - NetworkSimplexSimple& stSupply(const Node& s, const Node& t, Value k) { - for (int i = 0; i != _node_num; ++i) { - _supply[i] = 0; - } - _supply[_node_id(s)] = k; - _supply[_node_id(t)] = -k; - return *this; - } - - /// \brief Set the type of the supply constraints. - /// - /// This function sets the type of the supply/demand constraints. - /// If it is not used before calling \ref run(), the \ref GEQ supply - /// type will be used. - /// - /// For more information, see \ref SupplyType. - /// - /// \return (*this) - NetworkSimplexSimple& supplyType(SupplyType supply_type) { - _stype = supply_type; - return *this; - } - - /// @} - - /// \name Execution Control - /// The algorithm can be executed using \ref run(). - - /// @{ - - /// \brief Run the algorithm. - /// - /// This function runs the algorithm. - /// The paramters can be specified using functions \ref lowerMap(), - /// \ref upperMap(), \ref costMap(), \ref supplyMap(), \ref stSupply(), - /// \ref supplyType(). - /// For example, - /// \code - /// NetworkSimplexSimple ns(graph); - /// ns.lowerMap(lower).upperMap(upper).costMap(cost) - /// .supplyMap(sup).run(); - /// \endcode - /// - /// This function can be called more than once. All the given parameters - /// are kept for the next call, unless \ref resetParams() or \ref reset() - /// is used, thus only the modified parameters have to be set again. - /// If the underlying digraph was also modified after the construction - /// of the class (or the last \ref reset() call), then the \ref reset() - /// function must be called. - /// - /// \param pivot_rule The pivot rule that will be used during the - /// algorithm. For more information, see \ref PivotRule. - /// - /// \return \c INFEASIBLE if no feasible flow exists, - /// \n \c OPTIMAL if the problem has optimal solution - /// (i.e. it is feasible and bounded), and the algorithm has found - /// optimal flow and node potentials (primal and dual solutions), - /// \n \c UNBOUNDED if the objective function of the problem is - /// unbounded, i.e. there is a directed cycle having negative total - /// cost and infinite upper bound. 
- /// - /// \see ProblemType, PivotRule - /// \see resetParams(), reset() - ProblemType run() { - if (!init()) return INFEASIBLE; - return start(); - } - - /// \brief Reset all the parameters that have been given before. - /// - /// This function resets all the paramaters that have been given - /// before using functions \ref lowerMap(), \ref upperMap(), - /// \ref costMap(), \ref supplyMap(), \ref stSupply(), \ref supplyType(). - /// - /// It is useful for multiple \ref run() calls. Basically, all the given - /// parameters are kept for the next \ref run() call, unless - /// \ref resetParams() or \ref reset() is used. - /// If the underlying digraph was also modified after the construction - /// of the class or the last \ref reset() call, then the \ref reset() - /// function must be used, otherwise \ref resetParams() is sufficient. - /// - /// For example, - /// \code - /// NetworkSimplexSimple ns(graph); - /// - /// // First run - /// ns.lowerMap(lower).upperMap(upper).costMap(cost) - /// .supplyMap(sup).run(); - /// - /// // Run again with modified cost map (resetParams() is not called, - /// // so only the cost map have to be set again) - /// cost[e] += 100; - /// ns.costMap(cost).run(); - /// - /// // Run again from scratch using resetParams() - /// // (the lower bounds will be set to zero on all arcs) - /// ns.resetParams(); - /// ns.upperMap(capacity).costMap(cost) - /// .supplyMap(sup).run(); - /// \endcode - /// - /// \return (*this) - /// - /// \see reset(), run() - NetworkSimplexSimple& resetParams() { - for (int i = 0; i != _node_num; ++i) { - _supply[i] = 0; - } - for (ArcsType i = 0; i != _arc_num; ++i) { - _cost[i] = 1; - } - _stype = GEQ; - return *this; - } - - - /// \brief Reset the internal data structures and all the parameters - /// that have been given before. - /// - /// This function resets the internal data structures and all the - /// paramaters that have been given before using functions \ref lowerMap(), - /// \ref upperMap(), \ref costMap(), \ref supplyMap(), \ref stSupply(), - /// \ref supplyType(). - /// - /// It is useful for multiple \ref run() calls. Basically, all the given - /// parameters are kept for the next \ref run() call, unless - /// \ref resetParams() or \ref reset() is used. - /// If the underlying digraph was also modified after the construction - /// of the class or the last \ref reset() call, then the \ref reset() - /// function must be used, otherwise \ref resetParams() is sufficient. - /// - /// See \ref resetParams() for examples. 
- /// - /// \return (*this) - /// - /// \see resetParams(), run() - NetworkSimplexSimple& reset() { - // Resize vectors - _node_num = _init_nb_nodes; - _arc_num = _init_nb_arcs; - int all_node_num = _node_num + 1; - ArcsType max_arc_num = _arc_num + 2 * _node_num; - - _source.resize(max_arc_num); - _target.resize(max_arc_num); - - _cost.resize(max_arc_num); - _supply.resize(all_node_num); - _flow.resize(max_arc_num); - _pi.resize(all_node_num); - - _parent.resize(all_node_num); - _pred.resize(all_node_num); - _forward.resize(all_node_num); - _thread.resize(all_node_num); - _rev_thread.resize(all_node_num); - _succ_num.resize(all_node_num); - _last_succ.resize(all_node_num); - _state.resize(max_arc_num); - - - //_arc_mixing=false; - if (_arc_mixing && _node_num > 1) { - // Store the arcs in a mixed order - //ArcsType k = std::max(ArcsType(std::sqrt(double(_arc_num))), ArcsType(10)); - const ArcsType k = std::max(ArcsType(_arc_num / _node_num), ArcsType(3)); - mixingCoeff = k; - subsequence_length = _arc_num / mixingCoeff + 1; - num_big_subsequences = _arc_num % mixingCoeff; - num_total_big_subsequence_numbers = subsequence_length * num_big_subsequences; - -#ifdef _OPENMP -#pragma omp parallel for schedule(static) -#endif - for (Arc a = 0; a <= _graph.maxArcId(); a++) { // --a <=> _graph.next(a) , -1 == INVALID - ArcsType i = sequence(_graph.maxArcId()-a); - _source[i] = _node_id(_graph.source(a)); - _target[i] = _node_id(_graph.target(a)); - } - } else { - // Store the arcs in the original order - ArcsType i = 0; - Arc a; _graph.first(a); - for (; a != INVALID; _graph.next(a), ++i) { - _source[i] = _node_id(_graph.source(a)); - _target[i] = _node_id(_graph.target(a)); - //_arc_id[a] = i; - } - } - - // Reset parameters - resetParams(); - return *this; - } - - /// @} - - /// \name Query Functions - /// The results of the algorithm can be obtained using these - /// functions.\n - /// The \ref run() function must be called before using them. - - /// @{ - - /// \brief Return the total cost of the found flow. - /// - /// This function returns the total cost of the found flow. - /// Its complexity is O(e). - /// - /// \note The return type of the function can be specified as a - /// template parameter. For example, - /// \code - /// ns.totalCost(); - /// \endcode - /// It is useful if the total cost cannot be stored in the \c Cost - /// type of the algorithm, which is the default return type of the - /// function. - /// - /// \pre \ref run() must be called before using this function. - /*template - Number totalCost() const { - Number c = 0; - for (ArcIt a(_graph); a != INVALID; ++a) { - int i = getArcID(a); - c += Number(_flow[i]) * Number(_cost[i]); - } - return c; - }*/ - - template - Number totalCost() const { - Number c = 0; - -#ifdef SPARSE_FLOW - for (auto it = _flow.data.begin(); it!=_flow.data.end(); ++it) - c += Number(it->second) * Number(_cost[it->first]); - return c; -#else - for (ArcsType i = 0; i<_flow.size(); i++) - c += _flow[i] * Number(_cost[i]); - return c; -#endif - } - -#ifndef DOXYGEN - Cost totalCost() const { - return totalCost(); - } -#endif - - /// \brief Return the flow on the given arc. - /// - /// This function returns the flow on the given arc. - /// - /// \pre \ref run() must be called before using this function. - Value flow(const Arc& a) const { - return _flow[getArcID(a)]; - } - - /// \brief Return the flow map (the primal solution). - /// - /// This function copies the flow value on each arc into the given - /// map. 
The \c Value type of the algorithm must be convertible to - /// the \c Value type of the map. - /// - /// \pre \ref run() must be called before using this function. - template - void flowMap(FlowMap &map) const { - Arc a; _graph.first(a); - for (; a != INVALID; _graph.next(a)) { - map.set(a, _flow[getArcID(a)]); - } - } - - /// \brief Return the potential (dual value) of the given node. - /// - /// This function returns the potential (dual value) of the - /// given node. - /// - /// \pre \ref run() must be called before using this function. - Cost potential(const Node& n) const { - return _pi[_node_id(n)]; - } - - /// \brief Return the potential map (the dual solution). - /// - /// This function copies the potential (dual value) of each node - /// into the given map. - /// The \c Cost type of the algorithm must be convertible to the - /// \c Value type of the map. - /// - /// \pre \ref run() must be called before using this function. - template - void potentialMap(PotentialMap &map) const { - Node n; _graph.first(n); - for (; n != INVALID; _graph.next(n)) { - map.set(n, _pi[_node_id(n)]); - } - } - - /// @} - - private: - - // Initialize internal data structures - bool init() { - if (_node_num == 0) return false; - - // Check the sum of supply values - _sum_supply = 0; - for (int i = 0; i != _node_num; ++i) { - _sum_supply += _supply[i]; - } - /*if (!((_stype == GEQ && _sum_supply <= 0) || - (_stype == LEQ && _sum_supply >= 0))) return false;*/ - - - // Initialize artifical cost - Cost ART_COST; - if (std::numeric_limits::is_exact) { - ART_COST = std::numeric_limits::max() / 2 + 1; - } else { - ART_COST = 0; - for (ArcsType i = 0; i != _arc_num; ++i) { - if (_cost[i] > ART_COST) ART_COST = _cost[i]; - } - ART_COST = (ART_COST + 1) * _node_num; - } - - // Initialize arc maps - for (ArcsType i = 0; i != _arc_num; ++i) { -#ifndef SPARSE_FLOW - _flow[i] = 0; //by default, the sparse matrix is empty -#endif - _state[i] = STATE_LOWER; - } -#ifdef SPARSE_FLOW - _flow = SparseValueVector(); -#endif - - // Set data for the artificial root node - _root = _node_num; - _parent[_root] = -1; - _pred[_root] = -1; - _thread[_root] = 0; - _rev_thread[0] = _root; - _succ_num[_root] = _node_num + 1; - _last_succ[_root] = _root - 1; - _supply[_root] = -_sum_supply; - _pi[_root] = 0; - - // Add artificial arcs and initialize the spanning tree data structure - if (_sum_supply == 0) { - // EQ supply constraints - _search_arc_num = _arc_num; - _all_arc_num = _arc_num + _node_num; - for (ArcsType u = 0, e = _arc_num; u != static_cast(_node_num); ++u, ++e) { - _parent[u] = _root; - _pred[u] = e; - _thread[u] = u + 1; - _rev_thread[u + 1] = u; - _succ_num[u] = 1; - _last_succ[u] = u; - _state[e] = STATE_TREE; - if (_supply[u] >= 0) { - _forward[u] = true; - _pi[u] = 0; - _source[e] = u; - _target[e] = _root; - _flow[e] = _supply[u]; - _cost[e] = 0; - } else { - _forward[u] = false; - _pi[u] = ART_COST; - _source[e] = _root; - _target[e] = u; - _flow[e] = -_supply[u]; - _cost[e] = ART_COST; - } - } - } else if (_sum_supply > 0) { - // LEQ supply constraints - _search_arc_num = _arc_num + _node_num; - ArcsType f = _arc_num + _node_num; - for (ArcsType u = 0, e = _arc_num; u != static_cast(_node_num); ++u, ++e) { - _parent[u] = _root; - _thread[u] = u + 1; - _rev_thread[u + 1] = u; - _succ_num[u] = 1; - _last_succ[u] = u; - if (_supply[u] >= 0) { - _forward[u] = true; - _pi[u] = 0; - _pred[u] = e; - _source[e] = u; - _target[e] = _root; - _flow[e] = _supply[u]; - _cost[e] = 0; - _state[e] = STATE_TREE; - } else { - 
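                // Negative supply: route the deficit through an artificial root
                // arc f (cost ART_COST, placed on the spanning tree), and leave
                // the plain arc e at its lower bound with zero flow.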
_forward[u] = false; - _pi[u] = ART_COST; - _pred[u] = f; - _source[f] = _root; - _target[f] = u; - _flow[f] = -_supply[u]; - _cost[f] = ART_COST; - _state[f] = STATE_TREE; - _source[e] = u; - _target[e] = _root; - //_flow[e] = 0; //by default, the sparse matrix is empty - _cost[e] = 0; - _state[e] = STATE_LOWER; - ++f; - } - } - _all_arc_num = f; - } else { - // GEQ supply constraints - _search_arc_num = _arc_num + _node_num; - ArcsType f = _arc_num + _node_num; - for (ArcsType u = 0, e = _arc_num; u != static_cast(_node_num); ++u, ++e) { - _parent[u] = _root; - _thread[u] = u + 1; - _rev_thread[u + 1] = u; - _succ_num[u] = 1; - _last_succ[u] = u; - if (_supply[u] <= 0) { - _forward[u] = false; - _pi[u] = 0; - _pred[u] = e; - _source[e] = _root; - _target[e] = u; - _flow[e] = -_supply[u]; - _cost[e] = 0; - _state[e] = STATE_TREE; - } else { - _forward[u] = true; - _pi[u] = -ART_COST; - _pred[u] = f; - _source[f] = u; - _target[f] = _root; - _flow[f] = _supply[u]; - _state[f] = STATE_TREE; - _cost[f] = ART_COST; - _source[e] = _root; - _target[e] = u; - //_flow[e] = 0; //by default, the sparse matrix is empty - _cost[e] = 0; - _state[e] = STATE_LOWER; - ++f; - } - } - _all_arc_num = f; - } - - return true; - } - - // Find the join node - void findJoinNode() { - int u = _source[in_arc]; - int v = _target[in_arc]; - while (u != v) { - if (_succ_num[u] < _succ_num[v]) { - u = _parent[u]; - } else { - v = _parent[v]; - } - } - join = u; - } - - // Find the leaving arc of the cycle and returns true if the - // leaving arc is not the same as the entering arc - bool findLeavingArc() { - // Initialize first and second nodes according to the direction - // of the cycle - if (_state[in_arc] == STATE_LOWER) { - first = _source[in_arc]; - second = _target[in_arc]; - } else { - first = _target[in_arc]; - second = _source[in_arc]; - } - delta = INF; - char result = 0; - Value d; - ArcsType e; - - // Search the cycle along the path form the first node to the root - for (auto u = first; u != join; u = _parent[u]) { - e = _pred[u]; - d = _forward[u] ? _flow[e] : INF; - if (d < delta) { - delta = d; - u_out = u; - result = 1; - } - } - // Search the cycle along the path form the second node to the root - for (int u = second; u != join; u = _parent[u]) { - e = _pred[u]; - d = _forward[u] ? INF : _flow[e]; - if (d <= delta) { - delta = d; - u_out = u; - result = 2; - } - } - - if (result == 1) { - u_in = first; - v_in = second; - } else { - u_in = second; - v_in = first; - } - return result != 0; - } - - // Change _flow and _state vectors - void changeFlow(bool change) { - // Augment along the cycle - if (delta > 0) { - Value val = _state[in_arc] * delta; - _flow[in_arc] += val; - for (auto u = _source[in_arc]; u != join; u = _parent[u]) { - _flow[_pred[u]] += _forward[u] ? -val : val; - } - for (auto u = _target[in_arc]; u != join; u = _parent[u]) { - _flow[_pred[u]] += _forward[u] ? val : -val; - } - } - // Update the state of the entering and leaving arcs - if (change) { - _state[in_arc] = STATE_TREE; - _state[_pred[u_out]] = - (_flow[_pred[u_out]] == 0) ? 
STATE_LOWER : STATE_UPPER; - } else { - _state[in_arc] = -_state[in_arc]; - } - } - - // Update the tree structure - void updateTreeStructure() { - int old_rev_thread = _rev_thread[u_out]; - int old_succ_num = _succ_num[u_out]; - int old_last_succ = _last_succ[u_out]; - v_out = _parent[u_out]; - - // Check if u_in and u_out coincide - if (u_in == u_out) { - // Update _parent, _pred, _pred_dir - _parent[u_in] = v_in; - _pred[u_in] = in_arc; - _forward[u_in] = (u_in == _source[in_arc]); - - // Update _thread and _rev_thread - if (_thread[v_in] != u_out) { - ArcsType after = _thread[old_last_succ]; - _thread[old_rev_thread] = after; - _rev_thread[after] = old_rev_thread; - after = _thread[v_in]; - _thread[v_in] = u_out; - _rev_thread[u_out] = v_in; - _thread[old_last_succ] = after; - _rev_thread[after] = old_last_succ; - } - } else { - // Handle the case when old_rev_thread equals to v_in - // (it also means that join and v_out coincide) - int thread_continue = old_rev_thread == static_cast(v_in) ? - _thread[old_last_succ] : _thread[v_in]; - - // Update _thread and _parent along the stem nodes (i.e. the nodes - // between u_in and u_out, whose parent have to be changed) - int stem = u_in; // the current stem node - int par_stem = v_in; // the new parent of stem - int next_stem; // the next stem node - int last = _last_succ[u_in]; // the last successor of stem - int before, after = _thread[last]; - _thread[v_in] = u_in; - _dirty_revs.clear(); - _dirty_revs.push_back(v_in); - while (stem != static_cast(u_out)) { - // Insert the next stem node into the thread list - next_stem = _parent[stem]; - _thread[last] = next_stem; - _dirty_revs.push_back(last); - - // Remove the subtree of stem from the thread list - before = _rev_thread[stem]; - _thread[before] = after; - _rev_thread[after] = before; - - // Change the parent node and shift stem nodes - _parent[stem] = par_stem; - par_stem = stem; - stem = next_stem; - - // Update last and after - last = _last_succ[stem] == _last_succ[par_stem] ? - _rev_thread[par_stem] : _last_succ[stem]; - after = _thread[last]; - } - _parent[u_out] = par_stem; - _thread[last] = thread_continue; - _rev_thread[thread_continue] = last; - _last_succ[u_out] = last; - - // Remove the subtree of u_out from the thread list except for - // the case when old_rev_thread equals to v_in - if (old_rev_thread != v_in) { - _thread[old_rev_thread] = after; - _rev_thread[after] = old_rev_thread; - } - - // Update _rev_thread using the new _thread values - for (int i = 0; i != int(_dirty_revs.size()); ++i) { - int u = _dirty_revs[i]; - _rev_thread[_thread[u]] = u; - } - - // Update _pred, _pred_dir, _last_succ and _succ_num for the - // stem nodes from u_out to u_in - int tmp_sc = 0, tmp_ls = _last_succ[u_out]; - for (int u = u_out, p = _parent[u]; u != static_cast(u_in); u = p, p = _parent[u]) { - _pred[u] = _pred[p]; - _forward[u] = !_forward[p]; - tmp_sc += _succ_num[u] - _succ_num[p]; - _succ_num[u] = tmp_sc; - _last_succ[p] = tmp_ls; - } - _pred[u_in] = in_arc; - _forward[u_in] = (static_cast(u_in) == _source[in_arc]); - _succ_num[u_in] = old_succ_num; - } - - // Update _last_succ from v_in towards the root - int up_limit_out = static_cast(_last_succ[join]) == v_in ? 
join : -1; - int last_succ_out = _last_succ[u_out]; - for (int u = v_in; u != -1 && _last_succ[u] == v_in; u = _parent[u]) { - _last_succ[u] = last_succ_out; - } - - // Update _last_succ from v_out towards the root - if (static_cast(join) != old_rev_thread && static_cast(v_in) != old_rev_thread) { - for (int u = v_out; u != up_limit_out && _last_succ[u] == old_last_succ; - u = _parent[u]) { - _last_succ[u] = old_rev_thread; - } - } else if (last_succ_out != old_last_succ) { - for (int u = v_out; u != up_limit_out && _last_succ[u] == old_last_succ; - u = _parent[u]) { - _last_succ[u] = last_succ_out; - } - } - - // Update _succ_num from v_in to join - for (int u = v_in; u != static_cast(join); u = _parent[u]) { - _succ_num[u] += old_succ_num; - } - // Update _succ_num from v_out to join - for (int u = v_out; u != static_cast(join); u = _parent[u]) { - _succ_num[u] -= old_succ_num; - } - } - - void updatePotential() { - Cost sigma = _pi[v_in] - _pi[u_in] - - ((_forward[u_in])?_cost[in_arc]:(-_cost[in_arc])); - int end = _thread[_last_succ[u_in]]; - for (int u = u_in; u != end; u = _thread[u]) { - _pi[u] += sigma; - } - } - - - // Heuristic initial pivots - bool initialPivots() { - Value curr, total = 0; - std::vector supply_nodes, demand_nodes; - Node u; _graph.first(u); - for (; u != INVALIDNODE; _graph.next(u)) { - curr = _supply[_node_id(u)]; - if (curr > 0) { - total += curr; - supply_nodes.push_back(u); - } else if (curr < 0) { - demand_nodes.push_back(u); - } - } - if (_sum_supply > 0) total -= _sum_supply; - if (total <= 0) return true; - - ArcVector arc_vector; - if (_sum_supply >= 0) { - if (supply_nodes.size() == 1 && demand_nodes.size() == 1) { - // Perform a reverse graph search from the sink to the source - //typename GR::template NodeMap reached(_graph, false); - BoolVector reached(_node_num, false); - Node s = supply_nodes[0], t = demand_nodes[0]; - std::vector stack; - reached[t] = true; - stack.push_back(t); - while (!stack.empty()) { - Node u, v = stack.back(); - stack.pop_back(); - if (v == s) break; - Arc a; _graph.firstIn(a, v); - for (; a != INVALID; _graph.nextIn(a)) { - if (reached[u = _graph.source(a)]) continue; - ArcsType j = getArcID(a); - arc_vector.push_back(j); - reached[u] = true; - stack.push_back(u); - } - } - } else { - arc_vector.resize(demand_nodes.size()); - // Find the min. cost incomming arc for each demand node -#ifdef _OPENMP -#pragma omp parallel for -#endif - for (ArcsType i = 0; i < ArcsType(demand_nodes.size()); ++i) { - Node v = demand_nodes[i]; - Cost min_cost = std::numeric_limits::max(); - Arc min_arc = INVALID; - Arc a; _graph.firstIn(a, v); - for (; a != INVALID; _graph.nextIn(a)) { - Cost c = _cost[getArcID(a)]; - if (c < min_cost) { - min_cost = c; - min_arc = a; - } - } - arc_vector[i] = getArcID(min_arc); - } - arc_vector.erase(std::remove(arc_vector.begin(), arc_vector.end(), INVALID), arc_vector.end()); - } - } else { - arc_vector.resize(supply_nodes.size()); - // Find the min. 
cost outgoing arc for each supply node -#ifdef _OPENMP -#pragma omp parallel for -#endif - for (int i = 0; i < int(supply_nodes.size()); ++i) { - Node u = supply_nodes[i]; - Cost min_cost = std::numeric_limits::max(); - Arc min_arc = INVALID; - Arc a; _graph.firstOut(a, u); - for (; a != INVALID; _graph.nextOut(a)) { - Cost c = _cost[getArcID(a)]; - if (c < min_cost) { - min_cost = c; - min_arc = a; - } - } - arc_vector[i] = getArcID(min_arc); - } - arc_vector.erase(std::remove(arc_vector.begin(), arc_vector.end(), INVALID), arc_vector.end()); - } - - // Perform heuristic initial pivots - for (ArcsType i = 0; i != ArcsType(arc_vector.size()); ++i) { - in_arc = arc_vector[i]; - if (_state[in_arc] * (_cost[in_arc] + _pi[_source[in_arc]] - - _pi[_target[in_arc]]) >= 0) continue; - findJoinNode(); - bool change = findLeavingArc(); - if (delta >= MAX_VAL) return false; - changeFlow(change); - if (change) { - updateTreeStructure(); - updatePotential(); - } - } - return true; - } - - // Execute the algorithm - ProblemType start() { - return start(); - } - - template - ProblemType start() { - PivotRuleImpl pivot(*this); - - // Perform heuristic initial pivots - if (!initialPivots()) return UNBOUNDED; - - size_t iter_number = 0; - // Execute the Network Simplex algorithm - while (pivot.findEnteringArc()) { - if ((iter_number <= max_iter&&max_iter > 0) || max_iter<=0) { - iter_number++; - findJoinNode(); - bool change = findLeavingArc(); - if (delta >= MAX_VAL) return UNBOUNDED; - changeFlow(change); - if (change) { - updateTreeStructure(); - updatePotential(); - } - } else break; - } - - // Check feasibility - for (ArcsType e = _search_arc_num; e != _all_arc_num; ++e) { - if (_flow[e] != 0) return INFEASIBLE; - } - - // Shift potentials to meet the requirements of the GEQ/LEQ type - // optimality conditions - if (_sum_supply == 0) { - if (_stype == GEQ) { - Cost max_pot = -std::numeric_limits::max(); - for (ArcsType i = 0; i != static_cast(_node_num); ++i) { - if (_pi[i] > max_pot) max_pot = _pi[i]; - } - if (max_pot > 0) { - for (ArcsType i = 0; i != static_cast(_node_num); ++i) - _pi[i] -= max_pot; - } - } else { - Cost min_pot = std::numeric_limits::max(); - for (ArcsType i = 0; i != static_cast(_node_num); ++i) { - if (_pi[i] < min_pot) min_pot = _pi[i]; - } - if (min_pot < 0) { - for (ArcsType i = 0; i != static_cast(_node_num); ++i) - _pi[i] -= min_pot; - } - } - } - - return OPTIMAL; - } - - }; //class NetworkSimplexSimple - - ///@} - -} //namespace lemon - -#endif //LEMON_NETWORK_SIMPLEX_H diff --git a/python/pycs.cpp b/python/pycs.cpp index 928a853f..3d903986 100644 --- a/python/pycs.cpp +++ b/python/pycs.cpp @@ -1,5 +1,6 @@ #include "pyfgc.h" -#include "minocore/matrix_coreset.h" +#include "minocore/coreset/matrix_coreset.h" +#include "pybind11/numpy.h" using CSType = coresets::CoresetSampler; using FNA = py::array_t; @@ -10,13 +11,21 @@ void init_coreset(py::module &m) { py::class_(m, "CoresetSampler") .def(py::init<>()) .def("make_sampler", []( - CSType &cs, size_t ncenters, FNA costs, INA assignments, py::object weights, uint64_t seed) + CSType &cs, size_t ncenters, py::array costs, INA assignments, py::object weights, uint64_t seed, minocore::coresets::SensitivityMethod sens) { py::buffer_info buf1 = costs.request(), asb = assignments.request(); if(buf1.ndim != 1) throw std::runtime_error("buffer must have one dimension (reshape if necessary)"); float *wp = nullptr; if(auto p(pybind11::cast(weights)); p) wp = static_cast(p.request().ptr); - cs.make_sampler(ncenters, costs.shape(0), 
(float *)buf1.ptr, (uint32_t *)asb.ptr, wp, seed); - }); + if(py::isinstance>(costs)) { + cs.make_sampler(ncenters, costs.shape(0), (float *)buf1.ptr, (uint32_t *)asb.ptr, wp, seed, sens); + } else { + cs.make_sampler(ncenters, costs.shape(0), (double *)buf1.ptr, (uint32_t *)asb.ptr, wp, seed, sens); + } + }, + "Generates a coreset sampler given a set of costs, assignments, and, optionally, weights. This can be used to generate an index coreset", + py::arg("ncenters"), py::arg("costs"), py::arg("assignments"), + py::arg("weights") = py::cast(Py_None), py::arg("seed") = 13, py::arg("sens")=minocore::coresets::BFL); + } diff --git a/python/pyfgc.cpp b/python/pyfgc.cpp index 970e6e37..e63f505d 100644 --- a/python/pyfgc.cpp +++ b/python/pyfgc.cpp @@ -3,6 +3,8 @@ PYBIND11_MODULE(pyfgc, m) { init_ex1(m); + init_coreset(m); m.doc() = "Python bindings for FGC, which allows for calling coreset/clustering code from numpy and converting results back to numpy arrays"; } - +void init_ex1(py::module &) { +} diff --git a/python/pyfgc.h b/python/pyfgc.h index 58059232..ccb0f2f6 100644 --- a/python/pyfgc.h +++ b/python/pyfgc.h @@ -2,7 +2,7 @@ #include "pybind11/pybind11.h" #include "pybind11/numpy.h" #include "aesctr/wy.h" -#include "minocore/matrix_coreset.h" +#include "minocore/minocore.h" using namespace minocore; namespace py = pybind11; void init_ex1(py::module &); diff --git a/python/setup.py b/python/setup.py index 34f9f0a3..023a7c8f 100644 --- a/python/setup.py +++ b/python/setup.py @@ -26,7 +26,7 @@ def __str__(self): extra_compile_args = ['-march=native', '-Wno-char-subscripts', '-Wno-unused-function', '-Wno-strict-aliasing', '-Wno-ignored-attributes', '-fno-wrapv', - '-lz', '-fopenmp', '-lgomp'] + '-lz', '-fopenmp', "-lgomp"] if 'BOOST_DIR' in environ: extra_compile_args.append("-I%s" % environ['BOOST_DIR']) @@ -83,7 +83,7 @@ def cpp_flag(compiler): 'is needed!') -extra_link_opts = ["-lgomp", "-lz"] +extra_link_opts = ["-fopenmp", "-lgomp", "-lz"] class BuildExt(build_ext): """A custom build extension for adding compiler-specific options.""" @@ -125,7 +125,7 @@ def build_extensions(self): author='Daniel Baker', author_email='dnb@cs.jhu.edu', url='https://github.com/dnbaker/pyfgc', - description='A python module for constructing and comparing HyperLogLogs', + description='A python module for stuff', long_description='', ext_modules=ext_modules, install_requires=['pybind11>=2.4'], diff --git a/scripts/hdf2file.py b/scripts/hdf2file.py new file mode 100644 index 00000000..ce1179eb --- /dev/null +++ b/scripts/hdf2file.py @@ -0,0 +1,14 @@ +import h5py +import numpy as np +import sys + +infname = sys.argv[1] +key = sys.argv[2] if sys.argv[2:] else "matrix" +prefix = "data" if not sys.argv[3:] else sys.argv[3] + +f = h5py.File(infname, "r") +print(f.keys()) +group = f[key] +for comp in ["shape", "indices", "indptr", "data"]: + with open(prefix + '.' 
+ comp, "w") as f: + np.array(group[comp]).tofile(f) diff --git a/src/clustertest.cpp b/src/clustertest.cpp new file mode 100644 index 00000000..4914c2bd --- /dev/null +++ b/src/clustertest.cpp @@ -0,0 +1,78 @@ +#include "blaze/math/DynamicMatrix.h" +#include "aesctr/wy.h" +#include "minocore/clustering.h" + +using namespace minocore; + +template +blaze::DynamicMatrix parse_file(std::string path, unsigned *num_clusters) { + std::ifstream ifs(path); + std::string line; + if(!std::getline(ifs, line)) throw 1; + size_t nr = std::atoi(line.data()); + size_t nc = std::atoi(std::strchr(line.data(), '/') + 1); + *num_clusters = std::atoi(std::strchr(std::strchr(line.data(), '/') + 1, '/') + 1); + blaze::DynamicMatrix ret(nr, nc); + size_t row_index = 0; + while(std::getline(ifs, line)) { + auto r = row(ret, row_index++); + char *ptr = line.data(); + for(size_t col_index = 0;col_index < nc;r[col_index++] = std::strtod(ptr, &ptr)); + } + assert(row_index == nr); + return ret; +} + + +template +int metamain(int argc, char **argv) { + int ret = 0; + unsigned k; + std::string inpath = "random.out"; + if(argc > 1) inpath = argv[1]; + auto pointmat = parse_file(inpath, &k); + + std::cerr << "Parsed matrix of " << pointmat.rows() << " rows and " + << pointmat.columns() << " columns, with k = " << k << " clusters\n"; + auto jsdapp = make_probdiv_applicator(pointmat, blz::SQRL2); + std::cerr << "Made probdiv applicator\n"; + auto clusterdata = clustering::perform_clustering< + is_hard ? clustering::HARD: clustering::SOFT, clustering::EXTRINSIC>(jsdapp, k); + shared::flat_hash_map> labels, clabels; + std::ifstream ifs(inpath + ".labels.txt"); + size_t lno = 0; + for(std::string l; std::getline(ifs, l);) { + labels[std::atoi(l.data())].insert(lno++); + } + auto &asn = std::get<1>(clusterdata); + if constexpr(is_hard) { + for(size_t i = 0; i < asn.size(); ++i) { + clabels[asn[i]].insert(i); + } + shared::flat_hash_set sizes, csizes; + for(const auto &l: labels) sizes.insert(l.second.size()); + for(const auto &l: clabels) csizes.insert(l.second.size()); + std::cerr << "sizes size: " << sizes.size() << '\n'; + std::cerr << "csizes size: " << csizes.size() << '\n'; + assert(sizes.size() == csizes.size() && *sizes.begin() == *csizes.begin()); + // TODO: ensure that items are correctly clustered + } + return ret; +} + +int main(int argc, char **argv) { + int rc; + if((rc = metamain(argc, argv))) return rc; + if((rc = metamain(argc, argv))) return rc; + // Next: test Bregman clustering (HARD) + // Next: test Bregman clustering (SOFT) + // Next: test LLR (HARD) + // Next: test LLR (SOFT) + // Next: test TVD (HARD) + // Next: test TVD (SOFT) + // Next: test L1 (HARD) + // Next: test L1 (SOFT) + // Next: test metric k-median (universal dispatch) + throw std::runtime_error("Not completed!"); + return rc; +} diff --git a/src/csctest.cpp b/src/csctest.cpp index 06f431ae..0db0985c 100644 --- a/src/csctest.cpp +++ b/src/csctest.cpp @@ -1,9 +1,70 @@ #include "minocore/util/csc.h" +template +void dothing(std::string path) { + auto read = minocore::csc2sparse(path); + std::fprintf(stderr, "nr: %zu. nc: %zu. 
nnz: %zu\n", read.rows(), read.columns(), read.nonZeros()); +} + +enum VT { + U32, + U64, + F32, + F64 +}; + +VT c2v(std::string key) { + if(key == "u32") return U32; + if(key == "u64") return U64; + if(key == "f32") return F32; + if(key == "f64") return F64; + throw 1; + return F64; +} + using namespace minocore; int main(int argc, char *argv[]) { std::string inpath; - if(argc > 1) inpath = argv[1]; - auto read = csc2sparse(inpath); - std::fprintf(stderr, "nr: %zu. nc: %zu. nnz: %zu\n", read.rows(), read.columns(), read.nonZeros()); + VT ip = U64; + VT id = U64; + VT dt = F32; + for(int c;(c = getopt(argc, argv, "p:i:d:h")) >= 0;) { + switch(c) { + case 'p': ip = c2v(optarg); break; + case 'i': id = c2v(optarg); break; + case 'd': dt = c2v(optarg); break; + } + } + // Use as ./csctest -pu32 -iu32 -df32 cao_atlas_ + if(optind != argc) inpath = argv[optind]; + if(dt != U32 && dt != F32) throw std::runtime_error("Not supported: datatype other than f32 or u32"); + if(ip == U64) { + if(id == U64) { + if(dt == U32) { + dothing(inpath); + } else if(dt == F32) { + dothing(inpath); + } + } else { + if(dt == U32) { + dothing(inpath); + } else if(dt == F32) { + dothing(inpath); + } + } + } else { + if(id == U64) { + if(dt == U32) { + dothing(inpath); + } else if(dt == F32) { + dothing(inpath); + } + } else { + if(dt == U32) { + dothing(inpath); + } else if(dt == F32) { + dothing(inpath); + } + } + } } diff --git a/src/diskmattest.cpp b/src/diskmattest.cpp index d3924f6f..f7109234 100644 --- a/src/diskmattest.cpp +++ b/src/diskmattest.cpp @@ -1,8 +1,9 @@ #include "minocore/dist.h" -#include "minocore/util/diskmat.h" +#include "diskmat/diskmat.h" using namespace minocore; using namespace blz; +using diskmat::DiskMat; int main() { std::srand(0); @@ -58,7 +59,6 @@ int main() { std::cout << r1; std::cout << r0; std::fprintf(stderr, "Wasserstein distance between rows 1 and 2: %g\n", distance::p_wasserstein(r1, r0)); - std::fprintf(stderr, "Wasserstein distance between rows 1 and 2: %g\n", distance::network_p_wasserstein(r1, r0)); #if 0 std::fprintf(stderr, "multinomial jsd: %f\n", distance::multinomial_jsd(r1, r0)); std::fprintf(stderr, "multinomial jsd: %f\n", distance::multinomial_jsd(c1, c0)); diff --git a/src/dmlsearch.cpp b/src/dmlsearch.cpp index 761eaecc..8b13e92b 100644 --- a/src/dmlsearch.cpp +++ b/src/dmlsearch.cpp @@ -1,7 +1,7 @@ //#define VERBOSE_AF 1 #include "minocore/graph/graphdist.h" #include "minocore/optim/lsearch.h" -#include "minocore/util/diskmat.h" +#include "diskmat/diskmat.h" #include using namespace minocore; diff --git a/src/geomedtest.cpp b/src/geomedtest.cpp index 6e6f6f2a..835a8098 100644 --- a/src/geomedtest.cpp +++ b/src/geomedtest.cpp @@ -66,7 +66,7 @@ int main(int c, char **a) { manstop = t(); std::fprintf(stderr, "Manual l1 distances time: %zu/%g. reduction-based: %zu/%g\n", size_t((stop - start).count() / 1000), cwmed, size_t((manstop - manstart).count() / 1000), cwmed2); auto l1_approx_start = t(); - minocore::coresets::l1_median(m, v3, static_cast(nullptr), true); + minocore::coresets::l1_median(m, v3, static_cast(nullptr)); auto l1_approx_stop = t(); std::fprintf(stderr, "Time to compute exact l1 median: %gms. 
Approx: %gms.\n", (l1_stop - l1_start).count() * 1.e-6, (l1_approx_stop - l1_approx_start).count() * 1.e-6); std::cout << "L1 dist under geomedian: " << l1dist(m, v) << '\n'; diff --git a/src/hdf2dm.cpp b/src/hdf2dm.cpp index 3fe5e032..e3ecda2f 100644 --- a/src/hdf2dm.cpp +++ b/src/hdf2dm.cpp @@ -27,9 +27,11 @@ int main(int argc, char *argv[]) { // TODO: extract to binary file, then iterate over the file. std::string inpath = "5k_pbmc_protein_v3_raw_feature_bc_matrix.h5"; std::string outpref = ""; + std::string key = "matrix"; if(argc > 1) inpath = argv[1]; + if(argc > 2) key = argv[2]; H5::H5File file(inpath.data(), H5F_ACC_RDONLY ); - auto group = H5::Group(file.openGroup("matrix")); + auto group = H5::Group(file.openGroup(key.data())); auto shape = group.openDataSet("shape"); assert(shape.getIntType().getSize() == 4); uint32_t shape_out[2]; diff --git a/src/jsdtest.cpp b/src/jsdtest.cpp index 04cbed43..5201c982 100644 --- a/src/jsdtest.cpp +++ b/src/jsdtest.cpp @@ -8,6 +8,16 @@ using namespace blz; #define FLOAT_TYPE double #endif +#ifndef INDICESTYPE +#define INDICESTYPE uint64_t +#endif +#ifndef INDPTRTYPE +#define INDPTRTYPE uint64_t +#endif +#ifndef DATATYPE +#define DATATYPE uint32_t +#endif + int main(int argc, char *argv[]) { if(std::find_if(argv, argv + argc, [](auto x) {return std::strcmp(x, "-h") == 0 || std::strcmp(x, "--help") == 0;}) != argv + argc) { @@ -21,7 +31,7 @@ int main(int argc, char *argv[]) { input = argv[3]; std::ofstream ofs("output.txt"); auto sparsemat = input.size() ? minocore::mtx2sparse(input) - : minocore::csc2sparse("", true); + : minocore::csc2sparse("", true); std::vector nonemptyrows; size_t i = 0; while(nonemptyrows.size() < 25) { @@ -30,7 +40,7 @@ int main(int argc, char *argv[]) { ++i; } blz::SM first25 = rows(sparsemat, nonemptyrows.data(), nonemptyrows.size()); - auto jsd = minocore::jsd::make_jsm_applicator(first25); + auto jsd = minocore::jsd::make_probdiv_applicator(first25, jsd::JSM, jsd::DIRICHLET); //auto jsddistmat = jsd.make_distance_matrix(); dm::DistanceMatrix utdm(first25.rows()); jsd.set_distance_matrix(utdm); @@ -77,12 +87,10 @@ int main(int argc, char *argv[]) { timer.restart("1ksparsekl"); jsd2.set_distance_matrix(jsd_bnj, minocore::jsd::MKL, true); timer.report(); - std::cout << "Multinomial KL\n" << '\n'; //std::cout << jsd_bnj << '\n'; timer.restart("1ksparseL1"); - jsd2.set_distance_matrix(jsd_bnj, minocore::jsd::EMD, true); + jsd2.set_distance_matrix(jsd_bnj, minocore::jsd::L1, true); timer.report(); - std::cout << "EMD: " << jsd_bnj << '\n'; #if 0 timer.restart("1ldensejsd"); blz::DM densefirst25 = first25; @@ -93,8 +101,8 @@ int main(int argc, char *argv[]) { //ofs << jsd_bnj << '\n'; ofs.flush(); std::fprintf(stderr, "Starting jsm\n"); - timer.restart("1ksparsejsm"); - jsd2.set_distance_matrix(jsd_bnj, minocore::jsd::L1); + timer.restart("1ksparsetvd"); + jsd2.set_distance_matrix(jsd_bnj, minocore::jsd::TVD); timer.report(); timer.reset(); ofs << "JS Metric: \n"; diff --git a/src/kmpptest.cpp b/src/kmpptest.cpp index f3a5f04c..01c0cad4 100644 --- a/src/kmpptest.cpp +++ b/src/kmpptest.cpp @@ -1,5 +1,6 @@ #include "minocore/optim/kmeans.h" #include "minocore/optim/kcenter.h" +#include "minocore/coreset/kcenter.h" #include "minocore/dist/applicator.h" #include #include @@ -8,7 +9,7 @@ #include #endif -#define t std::chrono::high_resolution_clock::now +auto t() {return std::chrono::high_resolution_clock::now();} #ifndef FLOAT_TYPE #define FLOAT_TYPE double @@ -22,19 +23,19 @@ template void test_kccs(Mat &mat, RNG &rng, size_t 
diff --git a/src/kmpptest.cpp b/src/kmpptest.cpp
index f3a5f04c..01c0cad4 100644
--- a/src/kmpptest.cpp
+++ b/src/kmpptest.cpp
@@ -1,5 +1,6 @@
 #include "minocore/optim/kmeans.h"
 #include "minocore/optim/kcenter.h"
+#include "minocore/coreset/kcenter.h"
 #include "minocore/dist/applicator.h"
 #include
 #include
@@ -8,7 +9,7 @@
 #include
 #endif
 
-#define t std::chrono::high_resolution_clock::now
+auto t() {return std::chrono::high_resolution_clock::now();}
 
 #ifndef FLOAT_TYPE
 #define FLOAT_TYPE double
 #endif
@@ -22,19 +23,19 @@
 template<typename Mat, typename RNG> void test_kccs(Mat &mat, RNG &rng, size_t npoints, double eps) {
     auto matrowit = blz::rowiterator(mat);
     auto start = t();
-    double gamma = 100. / mat.rows();
+    double gamma = 500. / mat.rows();
     if(gamma >= 0.5) gamma = 0.05;
-    auto cs = outliers::kcenter_coreset(matrowit.begin(), matrowit.end(), rng, npoints, eps,
+    auto cs = kcenter_coreset_outliers(matrowit.begin(), matrowit.end(), rng, npoints, eps,
                                         /*mu=*/0.5, 1.5, gamma);
     auto maxv = *std::max_element(cs.indices_.begin(), cs.indices_.end());
     std::fprintf(stderr, "max index: %u\n", unsigned(maxv));
     auto stop = t();
-    std::fprintf(stderr, "kcenter coreset took %0.12gs\n", double((stop - start).count()) / 1e9);
+    std::fprintf(stderr, "kcenter coreset took %0.12gms\n", util::timediff2ms(stop, start));
     start = t();
     auto csmat = index2matrix(cs, mat);
     stop = t();
-    std::fprintf(stderr, "kcenter compacting to coreset took %0.12gs\n", double((stop - start).count()) / 1e9);
+    std::fprintf(stderr, "kcenter compacting to coreset took %0.12gms\n", util::timediff2ms(stop, start));
 }
 
 int main(int argc, char *argv[]) {
@@ -48,7 +49,6 @@ int main(int argc, char *argv[]) {
         nt = std::atoi(env);
     }
     OMP_ONLY(omp_set_num_threads(nt);)
-    std::fprintf(stderr, "%d threads used\n", nt);
 #endif
     std::srand(0);
     size_t n = argc == 1 ? 250000: std::atoi(argv[1]);
@@ -107,7 +107,8 @@ int main(int argc, char *argv[]) {
     assert(min(sqmat) > 0.);
     {
         auto greedy_metric = kcenter_greedy_2approx(blz::rowiterator(sqmat).begin(), blz::rowiterator(sqmat).end(),
-                                                    gen, /*k=*/3, MatrixLookup{});
+                                                    gen, /*k=*/npoints, MatrixLookup{});
+        kcenter_greedy_2approx_outliers(blz::rowiterator(sqmat).begin(), blz::rowiterator(sqmat).end(), gen, /*k=*/npoints, eps, .001, MatrixLookup{});
     }
     auto kmpp_asn = std::move(std::get<1>(centers));
     std::vector<uint32_t> counts(npoints);
diff --git a/src/knntest.cpp b/src/knntest.cpp
new file mode 100644
index 00000000..f03e3146
--- /dev/null
+++ b/src/knntest.cpp
@@ -0,0 +1,12 @@
+#include "include/minocore/dist/knngraph.h"
+
+int main() {
+    blaze::DynamicMatrix<float> mat = blaze::generate(1000, 50, [](auto x, auto y) {
+        return float(std::rand()) / RAND_MAX + (x * y) / 1000. / 50.;
+    });
+    auto app = minocore::jsd::make_probdiv_applicator(mat, blz::distance::L1);
+    auto knns = minocore::make_knns(app, 10);
+    auto graph = minocore::knns2graph(knns, app.size(), true);
+    auto mst = minocore::knng2mst(graph);
+    std::fprintf(stderr, "mst size: %zu edges vs %zu nodes\n", mst.size(), app.size());
+}
diff --git a/src/mtxparse.cpp b/src/mtxparse.cpp
index 7ae8a20e..451dbdab 100644
--- a/src/mtxparse.cpp
+++ b/src/mtxparse.cpp
@@ -1,4 +1,4 @@
-#include "minocore/csc.h"
+#include "minocore/util/csc.h"
 #include
 #include "blaze/util/Serialization.h"
 #include
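The new sparsepriortest below pins JSD values computed under the DIRICHLET prior on sparse count rows. For intuition, a standalone sketch of Jensen-Shannon divergence with additive (Dirichlet-style) smoothing follows; the pseudocount convention minocore uses is not visible in this diff, so a unit pseudocount is assumed here, and the test's constants should not be expected to reproduce exactly:

#include <cmath>
#include <cstdio>
#include <vector>

// JSD(P, Q) = 0.5 * KL(P||M) + 0.5 * KL(Q||M) with M = (P + Q) / 2,
// computed on counts smoothed by an additive pseudocount before normalizing.
double smoothed_jsd(const std::vector<double> &x, const std::vector<double> &y, double pseudo = 1.) {
    double xs = 0., ys = 0.;
    for(size_t i = 0; i < x.size(); ++i) xs += x[i] + pseudo, ys += y[i] + pseudo;
    double ret = 0.;
    for(size_t i = 0; i < x.size(); ++i) {
        const double p = (x[i] + pseudo) / xs, q = (y[i] + pseudo) / ys, m = .5 * (p + q);
        ret += .5 * (p * std::log(p / m) + q * std::log(q / m));
    }
    return ret;
}

int main() {
    // The two rows from sparsepriortest's first matrix.
    std::vector<double> a{1., 5., 0., 3., 1., 1., 1., 3., 1., 1.};
    std::vector<double> b{1., 1., 3., 2., 2., 0., 21., 1., 7., 1.};
    std::fprintf(stderr, "smoothed JSD: %g\n", smoothed_jsd(a, b));
}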
diff --git a/src/sparsepriortest.cpp b/src/sparsepriortest.cpp
new file mode 100644
index 00000000..970ebb2b
--- /dev/null
+++ b/src/sparsepriortest.cpp
@@ -0,0 +1,20 @@
+#include "minocore/dist/applicator.h"
+
+int main() {
+    blaze::CompressedMatrix<double> cm{{1., 5., 0., 3., 1., 1., 1., 3., 1., 1}, {1., 1., 3., 2., 2., 0., 21., 1., 7., 1.}};
+    std::cerr << cm << '\n';
+    auto app = minocore::make_probdiv_applicator(cm, blz::JSD, minocore::jsd::DIRICHLET);
+    assert(std::abs(app(0, 1) - 0.16066042325849725) < 1e-10 || !std::fprintf(stderr, "got %g vs %g\n", app(0, 1), 0.16066042325849725));
+    blaze::CompressedMatrix<double> cm2{
+        {0, 7, 6, 0, 6, 6, 0, 0, 7, 9, 4, 0, 0, 0, 6, 6, 0, 0, 0, 7},
+        {6, 7, 0, 0, 0, 5, 6, 9, 0, 0, 0, 0, 0, 9, 0, 6, 5, 6, 0, 0}
+    };
+    auto r1 = row(cm2, 0);
+    auto r2 = row(cm2, 1);
+    assert(blz::number_shared_zeros(r1, r2) == 4);
+    auto app2 = minocore::make_probdiv_applicator(cm2, blz::JSD, minocore::jsd::DIRICHLET);
+    double v2 = app2(0, 1);
+    static constexpr double correct2 = 0.2307775339934756;
+    assert(std::abs(correct2 - v2) < 1e-10);
+    //std::fprintf(stderr, "difference: %0.12g\n", correct2 - v2);
+}
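On the number_shared_zeros assertion: the coordinates that are zero in both rows of cm2 are {3, 11, 12, 18}, hence 4, and on sparse vectors this can be counted without touching a single zero, since shared zeros = dimension minus the size of the union of the two nonzero index sets. A small self-contained check of that identity (hypothetical helper, independent of how blz::number_shared_zeros is actually implemented):

#include <cassert>
#include <cstddef>
#include <set>
#include <vector>

// Shared zeros of two d-dimensional sparse vectors given their nonzero
// index sets: d minus the size of the union of those sets.
size_t shared_zeros(const std::vector<size_t> &nz1, const std::vector<size_t> &nz2, size_t d) {
    std::set<size_t> u(nz1.begin(), nz1.end());
    u.insert(nz2.begin(), nz2.end());
    return d - u.size();
}

int main() {
    // Nonzero indices of cm2's two rows in sparsepriortest.cpp.
    std::vector<size_t> nz1{1, 2, 4, 5, 8, 9, 10, 14, 15, 19};
    std::vector<size_t> nz2{0, 1, 5, 6, 7, 13, 15, 16, 17};
    assert(shared_zeros(nz1, nz2, 20) == 4); // matches the test's assertion
}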