microsoft · MaggieQi · Aug 27, 2024 · Sep 4, 2024 · Jan 3, 2025 · Jan 7, 2025
diff --git a/AnnService/inc/Core/Common/BKTree.h b/AnnService/inc/Core/Common/BKTree.h
@@ -54,10 +54,12 @@ namespace SPTAG
 
             KmeansArgs(int k, DimensionType dim, SizeType datasize, int threadnum, DistCalcMethod distMethod, const std::shared_ptr<IQuantizer>& quantizer = nullptr) : _K(k), _DK(k), _D(dim), _RD(dim), _T(threadnum), _M(distMethod), m_pQuantizer(quantizer){
                 if (m_pQuantizer) {
+		    SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "KmeansArgs: Using quantizer!\n");
                     _RD = m_pQuantizer->ReconstructDim();
                     fComputeDistance = m_pQuantizer->DistanceCalcSelector<T>(distMethod);
                 }
                 else {
+		    SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "KmeansArgs: Using none quantizer!\n");
                     fComputeDistance = COMMON::DistanceCalcSelector<T>(distMethod);
                 }
 

diff --git a/AnnService/inc/Core/Common/PQQuantizer.h b/AnnService/inc/Core/Common/PQQuantizer.h
@@ -140,7 +140,7 @@ namespace SPTAG
         {
             if (ADC && GetEnableADC())
             {
-                auto distCalc = DistanceCalcSelector<T>(DistCalcMethod::L2);
+                auto distCalc = COMMON::DistanceCalcSelector<T>(DistCalcMethod::L2);
                 float* ADCtable = (float*) vecout;
                 T* subcodebooks = m_codebooks.get();
                 T* subvec = (T*)vec;
@@ -157,7 +157,7 @@ namespace SPTAG
             }
             else 
             {
-                auto distCalc = DistanceCalcSelector<T>(DistCalcMethod::L2);
+                auto distCalc = COMMON::DistanceCalcSelector<T>(DistCalcMethod::L2);
                 T* subvec = (T*)vec;
                 T* subcodebooks = m_codebooks.get();
                 for (int i = 0; i < m_NumSubvectors; i++) {
@@ -334,7 +334,7 @@ namespace SPTAG
         void PQQuantizer<T>::InitializeDistanceTables()
         {
             auto temp_m_L2DistanceTables = std::make_unique<float[]>(m_BlockSize * m_NumSubvectors);
-            auto L2Dist = DistanceCalcSelector<T>(DistCalcMethod::L2);
+            auto L2Dist = COMMON::DistanceCalcSelector<T>(DistCalcMethod::L2);
 
             for (int i = 0; i < m_NumSubvectors; i++) {
                 SizeType baseIdx = i * m_KsPerSubvector * m_DimPerSubvector;

diff --git a/AnnService/inc/Quantizer/Training.h b/AnnService/inc/Quantizer/Training.h
@@ -73,21 +73,19 @@ std::unique_ptr<T[]> TrainPQQuantizer(std::shared_ptr<QuantizerOptions> options,
 #pragma omp parallel for
     for (int codebookIdx = 0; codebookIdx < options->m_quantizedDim; codebookIdx++) {
         SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Training Codebook %d.\n", codebookIdx);
-        auto kargs = COMMON::KmeansArgs<T>(numCentroids, subdim, raw_vectors->Count(), options->m_threadNum, DistCalcMethod::L2, nullptr);
-        auto dset = COMMON::Dataset<T>(raw_vectors->Count(), subdim, blockRows, raw_vectors->Count());
+        COMMON::Dataset<T> dset(raw_vectors->Count(), subdim, blockRows, raw_vectors->Count());
 
         for (int vectorIdx = 0; vectorIdx < raw_vectors->Count(); vectorIdx++) {
-            auto raw_addr = reinterpret_cast<T*>(raw_vectors->GetVector(vectorIdx)) + (codebookIdx * subdim);
-            auto dset_addr = dset[vectorIdx];
-            for (int k = 0; k < subdim; k++) {
-                dset_addr[k] = raw_addr[k];
-            }
+            T* raw_addr = reinterpret_cast<T*>(raw_vectors->GetVector(vectorIdx)) + (codebookIdx * subdim);
+            T* dset_addr = dset[vectorIdx];
+            std::memcpy(dset_addr, raw_addr, sizeof(T)*subdim);
         }
 
         std::vector<SizeType> localindices;
         localindices.resize(dset.R());
         for (SizeType il = 0; il < localindices.size(); il++) localindices[il] = il;
 
+        auto kargs = COMMON::KmeansArgs<T>(numCentroids, subdim, raw_vectors->Count(), options->m_threadNum, DistCalcMethod::L2, nullptr);
         auto nclusters = COMMON::KmeansClustering<T>(dset, localindices, 0, dset.R(), kargs, options->m_trainingSamples, options->m_KmeansLambda, options->m_debug, nullptr);
 
         std::vector<SizeType> reverselocalindex;
@@ -120,4 +118,4 @@ std::unique_ptr<T[]> TrainPQQuantizer(std::shared_ptr<QuantizerOptions> options,
     }
 
     return codebooks;
-}
+}
diff --git a/AnnService/src/Core/Common/DistanceUtils.cpp b/AnnService/src/Core/Common/DistanceUtils.cpp
@@ -416,14 +416,34 @@ float DistanceUtils::ComputeL2Distance_SSE(const std::uint8_t* pX, const std::ui
     }
     float diff = DIFF128[0] + DIFF128[1] + DIFF128[2] + DIFF128[3];
 
+    float c1;
+    uint8_t a, b;
     while (pX < pEnd4) {
-        float c1 = ((float)(*pX++) - (float)(*pY++)); diff += c1 * c1;
-        c1 = ((float)(*pX++) - (float)(*pY++)); diff += c1 * c1;
-        c1 = ((float)(*pX++) - (float)(*pY++)); diff += c1 * c1;
-        c1 = ((float)(*pX++) - (float)(*pY++)); diff += c1 * c1;
+	a = (*pX); b = (*pY);
+	c1 = a + 0.0f - b;
+	pX++; pY++;
+	diff += c1 * c1;
+
+	a = (*pX); b = (*pY);
+	c1 = a + 0.0f - b;
+	pX++; pY++;
+	diff += c1 * c1;
+
+	a = (*pX); b = (*pY);
+	c1 = a + 0.0f - b;
+	pX++; pY++;
+	diff += c1 * c1;
+
+	a = (*pX); b = (*pY);
+	c1 = a + 0.0f - b;
+	pX++; pY++;
+	diff += c1 * c1;
     }
     while (pX < pEnd1) {
-        float c1 = ((float)(*pX++) - (float)(*pY++)); diff += c1 * c1;
+	a = (*pX); b = (*pY);
+	c1 = a + 0.0f - b;
+	pX++; pY++;
+	diff += c1 * c1;
     }
     return diff;
 }
@@ -445,14 +465,34 @@ float DistanceUtils::ComputeL2Distance_AVX(const std::uint8_t* pX, const std::ui
     }
     float diff = DIFF128[0] + DIFF128[1] + DIFF128[2] + DIFF128[3];
 
+    float c1;
+    uint8_t a, b;
     while (pX < pEnd4) {
-        float c1 = ((float)(*pX++) - (float)(*pY++)); diff += c1 * c1;
-        c1 = ((float)(*pX++) - (float)(*pY++)); diff += c1 * c1;
-        c1 = ((float)(*pX++) - (float)(*pY++)); diff += c1 * c1;
-        c1 = ((float)(*pX++) - (float)(*pY++)); diff += c1 * c1;
+	a = (*pX); b = (*pY);
+	c1 = a + 0.0f - b;
+	pX++; pY++;
+	diff += c1 * c1;
+
+	a = (*pX); b = (*pY);
+	c1 = a + 0.0f - b;
+	pX++; pY++;
+	diff += c1 * c1;
+
+	a = (*pX); b = (*pY);
+	c1 = a + 0.0f - b;
+	pX++; pY++;
+	diff += c1 * c1;
+
+	a = (*pX); b = (*pY);
+	c1 = a + 0.0f - b;
+	pX++; pY++;
+	diff += c1 * c1;
     }
     while (pX < pEnd1) {
-        float c1 = ((float)(*pX++) - (float)(*pY++)); diff += c1 * c1;
+	a = (*pX); b = (*pY);
+	c1 = a + 0.0f - b;
+	pX++; pY++;
+	diff += c1 * c1;
     }
     return diff;
 }
@@ -484,14 +524,34 @@ float DistanceUtils::ComputeL2Distance_AVX512(const std::uint8_t* pX, const std:
     }
     float diff = DIFF128[0] + DIFF128[1] + DIFF128[2] + DIFF128[3];
 
+    float c1;
+    uint8_t a, b;
     while (pX < pEnd4) {
-        float c1 = ((float)(*pX++) - (float)(*pY++)); diff += c1 * c1;
-        c1 = ((float)(*pX++) - (float)(*pY++)); diff += c1 * c1;
-        c1 = ((float)(*pX++) - (float)(*pY++)); diff += c1 * c1;
-        c1 = ((float)(*pX++) - (float)(*pY++)); diff += c1 * c1;
+	a = (*pX); b = (*pY);
+	c1 = a + 0.0f - b;
+	pX++; pY++;
+	diff += c1 * c1;
+
+	a = (*pX); b = (*pY);
+	c1 = a + 0.0f - b;
+	pX++; pY++;
+	diff += c1 * c1;
+
+	a = (*pX); b = (*pY);
+	c1 = a + 0.0f - b;
+	pX++; pY++;
+	diff += c1 * c1;
+
+	a = (*pX); b = (*pY);
+	c1 = a + 0.0f - b;
+	pX++; pY++;
+	diff += c1 * c1;
     }
     while (pX < pEnd1) {
-        float c1 = ((float)(*pX++) - (float)(*pY++)); diff += c1 * c1;
+	a = (*pX); b = (*pY);
+	c1 = a + 0.0f - b;
+	pX++; pY++;
+	diff += c1 * c1;
     }
     return diff;
 }