Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FAISS with cuVS enabled in cuvs-bench #561

Draft
wants to merge 7 commits into
base: branch-25.02
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/bench/ann/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ function(ConfigureAnnBench)
target_link_libraries(
${BENCH_NAME}
PRIVATE ${ConfigureAnnBench_LINKS}
raft::raft
nlohmann_json::nlohmann_json
Threads::Threads
$<TARGET_NAME_IF_EXISTS:raft::raft_logger>
Expand Down
8 changes: 8 additions & 0 deletions cpp/bench/ann/src/common/benchmark.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -483,6 +483,14 @@ void register_search(std::shared_ptr<const dataset<T>> dataset,
->MeasureProcessCPUTime()
->UseRealTime();

if (metric_objective == Mode::kThroughput) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's not put algorithm-specific stuff in the common files. I agree with @achirkin about this one. If needed, you could propagate this down to the specific algorithms, but the common code should be agnostic of algorithm-specific details.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, @cjnolet for spotting this. @tarang-jain , could you please move the warning into the faiss-related header (perhaps into the set_search_params)? There, you should be able to query the cuvs::bench::benchmark_n_threads > 1 from common/util.hpp.

if (index.algo.find("faiss_gpu") != std::string::npos) {
log_warn(
"FAISS GPU does not work in throughput mode because the underlying "
"StandardGpuResources object is not thread-safe. This might give unexpected results");
}
b->ThreadRange(threads[0], threads[1]);
}
if (metric_objective == Mode::kThroughput) { b->ThreadRange(threads[0], threads[1]); }
}
}
Expand Down
30 changes: 29 additions & 1 deletion cpp/bench/ann/src/faiss/faiss_gpu_benchmark.cu
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,11 @@ void parse_build_param(const nlohmann::json& conf,
typename cuvs::bench::faiss_gpu_ivf_flat<T>::build_param& param)
{
parse_base_build_param<T>(conf, param);
if (conf.contains("use_cuvs")) {
param.use_cuvs = conf.at("use_cuvs");
} else {
param.use_cuvs = false;
}
}

template <typename T>
Expand All @@ -60,6 +65,16 @@ void parse_build_param(const nlohmann::json& conf,
} else {
param.use_float16 = false;
}
if (conf.contains("use_cuvs")) {
param.use_cuvs = conf.at("use_cuvs");
} else {
param.use_cuvs = false;
}
if (conf.contains("bitsPerCode")) {
param.bitsPerCode = conf.at("bitsPerCode");
} else {
param.bitsPerCode = 8;
}
}

template <typename T>
Expand Down Expand Up @@ -138,5 +153,18 @@ REGISTER_ALGO_INSTANCE(std::uint8_t);

#ifdef ANN_BENCH_BUILD_MAIN
#include "../common/benchmark.hpp"
int main(int argc, char** argv) { return cuvs::bench::run_main(argc, argv); }
// Entry point for the FAISS GPU benchmark binary.
// Installs an RMM pool memory resource as the current device resource for the
// lifetime of the benchmark run, then restores the previous resource before
// returning run_main's exit code.
// NOTE(review): the previous resource is restored by a plain call, not an RAII
// guard — if run_main propagates an exception, the restore is skipped. Confirm
// whether run_main can throw.
int main(int argc, char** argv)
{
rmm::mr::cuda_memory_resource cuda_mr;
// Construct a resource that uses a coalescing best-fit pool allocator
// and is initially sized to half of free device memory.
rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> pool_mr{
&cuda_mr, rmm::percent_of_free_device_memory(50)};
// Updates the current device resource pointer to `pool_mr`; the returned
// pointer is the previously-installed resource, saved so it can be put back.
auto old_mr = rmm::mr::set_current_device_resource(&pool_mr);
auto ret = cuvs::bench::run_main(argc, argv);
// Restores the current device resource pointer to its previous value
rmm::mr::set_current_device_resource(old_mr);
return ret;
}
Comment on lines +156 to +169
Copy link
Contributor

@achirkin achirkin Jan 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We've run into many issues in ann-bench in the past when doing a program-wide setting of the memory resource. This also changes the behavior depending on whether the ANN_BENCH_BUILD_MAIN flag is enabled. Therefore, it's best if these lines stay unmodified and the same for all algorithms.
Please move this resource setting to the place where the index wrapper is constructed/destructed in the algo header file.
Please move this resource setting to the place where the index wrapper is constructed/destructed in the algo header file.

#endif
99 changes: 82 additions & 17 deletions cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

#include "../common/ann_types.hpp"
#include "../common/util.hpp"
#include <cuvs/neighbors/refine.hpp>
#include "../cuvs/cuvs_ann_bench_utils.h"

#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFFlat.h>
Expand All @@ -32,6 +34,10 @@
#include <faiss/index_io.h>
#include <omp.h>

#include <raft/core/device_mdarray.hpp>
#include <raft/core/device_resources.hpp>
#include <raft/core/host_mdarray.hpp>

#include <cassert>
#include <iostream>
#include <memory>
Expand All @@ -41,7 +47,7 @@

namespace {

auto parse_metric_type(cuvs::bench::Metric metric) -> faiss::MetricType
auto parse_metric_faiss(cuvs::bench::Metric metric) -> faiss::MetricType
{
if (metric == cuvs::bench::Metric::kInnerProduct) {
return faiss::METRIC_INNER_PRODUCT;
Expand Down Expand Up @@ -93,7 +99,7 @@ class faiss_gpu : public algo<T>, public algo_gpu {
faiss_gpu(Metric metric, int dim, const build_param& param)
: algo<T>(metric, dim),
gpu_resource_{std::make_shared<faiss::gpu::StandardGpuResources>()},
metric_type_(parse_metric_type(metric)),
metric_type_(parse_metric_faiss(metric)),
nlist_{param.nlist},
training_sample_fraction_{1.0 / double(param.ratio)}
{
Expand Down Expand Up @@ -160,6 +166,7 @@ class faiss_gpu : public algo<T>, public algo_gpu {
int device_;
double training_sample_fraction_;
std::shared_ptr<faiss::SearchParameters> search_params_;
std::shared_ptr<faiss::IndexRefineSearchParameters> refine_search_params_{nullptr};
const T* dataset_;
float refine_ratio_ = 1.0;
};
Expand Down Expand Up @@ -199,19 +206,65 @@ template <typename T>
void faiss_gpu<T>::search(
const T* queries, int batch_size, int k, algo_base::index_type* neighbors, float* distances) const
{
// ASSERT(Mode::kLatency, "l2Knn: rowMajorIndex and rowMajorQuery should have same layout");
using IdxT = faiss::idx_t;
static_assert(sizeof(size_t) == sizeof(faiss::idx_t),
"sizes of size_t and faiss::idx_t are different");

if (this->refine_ratio_ > 1.0) {
// TODO(snanditale): FAISS changed their search APIs to accept the search parameters as a struct
// object but their refine API doesn't allow the struct to be passed in. Once this is fixed, we
// need to re-enable refinement below
// index_refine_->search(batch_size, queries, k, distances,
// reinterpret_cast<faiss::idx_t*>(neighbors), this->search_params_.get()); Related FAISS issue:
// https://github.com/facebookresearch/faiss/issues/3118
throw std::runtime_error(
"FAISS doesn't support refinement in their new APIs so this feature is disabled in the "
"benchmarks for the time being.");
if (refine_ratio_ > 1.0) {
if (raft::get_device_for_address(queries) >= 0) {
uint32_t k0 = static_cast<uint32_t>(refine_ratio_ * k);
auto distances_tmp = raft::make_device_matrix<float, IdxT>(
gpu_resource_->getRaftHandle(device_), batch_size, k0);
auto candidates =
raft::make_device_matrix<IdxT, IdxT>(gpu_resource_->getRaftHandle(device_), batch_size, k0);
index_->search(batch_size,
queries,
k0,
distances_tmp.data_handle(),
candidates.data_handle(),
this->search_params_.get());

auto queries_host = raft::make_host_matrix<T, IdxT>(batch_size, index_->d);
auto candidates_host = raft::make_host_matrix<IdxT, IdxT>(batch_size, k0);
auto neighbors_host = raft::make_host_matrix<IdxT, IdxT>(batch_size, k);
auto distances_host = raft::make_host_matrix<float, IdxT>(batch_size, k);
auto dataset_v = raft::make_host_matrix_view<const T, faiss::idx_t>(
this->dataset_, index_->ntotal, index_->d);

raft::device_resources handle_ = gpu_resource_->getRaftHandle(device_);

raft::copy(queries_host.data_handle(), queries, queries_host.size(), handle_.get_stream());
raft::copy(candidates_host.data_handle(),
candidates.data_handle(),
candidates_host.size(),
handle_.get_stream());

// wait for the queries to copy to host in 'stream`
handle_.sync_stream();

cuvs::neighbors::refine(handle_,
dataset_v,
queries_host.view(),
candidates_host.view(),
neighbors_host.view(),
distances_host.view(),
parse_metric_type(this->metric_));

raft::copy(neighbors,
neighbors_host.data_handle(),
neighbors_host.size(),
handle_.get_stream());
raft::copy(
distances, distances_host.data_handle(), distances_host.size(), handle_.get_stream());
} else {
index_refine_->search(batch_size,
queries,
k,
distances,
reinterpret_cast<faiss::idx_t*>(neighbors),
this->refine_search_params_.get());
}
} else {
index_->search(batch_size,
queries,
Expand Down Expand Up @@ -253,15 +306,18 @@ void faiss_gpu<T>::load_(const std::string& file)
template <typename T>
class faiss_gpu_ivf_flat : public faiss_gpu<T> {
public:
using typename faiss_gpu<T>::build_param;
struct build_param : public faiss_gpu<T>::build_param {
bool use_cuvs;
};
using typename faiss_gpu<T>::search_param_base;

faiss_gpu_ivf_flat(Metric metric, int dim, const build_param& param)
: faiss_gpu<T>(metric, dim, param)
{
faiss::gpu::GpuIndexIVFFlatConfig config;
config.device = this->device_;
this->index_ = std::make_shared<faiss::gpu::GpuIndexIVFFlat>(
config.device = this->device_;
config.use_cuvs = param.use_cuvs;
this->index_ = std::make_shared<faiss::gpu::GpuIndexIVFFlat>(
this->gpu_resource_.get(), dim, param.nlist, this->metric_type_, config);
}

Expand Down Expand Up @@ -298,6 +354,8 @@ class faiss_gpu_ivfpq : public faiss_gpu<T> {
int m;
bool use_float16;
bool use_precomputed;
bool use_cuvs;
int bitsPerCode;
};
using typename faiss_gpu<T>::search_param_base;

Expand All @@ -307,14 +365,16 @@ class faiss_gpu_ivfpq : public faiss_gpu<T> {
faiss::gpu::GpuIndexIVFPQConfig config;
config.useFloat16LookupTables = param.use_float16;
config.usePrecomputedTables = param.use_precomputed;
config.device = this->device_;
config.use_cuvs = param.use_cuvs;
if (param.use_cuvs) { config.interleavedLayout = param.use_cuvs; }
config.device = this->device_;

this->index_ =
std::make_shared<faiss::gpu::GpuIndexIVFPQ>(this->gpu_resource_.get(),
dim,
param.nlist,
param.m,
8, // FAISS only supports bitsPerCode=8
param.bitsPerCode,
this->metric_type_,
config);
}
Expand All @@ -334,6 +394,11 @@ class faiss_gpu_ivfpq : public faiss_gpu<T> {
this->index_refine_ =
std::make_shared<faiss::IndexRefineFlat>(this->index_.get(), this->dataset_);
this->index_refine_.get()->k_factor = sp.refine_ratio;
faiss::IndexRefineSearchParameters faiss_refine_search_params;
faiss_refine_search_params.k_factor = this->index_refine_.get()->k_factor;
faiss_refine_search_params.base_index_params = this->search_params_.get();
this->refine_search_params_ =
std::make_unique<faiss::IndexRefineSearchParameters>(faiss_refine_search_params);
}
}

Expand Down
1 change: 1 addition & 0 deletions cpp/cmake/thirdparty/get_faiss.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ function(find_and_configure_faiss)
EXCLUDE_FROM_ALL ${exclude}
OPTIONS
"FAISS_ENABLE_GPU ${PKG_ENABLE_GPU}"
"FAISS_ENABLE_CUVS ${PKG_ENABLE_GPU}"
"FAISS_ENABLE_PYTHON OFF"
"FAISS_OPT_LEVEL ${CUVS_FAISS_OPT_LEVEL}"
"FAISS_USE_CUDA_TOOLKIT_STATIC ${CUDA_STATIC_RUNTIME}"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,7 @@ def faiss_gpu_ivf_pq_build(params, dims):
ret = params["M"] <= dims and dims % params["M"] == 0
if "use_cuvs" in params and params["use_cuvs"]:
return ret
pq_bits = 8
if "bitsPerCode" in params:
pq_bits = params["bitsPerCode"]
pq_bits = params.get("bitsPerCode", 8)
lookup_table_size = 4
if "useFloat16" in params and params["useFloat16"]:
lookup_table_size = 2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,17 @@ groups:
nlist: [2048]
ratio: [10]
useFloat16: [False, True]
use_raft: [False]
use_cuvs: [False]
search:
nprobe: [1, 5, 10, 50, 100, 200]
refine_ratio: [1]
groups:
baseraft:
basecuvs:
build:
nlist: [2048]
ratio: [10]
useFloat16: [False, True]
use_raft: [True]
use_cuvs: [True]
search:
nprobe: [1, 5, 10, 50, 100, 200]
refine_ratio: [1]
57 changes: 28 additions & 29 deletions python/cuvs_bench/cuvs_bench/config/algos/faiss_gpu_ivf_pq.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,71 +6,70 @@ groups:
base:
build:
nlist: [1024, 2048, 4096, 8192]
M: [64, 32, 16]
ratio: [10]
M: [64, 96]
ratio: [4]
usePrecomputed: [False, True]
useFloat16: [False, True]
use_raft: [False]
use_cuvs: [False]
bitsPerCode: [8]
search:
nprobe: [1, 5, 10, 50, 100, 200]
nprobe: [10, 50, 100, 200]
refine_ratio: [1, 2, 4]
baseraft:
basecuvs:
build:
nlist: [1024, 2048, 4096, 8192]
M: [64, 32, 16]
ratio: [10]
M: [96, 192, 384]
ratio: [4]
usePrecomputed: [False]
useFloat16: [False, True]
use_raft: [True]
bitsPerCode: [8, 6, 5, 4]
use_cuvs: [True]
bitsPerCode: [8]
search:
nprobe: [1, 5, 10, 50, 100, 200]
nprobe: [10, 50, 100, 200]
refine_ratio: [1, 2, 4]
large:
build:
nlist: [8192, 16384, 32768, 65536]
M: [48, 32, 16]
nlist: [16384, 32768, 65536]
M: [64, 96]
ratio: [4]
usePrecomputed: [False, True]
useFloat16: [False, True]
use_raft: [False]
useFloat16: [True]
use_cuvs: [False]
bitsPerCode: [8]
search:
nprobe: [20, 30, 40, 50, 100, 200, 500, 1000]
refine_ratio: [1, 2, 4]
largeraft:
largecuvs:
build:
nlist: [8192, 16384, 32768, 65536]
M: [48, 32, 16]
nlist: [16384, 32768, 65536]
M: [96, 192, 384]
ratio: [4]
usePrecomputed: [False]
useFloat16: [False, True]
use_raft: [True]
use_cuvs: [True]
bitsPerCode: [8, 6, 5, 4]
search:
nprobe: [20, 30, 40, 50, 100, 200, 500, 1000]
refine_ratio: [1, 2, 4]
100M:
build:
nlist: [50000]
M: [48]
ratio: [10]
nlist: [100000]
M: [64, 96]
ratio: [4]
usePrecomputed: [False, True]
useFloat16: [False, True]
use_raft: [False]
useFloat16: [True]
use_cuvs: [False]
bitsPerCode: [8]
search:
nprobe: [20, 30, 40, 50, 100, 200, 500, 1000]
refine_ratio: [1]
100Mraft:
100Mcuvs:
build:
nlist: [50000]
M: [48]
ratio: [10]
usePrecomputed: [False, True]
nlist: [100000]
M: [192, 384]
ratio: [4]
useFloat16: [False, True]
use_raft: [True]
use_cuvs: [True]
bitsPerCode: [8, 6, 5, 4]
search:
nprobe: [20, 30, 40, 50, 100, 200, 500, 1000]
Expand Down
Loading
Loading