From 90654d6011e34cd5e67521ceb67c582266b302bd Mon Sep 17 00:00:00 2001 From: Gergely Szilvasy Date: Tue, 28 Nov 2023 11:50:03 -0800 Subject: [PATCH] benchmark core faiss prereqs Summary: 1. Support `search_preassigned` in IVFFastScan 2. `try_extract_index_ivf` to search recursively and support `IndexRefine` 3. `get_InvertedListScanner` to fail where not available 4. Work around an OpenMP issue with `IndexIVFSpectralHash` Reviewed By: mdouze Differential Revision: D51427241 fbshipit-source-id: 365e3f11d24e80f101f986fc358c28dcc00805fa --- faiss/IVFlib.cpp | 22 +-- faiss/IndexIVF.cpp | 2 +- faiss/IndexIVFAdditiveQuantizerFastScan.cpp | 6 +- faiss/IndexIVFFastScan.cpp | 152 +++++++++++++++----- faiss/IndexIVFFastScan.h | 24 ++++ faiss/utils/distances.cpp | 52 +++++-- tests/test_search_params.py | 37 +++-- 7 files changed, 224 insertions(+), 71 deletions(-) diff --git a/faiss/IVFlib.cpp b/faiss/IVFlib.cpp index 6834a54b7f..91aa7af7f3 100644 --- a/faiss/IVFlib.cpp +++ b/faiss/IVFlib.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -58,24 +59,29 @@ void check_compatible_for_merge(const Index* index0, const Index* index1) { } const IndexIVF* try_extract_index_ivf(const Index* index) { - if (auto* pt = dynamic_cast(index)) { - index = pt->index; + auto* ivf = dynamic_cast(index); + if (ivf != nullptr) { + return ivf; } + if (auto* pt = dynamic_cast(index)) { + return try_extract_index_ivf(pt->index); + } if (auto* idmap = dynamic_cast(index)) { - index = idmap->index; + return try_extract_index_ivf(idmap->index); } if (auto* idmap = dynamic_cast(index)) { - index = idmap->index; + return try_extract_index_ivf(idmap->index); } if (auto* indep = dynamic_cast(index)) { - index = indep->index_ivf; + return try_extract_index_ivf(indep->index_ivf); + } + if (auto* refine = dynamic_cast(index)) { + return try_extract_index_ivf(refine->base_index); } - auto* ivf = dynamic_cast(index); - - return ivf; + return nullptr; } IndexIVF*
try_extract_index_ivf(Index* index) { diff --git a/faiss/IndexIVF.cpp b/faiss/IndexIVF.cpp index 4e0f464811..a1fa8cd16b 100644 --- a/faiss/IndexIVF.cpp +++ b/faiss/IndexIVF.cpp @@ -904,7 +904,7 @@ void IndexIVF::range_search_preassigned( InvertedListScanner* IndexIVF::get_InvertedListScanner( bool /*store_pairs*/, const IDSelector* /* sel */) const { - return nullptr; + FAISS_THROW_MSG("get_InvertedListScanner not implemented"); } void IndexIVF::reconstruct(idx_t key, float* recons) const { diff --git a/faiss/IndexIVFAdditiveQuantizerFastScan.cpp b/faiss/IndexIVFAdditiveQuantizerFastScan.cpp index 22f7f2e2df..25c3aa2b06 100644 --- a/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +++ b/faiss/IndexIVFAdditiveQuantizerFastScan.cpp @@ -314,9 +314,11 @@ void IndexIVFAdditiveQuantizerFastScan::search( NormTableScaler scaler(norm_scale); if (metric_type == METRIC_L2) { - search_dispatch_implem(n, x, k, distances, labels, scaler); + search_dispatch_implem( + n, x, k, distances, labels, nullptr, nullptr, scaler); } else { - search_dispatch_implem(n, x, k, distances, labels, scaler); + search_dispatch_implem( + n, x, k, distances, labels, nullptr, nullptr, scaler); } } diff --git a/faiss/IndexIVFFastScan.cpp b/faiss/IndexIVFFastScan.cpp index 46d7a2d57c..0b9c4e0992 100644 --- a/faiss/IndexIVFFastScan.cpp +++ b/faiss/IndexIVFFastScan.cpp @@ -314,9 +314,39 @@ void IndexIVFFastScan::search( DummyScaler scaler; if (metric_type == METRIC_L2) { - search_dispatch_implem(n, x, k, distances, labels, scaler); + search_dispatch_implem( + n, x, k, distances, labels, nullptr, nullptr, scaler); } else { - search_dispatch_implem(n, x, k, distances, labels, scaler); + search_dispatch_implem( + n, x, k, distances, labels, nullptr, nullptr, scaler); + } +} + +void IndexIVFFastScan::search_preassigned( + idx_t n, + const float* x, + idx_t k, + const idx_t* assign, + const float* centroid_dis, + float* distances, + idx_t* labels, + bool store_pairs, + const IVFSearchParameters* params, + 
IndexIVFStats* stats) const { + FAISS_THROW_IF_NOT_MSG( + !params, "search params not supported for this index"); + FAISS_THROW_IF_NOT_MSG( + !store_pairs, "store_pairs not supported for this index"); + FAISS_THROW_IF_NOT_MSG(!stats, "stats not supported for this index"); + FAISS_THROW_IF_NOT(k > 0); + + DummyScaler scaler; + if (metric_type == METRIC_L2) { + search_dispatch_implem( + n, x, k, distances, labels, assign, centroid_dis, scaler); + } else { + search_dispatch_implem( + n, x, k, distances, labels, assign, centroid_dis, scaler); } } @@ -336,6 +366,8 @@ void IndexIVFFastScan::search_dispatch_implem( idx_t k, float* distances, idx_t* labels, + const idx_t* coarse_ids, + const float* coarse_dis, const Scaler& scaler) const { using Cfloat = typename std::conditional< is_max, @@ -366,9 +398,11 @@ void IndexIVFFastScan::search_dispatch_implem( } if (impl == 1) { - search_implem_1(n, x, k, distances, labels, scaler); + search_implem_1( + n, x, k, distances, labels, coarse_ids, coarse_dis, scaler); } else if (impl == 2) { - search_implem_2(n, x, k, distances, labels, scaler); + search_implem_2( + n, x, k, distances, labels, coarse_ids, coarse_dis, scaler); } else if (impl >= 10 && impl <= 15) { size_t ndis = 0, nlist_visited = 0; @@ -381,12 +415,23 @@ void IndexIVFFastScan::search_dispatch_implem( k, distances, labels, + coarse_ids, + coarse_dis, impl, &ndis, &nlist_visited, scaler); } else if (impl == 14 || impl == 15) { - search_implem_14(n, x, k, distances, labels, impl, scaler); + search_implem_14( + n, + x, + k, + distances, + labels, + coarse_ids, + coarse_dis, + impl, + scaler); } else { search_implem_10( n, @@ -394,6 +439,8 @@ void IndexIVFFastScan::search_dispatch_implem( k, distances, labels, + coarse_ids, + coarse_dis, impl, &ndis, &nlist_visited, @@ -423,7 +470,16 @@ void IndexIVFFastScan::search_dispatch_implem( if (impl == 14 || impl == 15) { // this might require slicing if there are too // many queries (for now we keep this simple) - 
search_implem_14(n, x, k, distances, labels, impl, scaler); + search_implem_14( + n, + x, + k, + distances, + labels, + coarse_ids, + coarse_dis, + impl, + scaler); } else { #pragma omp parallel for reduction(+ : ndis, nlist_visited) for (int slice = 0; slice < nslice; slice++) { @@ -431,6 +487,12 @@ void IndexIVFFastScan::search_dispatch_implem( idx_t i1 = n * (slice + 1) / nslice; float* dis_i = distances + i0 * k; idx_t* lab_i = labels + i0 * k; + const idx_t* coarse_ids_i = coarse_ids != nullptr + ? coarse_ids + i0 * nprobe + : nullptr; + const float* coarse_dis_i = coarse_dis != nullptr + ? coarse_dis + i0 * nprobe + : nullptr; if (impl == 12 || impl == 13) { search_implem_12( i1 - i0, @@ -438,6 +500,8 @@ void IndexIVFFastScan::search_dispatch_implem( k, dis_i, lab_i, + coarse_ids_i, + coarse_dis_i, impl, &ndis, &nlist_visited, @@ -449,6 +513,8 @@ void IndexIVFFastScan::search_dispatch_implem( k, dis_i, lab_i, + coarse_ids_i, + coarse_dis_i, impl, &ndis, &nlist_visited, @@ -465,6 +531,22 @@ void IndexIVFFastScan::search_dispatch_implem( } } +#define COARSE_QUANTIZE \ + std::unique_ptr coarse_ids_buffer; \ + std::unique_ptr coarse_dis_buffer; \ + if (coarse_ids == nullptr || coarse_dis == nullptr) { \ + coarse_ids_buffer.reset(new idx_t[n * nprobe]); \ + coarse_dis_buffer.reset(new float[n * nprobe]); \ + quantizer->search( \ + n, \ + x, \ + nprobe, \ + coarse_dis_buffer.get(), \ + coarse_ids_buffer.get()); \ + coarse_ids = coarse_ids_buffer.get(); \ + coarse_dis = coarse_dis_buffer.get(); \ + } + template void IndexIVFFastScan::search_implem_1( idx_t n, @@ -472,19 +554,18 @@ void IndexIVFFastScan::search_implem_1( idx_t k, float* distances, idx_t* labels, + const idx_t* coarse_ids, + const float* coarse_dis, const Scaler& scaler) const { FAISS_THROW_IF_NOT(orig_invlists); - std::unique_ptr coarse_ids(new idx_t[n * nprobe]); - std::unique_ptr coarse_dis(new float[n * nprobe]); - - quantizer->search(n, x, nprobe, coarse_dis.get(), coarse_ids.get()); + 
COARSE_QUANTIZE; size_t dim12 = ksub * M; AlignedTable dis_tables; AlignedTable biases; - compute_LUT(n, x, coarse_ids.get(), coarse_dis.get(), dis_tables, biases); + compute_LUT(n, x, coarse_ids, coarse_dis, dis_tables, biases); bool single_LUT = !lookup_table_is_3d(); @@ -543,14 +624,12 @@ void IndexIVFFastScan::search_implem_2( idx_t k, float* distances, idx_t* labels, + const idx_t* coarse_ids, + const float* coarse_dis, const Scaler& scaler) const { FAISS_THROW_IF_NOT(orig_invlists); - std::unique_ptr coarse_ids(new idx_t[n * nprobe]); - std::unique_ptr coarse_dis(new float[n * nprobe]); - - quantizer->search(n, x, nprobe, coarse_dis.get(), coarse_ids.get()); - + COARSE_QUANTIZE; size_t dim12 = ksub * M2; AlignedTable dis_tables; AlignedTable biases; @@ -559,8 +638,8 @@ void IndexIVFFastScan::search_implem_2( compute_LUT_uint8( n, x, - coarse_ids.get(), - coarse_dis.get(), + coarse_ids, + coarse_dis, dis_tables, biases, normalizers.get()); @@ -636,6 +715,8 @@ void IndexIVFFastScan::search_implem_10( idx_t k, float* distances, idx_t* labels, + const idx_t* coarse_ids, + const float* coarse_dis, int impl, size_t* ndis_out, size_t* nlist_out, @@ -647,16 +728,13 @@ void IndexIVFFastScan::search_implem_10( using ReservoirHC = ReservoirHandler; using SingleResultHC = SingleResultHandler; - std::unique_ptr coarse_ids(new idx_t[n * nprobe]); - std::unique_ptr coarse_dis(new float[n * nprobe]); - uint64_t times[10]; memset(times, 0, sizeof(times)); int ti = 0; #define TIC times[ti++] = get_cy() TIC; - quantizer->search(n, x, nprobe, coarse_dis.get(), coarse_ids.get()); + COARSE_QUANTIZE; TIC; @@ -668,8 +746,8 @@ void IndexIVFFastScan::search_implem_10( compute_LUT_uint8( n, x, - coarse_ids.get(), - coarse_dis.get(), + coarse_ids, + coarse_dis, dis_tables, biases, normalizers.get()); @@ -757,6 +835,8 @@ void IndexIVFFastScan::search_implem_12( idx_t k, float* distances, idx_t* labels, + const idx_t* coarse_ids, + const float* coarse_dis, int impl, size_t* ndis_out, 
size_t* nlist_out, @@ -766,16 +846,13 @@ void IndexIVFFastScan::search_implem_12( } FAISS_THROW_IF_NOT(bbs == 32); - std::unique_ptr coarse_ids(new idx_t[n * nprobe]); - std::unique_ptr coarse_dis(new float[n * nprobe]); - uint64_t times[10]; memset(times, 0, sizeof(times)); int ti = 0; #define TIC times[ti++] = get_cy() TIC; - quantizer->search(n, x, nprobe, coarse_dis.get(), coarse_ids.get()); + COARSE_QUANTIZE; TIC; @@ -787,8 +864,8 @@ void IndexIVFFastScan::search_implem_12( compute_LUT_uint8( n, x, - coarse_ids.get(), - coarse_dis.get(), + coarse_ids, + coarse_dis, dis_tables, biases, normalizers.get()); @@ -954,6 +1031,8 @@ void IndexIVFFastScan::search_implem_14( idx_t k, float* distances, idx_t* labels, + const idx_t* coarse_ids, + const float* coarse_dis, int impl, const Scaler& scaler) const { if (n == 0) { // does not work well with reservoir @@ -961,12 +1040,9 @@ void IndexIVFFastScan::search_implem_14( } FAISS_THROW_IF_NOT(bbs == 32); - std::unique_ptr coarse_ids(new idx_t[n * nprobe]); - std::unique_ptr coarse_dis(new float[n * nprobe]); - uint64_t ttg0 = get_cy(); - quantizer->search(n, x, nprobe, coarse_dis.get(), coarse_ids.get()); + COARSE_QUANTIZE; uint64_t ttg1 = get_cy(); uint64_t coarse_search_tt = ttg1 - ttg0; @@ -979,8 +1055,8 @@ void IndexIVFFastScan::search_implem_14( compute_LUT_uint8( n, x, - coarse_ids.get(), - coarse_dis.get(), + coarse_ids, + coarse_dis, dis_tables, biases, normalizers.get()); @@ -1281,6 +1357,8 @@ template void IndexIVFFastScan::search_dispatch_implem( idx_t k, float* distances, idx_t* labels, + const idx_t* coarse_ids, + const float* coarse_dis, const NormTableScaler& scaler) const; template void IndexIVFFastScan::search_dispatch_implem( @@ -1289,6 +1367,8 @@ template void IndexIVFFastScan::search_dispatch_implem( idx_t k, float* distances, idx_t* labels, + const idx_t* coarse_ids, + const float* coarse_dis, const NormTableScaler& scaler) const; } // namespace faiss diff --git a/faiss/IndexIVFFastScan.h 
b/faiss/IndexIVFFastScan.h index c1a6b506c1..824e63ed28 100644 --- a/faiss/IndexIVFFastScan.h +++ b/faiss/IndexIVFFastScan.h @@ -105,6 +105,18 @@ struct IndexIVFFastScan : IndexIVF { idx_t* labels, const SearchParameters* params = nullptr) const override; + void search_preassigned( + idx_t n, + const float* x, + idx_t k, + const idx_t* assign, + const float* centroid_dis, + float* distances, + idx_t* labels, + bool store_pairs, + const IVFSearchParameters* params = nullptr, + IndexIVFStats* stats = nullptr) const override; + /// will just fail void range_search( idx_t n, @@ -122,6 +134,8 @@ struct IndexIVFFastScan : IndexIVF { idx_t k, float* distances, idx_t* labels, + const idx_t* coarse_ids, + const float* coarse_dis, const Scaler& scaler) const; template @@ -131,6 +145,8 @@ struct IndexIVFFastScan : IndexIVF { idx_t k, float* distances, idx_t* labels, + const idx_t* coarse_ids, + const float* coarse_dis, const Scaler& scaler) const; template @@ -140,6 +156,8 @@ struct IndexIVFFastScan : IndexIVF { idx_t k, float* distances, idx_t* labels, + const idx_t* coarse_ids, + const float* coarse_dis, const Scaler& scaler) const; // implem 10 and 12 are not multithreaded internally, so @@ -151,6 +169,8 @@ struct IndexIVFFastScan : IndexIVF { idx_t k, float* distances, idx_t* labels, + const idx_t* coarse_ids, + const float* coarse_dis, int impl, size_t* ndis_out, size_t* nlist_out, @@ -163,6 +183,8 @@ struct IndexIVFFastScan : IndexIVF { idx_t k, float* distances, idx_t* labels, + const idx_t* coarse_ids, + const float* coarse_dis, int impl, size_t* ndis_out, size_t* nlist_out, @@ -176,6 +198,8 @@ struct IndexIVFFastScan : IndexIVF { idx_t k, float* distances, idx_t* labels, + const idx_t* coarse_ids, + const float* coarse_dis, int impl, const Scaler& scaler) const; diff --git a/faiss/utils/distances.cpp b/faiss/utils/distances.cpp index a94301a42f..5b66158c09 100644 --- a/faiss/utils/distances.cpp +++ b/faiss/utils/distances.cpp @@ -64,7 +64,7 @@ void fvec_norms_L2( 
const float* __restrict x, size_t d, size_t nx) { -#pragma omp parallel for schedule(guided) +#pragma omp parallel for if (nx > 10000) for (int64_t i = 0; i < nx; i++) { nr[i] = sqrtf(fvec_norm_L2sqr(x + i * d, d)); } @@ -75,24 +75,52 @@ void fvec_norms_L2sqr( const float* __restrict x, size_t d, size_t nx) { -#pragma omp parallel for schedule(guided) +#pragma omp parallel for if (nx > 10000) for (int64_t i = 0; i < nx; i++) nr[i] = fvec_norm_L2sqr(x + i * d, d); } -void fvec_renorm_L2(size_t d, size_t nx, float* __restrict x) { -#pragma omp parallel for schedule(guided) +// The following is a workaround for a problem +// in OpenMP in fbcode. The crash occurs +// inside OMP when IndexIVFSpectralHash::set_query() +// calls fvec_renorm_L2. set_query() always +// calls this function with nx == 1, so even +// the omp version should run single-threaded, +// as per the if condition of the omp pragma. +// Instead, the omp version crashes inside OMP. +// The workaround below explicitly branches +// off to a codepath without omp.
+ +#define FVEC_RENORM_L2_IMPL \ + float* __restrict xi = x + i * d; \ + \ + float nr = fvec_norm_L2sqr(xi, d); \ + \ + if (nr > 0) { \ + size_t j; \ + const float inv_nr = 1.0 / sqrtf(nr); \ + for (j = 0; j < d; j++) \ + xi[j] *= inv_nr; \ + } + +void fvec_renorm_L2_noomp(size_t d, size_t nx, float* __restrict x) { for (int64_t i = 0; i < nx; i++) { - float* __restrict xi = x + i * d; + FVEC_RENORM_L2_IMPL + } +} - float nr = fvec_norm_L2sqr(xi, d); +void fvec_renorm_L2_omp(size_t d, size_t nx, float* __restrict x) { +#pragma omp parallel for if (nx > 10000) + for (int64_t i = 0; i < nx; i++) { + FVEC_RENORM_L2_IMPL + } +} - if (nr > 0) { - size_t j; - const float inv_nr = 1.0 / sqrtf(nr); - for (j = 0; j < d; j++) - xi[j] *= inv_nr; - } +void fvec_renorm_L2(size_t d, size_t nx, float* __restrict x) { + if (nx <= 10000) { + fvec_renorm_L2_noomp(d, nx, x); + } else { + fvec_renorm_L2_omp(d, nx, x); } } diff --git a/tests/test_search_params.py b/tests/test_search_params.py index d832a07cf8..8d3e42a49d 100644 --- a/tests/test_search_params.py +++ b/tests/test_search_params.py @@ -444,9 +444,9 @@ def test_12_92(self): class TestPrecomputed(unittest.TestCase): - def test_knn_and_range(self): - ds = datasets.SyntheticDataset(32, 1000, 100, 20) - index = faiss.index_factory(ds.d, "IVF32,Flat") + def do_test_knn_and_range(self, factory, range=True): + ds = datasets.SyntheticDataset(32, 10000, 100, 20) + index = faiss.index_factory(ds.d, factory) index.train(ds.get_train()) index.add(ds.get_database()) index.nprobe = 5 @@ -455,14 +455,27 @@ def test_knn_and_range(self): Dq, Iq = index.quantizer.search(ds.get_queries(), index.nprobe) Dnew, Inew = index.search_preassigned(ds.get_queries(), 10, Iq, Dq) np.testing.assert_equal(Iref, Inew) - np.testing.assert_equal(Dref, Dnew) + np.testing.assert_allclose(Dref, Dnew, atol=1e-5) - r2 = float(np.median(Dref[:, 5])) - Lref, Dref, Iref = index.range_search(ds.get_queries(), r2) - assert Lref.size > 10 # make sure there is something 
to test... + if range: + r2 = float(np.median(Dref[:, 5])) + Lref, Dref, Iref = index.range_search(ds.get_queries(), r2) + assert Lref.size > 10 # make sure there is something to test... - Lnew, Dnew, Inew = index.range_search_preassigned(ds.get_queries(), r2, Iq, Dq) - check_ref_range_results( - Lref, Dref, Iref, - Lnew, Dnew, Inew - ) + Lnew, Dnew, Inew = index.range_search_preassigned(ds.get_queries(), r2, Iq, Dq) + check_ref_range_results( + Lref, Dref, Iref, + Lnew, Dnew, Inew + ) + + def test_knn_and_range_Flat(self): + self.do_test_knn_and_range("IVF32,Flat") + + def test_knn_and_range_SQ(self): + self.do_test_knn_and_range("IVF32,SQ8") + + def test_knn_and_range_PQ(self): + self.do_test_knn_and_range("IVF32,PQ8x4np") + + def test_knn_and_range_FS(self): + self.do_test_knn_and_range("IVF32,PQ8x4fs", range=False)