diff --git a/.circleci/config.yml b/.circleci/config.yml
index 61eafebc52..ead818ccd6 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -17,11 +17,6 @@ executors:
     machine:
       image: ubuntu-2004:current
     resource_class: arm.medium
-  macosx-x86_64-cpu:
-    environment:
-      CONDA_ARCH: MacOSX-x86_64
-    macos:
-      xcode: 11.7.0  # max supported for conda build, https://circleci.com/docs/using-macos#supported-xcode-versions
   macosx-arm64-cpu:
     environment:
       CONDA_ARCH: MacOSX-arm64
@@ -66,6 +61,9 @@ jobs:
       cuda:
         type: string
         default: ""
+      raft:
+        type: string
+        default: ""
       cuda_archs:
         type: string
         default: ""
@@ -93,6 +91,8 @@ jobs:
       - run:
           name: Install conda build tools
           command: |
+            conda config --set solver libmamba
+            # conda config --set verbosity 3
             conda update -y -q conda
             conda install -y -q conda-build
       - when:
@@ -105,14 +105,16 @@ jobs:
                   conda config --set anaconda_upload yes
       - when:
           condition:
-              not: << parameters.label >>
+            and:
+              - not: << parameters.label >>
+              - not: << parameters.cuda >>
           steps:
             - run:
                 name: Conda build (CPU)
                 no_output_timeout: 30m
                 command: |
                   cd conda
-                  conda build faiss --python 3.10 -c pytorch -c pkgs/main -c conda-forge
+                  conda build faiss --python 3.11 -c pytorch
       - when:
           condition:
             and:
@@ -124,12 +126,28 @@ jobs:
                 no_output_timeout: 30m
                 command: |
                   cd conda
-                  conda build faiss --user pytorch --label <<parameters.label>> -c pytorch -c pkgs/main -c conda-forge
+                  conda build faiss --user pytorch --label <<parameters.label>> -c pytorch
+      - when:
+          condition:
+            and:
+              - not: << parameters.label >>
+              - << parameters.cuda >>
+              - not: << parameters.raft >>
+          steps:
+            - run:
+                name: Conda build (GPU)
+                no_output_timeout: 60m
+                command: |
+                  sudo update-alternatives --set cuda /usr/local/cuda-<<parameters.cuda>>
+                  cd conda
+                  conda build faiss-gpu --variants '{ "cudatoolkit": "<<parameters.cuda>>", "c_compiler_version": "<<parameters.compiler_version>>", "cxx_compiler_version": "<<parameters.compiler_version>>" }' \
+                      -c pytorch -c nvidia
       - when:
           condition:
             and:
               - << parameters.label >>
               - << parameters.cuda >>
+              - not: << parameters.raft >>
           steps:
             - run:
                 name: Conda build (GPU) w/ anaconda upload
@@ -138,7 +156,37 @@ jobs:
                   sudo update-alternatives --set cuda /usr/local/cuda-<<parameters.cuda>>
                   cd conda
                   conda build faiss-gpu --variants '{ "cudatoolkit": "<<parameters.cuda>>", "c_compiler_version": "<<parameters.compiler_version>>", "cxx_compiler_version": "<<parameters.compiler_version>>" }' \
-                      --user pytorch --label <<parameters.label>> -c pytorch -c nvidia -c pkgs/main -c conda-forge
+                      --user pytorch --label <<parameters.label>> -c pytorch -c nvidia
+      - when:
+          condition:
+            and:
+              - not: << parameters.label >>
+              - << parameters.cuda >>
+              - << parameters.raft >>
+          steps:
+            - run:
+                name: Conda build (GPU w/ RAFT)
+                no_output_timeout: 60m
+                command: |
+                  sudo update-alternatives --set cuda /usr/local/cuda-<<parameters.cuda>>
+                  cd conda
+                  conda build faiss-gpu-raft --variants '{ "cudatoolkit": "<<parameters.cuda>>", "c_compiler_version": "<<parameters.compiler_version>>", "cxx_compiler_version": "<<parameters.compiler_version>>" }' \
+                      -c pytorch -c nvidia -c rapidsai -c conda-forge
+      - when:
+          condition:
+            and:
+              - << parameters.label >>
+              - << parameters.cuda >>
+              - << parameters.raft >>
+          steps:
+            - run:
+                name: Conda build (GPU w/ RAFT) w/ anaconda upload
+                no_output_timeout: 60m
+                command: |
+                  sudo update-alternatives --set cuda /usr/local/cuda-<<parameters.cuda>>
+                  cd conda
+                  conda build faiss-gpu-raft --variants '{ "cudatoolkit": "<<parameters.cuda>>", "c_compiler_version": "<<parameters.compiler_version>>", "cxx_compiler_version": "<<parameters.compiler_version>>" }' \
+                      --user pytorch --label <<parameters.label>> -c pytorch -c nvidia -c rapidsai -c conda-forge
 
   build_cmake:
     parameters:
@@ -180,7 +228,7 @@ jobs:
           command: |
             conda config --set solver libmamba
             conda update -y -q conda
-            conda install -y -q pkgs/main::python=3.10 cmake make swig mkl=2023 mkl-devel=2023 numpy scipy pytest gxx_linux-64 sysroot_linux-64 -c pkgs/main -c conda-forge
+            conda install -y -q python=3.11 cmake make swig mkl=2023 mkl-devel=2023 numpy scipy pytest gxx_linux-64 sysroot_linux-64
       - when:
           condition:
             equal: [ "ON", << parameters.raft >> ]
@@ -282,9 +330,6 @@ workflows:
       - build_conda:
           name: Linux x86_64 (conda)
           exec: linux-x86_64-cpu
-      - build_conda:
-          name: OSX x86_64 (conda)
-          exec: macosx-x86_64-cpu
       - build_conda:
           name: Windows x86_64 (conda)
           exec: windows-x86_64-cpu
@@ -313,17 +358,21 @@ workflows:
             branches:
               ignore: /.*/
       - build_conda:
-          name: Windows x86_64 packages
-          exec: windows-x86_64-cpu
+          name: Linux x86_64 GPU w/ RAFT packages (CUDA 11.4)
+          exec: linux-x86_64-gpu
           label: main
+          raft: "ON"
+          cuda: "11.4"
+          cuda_archs: "60;61;70;72;75;80;86"
+          compiler_version: "11.2"
           filters:
             tags:
               only: /^v.*/
             branches:
               ignore: /.*/
       - build_conda:
-          name: OSX x86_64 packages
-          exec: macosx-x86_64-cpu
+          name: Windows x86_64 packages
+          exec: windows-x86_64-cpu
           label: main
           filters:
             tags:
@@ -373,10 +422,6 @@ workflows:
           name: Windows x86_64 nightlies
           exec: windows-x86_64-cpu
           label: nightly
-      - build_conda:
-          name: OSX x86_64 nightlies
-          exec: macosx-x86_64-cpu
-          label: nightly
       - build_conda:
           name: OSX arm64 nightlies
           exec: macosx-arm64-cpu
diff --git a/conda/conda_build_config.yaml b/conda/conda_build_config.yaml
index 2a005ccea6..77f0eec0a2 100644
--- a/conda/conda_build_config.yaml
+++ b/conda/conda_build_config.yaml
@@ -1,4 +1,4 @@
 python:
-  - 3.8  # [not x86_64 or not osx]
   - 3.9
   - 3.10
+  - 3.11
diff --git a/conda/faiss-gpu-raft/build-lib.sh b/conda/faiss-gpu-raft/build-lib.sh
new file mode 100644
index 0000000000..7ca17180a4
--- /dev/null
+++ b/conda/faiss-gpu-raft/build-lib.sh
@@ -0,0 +1,26 @@
+#!/bin/sh
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -e
+
+
+# Build libfaiss.so/libfaiss_avx2.so.
+cmake -B _build \
+      -DBUILD_SHARED_LIBS=ON \
+      -DBUILD_TESTING=OFF \
+      -DFAISS_OPT_LEVEL=avx2 \
+      -DFAISS_ENABLE_GPU=ON \
+      -DFAISS_ENABLE_RAFT=ON \
+      -DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCHS}" \
+      -DFAISS_ENABLE_PYTHON=OFF \
+      -DBLA_VENDOR=Intel10_64lp \
+      -DCMAKE_INSTALL_LIBDIR=lib \
+      -DCMAKE_BUILD_TYPE=Release .
+
+make -C _build -j$(nproc) faiss faiss_avx2
+
+cmake --install _build --prefix $PREFIX
+cmake --install _build --prefix _libfaiss_stage/
diff --git a/conda/faiss-gpu-raft/build-pkg.sh b/conda/faiss-gpu-raft/build-pkg.sh
new file mode 100644
index 0000000000..3bb61588e5
--- /dev/null
+++ b/conda/faiss-gpu-raft/build-pkg.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -e
+
+
+# Build swigfaiss.so/swigfaiss_avx2.so.
+cmake -B _build_python_${PY_VER} \
+      -Dfaiss_ROOT=_libfaiss_stage/ \
+      -DFAISS_OPT_LEVEL=avx2 \
+      -DFAISS_ENABLE_GPU=ON \
+      -DFAISS_ENABLE_RAFT=ON \
+      -DCMAKE_BUILD_TYPE=Release \
+      -DPython_EXECUTABLE=$PYTHON \
+      faiss/python
+
+make -C _build_python_${PY_VER} -j$(nproc) swigfaiss swigfaiss_avx2
+
+# Build actual python module.
+cd _build_python_${PY_VER}/
+$PYTHON setup.py install --single-version-externally-managed --record=record.txt --prefix=$PREFIX
diff --git a/conda/faiss-gpu-raft/meta.yaml b/conda/faiss-gpu-raft/meta.yaml
new file mode 100644
index 0000000000..14a5c606b1
--- /dev/null
+++ b/conda/faiss-gpu-raft/meta.yaml
@@ -0,0 +1,104 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+{% set version = environ.get('GIT_DESCRIBE_TAG').lstrip('v') %}
+{% set suffix = "_nightly" if environ.get('PACKAGE_TYPE') == 'nightly' else "" %}
+{% set number = GIT_DESCRIBE_NUMBER %}
+
+package:
+  name: faiss-pkg
+  version: {{ version }}
+
+build:
+  number: {{ number }}
+
+about:
+  home: https://github.com/facebookresearch/faiss
+  license: MIT
+  license_family: MIT
+  license_file: LICENSE
+  summary: A library for efficient similarity search and clustering of dense vectors.
+
+source:
+  git_url: ../../
+
+outputs:
+  - name: libfaiss
+    script: build-lib.sh  # [x86_64 and not win and not osx]
+    script: build-lib-osx.sh  # [x86_64 and osx]
+    script: build-lib-arm64.sh  # [not x86_64]
+    script: build-lib.bat  # [win]
+    build:
+      string: "h{{ PKG_HASH }}_{{ number }}_cuda{{ cudatoolkit }}_raft{{ suffix }}"
+      run_exports:
+        - {{ pin_compatible('libfaiss', exact=True) }}
+      script_env:
+        - CUDA_ARCHS
+    requirements:
+      build:
+        - {{ compiler('cxx') }}
+        - sysroot_linux-64  # [linux64]
+        - llvm-openmp  # [osx]
+        - cmake >=3.23.1
+        - make  # [not win]
+        - mkl-devel =2023  # [x86_64]
+      host:
+        - mkl =2023  # [x86_64]
+        - openblas  # [not x86_64]
+        - cudatoolkit {{ cudatoolkit }}
+        - libraft =23.08
+      run:
+        - mkl =2023  # [x86_64]
+        - openblas  # [not x86_64]
+        - {{ pin_compatible('cudatoolkit', max_pin='x.x') }}
+        - libraft =23.08
+    test:
+      requires:
+        - conda-build
+      commands:
+        - test -f $PREFIX/lib/libfaiss$SHLIB_EXT       # [not win]
+        - test -f $PREFIX/lib/libfaiss_avx2$SHLIB_EXT  # [x86_64 and not win]
+        - conda inspect linkages -p $PREFIX $PKG_NAME  # [not win]
+        - conda inspect objects -p $PREFIX $PKG_NAME   # [osx]
+
+  - name: faiss-gpu-raft
+    script: build-pkg.sh  # [x86_64 and not win and not osx]
+    script: build-pkg-osx.sh  # [x86_64 and osx]
+    script: build-pkg-arm64.sh # [not x86_64]
+    script: build-pkg.bat  # [win]
+    build:
+      string: "py{{ PY_VER }}_h{{ PKG_HASH }}_{{ number }}_cuda{{ cudatoolkit }}{{ suffix }}"
+    requirements:
+      build:
+        - {{ compiler('cxx') }}
+        - sysroot_linux-64 =2.17 # [linux64]
+        - swig
+        - cmake >=3.23.1
+        - make  # [not win]
+      host:
+        - python {{ python }}
+        - numpy >=1.19,<2
+        - {{ pin_subpackage('libfaiss', exact=True) }}
+      run:
+        - python {{ python }}
+        - numpy >=1.19,<2
+        - {{ pin_subpackage('libfaiss', exact=True) }}
+    test:
+      requires:
+        - numpy
+        - scipy
+        - pytorch
+      commands:
+        - python -X faulthandler -m unittest discover -v -s tests/ -p "test_*"
+        - python -X faulthandler -m unittest discover -v -s tests/ -p "torch_*"
+        - cp tests/common_faiss_tests.py faiss/gpu/test
+        - python -X faulthandler -m unittest discover -v -s faiss/gpu/test/ -p "test_*"
+        - python -X faulthandler -m unittest discover -v -s faiss/gpu/test/ -p "torch_*"
+        - sh test_cpu_dispatch.sh  # [linux64]
+      files:
+        - test_cpu_dispatch.sh  # [linux64]
+      source_files:
+        - tests/
+        - faiss/gpu/test/
diff --git a/conda/faiss-gpu/build-lib.sh b/conda/faiss-gpu/build-lib.sh
index 7925b3ba82..6b6b1c28d0 100755
--- a/conda/faiss-gpu/build-lib.sh
+++ b/conda/faiss-gpu/build-lib.sh
@@ -13,6 +13,7 @@ cmake -B _build \
       -DBUILD_TESTING=OFF \
       -DFAISS_OPT_LEVEL=avx2 \
       -DFAISS_ENABLE_GPU=ON \
+      -DFAISS_ENABLE_RAFT=OFF \
       -DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCHS}" \
       -DFAISS_ENABLE_PYTHON=OFF \
       -DBLA_VENDOR=Intel10_64lp \
diff --git a/conda/faiss-gpu/build-pkg.sh b/conda/faiss-gpu/build-pkg.sh
index 074a930299..3a41511921 100755
--- a/conda/faiss-gpu/build-pkg.sh
+++ b/conda/faiss-gpu/build-pkg.sh
@@ -12,6 +12,7 @@ cmake -B _build_python_${PY_VER} \
       -Dfaiss_ROOT=_libfaiss_stage/ \
       -DFAISS_OPT_LEVEL=avx2 \
       -DFAISS_ENABLE_GPU=ON \
+      -DFAISS_ENABLE_RAFT=OFF \
       -DCMAKE_BUILD_TYPE=Release \
       -DPython_EXECUTABLE=$PYTHON \
       faiss/python
diff --git a/conda/faiss-gpu/install-cmake.sh b/conda/faiss-gpu/install-cmake.sh
index 88bd9b909b..b2891919d5 100755
--- a/conda/faiss-gpu/install-cmake.sh
+++ b/conda/faiss-gpu/install-cmake.sh
@@ -6,5 +6,5 @@
 
 set -e
 
-wget -O - https://github.com/Kitware/CMake/releases/download/v3.17.1/cmake-3.17.1-Linux-x86_64.tar.gz | tar xzf -
-cp -R cmake-3.17.1-Linux-x86_64/* $PREFIX
+FAISS_DISABLE_CPU_FEATURES=AVX2 LD_DEBUG=libs python -c "import faiss" 2>&1 | grep libfaiss.so
+LD_DEBUG=libs python -c "import faiss" 2>&1 | grep libfaiss_avx2.so
diff --git a/conda/faiss-gpu/meta.yaml b/conda/faiss-gpu/meta.yaml
index 034ff30120..fcfd3b4bd2 100644
--- a/conda/faiss-gpu/meta.yaml
+++ b/conda/faiss-gpu/meta.yaml
@@ -26,7 +26,8 @@ source:
 
 outputs:
   - name: libfaiss
-    script: build-lib.sh  # [x86_64 and not win]
+    script: build-lib.sh  # [x86_64 and not win and not osx]
+    script: build-lib-osx.sh  # [x86_64 and osx]
     script: build-lib-arm64.sh  # [not x86_64]
     script: build-lib.bat  # [win]
     build:
@@ -38,7 +39,7 @@ outputs:
     requirements:
       build:
         - {{ compiler('cxx') }}
-        - sysroot_linux-64 =2.17  # [linux64]
+        - sysroot_linux-64  # [linux64]
         - llvm-openmp  # [osx]
         - cmake >=3.23.1
         - make  # [not win]
@@ -61,7 +62,8 @@ outputs:
         - conda inspect objects -p $PREFIX $PKG_NAME   # [osx]
 
   - name: faiss-gpu
-    script: build-pkg.sh  # [x86_64 and not win]
+    script: build-pkg.sh  # [x86_64 and not win and not osx]
+    script: build-pkg-osx.sh  # [x86_64 and osx]
     script: build-pkg-arm64.sh # [not x86_64]
     script: build-pkg.bat  # [win]
     build:
diff --git a/conda/faiss/install-cmake.sh b/conda/faiss/install-cmake.sh
deleted file mode 100755
index c92b8d14ee..0000000000
--- a/conda/faiss/install-cmake.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/sh#
-# Copyright (c) Facebook, Inc. and its affiliates.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-
-set -e
-
-wget -O - https://github.com/Kitware/CMake/releases/download/v3.17.1/cmake-3.17.1-Linux-x86_64.tar.gz | tar xzf -
-cp -R cmake-3.17.1-Linux-x86_64/* $PREFIX
diff --git a/conda/faiss/meta.yaml b/conda/faiss/meta.yaml
index 0e430785b9..a0431a4041 100644
--- a/conda/faiss/meta.yaml
+++ b/conda/faiss/meta.yaml
@@ -37,7 +37,7 @@ outputs:
     requirements:
       build:
         - {{ compiler('cxx') }}
-        - sysroot_linux-64 =2.17  # [linux64]
+        - sysroot_linux-64  # [linux64]
         - llvm-openmp  # [osx]
         - cmake >=3.23.1
         - make  # [not win]
diff --git a/faiss/gpu/test/test_gpu_basics.py b/faiss/gpu/test/test_gpu_basics.py
index 0768f6ee0f..f3f0a525d4 100755
--- a/faiss/gpu/test/test_gpu_basics.py
+++ b/faiss/gpu/test/test_gpu_basics.py
@@ -274,16 +274,16 @@ def do_test_input_types(self, vectorsMemoryLimit, queriesMemoryLimit):
         else:
             faiss.bfKnn(res, params)
 
-        self.assertTrue(np.allclose(ref_d, out_d, atol=1e-5))
-        self.assertGreaterEqual((out_i == ref_i).sum(), ref_i.size)
+        np.testing.assert_allclose(ref_d, out_d, atol=1e-5)
+        np.testing.assert_array_equal(out_i, ref_i)
 
-        out_d, out_i = faiss.knn_gpu(
-            res, qs, xs, k, device=gpu_id,
+        faiss.knn_gpu(
+            res, qs, xs, k, out_d, out_i, device=gpu_id,
             vectorsMemoryLimit=vectorsMemoryLimit,
             queriesMemoryLimit=queriesMemoryLimit)
 
-        self.assertTrue(np.allclose(ref_d, out_d, atol=1e-5))
-        self.assertGreaterEqual((out_i == ref_i).sum(), ref_i.size)
+        np.testing.assert_allclose(ref_d, out_d, atol=1e-5)
+        np.testing.assert_array_equal(out_i, ref_i)
 
         # Try int32 out indices
         out_i32 = np.empty((nq, k), dtype=np.int32)
@@ -292,7 +292,8 @@ def do_test_input_types(self, vectorsMemoryLimit, queriesMemoryLimit):
 
         faiss.bfKnn(res, params)
 
-        self.assertEqual((out_i32 == ref_i).sum(), ref_i.size)
+        np.testing.assert_allclose(ref_d, out_d, atol=1e-5)
+        np.testing.assert_array_equal(out_i32, ref_i)
 
         # Try float16 data/queries, i64 out indices
         xs_f16 = xs.astype(np.float16)
@@ -320,7 +321,7 @@ def do_test_input_types(self, vectorsMemoryLimit, queriesMemoryLimit):
         faiss.bfKnn(res, params)
 
         self.assertGreaterEqual((out_i_f16 == ref_i_f16).sum(), ref_i_f16.size - 5)
-        self.assertTrue(np.allclose(ref_d_f16, out_d_f16, atol = 2e-3))
+        np.testing.assert_allclose(ref_d_f16, out_d_f16, atol = 2e-3)
 
 class TestAllPairwiseDistance(unittest.TestCase):
     def test_dist(self):
@@ -381,7 +382,7 @@ def test_dist(self):
 
             print('f32', np.abs(ref_d - out_d).max())
 
-            self.assertTrue(np.allclose(ref_d, out_d, atol=1e-5))
+            np.testing.assert_allclose(ref_d, out_d, atol=1e-5)
 
             # Try float16 data/queries
             xs_f16 = xs.astype(np.float16)
@@ -414,7 +415,7 @@ def test_dist(self):
 
             print('f16', np.abs(ref_d_f16 - out_d_f16).max())
 
-            self.assertTrue(np.allclose(ref_d_f16, out_d_f16, atol = 4e-3))
+            np.testing.assert_allclose(ref_d_f16, out_d_f16, atol = 4e-3)
 
 
 
diff --git a/faiss/gpu/test/test_raft.py b/faiss/gpu/test/test_raft.py
index fe99b13cde..37ae2ef003 100644
--- a/faiss/gpu/test/test_raft.py
+++ b/faiss/gpu/test/test_raft.py
@@ -58,11 +58,11 @@ def test_IndexFlat(self):
 
         index_gpu.add(xb[2000:])
         Dnew, Inew = index_gpu.search(ds.get_queries(), 13)
-        np.testing.assert_allclose(Dref, Dnew, atol=1e-5)
+        np.testing.assert_allclose(Dref, Dnew, atol=1e-4)
         np.testing.assert_array_equal(Iref, Inew)
 
         # copy back to CPU
         index2 = faiss.index_gpu_to_cpu(index_gpu)
         Dnew, Inew = index2.search(ds.get_queries(), 13)
-        np.testing.assert_allclose(Dref, Dnew, atol=1e-5)
+        np.testing.assert_allclose(Dref, Dnew, atol=1e-4)
         np.testing.assert_array_equal(Iref, Inew)
diff --git a/faiss/python/CMakeLists.txt b/faiss/python/CMakeLists.txt
index d7da50ceb7..2a7227ead7 100644
--- a/faiss/python/CMakeLists.txt
+++ b/faiss/python/CMakeLists.txt
@@ -94,6 +94,9 @@ endif()
 
 if(FAISS_ENABLE_GPU)
   find_package(CUDAToolkit REQUIRED)
+  if(FAISS_ENABLE_RAFT)
+    find_package(raft COMPONENTS compiled distributed)
+  endif()
   target_link_libraries(swigfaiss PRIVATE CUDA::cudart $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::raft> $<$<BOOL:${FAISS_ENABLE_RAFT}>:nvidia::cutlass::cutlass>)
   target_link_libraries(swigfaiss_avx2 PRIVATE CUDA::cudart $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::raft> $<$<BOOL:${FAISS_ENABLE_RAFT}>:nvidia::cutlass::cutlass>)
 endif()
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index d5b6084432..f8070fd0ab 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -29,6 +29,7 @@ set(FAISS_TEST_SRC
   test_heap.cpp
   test_code_distance.cpp
   test_hnsw.cpp
+  test_partitioning.cpp
 )
 
 add_executable(faiss_test ${FAISS_TEST_SRC})
diff --git a/tests/test_contrib.py b/tests/test_contrib.py
index 057b043573..36c17792ce 100644
--- a/tests/test_contrib.py
+++ b/tests/test_contrib.py
@@ -217,7 +217,7 @@ def test_make_LT(self):
         Yref = X @ A.T + b
         lt = inspect_tools.make_LinearTransform_matrix(A, b)
         Ynew = lt.apply(X)
-        np.testing.assert_equal(Yref, Ynew)
+        np.testing.assert_allclose(Yref, Ynew, rtol=1e-06)
 
     def test_NSG_neighbors(self):
         # FIXME number of elements to add should be >> 100
diff --git a/tests/test_local_search_quantizer.py b/tests/test_local_search_quantizer.py
index 721aada01e..4c239527db 100644
--- a/tests/test_local_search_quantizer.py
+++ b/tests/test_local_search_quantizer.py
@@ -582,7 +582,7 @@ def test_lut(self):
         lut_ref = lut_ref.reshape(nq, codebook_size)
 
         # max rtoal in OSX: 2.87e-6
-        np.testing.assert_allclose(lut, lut_ref, rtol=5e-06)
+        np.testing.assert_allclose(lut, lut_ref, rtol=1e-04)
 
 
 class TestIndexProductLocalSearchQuantizer(unittest.TestCase):
diff --git a/tests/test_partitioning.cpp b/tests/test_partitioning.cpp
new file mode 100644
index 0000000000..b719fcfe01
--- /dev/null
+++ b/tests/test_partitioning.cpp
@@ -0,0 +1,33 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <gtest/gtest.h>
+
+#include <faiss/utils/AlignedTable.h>
+#include <faiss/utils/partitioning.h>
+
+using namespace faiss;
+
+typedef AlignedTable<uint16_t> AlignedTableUint16;
+
+// TODO: This test fails when Faiss is compiled with
+// GCC 13.2 from conda-forge with AVX2 enabled. This may be
+// a GCC bug that needs to be investigated further.
+// As of 16-AUG-2023 the Faiss conda packages are built
+// with GCC 11.2, so the published binaries are not affected.
+TEST(TestPartitioning, TestPartitioningBigRange) {
+    auto n = 1024;
+    AlignedTableUint16 tab(n);
+    for (auto i = 0; i < n; i++) {
+        tab[i] = i * 64;
+    }
+    int32_t hist[16]{};
+    simd_histogram_16(tab.get(), n, 0, 12, hist);
+    for (auto i = 0; i < 16; i++) {
+        ASSERT_EQ(hist[i], 64);
+    }
+}