Skip to content

Commit

Permalink
Merge branch 'main' into cuvs-migrate
Browse files Browse the repository at this point in the history
  • Loading branch information
tarang-jain authored Sep 30, 2024
2 parents d4851ab + 09bffd2 commit 54df8fa
Show file tree
Hide file tree
Showing 22 changed files with 423 additions and 40 deletions.
7 changes: 6 additions & 1 deletion .github/actions/build_cmake/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ runs:
conda update -y -q conda
echo "$CONDA/bin" >> $GITHUB_PATH
conda install -y -q python=3.11 cmake make swig numpy scipy pytest
conda install -y -q python=3.11 cmake make swig numpy scipy pytest gflags
# install base packages for ARM64
if [ "${{ runner.arch }}" = "ARM64" ]; then
Expand Down Expand Up @@ -143,6 +143,11 @@ runs:
run: |
export GTEST_OUTPUT="xml:$(realpath .)/test-results/googletest/"
make -C build test
- name: C++ perf benchmarks
shell: bash
if: inputs.rocm == 'OFF'
run: |
find ./build/perf_tests/ -executable -type f -name "bench*" -exec '{}' -v \;
- name: Install Python extension
shell: bash
working-directory: build/faiss/python
Expand Down
3 changes: 2 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -106,11 +106,12 @@ add_subdirectory(demos)
add_subdirectory(benchs)
add_subdirectory(tutorial/cpp)


# CTest must be included in the top level to enable `make test` target.
include(CTest)
if(BUILD_TESTING)
add_subdirectory(tests)

add_subdirectory(perf_tests)
if(FAISS_ENABLE_GPU)
if(FAISS_ENABLE_ROCM)
add_subdirectory(faiss/gpu-rocm/test)
Expand Down
17 changes: 17 additions & 0 deletions INSTALL.md
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,22 @@ The `-j` option enables parallel compilation of multiple units, leading to a
faster build, but increasing the chances of running out of memory, in which case
it is recommended to set the `-j` option to a fixed value (such as `-j4`).

If you are using one of the optimization options, build the corresponding target before building swigfaiss.

For AVX2:

``` shell
$ make -C build -j faiss_avx2
```

For AVX512:

``` shell
$ make -C build -j faiss_avx512
```

This will ensure the creation of the necessary files when building and installing the Python package.

## Step 3: Building the python bindings (optional)

``` shell
Expand All @@ -177,6 +193,7 @@ $ (cd build/faiss/python && python setup.py install)
The first command builds the python bindings for Faiss, while the second one
generates and installs the python package.


## Step 4: Installing the C++ library and headers (optional)

``` shell
Expand Down
4 changes: 3 additions & 1 deletion benchs/bench_fw/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from statistics import mean, median
from typing import Any, Dict, List, Optional

import faiss # @manual=//faiss/python:pyfaiss_gpu
import faiss # @manual=//faiss/python:pyfaiss

import numpy as np

Expand Down Expand Up @@ -214,6 +214,7 @@ def set_io(self, benchmark_io: BenchmarkIO):
@dataclass
class TrainOperator(IndexOperator):
codec_descs: List[CodecDescriptor] = field(default_factory=lambda: [])
assemble_opaque: bool = True

def get_desc(self, name: str) -> Optional[CodecDescriptor]:
for desc in self.codec_descs:
Expand Down Expand Up @@ -248,6 +249,7 @@ def build_index_wrapper(self, codec_desc: CodecDescriptor):
factory=codec_desc.factory,
training_vectors=codec_desc.training_vectors,
codec_name=codec_desc.get_name(),
assemble_opaque=self.assemble_opaque,
)
index.set_io(self.io)
codec_desc.index = index
Expand Down
4 changes: 2 additions & 2 deletions benchs/bench_fw/benchmark_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@
from typing import Any, Dict, List, Optional
from zipfile import ZipFile

import faiss # @manual=//faiss/python:pyfaiss_gpu
import faiss # @manual=//faiss/python:pyfaiss

import numpy as np
import submitit
from faiss.contrib.datasets import ( # @manual=//faiss/contrib:faiss_contrib_gpu
from faiss.contrib.datasets import ( # @manual=//faiss/contrib:faiss_contrib
dataset_from_name,
)

Expand Down
19 changes: 13 additions & 6 deletions benchs/bench_fw/descriptors.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,17 @@
from dataclasses import dataclass
from typing import Any, Dict, List, Optional

import faiss # @manual=//faiss/python:pyfaiss_gpu
import faiss # @manual=//faiss/python:pyfaiss

from .benchmark_io import BenchmarkIO
from .utils import timer

logger = logging.getLogger(__name__)


# Important: filenames end with "." and carry no extension (npy, codec, index);
# when writing files, you must append the extension yourself, e.g. filename + "npy".

@dataclass
class IndexDescriptorClassic:
bucket: Optional[str] = None
Expand Down Expand Up @@ -110,21 +113,25 @@ def get_filename(
filename += "."
return filename

def get_kmeans_filename(self, k):
    """Return the k-means file stem for ``k``.

    The result is the output of ``self.get_filename()`` followed by
    ``kmeans_<k>.``; it ends with a dot and carries no extension.
    """
    prefix = self.get_filename()
    return prefix + f"kmeans_{k}."

def k_means(self, io, k, dry_run):
logger.info(f"k_means {k} {self}")
kmeans_vectors = DatasetDescriptor(
tablename=f"{self.get_filename()}kmeans_{k}.npy"
tablename=f"{self.get_filename()}kmeans_{k}"
)
meta_filename = kmeans_vectors.tablename + ".json"
if not io.file_exist(kmeans_vectors.tablename) or not io.file_exist(
kmeans_filename = kmeans_vectors.get_filename() + "npy"
meta_filename = kmeans_vectors.get_filename() + "json"
if not io.file_exist(kmeans_filename) or not io.file_exist(
meta_filename
):
if dry_run:
return None, None, kmeans_vectors.tablename
return None, None, kmeans_filename
x = io.get_dataset(self)
kmeans = faiss.Kmeans(d=x.shape[1], k=k, gpu=True)
_, t, _ = timer("k_means", lambda: kmeans.train(x))
io.write_nparray(kmeans.centroids, kmeans_vectors.tablename)
io.write_nparray(kmeans.centroids, kmeans_filename)
io.write_json({"k_means_time": t}, meta_filename)
else:
t = io.read_json(meta_filename)["k_means_time"]
Expand Down
32 changes: 23 additions & 9 deletions benchs/bench_fw/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,18 @@
from dataclasses import dataclass
from typing import ClassVar, Dict, List, Optional

import faiss # @manual=//faiss/python:pyfaiss_gpu
import faiss # @manual=//faiss/python:pyfaiss
import numpy as np
from faiss.benchs.bench_fw.descriptors import IndexBaseDescriptor

from faiss.contrib.evaluation import ( # @manual=//faiss/contrib:faiss_contrib_gpu
from faiss.contrib.evaluation import ( # @manual=//faiss/contrib:faiss_contrib
knn_intersection_measure,
OperatingPointsWithRanges,
)
from faiss.contrib.factory_tools import ( # @manual=//faiss/contrib:faiss_contrib_gpu
from faiss.contrib.factory_tools import ( # @manual=//faiss/contrib:faiss_contrib
reverse_index_factory,
)
from faiss.contrib.ivf_tools import ( # @manual=//faiss/contrib:faiss_contrib_gpu
from faiss.contrib.ivf_tools import ( # @manual=//faiss/contrib:faiss_contrib
add_preassigned,
replace_ivf_quantizer,
)
Expand Down Expand Up @@ -635,11 +635,12 @@ def get_index_name(self) -> Optional[str]:

def fetch_index(self):
# read index from file if it is already available
index_filename = None
if self.index_path:
index_filename = os.path.basename(self.index_path)
else:
elif self.index_name:
index_filename = self.index_name + "index"
if self.io.file_exist(index_filename):
if index_filename and self.io.file_exist(index_filename):
if self.index_path:
index = self.io.read_index(
index_filename,
Expand Down Expand Up @@ -681,7 +682,7 @@ def fetch_index(self):
)
assert index.ntotal == xb.shape[0] or index_ivf.ntotal == xb.shape[0]
logger.info("Added vectors to index")
if self.serialize_full_index:
if self.serialize_full_index and index_filename:
codec_size = self.io.write_index(index, index_filename)
assert codec_size is not None

Expand Down Expand Up @@ -908,6 +909,7 @@ def get_codec(self):
class IndexFromFactory(Index):
factory: Optional[str] = None
training_vectors: Optional[DatasetDescriptor] = None
assemble_opaque: bool = True

def __post_init__(self):
super().__post_init__()
Expand All @@ -916,6 +918,19 @@ def __post_init__(self):
if self.factory != "Flat" and self.training_vectors is None:
raise ValueError(f"training_vectors is not set for {self.factory}")

def get_codec_name(self):
# Returns the codec name: asks the parent class first and, when that yields
# None, derives a name from this index's own fields.
# NOTE(review): indentation is not preserved in this view; confirm which of
# the statements below are nested under `if codec_name is None`.
codec_name = super().get_codec_name()
if codec_name is None:
# Derived name starts with the factory string, commas replaced by
# underscores, followed by a trailing dot.
codec_name = f"{self.factory.replace(',', '_')}."
# Then the `d` value and the upper-cased metric, each dot-terminated.
codec_name += f"d_{self.d}.{self.metric.upper()}."
# For any factory other than "Flat", training vectors must be set; the
# result of their get_filename("xt") is appended to the name.
if self.factory != "Flat":
assert self.training_vectors is not None
codec_name += self.training_vectors.get_filename("xt")
# Construction parameters, when present, are appended via
# IndexBaseDescriptor.param_dict_list_to_name.
if self.construction_params is not None:
codec_name += IndexBaseDescriptor.param_dict_list_to_name(self.construction_params)
# Store the name on the instance and return the stored value.
self.codec_name = codec_name
return self.codec_name

def fetch_meta(self, dry_run=False):
meta_filename = self.get_codec_name() + "json"
if self.io.file_exist(meta_filename):
Expand Down Expand Up @@ -1021,14 +1036,13 @@ def get_quantizer(self, dry_run, pretransform=None):
def assemble(self, dry_run):
logger.info(f"assemble {self.factory}")
model = self.get_model()
opaque = True
t_aggregate = 0
# try:
# reverse_index_factory(model)
# opaque = False
# except NotImplementedError:
# opaque = True
if opaque:
if self.assemble_opaque:
codec = model
else:
if isinstance(model, faiss.IndexPreTransform):
Expand Down
4 changes: 2 additions & 2 deletions benchs/bench_fw/optimize.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
from dataclasses import dataclass
from typing import Dict, List, Tuple

import faiss # @manual=//faiss/python:pyfaiss_gpu
import faiss # @manual=//faiss/python:pyfaiss

# from faiss.contrib.evaluation import ( # @manual=//faiss/contrib:faiss_contrib_gpu
# from faiss.contrib.evaluation import ( # @manual=//faiss/contrib:faiss_contrib
# OperatingPoints,
# )

Expand Down
4 changes: 2 additions & 2 deletions benchs/bench_fw/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
from multiprocessing.pool import ThreadPool
from time import perf_counter

import faiss # @manual=//faiss/python:pyfaiss_gpu
import faiss # @manual=//faiss/python:pyfaiss
import numpy as np

from faiss.contrib.evaluation import ( # @manual=//faiss/contrib:faiss_contrib_gpu
from faiss.contrib.evaluation import ( # @manual=//faiss/contrib:faiss_contrib
OperatingPoints,
)

Expand Down
Loading

0 comments on commit 54df8fa

Please sign in to comment.