From eefa39105eda498ab5fcd82ea0e11b18fd3fc0d7 Mon Sep 17 00:00:00 2001 From: Alexandr Guzhva Date: Tue, 5 Dec 2023 01:39:52 -0800 Subject: [PATCH] Introduce avx512 optimization mode and FAISS_OPT_LEVEL env variable (#3150) Summary: Enables avx512 optimized code (AVX512 subsets F, CD, VL, DQ and BW, which are available for Intel Skylake+ and all AMD Zen4). Also, introduces `FAISS_OPT_LEVEL` environment variable. Set it to `AVX2`, `AVX512` or empty to pick the appropriate x86_64 instruction set. Compiled via the following ``` cmake -B build -DCMAKE_BUILD_TYPE=Release -DFAISS_ENABLE_GPU=OFF -DFAISS_OPT_LEVEL=avx512 -DBUILD_TESTING=ON . make -C build -j 8 faiss_test make -C build -j 8 swigfaiss make -C build -j 8 swigfaiss_avx2 make -C build -j 8 swigfaiss_avx512 cd build/faiss/python python3 setup.py build python3 setup.py install --force ``` Now, running the following script `1.py` ``` import logging logging.basicConfig(level=logging.DEBUG) import faiss ``` produces the following: ``` root@6179abeef23c:~/faiss# LOGLEVEL=DEBUG FAISS_OPT_LEVEL= python3 1.py DEBUG:faiss.loader:Using as an instruction set. INFO:faiss.loader:Loading faiss. INFO:faiss.loader:Successfully loaded faiss. root@6179abeef23c:~/faiss# LOGLEVEL=DEBUG FAISS_OPT_LEVEL=AVX2 python3 1.py DEBUG:faiss.loader:Using AVX2 as an instruction set. INFO:faiss.loader:Loading faiss with AVX2 support. INFO:faiss.loader:Successfully loaded faiss with AVX2 support. root@6179abeef23c:~/faiss# LOGLEVEL=DEBUG FAISS_OPT_LEVEL=AVX512 python3 1.py DEBUG:faiss.loader:Using AVX512 as an instruction set. INFO:faiss.loader:Loading faiss with AVX512 support. INFO:faiss.loader:Successfully loaded faiss with AVX512 support. root@6179abeef23c:~/faiss# LOGLEVEL=DEBUG python3 1.py DEBUG:faiss.loader:Environment variable FAISS_OPT_LEVEL is not set, so let's pick the instruction set according to the current CPU INFO:faiss.loader:Loading faiss with AVX512 support. INFO:faiss.loader:Successfully loaded faiss with AVX512 support. ``` Pull Request resolved: https://github.com/facebookresearch/faiss/pull/3150 Reviewed By: algoriddle Differential Revision: D51701077 Pulled By: mdouze fbshipit-source-id: 4db05a287e763ff1ce1f676df7f7402532bf1e9e --- CMakeLists.txt | 2 +- INSTALL.md | 4 +- faiss/CMakeLists.txt | 34 ++++++++++++++ faiss/python/CMakeLists.txt | 28 ++++++++++++ faiss/python/loader.py | 45 ++++++++++++++++--- faiss/python/setup.py | 11 ++++- faiss/utils/distances_fused/avx512.cpp | 2 +- faiss/utils/distances_fused/avx512.h | 2 +- .../utils/distances_fused/distances_fused.cpp | 2 +- faiss/utils/utils.cpp | 2 + tests/CMakeLists.txt | 15 ++++++- 11 files changed, 131 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9b5e38f6da..85c8a820bc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,7 +50,7 @@ set(CMAKE_CXX_STANDARD 17) list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake") -# Valid values are "generic", "avx2". +# Valid values are "generic", "avx2", "avx512". option(FAISS_OPT_LEVEL "" "generic") option(FAISS_ENABLE_GPU "Enable support for GPU indexes." ON) option(FAISS_ENABLE_RAFT "Enable RAFT for GPU indexes." OFF) diff --git a/INSTALL.md b/INSTALL.md index 77c9e2896c..dd04511bd2 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -119,8 +119,8 @@ Several options can be passed to CMake, among which: - `-DCMAKE_BUILD_TYPE=Release` in order to enable generic compiler optimization options (enables `-O3` on gcc for instance), - `-DFAISS_OPT_LEVEL=avx2` in order to enable the required compiler flags to - generate code using optimized SIMD instructions (possible values are `generic` - and `avx2`, by increasing order of optimization), + generate code using optimized SIMD instructions (possible values are `generic`, + `avx2` and `avx512`, by increasing order of optimization), - BLAS-related options: - `-DBLA_VENDOR=Intel10_64_dyn -DMKL_LIBRARIES=/path/to/mkl/libs` to use the Intel MKL BLAS implementation, which is significantly faster than OpenBLAS diff --git a/faiss/CMakeLists.txt b/faiss/CMakeLists.txt index 291b225cd7..27701586c8 100644 --- a/faiss/CMakeLists.txt +++ b/faiss/CMakeLists.txt @@ -244,12 +244,29 @@ else() add_compile_options(/bigobj) endif() +add_library(faiss_avx512 ${FAISS_SRC}) +if(NOT FAISS_OPT_LEVEL STREQUAL "avx512") + set_target_properties(faiss_avx512 PROPERTIES EXCLUDE_FROM_ALL TRUE) +endif() +if(NOT WIN32) + # All modern CPUs support F, CD, VL, DQ, BW extensions. + # Ref: https://en.wikipedia.org/wiki/AVX512 + target_compile_options(faiss_avx512 PRIVATE $<$:-mavx2 -mfma -mf16c -mavx512f -mavx512cd -mavx512vl -mavx512dq -mavx512bw -mpopcnt>) +else() + target_compile_options(faiss_avx512 PRIVATE $<$:/arch:AVX512>) + # we need bigobj for the swig wrapper + add_compile_options(/bigobj) +endif() + # Handle `#include `. target_include_directories(faiss PUBLIC $) # Handle `#include `. target_include_directories(faiss_avx2 PUBLIC $) +# Handle `#include `. +target_include_directories(faiss_avx512 PUBLIC + $) set_target_properties(faiss PROPERTIES POSITION_INDEPENDENT_CODE ON @@ -259,31 +276,41 @@ set_target_properties(faiss_avx2 PROPERTIES POSITION_INDEPENDENT_CODE ON WINDOWS_EXPORT_ALL_SYMBOLS ON ) +set_target_properties(faiss_avx512 PROPERTIES + POSITION_INDEPENDENT_CODE ON + WINDOWS_EXPORT_ALL_SYMBOLS ON +) if(WIN32) target_compile_definitions(faiss PRIVATE FAISS_MAIN_LIB) target_compile_definitions(faiss_avx2 PRIVATE FAISS_MAIN_LIB) + target_compile_definitions(faiss_avx512 PRIVATE FAISS_MAIN_LIB) endif() target_compile_definitions(faiss PRIVATE FINTEGER=int) target_compile_definitions(faiss_avx2 PRIVATE FINTEGER=int) +target_compile_definitions(faiss_avx512 PRIVATE FINTEGER=int) find_package(OpenMP REQUIRED) target_link_libraries(faiss PRIVATE OpenMP::OpenMP_CXX) target_link_libraries(faiss_avx2 PRIVATE OpenMP::OpenMP_CXX) +target_link_libraries(faiss_avx512 PRIVATE OpenMP::OpenMP_CXX) find_package(MKL) if(MKL_FOUND) target_link_libraries(faiss PRIVATE ${MKL_LIBRARIES}) target_link_libraries(faiss_avx2 PRIVATE ${MKL_LIBRARIES}) + target_link_libraries(faiss_avx512 PRIVATE ${MKL_LIBRARIES}) else() find_package(BLAS REQUIRED) target_link_libraries(faiss PRIVATE ${BLAS_LIBRARIES}) target_link_libraries(faiss_avx2 PRIVATE ${BLAS_LIBRARIES}) + target_link_libraries(faiss_avx512 PRIVATE ${BLAS_LIBRARIES}) find_package(LAPACK REQUIRED) target_link_libraries(faiss PRIVATE ${LAPACK_LIBRARIES}) target_link_libraries(faiss_avx2 PRIVATE ${LAPACK_LIBRARIES}) + target_link_libraries(faiss_avx512 PRIVATE ${LAPACK_LIBRARIES}) endif() install(TARGETS faiss @@ -300,6 +327,13 @@ if(FAISS_OPT_LEVEL STREQUAL "avx2") LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ) endif() +if(FAISS_OPT_LEVEL STREQUAL "avx512") + install(TARGETS faiss_avx512 + EXPORT faiss-targets + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ) +endif() foreach(header ${FAISS_HEADERS}) get_filename_component(dir ${header} DIRECTORY ) diff --git a/faiss/python/CMakeLists.txt b/faiss/python/CMakeLists.txt index 2a7227ead7..8bca710f5f 100644 --- a/faiss/python/CMakeLists.txt +++ b/faiss/python/CMakeLists.txt @@ -44,9 +44,11 @@ endmacro() # CMake's SWIG wrappers only allow tweaking certain settings at source level, so # we duplicate the source in order to override the module name. configure_file(swigfaiss.swig ${CMAKE_CURRENT_SOURCE_DIR}/swigfaiss_avx2.swig COPYONLY) +configure_file(swigfaiss.swig ${CMAKE_CURRENT_SOURCE_DIR}/swigfaiss_avx512.swig COPYONLY) configure_swigfaiss(swigfaiss.swig) configure_swigfaiss(swigfaiss_avx2.swig) +configure_swigfaiss(swigfaiss_avx512.swig) if(TARGET faiss) # Manually add headers as extra dependencies of swigfaiss. @@ -54,10 +56,12 @@ if(TARGET faiss) foreach(h ${FAISS_HEADERS}) list(APPEND SWIG_MODULE_swigfaiss_EXTRA_DEPS "${faiss_SOURCE_DIR}/faiss/${h}") list(APPEND SWIG_MODULE_swigfaiss_avx2_EXTRA_DEPS "${faiss_SOURCE_DIR}/faiss/${h}") + list(APPEND SWIG_MODULE_swigfaiss_avx512_EXTRA_DEPS "${faiss_SOURCE_DIR}/faiss/${h}") endforeach() foreach(h ${FAISS_GPU_HEADERS}) list(APPEND SWIG_MODULE_swigfaiss_EXTRA_DEPS "${faiss_SOURCE_DIR}/faiss/gpu/${h}") list(APPEND SWIG_MODULE_swigfaiss_avx2_EXTRA_DEPS "${faiss_SOURCE_DIR}/faiss/gpu/${h}") + list(APPEND SWIG_MODULE_swigfaiss_avx512_EXTRA_DEPS "${faiss_SOURCE_DIR}/faiss/gpu/${h}") endforeach() else() find_package(faiss REQUIRED) @@ -82,14 +86,28 @@ if(NOT FAISS_OPT_LEVEL STREQUAL "avx2") set_target_properties(swigfaiss_avx2 PROPERTIES EXCLUDE_FROM_ALL TRUE) endif() +set_property(SOURCE swigfaiss_avx512.swig + PROPERTY SWIG_MODULE_NAME swigfaiss_avx512) +swig_add_library(swigfaiss_avx512 + TYPE SHARED + LANGUAGE python + SOURCES swigfaiss_avx512.swig +) +set_property(TARGET swigfaiss_avx512 PROPERTY SWIG_COMPILE_OPTIONS -doxygen) +if(NOT FAISS_OPT_LEVEL STREQUAL "avx512") + set_target_properties(swigfaiss_avx512 PROPERTIES EXCLUDE_FROM_ALL TRUE) +endif() + if(NOT WIN32) # NOTE: Python does not recognize the dylib extension. set_target_properties(swigfaiss PROPERTIES SUFFIX .so) set_target_properties(swigfaiss_avx2 PROPERTIES SUFFIX .so) + set_target_properties(swigfaiss_avx512 PROPERTIES SUFFIX .so) else() # we need bigobj for the swig wrapper target_compile_options(swigfaiss PRIVATE /bigobj) target_compile_options(swigfaiss_avx2 PRIVATE /bigobj) + target_compile_options(swigfaiss_avx512 PRIVATE /bigobj) endif() if(FAISS_ENABLE_GPU) @@ -99,6 +117,7 @@ if(FAISS_ENABLE_GPU) endif() target_link_libraries(swigfaiss PRIVATE CUDA::cudart $<$:raft::raft> $<$:nvidia::cutlass::cutlass>) target_link_libraries(swigfaiss_avx2 PRIVATE CUDA::cudart $<$:raft::raft> $<$:nvidia::cutlass::cutlass>) + target_link_libraries(swigfaiss_avx512 PRIVATE CUDA::cudart $<$:raft::raft> $<$:nvidia::cutlass::cutlass>) endif() find_package(OpenMP REQUIRED) @@ -117,10 +136,18 @@ target_link_libraries(swigfaiss_avx2 PRIVATE OpenMP::OpenMP_CXX ) +target_link_libraries(swigfaiss_avx512 PRIVATE + faiss_avx512 + Python::Module + Python::NumPy + OpenMP::OpenMP_CXX +) + # Hack so that python_callbacks.h can be included as # `#include `. target_include_directories(swigfaiss PRIVATE ${PROJECT_SOURCE_DIR}/../..) target_include_directories(swigfaiss_avx2 PRIVATE ${PROJECT_SOURCE_DIR}/../..) +target_include_directories(swigfaiss_avx512 PRIVATE ${PROJECT_SOURCE_DIR}/../..) find_package(Python REQUIRED COMPONENTS Development NumPy @@ -140,6 +167,7 @@ target_include_directories(faiss_python_callbacks PRIVATE ${Python_INCLUDE_DIRS} target_link_libraries(swigfaiss PRIVATE faiss_python_callbacks) target_link_libraries(swigfaiss_avx2 PRIVATE faiss_python_callbacks) +target_link_libraries(swigfaiss_avx512 PRIVATE faiss_python_callbacks) configure_file(setup.py setup.py COPYONLY) configure_file(__init__.py __init__.py COPYONLY) diff --git a/faiss/python/loader.py b/faiss/python/loader.py index dd5839908e..eb60bf6800 100644 --- a/faiss/python/loader.py +++ b/faiss/python/loader.py @@ -18,7 +18,7 @@ def supported_instruction_sets(): Example: >>> supported_instruction_sets() # for x86 - {"SSE2", "AVX2", ...} + {"SSE2", "AVX2", "AVX512", ...} >>> supported_instruction_sets() # for PPC {"VSX", "VSX2", ...} >>> supported_instruction_sets() # for ARM @@ -41,25 +41,58 @@ def supported_instruction_sets(): return {"AVX2"} elif platform.system() == "Linux": import numpy.distutils.cpuinfo + result = set() if "avx2" in numpy.distutils.cpuinfo.cpu.info[0].get('flags', ""): - return {"AVX2"} + result.add("AVX2") + if "avx512" in numpy.distutils.cpuinfo.cpu.info[0].get('flags', ""): + result.add("AVX512") + return result return set() logger = logging.getLogger(__name__) -has_AVX2 = "AVX2" in supported_instruction_sets() -if has_AVX2: +instruction_sets = None + +# try to load optimization level from env variable +opt_env_variable_name = "FAISS_OPT_LEVEL" +opt_level = os.environ.get(opt_env_variable_name, None) + +if opt_level is None: + logger.debug(f"Environment variable {opt_env_variable_name} is not set, " \ + "so let's pick the instruction set according to the current CPU") + instruction_sets = supported_instruction_sets() +else: + logger.debug(f"Using {opt_level} as an instruction set.") + instruction_sets = set() + instruction_sets.add(opt_level) + +loaded = False +has_AVX512 = any("AVX512" in x.upper() for x in instruction_sets) +if has_AVX512: + try: + logger.info("Loading faiss with AVX512 support.") + from .swigfaiss_avx512 import * + logger.info("Successfully loaded faiss with AVX512 support.") + loaded = True + except ImportError as e: + logger.info(f"Could not load library with AVX512 support due to:\n{e!r}") + # reset so that we load without AVX512 below + loaded = False + +has_AVX2 = "AVX2" in instruction_sets +if has_AVX2 and not loaded: try: logger.info("Loading faiss with AVX2 support.") from .swigfaiss_avx2 import * logger.info("Successfully loaded faiss with AVX2 support.") + loaded = True except ImportError as e: logger.info(f"Could not load library with AVX2 support due to:\n{e!r}") # reset so that we load without AVX2 below - has_AVX2 = False + loaded = False -if not has_AVX2: +if not loaded: # we import * so that the symbol X can be accessed as faiss.X logger.info("Loading faiss.") from .swigfaiss import * diff --git a/faiss/python/setup.py b/faiss/python/setup.py index 6ea944e188..1c9101290d 100644 --- a/faiss/python/setup.py +++ b/faiss/python/setup.py @@ -25,13 +25,15 @@ swigfaiss_generic_lib = f"{prefix}_swigfaiss{ext}" swigfaiss_avx2_lib = f"{prefix}_swigfaiss_avx2{ext}" +swigfaiss_avx512_lib = f"{prefix}_swigfaiss_avx512{ext}" found_swigfaiss_generic = os.path.exists(swigfaiss_generic_lib) found_swigfaiss_avx2 = os.path.exists(swigfaiss_avx2_lib) +found_swigfaiss_avx512 = os.path.exists(swigfaiss_avx512_lib) -assert (found_swigfaiss_generic or found_swigfaiss_avx2), \ +assert (found_swigfaiss_generic or found_swigfaiss_avx2 or found_swigfaiss_avx512), \ f"Could not find {swigfaiss_generic_lib} or " \ - f"{swigfaiss_avx2_lib}. Faiss may not be compiled yet." + f"{swigfaiss_avx2_lib} or {swigfaiss_avx512_lib}. Faiss may not be compiled yet." if found_swigfaiss_generic: print(f"Copying {swigfaiss_generic_lib}") @@ -43,6 +45,11 @@ shutil.copyfile("swigfaiss_avx2.py", "faiss/swigfaiss_avx2.py") shutil.copyfile(swigfaiss_avx2_lib, f"faiss/_swigfaiss_avx2{ext}") +if found_swigfaiss_avx512: + print(f"Copying {swigfaiss_avx512_lib}") + shutil.copyfile("swigfaiss_avx512.py", "faiss/swigfaiss_avx512.py") + shutil.copyfile(swigfaiss_avx512_lib, f"faiss/_swigfaiss_avx512{ext}") + long_description=""" Faiss is a library for efficient similarity search and clustering of dense vectors. It contains algorithms that search in sets of vectors of any size, diff --git a/faiss/utils/distances_fused/avx512.cpp b/faiss/utils/distances_fused/avx512.cpp index 6ae8cb0469..b5ff70f9e4 100644 --- a/faiss/utils/distances_fused/avx512.cpp +++ b/faiss/utils/distances_fused/avx512.cpp @@ -9,7 +9,7 @@ #include -#ifdef __AVX512__ +#ifdef __AVX512F__ #include diff --git a/faiss/utils/distances_fused/avx512.h b/faiss/utils/distances_fused/avx512.h index d730e3b61c..b6d5fc0556 100644 --- a/faiss/utils/distances_fused/avx512.h +++ b/faiss/utils/distances_fused/avx512.h @@ -16,7 +16,7 @@ #include -#ifdef __AVX512__ +#ifdef __AVX512F__ namespace faiss { diff --git a/faiss/utils/distances_fused/distances_fused.cpp b/faiss/utils/distances_fused/distances_fused.cpp index 650e24810d..a0af971c5c 100644 --- a/faiss/utils/distances_fused/distances_fused.cpp +++ b/faiss/utils/distances_fused/distances_fused.cpp @@ -27,7 +27,7 @@ bool exhaustive_L2sqr_fused_cmax( return true; } -#ifdef __AVX512__ +#ifdef __AVX512F__ // avx512 kernel return exhaustive_L2sqr_fused_cmax_AVX512(x, y, d, nx, ny, res, y_norms); #elif defined(__AVX2__) || defined(__aarch64__) diff --git a/faiss/utils/utils.cpp b/faiss/utils/utils.cpp index d40f14eae8..efbff502b0 100644 --- a/faiss/utils/utils.cpp +++ b/faiss/utils/utils.cpp @@ -116,6 +116,8 @@ std::string get_compile_options() { #ifdef __AVX2__ options += "AVX2 "; +#elif __AVX512F__ + options += "AVX512"; #elif defined(__aarch64__) options += "NEON "; #else diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index f8070fd0ab..cc0a4f4cfd 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -34,6 +34,10 @@ set(FAISS_TEST_SRC add_executable(faiss_test ${FAISS_TEST_SRC}) +if(NOT FAISS_OPT_LEVEL STREQUAL "avx2" AND NOT FAISS_OPT_LEVEL STREQUAL "avx512") + target_link_libraries(faiss_test PRIVATE faiss) +endif() + if(FAISS_OPT_LEVEL STREQUAL "avx2") if(NOT WIN32) target_compile_options(faiss_test PRIVATE $<$:-mavx2 -mfma>) @@ -41,8 +45,15 @@ if(FAISS_OPT_LEVEL STREQUAL "avx2") target_compile_options(faiss_test PRIVATE $<$:/arch:AVX2>) endif() target_link_libraries(faiss_test PRIVATE faiss_avx2) -else() - target_link_libraries(faiss_test PRIVATE faiss) +endif() + +if(FAISS_OPT_LEVEL STREQUAL "avx512") + if(NOT WIN32) + target_compile_options(faiss_test PRIVATE $<$:-mavx2 -mfma -mavx512f -mavx512f -mavx512cd -mavx512vl -mavx512dq -mavx512bw>) + else() + target_compile_options(faiss_test PRIVATE $<$:/arch:AVX512>) + endif() + target_link_libraries(faiss_test PRIVATE faiss_avx512) endif() include(FetchContent)