Move the FlashInfer FP8/BF16 switches out of the top-level CMakeLists.txt and
declare them, together with USE_FLASHINFER and the FLASHINFER_GEN_* kernel
generation lists, in cmake/config.cmake.

Hunk 1 (CMakeLists.txt): drops the two hard-coded
set(FLASHINFER_ENABLE_FP8 OFF) / set(FLASHINFER_ENABLE_BF16 OFF) lines from
the USE_FLASHINFER branch.
Hunk 2 (cmake/config.cmake): adds USE_FLASHINFER (OFF), the FP8/BF16 switches
(OFF) and six FLASHINFER_GEN_* lists after the USE_CUTLASS entry.

NOTE(review): presumably config.cmake is read before the USE_FLASHINFER branch
runs, so the values set there take effect - confirm against the include order.
NOTE(review): the leading whitespace of the CMakeLists.txt context lines was
reconstructed as two spaces - confirm against the target tree, or apply with
whitespace-insensitive matching.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index e0ff233155..199ff1650a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -956,8 +956,6 @@ if (USE_FLASHINFER STREQUAL "ON")
   message(STATUS "Build with FlashInfer")
   set(FLASHINFER_TVM_BINDING ON)
   set(FLASHINFER_TVM_HOME ${PROJECT_SOURCE_DIR})
-  set(FLASHINFER_ENABLE_FP8 OFF)
-  set(FLASHINFER_ENABLE_BF16 OFF)
   set(FLASHINFER_PREFILL OFF)
   set(FLASHINFER_DECODE OFF)
   set(FLASHINFER_PAGE OFF)
diff --git a/cmake/config.cmake b/cmake/config.cmake
index ccb449fe2b..cd35c89294 100644
--- a/cmake/config.cmake
+++ b/cmake/config.cmake
@@ -444,6 +444,18 @@ set(USE_GTEST AUTO)
 # Need to have USE_CUDA=ON
 set(USE_CUTLASS OFF)
 
+# Whether to enable FlashInfer or not.
+set(USE_FLASHINFER OFF)
+# Options for FlashInfer kernel compilation.
+set(FLASHINFER_ENABLE_FP8 OFF)
+set(FLASHINFER_ENABLE_BF16 OFF)
+set(FLASHINFER_GEN_GROUP_SIZES 1 4 6 8)
+set(FLASHINFER_GEN_HEAD_DIMS 128)
+set(FLASHINFER_GEN_KV_LAYOUTS 1)
+set(FLASHINFER_GEN_POS_ENCODING_MODES 0 1)
+set(FLASHINFER_GEN_ALLOW_FP16_QK_REDUCTIONS "false")
+set(FLASHINFER_GEN_CASUALS "false" "true")
+
 # Enable to show a summary of TVM options
 set(SUMMARIZE OFF)
 