Disable CUDA fusion by default for now (#903)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
parent
1a3aaa33c1
commit
320fc606cd
@ -133,7 +133,7 @@ option(GGML_CUDA_NO_PEER_COPY "ggml: do not use peer to peer copie
|
||||
option(GGML_CUDA_NO_VMM "ggml: do not try to use CUDA VMM" OFF)
|
||||
option(GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashAttention" OFF)
|
||||
option(GGML_CUDA_USE_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" ON)
|
||||
set (GGML_CUDA_FUSION "1" CACHE STRING "ggml: enable/disable fusion")
|
||||
set (GGML_CUDA_FUSION "0" CACHE STRING "ggml: enable/disable fusion")
|
||||
|
||||
option(GGML_IQK_FLASH_ATTENTION "ggml: enable the IQK FlashAttention CPU kernels" ON)
|
||||
option(GGML_IQK_FA_ALL_QUANTS "ggml: compile all quants for IQK FlashAttention" OFF)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user