mirror of
https://github.com/ggerganov/llama.cpp
synced 2026-04-18 13:16:26 +02:00
tests: enable kv_unified to prevent cuda oom error on rtx 2060 (#20645)
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
This commit is contained in:
parent
7ab321d40d
commit
fe00a84b4b
@@ -89,6 +89,7 @@ struct test_context {
|
||||
cparams.n_batch = 512;
|
||||
cparams.samplers = configs.data();
|
||||
cparams.n_samplers = configs.size();
|
||||
cparams.kv_unified = true;
|
||||
|
||||
// If n_seq_max is not specified, calculate it from configs
|
||||
if (n_seq_max < 0) {
|
||||
|
||||
Loading…
Reference in New Issue
Block a user