tests: enable kv_unified to prevent CUDA OOM error on RTX 2060 (#20645)

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
2026-04-18 13:16:26 +02:00 · 2026-03-18 17:40:22 +08:00 · 2026-03-18 17:40:22 +08:00 · fe00a84b4b
commit fe00a84b4b
parent 7ab321d40d
1 changed files with 1 additions and 0 deletions
--- a/tests/test-backend-sampler.cpp
+++ b/tests/test-backend-sampler.cpp
@@ -89,6 +89,7 @@ struct test_context {
        cparams.n_batch = 512;
        cparams.samplers = configs.data();
        cparams.n_samplers = configs.size();
+        cparams.kv_unified = true;

        // If n_seq_max is not specified, calculate it from configs
        if (n_seq_max < 0) {