mirror of
https://github.com/ggerganov/llama.cpp
synced 2026-04-18 05:05:43 +02:00
read n_ctx back after making llama_context (#21939)
This commit is contained in:
parent
5d14e5d19b
commit
e39eba26f3
@@ -602,8 +602,8 @@ int main(int argc, char ** argv) {
|
||||
|
||||
int n_input = input_tokens.size();
|
||||
|
||||
if (n_input >= params.n_ctx) {
|
||||
LOG_ERR("error: input too long (%d tokens), max context is %d\n", n_input, params.n_ctx);
|
||||
if (static_cast<uint32_t>(n_input) >= llama_n_ctx(ctx)) {
|
||||
LOG_ERR("error: input too long (%d tokens), max context is %d\n", n_input, llama_n_ctx(ctx));
|
||||
llama_free(ctx);
|
||||
llama_model_free(model);
|
||||
return 1;
|
||||
|
||||
Loading…
Reference in New Issue
Block a user