mirror of
https://github.com/ggerganov/llama.cpp
synced 2026-04-30 11:12:04 +02:00
server: allow cancel loading model (#21814)
This commit is contained in:
parent
5a4cd6741f
commit
04fe84b69d
@@ -712,6 +712,11 @@ void server_models::unload(const std::string & name) {
|
||||
if (it->second.meta.is_running()) {
|
||||
SRV_INF("stopping model instance name=%s\n", name.c_str());
|
||||
stopping_models.insert(name);
|
||||
if (it->second.meta.status == SERVER_MODEL_STATUS_LOADING) {
|
||||
// special case: if model is in loading state, unloading means force-killing it
|
||||
SRV_WRN("model name=%s is still loading, force-killing\n", name.c_str());
|
||||
subprocess_terminate(it->second.subproc.get());
|
||||
}
|
||||
cv_stop.notify_all();
|
||||
// status change will be handled by the managing thread
|
||||
} else {
|
||||
|
||||
Loading…
Reference in New Issue
Block a user