From aeafca437efa7fb28166703f845e321176aa62ab Mon Sep 17 00:00:00 2001
From: Daniel Bevenius <daniel.bevenius@gmail.com>
Date: Sat, 4 Oct 2025 08:17:25 +0200
Subject: [PATCH] vad : fix memory leak by storing ggml_context in vad context
 struct

This commit addresses a memory leak in the voice activity detection
(VAD) code, where the ggml_context is not stored within the VAD context
structure.

The motivation for this change is that the context memory stays
allocated and the tensors still point to that memory, but the memory is
never freed.

Resolves: https://github.com/ggml-org/whisper.cpp/issues/3452
---
 src/whisper.cpp | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/whisper.cpp b/src/whisper.cpp
index d99dd7be6..e9c2a786a 100644
--- a/src/whisper.cpp
+++ b/src/whisper.cpp
@@ -4402,6 +4402,7 @@ struct whisper_vad_context {
     std::vector<ggml_backend_t> backends;
     ggml_backend_buffer_t       buffer = nullptr;
     whisper_context_params      params;
+    ggml_context *              ctx = nullptr;
     std::vector<uint8_t>        ctx_buf;
     whisper_sched               sched;
 
@@ -4661,21 +4662,21 @@ static bool whisper_vad_init_context(whisper_vad_context * vctx) {
         /*.no_alloc   =*/ true,
     };
 
-    ggml_context * ctx = ggml_init(params);
-    if (!ctx) {
+    vctx->ctx = ggml_init(params);
+    if (!vctx->ctx) {
         WHISPER_LOG_ERROR("%s: failed to init LSTM state ggml context\n", __func__);
         return false;
     }
 
     // LSTM Hidden state
-    vctx->h_state = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, lstm_hidden_size);
+    vctx->h_state = ggml_new_tensor_1d(vctx->ctx, GGML_TYPE_F32, lstm_hidden_size);
     ggml_set_name(vctx->h_state, "h_state");
 
     // LSTM Cell state
-    vctx->c_state = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, lstm_hidden_size);
+    vctx->c_state = ggml_new_tensor_1d(vctx->ctx, GGML_TYPE_F32, lstm_hidden_size);
     ggml_set_name(vctx->c_state, "c_state");
 
-    vctx->buffer = ggml_backend_alloc_ctx_tensors(ctx, vctx->backends[0]);
+    vctx->buffer = ggml_backend_alloc_ctx_tensors(vctx->ctx, vctx->backends[0]);
     if (!vctx->buffer) {
         WHISPER_LOG_ERROR("%s: failed to allocate memory for the VAD state\n", __func__);
         return false;
@@ -5433,7 +5434,7 @@ void whisper_vad_free(whisper_vad_context * ctx) {
         for (auto & backend : ctx->backends) {
             ggml_backend_free(backend);
         }
-
+        ggml_free(ctx->ctx);
 
         delete ctx;
     }