whisper.cpp/tests/test-vad-full.cpp
Daniel Bevenius d566358a1d
tests : update VAD tests to use Silero V6.2.0 (#3534)
* tests : update VAD tests to use Silero V6.2.0

This commit updates the VAD tests to use the Silero V6.2.0 instead of
V5.1.2. I was not sure if we needed to keep testing for both versions,
but opted to just update to the latest version for simplicity.

* wasm : use C++17 for emscripten builds

This commit updates the CMakeLists.txt file to explicitly set the C++
standard to C++17 when building with Emscripten.

The motivation for this change is that building with Emscripten
will currently fail locally and on CI with the following error:
```console
[ 75%] Building CXX object examples/CMakeFiles/common.dir/common-ggml.cpp.o
In file included from /home/danbev/work/ai/whisper.cpp/examples/stream.wasm/emscripten.cpp:5:
/home/danbev/work/utils/emsdk/upstream/emscripten/cache/sysroot/include/emscripten/bind.h:11:2: error:
      "embind requires -std=c++17 or newer"
   11 | #error "embind requires -std=c++17 or newer"
      |  ^
In file included from /home/danbev/work/ai/whisper.cpp/examples/whisper.wasm/emscripten.cpp:4:
/home/danbev/work/utils/emsdk/upstream/emscripten/cache/sysroot/include/emscripten/bind.h:11:2: error:
      "embind requires -std=c++17 or newer"
   11 | #error "embind requires -std=c++17 or newer"
      |  ^
```
2025-12-06 10:58:58 +01:00

57 lines
1.8 KiB
C++

#include "whisper.h"
#include "common-whisper.h"
#include <cstdio>
#include <cfloat>
#include <string>
#include <cstring>
#include <vector>
#ifdef NDEBUG
#undef NDEBUG
#endif
#include <cassert>
// End-to-end VAD test: loads a sample audio file, runs a full whisper
// transcription with voice-activity detection enabled, and checks that exactly
// one segment with the expected text and timestamps is produced.
//
// WHISPER_MODEL_PATH, VAD_MODEL_PATH and SAMPLE_PATH are not defined in this
// file; presumably they are injected by the build system (TODO confirm).
//
// Returns 0 on success; any failed check aborts through assert().
int main() {
    const std::string whisper_model_path = WHISPER_MODEL_PATH;
    const std::string vad_model_path     = VAD_MODEL_PATH;
    const std::string sample_path        = SAMPLE_PATH;

    // Load the sample audio file.
    // The work is done outside assert() so it does not depend on the state of
    // NDEBUG; the assert only validates the result.
    std::vector<float> pcmf32;
    std::vector<std::vector<float>> pcmf32s;
    const bool audio_loaded = read_audio_data(sample_path.c_str(), pcmf32, pcmf32s, false);
    assert(audio_loaded);

    struct whisper_context_params cparams = whisper_context_default_params();
    struct whisper_context * wctx = whisper_init_from_file_with_params(
            whisper_model_path.c_str(),
            cparams);
    // Fail with a clear assertion instead of handing a null context to the
    // inference call below when the model could not be loaded.
    assert(wctx != nullptr);

    struct whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_BEAM_SEARCH);

    // Enable VAD. vad_model_path stays alive until the end of main, so the
    // c_str() pointer stored here remains valid for the whole run.
    wparams.vad                                = true;
    wparams.vad_model_path                     = vad_model_path.c_str();
    wparams.vad_params.threshold               = 0.5f;
    wparams.vad_params.min_speech_duration_ms  = 250;
    wparams.vad_params.min_silence_duration_ms = 100;
    wparams.vad_params.max_speech_duration_s   = FLT_MAX;
    wparams.vad_params.speech_pad_ms           = 30;

    // Run the transcription with a single processor. The sample count is
    // narrowed explicitly rather than implicitly.
    const int n_samples = static_cast<int>(pcmf32.size());
    const int status    = whisper_full_parallel(wctx, wparams, pcmf32.data(), n_samples, 1);
    assert(status == 0);

    // Exactly one segment is expected; this also guards the index-0 accesses below.
    const int n_segments = whisper_full_n_segments(wctx);
    assert(n_segments == 1);

    // Fetch the text once and make sure it is usable before printing/comparing.
    const char * segment_text = whisper_full_get_segment_text(wctx, 0);
    assert(segment_text != nullptr);

    printf("Segment text:\n%s", segment_text);
    assert(strcmp(" And so my fellow Americans, ask not what your country can do for you,"
                  " ask what you can do for your country.",
                  segment_text) == 0);

    // Expected segment boundaries, in the units returned by the whisper API
    // (NOTE(review): presumably 10 ms ticks - confirm against whisper.h).
    assert(whisper_full_get_segment_t0(wctx, 0) == 32);
    assert(whisper_full_get_segment_t1(wctx, 0) == 1051);

    whisper_free(wctx);
    return 0;
}