whisper.cpp/bindings/ruby/ext/ruby_whisper_vad_context_detect.cpp
KITAITI Makoto aa1bc0d1a6
ruby : add VAD::Context#segments_from_samples, allow Pathname, etc. (#3633)
* ruby : Bump version to 1.3.6

* Fix code in example

* Add sample code to transcribe from MemoryView

* Define GetVADContext macro

* Use GetVADContext

* Extract parse_full_args function

* Use parse_full_args in ruby_whisper_full_parallel

* Free samples after use

* Check return value of parse_full_args()

* Define GetVADParams macro

* Add VAD::Context#segments_from_samples

* Add tests for VAD::Context#segments_from_samples

* Add signature for VAD::Context#segments_from_samples

* Add sample code for VAD::Context#segments_from_samples

* Add test for Whisper::Context#transcribe with Pathname

* Make Whisper::Context#transcribe and Whisper::VAD::Context#detect accept Pathname

* Update signature of Whisper::Context#transcribe

* Fix variable name

* Don't free memory view

* Make parse_full_args return struct

* Fallback when failed to get MemoryView

* Add num of samples when too long

* Check members of MemoryView

* Fix a typo

* Remove unnecessary include

* Fix a typo

* Fix a typo

* Handle the case where MemoryView doesn't fit spec

* Add TODO comment

* Add optimization option to compiler flags

* Use ALLOC_N instead of malloc

* Add description to sample code

* Rename and change args: parse_full_args -> parse_samples

* Free samples when exception raised

* Assign type check result to a variable

* Define wrapper function of whisper_full

* Change signature of parse_samples for rb_ensure

* Ensure release MemoryView

* Extract fill_samples function

* Free samples memory when filling it failed

* Free samples memory when transcription failed

* Prepare transcription in wrapper function

* Change function name

* Simplify function boundary
2026-01-30 22:59:36 +09:00

52 lines
1.4 KiB
C++

#include "ruby_whisper.h"
#include "common-whisper.h"
#include <string>
#include <vector>
#ifdef __cplusplus
extern "C" {
#endif
// Symbols declared here are defined in other translation units of the extension.

// Method ID used by ruby_whisper_vad_detect to call `to_path` on path-like arguments.
extern ID id_to_path;
// Not referenced directly in this file's visible code; presumably the Ruby class
// object for VAD segments -- TODO confirm against its definition.
extern VALUE cVADSegments;
// Typed-data descriptor for the VAD context wrapper; presumably consumed by the
// GetVADContext macro -- TODO confirm.
extern const rb_data_type_t ruby_whisper_vad_context_type;
// Typed-data descriptor passed to TypedData_Get_Struct to unwrap the `params` argument.
extern const rb_data_type_t ruby_whisper_vad_params_type;
// Typed-data descriptor for the VAD segments wrapper (not referenced directly in
// this file's visible code).
extern const rb_data_type_t ruby_whisper_vad_segments_type;
// Converts a native whisper_vad_segments pointer into the VALUE returned to Ruby.
extern VALUE ruby_whisper_vad_segments_s_init(struct whisper_vad_segments *segments);
/*
 * Runs voice activity detection on an audio file.
 *
 * self      - receiver; unwrapped into a ruby_whisper_vad_context via GetVADContext.
 * file_path - a String, or any object responding to #to_path (e.g. a Pathname),
 *             naming the audio file to read.
 * params    - object wrapping ruby_whisper_vad_params (checked with
 *             ruby_whisper_vad_params_type).
 *
 * Returns the VALUE built by ruby_whisper_vad_segments_s_init() from the
 * detected segments.
 *
 * Raises RuntimeError when the audio file cannot be read, or when
 * whisper_vad_segments_from_samples() returns a null pointer.
 *
 * NOTE: rb_raise() leaves this function via longjmp, which does not run C++
 * destructors. All objects with non-trivial destructors therefore live in an
 * inner scope that is closed before any rb_raise() call, so the audio buffers
 * are released on the error paths as well.
 */
VALUE
ruby_whisper_vad_detect(VALUE self, VALUE file_path, VALUE params) {
  ruby_whisper_vad_context *rwvc;
  ruby_whisper_vad_params *rwvp;
  whisper_vad_segments *segments = nullptr;
  bool audio_loaded = false;

  // These Ruby API calls may raise; no C++ objects exist yet, so nothing can leak.
  GetVADContext(self, rwvc);
  TypedData_Get_Struct(params, ruby_whisper_vad_params, &ruby_whisper_vad_params_type, rwvp);

  // Accept path-like objects by converting them through #to_path first.
  if (rb_respond_to(file_path, id_to_path)) {
    file_path = rb_funcall(file_path, id_to_path, 0);
  }
  // Points into the Ruby String referenced by file_path; used for the error message below.
  const char *c_file_path = StringValueCStr(file_path);

  {
    // Scope for the C++ containers: destroyed at the closing brace, before any raise.
    std::string cpp_file_path(c_file_path);
    std::vector<float> pcmf32;
    std::vector<std::vector<float>> pcmf32s;

    audio_loaded = read_audio_data(cpp_file_path, pcmf32, pcmf32s, false);
    if (audio_loaded) {
      segments = whisper_vad_segments_from_samples(rwvc->context, rwvp->params, pcmf32.data(), pcmf32.size());
    }
  }

  if (!audio_loaded) {
    rb_raise(rb_eRuntimeError, "Failed to open '%s' as WAV file\n", c_file_path);
  }
  if (segments == nullptr) {
    rb_raise(rb_eRuntimeError, "Failed to process audio\n");
  }

  return ruby_whisper_vad_segments_s_init(segments);
}
#ifdef __cplusplus
}
#endif