fix: VAD time mapping timestamp drift caused by overlap samples (#3711)

* whisper : fix VAD segment overlap boundary handling

 - Use the original (pre-overlap) segment length for vad_end in the time
   mapping table, so that segment boundaries are preserved accurately

Claude Sonnet 4.6 (Low)

* whisper : remove intermediate VAD time mapping points

Now that segment boundaries are mapped accurately, the intermediate
point interpolation is no longer necessary.

---------

Co-authored-by: Lohopupa <lohopupa@gmail.com>
This commit is contained in:
lohopupa 2026-03-17 12:19:08 +06:00 committed by GitHub
parent 79218f51d0
commit dc96116622
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -6701,12 +6701,13 @@ static bool whisper_vad(
int segment_start_samples = cs_to_samples(vad_segments->data[i].start);
int segment_end_samples = cs_to_samples(vad_segments->data[i].end);
if (i < (int)vad_segments->data.size() - 1) {
segment_end_samples += overlap_samples;
}
segment_start_samples = std::min(segment_start_samples, n_samples - 1);
segment_end_samples = std::min(segment_end_samples, n_samples - 1);
int original_segment_length = segment_end_samples - segment_start_samples;
if (i < (int)vad_segments->data.size() - 1) {
segment_end_samples = std::min(segment_end_samples + overlap_samples, n_samples - 1);
}
int segment_length = segment_end_samples - segment_start_samples;
if (segment_length > 0) {
whisper_state::vad_segment_info segment;
@@ -6715,7 +6716,7 @@ static bool whisper_vad(
segment.orig_end = vad_segments->data[i].end;
segment.vad_start = samples_to_cs(offset);
segment.vad_end = samples_to_cs(offset + segment_length);
segment.vad_end = samples_to_cs(offset + original_segment_length);
// Add segment boundaries to mapping table
vad_time_mapping start_mapping = {segment.vad_start, segment.orig_start};
@@ -6724,29 +6725,6 @@ static bool whisper_vad(
state->vad_mapping_table.push_back(start_mapping);
state->vad_mapping_table.push_back(end_mapping);
// Add intermediate points for longer segments to improve interpolation accuracy
const int64_t min_segment_length = 100; // 1 second
const int64_t point_interval = 20; // Add a point every 200ms
if (segment.vad_end - segment.vad_start > min_segment_length) {
int64_t segment_duration = segment.vad_end - segment.vad_start;
int num_points = (int)(segment_duration / point_interval) - 1;
for (int j = 1; j <= num_points; j++) {
int64_t vad_time = segment.vad_start + j * point_interval;
if (vad_time >= segment.vad_end) continue;
int64_t vad_elapsed = vad_time - segment.vad_start;
int64_t vad_total = segment.vad_end - segment.vad_start;
int64_t orig_total = segment.orig_end - segment.orig_start;
int64_t orig_time = segment.orig_start + (vad_elapsed * orig_total) / vad_total;
vad_time_mapping intermediate_mapping = {vad_time, orig_time};
state->vad_mapping_table.push_back(intermediate_mapping);
}
}
WHISPER_LOG_INFO("%s: vad_segment_info: orig_start: %.2f, orig_end: %.2f, vad_start: %.2f, vad_end: %.2f\n",
__func__, segment.orig_start/100.0, segment.orig_end/100.0, segment.vad_start/100.0, segment.vad_end/100.0);
ctx->state->vad_segments.push_back(segment);