Bespoke ggml_repeat for Step3.5-Flash

This commit is contained in:
Kawrakow 2026-02-05 16:51:38 +00:00
parent 1ec12b8e3b
commit 9e1e1c0b5a

View File

@@ -13960,6 +13960,22 @@ static void ggml_compute_forward_repeat_f32(
const struct ggml_tensor * src0 = dst->src[0];
if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->ne[0] == 1 && ggml_nrows(src0) == ggml_nrows(dst)) {
int ith = params->ith;
int nth = params->nth;
int nr = ggml_nrows(dst);
int nc = dst->ne[0];
int dr = (nr + nth - 1)/nth;
int ir0 = dr*ith;
int ir1 = MIN(nr, ir0 + dr);
const float * x = (const float *)src0->data;
for (int ir = ir0; ir < ir1; ++ir) {
float * y = (float *)((char *)dst->data + ir*dst->nb[1]);
for (int j = 0; j < nc; ++j) y[j] = x[ir];
}
return;
}
if (params->ith != 0) {
return;
}