Bespoke ggml_repeat for Step3.5-Flash
This commit is contained in:
parent
1ec12b8e3b
commit
9e1e1c0b5a
@@ -13960,6 +13960,22 @@ static void ggml_compute_forward_repeat_f32(
|
||||
|
||||
const struct ggml_tensor * src0 = dst->src[0];
|
||||
|
||||
if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->ne[0] == 1 && ggml_nrows(src0) == ggml_nrows(dst)) {
|
||||
int ith = params->ith;
|
||||
int nth = params->nth;
|
||||
int nr = ggml_nrows(dst);
|
||||
int nc = dst->ne[0];
|
||||
int dr = (nr + nth - 1)/nth;
|
||||
int ir0 = dr*ith;
|
||||
int ir1 = MIN(nr, ir0 + dr);
|
||||
const float * x = (const float *)src0->data;
|
||||
for (int ir = ir0; ir < ir1; ++ir) {
|
||||
float * y = (float *)((char *)dst->data + ir*dst->nb[1]);
|
||||
for (int j = 0; j < nc; ++j) y[j] = x[ir];
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (params->ith != 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user