Optimize row and column tile indexing in core_mma_chunk_fp16 by hoisting the loop-invariant dot-tile stride

2026-04-22 19:48:30 +02:00 · 2026-04-15 21:51:51 +08:00 · 2026-04-15 21:51:51 +08:00 · 2a12467cf8
commit 2a12467cf8
parent 87ab485cd5
1 changed file with 3 additions and 2 deletions
--- a/ggml/src/ggml-hexagon/htp/hmx-matmul-ops.c
+++ b/ggml/src/ggml-hexagon/htp/hmx-matmul-ops.c
@@ -1520,13 +1520,14 @@ void core_mma_chunk_fp16(__fp16 *restrict c, const __fp16 *restrict a, const __f

    Q6_bias_mxmem2_A((void *)col_scales);

+    const size_t dot_tile_stride = n_dot_tiles * HMX_FP16_TILE_N_ELMS;
    for (size_t i = 0; i < n_row_tiles; ++i) {
-        const __fp16 *row_base = a + i * n_dot_tiles * HMX_FP16_TILE_N_ELMS;
+        const __fp16 *row_base = a + i * dot_tile_stride;
        __fp16 *res_base = c + i * n_col_tiles * HMX_FP16_TILE_N_ELMS;
        for (size_t j = 0; j < n_col_tiles; ++j) {
            Q6_mxclracc_hf();

-            const __fp16 *col_tiles = b + j * n_dot_tiles * HMX_FP16_TILE_N_ELMS;
+            const __fp16 *col_tiles = b + j * dot_tile_stride;
            const __fp16 *row_tiles = row_base;
            __fp16 *accum_tile = res_base + j * HMX_FP16_TILE_N_ELMS;
            if (!zero_init) {