From 2a12467cf8fce2e45f177917503067fb89cef874 Mon Sep 17 00:00:00 2001 From: chraac Date: Wed, 15 Apr 2026 21:51:51 +0800 Subject: [PATCH] optimize row and column tile indexing in core_mma_chunk_fp16 function --- ggml/src/ggml-hexagon/htp/hmx-matmul-ops.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ggml/src/ggml-hexagon/htp/hmx-matmul-ops.c b/ggml/src/ggml-hexagon/htp/hmx-matmul-ops.c index 376a6b27ab..dbca8220fa 100644 --- a/ggml/src/ggml-hexagon/htp/hmx-matmul-ops.c +++ b/ggml/src/ggml-hexagon/htp/hmx-matmul-ops.c @@ -1520,13 +1520,14 @@ void core_mma_chunk_fp16(__fp16 *restrict c, const __fp16 *restrict a, const __f Q6_bias_mxmem2_A((void *)col_scales); + const size_t dot_tile_stride = n_dot_tiles * HMX_FP16_TILE_N_ELMS; for (size_t i = 0; i < n_row_tiles; ++i) { - const __fp16 *row_base = a + i * n_dot_tiles * HMX_FP16_TILE_N_ELMS; + const __fp16 *row_base = a + i * dot_tile_stride; __fp16 *res_base = c + i * n_col_tiles * HMX_FP16_TILE_N_ELMS; for (size_t j = 0; j < n_col_tiles; ++j) { Q6_mxclracc_hf(); - const __fp16 *col_tiles = b + j * n_dot_tiles * HMX_FP16_TILE_N_ELMS; + const __fp16 *col_tiles = b + j * dot_tile_stride; const __fp16 *row_tiles = row_base; __fp16 *accum_tile = res_base + j * HMX_FP16_TILE_N_ELMS; if (!zero_init) {