From 2f37db78c1b9063db839d2e4156f4484282bf4a8 Mon Sep 17 00:00:00 2001 From: chraac Date: Wed, 15 Apr 2026 09:19:50 +0800 Subject: [PATCH] fix compiling error --- ggml/src/ggml-hexagon/htp/hmx-matmul-ops.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/ggml/src/ggml-hexagon/htp/hmx-matmul-ops.c b/ggml/src/ggml-hexagon/htp/hmx-matmul-ops.c index dba8d62ab4..8ff3f3fee6 100644 --- a/ggml/src/ggml-hexagon/htp/hmx-matmul-ops.c +++ b/ggml/src/ggml-hexagon/htp/hmx-matmul-ops.c @@ -678,7 +678,7 @@ static void core_dot_chunk_fp16(__fp16 *restrict output, const __fp16 *restrict __builtin_assume(n_dot_tiles > 0); Q6_bias_mxmem2_A((void *)scales); - + for (int r = 0; r < n_row_tiles; ++r) { for (size_t c = 0; c < n_col_tiles; ++c) { Q6_mxclracc_hf(); @@ -944,7 +944,6 @@ int hmx_mat_mul_permuted_w16a32_batched(struct htp_context *ctx, const hmx_matmu const size_t weight_row_bytes = (size_t) params->weight_stride * sizeof(__fp16); HAP_compute_res_hmx_lock(ctx->vtcm_rctx); - hmx_set_output_scales(vtcm_scales); for (int b3 = 0; b3 < params->ne13; ++b3) { for (int b2_base = 0; b2_base < params->ne12; b2_base += group_size) { @@ -1017,7 +1016,7 @@ int hmx_mat_mul_permuted_w16a32_batched(struct htp_context *ctx, const hmx_matmu TIMER_START(hmx_core); { const __fp16 * vtcm_act_g = vtcm_activation + (size_t) g * act_head_stride; - core_dot_chunk_fp16(vtcm_output, vtcm_act_g, vtcm_weight, n_row_tiles, n_col_tiles, + core_dot_chunk_fp16(vtcm_output, vtcm_act_g, vtcm_weight, vtcm_scales, n_row_tiles, n_col_tiles, params->k / 32); } TIMER_STOP(hmx_core); @@ -1118,7 +1117,6 @@ int hmx_mat_mul_permuted_w16a32(struct htp_context *ctx, float *restrict dst, co TIMER_START(total); HAP_compute_res_hmx_lock(ctx->vtcm_rctx); - hmx_set_output_scales(vtcm_scales); for (size_t mr = 0; mr < m; mr += m_chunk_n_rows) { // transfer activation matrix chunk into VTCM @@ -1187,7 +1185,7 @@ int hmx_mat_mul_permuted_w16a32(struct htp_context *ctx, float *restrict dst, co TIMER_START(hmx_core); { - core_dot_chunk_fp16(vtcm_output, vtcm_activation, vtcm_weight, n_row_tiles, n_col_tiles, k / 32); + core_dot_chunk_fp16(vtcm_output, vtcm_activation, vtcm_weight, vtcm_scales, n_row_tiles, n_col_tiles, k / 32); } TIMER_STOP(hmx_core); @@ -1376,7 +1374,7 @@ int hmx_mat_mul_permuted_qk_0_d16a32(struct htp_context *ctx, float *restrict ds TIMER_START(hmx_core); { - core_dot_chunk_fp16(vtcm_output, vtcm_activation, vtcm_weight, n_row_tiles, n_col_tiles, k / 32); + core_dot_chunk_fp16(vtcm_output, vtcm_activation, vtcm_weight, vtcm_scales, n_row_tiles, n_col_tiles, k / 32); } TIMER_STOP(hmx_core);