wip

2026-04-20 14:16:46 +02:00 · 2026-04-15 21:56:52 +08:00 · 2026-04-15 21:56:52 +08:00 · cde679eff7
commit cde679eff7
parent 2a12467cf8
1 changed file with 4 additions and 4 deletions
--- a/ggml/src/ggml-hexagon/htp/hmx-matmul-ops.c
+++ b/ggml/src/ggml-hexagon/htp/hmx-matmul-ops.c
@@ -943,8 +943,6 @@ int hmx_mat_mul_permuted_w16a32_batched(struct htp_context *ctx, const hmx_matmu
    const size_t fp16_row_bytes   = (size_t) params->k * sizeof(__fp16);
    const size_t weight_row_bytes = (size_t) params->weight_stride * sizeof(__fp16);

-    HAP_compute_res_hmx_lock(ctx->vtcm_rctx);
-
    for (int b3 = 0; b3 < params->ne13; ++b3) {
        for (int b2_base = 0; b2_base < params->ne12; b2_base += group_size) {
            const __fp16 *weight_group = hmx_matmul_weight_batch_ptr(params, b2_base, b3);
@@ -989,6 +987,8 @@ int hmx_mat_mul_permuted_w16a32_batched(struct htp_context *ctx, const hmx_matmu
                                      fp16_row_bytes, weight_row_bytes, fp16_row_bytes, n_cols_first);
                }

+                HAP_compute_res_hmx_lock(ctx->vtcm_rctx);
+
                for (size_t nc = 0; nc < (size_t) params->n; nc += n_chunk_n_cols) {
                    const size_t n_cols = hex_smin((size_t) params->n - nc, n_chunk_n_cols);
                    const size_t n_col_tiles = hmx_ceil_div((int) n_cols, HMX_FP16_TILE_N_COLS);
@@ -1029,12 +1029,12 @@ int hmx_mat_mul_permuted_w16a32_batched(struct htp_context *ctx, const hmx_matmu
                        TIMER_STOP(output_store);
                    }
                }
+
+                HAP_compute_res_hmx_unlock(ctx->vtcm_rctx);
            }
        }
    }

-    HAP_compute_res_hmx_unlock(ctx->vtcm_rctx);
-
    TIMER_STOP(total);

 #if defined(ENABLE_PROFILE_TIMERS)