#include "ggml.h" #include "ggml-cpu.h" #include #include #include #include #include #include #include #include // This is a simple model with two tensors a and b struct simple_model { struct ggml_tensor * a; struct ggml_tensor * b; // the context to define the tensor information (dimensions, size, memory data) struct ggml_context * ctx; }; // initialize the tensors of the model in this case two matrices 2x2 void load_model(simple_model & model, float * a, float * b, int rows_A, int cols_A, int rows_B, int cols_B) { size_t ctx_size = 0; { ctx_size += rows_A * cols_A * ggml_type_size(GGML_TYPE_F32); // tensor a ctx_size += rows_B * cols_B * ggml_type_size(GGML_TYPE_F32); // tensor b ctx_size += 2 * ggml_tensor_overhead(), // tensors ctx_size += ggml_graph_overhead(); // compute graph ctx_size += 1024; // some overhead } struct ggml_init_params params { /*.mem_size =*/ ctx_size, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ false, // NOTE: this should be false when using the legacy API }; // create context model.ctx = ggml_init(params); // create tensors model.a = ggml_new_tensor_2d(model.ctx, GGML_TYPE_F32, cols_A, rows_A); model.b = ggml_new_tensor_2d(model.ctx, GGML_TYPE_F32, cols_B, rows_B); memcpy(model.a->data, a, ggml_nbytes(model.a)); memcpy(model.b->data, b, ggml_nbytes(model.b)); } // build the compute graph to perform a matrix multiplication struct ggml_cgraph * build_graph(const simple_model& model) { struct ggml_cgraph * gf = ggml_new_graph(model.ctx); // result = a*b^T struct ggml_tensor * result = ggml_mul_mat(model.ctx, model.a, model.b); ggml_build_forward_expand(gf, result); return gf; } // compute with backend struct ggml_tensor * compute(const simple_model & model) { struct ggml_cgraph * gf = build_graph(model); int n_threads = 1; // number of threads to perform some operations with multi-threading ggml_graph_compute_with_ctx(model.ctx, gf, n_threads); // in this case, the output tensor is the last one in the graph return ggml_graph_node(gf, -1); } int main(void) { ggml_time_init(); // initialize data of matrices to perform matrix multiplication const int rows_A = 4, cols_A = 2; float matrix_A[rows_A * cols_A] = { 2, 8, 5, 1, 4, 2, 8, 6 }; const int rows_B = 3, cols_B = 2; /* Transpose([ 10, 9, 5, 5, 9, 4 ]) 2 rows, 3 cols */ float matrix_B[rows_B * cols_B] = { 10, 5, 9, 9, 5, 4 }; simple_model model; load_model(model, matrix_A, matrix_B, rows_A, cols_A, rows_B, cols_B); // perform computation in cpu struct ggml_tensor * result = compute(model); // get the result data pointer as a float array to print std::vector out_data(ggml_nelements(result)); memcpy(out_data.data(), result->data, ggml_nbytes(result)); // expected result: // [ 60.00 55.00 50.00 110.00 // 90.00 54.00 54.00 126.00 // 42.00 29.00 28.00 64.00 ] printf("mul mat (%d x %d) (transposed result):\n[", (int) result->ne[0], (int) result->ne[1]); for (int j = 0; j < result->ne[1] /* rows */; j++) { if (j > 0) { printf("\n"); } for (int i = 0; i < result->ne[0] /* cols */; i++) { printf(" %.2f", out_data[j * result->ne[0] + i]); } } printf(" ]\n"); // free memory ggml_free(model.ctx); return 0; }