ENH Add progress callback (#35)

This commit is contained in:
PAB 2023-08-02 18:29:21 +02:00 committed by GitHub
parent a18319369e
commit 0a030189d1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 107 additions and 14 deletions

View File

@ -1276,6 +1276,15 @@ bark_sequence bark_forward_text_encoder(
const float min_eos_p) {
bark_sequence out;
bark_progress progress;
progress.func = __func__;
int64_t t_sample_us = 0;
int64_t t_predict_us = 0;
const int64_t t_main_start_us = ggml_time_us();
int n_past = 0;
float eos_p = 0;
@ -1290,7 +1299,10 @@ bark_sequence bark_forward_text_encoder(
for (int i = 0; i < 768; i++) {
merge_ctx = i == 0;
int64_t t_predict_start_us = ggml_time_us();
gpt_eval(model, n_threads, n_past, merge_ctx, input, logits, mem_per_token);
t_predict_us += (ggml_time_us() - t_predict_start_us);
float logits_pad_token = logits[SEMANTIC_PAD_TOKEN];
logits.resize(SEMANTIC_VOCAB_SIZE);
@ -1304,7 +1316,9 @@ bark_sequence bark_forward_text_encoder(
input.clear();
int64_t t_sample_start_us = ggml_time_us();
bark_vocab::id next = gpt_sample(logits, rng, temp, &eos_p);
t_sample_us += (ggml_time_us() - t_sample_start_us);
if (early_stop && ((next == SEMANTIC_VOCAB_SIZE) || (eos_p > min_eos_p)))
break;
@ -1312,14 +1326,22 @@ bark_sequence bark_forward_text_encoder(
input.push_back(next);
out.push_back(next);
float sum_logits = std::accumulate(logits.begin(), logits.end(), 0.0f);
printf("%d :: %.6f :: %.3f (n=%zu)\n", next, eos_p, sum_logits, logits.size());
progress.callback((float) i/768);
// float sum_logits = std::accumulate(logits.begin(), logits.end(), 0.0f);
// printf("%d :: %.6f :: %.3f (n=%zu)\n", next, eos_p, sum_logits, logits.size());
// printf("%d ", next);
// fflush(stdout);
}
printf("\n\nsemantic sequence length: %zu\n\n", out.size());
const int64_t t_main_end_us = ggml_time_us();
printf("\n\n");
printf("%s: mem per token = %8.2f MB\n", __func__, mem_per_token/1000.0f/1000.0f);
printf("%s: sample time = %8.2f ms\n", __func__, t_sample_us/1000.0f);
printf("%s: predict time = %8.2f ms / %.2f ms per token\n", __func__, t_predict_us/1000.0f, t_predict_us/1000.0f/n_past);
printf("%s: total time = %8.2f ms\n", __func__, (t_main_end_us - t_main_start_us)/1000.0f);
return out;
}
@ -1335,6 +1357,14 @@ bark_codes bark_forward_coarse_encoder(
bark_codes out_coarse(N_COARSE_CODEBOOKS);
bark_sequence out;
bark_progress progress;
progress.func = __func__;
int64_t t_sample_us = 0;
int64_t t_predict_us = 0;
const int64_t t_main_start_us = ggml_time_us();
float semantic_to_coarse_ratio = COARSE_RATE_HZ / SEMANTIC_RATE_HZ * N_COARSE_CODEBOOKS;
int max_semantic_history = floorf(max_coarse_history / semantic_to_coarse_ratio);
@ -1383,7 +1413,9 @@ bark_codes bark_forward_coarse_encoder(
if (step_ix >= n_steps)
continue;
int64_t t_predict_start_us = ggml_time_us();
gpt_eval(model, n_threads, n_past, false, input_in, logits, mem_per_token);
t_predict_us += (ggml_time_us() - t_predict_start_us);
n_past += input_in.size();
input_in.clear();
@ -1393,16 +1425,21 @@ bark_codes bark_forward_coarse_encoder(
int end_ix = SEMANTIC_VOCAB_SIZE + (2 - is_major) * CODEBOOK_SIZE;
std::vector<float> relevant_logits(logits.begin() + start_ix, logits.begin() + end_ix);
int64_t t_sample_start_us = ggml_time_us();
bark_vocab::id next = gpt_sample(relevant_logits, rng, temp, NULL);
t_sample_us += (ggml_time_us() - t_sample_start_us);
next += start_ix;
input_in.push_back(next);
out.push_back(next);
printf("%d ", next);
fflush(stdout);
// printf("%d ", next);
// fflush(stdout);
step_ix += 1;
progress.callback((float) (i*sliding_window_size+j)/n_steps);
}
}
@ -1416,7 +1453,13 @@ bark_codes bark_forward_coarse_encoder(
out_coarse[1].push_back(out[i] - SEMANTIC_VOCAB_SIZE - CODEBOOK_SIZE);
}
printf("\n\ncoarse sequence length: %zu\n\n", out.size());
const int64_t t_main_end_us = ggml_time_us();
printf("\n\n");
printf("%s: mem per token = %8.2f MB\n", __func__, mem_per_token/1000.0f/1000.0f);
printf("%s: sample time = %8.2f ms\n", __func__, t_sample_us/1000.0f);
printf("%s: predict time = %8.2f ms / %.2f ms per token\n", __func__, t_predict_us/1000.0f, t_predict_us/1000.0f/step_ix);
printf("%s: total time = %8.2f ms\n", __func__, (t_main_end_us - t_main_start_us)/1000.0f);
return out_coarse;
}
@ -1427,11 +1470,20 @@ bark_codes bark_forward_fine_encoder(
std::mt19937 & rng,
const int n_threads,
const float temp) {
bark_codes input = tokens;
std::vector<std::vector<float>> logits;
bark_progress progress;
progress.func = __func__;
int64_t t_sample_us = 0;
int64_t t_predict_us = 0;
size_t mem_per_token = 0;
const int64_t t_main_start_us = ggml_time_us();
int n_coarse = input.size();
int original_seq_len = input[0].size();
int n_remove_from_end = 0;
@ -1471,15 +1523,23 @@ bark_codes bark_forward_fine_encoder(
}
for (int nn = n_coarse; nn < N_FINE_CODEBOOKS; nn++) {
int64_t t_predict_start_us = ggml_time_us();
fine_gpt_eval(model, n_threads, nn, in_buffer, logits, mem_per_token);
t_predict_us += (ggml_time_us() - t_predict_start_us);
bark_sequence predictions(CODEBOOK_SIZE - rel_start_fill_ix);
for (int i = 0; i < (int) logits.size(); i++) {
logits[i].resize(CODEBOOK_SIZE);
int64_t t_sample_start_us = ggml_time_us();
bark_vocab::id next = gpt_sample(logits[i], rng, temp, NULL);
t_sample_us += (ggml_time_us() - t_sample_start_us);
in_buffer[nn][rel_start_fill_ix+i] = next;
}
progress.callback((float) (n*(N_FINE_CODEBOOKS-n_coarse)+(nn-n_coarse))/(n_loops*(N_FINE_CODEBOOKS-n_coarse)));
}
// transfer over info into model_in
@ -1498,6 +1558,14 @@ bark_codes bark_forward_fine_encoder(
BARK_ASSERT(tokens[0].size() == in_arr[0].size());
const int64_t t_main_end_us = ggml_time_us();
printf("\n\n");
printf("%s: mem per token = %8.2f MB\n", __func__, mem_per_token/1000.0f/1000.0f);
printf("%s: sample time = %8.2f ms\n", __func__, t_sample_us/1000.0f);
printf("%s: predict time = %8.2f ms\n", __func__, t_predict_us/1000.0f);
printf("%s: total time = %8.2f ms\n", __func__, (t_main_end_us - t_main_start_us)/1000.0f);
return in_arr;
}
@ -1533,19 +1601,22 @@ bool bark_generate_audio(
printf("%d ", tokens[i]);
}
printf("\n\n");
printf("\n");
// encode text (text model)
// semantic encoding
bark_sequence out_semantic = bark_forward_text_encoder(
tokens, model.text_model, rng, n_threads, temp, early_stop, min_eos_p);
tokens, model.text_model, rng, n_threads, temp, early_stop, min_eos_p);
printf("\n");
// coarse encoding (coarse model)
bark_codes out_coarse = bark_forward_coarse_encoder(
out_semantic, model.coarse_model, rng, n_threads, temp, max_coarse_history, sliding_window_size);
out_semantic, model.coarse_model, rng, n_threads, temp, max_coarse_history, sliding_window_size);
printf("\n");
// fine encoding (fine model)
bark_codes out_fine = bark_forward_fine_encoder(
out_coarse, model.fine_model, rng, n_threads, fine_temp);
out_coarse, model.fine_model, rng, n_threads, fine_temp);
printf("\n");
return true;
}

24
bark.h
View File

@ -160,7 +160,7 @@ bark_sequence bark_forward_text_encoder(
const float min_eos_p);
bark_codes bark_forward_coarse_encoder(
const std::vector<bark_vocab::id> & tokens,
const bark_sequence & tokens,
const gpt_model model,
std::mt19937 & rng,
const int n_threads,
@ -174,3 +174,25 @@ bark_codes bark_forward_fine_encoder(
std::mt19937 & rng,
const int n_threads,
const float temp);
// Lightweight console progress reporter: prints "<func>: " once, then one dot
// to stderr each time the reported progress fraction advances past the last
// high-water mark.
struct bark_progress {
    float current = 0.0f;          // high-water mark, in percent (0..100)
    const char * func = nullptr;   // label printed before the first dot (optional)

    // progress: completion fraction in [0, 1]; values that do not exceed the
    // previous maximum are ignored, so out-of-order calls never regress output.
    void callback(float progress) {
        const float percentage = progress * 100.0f;
        // First call (progress == 0) prints the label only; callers start at 0,
        // so this float equality is exact here.
        if (percentage == 0.0f && func != nullptr) {
            fprintf(stderr, "%s: ", func);
        }
        // Was a `while`, but the immediate `current = percentage` assignment
        // meant it could iterate at most once — an `if` states the real intent.
        if (percentage > current) {
            current = percentage;
            fprintf(stderr, ".");
            fflush(stderr);
            // NOTE(review): callers pass fractions strictly below 1.0
            // (e.g. i/768 with i <= 767), so this terminating newline looks
            // unreachable in practice — confirm whether a final callback(1.0f)
            // was intended at the call sites.
            if (percentage >= 100) {
                fprintf(stderr, "\n");
            }
        }
    }
};

View File

@ -1,7 +1,7 @@
#ifndef BUILD_INFO_H
#define BUILD_INFO_H
#define BUILD_NUMBER 22
#define BUILD_COMMIT "c54e6be"
#define BUILD_NUMBER 33
#define BUILD_COMMIT "a183193"
#endif // BUILD_INFO_H