From 0a030189d1cdf6971b10e437fbf713cded97157c Mon Sep 17 00:00:00 2001
From: PAB <pierreantoine.bannier@gmail.com>
Date: Wed, 2 Aug 2023 18:29:21 +0200
Subject: [PATCH] ENH Add progress callback (#35)

---
 bark.cpp     | 93 +++++++++++++++++++++++++++++++++++++++++++++-------
 bark.h       | 24 +++++++++++++-
 build-info.h |  4 +--
 3 files changed, 107 insertions(+), 14 deletions(-)
diff --git a/bark.cpp b/bark.cpp
index 186a065..77b468c 100644
--- a/bark.cpp
+++ b/bark.cpp
@@ -1276,6 +1276,15 @@ bark_sequence bark_forward_text_encoder(
     const float min_eos_p) {
 
     bark_sequence out;
+
+    bark_progress progress;
+    progress.func = __func__;
+
+    int64_t t_sample_us  = 0;
+    int64_t t_predict_us = 0;
+
+    const int64_t t_main_start_us = ggml_time_us();
+
     int n_past = 0;
     float eos_p = 0;
 
@@ -1290,7 +1299,10 @@ bark_sequence bark_forward_text_encoder(
 
     for (int i = 0; i < 768; i++) {
         merge_ctx = i == 0;
+
+        int64_t t_predict_start_us = ggml_time_us();
         gpt_eval(model, n_threads, n_past, merge_ctx, input, logits, mem_per_token);
+        t_predict_us += (ggml_time_us() - t_predict_start_us);
 
         float logits_pad_token = logits[SEMANTIC_PAD_TOKEN];
         logits.resize(SEMANTIC_VOCAB_SIZE);
@@ -1304,7 +1316,9 @@ bark_sequence bark_forward_text_encoder(
 
         input.clear();
 
+        int64_t t_sample_start_us = ggml_time_us();
         bark_vocab::id next = gpt_sample(logits, rng, temp, &eos_p);
+        t_sample_us += (ggml_time_us() - t_sample_start_us);
 
         if (early_stop && ((next == SEMANTIC_VOCAB_SIZE) || (eos_p > min_eos_p)))
             break;
@@ -1312,14 +1326,22 @@ bark_sequence bark_forward_text_encoder(
         input.push_back(next);
         out.push_back(next);
 
-        float sum_logits = std::accumulate(logits.begin(), logits.end(), 0.0f);
-        printf("%d :: %.6f :: %.3f (n=%zu)\n", next, eos_p, sum_logits, logits.size());
+        progress.callback((float) i/768);
+
+        // float sum_logits = std::accumulate(logits.begin(), logits.end(), 0.0f);
+        // printf("%d :: %.6f :: %.3f (n=%zu)\n", next, eos_p, sum_logits, logits.size());
 
         // printf("%d ", next);
         // fflush(stdout);
     }
 
-    printf("\n\nsemantic sequence length: %zu\n\n", out.size());
+    const int64_t t_main_end_us = ggml_time_us();
+
+    printf("\n\n");
+    printf("%s: mem per token = %8.2f MB\n", __func__, mem_per_token/1000.0f/1000.0f);
+    printf("%s:   sample time = %8.2f ms\n", __func__, t_sample_us/1000.0f);
+    printf("%s:  predict time = %8.2f ms / %.2f ms per token\n", __func__, t_predict_us/1000.0f, t_predict_us/1000.0f/n_past);
+    printf("%s:    total time = %8.2f ms\n", __func__, (t_main_end_us - t_main_start_us)/1000.0f);
 
     return out;
 }
@@ -1335,6 +1357,14 @@ bark_codes bark_forward_coarse_encoder(
     bark_codes out_coarse(N_COARSE_CODEBOOKS);
     bark_sequence out;
 
+    bark_progress progress;
+    progress.func = __func__;
+
+    int64_t t_sample_us  = 0;
+    int64_t t_predict_us = 0;
+
+    const int64_t t_main_start_us = ggml_time_us();
+
     float semantic_to_coarse_ratio = COARSE_RATE_HZ / SEMANTIC_RATE_HZ * N_COARSE_CODEBOOKS;
     int max_semantic_history = floorf(max_coarse_history / semantic_to_coarse_ratio);
 
@@ -1383,7 +1413,9 @@ bark_codes bark_forward_coarse_encoder(
             if (step_ix >= n_steps)
                 continue;
 
+            int64_t t_predict_start_us = ggml_time_us();
             gpt_eval(model, n_threads, n_past, false, input_in, logits, mem_per_token);
+            t_predict_us += (ggml_time_us() - t_predict_start_us);
 
             n_past += input_in.size();
             input_in.clear();
@@ -1393,16 +1425,21 @@ bark_codes bark_forward_coarse_encoder(
             int end_ix    = SEMANTIC_VOCAB_SIZE + (2 - is_major) * CODEBOOK_SIZE;
             std::vector<float> relevant_logits(logits.begin() + start_ix, logits.begin() + end_ix);
 
+            int64_t t_sample_start_us = ggml_time_us();
             bark_vocab::id next = gpt_sample(relevant_logits, rng, temp, NULL);
+            t_sample_us += (ggml_time_us() - t_sample_start_us);
+
             next += start_ix;
 
             input_in.push_back(next);
             out.push_back(next);
 
-            printf("%d ", next);
-            fflush(stdout);
+            // printf("%d ", next);
+            // fflush(stdout);
 
             step_ix += 1;
+
+            progress.callback((float) (i*sliding_window_size+j)/n_steps);
         }
     }
 
@@ -1416,7 +1453,13 @@ bark_codes bark_forward_coarse_encoder(
             out_coarse[1].push_back(out[i] - SEMANTIC_VOCAB_SIZE - CODEBOOK_SIZE);
     }
 
-    printf("\n\ncoarse sequence length: %zu\n\n", out.size());
+    const int64_t t_main_end_us = ggml_time_us();
+
+    printf("\n\n");
+    printf("%s: mem per token = %8.2f MB\n", __func__, mem_per_token/1000.0f/1000.0f);
+    printf("%s:   sample time = %8.2f ms\n", __func__, t_sample_us/1000.0f);
+    printf("%s:  predict time = %8.2f ms / %.2f ms per token\n", __func__, t_predict_us/1000.0f, t_predict_us/1000.0f/step_ix);
+    printf("%s:    total time = %8.2f ms\n", __func__, (t_main_end_us - t_main_start_us)/1000.0f);
 
     return out_coarse;
 }
@@ -1427,11 +1470,20 @@ bark_codes bark_forward_fine_encoder(
     std::mt19937 & rng,
     const int n_threads,
     const float temp) {
+
     bark_codes input = tokens;
     std::vector<std::vector<float>> logits;
 
+    bark_progress progress;
+    progress.func = __func__;
+
+    int64_t t_sample_us  = 0;
+    int64_t t_predict_us = 0;
+
     size_t mem_per_token = 0;
 
+    const int64_t t_main_start_us = ggml_time_us();
+
     int n_coarse          = input.size();
     int original_seq_len  = input[0].size();
     int n_remove_from_end = 0;
@@ -1471,15 +1523,23 @@ bark_codes bark_forward_fine_encoder(
         }
 
         for (int nn = n_coarse; nn < N_FINE_CODEBOOKS; nn++) {
+            int64_t t_predict_start_us = ggml_time_us();
             fine_gpt_eval(model, n_threads, nn, in_buffer, logits, mem_per_token);
+            t_predict_us += (ggml_time_us() - t_predict_start_us);
 
             bark_sequence predictions(CODEBOOK_SIZE - rel_start_fill_ix);
 
             for (int i = 0; i < (int) logits.size(); i++) {
                 logits[i].resize(CODEBOOK_SIZE);
+
+                int64_t t_sample_start_us = ggml_time_us();
                 bark_vocab::id next = gpt_sample(logits[i], rng, temp, NULL);
+                t_sample_us += (ggml_time_us() - t_sample_start_us);
+
                 in_buffer[nn][rel_start_fill_ix+i] = next;
             }
+
+            progress.callback((float) (n*(N_FINE_CODEBOOKS-n_coarse)+(nn-n_coarse))/(n_loops*(N_FINE_CODEBOOKS-n_coarse)));
         }
 
         // transfer over info into model_in
@@ -1498,6 +1558,14 @@ bark_codes bark_forward_fine_encoder(
 
     BARK_ASSERT(tokens[0].size() == in_arr[0].size());
 
+    const int64_t t_main_end_us = ggml_time_us();
+
+    printf("\n\n");
+    printf("%s: mem per token = %8.2f MB\n", __func__, mem_per_token/1000.0f/1000.0f);
+    printf("%s:   sample time = %8.2f ms\n", __func__, t_sample_us/1000.0f);
+    printf("%s:  predict time = %8.2f ms\n", __func__, t_predict_us/1000.0f);
+    printf("%s:    total time = %8.2f ms\n", __func__, (t_main_end_us - t_main_start_us)/1000.0f);
+
     return in_arr;
 }
 
@@ -1533,19 +1601,22 @@ bool bark_generate_audio(
         printf("%d ", tokens[i]);
     }
 
-    printf("\n\n");
+    printf("\n");
 
-    // encode text (text model)
+    // semantic encoding
     bark_sequence out_semantic = bark_forward_text_encoder(
-        tokens, model.text_model, rng, n_threads, temp, early_stop, min_eos_p);
+            tokens, model.text_model, rng, n_threads, temp, early_stop, min_eos_p);
+    printf("\n");
 
     // coarse encoding (coarse model)
     bark_codes out_coarse = bark_forward_coarse_encoder(
-        out_semantic, model.coarse_model, rng, n_threads, temp, max_coarse_history, sliding_window_size);
+            out_semantic, model.coarse_model, rng, n_threads, temp, max_coarse_history, sliding_window_size);
+    printf("\n");
 
     // fine encoding (fine model)
     bark_codes out_fine = bark_forward_fine_encoder(
-        out_coarse, model.fine_model, rng, n_threads, fine_temp);
+            out_coarse, model.fine_model, rng, n_threads, fine_temp);
+    printf("\n");
 
     return true;
 }
diff --git a/bark.h b/bark.h
index 13634cc..c57c113 100644
--- a/bark.h
+++ b/bark.h
@@ -160,7 +160,7 @@ bark_sequence bark_forward_text_encoder(
     const float min_eos_p);
 
 bark_codes bark_forward_coarse_encoder(
-    const std::vector<bark_vocab::id> & tokens,
+    const bark_sequence & tokens,
     const gpt_model model,
     std::mt19937 & rng,
     const int n_threads,
@@ -174,3 +174,25 @@ bark_codes bark_forward_fine_encoder(
     std::mt19937 & rng,
     const int n_threads,
     const float temp);
+
+struct bark_progress {
+    float current = 0.0f;
+    const char * func = NULL;
+
+    bark_progress() {}
+
+    void callback(float progress) {
+        float percentage = progress * 100;
+        if (percentage == 0.0f && func != NULL) {
+            fprintf(stderr, "%s: ", func);
+        }
+        while (percentage > current) {
+            current = percentage;
+            fprintf(stderr, ".");
+            fflush(stderr);
+            if (percentage >= 100) {
+                fprintf(stderr, "\n");
+            }
+        }
+    }
+};
diff --git a/build-info.h b/build-info.h
index 85372a1..635c2d6 100644
--- a/build-info.h
+++ b/build-info.h
@@ -1,7 +1,7 @@
 #ifndef BUILD_INFO_H
 #define BUILD_INFO_H
 
-#define BUILD_NUMBER 22
-#define BUILD_COMMIT "c54e6be"
+#define BUILD_NUMBER 33
+#define BUILD_COMMIT "a183193"
 
 #endif // BUILD_INFO_H