From fa06a153d891648918bfde2616f3e75902dab60c Mon Sep 17 00:00:00 2001 From: PAB Date: Fri, 10 May 2024 10:35:58 +0200 Subject: [PATCH] mnt : include guard + remove `enum class` (#171) --- bark.cpp | 22 ++++++++++++++-------- bark.h | 25 ++++++++++++++++++++++--- encodec.cpp | 2 +- 3 files changed, 37 insertions(+), 12 deletions(-) diff --git a/bark.cpp b/bark.cpp index 15cbaa8..15b0808 100644 --- a/bark.cpp +++ b/bark.cpp @@ -135,7 +135,8 @@ struct bark_context { bark_codes coarse_tokens; bark_codes fine_tokens; - std::vector audio_arr; + float * generated_audio = NULL; + int n_generated_samples = 0; // hyperparameters bark_context_params params; @@ -1173,7 +1174,10 @@ static bool bark_load_model_from_file( // codec model { - bctx->encodec_ctx = encodec_load_model(fin, n_gpu_layers); + const int offset = fin.tellg(); + fin.close(); + + bctx->encodec_ctx = encodec_load_model(fname.c_str(), offset, n_gpu_layers); if (!bctx->encodec_ctx) { fprintf(stderr, "%s: invalid model file '%s' (bad encodec)\n", __func__, fname.c_str()); return false; @@ -2220,12 +2224,14 @@ bool bark_generate_audio(struct bark_context* bctx, const char * text, int n_thr } } - if (!encodec_decompress_audio(bctx->encodec_ctx, encodec_tokens, n_threads)) { + if (!encodec_decompress_audio(bctx->encodec_ctx, encodec_tokens.data(), encodec_tokens.size(), n_threads)) { printf("%s: Could not generate waveform from tokens with Encodec\n", __func__); return false; } - bctx->audio_arr = bctx->encodec_ctx->out_audio; + bctx->generated_audio = encodec_get_audio(bctx->encodec_ctx); + bctx->n_generated_samples = encodec_get_audio_size(bctx->encodec_ctx); + bctx->stats.t_eval_us = ggml_time_us() - t_start_eval_us; return true; @@ -2427,17 +2433,17 @@ bool bark_model_quantize(const char * fname_inp, const char * fname_out, ggml_ft } float * bark_get_audio_data(struct bark_context *bctx) { - if (!bctx || bctx->audio_arr.empty()) { + if (!bctx) { return nullptr; } - return bctx->audio_arr.data(); + return bctx->generated_audio; } int bark_get_audio_data_size(struct bark_context *bctx) { - if (!bctx || bctx->audio_arr.empty()) { + if (!bctx || bctx->generated_audio == NULL) { return 0; } - return bctx->audio_arr.size(); + return bctx->n_generated_samples; } const bark_statistics * bark_get_statistics(struct bark_context *bctx) { diff --git a/bark.h b/bark.h index 26b59e7..a4bb5bb 100644 --- a/bark.h +++ b/bark.h @@ -1,3 +1,22 @@ +/* +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2024 Pierre-Antoine Bannier │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#pragma once + #include "encodec.h" #include "ggml-backend.h" #include "ggml.h" @@ -5,10 +24,10 @@ #ifdef __cplusplus extern "C" { #endif - enum class bark_verbosity_level { - LOW = 0, + enum bark_verbosity_level { + LOW = 0, MEDIUM = 1, - HIGH = 2, + HIGH = 2, }; struct gpt_hparams { diff --git a/encodec.cpp b/encodec.cpp index 68e1801..ba23c7e 160000 --- a/encodec.cpp +++ b/encodec.cpp @@ -1 +1 @@ -Subproject commit 68e1801e0f1d12cf1350725e730aa3c24b7162ab +Subproject commit ba23c7eb66ccec94098b01d6140bff5a93a96fbf