/* ╞══════════════════════════════════════════════════════════════════════════════╡ │ Copyright 2024 Pierre-Antoine Bannier │ │ │ │ Permission to use, copy, modify, and/or distribute this software for │ │ any purpose with or without fee is hereby granted, provided that the │ │ above copyright notice and this permission notice appear in all copies. │ │ │ │ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ │ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ │ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ │ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ │ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ │ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #pragma once #include "encodec.h" #include "ggml-backend.h" #include "ggml.h" #ifdef _WIN32 #ifdef EXPORTING_BARK #define BARK_API __declspec(dllexport) #else #define BARK_API __declspec(dllimport) #endif #else #define BARK_API #endif #ifdef __cplusplus extern "C" { #endif enum bark_verbosity_level { LOW = 0, MEDIUM = 1, HIGH = 2, }; enum bark_encoding_step { SEMANTIC = 0, COARSE = 1, FINE = 2, }; struct bark_context; struct bark_model; // Holds the vocabulary for the semantic encoder struct bark_vocab; // Define the GPT architecture for the 3 encoders struct gpt_model; typedef void (*bark_progress_callback)(struct bark_context * bctx, enum bark_encoding_step step, int progress, void * user_data); struct bark_statistics { // Time to load model weights int64_t t_load_us; // Time to generate audio int64_t t_eval_us; // Time to generate semantic tokens int64_t t_semantic_us; // Time to generate coarse tokens int64_t t_coarse_us; // Time to generate fine tokens int64_t t_fine_us; // Number of semantic tokens sampled int32_t n_sample_semantic; // Number of coarse tokens sampled int32_t n_sample_coarse; // Number of fine tokens sampled int32_t n_sample_fine; }; struct bark_context_params { // Verbosity level enum bark_verbosity_level verbosity; // Temperature for sampling (text and coarse encoders) float temp; // Temperature for sampling (fine encoder) float fine_temp; // Minimum probability for EOS token (text encoder) float min_eos_p; // Sliding window size for coarse encoder int32_t sliding_window_size; // Max history for coarse encoder int32_t max_coarse_history; // Sample rate int32_t sample_rate; // Target bandwidth int32_t target_bandwidth; // CLS token ID int32_t cls_token_id; // SEP token ID int32_t sep_token_id; // Maximum number of semantic tokens to generate int32_t n_steps_text_encoder; // Text PAD token ID int32_t text_pad_token; // Text encoding offset int32_t text_encoding_offset; // Semantic frequency rate float semantic_rate_hz; // Semantic PAD token ID int32_t semantic_pad_token; // Vocabulary size in semantic encoder int32_t semantic_vocab_size; // Semantic infernce token ID int32_t semantic_infer_token; // Coarse frequency rate float coarse_rate_hz; // Coarse infer token ID int32_t coarse_infer_token; // Coarse semantic pad token ID int32_t coarse_semantic_pad_token; // Number of codebooks in coarse encoder int32_t n_coarse_codebooks; // Number of codebooks in fine encoder int32_t n_fine_codebooks; // Dimension of the codes int32_t codebook_size; // called on each progress update bark_progress_callback progress_callback; void * progress_callback_user_data; }; /** * @brief Returns the default parameters for a bark context. * * @return bark_context_params The default parameters for a bark context. */ BARK_API struct bark_context_params bark_context_default_params(void); /** * Loads a Bark model from the specified file path with the given parameters. * * @param model_path The directory path of the bark model to load. * @param params The parameters to use for the Bark model. * @param seed The seed to use for random number generation. * @return A pointer to the loaded bark model context. */ BARK_API struct bark_context *bark_load_model( const char *model_path, struct bark_context_params params, uint32_t seed); /** * Generates an audio file from the given text using the specified Bark context. * * @param bctx The Bark context to use for generating the audio. * @param text The text to generate audio from. * @param n_threads The number of threads to use for generating the audio. * @return An integer indicating the success of the audio generation process. */ BARK_API bool bark_generate_audio( struct bark_context *bctx, const char *text, int n_threads); /** * Retrieves the audio data generated by the Bark context. * * @param bctx The Bark context to use for generating the audio. * @return A pointer to the audio data generated by the Bark context. */ BARK_API float *bark_get_audio_data( struct bark_context *bctx); /** * Retrieves the audio data generated by the Bark context. * * @param bctx The Bark context to use for generating the audio. * @return The size of the audio data generated by the Bark context. */ BARK_API int bark_get_audio_data_size( struct bark_context *bctx); /** * Retrieves the load time of the last audio generation round. * * @param bctx The Bark context to use for generating the audio. * @return A struct containing the statistics of the last audio generation round. */ BARK_API int64_t bark_get_load_time( struct bark_context *bctx); /** * Retrieves the evaluation time of the last audio generation round. * * @param bctx The Bark context to use for generating the audio. * @return A struct containing the statistics of the last audio generation round. */ BARK_API int64_t bark_get_eval_time( struct bark_context *bctx); /** * Reset the statistics of the last audio generation round. * * @param bctx The Bark context to use for generating the audio. * @return A struct containing the statistics of the last audio generation round. */ BARK_API void bark_reset_statistics( struct bark_context *bctx); /** * Quantizes a bark model and saves the result to a file. * * @param fname_inp The name of the input file containing the BARK model. * @param fname_out The name of the output file to save the quantized model to. * @param ftype The type of the model's floating-point values. * @return True if the model was successfully quantized and saved, false otherwise. */ BARK_API bool bark_model_quantize( const char *fname_inp, const char *fname_out, enum ggml_ftype ftype); /** * @brief Frees the memory allocated for a bark context. * * @param bctx The bark context to free. */ BARK_API void bark_free( struct bark_context *bctx); #ifdef __cplusplus } #endif