#include "media_io.h" #include "log.h" #include "resource_owners.hpp" #include #include #include #include #include #include #include #include #include #include #define STB_IMAGE_IMPLEMENTATION #define STB_IMAGE_STATIC #include "stb_image.h" #define STB_IMAGE_WRITE_IMPLEMENTATION #define STB_IMAGE_WRITE_STATIC #include "stb_image_write.h" #define STB_IMAGE_RESIZE_IMPLEMENTATION #define STB_IMAGE_RESIZE_STATIC #include "stb_image_resize.h" #ifdef SD_USE_WEBP #include "webp/decode.h" #include "webp/encode.h" #include "webp/mux.h" #endif #ifdef SD_USE_WEBM #include "mkvmuxer/mkvmuxer.h" #include "mkvmuxer/mkvwriter.h" #endif namespace fs = std::filesystem; #ifdef SD_USE_WEBP struct WebPFreeDeleter { void operator()(void* ptr) const { if (ptr != nullptr) { WebPFree(ptr); } } }; struct WebPMuxDeleter { void operator()(WebPMux* mux) const { if (mux != nullptr) { WebPMuxDelete(mux); } } }; struct WebPAnimEncoderDeleter { void operator()(WebPAnimEncoder* enc) const { if (enc != nullptr) { WebPAnimEncoderDelete(enc); } } }; struct WebPDataGuard { WebPDataGuard() { WebPDataInit(&data); } ~WebPDataGuard() { WebPDataClear(&data); } WebPData data; }; struct WebPPictureGuard { WebPPictureGuard() : initialized(WebPPictureInit(&picture) != 0) { } ~WebPPictureGuard() { if (initialized) { WebPPictureFree(&picture); } } WebPPicture picture; bool initialized; }; using WebPBufferPtr = std::unique_ptr; using WebPMuxPtr = std::unique_ptr; using WebPAnimEncoderPtr = std::unique_ptr; #endif bool read_binary_file_bytes(const char* path, std::vector& data) { std::ifstream fin(fs::path(path), std::ios::binary); if (!fin) { return false; } fin.seekg(0, std::ios::end); std::streampos size = fin.tellg(); if (size < 0) { return false; } fin.seekg(0, std::ios::beg); data.resize(static_cast(size)); if (!data.empty()) { fin.read(reinterpret_cast(data.data()), size); if (!fin) { return false; } } return true; } bool write_binary_file_bytes(const std::string& path, const std::vector& data) { std::ofstream fout(fs::path(path), std::ios::binary); if (!fout) { return false; } if (!data.empty()) { fout.write(reinterpret_cast(data.data()), static_cast(data.size())); if (!fout) { return false; } } return true; } uint32_t read_u32_le_bytes(const uint8_t* data) { return static_cast(data[0]) | (static_cast(data[1]) << 8) | (static_cast(data[2]) << 16) | (static_cast(data[3]) << 24); } int stbi_ext_write_png_to_func(stbi_write_func* func, void* context, int x, int y, int comp, const void* data, int stride_bytes, const char* parameters) { int len = 0; unsigned char* png = stbi_write_png_to_mem((const unsigned char*)data, stride_bytes, x, y, comp, &len, parameters); if (png == nullptr) { return 0; } func(context, png, len); STBIW_FREE(png); return 1; } bool is_webp_signature(const uint8_t* data, size_t size) { return size >= 12 && memcmp(data, "RIFF", 4) == 0 && memcmp(data + 8, "WEBP", 4) == 0; } std::string xml_escape(const std::string& value) { std::string escaped; escaped.reserve(value.size()); for (char ch : value) { switch (ch) { case '&': escaped += "&"; break; case '<': escaped += "<"; break; case '>': escaped += ">"; break; case '"': escaped += """; break; case '\'': escaped += "'"; break; default: escaped += ch; break; } } return escaped; } #ifdef SD_USE_WEBP uint8_t* decode_webp_image_to_buffer(const uint8_t* data, size_t size, int& width, int& height, int expected_channel, int& source_channel_count) { WebPBitstreamFeatures features; if (WebPGetFeatures(data, size, &features) != VP8_STATUS_OK) { return nullptr; } width = features.width; height = features.height; source_channel_count = features.has_alpha ? 4 : 3; const size_t pixel_count = static_cast(width) * static_cast(height); if (expected_channel == 1) { int decoded_width = width; int decoded_height = height; WebPBufferPtr decoded(features.has_alpha ? WebPDecodeRGBA(data, size, &decoded_width, &decoded_height) : WebPDecodeRGB(data, size, &decoded_width, &decoded_height)); if (decoded == nullptr) { return nullptr; } FreeUniquePtr grayscale((uint8_t*)malloc(pixel_count)); if (grayscale == nullptr) { return nullptr; } const int decoded_channels = features.has_alpha ? 4 : 3; for (size_t i = 0; i < pixel_count; ++i) { const uint8_t* src = decoded.get() + i * decoded_channels; grayscale.get()[i] = static_cast((77 * src[0] + 150 * src[1] + 29 * src[2] + 128) >> 8); } return grayscale.release(); } if (expected_channel != 3 && expected_channel != 4) { return nullptr; } int decoded_width = width; int decoded_height = height; WebPBufferPtr decoded((expected_channel == 4) ? WebPDecodeRGBA(data, size, &decoded_width, &decoded_height) : WebPDecodeRGB(data, size, &decoded_width, &decoded_height)); if (decoded == nullptr) { return nullptr; } const size_t out_size = pixel_count * static_cast(expected_channel); FreeUniquePtr output((uint8_t*)malloc(out_size)); if (output == nullptr) { return nullptr; } memcpy(output.get(), decoded.get(), out_size); return output.release(); } std::string build_webp_xmp_packet(const std::string& parameters) { if (parameters.empty()) { return ""; } const std::string escaped_parameters = xml_escape(parameters); return "\n" "\n" " \n" " \n" " " + escaped_parameters + "\n" " \n" " \n" "\n" ""; } bool encode_webp_image_to_vector(const uint8_t* image, int width, int height, int channels, const std::string& parameters, int quality, std::vector& out) { if (image == nullptr || width <= 0 || height <= 0) { return false; } std::vector rgb_image; const uint8_t* input_image = image; int input_channels = channels; if (channels == 1) { rgb_image.resize(static_cast(width) * static_cast(height) * 3); for (int i = 0; i < width * height; ++i) { rgb_image[i * 3 + 0] = image[i]; rgb_image[i * 3 + 1] = image[i]; rgb_image[i * 3 + 2] = image[i]; } input_image = rgb_image.data(); input_channels = 3; } if (input_channels != 3 && input_channels != 4) { return false; } uint8_t* encoded_raw = nullptr; size_t encoded_size = (input_channels == 4) ? WebPEncodeRGBA(input_image, width, height, width * input_channels, static_cast(quality), &encoded_raw) : WebPEncodeRGB(input_image, width, height, width * input_channels, static_cast(quality), &encoded_raw); WebPBufferPtr encoded(encoded_raw); if (encoded == nullptr || encoded_size == 0) { return false; } out.assign(encoded.get(), encoded.get() + encoded_size); if (parameters.empty()) { return true; } WebPData image_data; WebPDataInit(&image_data); WebPDataGuard assembled_data; image_data.bytes = out.data(); image_data.size = out.size(); WebPMuxPtr mux(WebPMuxNew()); if (mux == nullptr) { return false; } const std::string xmp_packet = build_webp_xmp_packet(parameters); WebPData xmp_data; WebPDataInit(&xmp_data); xmp_data.bytes = reinterpret_cast(xmp_packet.data()); xmp_data.size = xmp_packet.size(); const bool ok = WebPMuxSetImage(mux.get(), &image_data, 1) == WEBP_MUX_OK && WebPMuxSetChunk(mux.get(), "XMP ", &xmp_data, 1) == WEBP_MUX_OK && WebPMuxAssemble(mux.get(), &assembled_data.data) == WEBP_MUX_OK; if (ok) { out.assign(assembled_data.data.bytes, assembled_data.data.bytes + assembled_data.data.size); } return ok; } #ifdef SD_USE_WEBM bool extract_vp8_frame_from_webp(const std::vector& webp_data, std::vector& vp8_frame) { if (!is_webp_signature(webp_data.data(), webp_data.size())) { return false; } size_t offset = 12; while (offset + 8 <= webp_data.size()) { const uint8_t* chunk = webp_data.data() + offset; const uint32_t chunk_len = read_u32_le_bytes(chunk + 4); const size_t chunk_start = offset + 8; const size_t padded_len = static_cast(chunk_len) + (chunk_len & 1u); if (chunk_start + chunk_len > webp_data.size()) { return false; } if (memcmp(chunk, "VP8 ", 4) == 0) { vp8_frame.assign(webp_data.data() + chunk_start, webp_data.data() + chunk_start + chunk_len); return !vp8_frame.empty(); } offset = chunk_start + padded_len; } return false; } bool encode_sd_image_to_vp8_frame(const sd_image_t& image, int quality, std::vector& vp8_frame) { if (image.data == nullptr || image.width == 0 || image.height == 0) { return false; } const int width = static_cast(image.width); const int height = static_cast(image.height); const int input_channel = static_cast(image.channel); if (input_channel != 1 && input_channel != 3 && input_channel != 4) { return false; } std::vector rgb_buffer; const uint8_t* rgb_data = image.data; if (input_channel == 1) { rgb_buffer.resize(static_cast(width) * static_cast(height) * 3); for (int i = 0; i < width * height; ++i) { rgb_buffer[i * 3 + 0] = image.data[i]; rgb_buffer[i * 3 + 1] = image.data[i]; rgb_buffer[i * 3 + 2] = image.data[i]; } rgb_data = rgb_buffer.data(); } else if (input_channel == 4) { rgb_buffer.resize(static_cast(width) * static_cast(height) * 3); for (int i = 0; i < width * height; ++i) { rgb_buffer[i * 3 + 0] = image.data[i * 4 + 0]; rgb_buffer[i * 3 + 1] = image.data[i * 4 + 1]; rgb_buffer[i * 3 + 2] = image.data[i * 4 + 2]; } rgb_data = rgb_buffer.data(); } std::vector encoded_webp; if (!encode_webp_image_to_vector(rgb_data, width, height, 3, "", quality, encoded_webp)) { return false; } return extract_vp8_frame_from_webp(encoded_webp, vp8_frame); } #endif #endif uint8_t* load_image_common(bool from_memory, const char* image_path_or_bytes, int len, int& width, int& height, int expected_width, int expected_height, int expected_channel) { const char* image_path; FreeUniquePtr image_buffer; int source_channel_count = 0; #ifdef SD_USE_WEBP if (from_memory) { image_path = "memory"; if (len > 0 && is_webp_signature(reinterpret_cast(image_path_or_bytes), static_cast(len))) { image_buffer.reset(decode_webp_image_to_buffer(reinterpret_cast(image_path_or_bytes), static_cast(len), width, height, expected_channel, source_channel_count)); } } else { image_path = image_path_or_bytes; if (encoded_image_format_from_path(image_path_or_bytes) == EncodedImageFormat::WEBP) { std::vector file_bytes; if (!read_binary_file_bytes(image_path_or_bytes, file_bytes)) { LOG_ERROR("load image from '%s' failed", image_path_or_bytes); return nullptr; } if (!is_webp_signature(file_bytes.data(), file_bytes.size())) { LOG_ERROR("load image from '%s' failed", image_path_or_bytes); return nullptr; } image_buffer.reset(decode_webp_image_to_buffer(file_bytes.data(), file_bytes.size(), width, height, expected_channel, source_channel_count)); } } #endif if (from_memory) { image_path = "memory"; if (image_buffer == nullptr) { int c = 0; image_buffer.reset((uint8_t*)stbi_load_from_memory((const stbi_uc*)image_path_or_bytes, len, &width, &height, &c, expected_channel)); source_channel_count = c; } } else { image_path = image_path_or_bytes; if (image_buffer == nullptr) { int c = 0; image_buffer.reset((uint8_t*)stbi_load(image_path_or_bytes, &width, &height, &c, expected_channel)); source_channel_count = c; } } if (image_buffer == nullptr) { LOG_ERROR("load image from '%s' failed", image_path); return nullptr; } if (source_channel_count < expected_channel) { fprintf(stderr, "the number of channels for the input image must be >= %d," "but got %d channels, image_path = %s", expected_channel, source_channel_count, image_path); return nullptr; } if (width <= 0) { LOG_ERROR("error: the width of image must be greater than 0, image_path = %s", image_path); return nullptr; } if (height <= 0) { LOG_ERROR("error: the height of image must be greater than 0, image_path = %s", image_path); return nullptr; } if ((expected_width > 0 && expected_height > 0) && (height != expected_height || width != expected_width)) { float dst_aspect = (float)expected_width / (float)expected_height; float src_aspect = (float)width / (float)height; int crop_x = 0, crop_y = 0; int crop_w = width, crop_h = height; if (src_aspect > dst_aspect) { crop_w = (int)(height * dst_aspect); crop_x = (width - crop_w) / 2; } else if (src_aspect < dst_aspect) { crop_h = (int)(width / dst_aspect); crop_y = (height - crop_h) / 2; } if (crop_x != 0 || crop_y != 0) { LOG_INFO("crop input image from %dx%d to %dx%d, image_path = %s", width, height, crop_w, crop_h, image_path); FreeUniquePtr cropped_image_buffer((uint8_t*)malloc(crop_w * crop_h * expected_channel)); if (cropped_image_buffer == nullptr) { LOG_ERROR("error: allocate memory for crop\n"); return nullptr; } for (int row = 0; row < crop_h; row++) { uint8_t* src = image_buffer.get() + ((crop_y + row) * width + crop_x) * expected_channel; uint8_t* dst = cropped_image_buffer.get() + (row * crop_w) * expected_channel; memcpy(dst, src, crop_w * expected_channel); } width = crop_w; height = crop_h; image_buffer = std::move(cropped_image_buffer); } LOG_INFO("resize input image from %dx%d to %dx%d", width, height, expected_width, expected_height); FreeUniquePtr resized_image_buffer((uint8_t*)malloc(expected_height * expected_width * expected_channel)); if (resized_image_buffer == nullptr) { LOG_ERROR("error: allocate memory for resize input image\n"); return nullptr; } stbir_resize(image_buffer.get(), width, height, 0, resized_image_buffer.get(), expected_width, expected_height, 0, STBIR_TYPE_UINT8, expected_channel, STBIR_ALPHA_CHANNEL_NONE, 0, STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP, STBIR_FILTER_BOX, STBIR_FILTER_BOX, STBIR_COLORSPACE_SRGB, nullptr); width = expected_width; height = expected_height; image_buffer = std::move(resized_image_buffer); } return image_buffer.release(); } typedef struct { uint32_t offset; uint32_t size; } avi_index_entry; void write_u32_le(FILE* f, uint32_t val) { fwrite(&val, 4, 1, f); } void write_u16_le(FILE* f, uint16_t val) { fwrite(&val, 2, 1, f); } EncodedImageFormat encoded_image_format_from_path(const std::string& path) { std::string ext = fs::path(path).extension().string(); std::transform(ext.begin(), ext.end(), ext.begin(), ::tolower); if (ext == ".jpg" || ext == ".jpeg" || ext == ".jpe") { return EncodedImageFormat::JPEG; } if (ext == ".png") { return EncodedImageFormat::PNG; } if (ext == ".webp") { return EncodedImageFormat::WEBP; } return EncodedImageFormat::UNKNOWN; } std::vector encode_image_to_vector(EncodedImageFormat format, const uint8_t* image, int width, int height, int channels, const std::string& parameters, int quality) { std::vector buffer; auto write_func = [&buffer](void* context, void* data, int size) { (void)context; uint8_t* src = reinterpret_cast(data); buffer.insert(buffer.end(), src, src + size); }; struct ContextWrapper { decltype(write_func)& func; } ctx{write_func}; auto c_func = [](void* context, void* data, int size) { auto* wrapper = reinterpret_cast(context); wrapper->func(context, data, size); }; int result = 0; switch (format) { case EncodedImageFormat::JPEG: result = stbi_write_jpg_to_func(c_func, &ctx, width, height, channels, image, quality); break; case EncodedImageFormat::PNG: result = stbi_ext_write_png_to_func(c_func, &ctx, width, height, channels, image, width * channels, parameters.empty() ? nullptr : parameters.c_str()); break; case EncodedImageFormat::WEBP: #ifdef SD_USE_WEBP if (!encode_webp_image_to_vector(image, width, height, channels, parameters, quality, buffer)) { buffer.clear(); } result = buffer.empty() ? 0 : 1; break; #else result = 0; break; #endif default: result = 0; break; } if (!result) { buffer.clear(); } return buffer; } bool write_image_to_file(const std::string& path, const uint8_t* image, int width, int height, int channels, const std::string& parameters, int quality) { const EncodedImageFormat format = encoded_image_format_from_path(path); switch (format) { case EncodedImageFormat::JPEG: return stbi_write_jpg(path.c_str(), width, height, channels, image, quality, parameters.empty() ? nullptr : parameters.c_str()) != 0; case EncodedImageFormat::PNG: return stbi_write_png(path.c_str(), width, height, channels, image, 0, parameters.empty() ? nullptr : parameters.c_str()) != 0; case EncodedImageFormat::WEBP: { const std::vector encoded = encode_image_to_vector(format, image, width, height, channels, parameters, quality); return !encoded.empty() && write_binary_file_bytes(path, encoded); } default: return false; } } uint8_t* load_image_from_file(const char* image_path, int& width, int& height, int expected_width, int expected_height, int expected_channel) { return load_image_common(false, image_path, 0, width, height, expected_width, expected_height, expected_channel); } bool load_sd_image_from_file(sd_image_t* image, const char* image_path, int expected_width, int expected_height, int expected_channel) { int width; int height; image->data = load_image_common(false, image_path, 0, width, height, expected_width, expected_height, expected_channel); if (image->data == nullptr) { return false; } image->width = width; image->height = height; image->channel = expected_channel; return true; } uint8_t* load_image_from_memory(const char* image_bytes, int len, int& width, int& height, int expected_width, int expected_height, int expected_channel) { return load_image_common(true, image_bytes, len, width, height, expected_width, expected_height, expected_channel); } int create_mjpg_avi_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) { if (num_images == 0) { fprintf(stderr, "Error: Image array is empty.\n"); return -1; } FilePtr file(fopen(filename, "wb")); if (!file) { perror("Error opening file for writing"); return -1; } FILE* f = file.get(); uint32_t width = images[0].width; uint32_t height = images[0].height; uint32_t channels = images[0].channel; if (channels != 3 && channels != 4) { fprintf(stderr, "Error: Unsupported channel count: %u\n", channels); return -1; } fwrite("RIFF", 4, 1, f); long riff_size_pos = ftell(f); write_u32_le(f, 0); fwrite("AVI ", 4, 1, f); fwrite("LIST", 4, 1, f); write_u32_le(f, 4 + 8 + 56 + 8 + 4 + 8 + 56 + 8 + 40); fwrite("hdrl", 4, 1, f); fwrite("avih", 4, 1, f); write_u32_le(f, 56); write_u32_le(f, 1000000 / fps); write_u32_le(f, 0); write_u32_le(f, 0); write_u32_le(f, 0x110); write_u32_le(f, num_images); write_u32_le(f, 0); write_u32_le(f, 1); write_u32_le(f, width * height * 3); write_u32_le(f, width); write_u32_le(f, height); write_u32_le(f, 0); write_u32_le(f, 0); write_u32_le(f, 0); write_u32_le(f, 0); fwrite("LIST", 4, 1, f); write_u32_le(f, 4 + 8 + 56 + 8 + 40); fwrite("strl", 4, 1, f); fwrite("strh", 4, 1, f); write_u32_le(f, 56); fwrite("vids", 4, 1, f); fwrite("MJPG", 4, 1, f); write_u32_le(f, 0); write_u16_le(f, 0); write_u16_le(f, 0); write_u32_le(f, 0); write_u32_le(f, 1); write_u32_le(f, fps); write_u32_le(f, 0); write_u32_le(f, num_images); write_u32_le(f, width * height * 3); write_u32_le(f, (uint32_t)-1); write_u32_le(f, 0); write_u16_le(f, 0); write_u16_le(f, 0); write_u16_le(f, 0); write_u16_le(f, 0); fwrite("strf", 4, 1, f); write_u32_le(f, 40); write_u32_le(f, 40); write_u32_le(f, width); write_u32_le(f, height); write_u16_le(f, 1); write_u16_le(f, 24); fwrite("MJPG", 4, 1, f); write_u32_le(f, width * height * 3); write_u32_le(f, 0); write_u32_le(f, 0); write_u32_le(f, 0); write_u32_le(f, 0); fwrite("LIST", 4, 1, f); long movi_size_pos = ftell(f); write_u32_le(f, 0); fwrite("movi", 4, 1, f); std::vector index(static_cast(num_images)); std::vector jpeg_data; for (int i = 0; i < num_images; i++) { jpeg_data.clear(); auto write_to_buf = [](void* context, void* data, int size) { auto* buffer = reinterpret_cast*>(context); const uint8_t* src = reinterpret_cast(data); buffer->insert(buffer->end(), src, src + size); }; if (!stbi_write_jpg_to_func(write_to_buf, &jpeg_data, images[i].width, images[i].height, channels, images[i].data, quality)) { fprintf(stderr, "Error: Failed to encode JPEG frame.\n"); return -1; } fwrite("00dc", 4, 1, f); write_u32_le(f, (uint32_t)jpeg_data.size()); index[i].offset = ftell(f) - 8; index[i].size = (uint32_t)jpeg_data.size(); fwrite(jpeg_data.data(), 1, jpeg_data.size(), f); if (jpeg_data.size() % 2) { fputc(0, f); } } long cur_pos = ftell(f); long movi_size = cur_pos - movi_size_pos - 4; fseek(f, movi_size_pos, SEEK_SET); write_u32_le(f, movi_size); fseek(f, cur_pos, SEEK_SET); fwrite("idx1", 4, 1, f); write_u32_le(f, num_images * 16); for (int i = 0; i < num_images; i++) { fwrite("00dc", 4, 1, f); write_u32_le(f, 0x10); write_u32_le(f, index[i].offset); write_u32_le(f, index[i].size); } cur_pos = ftell(f); long file_size = cur_pos - riff_size_pos - 4; fseek(f, riff_size_pos, SEEK_SET); write_u32_le(f, file_size); fseek(f, cur_pos, SEEK_SET); return 0; } #ifdef SD_USE_WEBP int create_animated_webp_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) { if (num_images == 0) { fprintf(stderr, "Error: Image array is empty.\n"); return -1; } if (fps <= 0) { fprintf(stderr, "Error: FPS must be positive.\n"); return -1; } const int width = static_cast(images[0].width); const int height = static_cast(images[0].height); const int channels = static_cast(images[0].channel); if (channels != 1 && channels != 3 && channels != 4) { fprintf(stderr, "Error: Unsupported channel count: %d\n", channels); return -1; } WebPAnimEncoderOptions anim_options; WebPConfig config; if (!WebPAnimEncoderOptionsInit(&anim_options) || !WebPConfigInit(&config)) { fprintf(stderr, "Error: Failed to initialize WebP animation encoder.\n"); return -1; } config.quality = static_cast(quality); config.method = 4; config.thread_level = 1; if (channels == 4) { config.exact = 1; } if (!WebPValidateConfig(&config)) { fprintf(stderr, "Error: Invalid WebP encoder configuration.\n"); return -1; } WebPAnimEncoderPtr enc(WebPAnimEncoderNew(width, height, &anim_options)); if (enc == nullptr) { fprintf(stderr, "Error: Could not create WebPAnimEncoder object.\n"); return -1; } const int frame_duration_ms = std::max(1, static_cast(std::lround(1000.0 / static_cast(fps)))); int timestamp_ms = 0; for (int i = 0; i < num_images; ++i) { const sd_image_t& image = images[i]; if (static_cast(image.width) != width || static_cast(image.height) != height) { fprintf(stderr, "Error: Frame dimensions do not match.\n"); return -1; } WebPPictureGuard picture; if (!picture.initialized) { fprintf(stderr, "Error: Failed to initialize WebPPicture.\n"); return -1; } picture.picture.use_argb = 1; picture.picture.width = width; picture.picture.height = height; bool picture_ok = false; std::vector rgb_buffer; if (image.channel == 1) { rgb_buffer.resize(static_cast(width) * static_cast(height) * 3); for (int p = 0; p < width * height; ++p) { rgb_buffer[p * 3 + 0] = image.data[p]; rgb_buffer[p * 3 + 1] = image.data[p]; rgb_buffer[p * 3 + 2] = image.data[p]; } picture_ok = WebPPictureImportRGB(&picture.picture, rgb_buffer.data(), width * 3) != 0; } else if (image.channel == 4) { picture_ok = WebPPictureImportRGBA(&picture.picture, image.data, width * 4) != 0; } else { picture_ok = WebPPictureImportRGB(&picture.picture, image.data, width * 3) != 0; } if (!picture_ok) { fprintf(stderr, "Error: Failed to import frame into WebPPicture.\n"); return -1; } if (!WebPAnimEncoderAdd(enc.get(), &picture.picture, timestamp_ms, &config)) { fprintf(stderr, "Error: Failed to add frame to animated WebP: %s\n", WebPAnimEncoderGetError(enc.get())); return -1; } timestamp_ms += frame_duration_ms; } if (!WebPAnimEncoderAdd(enc.get(), nullptr, timestamp_ms, nullptr)) { fprintf(stderr, "Error: Failed to finalize animated WebP frames: %s\n", WebPAnimEncoderGetError(enc.get())); return -1; } WebPDataGuard webp_data; if (!WebPAnimEncoderAssemble(enc.get(), &webp_data.data)) { fprintf(stderr, "Error: Failed to assemble animated WebP: %s\n", WebPAnimEncoderGetError(enc.get())); return -1; } FilePtr f(fopen(filename, "wb")); if (!f) { perror("Error opening file for writing"); return -1; } if (webp_data.data.size > 0 && fwrite(webp_data.data.bytes, 1, webp_data.data.size, f.get()) != webp_data.data.size) { fprintf(stderr, "Error: Failed to write animated WebP file.\n"); return -1; } return 0; } #endif #ifdef SD_USE_WEBM int create_webm_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) { if (num_images == 0) { fprintf(stderr, "Error: Image array is empty.\n"); return -1; } if (fps <= 0) { fprintf(stderr, "Error: FPS must be positive.\n"); return -1; } const int width = static_cast(images[0].width); const int height = static_cast(images[0].height); if (width <= 0 || height <= 0) { fprintf(stderr, "Error: Invalid frame dimensions.\n"); return -1; } mkvmuxer::MkvWriter writer; if (!writer.Open(filename)) { fprintf(stderr, "Error: Could not open WebM file for writing.\n"); return -1; } const int ret = [&]() -> int { mkvmuxer::Segment segment; if (!segment.Init(&writer)) { fprintf(stderr, "Error: Failed to initialize WebM muxer.\n"); return -1; } segment.set_mode(mkvmuxer::Segment::kFile); segment.OutputCues(true); const uint64_t track_number = segment.AddVideoTrack(width, height, 0); if (track_number == 0) { fprintf(stderr, "Error: Failed to add VP8 video track.\n"); return -1; } if (!segment.CuesTrack(track_number)) { fprintf(stderr, "Error: Failed to set WebM cues track.\n"); return -1; } mkvmuxer::VideoTrack* video_track = static_cast(segment.GetTrackByNumber(track_number)); if (video_track != nullptr) { video_track->set_display_width(static_cast(width)); video_track->set_display_height(static_cast(height)); video_track->set_frame_rate(static_cast(fps)); } segment.GetSegmentInfo()->set_writing_app("stable-diffusion.cpp"); segment.GetSegmentInfo()->set_muxing_app("stable-diffusion.cpp"); const uint64_t frame_duration_ns = std::max( 1, static_cast(std::llround(1000000000.0 / static_cast(fps)))); uint64_t timestamp_ns = 0; for (int i = 0; i < num_images; ++i) { const sd_image_t& image = images[i]; if (static_cast(image.width) != width || static_cast(image.height) != height) { fprintf(stderr, "Error: Frame dimensions do not match.\n"); return -1; } std::vector vp8_frame; if (!encode_sd_image_to_vp8_frame(image, quality, vp8_frame)) { fprintf(stderr, "Error: Failed to encode frame %d as VP8.\n", i); return -1; } if (!segment.AddFrame(vp8_frame.data(), static_cast(vp8_frame.size()), track_number, timestamp_ns, true)) { fprintf(stderr, "Error: Failed to mux frame %d into WebM.\n", i); return -1; } timestamp_ns += frame_duration_ns; } if (!segment.Finalize()) { fprintf(stderr, "Error: Failed to finalize WebM output.\n"); return -1; } return 0; }(); writer.Close(); return ret; } #endif int create_video_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) { std::string path = filename ? filename : ""; auto pos = path.find_last_of('.'); std::string ext = pos == std::string::npos ? "" : path.substr(pos); for (char& ch : ext) { ch = static_cast(tolower(static_cast(ch))); } #ifdef SD_USE_WEBM if (ext == ".webm") { return create_webm_from_sd_images(filename, images, num_images, fps, quality); } #endif #ifdef SD_USE_WEBP if (ext == ".webp") { return create_animated_webp_from_sd_images(filename, images, num_images, fps, quality); } #endif return create_mjpg_avi_from_sd_images(filename, images, num_images, fps, quality); }