feat: add webm support (#1391)

This commit is contained in:
leejet 2026-04-06 01:49:28 +08:00 committed by GitHub
parent 9369ab759f
commit 7397ddaa86
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 286 additions and 11 deletions

View File

@ -239,6 +239,7 @@ jobs:
id: build-push
uses: docker/build-push-action@v6
with:
context: .
platforms: linux/amd64
push: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
file: Dockerfile.${{ matrix.variant }}

3
.gitmodules vendored
View File

@ -7,3 +7,6 @@
[submodule "thirdparty/libwebp"]
path = thirdparty/libwebp
url = https://github.com/webmproject/libwebp.git
# libwebm: built from this submodule for .webm video output unless SD_USE_SYSTEM_WEBM is set.
[submodule "thirdparty/libwebm"]
path = thirdparty/libwebm
url = https://github.com/webmproject/libwebm.git

View File

@ -32,6 +32,16 @@ else()
set(SD_WEBP_DEFAULT ${SD_USE_SYSTEM_WEBP})
endif()
# Detect the bundled libwebm checkout and derive the default for the SD_WEBM option:
# ON when the submodule is present, otherwise whatever SD_USE_SYSTEM_WEBM currently holds.
if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/libwebm/CMakeLists.txt")
    set(SD_SUBMODULE_WEBM TRUE)
    set(SD_WEBM_DEFAULT ON)
else()
    set(SD_SUBMODULE_WEBM FALSE)
    set(SD_WEBM_DEFAULT ${SD_USE_SYSTEM_WEBM})
endif()
#
# Option list
#
@ -41,6 +51,8 @@ endif()
option(SD_BUILD_EXAMPLES "sd: build examples" ${SD_STANDALONE})
option(SD_WEBP "sd: enable WebP image I/O support" ${SD_WEBP_DEFAULT})
option(SD_USE_SYSTEM_WEBP "sd: link against system libwebp" OFF)
# WebM video output. The default comes from SD_WEBM_DEFAULT (ON when the libwebm submodule is
# checked out). Enabling SD_WEBM while SD_WEBP is OFF is rejected later at configure time.
option(SD_WEBM "sd: enable WebM video output support" ${SD_WEBM_DEFAULT})
# Use a libwebm found on the system instead of the thirdparty/libwebm submodule.
option(SD_USE_SYSTEM_WEBM "sd: link against system libwebm" OFF)
option(SD_CUDA "sd: cuda backend" OFF)
option(SD_HIPBLAS "sd: rocm backend" OFF)
option(SD_METAL "sd: metal backend" OFF)
@ -111,7 +123,31 @@ if(SD_WEBP)
)
endif()
endif()
add_compile_definitions(SD_USE_WEBP)
endif()
# WebM configuration: validate prerequisites and, when requested, wrap a system libwebm
# in an imported target named `webm` so consumers can link it by the same name in both modes.
if(SD_WEBM)
# WebM output depends on the WebP encoder, so the two options must be enabled together.
if(NOT SD_WEBP)
message(FATAL_ERROR "SD_WEBM requires SD_WEBP because WebM output reuses libwebp VP8 encoding.")
endif()
# Neither the submodule nor a system library was selected: stop with instructions.
if(NOT SD_SUBMODULE_WEBM AND NOT SD_USE_SYSTEM_WEBM)
message(FATAL_ERROR "WebM support enabled but no source found.
Either initialize the submodule:\n git submodule update --init thirdparty/libwebm\n\n"
"Or link against system library:\n cmake (...) -DSD_USE_SYSTEM_WEBM=ON")
endif()
if(SD_USE_SYSTEM_WEBM)
# Locate the directory containing the libwebm headers; a `webm` sub-directory of the
# usual include locations is also searched.
find_path(WEBM_INCLUDE_DIR
NAMES mkvmuxer/mkvmuxer.h mkvparser/mkvparser.h common/webmids.h
PATH_SUFFIXES webm
REQUIRED)
# Locate the library itself under either of its common names.
find_library(WEBM_LIBRARY
NAMES webm libwebm
REQUIRED)
# Expose the system library as an imported target carrying its include directory.
add_library(webm UNKNOWN IMPORTED)
set_target_properties(webm PROPERTIES
IMPORTED_LOCATION "${WEBM_LIBRARY}"
INTERFACE_INCLUDE_DIRECTORIES "${WEBM_INCLUDE_DIR}")
endif()
endif()
set(SD_LIB stable-diffusion)

View File

@ -16,15 +16,23 @@ git submodule init
git submodule update
```
## WebP Support in Examples
## WebP and WebM Support in Examples
The example applications (`examples/cli` and `examples/server`) use `libwebp` to support WebP image I/O. This is enabled by default.
The example applications (`examples/cli` and `examples/server`) use `libwebp` to support WebP image I/O, and `examples/cli` can also use `libwebm` for `.webm` video output. Both are enabled by default when the corresponding submodules are checked out. WebM output currently reuses `libwebp` to encode each frame as VP8 before muxing with `libwebm`, so WebM support requires WebP support to be enabled.
If you do not want WebP support, you can disable it at configure time:
If you do not want WebP/WebM support, you can disable them at configure time:
```shell
mkdir build && cd build
cmake .. -DSD_WEBP=OFF
cmake .. -DSD_WEBP=OFF -DSD_WEBM=OFF
cmake --build . --config Release
```
If the submodules are not available, you can also link against system packages instead:
```shell
mkdir build && cd build
cmake .. -DSD_USE_SYSTEM_WEBP=ON -DSD_USE_SYSTEM_WEBM=ON
cmake --build . --config Release
```

View File

@ -9,6 +9,11 @@ add_executable(${TARGET}
install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TARGET} PRIVATE stable-diffusion zip ${CMAKE_THREAD_LIBS_INIT})
# WebP support: define SD_USE_WEBP for this target and link the `webp` and `libwebpmux` targets.
if(SD_WEBP)
target_compile_definitions(${TARGET} PRIVATE SD_USE_WEBP)
target_link_libraries(${TARGET} PRIVATE webp libwebpmux)
endif()
# WebM support: define SD_USE_WEBM for this target and link the `webm` target.
if(SD_WEBM)
target_compile_definitions(${TARGET} PRIVATE SD_USE_WEBM)
target_link_libraries(${TARGET} PRIVATE webm)
endif()
target_compile_features(${TARGET} PUBLIC c_std_11 cxx_std_17)

View File

@ -5,8 +5,8 @@ usage: ./bin/sd-cli [options]
CLI Options:
-o, --output <string> path to write result image to. you can use printf-style %d format specifiers for image sequences (default:
./output.png) (eg. output_%03d.png). For video generation, single-file outputs support .avi and animated .webp
--preview-path <string> path to write preview image to (default: ./preview.png). Multi-frame previews support .avi and animated .webp
./output.png) (eg. output_%03d.png). For video generation, single-file outputs support .avi, .webm, and animated .webp
--preview-path <string> path to write preview image to (default: ./preview.png). Multi-frame previews support .avi, .webm, and animated .webp
--preview-interval <int> interval in denoising steps between consecutive updates of the image preview file (default is 1, meaning updating at
every step)
--output-begin-idx <int> starting index for output image sequence, must be non-negative (default 0 if specified %d in output path, 1 otherwise)

View File

@ -58,7 +58,7 @@ struct SDCliParams {
options.string_options = {
{"-o",
"--output",
"path to write result image to. you can use printf-style %d format specifiers for image sequences (default: ./output.png) (eg. output_%03d.png)",
"path to write result image to. you can use printf-style %d format specifiers for image sequences (default: ./output.png) (eg. output_%03d.png). Single-file video outputs support .avi, .webm, and animated .webp",
&output_path},
{"",
"--image",
@ -70,7 +70,7 @@ struct SDCliParams {
&metadata_format},
{"",
"--preview-path",
"path to write preview image to (default: ./preview.png)",
"path to write preview image to (default: ./preview.png). Multi-frame previews support .avi, .webm, and animated .webp",
&preview_path},
};
@ -396,7 +396,9 @@ bool save_results(const SDCliParams& cli_params,
if (!ext.empty()) {
if (output_format == EncodedImageFormat::JPEG ||
output_format == EncodedImageFormat::PNG ||
output_format == EncodedImageFormat::WEBP) {
output_format == EncodedImageFormat::WEBP ||
ext_lower == ".avi" ||
ext_lower == ".webm") {
base_path.replace_extension();
}
}
@ -438,7 +440,7 @@ bool save_results(const SDCliParams& cli_params,
}
if (cli_params.mode == VID_GEN && num_results > 1) {
if (ext_lower != ".avi" && ext_lower != ".webp")
if (ext_lower != ".avi" && ext_lower != ".webp" && ext_lower != ".webm")
ext = ".avi";
fs::path video_path = base_path;
video_path += ext;

View File

@ -30,6 +30,11 @@
#include "webp/mux.h"
#endif
#ifdef SD_USE_WEBM
#include "mkvmuxer/mkvmuxer.h"
#include "mkvmuxer/mkvwriter.h"
#endif
namespace fs = std::filesystem;
namespace {
@ -71,6 +76,13 @@ bool write_binary_file_bytes(const std::string& path, const std::vector<uint8_t>
return true;
}
// Decodes the four bytes at `data` as an unsigned 32-bit integer stored
// least-significant byte first. `data` must point to at least four readable bytes.
uint32_t read_u32_le_bytes(const uint8_t* data) {
    uint32_t value = 0;
    // Fold from the most significant byte (index 3) down to the least (index 0).
    for (int idx = 3; idx >= 0; --idx) {
        value = (value << 8) | static_cast<uint32_t>(data[idx]);
    }
    return value;
}
int stbi_ext_write_png_to_func(stbi_write_func* func,
void* context,
int x,
@ -289,6 +301,76 @@ bool encode_webp_image_to_vector(const uint8_t* image,
WebPMuxDelete(mux);
return ok;
}
#ifdef SD_USE_WEBM
// Extracts the raw VP8 bitstream from an encoded WebP buffer.
//
// After the 12-byte file header, the buffer is walked chunk by chunk (4-byte tag, 4-byte
// little-endian length, payload padded to an even size). The payload of the first "VP8 "
// chunk is copied into `vp8_frame`.
//
// Returns true only when a "VP8 " chunk with a non-empty payload was found. Returns false
// when the signature check fails, when a chunk declares more bytes than the buffer holds,
// or when no "VP8 " chunk is present. `vp8_frame` is left untouched unless a "VP8 " chunk
// is reached.
bool extract_vp8_frame_from_webp(const std::vector<uint8_t>& webp_data, std::vector<uint8_t>& vp8_frame) {
    if (!is_webp_signature(webp_data.data(), webp_data.size())) {
        return false;
    }

    const size_t total_size = webp_data.size();
    size_t offset           = 12;  // first chunk follows the 12-byte file header
    while (offset <= total_size && total_size - offset >= 8) {
        const uint8_t* chunk     = webp_data.data() + offset;
        const size_t chunk_len   = read_u32_le_bytes(chunk + 4);
        const size_t chunk_start = offset + 8;

        // Compare against the bytes that remain instead of computing chunk_start + chunk_len:
        // that sum can wrap where size_t is 32 bits wide and let a bogus length through.
        if (chunk_len > total_size - chunk_start) {
            return false;
        }

        if (memcmp(chunk, "VP8 ", 4) == 0) {
            const uint8_t* payload = webp_data.data() + chunk_start;
            vp8_frame.assign(payload, payload + chunk_len);
            return !vp8_frame.empty();
        }

        // Odd-sized payloads are followed by one padding byte.
        offset = chunk_start + chunk_len + (chunk_len & 1u);
    }

    return false;
}
// Encodes one image as a standalone VP8 frame.
//
// The image is converted to tightly packed 3-channel data if necessary (a single channel is
// replicated into all three; for 4-channel input the fourth channel is dropped), encoded
// through encode_webp_image_to_vector() with the given `quality`, and the VP8 payload is then
// pulled out of the resulting WebP buffer into `vp8_frame`.
//
// Returns false when the image has no data, a zero or out-of-range dimension, an unsupported
// channel count (only 1, 3 and 4 are accepted), or when encoding/extraction fails.
bool encode_sd_image_to_vp8_frame(const sd_image_t& image, int quality, std::vector<uint8_t>& vp8_frame) {
    if (image.data == nullptr || image.width == 0 || image.height == 0) {
        return false;
    }

    const int width         = static_cast<int>(image.width);
    const int height        = static_cast<int>(image.height);
    const int input_channel = static_cast<int>(image.channel);
    // Dimensions that do not fit in an int cannot be handed to the encoder.
    if (width <= 0 || height <= 0) {
        return false;
    }
    if (input_channel != 1 && input_channel != 3 && input_channel != 4) {
        return false;
    }

    std::vector<uint8_t> rgb_buffer;
    const uint8_t* rgb_data = image.data;
    if (input_channel != 3) {
        // Index arithmetic is done in size_t: width * height and i * 4 can exceed INT_MAX
        // for large frames, which would be undefined behaviour with int.
        const size_t pixel_count = static_cast<size_t>(width) * static_cast<size_t>(height);
        const size_t src_stride  = static_cast<size_t>(input_channel);
        rgb_buffer.resize(pixel_count * 3);
        for (size_t i = 0; i < pixel_count; ++i) {
            const uint8_t* src = image.data + i * src_stride;
            uint8_t* dst       = rgb_buffer.data() + i * 3;
            if (input_channel == 1) {
                dst[0] = src[0];
                dst[1] = src[0];
                dst[2] = src[0];
            } else {
                dst[0] = src[0];
                dst[1] = src[1];
                dst[2] = src[2];
            }
        }
        rgb_data = rgb_buffer.data();
    }

    std::vector<uint8_t> encoded_webp;
    if (!encode_webp_image_to_vector(rgb_data, width, height, 3, "", quality, encoded_webp)) {
        return false;
    }

    return extract_vp8_frame_from_webp(encoded_webp, vp8_frame);
}
#endif
#endif
uint8_t* load_image_common(bool from_memory,
@ -861,6 +943,99 @@ cleanup:
}
#endif
#ifdef SD_USE_WEBM
// Writes `num_images` frames to `filename` as a WebM file with a single video track.
//
// Each frame is encoded independently via encode_sd_image_to_vp8_frame() using `quality`
// and muxed as a key frame; timestamps advance by one frame period (1e9 / fps nanoseconds,
// rounded, at least 1 ns). All frames must have the same width and height as images[0].
//
// Returns 0 on success and -1 on any failure (invalid arguments, file open failure, muxer
// errors, mismatched frame sizes, or a frame that cannot be encoded). An error message is
// printed to stderr in every failure case. The output file is not removed on failure.
int create_webm_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) {
    // A null array or a non-positive count would otherwise dereference images[0] or
    // produce an empty file that is reported as success.
    if (images == nullptr || num_images <= 0) {
        fprintf(stderr, "Error: Image array is empty.\n");
        return -1;
    }
    if (fps <= 0) {
        fprintf(stderr, "Error: FPS must be positive.\n");
        return -1;
    }

    const int width  = static_cast<int>(images[0].width);
    const int height = static_cast<int>(images[0].height);
    if (width <= 0 || height <= 0) {
        fprintf(stderr, "Error: Invalid frame dimensions.\n");
        return -1;
    }

    mkvmuxer::MkvWriter writer;
    if (!writer.Open(filename)) {
        fprintf(stderr, "Error: Could not open WebM file for writing.\n");
        return -1;
    }

    // The muxing work runs inside a lambda so that every early return still falls through
    // to writer.Close() below.
    const int ret = [&]() -> int {
        mkvmuxer::Segment segment;
        if (!segment.Init(&writer)) {
            fprintf(stderr, "Error: Failed to initialize WebM muxer.\n");
            return -1;
        }

        segment.set_mode(mkvmuxer::Segment::kFile);
        segment.OutputCues(true);

        const uint64_t track_number = segment.AddVideoTrack(width, height, 0);
        if (track_number == 0) {
            fprintf(stderr, "Error: Failed to add VP8 video track.\n");
            return -1;
        }
        if (!segment.CuesTrack(track_number)) {
            fprintf(stderr, "Error: Failed to set WebM cues track.\n");
            return -1;
        }

        // Track metadata is best-effort: a missing track object is not treated as an error.
        mkvmuxer::VideoTrack* video_track = static_cast<mkvmuxer::VideoTrack*>(segment.GetTrackByNumber(track_number));
        if (video_track != nullptr) {
            video_track->set_display_width(static_cast<uint64_t>(width));
            video_track->set_display_height(static_cast<uint64_t>(height));
            video_track->set_frame_rate(static_cast<double>(fps));
        }

        segment.GetSegmentInfo()->set_writing_app("stable-diffusion.cpp");
        segment.GetSegmentInfo()->set_muxing_app("stable-diffusion.cpp");

        const uint64_t frame_duration_ns = std::max<uint64_t>(
            1, static_cast<uint64_t>(std::llround(1000000000.0 / static_cast<double>(fps))));
        uint64_t timestamp_ns = 0;

        for (int i = 0; i < num_images; ++i) {
            const sd_image_t& image = images[i];
            if (static_cast<int>(image.width) != width || static_cast<int>(image.height) != height) {
                fprintf(stderr, "Error: Frame dimensions do not match.\n");
                return -1;
            }

            std::vector<uint8_t> vp8_frame;
            if (!encode_sd_image_to_vp8_frame(image, quality, vp8_frame)) {
                fprintf(stderr, "Error: Failed to encode frame %d as VP8.\n", i);
                return -1;
            }

            // Frames are encoded independently of each other, so each one is a key frame.
            if (!segment.AddFrame(vp8_frame.data(),
                                  static_cast<uint64_t>(vp8_frame.size()),
                                  track_number,
                                  timestamp_ns,
                                  true)) {
                fprintf(stderr, "Error: Failed to mux frame %d into WebM.\n", i);
                return -1;
            }

            timestamp_ns += frame_duration_ns;
        }

        if (!segment.Finalize()) {
            fprintf(stderr, "Error: Failed to finalize WebM output.\n");
            return -1;
        }

        return 0;
    }();

    writer.Close();
    return ret;
}
#endif
int create_video_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) {
std::string path = filename ? filename : "";
auto pos = path.find_last_of('.');
@ -869,6 +1044,12 @@ int create_video_from_sd_images(const char* filename, sd_image_t* images, int nu
ch = static_cast<char>(tolower(static_cast<unsigned char>(ch)));
}
#ifdef SD_USE_WEBM
if (ext == ".webm") {
return create_webm_from_sd_images(filename, images, num_images, fps, quality);
}
#endif
#ifdef SD_USE_WEBP
if (ext == ".webp") {
return create_animated_webp_from_sd_images(filename, images, num_images, fps, quality);

View File

@ -67,6 +67,14 @@ int create_animated_webp_from_sd_images(const char* filename,
int quality = 90);
#endif
#ifdef SD_USE_WEBM
// Writes `num_images` frames from `images` to `filename` as a WebM video at `fps` frames per
// second. `quality` is forwarded to the per-frame encoder. All frames must share the
// dimensions of the first one. Returns 0 on success, -1 on failure (details go to stderr).
int create_webm_from_sd_images(const char* filename,
sd_image_t* images,
int num_images,
int fps,
int quality = 90);
#endif
int create_video_from_sd_images(const char* filename,
sd_image_t* images,
int num_images,

View File

@ -75,8 +75,13 @@ endif()
install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TARGET} PRIVATE stable-diffusion ${CMAKE_THREAD_LIBS_INIT})
# WebP support: define SD_USE_WEBP for this target and link the `webp` and `libwebpmux` targets.
if(SD_WEBP)
target_compile_definitions(${TARGET} PRIVATE SD_USE_WEBP)
target_link_libraries(${TARGET} PRIVATE webp libwebpmux)
endif()
# WebM support: define SD_USE_WEBM for this target and link the `webm` target.
if(SD_WEBM)
target_compile_definitions(${TARGET} PRIVATE SD_USE_WEBM)
target_link_libraries(${TARGET} PRIVATE webm)
endif()
# due to httplib; it contains a pragma for MSVC, but other things need explicit flags
if(WIN32 AND NOT MSVC)

View File

@ -18,3 +18,28 @@ if(SD_WEBP AND NOT SD_USE_SYSTEM_WEBP)
add_subdirectory(libwebp EXCLUDE_FROM_ALL)
endif()
# Build the bundled libwebm (thirdparty/libwebm) unless a system copy was requested.
if(SD_WEBM AND NOT SD_USE_SYSTEM_WEBM)
    if(MSVC)
        set(MSVC_RUNTIME dll)
    endif()

    # The ENABLE_* overrides below are plain (non-cache) variables. A subproject that
    # declares a minimum CMake version older than 3.13 runs with policy CMP0077 unset,
    # in which case option() discards such variables and falls back to its own defaults.
    # Default the policy to NEW for the subdirectory so the overrides are honoured, and
    # put the previous state back afterwards.
    if(DEFINED CMAKE_POLICY_DEFAULT_CMP0077)
        set(SD_LIBWEBM_PARENT_CMP0077 "${CMAKE_POLICY_DEFAULT_CMP0077}")
    else()
        unset(SD_LIBWEBM_PARENT_CMP0077)
    endif()
    set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)

    # Only the library itself is needed; switch off the optional components.
    set(ENABLE_WEBMTS OFF)
    set(ENABLE_WEBMINFO OFF)
    set(ENABLE_TESTS OFF)
    set(ENABLE_WEBM_PARSER OFF)
    set(ENABLE_SAMPLE_PROGRAMS OFF)

    set(SD_LIBWEBM_PARENT_CXX_FLAGS "${CMAKE_CXX_FLAGS}")

    add_subdirectory(libwebm EXCLUDE_FROM_ALL)

    if(DEFINED SD_LIBWEBM_PARENT_CMP0077)
        set(CMAKE_POLICY_DEFAULT_CMP0077 "${SD_LIBWEBM_PARENT_CMP0077}")
    else()
        unset(CMAKE_POLICY_DEFAULT_CMP0077)
    endif()

    # libwebm mutates the global CMAKE_CXX_FLAGS for non-MSVC compilers to force
    # C++11. Restore the parent flags so the main project keeps its own C++17
    # requirements, then pin the libwebm targets to C++17 explicitly.
    # FORCE is deliberate here: it undoes the subproject's change to the cache entry.
    set(CMAKE_CXX_FLAGS "${SD_LIBWEBM_PARENT_CXX_FLAGS}" CACHE STRING "" FORCE)

    target_compile_features(mkvmuxer PRIVATE cxx_std_17)
    target_compile_features(mkvparser PRIVATE cxx_std_17)
    target_compile_features(webm PRIVATE cxx_std_17)

    # Let consumers include libwebm headers relative to its source root
    # (e.g. "mkvmuxer/mkvmuxer.h").
    target_include_directories(webm INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}/libwebm")
endif()

1
thirdparty/libwebm vendored Submodule

@ -0,0 +1 @@
Subproject commit 5bf12267eea773a32fcf4949de52b0add158a8d5