fix: improve handling of VAE decode failures (#1222)

This commit is contained in:
stduhpf 2026-02-09 16:29:41 +01:00 committed by GitHub
parent 5e264372ce
commit aa0b899397
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 48 additions and 24 deletions

View File

@ -409,7 +409,7 @@ bool save_results(const SDCliParams& cli_params,
auto write_image = [&](const fs::path& path, int idx) {
const sd_image_t& img = results[idx];
if (!img.data)
return;
return false;
std::string params = get_image_params(cli_params, ctx_params, gen_params, gen_params.seed + idx);
int ok = 0;
@ -419,8 +419,11 @@ bool save_results(const SDCliParams& cli_params,
ok = stbi_write_png(path.string().c_str(), img.width, img.height, img.channel, img.data, 0, params.c_str());
}
LOG_INFO("save result image %d to '%s' (%s)", idx, path.string().c_str(), ok ? "success" : "failure");
return ok != 0;
};
int sucessful_reults = 0;
if (std::regex_search(cli_params.output_path, format_specifier_regex)) {
if (!is_jpg && ext_lower != ".png")
ext = ".png";
@ -429,9 +432,12 @@ bool save_results(const SDCliParams& cli_params,
for (int i = 0; i < num_results; ++i) {
fs::path img_path = format_frame_idx(pattern.string(), output_begin_idx + i);
write_image(img_path, i);
if (write_image(img_path, i)) {
sucessful_reults++;
}
}
return true;
LOG_INFO("%d/%d images saved", sucessful_reults, num_results);
return sucessful_reults != 0;
}
if (cli_params.mode == VID_GEN && num_results > 1) {
@ -439,9 +445,13 @@ bool save_results(const SDCliParams& cli_params,
ext = ".avi";
fs::path video_path = base_path;
video_path += ext;
create_mjpg_avi_from_sd_images(video_path.string().c_str(), results, num_results, gen_params.fps);
LOG_INFO("save result MJPG AVI video to '%s'", video_path.string().c_str());
return true;
if (create_mjpg_avi_from_sd_images(video_path.string().c_str(), results, num_results, gen_params.fps) == 0) {
LOG_INFO("save result MJPG AVI video to '%s'", video_path.string().c_str());
return true;
} else {
LOG_ERROR("Failed to save result MPG AVI video to '%s'", video_path.string().c_str());
return false;
}
}
if (!is_jpg && ext_lower != ".png")
@ -453,10 +463,12 @@ bool save_results(const SDCliParams& cli_params,
img_path += "_" + std::to_string(output_begin_idx + i);
}
img_path += ext;
write_image(img_path, i);
if (write_image(img_path, i)) {
sucessful_reults++;
}
}
return true;
LOG_INFO("%d/%d images saved", sucessful_reults, num_results);
return sucessful_reults != 0;
}
int main(int argc, const char* argv[]) {

View File

@ -767,7 +767,7 @@ __STATIC_INLINE__ ggml_tensor* ggml_ext_silu_act(ggml_context* ctx, ggml_tensor*
return x;
}
typedef std::function<void(ggml_tensor*, ggml_tensor*, bool)> on_tile_process;
typedef std::function<bool(ggml_tensor*, ggml_tensor*, bool)> on_tile_process;
__STATIC_INLINE__ void sd_tiling_calc_tiles(int& num_tiles_dim,
float& tile_overlap_factor_dim,
@ -918,12 +918,15 @@ __STATIC_INLINE__ void sd_tiling_non_square(ggml_tensor* input,
int64_t t1 = ggml_time_ms();
ggml_ext_tensor_split_2d(input, input_tile, x_in, y_in);
on_processing(input_tile, output_tile, false);
ggml_ext_tensor_merge_2d(output_tile, output, x_out, y_out, overlap_x_out, overlap_y_out, dx, dy);
if (on_processing(input_tile, output_tile, false)) {
ggml_ext_tensor_merge_2d(output_tile, output, x_out, y_out, overlap_x_out, overlap_y_out, dx, dy);
int64_t t2 = ggml_time_ms();
last_time = (t2 - t1) / 1000.0f;
pretty_progress(tile_count, num_tiles, last_time);
int64_t t2 = ggml_time_ms();
last_time = (t2 - t1) / 1000.0f;
pretty_progress(tile_count, num_tiles, last_time);
} else {
LOG_ERROR("Failed to process patch %d at (%d, %d)", tile_count, x, y);
}
tile_count++;
}
last_x = false;

View File

@ -1558,7 +1558,7 @@ public:
if (vae_tiling_params.enabled) {
// split latent in 32x32 tiles and compute in several steps
auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) {
first_stage_model->compute(n_threads, in, true, &out, nullptr);
return first_stage_model->compute(n_threads, in, true, &out, nullptr);
};
silent_tiling(latents, result, get_vae_scale_factor(), 32, 0.5f, on_tiling);
@ -1577,7 +1577,7 @@ public:
if (vae_tiling_params.enabled) {
// split latent in 64x64 tiles and compute in several steps
auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) {
tae_first_stage->compute(n_threads, in, true, &out, nullptr);
return tae_first_stage->compute(n_threads, in, true, &out, nullptr);
};
silent_tiling(latents, result, get_vae_scale_factor(), 64, 0.5f, on_tiling);
} else {
@ -2546,7 +2546,7 @@ public:
LOG_DEBUG("VAE Tile size: %dx%d", tile_size_x, tile_size_y);
auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) {
first_stage_model->compute(n_threads, in, false, &out, work_ctx);
return first_stage_model->compute(n_threads, in, false, &out, work_ctx);
};
sd_tiling_non_square(x, result, vae_scale_factor, tile_size_x, tile_size_y, tile_overlap, on_tiling);
} else {
@ -2557,7 +2557,7 @@ public:
if (vae_tiling_params.enabled && !encode_video) {
// split latent in 64x64 tiles and compute in several steps
auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) {
tae_first_stage->compute(n_threads, in, false, &out, nullptr);
return tae_first_stage->compute(n_threads, in, false, &out, nullptr);
};
sd_tiling(x, result, vae_scale_factor, 64, 0.5f, on_tiling);
} else {
@ -2675,11 +2675,15 @@ public:
// split latent in tile_size_x x tile_size_y tiles and compute in several steps
auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) {
first_stage_model->compute(n_threads, in, true, &out, nullptr);
return first_stage_model->compute(n_threads, in, true, &out, nullptr);
};
sd_tiling_non_square(x, result, vae_scale_factor, tile_size_x, tile_size_y, tile_overlap, on_tiling);
} else {
first_stage_model->compute(n_threads, x, true, &result, work_ctx);
if(!first_stage_model->compute(n_threads, x, true, &result, work_ctx)){
LOG_ERROR("Failed to decode latetnts");
first_stage_model->free_compute_buffer();
return nullptr;
}
}
first_stage_model->free_compute_buffer();
process_vae_output_tensor(result);
@ -2687,11 +2691,15 @@ public:
if (vae_tiling_params.enabled) {
// split latent in 64x64 tiles and compute in several steps
auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) {
tae_first_stage->compute(n_threads, in, true, &out);
return tae_first_stage->compute(n_threads, in, true, &out);
};
sd_tiling(x, result, vae_scale_factor, 64, 0.5f, on_tiling);
} else {
tae_first_stage->compute(n_threads, x, true, &result);
if(!tae_first_stage->compute(n_threads, x, true, &result)){
LOG_ERROR("Failed to decode latetnts");
tae_first_stage->free_compute_buffer();
return nullptr;
}
}
tae_first_stage->free_compute_buffer();
}
@ -3461,6 +3469,7 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx,
ggml_free(work_ctx);
return nullptr;
}
memset(result_images, 0, batch_count * sizeof(sd_image_t));
for (size_t i = 0; i < decoded_images.size(); i++) {
result_images[i].width = width;

View File

@ -89,7 +89,7 @@ struct UpscalerGGML {
ggml_tensor* upscaled = ggml_new_tensor_4d(upscale_ctx, GGML_TYPE_F32, output_width, output_height, 3, 1);
auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) {
esrgan_upscaler->compute(n_threads, in, &out);
return esrgan_upscaler->compute(n_threads, in, &out);
};
int64_t t0 = ggml_time_ms();
sd_tiling(input_image_tensor, upscaled, esrgan_upscaler->scale, esrgan_upscaler->tile_size, 0.25f, on_tiling);