mirror of
https://github.com/leejet/stable-diffusion.cpp
synced 2026-04-19 21:35:52 +02:00
feat: add er_sde sampler (#1403)
This commit is contained in:
parent
d73b4198a4
commit
1b4e9be643
@ -97,6 +97,7 @@ API and command-line option may change frequently.***
|
||||
- `DPM++ 2M`
|
||||
- [`DPM++ 2M v2`](https://github.com/AUTOMATIC1111/stable-diffusion-webui/discussions/8457)
|
||||
- `DPM++ 2S a`
|
||||
- `ER-SDE`
|
||||
- [`LCM`](https://github.com/AUTOMATIC1111/stable-diffusion-webui/issues/13952)
|
||||
- Cross-platform reproducibility
|
||||
- `--rng cuda`, default, consistent with the `stable-diffusion-webui GPU RNG`
|
||||
|
||||
@ -114,7 +114,7 @@ Generation Options:
|
||||
medium
|
||||
--skip-layer-start <float> SLG enabling point (default: 0.01)
|
||||
--skip-layer-end <float> SLG disabling point (default: 0.2)
|
||||
--eta <float> noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a and dpm++2s_a)
|
||||
--eta <float> noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a, er_sde and dpm++2s_a)
|
||||
--flow-shift <float> shift value for Flow models like SD3.x or WAN (default: auto)
|
||||
--high-noise-cfg-scale <float> (high noise) unconditional guidance scale: (default: 7.0)
|
||||
--high-noise-img-cfg-scale <float> (high noise) image guidance scale for inpaint or instruct-pix2pix models (default: same as --cfg-scale)
|
||||
@ -122,7 +122,7 @@ Generation Options:
|
||||
--high-noise-slg-scale <float> (high noise) skip layer guidance (SLG) scale, only for DiT models: (default: 0)
|
||||
--high-noise-skip-layer-start <float> (high noise) SLG enabling point (default: 0.01)
|
||||
--high-noise-skip-layer-end <float> (high noise) SLG disabling point (default: 0.2)
|
||||
--high-noise-eta <float> (high noise) noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a and dpm++2s_a)
|
||||
--high-noise-eta <float> (high noise) noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a, er_sde and dpm++2s_a)
|
||||
--strength <float> strength for noising/unnoising (default: 0.75)
|
||||
--pm-style-strength <float>
|
||||
--control-strength <float> strength to apply Control Net (default: 0.9). 1.0 corresponds to full destruction of information in init image
|
||||
@ -133,10 +133,10 @@ Generation Options:
|
||||
--disable-image-metadata do not embed generation metadata on image files
|
||||
-s, --seed RNG seed (default: 42, use random seed for < 0)
|
||||
--sampling-method sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing,
|
||||
tcd, res_multistep, res_2s] (default: euler for Flux/SD3/Wan, euler_a
|
||||
tcd, res_multistep, res_2s, er_sde] (default: euler for Flux/SD3/Wan, euler_a
|
||||
otherwise)
|
||||
--high-noise-sampling-method (high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm,
|
||||
ddim_trailing, tcd, res_multistep, res_2s] default: euler for Flux/SD3/Wan,
|
||||
ddim_trailing, tcd, res_multistep, res_2s, er_sde] default: euler for Flux/SD3/Wan,
|
||||
euler_a otherwise
|
||||
--scheduler denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple,
|
||||
kl_optimal, lcm, bong_tangent], default: discrete
|
||||
|
||||
@ -855,7 +855,7 @@ ArgOptions SDGenerationParams::get_options() {
|
||||
&sample_params.guidance.slg.layer_end},
|
||||
{"",
|
||||
"--eta",
|
||||
"noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a and dpm++2s_a)",
|
||||
"noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a, er_sde and dpm++2s_a)",
|
||||
&sample_params.eta},
|
||||
{"",
|
||||
"--flow-shift",
|
||||
@ -887,7 +887,7 @@ ArgOptions SDGenerationParams::get_options() {
|
||||
&high_noise_sample_params.guidance.slg.layer_end},
|
||||
{"",
|
||||
"--high-noise-eta",
|
||||
"(high noise) noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a and dpm++2s_a)",
|
||||
"(high noise) noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a, er_sde and dpm++2s_a)",
|
||||
&high_noise_sample_params.eta},
|
||||
{"",
|
||||
"--strength",
|
||||
@ -1185,12 +1185,12 @@ ArgOptions SDGenerationParams::get_options() {
|
||||
on_seed_arg},
|
||||
{"",
|
||||
"--sampling-method",
|
||||
"sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, res_multistep, res_2s] "
|
||||
"sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, res_multistep, res_2s, er_sde] "
|
||||
"(default: euler for Flux/SD3/Wan, euler_a otherwise)",
|
||||
on_sample_method_arg},
|
||||
{"",
|
||||
"--high-noise-sampling-method",
|
||||
"(high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, res_multistep, res_2s]"
|
||||
"(high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, res_multistep, res_2s, er_sde]"
|
||||
" default: euler for Flux/SD3/Wan, euler_a otherwise",
|
||||
on_high_noise_sample_method_arg},
|
||||
{"",
|
||||
|
||||
@ -219,7 +219,7 @@ Default Generation Options:
|
||||
medium
|
||||
--skip-layer-start <float> SLG enabling point (default: 0.01)
|
||||
--skip-layer-end <float> SLG disabling point (default: 0.2)
|
||||
--eta <float> noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a and dpm++2s_a)
|
||||
--eta <float> noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a, er_sde and dpm++2s_a)
|
||||
--flow-shift <float> shift value for Flow models like SD3.x or WAN (default: auto)
|
||||
--high-noise-cfg-scale <float> (high noise) unconditional guidance scale: (default: 7.0)
|
||||
--high-noise-img-cfg-scale <float> (high noise) image guidance scale for inpaint or instruct-pix2pix models (default: same as --cfg-scale)
|
||||
@ -227,7 +227,7 @@ Default Generation Options:
|
||||
--high-noise-slg-scale <float> (high noise) skip layer guidance (SLG) scale, only for DiT models: (default: 0)
|
||||
--high-noise-skip-layer-start <float> (high noise) SLG enabling point (default: 0.01)
|
||||
--high-noise-skip-layer-end <float> (high noise) SLG disabling point (default: 0.2)
|
||||
--high-noise-eta <float> (high noise) noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a and dpm++2s_a)
|
||||
--high-noise-eta <float> (high noise) noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a, er_sde and dpm++2s_a)
|
||||
--strength <float> strength for noising/unnoising (default: 0.75)
|
||||
--pm-style-strength <float>
|
||||
--control-strength <float> strength to apply Control Net (default: 0.9). 1.0 corresponds to full destruction of information in init image
|
||||
@ -238,10 +238,10 @@ Default Generation Options:
|
||||
--disable-image-metadata do not embed generation metadata on image files
|
||||
-s, --seed RNG seed (default: 42, use random seed for < 0)
|
||||
--sampling-method sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing,
|
||||
tcd, res_multistep, res_2s] (default: euler for Flux/SD3/Wan, euler_a
|
||||
tcd, res_multistep, res_2s, er_sde] (default: euler for Flux/SD3/Wan, euler_a
|
||||
otherwise)
|
||||
--high-noise-sampling-method (high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm,
|
||||
ddim_trailing, tcd, res_multistep, res_2s] default: euler for Flux/SD3/Wan,
|
||||
ddim_trailing, tcd, res_multistep, res_2s, er_sde] default: euler for Flux/SD3/Wan,
|
||||
euler_a otherwise
|
||||
--scheduler denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple,
|
||||
kl_optimal, lcm, bong_tangent], default: discrete
|
||||
|
||||
@ -50,6 +50,7 @@ enum sample_method_t {
|
||||
TCD_SAMPLE_METHOD,
|
||||
RES_MULTISTEP_SAMPLE_METHOD,
|
||||
RES_2S_SAMPLE_METHOD,
|
||||
ER_SDE_SAMPLE_METHOD,
|
||||
SAMPLE_METHOD_COUNT
|
||||
};
|
||||
|
||||
|
||||
136
src/denoiser.hpp
136
src/denoiser.hpp
@ -1285,6 +1285,140 @@ static sd::Tensor<float> sample_res_2s(denoise_cb_t model,
|
||||
return x;
|
||||
}
|
||||
|
||||
// ER-SDE sampler with up to three stages.
//
// For every step i the model is evaluated at sigmas[i]; the result ("denoised")
// is combined with x through a first-order update, optionally corrected by
// first and second finite differences of the previous denoised outputs
// (stages 2 and 3), and finally perturbed by Gaussian noise drawn from rng.
// NOTE(review): the update formulas appear to follow the ER-SDE-Solver-3
// formulation (arXiv:2309.06169) as implemented in k-diffusion style samplers;
// confirm against the reference before changing any coefficient.
//
// Parameters:
//   model            - denoise callback, called as model(x, sigma, step) with a
//                      1-based step; an empty result aborts sampling.
//   x                - initial latent, updated step by step and returned.
//   sigmas           - noise schedule (taken by value because flow schedules
//                      are clamped in place below).
//   rng              - random source for the injected noise.
//   is_flow_denoiser - selects the flow parameterisation
//                      (alpha = 1 - sigma, lambda = sigma / (1 - sigma))
//                      instead of alpha = 1, lambda = sigma.
//   eta              - noise multiplier; 0 disables noise injection.
//
// Returns the final latent, or an empty tensor if the model returned one.
static sd::Tensor<float> sample_er_sde(denoise_cb_t model,
                                       sd::Tensor<float> x,
                                       std::vector<float> sigmas,
                                       std::shared_ptr<RNG> rng,
                                       bool is_flow_denoiser,
                                       float eta) {
    constexpr int max_stage                  = 3;
    constexpr int num_integration_points     = 200;
    constexpr float num_integration_points_f = static_cast<float>(num_integration_points);
    const float s_noise                      = eta;

    // Keep flow sigmas strictly inside (0, 1) so that 1 - sigma never reaches 0.
    auto er_sde_flow_sigma = [](float sigma) -> float {
        sigma = std::max(sigma, 1e-6f);
        sigma = std::min(sigma, 1.0f - 1e-4f);
        return sigma;
    };

    // lambda = sigma / alpha, floored at 1e-6 for non-flow models.
    auto sigma_to_er_sde_lambda = [&](float sigma) -> float {
        if (is_flow_denoiser) {
            sigma = er_sde_flow_sigma(sigma);
            return sigma / std::max(1.0f - sigma, 1e-6f);
        }
        return std::max(sigma, 1e-6f);
    };

    // alpha is the signal scale: 1 - sigma for flow models, 1 otherwise.
    auto sigma_to_er_sde_alpha = [&](float sigma) -> float {
        if (is_flow_denoiser) {
            return 1.0f - er_sde_flow_sigma(sigma);
        }
        return 1.0f;
    };

    // Noise scaling function: v * (exp(v^0.3) + 10), with negative inputs treated as 0.
    auto er_sde_noise_scaler = [](float v) -> float {
        v = std::max(v, 0.0f);
        return v * (std::exp(std::pow(v, 0.3f)) + 10.0f);
    };

    if (is_flow_denoiser) {
        // Clamp every sigma above 1 except the last entry, which is left untouched
        // so a terminal 0 is still detected by the exact comparison below.
        for (size_t i = 0; i + 1 < sigmas.size(); ++i) {
            if (sigmas[i] > 1.0f) {
                sigmas[i] = er_sde_flow_sigma(sigmas[i]);
            }
        }
    }

    std::vector<float> er_lambdas(sigmas.size(), 0.0f);
    for (size_t i = 0; i < sigmas.size(); ++i) {
        er_lambdas[i] = sigma_to_er_sde_lambda(sigmas[i]);
    }

    // History for the multi-stage corrections; the flags say whether the
    // corresponding tensor holds a real value yet (the initial copies of x are placeholders).
    sd::Tensor<float> old_denoised   = x;
    sd::Tensor<float> old_denoised_d = x;
    bool have_old_denoised           = false;
    bool have_old_denoised_d         = false;

    const int steps = static_cast<int>(sigmas.size()) - 1;
    for (int i = 0; i < steps; i++) {
        sd::Tensor<float> denoised = model(x, sigmas[i], i + 1);
        if (denoised.empty()) {
            return {};
        }

        const int stage_used = std::min(max_stage, i + 1);

        if (sigmas[i + 1] == 0.0f) {
            // Final step to sigma 0: the model prediction is the result.
            x = denoised;
        } else {
            const float er_lambda_s = er_lambdas[i];
            const float er_lambda_t = er_lambdas[i + 1];
            const float alpha_s     = sigma_to_er_sde_alpha(sigmas[i]);
            const float alpha_t     = sigma_to_er_sde_alpha(sigmas[i + 1]);
            const float scaled_s    = er_sde_noise_scaler(er_lambda_s);
            const float scaled_t    = er_sde_noise_scaler(er_lambda_t);
            const float r_alpha     = alpha_s > 0.0f ? alpha_t / alpha_s : 0.0f;
            const float r           = scaled_s > 0.0f ? scaled_t / scaled_s : 0.0f;

            // Stage 1: first-order update.
            x = r_alpha * r * x + alpha_t * (1.0f - r) * denoised;

            if (stage_used >= 2 && have_old_denoised) {
                const float dt               = er_lambda_t - er_lambda_s;
                const float lambda_step_size = -dt / num_integration_points_f;
                const bool use_third_stage   = stage_used >= 3 && have_old_denoised_d;

                // Rectangle-rule sums over [er_lambda_t, er_lambda_s) of
                // 1 / scaler(lambda) and (lambda - er_lambda_s) / scaler(lambda).
                float s   = 0.0f;
                float s_u = 0.0f;
                for (int p = 0; p < num_integration_points; ++p) {
                    const float lambda_pos = er_lambda_t + p * lambda_step_size;
                    const float scaled_pos = er_sde_noise_scaler(lambda_pos);
                    if (scaled_pos <= 0.0f) {
                        continue;  // skip points where the integrand would divide by zero
                    }

                    s += 1.0f / scaled_pos;
                    if (use_third_stage) {
                        s_u += (lambda_pos - er_lambda_s) / scaled_pos;
                    }
                }
                s *= lambda_step_size;

                // Stage 2: correction from the first finite difference of denoised.
                const float denom_d = er_lambda_s - er_lambdas[i - 1];
                if (std::fabs(denom_d) > 1e-12f) {
                    const float coeff_d          = alpha_t * (dt + s * scaled_t);
                    sd::Tensor<float> denoised_d = (denoised - old_denoised) / denom_d;
                    x += coeff_d * denoised_d;

                    if (use_third_stage) {
                        // Stage 3: correction from the second finite difference.
                        const float denom_u = (er_lambda_s - er_lambdas[i - 2]) * 0.5f;
                        if (std::fabs(denom_u) > 1e-12f) {
                            s_u *= lambda_step_size;
                            const float coeff_u          = alpha_t * (0.5f * dt * dt + s_u * scaled_t);
                            sd::Tensor<float> denoised_u = (denoised_d - old_denoised_d) / denom_u;
                            x += coeff_u * denoised_u;
                        }
                    }

                    old_denoised_d      = denoised_d;
                    have_old_denoised_d = true;
                }
            }

            // Stochastic term. eta (s_noise) scales the injected noise, so fractional
            // values attenuate it instead of merely switching it on; eta == 1 leaves
            // the noise amplitude unchanged.
            const float noise_scale_sq = er_lambda_t * er_lambda_t - er_lambda_s * er_lambda_s * r * r;
            if (s_noise > 0.0f && noise_scale_sq > 0.0f) {
                const float noise_scale = s_noise * alpha_t * std::sqrt(noise_scale_sq);
                x += sd::Tensor<float>::randn_like(x, rng) * noise_scale;
            }
        }

        old_denoised      = denoised;
        have_old_denoised = true;
    }
    return x;
}
|
||||
|
||||
static sd::Tensor<float> sample_ddim_trailing(denoise_cb_t model,
|
||||
sd::Tensor<float> x,
|
||||
const std::vector<float>& sigmas,
|
||||
@ -1446,6 +1580,8 @@ static sd::Tensor<float> sample_k_diffusion(sample_method_t method,
|
||||
return sample_res_multistep(model, std::move(x), sigmas, rng, eta);
|
||||
case RES_2S_SAMPLE_METHOD:
|
||||
return sample_res_2s(model, std::move(x), sigmas, rng, eta);
|
||||
case ER_SDE_SAMPLE_METHOD:
|
||||
return sample_er_sde(model, std::move(x), sigmas, rng, is_flow_denoiser, eta);
|
||||
case DDIM_TRAILING_SAMPLE_METHOD:
|
||||
return sample_ddim_trailing(model, std::move(x), sigmas, rng, eta);
|
||||
case TCD_SAMPLE_METHOD:
|
||||
|
||||
@ -71,6 +71,7 @@ const char* sampling_methods_str[] = {
|
||||
"TCD",
|
||||
"Res Multistep",
|
||||
"Res 2s",
|
||||
"ER-SDE",
|
||||
};
|
||||
|
||||
/*================================================== Helper Functions ================================================*/
|
||||
@ -1991,6 +1992,7 @@ const char* sample_method_to_str[] = {
|
||||
"tcd",
|
||||
"res_multistep",
|
||||
"res_2s",
|
||||
"er_sde",
|
||||
};
|
||||
|
||||
const char* sd_sample_method_name(enum sample_method_t sample_method) {
|
||||
@ -2473,6 +2475,7 @@ static float resolve_eta(sd_ctx_t* sd_ctx,
|
||||
return 0.0f;
|
||||
case EULER_A_SAMPLE_METHOD:
|
||||
case DPMPP2S_A_SAMPLE_METHOD:
|
||||
case ER_SDE_SAMPLE_METHOD:
|
||||
return 1.0f;
|
||||
default:;
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user