feat: add er_sde sampler (#1403)

This commit is contained in:
rmatif 2026-04-16 19:32:16 +02:00 committed by GitHub
parent d73b4198a4
commit 1b4e9be643
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 153 additions and 12 deletions

View File

@ -97,6 +97,7 @@ API and command-line option may change frequently.***
- `DPM++ 2M`
- [`DPM++ 2M v2`](https://github.com/AUTOMATIC1111/stable-diffusion-webui/discussions/8457)
- `DPM++ 2S a`
- `ER-SDE`
- [`LCM`](https://github.com/AUTOMATIC1111/stable-diffusion-webui/issues/13952)
- Cross-platform reproducibility
- `--rng cuda`, default, consistent with the `stable-diffusion-webui GPU RNG`

View File

@ -114,7 +114,7 @@ Generation Options:
medium
--skip-layer-start <float> SLG enabling point (default: 0.01)
--skip-layer-end <float> SLG disabling point (default: 0.2)
--eta <float> noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a and dpm++2s_a)
--eta <float> noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a, er_sde and dpm++2s_a)
--flow-shift <float> shift value for Flow models like SD3.x or WAN (default: auto)
--high-noise-cfg-scale <float> (high noise) unconditional guidance scale: (default: 7.0)
--high-noise-img-cfg-scale <float> (high noise) image guidance scale for inpaint or instruct-pix2pix models (default: same as --cfg-scale)
@ -122,7 +122,7 @@ Generation Options:
--high-noise-slg-scale <float> (high noise) skip layer guidance (SLG) scale, only for DiT models: (default: 0)
--high-noise-skip-layer-start <float> (high noise) SLG enabling point (default: 0.01)
--high-noise-skip-layer-end <float> (high noise) SLG disabling point (default: 0.2)
--high-noise-eta <float> (high noise) noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a and dpm++2s_a)
--high-noise-eta <float> (high noise) noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a, er_sde and dpm++2s_a)
--strength <float> strength for noising/unnoising (default: 0.75)
--pm-style-strength <float>
--control-strength <float> strength to apply Control Net (default: 0.9). 1.0 corresponds to full destruction of information in init image
@ -133,10 +133,10 @@ Generation Options:
--disable-image-metadata do not embed generation metadata on image files
-s, --seed RNG seed (default: 42, use random seed for < 0)
--sampling-method sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing,
tcd, res_multistep, res_2s] (default: euler for Flux/SD3/Wan, euler_a
tcd, res_multistep, res_2s, er_sde] (default: euler for Flux/SD3/Wan, euler_a
otherwise)
--high-noise-sampling-method (high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm,
ddim_trailing, tcd, res_multistep, res_2s] default: euler for Flux/SD3/Wan,
ddim_trailing, tcd, res_multistep, res_2s, er_sde] default: euler for Flux/SD3/Wan,
euler_a otherwise
--scheduler denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple,
kl_optimal, lcm, bong_tangent], default: discrete

View File

@ -855,7 +855,7 @@ ArgOptions SDGenerationParams::get_options() {
&sample_params.guidance.slg.layer_end},
{"",
"--eta",
"noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a and dpm++2s_a)",
"noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a, er_sde and dpm++2s_a)",
&sample_params.eta},
{"",
"--flow-shift",
@ -887,7 +887,7 @@ ArgOptions SDGenerationParams::get_options() {
&high_noise_sample_params.guidance.slg.layer_end},
{"",
"--high-noise-eta",
"(high noise) noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a and dpm++2s_a)",
"(high noise) noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a, er_sde and dpm++2s_a)",
&high_noise_sample_params.eta},
{"",
"--strength",
@ -1185,12 +1185,12 @@ ArgOptions SDGenerationParams::get_options() {
on_seed_arg},
{"",
"--sampling-method",
"sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, res_multistep, res_2s] "
"sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, res_multistep, res_2s, er_sde] "
"(default: euler for Flux/SD3/Wan, euler_a otherwise)",
on_sample_method_arg},
{"",
"--high-noise-sampling-method",
"(high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, res_multistep, res_2s]"
"(high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, res_multistep, res_2s, er_sde]"
" default: euler for Flux/SD3/Wan, euler_a otherwise",
on_high_noise_sample_method_arg},
{"",

View File

@ -219,7 +219,7 @@ Default Generation Options:
medium
--skip-layer-start <float> SLG enabling point (default: 0.01)
--skip-layer-end <float> SLG disabling point (default: 0.2)
--eta <float> noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a and dpm++2s_a)
--eta <float> noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a, er_sde and dpm++2s_a)
--flow-shift <float> shift value for Flow models like SD3.x or WAN (default: auto)
--high-noise-cfg-scale <float> (high noise) unconditional guidance scale: (default: 7.0)
--high-noise-img-cfg-scale <float> (high noise) image guidance scale for inpaint or instruct-pix2pix models (default: same as --cfg-scale)
@ -227,7 +227,7 @@ Default Generation Options:
--high-noise-slg-scale <float> (high noise) skip layer guidance (SLG) scale, only for DiT models: (default: 0)
--high-noise-skip-layer-start <float> (high noise) SLG enabling point (default: 0.01)
--high-noise-skip-layer-end <float> (high noise) SLG disabling point (default: 0.2)
--high-noise-eta <float> (high noise) noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a and dpm++2s_a)
--high-noise-eta <float> (high noise) noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a, er_sde and dpm++2s_a)
--strength <float> strength for noising/unnoising (default: 0.75)
--pm-style-strength <float>
--control-strength <float> strength to apply Control Net (default: 0.9). 1.0 corresponds to full destruction of information in init image
@ -238,10 +238,10 @@ Default Generation Options:
--disable-image-metadata do not embed generation metadata on image files
-s, --seed RNG seed (default: 42, use random seed for < 0)
--sampling-method sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing,
tcd, res_multistep, res_2s] (default: euler for Flux/SD3/Wan, euler_a
tcd, res_multistep, res_2s, er_sde] (default: euler for Flux/SD3/Wan, euler_a
otherwise)
--high-noise-sampling-method (high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm,
ddim_trailing, tcd, res_multistep, res_2s] default: euler for Flux/SD3/Wan,
ddim_trailing, tcd, res_multistep, res_2s, er_sde] default: euler for Flux/SD3/Wan,
euler_a otherwise
--scheduler denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple,
kl_optimal, lcm, bong_tangent], default: discrete

View File

@ -50,6 +50,7 @@ enum sample_method_t {
TCD_SAMPLE_METHOD,
RES_MULTISTEP_SAMPLE_METHOD,
RES_2S_SAMPLE_METHOD,
ER_SDE_SAMPLE_METHOD,
SAMPLE_METHOD_COUNT
};

View File

@ -1285,6 +1285,140 @@ static sd::Tensor<float> sample_res_2s(denoise_cb_t model,
return x;
}
// ER-SDE sampler: a multi-stage (up to 3) stochastic solver.
//
// For every step the model is evaluated once. The latent is advanced with a
// first-order update, then optionally corrected with second- and third-order
// terms built from finite differences of the current and previous denoised
// outputs. The correction weights are obtained by numerically integrating
// 1 / noise_scaler(lambda) over the step with a fixed 200-point rectangle sum.
// Finally, Gaussian noise scaled by `eta` is injected.
//
// Parameters:
//   model            - denoiser callback, invoked as model(x, sigma, step) with
//                      step = i + 1; an empty result aborts sampling.
//   x                - initial latent; updated and returned.
//   sigmas           - noise schedule (taken by value: for flow denoisers,
//                      entries above 1 are clamped locally). The last entry is
//                      only ever used as a step target.
//   rng              - random source handed to randn_like for noise injection.
//   is_flow_denoiser - selects the flow parameterisation
//                      (alpha = 1 - sigma, lambda = sigma / (1 - sigma))
//                      instead of alpha = 1, lambda = sigma.
//   eta              - noise multiplier; values <= 0 disable noise injection.
//
// Returns the final latent, or an empty tensor if the model callback returned
// an empty tensor.
//
// NOTE(review): the structure appears to follow the k-diffusion style
// `sample_er_sde` reference, where `s_noise` multiplies the injected noise —
// confirm against the reference implementation.
static sd::Tensor<float> sample_er_sde(denoise_cb_t model,
                                       sd::Tensor<float> x,
                                       std::vector<float> sigmas,
                                       std::shared_ptr<RNG> rng,
                                       bool is_flow_denoiser,
                                       float eta) {
    constexpr int max_stage                  = 3;
    constexpr int num_integration_points     = 200;
    constexpr float num_integration_points_f = static_cast<float>(num_integration_points);
    const float s_noise                      = eta;

    // Keeps a flow sigma strictly inside (0, 1) so that 1 - sigma never vanishes.
    auto er_sde_flow_sigma = [](float sigma) -> float {
        sigma = std::max(sigma, 1e-6f);
        sigma = std::min(sigma, 1.0f - 1e-4f);
        return sigma;
    };

    // lambda = sigma / alpha for the active parameterisation.
    auto sigma_to_er_sde_lambda = [&](float sigma) -> float {
        if (is_flow_denoiser) {
            sigma = er_sde_flow_sigma(sigma);
            return sigma / std::max(1.0f - sigma, 1e-6f);
        }
        return std::max(sigma, 1e-6f);
    };

    // alpha (signal scale) for the active parameterisation.
    auto sigma_to_er_sde_alpha = [&](float sigma) -> float {
        if (is_flow_denoiser) {
            return 1.0f - er_sde_flow_sigma(sigma);
        }
        return 1.0f;
    };

    // Noise scaling function: v * (exp(v^0.3) + 10), with negative inputs clamped to 0.
    auto er_sde_noise_scaler = [](float v) -> float {
        v = std::max(v, 0.0f);
        return v * (std::exp(std::pow(v, 0.3f)) + 10.0f);
    };

    if (is_flow_denoiser) {
        // Clamp out-of-range sigmas (every entry except the last one).
        for (size_t i = 0; i + 1 < sigmas.size(); ++i) {
            if (sigmas[i] > 1.0f) {
                sigmas[i] = er_sde_flow_sigma(sigmas[i]);
            }
        }
    }

    std::vector<float> er_lambdas(sigmas.size(), 0.0f);
    for (size_t i = 0; i < sigmas.size(); ++i) {
        er_lambdas[i] = sigma_to_er_sde_lambda(sigmas[i]);
    }

    // History for the multi-stage corrections; the flags tell whether the
    // tensors hold real data yet (they are seeded with x only as placeholders).
    sd::Tensor<float> old_denoised   = x;
    sd::Tensor<float> old_denoised_d = x;
    bool have_old_denoised           = false;
    bool have_old_denoised_d         = false;

    const int steps = static_cast<int>(sigmas.size()) - 1;
    for (int i = 0; i < steps; i++) {
        sd::Tensor<float> denoised = model(x, sigmas[i], i + 1);
        if (denoised.empty()) {
            return {};
        }

        // The usable order grows with the number of steps already taken.
        const int stage_used = std::min(max_stage, i + 1);

        if (sigmas[i + 1] == 0.0f) {
            // Final step to sigma = 0: the denoised prediction is the result.
            x = denoised;
        } else {
            const float er_lambda_s = er_lambdas[i];
            const float er_lambda_t = er_lambdas[i + 1];
            const float alpha_s     = sigma_to_er_sde_alpha(sigmas[i]);
            const float alpha_t     = sigma_to_er_sde_alpha(sigmas[i + 1]);
            const float scaled_s    = er_sde_noise_scaler(er_lambda_s);
            const float scaled_t    = er_sde_noise_scaler(er_lambda_t);
            const float r_alpha     = alpha_s > 0.0f ? alpha_t / alpha_s : 0.0f;
            const float r           = scaled_s > 0.0f ? scaled_t / scaled_s : 0.0f;

            // Stage 1: first-order update.
            x = r_alpha * r * x + alpha_t * (1.0f - r) * denoised;

            if (stage_used >= 2 && have_old_denoised) {
                const bool use_stage3        = stage_used >= 3 && have_old_denoised_d;
                const float dt               = er_lambda_t - er_lambda_s;
                const float lambda_step_size = -dt / num_integration_points_f;

                // Rectangle-rule sums over the step, starting at er_lambda_t
                // and moving towards er_lambda_s.
                float s   = 0.0f;
                float s_u = 0.0f;
                for (int p = 0; p < num_integration_points; ++p) {
                    const float lambda_pos = er_lambda_t + p * lambda_step_size;
                    const float scaled_pos = er_sde_noise_scaler(lambda_pos);
                    if (scaled_pos <= 0.0f) {
                        continue;
                    }
                    s += 1.0f / scaled_pos;
                    if (use_stage3) {
                        s_u += (lambda_pos - er_lambda_s) / scaled_pos;
                    }
                }
                s *= lambda_step_size;

                // Stage 2: first finite difference of the denoised output.
                // i >= 1 here because stage_used >= 2.
                const float denom_d = er_lambda_s - er_lambdas[i - 1];
                if (std::fabs(denom_d) > 1e-12f) {
                    const float coeff_d          = alpha_t * (dt + s * scaled_t);
                    sd::Tensor<float> denoised_d = (denoised - old_denoised) / denom_d;
                    x += coeff_d * denoised_d;

                    if (use_stage3) {
                        // Stage 3: second finite difference. i >= 2 here
                        // because stage_used >= 3.
                        const float denom_u = (er_lambda_s - er_lambdas[i - 2]) * 0.5f;
                        if (std::fabs(denom_u) > 1e-12f) {
                            s_u *= lambda_step_size;
                            const float coeff_u          = alpha_t * (0.5f * dt * dt + s_u * scaled_t);
                            sd::Tensor<float> denoised_u = (denoised_d - old_denoised_d) / denom_u;
                            x += coeff_u * denoised_u;
                        }
                    }
                    old_denoised_d      = denoised_d;
                    have_old_denoised_d = true;
                }
            }

            // Stochastic term. The amplitude is multiplied by s_noise so that
            // eta acts as a real noise multiplier rather than an on/off switch;
            // with eta == 1 the result is unchanged.
            const float noise_scale_sq = er_lambda_t * er_lambda_t - er_lambda_s * er_lambda_s * r * r;
            if (s_noise > 0.0f && noise_scale_sq > 0.0f) {
                const float noise_scale = s_noise * alpha_t * std::sqrt(noise_scale_sq);
                x += sd::Tensor<float>::randn_like(x, rng) * noise_scale;
            }
        }

        old_denoised      = denoised;
        have_old_denoised = true;
    }
    return x;
}
static sd::Tensor<float> sample_ddim_trailing(denoise_cb_t model,
sd::Tensor<float> x,
const std::vector<float>& sigmas,
@ -1446,6 +1580,8 @@ static sd::Tensor<float> sample_k_diffusion(sample_method_t method,
return sample_res_multistep(model, std::move(x), sigmas, rng, eta);
case RES_2S_SAMPLE_METHOD:
return sample_res_2s(model, std::move(x), sigmas, rng, eta);
case ER_SDE_SAMPLE_METHOD:
return sample_er_sde(model, std::move(x), sigmas, rng, is_flow_denoiser, eta);
case DDIM_TRAILING_SAMPLE_METHOD:
return sample_ddim_trailing(model, std::move(x), sigmas, rng, eta);
case TCD_SAMPLE_METHOD:

View File

@ -71,6 +71,7 @@ const char* sampling_methods_str[] = {
"TCD",
"Res Multistep",
"Res 2s",
"ER-SDE",
};
/*================================================== Helper Functions ================================================*/
@ -1991,6 +1992,7 @@ const char* sample_method_to_str[] = {
"tcd",
"res_multistep",
"res_2s",
"er_sde",
};
const char* sd_sample_method_name(enum sample_method_t sample_method) {
@ -2473,6 +2475,7 @@ static float resolve_eta(sd_ctx_t* sd_ctx,
return 0.0f;
case EULER_A_SAMPLE_METHOD:
case DPMPP2S_A_SAMPLE_METHOD:
case ER_SDE_SAMPLE_METHOD:
return 1.0f;
default:;
}