perf: speed up tensor_to_sd_image conversion

This commit is contained in:
leejet 2026-04-30 01:03:35 +08:00
parent b8079e253d
commit b2a30b9a0e
2 changed files with 71 additions and 25 deletions

View File

@ -24,6 +24,75 @@ static inline void preprocessing_set_4d(sd::Tensor<float>& tensor, float value,
tensor.values()[static_cast<size_t>(preprocessing_offset_4d(tensor, i0, i1, i2, i3))] = value;
}
// Converts a float sample to an 8-bit value, treating [0, 1] as the full output range.
//
// - value at or below 0.0f (including -infinity) yields 0.
// - value at or above 1.0f (including +infinity) yields 255.
// - anything in between is scaled by 255 and rounded to nearest by adding 0.5f before the
//   truncating cast.
// - NaN yields 0. NaN fails every ordered comparison, so the lower guard is written as
//   !(value > 0.0f) to catch it; letting NaN reach the float-to-integer cast would be
//   undefined behaviour.
static inline uint8_t preprocessing_float_to_u8(float value) {
    if (!(value > 0.0f)) {
        return 0;
    }
    if (value >= 1.0f) {
        return 255;
    }
    return static_cast<uint8_t>(value * 255.0f + 0.5f);
}
// Writes one frame of a float tensor into an interleaved 8-bit pixel buffer.
//
// tensor      - rank 4 or rank 5 (asserted). shape[0] is used as the width and shape[1] as
//               the height. For rank 4 the channel count is shape[2] and frame_index selects
//               along shape[3]. For rank 5 the channel count is shape[3] and frame_index
//               selects along shape[2]; the fifth dimension is never indexed, reads always
//               start from tensor.data().
// frame_index - frame to convert; asserted to lie inside the frame dimension.
// image_data  - non-null destination (asserted). Receives width * height * channel bytes,
//               with the bytes of each pixel stored consecutively in channel order.
//
// The source is read plane by plane: all samples of one channel for the selected frame are
// contiguous, and consecutive channel planes are `plane_step` floats apart. Each sample is
// converted with preprocessing_float_to_u8.
static inline void preprocessing_tensor_frame_to_sd_image(const sd::Tensor<float>& tensor, int frame_index, uint8_t* image_data) {
    const auto& shape = tensor.shape();
    GGML_ASSERT(shape.size() == 4 || shape.size() == 5);
    GGML_ASSERT(image_data != nullptr);

    const bool is_rank5    = shape.size() == 5;
    const int width        = static_cast<int>(shape[0]);
    const int height       = static_cast<int>(shape[1]);
    const int channel      = static_cast<int>(shape[is_rank5 ? 3 : 2]);
    const size_t pixels    = static_cast<size_t>(width) * static_cast<size_t>(height);
    const float* const src = tensor.data();

    // The two supported layouts differ only in where the selected frame begins and in how
    // far apart its channel planes are; resolve both once and share the loops below.
    size_t plane_step        = 0;
    const float* first_plane = nullptr;
    if (is_rank5) {
        GGML_ASSERT(frame_index >= 0 && frame_index < shape[2]);
        plane_step  = pixels * static_cast<size_t>(shape[2]);
        first_plane = src + static_cast<size_t>(frame_index) * pixels;
    } else {
        GGML_ASSERT(frame_index >= 0 && frame_index < shape[3]);
        plane_step  = pixels;
        first_plane = src + static_cast<size_t>(frame_index) * (pixels * static_cast<size_t>(channel));
    }

    uint8_t* out = image_data;

    if (channel == 3) {
        // Dedicated three-channel path: fixed plane pointers, no inner channel loop.
        const float* const plane0 = first_plane;
        const float* const plane1 = first_plane + plane_step;
        const float* const plane2 = first_plane + plane_step * 2;
        for (size_t px = 0; px < pixels; ++px, out += 3) {
            out[0] = preprocessing_float_to_u8(plane0[px]);
            out[1] = preprocessing_float_to_u8(plane1[px]);
            out[2] = preprocessing_float_to_u8(plane2[px]);
        }
        return;
    }

    // Generic path for any other channel count.
    for (size_t px = 0; px < pixels; ++px) {
        for (int c = 0; c < channel; ++c) {
            *out++ = preprocessing_float_to_u8(first_plane[px + plane_step * static_cast<size_t>(c)]);
        }
    }
}
static inline sd::Tensor<float> sd_image_to_preprocessing_tensor(sd_image_t image) {
sd::Tensor<float> tensor({static_cast<int64_t>(image.width), static_cast<int64_t>(image.height), static_cast<int64_t>(image.channel), 1});
for (uint32_t y = 0; y < image.height; ++y) {
@ -39,20 +108,7 @@ static inline sd::Tensor<float> sd_image_to_preprocessing_tensor(sd_image_t imag
// Converts a single-image float tensor into interleaved 8-bit pixel data.
//
// tensor     - must be rank 4 with a fourth dimension of exactly 1 (both asserted).
// image_data - non-null destination buffer (asserted); it is filled by
//              preprocessing_tensor_frame_to_sd_image for frame 0.
//
// All conversion work is delegated to preprocessing_tensor_frame_to_sd_image. No separate
// per-element pass is done here: the helper writes every output byte, so an additional loop
// over (x, y, c) would only repeat the work and have its results overwritten.
static inline void preprocessing_tensor_to_sd_image(const sd::Tensor<float>& tensor, uint8_t* image_data) {
    GGML_ASSERT(tensor.dim() == 4);
    GGML_ASSERT(tensor.shape()[3] == 1);
    GGML_ASSERT(image_data != nullptr);
    preprocessing_tensor_frame_to_sd_image(tensor, 0, image_data);
}
static inline sd::Tensor<float> gaussian_kernel_tensor(int kernel_size) {

View File

@ -505,17 +505,7 @@ sd_image_t tensor_to_sd_image(const sd::Tensor<float>& tensor, int frame_index)
int channel = static_cast<int>(shape[shape.size() == 5 ? 3 : 2]);
uint8_t* data = (uint8_t*)malloc(static_cast<size_t>(width * height * channel));
GGML_ASSERT(data != nullptr);
for (int iw = 0; iw < width; ++iw) {
for (int ih = 0; ih < height; ++ih) {
for (int ic = 0; ic < channel; ++ic) {
float value = shape.size() == 5 ? tensor.index(iw, ih, frame_index, ic, 0)
: tensor.index(iw, ih, ic, frame_index);
value = std::clamp(value, 0.0f, 1.0f);
data[(ih * width + iw) * channel + ic] = static_cast<uint8_t>(std::round(value * 255.0f));
}
}
}
preprocessing_tensor_frame_to_sd_image(tensor, frame_index, data);
return {
static_cast<uint32_t>(width),
static_cast<uint32_t>(height),