From 983ca8992ec46d7b94587a00d925b5a4bf2d9c81 Mon Sep 17 00:00:00 2001 From: tha80 <7176001+tha80@users.noreply.github.com> Date: Mon, 27 Apr 2026 23:55:00 +0200 Subject: [PATCH] server: (router) Forward form-data to model server (Fixes #22044) (#22118) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * This commit enables the router to forward form-data to the model server. Fixes #22044 (enabling use of the /v1/audio/transcriptions endpoint in router mode) * Applied the suggestion from Copilot's first comment: using the non-throwing json::parse overload. * Addressed Copilot's third comment by extending the files representation to also include filename and content-type * Addressed Copilot's fourth comment by making the RNG thread_local * Changed variable body from std::string to std::ostringstream in build_multipart_body as suggested by ngxson in https://github.com/ggml-org/llama.cpp/pull/22118#discussion_r3127099053 * Added sanitize_field lambda in build_multipart_body for key, filename and content_type as suggested by ngxson in https://github.com/ggml-org/llama.cpp/pull/22118#discussion_r3127104647 * Explicitly check that value/item is a string before calling value/item.get() as requested by ngxson in https://github.com/ggml-org/llama.cpp/pull/22118#discussion_r3127111279 * Added the double quote to the characters stripped by the sanitize lambda, and throw on JSON parse failure --------- Co-authored-by: Ralph Paßgang --- tools/server/server-chat.cpp | 4 +- tools/server/server-chat.h | 3 +- tools/server/server-cors-proxy.h | 1 + tools/server/server-http.cpp | 8 ++- tools/server/server-http.h | 8 ++- tools/server/server-models.cpp | 120 +++++++++++++++++++++++++++++-- tools/server/server-models.h | 1 + 7 files changed, 135 insertions(+), 10 deletions(-) diff --git a/tools/server/server-chat.cpp b/tools/server/server-chat.cpp index a155834694..f276f8da58 100644 --- a/tools/server/server-chat.cpp +++ b/tools/server/server-chat.cpp @@ -575,14 +575,14 @@ json 
server_chat_msg_diff_to_json_oaicompat(const common_chat_msg_diff & diff) { json convert_transcriptions_to_chatcmpl( const json & inp_body, const common_chat_templates * tmpls, - const std::map & in_files, + const std::map & in_files, std::vector & out_files) { // TODO @ngxson : this function may need to be improved in the future // handle input files out_files.clear(); auto it = in_files.find("file"); if (it != in_files.end()) { - out_files.push_back(it->second); + out_files.push_back(it->second.data); } else { throw std::invalid_argument("No input file found for transcription"); } diff --git a/tools/server/server-chat.h b/tools/server/server-chat.h index 5c5b792cf5..102eae688a 100644 --- a/tools/server/server-chat.h +++ b/tools/server/server-chat.h @@ -4,6 +4,7 @@ #include "chat.h" #include "server-common.h" +#include "server-http.h" #include @@ -19,7 +20,7 @@ json server_chat_convert_anthropic_to_oai(const json & body); json convert_transcriptions_to_chatcmpl( const json & body, const common_chat_templates * tmpls, - const std::map & in_files, + const std::map & in_files, std::vector & out_files); json server_chat_msg_diff_to_json_oaicompat(const common_chat_msg_diff & diff); diff --git a/tools/server/server-cors-proxy.h b/tools/server/server-cors-proxy.h index 887013152b..2af0c7e1c2 100644 --- a/tools/server/server-cors-proxy.h +++ b/tools/server/server-cors-proxy.h @@ -49,6 +49,7 @@ static server_http_res_ptr proxy_request(const server_http_req & req, std::strin parsed_url.path, headers, req.body, + req.files, req.should_stop, 600, // timeout_read (default to 10 minutes) 600 // timeout_write (default to 10 minutes) diff --git a/tools/server/server-http.cpp b/tools/server/server-http.cpp index ae39fbff9b..6f24f83ef3 100644 --- a/tools/server/server-http.cpp +++ b/tools/server/server-http.cpp @@ -438,7 +438,7 @@ void server_http_context::get(const std::string & path, const server_http_contex void server_http_context::post(const std::string & path, const 
server_http_context::handler_t & handler) const { pimpl->srv->Post(path_prefix + path, [handler](const httplib::Request & req, httplib::Response & res) { std::string body = req.body; - std::map files; + std::map files; if (req.is_multipart_form_data()) { // translate text fields to a JSON object and use it as the body @@ -459,7 +459,11 @@ void server_http_context::post(const std::string & path, const server_http_conte // populate files from multipart form for (const auto & [key, file] : req.form.files) { - files[key] = raw_buffer(file.content.begin(), file.content.end()); + files[key] = uploaded_file{ + raw_buffer(file.content.begin(), file.content.end()), + file.filename, + file.content_type, + }; } } diff --git a/tools/server/server-http.h b/tools/server/server-http.h index 68ae2170cf..d4d3b6e536 100644 --- a/tools/server/server-http.h +++ b/tools/server/server-http.h @@ -36,13 +36,19 @@ struct server_http_res { using server_http_res_ptr = std::unique_ptr; using raw_buffer = std::vector; +struct uploaded_file { + raw_buffer data; + std::string filename; + std::string content_type; +}; + struct server_http_req { std::map params; // path_params + query_params std::map headers; // used by MCP proxy std::string path; std::string query_string; // query parameters string (e.g. 
"action=save") std::string body; - std::map files; // used for file uploads (form data) + std::map files; // used for file uploads (form data) const std::function & should_stop; std::string get_param(const std::string & key, const std::string & def = "") const { diff --git a/tools/server/server-models.cpp b/tools/server/server-models.cpp index 15c11c3c9f..db6cbce8f9 100644 --- a/tools/server/server-models.cpp +++ b/tools/server/server-models.cpp @@ -18,6 +18,8 @@ #include #include #include +#include +#include #include #ifdef _WIN32 @@ -823,6 +825,7 @@ server_http_res_ptr server_models::proxy_request(const server_http_req & req, co proxy_path, req.headers, req.body, + req.files, req.should_stop, base_params.timeout_read, base_params.timeout_write @@ -1126,6 +1129,77 @@ static bool should_strip_proxy_header(const std::string & header_name) { return false; } +static std::string generate_multipart_boundary() { + thread_local std::mt19937 gen(std::random_device{}()); + static const char chars[] = "0123456789abcdefghijklmnopqrstuvwxyz"; + std::uniform_int_distribution<> dis(0, sizeof(chars) - 2); + std::string boundary = "----llama-cpp-proxy-"; + for (int i = 0; i < 16; i++) { + boundary += chars[dis(gen)]; + } + return boundary; +} + +static std::string build_multipart_body( + const json & form_fields, + const std::map & files, + const std::string & boundary) { + static auto sanitize_field = [](const std::string & text) { + std::string result; + result.reserve(text.size()); + for (char c : text) { + if (c != '\n' && c != '\r' && c != '"') { + result += c; + } + } + return result; + }; + + std::ostringstream body; + + for (const auto & [key, value] : form_fields.items()) { + if (value.is_array()) { + for (const auto & item : value) { + body << "--" << boundary << "\r\n"; + body << "Content-Disposition: form-data; name=\"" << sanitize_field(key) << "\"\r\n"; + body << "\r\n"; + if (!item.is_string()) { + throw std::invalid_argument("expected string"); + } + body << 
item.get() << "\r\n"; + } + } else { + body << "--" << boundary << "\r\n"; + body << "Content-Disposition: form-data; name=\"" << sanitize_field(key) << "\"\r\n"; + body << "\r\n"; + if (!value.is_string()) { + throw std::invalid_argument("expected string"); + } + body << value.get() << "\r\n"; + } + } + + for (const auto & [key, file] : files) { + body << "--" << boundary << "\r\n"; + body << "Content-Disposition: form-data; name=\"" << sanitize_field(key) << "\""; + if (!file.filename.empty()) { + body << "; filename=\"" << sanitize_field(file.filename) << "\""; + } + body << "\r\n"; + if (!file.content_type.empty()) { + body << "Content-Type: " << sanitize_field(file.content_type) << "\r\n"; + } else { + body << "Content-Type: application/octet-stream\r\n"; + } + body << "\r\n"; + body.write(reinterpret_cast(file.data.data()), file.data.size()); + body << "\r\n"; + } + + body << "--" << boundary << "--\r\n"; + return body.str(); +} + server_http_proxy::server_http_proxy( const std::string & method, const std::string & scheme, @@ -1134,6 +1208,7 @@ server_http_proxy::server_http_proxy( const std::string & path, const std::map & headers, const std::string & body, + const std::map & files, const std::function should_stop, int32_t timeout_read, int32_t timeout_write @@ -1195,28 +1270,65 @@ server_http_proxy::server_http_proxy( return pipe->write({{}, 0, std::string(data, data_length), ""}); }; + // when files are present, the body was converted from multipart form data to JSON + // we need to reconstruct the multipart body for the downstream server + std::string effective_body = body; + std::string override_content_type; + bool has_files = !files.empty(); + + if (has_files) { + json form_fields = json::parse(body, nullptr, false); + if (!form_fields.is_discarded()) { + auto boundary = generate_multipart_boundary(); + effective_body = build_multipart_body(form_fields, files, boundary); + override_content_type = "multipart/form-data; boundary=" + boundary; + } else { 
+ throw std::runtime_error("failed to parse multipart form fields JSON"); + } + } + // prepare the request to destination server httplib::Request req; { req.method = method; req.path = path; for (const auto & [key, value] : headers) { - if (key == "Accept-Encoding") { + const auto lowered = to_lower_copy(key); + if (lowered == "accept-encoding") { // disable Accept-Encoding to avoid compressed responses continue; } - if (key == "Transfer-Encoding") { + if (lowered == "transfer-encoding") { // the body is already decoded continue; } - if (key == "Host" || key == "host") { + if (lowered == "content-length") { + // let httplib calculate Content-Length from the actual body + continue; + } + if (lowered == "content-type") { + if (has_files) { + // we set our own Content-Type with the new boundary + continue; + } + // when no files but the original request was multipart, + // the body is now JSON, so correct the Content-Type + if (value.find("multipart/form-data") != std::string::npos) { + override_content_type = "application/json; charset=utf-8"; + continue; + } + } + if (lowered == "host") { bool is_default_port = (scheme == "https" && port == 443) || (scheme == "http" && port == 80); req.set_header(key, is_default_port ? host : host + ":" + std::to_string(port)); } else { req.set_header(key, value); } } - req.body = body; + req.body = effective_body; + if (!override_content_type.empty()) { + req.set_header("Content-Type", override_content_type); + } req.response_handler = response_handler; req.content_receiver = content_receiver; } diff --git a/tools/server/server-models.h b/tools/server/server-models.h index 1db34b6c4d..b3428ef544 100644 --- a/tools/server/server-models.h +++ b/tools/server/server-models.h @@ -202,6 +202,7 @@ public: const std::string & path, const std::map & headers, const std::string & body, + const std::map & files, const std::function should_stop, int32_t timeout_read, int32_t timeout_write