mirror of
https://github.com/ggerganov/llama.cpp
synced 2026-03-14 19:21:00 +01:00
* jinja vm * lexer * add vm types * demo * clean up * parser ok * binary_expression::execute * shadow naming * bin ops works! * fix map object * add string builtins * add more builtins * wip * use mk_val * eval with is_user_input * render gemma tmpl ok * track input string even after transformations * support binded functions * keyword arguments and slicing array * use shared_ptr for values * add mk_stmt * allow print source on exception * fix negate test * testing more templates * mostly works * add filter_statement * allow func to access ctx * add jinja-value.cpp * impl global_from_json * a lot of fixes * more tests * more fix, more tests * more fixes * rm workarounds * demo: type inferrence * add placeholder for tojson * improve function args handling * rm type inference * no more std::regex * trailing spaces * make testing more flexible * make output a bit cleaner * (wip) redirect minja calls * test: add --output * fix crash on macro kwargs * add minimal caps system * add some workarounds * rm caps_apply_workarounds * get rid of preprocessing * more fixes * fix test-chat-template * move test-chat-jinja into test-chat-template * rm test-chat-jinja from cmake * test-chat-template: use common * fix build * fix build (2) * rename vm --> interpreter * improve error reporting * correct lstrip behavior * add tojson * more fixes * disable tests for COMMON_CHAT_FORMAT_GENERIC * make sure tojson output correct order * add object.length * fully functional selectattr / rejectattr * improve error reporting * more builtins added, more fixes * create jinja rendering tests * fix testing.h path * adjust whitespace rules * more fixes * temporary disable test for ibm-granite * r/lstrip behavior matched with hf.js * minimax, glm4.5 ok * add append and pop * kimi-k2 ok * test-chat passed * fix lstrip_block * add more jinja tests * cast to unsigned char * allow dict key to be numeric * nemotron: rm windows newline * tests ok * fix test * rename interpreter --> runtime * fix build * 
add more checks * bring back generic format support * fix Apertus * [json.exception.out_of_range.403] key 'content' not found * rm generic test * refactor input marking * add docs * fix windows build * clarify error message * improved tests * split/rsplit with maxsplit * non-inverse maxsplit forgot to change after simplifying * implement separators for tojson and fix indent * i like to move it move it * rename null -- > none * token::eof * some nits + comments * add exception classes for lexer and parser * null -> none * rename global -> env * rm minja * update docs * docs: add input marking caveats * imlement missing jinja-tests functions * oops * support trim filter with args, remove bogus to_json reference * numerous argument fixes * updated tests * implement optional strip chars parameter * use new chars parameter * float filter also has default * always leave at least one decimal in float string * jinja : static analysis + header cleanup + minor fixes * add fuzz test * add string.cpp * fix chat_template_kwargs * nits * fix build * revert * unrevert sorry :) * add fuzz func_args, refactor to be safer * fix array.map() * loosen ensure_vals max count condition, add not impl for map(int) * hopefully fix windows * check if empty first * normalize newlines --------- Co-authored-by: Alde Rojas <hello@alde.dev> Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
208 lines
5.1 KiB
C++
208 lines
5.1 KiB
C++
#include "jinja/string.h"
|
|
#include "jinja/value.h"
|
|
|
|
#include <algorithm>
|
|
#include <functional>
|
|
#include <optional>
|
|
#include <sstream>
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
namespace jinja {
|
|
|
|
//
|
|
// string_part
|
|
//
|
|
|
|
bool string_part::is_uppercase() const {
|
|
for (char c : val) {
|
|
if (std::islower(static_cast<unsigned char>(c))) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool string_part::is_lowercase() const {
|
|
for (char c : val) {
|
|
if (std::isupper(static_cast<unsigned char>(c))) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
//
|
|
// string
|
|
//
|
|
|
|
void string::mark_input() {
|
|
for (auto & part : parts) {
|
|
part.is_input = true;
|
|
}
|
|
}
|
|
|
|
std::string string::str() const {
|
|
if (parts.size() == 1) {
|
|
return parts[0].val;
|
|
}
|
|
std::ostringstream oss;
|
|
for (const auto & part : parts) {
|
|
oss << part.val;
|
|
}
|
|
return oss.str();
|
|
}
|
|
|
|
size_t string::length() const {
|
|
size_t len = 0;
|
|
for (const auto & part : parts) {
|
|
len += part.val.length();
|
|
}
|
|
return len;
|
|
}
|
|
|
|
bool string::all_parts_are_input() const {
|
|
for (const auto & part : parts) {
|
|
if (!part.is_input) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool string::is_uppercase() const {
|
|
for (const auto & part : parts) {
|
|
if (!part.is_uppercase()) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool string::is_lowercase() const {
|
|
for (const auto & part : parts) {
|
|
if (!part.is_lowercase()) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// mark this string as input if other has ALL parts as input
|
|
void string::mark_input_based_on(const string & other) {
|
|
if (other.all_parts_are_input()) {
|
|
for (auto & part : parts) {
|
|
part.is_input = true;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Appends copies of all parts of `other` (text and flags) to the end of this
// string, then returns a copy of the updated string.
string string::append(const string & other) {
    if (this == &other) {
        // Self-append: growing `parts` while reading from it would invalidate
        // the source iterators, so duplicate from a snapshot instead.
        const auto snapshot = parts;
        parts.insert(parts.end(), snapshot.begin(), snapshot.end());
    } else {
        parts.insert(parts.end(), other.parts.begin(), other.parts.end());
    }
    return *this;
}
|
|
|
|
// in-place transformation
|
|
|
|
using transform_fn = std::function<std::string(const std::string&)>;
|
|
static string apply_transform(string & self, const transform_fn & fn) {
|
|
for (auto & part : self.parts) {
|
|
part.val = fn(part.val);
|
|
}
|
|
return self;
|
|
}
|
|
|
|
// Upper-cases the text of every part in place and returns a copy of the result.
string string::uppercase() {
    return apply_transform(*this, [](const std::string & s) {
        std::string res = s;
        // Receive each byte as unsigned char: handing a negative char value
        // (any byte >= 0x80 where char is signed) to ::toupper is undefined
        // behavior.
        std::transform(res.begin(), res.end(), res.begin(), [](unsigned char c) {
            return static_cast<char>(::toupper(c));
        });
        return res;
    });
}
|
|
// Lower-cases the text of every part in place and returns a copy of the result.
string string::lowercase() {
    return apply_transform(*this, [](const std::string & s) {
        std::string res = s;
        // Receive each byte as unsigned char: handing a negative char value
        // (any byte >= 0x80 where char is signed) to ::tolower is undefined
        // behavior.
        std::transform(res.begin(), res.end(), res.begin(), [](unsigned char c) {
            return static_cast<char>(::tolower(c));
        });
        return res;
    });
}
|
|
// Capitalizes the string in place: the first character of the whole string is
// upper-cased and every following character is lower-cased. Returns a copy of
// the result.
//
// The "first character" state is tracked across parts, so a string stored as
// several parts is capitalized once at its very beginning rather than once
// per part. Empty parts are skipped without consuming that state.
string string::capitalize() {
    bool at_first_char = true;
    for (auto & part : parts) {
        for (char & c : part.val) {
            // Go through unsigned char: negative char values are undefined
            // behavior for the <cctype> functions.
            const auto uc = static_cast<unsigned char>(c);
            c = static_cast<char>(at_first_char ? ::toupper(uc) : ::tolower(uc));
            at_first_char = false;
        }
    }
    return *this;
}
|
|
// Title-cases the string in place: the first character after any run of
// whitespace (and the first character of the string) is upper-cased, all
// other non-whitespace characters are lower-cased. Returns a copy of the
// result.
//
// The word-boundary state is carried across parts, so a word whose text is
// split over two parts does not get a spurious capital at the part boundary.
string string::titlecase() {
    bool capitalize_next = true;
    for (auto & part : parts) {
        for (char & c : part.val) {
            // Go through unsigned char: negative char values are undefined
            // behavior for the <cctype> functions.
            const auto uc = static_cast<unsigned char>(c);
            if (isspace(uc)) {
                capitalize_next = true;
            } else if (capitalize_next) {
                c = static_cast<char>(::toupper(uc));
                capitalize_next = false;
            } else {
                c = static_cast<char>(::tolower(uc));
            }
        }
    }
    return *this;
}
|
|
// Strips characters from the outer edges of the string, in place, and returns
// a copy of the result.
//   left  - trim the leading edge
//   right - trim the trailing edge
//   chars - set of characters to remove; when absent, anything isspace()
//           accepts is removed
// Parts that become empty while trimming are dropped, and trimming continues
// into the neighbouring part until a part keeps some text.
string string::strip(bool left, bool right, std::optional<const std::string_view> chars) {
    // Decides whether a byte belongs to the strip set.
    const auto should_strip = [&chars](unsigned char ch) -> bool {
        if (chars) {
            return chars->find(ch) != std::string::npos;
        }
        return isspace(ch);
    };

    // Returns `text` without its leading strippable bytes.
    const auto trim_front = [&should_strip](const std::string & text) -> std::string {
        size_t first = 0;
        while (first < text.length() && should_strip(static_cast<unsigned char>(text[first]))) {
            ++first;
        }
        return text.substr(first);
    };

    // Returns `text` without its trailing strippable bytes.
    const auto trim_back = [&should_strip](const std::string & text) -> std::string {
        size_t last = text.length();
        while (last > 0 && should_strip(static_cast<unsigned char>(text[last - 1]))) {
            --last;
        }
        return text.substr(0, last);
    };

    if (left) {
        // Keep consuming from the front until a part survives trimming.
        while (!parts.empty()) {
            auto & head = parts.front();
            head.val = trim_front(head.val);
            if (!head.val.empty()) {
                break;
            }
            parts.erase(parts.begin());
        }
    }

    if (right) {
        // Same procedure, walking in from the back.
        while (!parts.empty()) {
            auto & tail = parts.back();
            tail.val = trim_back(tail.val);
            if (!tail.val.empty()) {
                break;
            }
            parts.pop_back();
        }
    }

    return *this;
}
|
|
|
|
} // namespace jinja
|