llama.cpp/tests/test-chat-peg-parser.cpp
Aldehir Rojas 0a8026e768
common : introduce composable PEG parser combinators for chat parsing (#17136)
* common : implement parser combinators to simplify chat parsing

* add virtual destructor to parser_base

* fix memory leak from circular references of rules

* implement gbnf grammar building

* remove unused private variable

* create a base visitor and implement id assignment as a visitor

* fix const ref for grammar builder

* clean up types, friend classes, and class declarations

* remove builder usage from until_parser

* Use a counter class to help assign rule ids

* cache everything

* add short description for each parser

* create a type for the root parser

* implement repetition parser

* Make optional, one_or_more, and zero_or_more subclasses of repetition

* improve context constructor

* improve until parsing and add benchmarks

* remove cached() pattern, cache in parser_base with specialized parsing functions for each parser

* improve json parsing performance to better match legacy parsing

* fix const auto * it for windows

* move id assignment to classes instead of using a visitor

* create named rules in the command r7b example

* use '.' for any in GBNF

* fix parens around choices in gbnf grammar

* add convenience operators to turn strings to literals

* add free-form operators for const char * to simplify defining literals

* simplify test case parser

* implement semantic actions

* remove groups in favor of actions and a scratchpad

* add built in actions for common operations

* add actions to command r7b example

* use std::default_searcher for platforms that don't have bm

* improve parser_type handling and add cast helper

* add partial result type to better control when to run actions

* fix bug in until()

* run actions on partial results by default

* use common_chat_msg for result

* add qwen3 example wip

* trash partial idea and simplify

* move action arguments to a struct

* implement aho-corasick matcher for until_parser and to build exclusion grammars

* use std::string for input, since std::string_view is incompatible with std::regex

* Refactor tests

* improve qwen3 example

* implement sax-style parsing and refactor

* fix json string in test

* rename classes to use common_chat_ prefix

* remove is_ suffix from functions

* rename from id_counter to just counter

* Final refactored tests

* Fix executable name and editorconfig-checker

* Third time's the charm...

* add trigger parser to begin lazy grammar rule generation

* working lazy grammar

* refactor json rules now that we check for reachability

* reduce pointer usage

* print out grammars in example

* rename to chat-peg-parser* and common_chat_peg_parser*

* Revert unrelated changes

* New macros for CMakeLists to enable multi-file compilations

* starting unicode support

* add unicode support to char_parser

* use unparsed args as additional sources

* Refactor tests to new harness

* Fix CMakeLists

* fix rate calculation

* add unicode tests

* fix trailing whitespace and line endings

skip-checks: true

* Helpers + rewrite qwen3 with helpers

* Fix whitespace

* extract unicode functions to separate file

* refactor parse unicode function

* fix compiler error

* improve construction of sequence/choice parsers

* be less clever

* add make_parser helper function

* expand usage of make_parser, alias common_chat_msg_peg_parser_builder to builder in source

* lower bench iterations

* add unicode support to until_parser

* add unicode support to json_string_parser

* clean up unicode tests

* reduce unicode details to match src/unicode.cpp

* simplify even further

* remove unused functions

* fix type

* reformat char class parsing

* clean up json string parser

* clean up + fix diagnostics

* reorder includes

* compact builder functions

* replace action_parser with capture_parser, rename env to semantics

* rename env to semantics

* clean up common_chat_parse_context

* move type() to below constant

* use default constructor for common_chat_peg_parser

* make all operators functions for consistency

* fix compilation errors in test-optional.cpp

* simplify result values

* rename json_string_unquoted to json_string_content

* Move helper to separate class, add separate explicit and helper classes

* Whitespace

* Change + to append()

* Reformat

* Add extra helpers, tests and Minimax example

* Add some extra optional debugging prints + real example of how to use them

* fix bug in repetitions when min_count = 0 reports failures

* dump rule in debug

* fix token accumulation and assert parsing never fails

* indent debug by depth

* use LOG_* in tests so logs sync up with test logs

* - Add selective testing
- Refactor all messaging to use LOG_ERR
- Fix lack of argument / tool name capturing
- Temporary fix for double event capture

* refactor rule() and introduce ref()

* clean up visitor

* clean up indirection in root parser w.r.t rules

* store shared ptr directly in parser classes

* replace aho-corasick automation with a simple trie

* Reset prev for qwen3 helper example variant

* refactor to use value semantics with std::variant/std::visit

* simplify trie_matcher result

* fix linting issues

* add annotations to rules

* revert test workaround

* implement serializing the parser

* remove redundant parsers

* remove tests

* gbnf generation fixes

* remove LOG_* use in tests

* update gbnf tests to test entire grammar

* clean up gbnf generation and fix a few bugs

* fix typo in test output

* remove implicit conversion rules

* improve test output

* rename trie_matcher to trie

* simplify trie to just know if a node is the end of a word

* remove common_chat_ prefix and ensure a common_peg_ prefix to all types

* rename chat-peg-parser -> peg-parser

* promote chat-peg-parser-helper to chat-peg-parser

* checkpoint

* use a static_assert to ensure we handle every branch

* inline trivial peg parser builders

* use json strings for now

* implement basic and native chat peg parser builders/extractors

* resolve refs to their rules

* remove packrat caching (for now)

* update tests

* compare parsers with incremental input

* benchmark both complete and incremental parsing

* add raw string generation from json schema

* add support for string schemas in gbnf generation

* fix qwen example to include \n

* tidy up example

* rename extractor to mapper

* rename ast_arena to ast

* place basic tests into one

* use gbnf_format_literal from json-schema-to-grammar

* integrate parser with common/chat and server

* clean up schema and serialization

* add json-schema raw string tests

* clean up json creation and remove capture parser

* trim spaces from reasoning and content

* clean up redundant rules and comments

* rename input_is_complete to is_partial to match rest of project

* simplify json rules

* remove extraneous file

* remove comment

* implement += and |= operators

* add comments to qwen3 implementation

* reorder arguments to common_chat_peg_parse

* remove commented outdated tests

* add explicit copy constructor

* fix operators and constness

* wip: update test-chat for qwen3-coder

* bring json parser closer to json-schema-to-grammar rules

* trim trailing space for most things

* fix qwen3 coder rules w.r.t. trailing spaces

* group rules

* do not trim trailing space from string args

* tweak spacing of qwen3 grammar

* update qwen3-coder tests

* qwen3-coder small fixes

* place parser in common_chat_syntax to simplify invocation

* use std::set to collect rules to keep order predictable for tests

* initialize parser to make certain platforms happy

* revert back to std::unordered_set, sort rule names at the end instead

* uncomment rest of chat tests

* define explicit default constructor

* improve arena init and server integration

* fix chat test

* add json_member()

* add a comprehensive native example

* clean up example qwen test and add response_format example to native test

* make build_peg_parser accept std::function instead of template

* change peg parser parameters into const ref

* push tool call on tool open for constructed parser

* add parsing documentation

* clean up some comments

* add json schema support to qwen3-coder

* add id initializer in tests

* remove grammar debug line from qwen3-coder

* refactor qwen3-coder to use sequence over operators

* only call common_chat_peg_parse if appropriate format

* simplify qwen3-coder space handling

* revert qwen3-coder implementation

* revert json-schema-to-grammar changes

* remove unnecessary forward declaration

* small adjustment to until_parser

* rename C/C++ files to use dashes

* codeowners : add aldehir to peg-parser and related files

---------

Co-authored-by: Piotr Wilkin <piotr.wilkin@syndatis.com>
2025-12-03 12:45:32 +02:00

769 lines
32 KiB
C++

#include <string>
#include <iostream>
#include <numeric>
#include "chat-parser.h"
#include "chat-peg-parser.h"
#include "chat.h"
#include "common.h"
#include "json-schema-to-grammar.h"
#include "peg-parser.h"
#include "peg-parser/testing.h"
#include "peg-parser/simple-tokenize.h"
#include "nlohmann/json.hpp"
using json = nlohmann::ordered_json;
static json create_tools();
static void test_example_native(testing & t);
static void test_example_qwen3_coder(testing & t);
static void test_command7_parser_compare(testing & t);
int main(int argc, char *argv[]) {
testing t(std::cout);
if (argc >= 2) {
t.set_filter(argv[1]);
}
const char * verbose = getenv("LLAMA_TEST_VERBOSE");
if (verbose) {
t.verbose = std::string(verbose) == "1";
}
t.test("native", test_example_native);
t.test("qwen3 coder", test_example_qwen3_coder);
t.test("comparison", test_command7_parser_compare);
return t.summary();
}
static json create_tools() {
json tools = json::array();
json tool_weather = {
{"type", "function"},
{"function", {
{"name", "get_current_weather"},
{"description", "Get the current weather in a given location"},
{"parameters", {
{"type", "object"},
{"properties", {
{"location", {
{"type", "string"},
{"description", "The city and state, e.g. San Francisco, CA"}
}},
{"unit", {
{"type", "string"},
{"enum", {"celsius", "fahrenheit"}},
{"description", "The temperature unit to use. Infer this from the users location."}
}}
}},
{"required", {"location", "unit"}},
}},
}}
};
tools.push_back(tool_weather);
json tool_forecast = {
{"type", "function"},
{"function", {
{"name", "get_forecast"},
{"description", "Get the weather forecast for a given location"},
{"parameters", {
{"type", "object"},
{"properties", {
{"location", {
{"type", "string"},
{"description", "The city and state, e.g. San Francisco, CA"}
}},
{"unit", {
{"type", "string"},
{"enum", {"celsius", "fahrenheit"}},
{"description", "The temperature unit to use. Infer this from the users location."}
}},
{"days", {
{"type", "integer"},
{"description", "Number of days to forecast (1-10)"},
{"minimum", 1},
{"maximum", 10}
}}
}},
{"required", {"location", "unit"}},
}},
}}
};
tools.push_back(tool_forecast);
json tool_search = {
{"type", "function"},
{"function", {
{"name", "search_knowledge_base"},
{"description", "Search the internal technical documentation knowledge base."},
{"parameters", {
{"type", "object"},
{"properties", {
{"query", {
{"type", "string"},
{"description", "The search query string."}
}},
{"max_results", {
{"type", "integer"},
{"description", "The maximum number of results to return."},
{"default", 5}
}},
{"category", {
{"type", "string"},
{"enum", {"api", "troubleshooting", "billing", "general"}},
{"description", "Filter search by specific category."}
}}
}},
{"required", {"query", "category"}},
{"additionalProperties", false}
}},
{"strict", true}
}}
};
tools.push_back(tool_search);
return tools;
}
struct tool_argument {
std::string name;
std::string type;
bool is_required;
json schema;
};
struct tool_definition {
std::string name;
std::vector<tool_argument> arguments;
json schema;
};
// Test fictitious model output that emits arguments as JSON.
static void test_example_native(testing & t) {
struct test_case {
// Parameters
std::string name;
json tools;
common_chat_tool_choice tool_choice;
common_reasoning_format reasoning_format;
json json_schema;
bool parallel_tool_calls;
bool thinking_forced_open;
std::string input;
// Expect
std::string expect_reasoning;
std::string expect_content;
std::vector<common_chat_tool_call> expect_tool_calls;
};
auto build_parser = [](const test_case & tc) {
return build_chat_peg_native_parser([&](common_chat_peg_native_builder & p) {
auto reasoning_in_content = (tc.reasoning_format == COMMON_REASONING_FORMAT_NONE);
auto reasoning = p.eps();
if (tc.thinking_forced_open) {
// If thinking is forced open, expect a closing tag
reasoning = p.reasoning(p.until("</think>")) + "</think>" + p.space();
} else {
// Otherwise, optionally accept thinking wrapped in tags
reasoning = p.optional("<think>" + p.reasoning(p.until("</think>")) + "</think>" + p.space());
}
// tool calling parser
if (tc.tools.is_array() && !tc.tools.empty()) {
auto tools = p.choice();
for (const auto & tool : tc.tools) {
const auto & function = tool.at("function");
std::string name = function.at("name");
const auto & schema = function.at("parameters");
auto tool_name = p.json_member("name", "\"" + p.tool_name(p.literal(name)) + "\"");
auto tool_args = p.json_member("arguments", p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema)));
tools |= p.rule("tool-" + name, p.tool_open(p.literal("{")) << tool_name << "," << tool_args << "}");
};
auto parallel_calls = p.eps();
if (tc.parallel_tool_calls) {
parallel_calls = p.zero_or_more("," << tools);
}
auto tool_call = p.trigger_rule("tool-call",
p.sequence({
p.literal("<tool_call>["),
tools,
parallel_calls,
p.literal("]</tool_call>")
})
);
return p.sequence({
(reasoning_in_content ? p.eps() : reasoning),
p.content(p.until("<tool_call>")),
p.optional(p.space() + tool_call),
p.space(),
p.end()
});
}
// response_format parser
if (tc.json_schema.is_object() && !tc.json_schema.empty()) {
return p.sequence({
(reasoning_in_content ? p.eps() : reasoning),
p.content(p.schema(p.json(), "response-output", tc.json_schema)),
p.space(),
p.end()
});
}
// Content-only parser
return p.sequence({
(reasoning_in_content ? p.eps() : reasoning),
p.content(p.rest()),
p.end()
});
});
};
std::vector<test_case> test_cases = std::vector<test_case>{
{
/* .name = */ "content with thinking_forced_open = false",
/* .tools = */ {},
/* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
/* .json_schema = */ {},
/* .parallel_tool_calls = */ false,
/* .thinking_forced_open = */ false,
/* .input = */ (
"<think>The user said hello, I must say hello back</think>\nHello"
),
/* .expect_reasoning = */ "The user said hello, I must say hello back",
/* .expect_content = */ "Hello",
/* .expect_tool_calls = */ {},
},
{
/* .name = */ "content with thinking_forced_open = false and no reasoning",
/* .tools = */ {},
/* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
/* .json_schema = */ {},
/* .parallel_tool_calls = */ false,
/* .thinking_forced_open = */ false,
/* .input = */ (
"Hello"
),
/* .expect_reasoning = */ "",
/* .expect_content = */ "Hello",
/* .expect_tool_calls = */ {},
},
{
/* .name = */ "content with thinking_forced_open = false and reasoning_format = none",
/* .tools = */ {},
/* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE,
/* .json_schema = */ {},
/* .parallel_tool_calls = */ false,
/* .thinking_forced_open = */ true,
/* .input = */ (
"<think>The user said hello, I must say hello back</think>\nHello"
),
/* .expect_reasoning = */ "",
/* .expect_content = */ "<think>The user said hello, I must say hello back</think>\nHello",
/* .expect_tool_calls = */ {},
},
{
/* .name = */ "content with thinking_forced_open = true",
/* .tools = */ {},
/* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
/* .json_schema = */ {},
/* .parallel_tool_calls = */ false,
/* .thinking_forced_open = */ true,
/* .input = */ (
"The user said hello, I must say hello back</think>\nHello"
),
/* .expect_reasoning = */ "The user said hello, I must say hello back",
/* .expect_content = */ "Hello",
/* .expect_tool_calls = */ {},
},
{
/* .name = */ "content with thinking_forced_open = true and reasoning_format = none",
/* .tools = */ {},
/* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE,
/* .json_schema = */ {},
/* .parallel_tool_calls = */ false,
/* .thinking_forced_open = */ true,
/* .input = */ (
"The user said hello, I must say hello back</think>\nHello"
),
/* .expect_reasoning = */ "",
/* .expect_content = */ "The user said hello, I must say hello back</think>\nHello",
/* .expect_tool_calls = */ {},
},
{
/* .name = */ "tools with tool_choice = auto and no parallel_tool_calls",
/* .tools = */ create_tools(),
/* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_AUTO,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
/* .json_schema = */ {},
/* .parallel_tool_calls = */ false,
/* .thinking_forced_open = */ true,
/* .input = */ (
"I must get the weather in New York</think>\n"
"<tool_call>["
R"({"name": "get_current_weather", "arguments": {"location": "New York City, NY", "unit": "fahrenheit"}})"
"]</tool_call>"
),
/* .expect_reasoning = */ "I must get the weather in New York",
/* .expect_content = */ "",
/* .expect_tool_calls = */ {{
/* .name = */ "get_current_weather",
/* .arguments = */ R"({"location": "New York City, NY", "unit": "fahrenheit"})",
/* .id = */ "",
}},
},
{
/* .name = */ "tools with tool_choice = auto and parallel_tool_calls",
/* .tools = */ create_tools(),
/* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_AUTO,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
/* .json_schema = */ {},
/* .parallel_tool_calls = */ true,
/* .thinking_forced_open = */ true,
/* .input = */ (
"I must get the weather in New York and San Francisco and a 3 day forecast of each.</think>\nLet me search that for you."
"<tool_call>["
R"({"name": "get_current_weather", "arguments": {"location": "New York City, NY", "unit": "fahrenheit"}})"
", "
R"({"name": "get_current_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}})"
", "
R"({"name": "get_forecast", "arguments": {"location": "New York City, NY", "unit": "fahrenheit", "days": 3}})"
", "
R"({"name": "get_forecast", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit", "days": 3}})"
"]</tool_call>"
),
/* .expect_reasoning = */ "I must get the weather in New York and San Francisco and a 3 day forecast of each.",
/* .expect_content = */ "Let me search that for you.",
/* .expect_tool_calls = */ {{
/* .name = */ "get_current_weather",
/* .arguments = */ R"({"location": "New York City, NY", "unit": "fahrenheit"})",
/* .id = */ "",
}, {
/* .name = */ "get_current_weather",
/* .arguments = */ R"({"location": "San Francisco, CA", "unit": "fahrenheit"})",
/* .id = */ "",
}, {
/* .name = */ "get_forecast",
/* .arguments = */ R"({"location": "New York City, NY", "unit": "fahrenheit", "days": 3})",
/* .id = */ "",
}, {
/* .name = */ "get_forecast",
/* .arguments = */ R"({"location": "San Francisco, CA", "unit": "fahrenheit", "days": 3})",
/* .id = */ "",
}},
},
{
/* .name = */ "response_format with thinking_forced_open = true",
/* .tools = */ {},
/* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
/* .json_schema = */ {
{"type", "object"},
{"properties", {
{"invoice_number", {{"type", "string"}}},
{"amount", {{"type", "number"}}},
{"due_date", {{"type", "string"}}}
}},
{"required", {"invoice_number", "amount", "due_date"}}
},
/* .parallel_tool_calls = */ false,
/* .thinking_forced_open = */ true,
/* .input = */ (
"I must produce the invoice in the requested format</think>\n"
R"({"invoice_number": "INV-2025-001", "amount": 1250.50, "due_date": "2025-12-31"})"
),
/* .expect_reasoning = */ "I must produce the invoice in the requested format",
/* .expect_content = */ R"({"invoice_number": "INV-2025-001", "amount": 1250.50, "due_date": "2025-12-31"})",
/* .expect_tool_calls = */ {},
},
};
for (const auto & tc : test_cases) {
t.test(tc.name, [&](testing & t) {
auto parser = build_parser(tc);
auto lazy = !tc.tools.empty() && tc.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
auto grammar = build_grammar([&](const common_grammar_builder & builder) {
for (auto const & def : tc.tools) {
auto function = def.at("function");
auto parameters = function.at("parameters");
builder.resolve_refs(parameters);
};
parser.build_grammar(builder, lazy);
});
t.log("Grammar:");
for (auto const & line : string_split(grammar, "\n")) {
t.log(line);
}
common_peg_parse_context ctx(tc.input, false);
auto result = parser.parse(ctx);
t.assert_true("success", result.success());
common_chat_msg msg;
auto mapper = common_chat_peg_native_mapper(msg);
mapper.from_ast(ctx.ast, result);
t.assert_equal("content equal", tc.expect_content, msg.content);
t.assert_equal("reasoning equal", tc.expect_reasoning, msg.reasoning_content);
t.assert_equal("number of tool calls", tc.expect_tool_calls.size(), msg.tool_calls.size());
for (auto i = 0u; i < std::min(tc.expect_tool_calls.size(), msg.tool_calls.size()); i++) {
t.assert_equal("tool name", tc.expect_tool_calls[i].name, msg.tool_calls[i].name);
t.assert_equal("tool args", tc.expect_tool_calls[i].arguments, msg.tool_calls[i].arguments);
}
});
}
}
static void test_example_qwen3_coder(testing & t) {
auto tools = create_tools();
auto parser = build_chat_peg_constructed_parser([&](common_chat_peg_constructed_builder & p) {
auto content = p.rule("content", p.content(p.until("<tool_call>")));
std::vector<common_peg_parser> tool_parsers;
for (auto const & def : tools) {
auto function = def.at("function");
std::string name = function.at("name");
auto parameters = function.at("parameters");
auto properties = parameters.at("properties");
std::set<std::string> required_properties;
if (function.contains("required")) {
function.at("required").get_to(required_properties);
}
std::vector<common_peg_parser> arg_parsers;
for (const auto & [param_name, param_schema] : properties.items()) {
bool is_required = required_properties.find(param_name) != required_properties.end();
auto type = param_schema.value("type", "object");
auto arg = p.tool_arg(p.sequence({
p.tool_arg_open("<parameter=" + p.tool_arg_name(p.literal(param_name)) + ">"),
(type == "string" ?
p.tool_arg_string_value(
p.schema(
p.until_one_of({
"</parameter>\n<parameter=",
"</parameter>\n</function>"
}),
"tool-" + name + "-arg-" + param_name + "-schema",
param_schema,
true
)
) : p.tool_arg_json_value(
p.schema(
p.json(),
"tool-" + name + "-arg-" + param_name + "-schema",
param_schema
)
)
),
p.tool_arg_close(
"</parameter>\n" +
p.peek(p.literal("<parameter=") | p.literal("</function>"))
)
}));
arg_parsers.push_back(is_required ?
p.rule("tool-" + name + "-arg-" + param_name, arg) :
p.optional(p.rule("tool-" + name + "-arg-" + param_name, arg)));
}
tool_parsers.push_back(p.rule("tool-" + name,
p.tool_open("<function=" + p.tool_name(p.literal(name)) + ">")
<< p.sequence(arg_parsers)
<< p.tool_close(p.literal("</function>"))
));
};
auto tool_call = p.trigger_rule("tool-call",
"<tool_call>"
<< p.choice(tool_parsers)
<< "</tool_call>"
);
return content + p.zero_or_more(p.space() + tool_call) + p.end();
});
auto grammar = build_grammar([&](const common_grammar_builder & builder) {
for (auto const & def : tools) {
auto function = def.at("function");
auto parameters = function.at("parameters");
builder.resolve_refs(parameters);
};
parser.build_grammar(builder);
});
t.log("Grammar:");
for (auto const & line : string_split(grammar, "\n")) {
t.log(line);
}
t.test("incremental parsing", [&](testing &t) {
std::string input =
"Let me search the knowledge base for cat pictures."
"<tool_call>\n"
"<function=search_knowledge_base>\n"
"<parameter=query>cat pictures</parameter>\n"
"<parameter=category>general</parameter>\n"
"</function>\n"
"</tool_call>";
std::vector<std::string> tokens = simple_tokenize(input);
common_chat_msg prev;
for (auto it = tokens.begin(); it != tokens.end(); it++) {
std::string in = std::accumulate(tokens.begin(), it + 1, std::string());
common_peg_parse_context ctx(in, it + 1 < tokens.end());
auto result = parser.parse(ctx);
if (!t.assert_equal("not fail", false, result.fail())) {
t.log(in.substr(0, result.end) + "[failed->]" + in.substr(result.end));
}
common_chat_msg msg;
auto mapper = common_chat_peg_constructed_mapper(msg);
mapper.from_ast(ctx.ast, result);
//t.log("Input: " + input);
t.log("===========================================");
t.log("Iteration " + std::to_string(in.size()));
t.log("Reasoning: " + msg.reasoning_content);
t.log("Content : " + msg.content);
for (const auto & tc : msg.tool_calls) {
t.log("Tool name: " + tc.name);
t.log("Tool args: " + tc.arguments);
}
try {
// This shouldn't emit any runtime errors
auto diffs = common_chat_msg_diff::compute_diffs(prev, msg);
} catch(const std::exception & e) {
t.log(in.substr(0, result.end) + "[failed->]" + in.substr(result.end));
t.assert_true(std::string("failed with ") + e.what(), false);
}
prev = msg;
}
});
}
void test_command7_parser_compare(testing & t) {
auto parser = build_chat_peg_native_parser([](common_chat_peg_native_builder & p) {
auto thinking = p.reasoning_block(
"<|START_THINKING|>" << p.reasoning(p.until("<|END_THINKING|>")) << "<|END_THINKING|>");
auto response = "<|START_RESPONSE|>" << p.content(p.until("<|END_RESPONSE|>")) << "<|END_RESPONSE|>";
auto tool_call_id = p.atomic("\"tool_call_id\"" << (":" << ("\"" + p.tool_id(p.json_string_content()) + "\"")));
auto tool_call_name = p.atomic("\"tool_name\"" << (":" << ("\"" + p.tool_name(p.json_string_content()) + "\"")));
auto tool_call_args = "\"parameters\"" << (":" << p.tool_args(p.json()));
auto tool_call_fields = p.rule("tool-call-fields", tool_call_id | tool_call_name | tool_call_args);
auto tool_call = p.rule("tool-call", p.tool(
p.tool_open(p.literal("{"))
<< tool_call_fields
<< p.zero_or_more( p.literal(",") << tool_call_fields)
<< p.tool_close(p.literal("}"))
));
auto tool_calls = p.rule("tool-calls",
"<|START_ACTION|>"
<< ("[" << tool_call << p.zero_or_more(p.literal(",") << tool_call) << "]")
<< "<|END_ACTION|>");
return p.optional(thinking) << (tool_calls | response) + p.end();
});
auto test_current = [&](const common_peg_arena & p, const std::string & input, bool is_partial, bool print_results) {
common_peg_parse_context ctx(input, is_partial);
auto result = p.parse(ctx);
common_chat_msg msg;
auto mapper = common_chat_peg_native_mapper(msg);
mapper.from_ast(ctx.ast, result);
if (print_results) {
std::cout << "== Parsed (new) ==\n";
std::cout << "=== Reasoning ===\n";
std::cout << msg.reasoning_content << "\n";
std::cout << "\n\n=== Content ===\n";
std::cout << msg.content << "\n";
std::cout << "\n\n=== Tool Calls ===\n";
for (const auto & tc : msg.tool_calls) {
std::cout << "id: " << tc.id << "\n";
std::cout << "name: " << tc.name << "\n";
std::cout << "args: " << tc.arguments << "\n";
}
}
};
auto test_legacy = [&](const std::string & input, bool need_more_input, bool print_results) {
// Original common_chat_combinator_parser taken from chat.cpp
common_chat_msg_parser builder(
input,
/* .is_partial = */ need_more_input,
{
/* .format = */ COMMON_CHAT_FORMAT_GENERIC,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
/* .reasoning_in_content = */ false,
/* .thinking_forced_open = */ false,
}
);
builder.try_parse_reasoning("<|START_THINKING|>", "<|END_THINKING|>");
static const common_regex start_action_regex("<\\|START_ACTION\\|>");
static const common_regex end_action_regex("<\\|END_ACTION\\|>");
static const common_regex start_response_regex("<\\|START_RESPONSE\\|>");
static const common_regex end_response_regex("<\\|END_RESPONSE\\|>");
if (auto res = builder.try_find_regex(start_action_regex)) {
// If we didn't extract thoughts, prelude includes them.
auto tool_calls = builder.consume_json_with_dumped_args({ { "parameters" } });
for (const auto & tool_call : tool_calls.value) {
std::string name = tool_call.contains("tool_name") ? tool_call.at("tool_name") : "";
std::string id = tool_call.contains("tool_call_id") ? tool_call.at("tool_call_id") : "";
std::string arguments = tool_call.contains("parameters") ? tool_call.at("parameters") : "";
if (!builder.add_tool_call(name, id, arguments) || tool_calls.is_partial) {
throw common_chat_msg_partial_exception("incomplete tool call");
}
}
if (tool_calls.is_partial) {
throw common_chat_msg_partial_exception("incomplete tool call");
}
builder.consume_regex(end_action_regex);
} else if (auto res = builder.try_find_regex(start_response_regex)) {
if (!builder.try_find_regex(end_response_regex)) {
builder.add_content(builder.consume_rest());
throw common_chat_msg_partial_exception(end_response_regex.str());
}
} else {
builder.add_content(builder.consume_rest());
}
if (print_results) {
std::cout << "== Parsed (legacy) ==\n";
std::cout << "=== Reasoning ===\n";
std::cout << builder.result().reasoning_content << "\n";
std::cout << "\n\n=== Content ===\n";
std::cout << builder.result().content << "\n";
std::cout << "\n\n=== Tool Calls ===\n";
for (const auto & tc : builder.result().tool_calls) {
std::cout << "id: " << tc.id << "\n";
std::cout << "name: " << tc.name << "\n";
std::cout << "args: " << tc.arguments << "\n";
}
}
};
std::string reasoning = "To plan an effective trip to Japan that includes both historical sites and modern attractions within a "
"budget of $4000 for a two-week stay, we need to:\n\n"
"1. Identify key historical sites and modern attractions in Japan.\n"
"2. Find affordable accommodation options that provide a balance between comfort and cost.\n"
"3. Determine the best modes of transportation for getting around Japan.\n"
"4. Create a day-by-day itinerary that ensures the user gets to see a variety of attractions without "
"overspending.\n"
"5. Provide a detailed cost breakdown that includes accommodation, transportation, meals, and entry fees "
"to attractions.";
std::vector<std::tuple<std::string, std::string, nlohmann::json>> tool_calls = {{
"call_0",
"plan_trip",
nlohmann::json::parse(R"({
"destination": "Japan",
"duration": 14,
"budget": 4000,
"interests": ["historical sites", "modern attractions"],
"accommodation_preferences": "affordable",
"transportation_preferences": "efficient",
"meal_preferences": "local cuisine"
})")
}};
std::vector<std::string> tokens;
// Build tokens
if (!reasoning.empty()) {
auto tokenized = simple_tokenize(reasoning);
tokens.emplace_back("<|START_THINKING|>");
tokens.insert(tokens.end(), tokenized.begin(), tokenized.end());
tokens.emplace_back("<|END_THINKING|>");
}
if (!tool_calls.empty()) {
tokens.emplace_back("<|START_ACTION|>");
auto json = nlohmann::json::array();
for (const auto & tc : tool_calls) {
auto tc_json = nlohmann::json::object();
tc_json["tool_call_id"] = std::get<0>(tc);
tc_json["tool_name"] = std::get<1>(tc);
tc_json["parameters"] = std::get<2>(tc);
json.push_back(tc_json);
}
auto tokenized = simple_tokenize(json.dump(-1, ' ', true));
tokens.insert(tokens.end(), tokenized.begin(), tokenized.end());
tokens.emplace_back("<|END_ACTION|>");
}
std::string input = std::accumulate(tokens.begin(), tokens.end(), std::string());
// Run tests
t.test("legacy_parse", [&](testing & /* t */) {
test_legacy(input, false, false);
});
t.test("current_parse", [&](testing & /* t */) {
test_current(parser, input, false, false);
});
// Run benchmarks
t.bench("legacy_parse_benchmark complete", [&]() {
test_legacy(input, false, false);
});
t.bench("legacy_parse_benchmark incremental", [&]() {
std::string in;
for (auto i = 0u; i < tokens.size(); i++) {
in += tokens[i];
try {
test_legacy(in, i + 1 < tokens.size(), false);
} catch (common_chat_msg_partial_exception & /* e */) {
// Do nothing, this is expected
}
}
}, 20);
t.bench("current_parse_benchmark complete", [&]() {
test_current(parser, input, false, false);
}, 100);
t.bench("current_parse_benchmark incremental", [&]() {
std::string in;
for (auto i = 0u; i < tokens.size(); i++) {
in += tokens[i];
test_current(parser, in, i + 1 < tokens.size(), false);
}
}, 20);
}