Close TD-16 frontmatter robustness

This commit is contained in:
2026-06-12 13:27:39 +02:00
parent f7e1662bca
commit 8224b3d59f
4 changed files with 112 additions and 24 deletions

View File

@@ -622,7 +622,18 @@ existing task lifecycle tests green.
## Phase 4 — Build-vs-buy replacements
### TD-16: Frontmatter robustness — yaml_elixir/ymlr or harden the hand-rolled parser
### TD-16: Frontmatter robustness — yaml_elixir/ymlr or harden the hand-rolled parser ✅ DONE (2026-06-12)
**Status: implemented.** The project keeps the hand-rolled serializer/parser for
byte-stable frontmatter output, but `BDS.Frontmatter` is now hardened for the
known user-edited-file cases: `parse_document/1` normalizes CRLF and lone `\r`
line endings before splitting the gray-matter envelope, and quoted scalar
parsing now removes exactly one closing quote and unescapes content explicitly
instead of `trim_trailing/2`, which previously corrupted strings whose content
ended in a quote character. Coverage now includes focused parser tests for CRLF
documents and quoted-string roundtrips with embedded quotes, backslashes, and
trailing-quote content, plus adjacent post/template/maintenance frontmatter and
serializer parity suites.
**Context.** `BDS.Frontmatter` is a hand-rolled YAML subset with concrete
bugs for user-edited files:

View File

@@ -17,7 +17,9 @@ defmodule BDS.Frontmatter do
end
def parse_document(contents) when is_binary(contents) do
case String.split(contents, "\n---\n", parts: 2) do
normalized_contents = normalize_newlines(contents)
case String.split(normalized_contents, "\n---\n", parts: 2) do
[frontmatter_with_marker, body] ->
frontmatter = String.replace_prefix(frontmatter_with_marker, "---\n", "")
@@ -163,19 +165,11 @@ defmodule BDS.Frontmatter do
end
defp parse_string("\"" <> rest) do
rest
|> String.trim_trailing("\"")
|> String.replace("\\n", "\n")
|> String.replace("\\\"", "\"")
|> String.replace("\\\\", "\\")
parse_quoted_string(rest, ?")
end
defp parse_string("'" <> rest) do
rest
|> String.trim_trailing("'")
|> String.replace("\\n", "\n")
|> String.replace("\\'", "'")
|> String.replace("\\\\", "\\")
parse_quoted_string(rest, ?')
end
defp parse_string(value), do: value
@@ -235,4 +229,46 @@ defmodule BDS.Frontmatter do
rendered = to_string(key)
String.ends_with?(rendered, "_at") or String.ends_with?(rendered, "At")
end
defp normalize_newlines(contents) do
contents
|> String.replace("\r\n", "\n")
|> String.replace("\r", "\n")
end
defp parse_quoted_string(rest, quote) do
quote_binary = <<quote::utf8>>
if String.ends_with?(rest, quote_binary) do
inner = binary_part(rest, 0, byte_size(rest) - byte_size(quote_binary))
unescape_quoted_string(inner, quote, "")
else
quote_binary <> rest
end
end
defp unescape_quoted_string(<<>>, _quote, acc), do: acc
defp unescape_quoted_string("\\" <> rest, quote, acc) do
case rest do
<<"n", tail::binary>> ->
unescape_quoted_string(tail, quote, acc <> "\n")
<<"\\", tail::binary>> ->
unescape_quoted_string(tail, quote, acc <> "\\")
<<escaped, tail::binary>> when escaped == quote ->
unescape_quoted_string(tail, quote, acc <> <<quote::utf8>>)
<<char::utf8, tail::binary>> ->
unescape_quoted_string(tail, quote, acc <> "\\" <> <<char::utf8>>)
<<>> ->
acc <> "\\"
end
end
defp unescape_quoted_string(<<char::utf8, tail::binary>>, quote, acc) do
unescape_quoted_string(tail, quote, acc <> <<char::utf8>>)
end
end

View File

@@ -104,25 +104,19 @@ defmodule BDS.AI.ChatStreamingTest do
server = start_supervised!({Bandit, plug: StreamingChatPlug, port: 0, startup_log: false})
{:ok, {_address, port}} = ThousandIsland.listener_info(server)
assert {:ok, _endpoint} =
BDS.AI.put_endpoint(:online, %{
url: "http://127.0.0.1:#{port}/v1",
api_key: "sk-stream",
model: "stream-model"
})
assert :ok = BDS.AI.set_airplane_mode(false)
assert {:ok, conversation} = BDS.AI.start_chat(%{model: "stream-model"})
{:ok, conversation: conversation}
{:ok, conversation: conversation, streaming_port: port}
end
test "incremental content events arrive before the final reply and persistence matches", %{
conversation: conversation
conversation: conversation,
streaming_port: port
} do
conversation_id = conversation.id
configure_streaming_runtime!(port)
assert {:ok, reply} =
BDS.AI.send_chat_message(conversation_id, "tell me a story",
event_target: self()
@@ -142,11 +136,16 @@ defmodule BDS.AI.ChatStreamingTest do
assert assistant_message.token_usage_output == 4
end
test "cancel_chat mid-stream aborts the HTTP request", %{conversation: conversation} do
test "cancel_chat mid-stream aborts the HTTP request", %{
conversation: conversation,
streaming_port: port
} do
Application.put_env(:bds, :chat_stream_scenario, :endless)
conversation_id = conversation.id
test_pid = self()
configure_streaming_runtime!(port)
task =
Task.async(fn ->
BDS.AI.send_chat_message(conversation_id, "stream forever", event_target: test_pid)
@@ -161,4 +160,16 @@ defmodule BDS.AI.ChatStreamingTest do
# The server notices the closed connection — the request was truly aborted.
assert_receive :sse_client_disconnected, 2_000
end
defp configure_streaming_runtime!(port) do
assert {:ok, _endpoint} =
BDS.AI.put_endpoint(:online, %{
url: "http://127.0.0.1:#{port}/v1",
api_key: "sk-stream",
model: "stream-model"
})
assert :ok = BDS.AI.put_model_preference(:chat, "stream-model")
assert :ok = BDS.AI.set_airplane_mode(false)
end
end

View File

@@ -0,0 +1,30 @@
defmodule BDS.FrontmatterTest do
use ExUnit.Case, async: true
test "parse_document accepts CRLF frontmatter documents" do
contents = "---\r\ntitle: Hello\r\ntags:\r\n - elixir\r\n---\r\nBody\r\n"
assert {:ok, %{fields: fields, body: body}} = BDS.Frontmatter.parse_document(contents)
assert fields["title"] == "Hello"
assert fields["tags"] == ["elixir"]
assert body == "Body"
end
test "serialize_document roundtrips quoted strings with embedded quotes and escapes" do
fields = [
{"title", "He said \"hi\" \\\\ there"},
{"summary", "Ends with a quote\""},
{"excerpt", "first line\nsecond line"}
]
contents = BDS.Frontmatter.serialize_document(fields, "Body")
assert {:ok, %{fields: parsed_fields, body: body}} =
BDS.Frontmatter.parse_document(contents)
assert parsed_fields["title"] == "He said \"hi\" \\\\ there"
assert parsed_fields["summary"] == "Ends with a quote\""
assert parsed_fields["excerpt"] == "first line\nsecond line"
assert body == "Body"
end
end