Close TD-16 frontmatter robustness
This commit is contained in:
13
TECHDEBTS.md
13
TECHDEBTS.md
@@ -622,7 +622,18 @@ existing task lifecycle tests green.
|
||||
|
||||
## Phase 4 — Build-vs-buy replacements
|
||||
|
||||
### TD-16: Frontmatter robustness — yaml_elixir/ymlr or harden the hand-rolled parser
|
||||
### TD-16: Frontmatter robustness — yaml_elixir/ymlr or harden the hand-rolled parser ✅ DONE (2026-06-12)
|
||||
|
||||
**Status: implemented.** The project keeps the hand-rolled serializer/parser for
|
||||
byte-stable frontmatter output, but `BDS.Frontmatter` is now hardened for the
|
||||
known user-edited-file cases: `parse_document/1` normalizes CRLF and lone `\r`
|
||||
line endings before splitting the gray-matter envelope, and quoted scalar
|
||||
parsing now removes exactly one closing quote and unescapes content explicitly
|
||||
instead of `trim_trailing/2`, which previously corrupted strings whose content
|
||||
ended in a quote character. Coverage now includes focused parser tests for CRLF
|
||||
documents and quoted-string roundtrips with embedded quotes, backslashes, and
|
||||
trailing-quote content, plus adjacent post/template/maintenance frontmatter and
|
||||
serializer parity suites.
|
||||
|
||||
**Context.** `BDS.Frontmatter` is a hand-rolled YAML subset with concrete
|
||||
bugs for user-edited files:
|
||||
|
||||
@@ -17,7 +17,9 @@ defmodule BDS.Frontmatter do
|
||||
end
|
||||
|
||||
def parse_document(contents) when is_binary(contents) do
|
||||
case String.split(contents, "\n---\n", parts: 2) do
|
||||
normalized_contents = normalize_newlines(contents)
|
||||
|
||||
case String.split(normalized_contents, "\n---\n", parts: 2) do
|
||||
[frontmatter_with_marker, body] ->
|
||||
frontmatter = String.replace_prefix(frontmatter_with_marker, "---\n", "")
|
||||
|
||||
@@ -163,19 +165,11 @@ defmodule BDS.Frontmatter do
|
||||
end
|
||||
|
||||
defp parse_string("\"" <> rest) do
|
||||
rest
|
||||
|> String.trim_trailing("\"")
|
||||
|> String.replace("\\n", "\n")
|
||||
|> String.replace("\\\"", "\"")
|
||||
|> String.replace("\\\\", "\\")
|
||||
parse_quoted_string(rest, ?")
|
||||
end
|
||||
|
||||
defp parse_string("'" <> rest) do
|
||||
rest
|
||||
|> String.trim_trailing("'")
|
||||
|> String.replace("\\n", "\n")
|
||||
|> String.replace("\\'", "'")
|
||||
|> String.replace("\\\\", "\\")
|
||||
parse_quoted_string(rest, ?')
|
||||
end
|
||||
|
||||
defp parse_string(value), do: value
|
||||
@@ -235,4 +229,46 @@ defmodule BDS.Frontmatter do
|
||||
rendered = to_string(key)
|
||||
String.ends_with?(rendered, "_at") or String.ends_with?(rendered, "At")
|
||||
end
|
||||
|
||||
defp normalize_newlines(contents) do
|
||||
contents
|
||||
|> String.replace("\r\n", "\n")
|
||||
|> String.replace("\r", "\n")
|
||||
end
|
||||
|
||||
defp parse_quoted_string(rest, quote) do
|
||||
quote_binary = <<quote::utf8>>
|
||||
|
||||
if String.ends_with?(rest, quote_binary) do
|
||||
inner = binary_part(rest, 0, byte_size(rest) - byte_size(quote_binary))
|
||||
unescape_quoted_string(inner, quote, "")
|
||||
else
|
||||
quote_binary <> rest
|
||||
end
|
||||
end
|
||||
|
||||
defp unescape_quoted_string(<<>>, _quote, acc), do: acc
|
||||
|
||||
defp unescape_quoted_string("\\" <> rest, quote, acc) do
|
||||
case rest do
|
||||
<<"n", tail::binary>> ->
|
||||
unescape_quoted_string(tail, quote, acc <> "\n")
|
||||
|
||||
<<"\\", tail::binary>> ->
|
||||
unescape_quoted_string(tail, quote, acc <> "\\")
|
||||
|
||||
<<escaped, tail::binary>> when escaped == quote ->
|
||||
unescape_quoted_string(tail, quote, acc <> <<quote::utf8>>)
|
||||
|
||||
<<char::utf8, tail::binary>> ->
|
||||
unescape_quoted_string(tail, quote, acc <> "\\" <> <<char::utf8>>)
|
||||
|
||||
<<>> ->
|
||||
acc <> "\\"
|
||||
end
|
||||
end
|
||||
|
||||
defp unescape_quoted_string(<<char::utf8, tail::binary>>, quote, acc) do
|
||||
unescape_quoted_string(tail, quote, acc <> <<char::utf8>>)
|
||||
end
|
||||
end
|
||||
|
||||
@@ -104,25 +104,19 @@ defmodule BDS.AI.ChatStreamingTest do
|
||||
|
||||
server = start_supervised!({Bandit, plug: StreamingChatPlug, port: 0, startup_log: false})
|
||||
{:ok, {_address, port}} = ThousandIsland.listener_info(server)
|
||||
|
||||
assert {:ok, _endpoint} =
|
||||
BDS.AI.put_endpoint(:online, %{
|
||||
url: "http://127.0.0.1:#{port}/v1",
|
||||
api_key: "sk-stream",
|
||||
model: "stream-model"
|
||||
})
|
||||
|
||||
assert :ok = BDS.AI.set_airplane_mode(false)
|
||||
assert {:ok, conversation} = BDS.AI.start_chat(%{model: "stream-model"})
|
||||
|
||||
{:ok, conversation: conversation}
|
||||
{:ok, conversation: conversation, streaming_port: port}
|
||||
end
|
||||
|
||||
test "incremental content events arrive before the final reply and persistence matches", %{
|
||||
conversation: conversation
|
||||
conversation: conversation,
|
||||
streaming_port: port
|
||||
} do
|
||||
conversation_id = conversation.id
|
||||
|
||||
configure_streaming_runtime!(port)
|
||||
|
||||
assert {:ok, reply} =
|
||||
BDS.AI.send_chat_message(conversation_id, "tell me a story",
|
||||
event_target: self()
|
||||
@@ -142,11 +136,16 @@ defmodule BDS.AI.ChatStreamingTest do
|
||||
assert assistant_message.token_usage_output == 4
|
||||
end
|
||||
|
||||
test "cancel_chat mid-stream aborts the HTTP request", %{conversation: conversation} do
|
||||
test "cancel_chat mid-stream aborts the HTTP request", %{
|
||||
conversation: conversation,
|
||||
streaming_port: port
|
||||
} do
|
||||
Application.put_env(:bds, :chat_stream_scenario, :endless)
|
||||
conversation_id = conversation.id
|
||||
test_pid = self()
|
||||
|
||||
configure_streaming_runtime!(port)
|
||||
|
||||
task =
|
||||
Task.async(fn ->
|
||||
BDS.AI.send_chat_message(conversation_id, "stream forever", event_target: test_pid)
|
||||
@@ -161,4 +160,16 @@ defmodule BDS.AI.ChatStreamingTest do
|
||||
# The server notices the closed connection — the request was truly aborted.
|
||||
assert_receive :sse_client_disconnected, 2_000
|
||||
end
|
||||
|
||||
defp configure_streaming_runtime!(port) do
|
||||
assert {:ok, _endpoint} =
|
||||
BDS.AI.put_endpoint(:online, %{
|
||||
url: "http://127.0.0.1:#{port}/v1",
|
||||
api_key: "sk-stream",
|
||||
model: "stream-model"
|
||||
})
|
||||
|
||||
assert :ok = BDS.AI.put_model_preference(:chat, "stream-model")
|
||||
assert :ok = BDS.AI.set_airplane_mode(false)
|
||||
end
|
||||
end
|
||||
|
||||
30
test/bds/frontmatter_test.exs
Normal file
30
test/bds/frontmatter_test.exs
Normal file
@@ -0,0 +1,30 @@
|
||||
defmodule BDS.FrontmatterTest do
|
||||
use ExUnit.Case, async: true
|
||||
|
||||
test "parse_document accepts CRLF frontmatter documents" do
|
||||
contents = "---\r\ntitle: Hello\r\ntags:\r\n - elixir\r\n---\r\nBody\r\n"
|
||||
|
||||
assert {:ok, %{fields: fields, body: body}} = BDS.Frontmatter.parse_document(contents)
|
||||
assert fields["title"] == "Hello"
|
||||
assert fields["tags"] == ["elixir"]
|
||||
assert body == "Body"
|
||||
end
|
||||
|
||||
test "serialize_document roundtrips quoted strings with embedded quotes and escapes" do
|
||||
fields = [
|
||||
{"title", "He said \"hi\" \\\\ there"},
|
||||
{"summary", "Ends with a quote\""},
|
||||
{"excerpt", "first line\nsecond line"}
|
||||
]
|
||||
|
||||
contents = BDS.Frontmatter.serialize_document(fields, "Body")
|
||||
|
||||
assert {:ok, %{fields: parsed_fields, body: body}} =
|
||||
BDS.Frontmatter.parse_document(contents)
|
||||
|
||||
assert parsed_fields["title"] == "He said \"hi\" \\\\ there"
|
||||
assert parsed_fields["summary"] == "Ends with a quote\""
|
||||
assert parsed_fields["excerpt"] == "first line\nsecond line"
|
||||
assert body == "Body"
|
||||
end
|
||||
end
|
||||
Reference in New Issue
Block a user