From 8224b3d59fe43f57db3bebd1af6e09a83c84e7f3 Mon Sep 17 00:00:00 2001 From: Chili Palmer Date: Fri, 12 Jun 2026 13:27:39 +0200 Subject: [PATCH] Close TD-16 frontmatter robustness --- TECHDEBTS.md | 13 ++++++- lib/bds/frontmatter.ex | 58 +++++++++++++++++++++++------ test/bds/ai/chat_streaming_test.exs | 35 +++++++++++------ test/bds/frontmatter_test.exs | 30 +++++++++++++++ 4 files changed, 112 insertions(+), 24 deletions(-) create mode 100644 test/bds/frontmatter_test.exs diff --git a/TECHDEBTS.md b/TECHDEBTS.md index 38e8fb6..8e17dbf 100644 --- a/TECHDEBTS.md +++ b/TECHDEBTS.md @@ -622,7 +622,18 @@ existing task lifecycle tests green. ## Phase 4 — Build-vs-buy replacements -### TD-16: Frontmatter robustness — yaml_elixir/ymlr or harden the hand-rolled parser +### TD-16: Frontmatter robustness — yaml_elixir/ymlr or harden the hand-rolled parser ✅ DONE (2026-06-12) + +**Status: implemented.** The project keeps the hand-rolled serializer/parser for +byte-stable frontmatter output, but `BDS.Frontmatter` is now hardened for the +known user-edited-file cases: `parse_document/1` normalizes CRLF and lone `\r` +line endings before splitting the gray-matter envelope, and quoted scalar +parsing now removes exactly one closing quote and unescapes content explicitly +instead of `trim_trailing/2`, which previously corrupted strings whose content +ended in a quote character. Coverage now includes focused parser tests for CRLF +documents and quoted-string roundtrips with embedded quotes, backslashes, and +trailing-quote content, plus adjacent post/template/maintenance frontmatter and +serializer parity suites. **Context.** `BDS.Frontmatter` is a hand-rolled YAML subset with concrete bugs for user-edited files: diff --git a/lib/bds/frontmatter.ex b/lib/bds/frontmatter.ex index 96ddd5d..e60b8b2 100644 --- a/lib/bds/frontmatter.ex +++ b/lib/bds/frontmatter.ex @@ -17,7 +17,9 @@ defmodule BDS.Frontmatter do end def parse_document(contents) when is_binary(contents) do - case String.split(contents, "\n---\n", parts: 2) do + normalized_contents = normalize_newlines(contents) + + case String.split(normalized_contents, "\n---\n", parts: 2) do [frontmatter_with_marker, body] -> frontmatter = String.replace_prefix(frontmatter_with_marker, "---\n", "") @@ -163,19 +165,11 @@ defmodule BDS.Frontmatter do end defp parse_string("\"" <> rest) do - rest - |> String.trim_trailing("\"") - |> String.replace("\\n", "\n") - |> String.replace("\\\"", "\"") - |> String.replace("\\\\", "\\") + parse_quoted_string(rest, ?") end defp parse_string("'" <> rest) do - rest - |> String.trim_trailing("'") - |> String.replace("\\n", "\n") - |> String.replace("\\'", "'") - |> String.replace("\\\\", "\\") + parse_quoted_string(rest, ?') end defp parse_string(value), do: value @@ -235,4 +229,46 @@ defmodule BDS.Frontmatter do rendered = to_string(key) String.ends_with?(rendered, "_at") or String.ends_with?(rendered, "At") end + + defp normalize_newlines(contents) do + contents + |> String.replace("\r\n", "\n") + |> String.replace("\r", "\n") + end + + defp parse_quoted_string(rest, quote) do + quote_binary = <> + + if String.ends_with?(rest, quote_binary) do + inner = binary_part(rest, 0, byte_size(rest) - byte_size(quote_binary)) + unescape_quoted_string(inner, quote, "") + else + quote_binary <> rest + end + end + + defp unescape_quoted_string(<<>>, _quote, acc), do: acc + + defp unescape_quoted_string("\\" <> rest, quote, acc) do + case rest do + <<"n", tail::binary>> -> + unescape_quoted_string(tail, quote, acc <> "\n") + + <<"\\", tail::binary>> -> + unescape_quoted_string(tail, quote, acc <> "\\") + + <> when escaped == quote -> + unescape_quoted_string(tail, quote, acc <> <>) + + <> -> + unescape_quoted_string(tail, quote, acc <> "\\" <> <>) + + <<>> -> + acc <> "\\" + end + end + + defp unescape_quoted_string(<>, quote, acc) do + unescape_quoted_string(tail, quote, acc <> <>) + end end diff --git a/test/bds/ai/chat_streaming_test.exs b/test/bds/ai/chat_streaming_test.exs index d0fa2e8..855f762 100644 --- a/test/bds/ai/chat_streaming_test.exs +++ b/test/bds/ai/chat_streaming_test.exs @@ -104,25 +104,19 @@ defmodule BDS.AI.ChatStreamingTest do server = start_supervised!({Bandit, plug: StreamingChatPlug, port: 0, startup_log: false}) {:ok, {_address, port}} = ThousandIsland.listener_info(server) - - assert {:ok, _endpoint} = - BDS.AI.put_endpoint(:online, %{ - url: "http://127.0.0.1:#{port}/v1", - api_key: "sk-stream", - model: "stream-model" - }) - - assert :ok = BDS.AI.set_airplane_mode(false) assert {:ok, conversation} = BDS.AI.start_chat(%{model: "stream-model"}) - {:ok, conversation: conversation} + {:ok, conversation: conversation, streaming_port: port} end test "incremental content events arrive before the final reply and persistence matches", %{ - conversation: conversation + conversation: conversation, + streaming_port: port } do conversation_id = conversation.id + configure_streaming_runtime!(port) + assert {:ok, reply} = BDS.AI.send_chat_message(conversation_id, "tell me a story", event_target: self() @@ -142,11 +136,16 @@ defmodule BDS.AI.ChatStreamingTest do assert assistant_message.token_usage_output == 4 end - test "cancel_chat mid-stream aborts the HTTP request", %{conversation: conversation} do + test "cancel_chat mid-stream aborts the HTTP request", %{ + conversation: conversation, + streaming_port: port + } do Application.put_env(:bds, :chat_stream_scenario, :endless) conversation_id = conversation.id test_pid = self() + configure_streaming_runtime!(port) + task = Task.async(fn -> BDS.AI.send_chat_message(conversation_id, "stream forever", event_target: test_pid) @@ -161,4 +160,16 @@ defmodule BDS.AI.ChatStreamingTest do # The server notices the closed connection — the request was truly aborted. assert_receive :sse_client_disconnected, 2_000 end + + defp configure_streaming_runtime!(port) do + assert {:ok, _endpoint} = + BDS.AI.put_endpoint(:online, %{ + url: "http://127.0.0.1:#{port}/v1", + api_key: "sk-stream", + model: "stream-model" + }) + + assert :ok = BDS.AI.put_model_preference(:chat, "stream-model") + assert :ok = BDS.AI.set_airplane_mode(false) + end end diff --git a/test/bds/frontmatter_test.exs b/test/bds/frontmatter_test.exs new file mode 100644 index 0000000..7ae6ce8 --- /dev/null +++ b/test/bds/frontmatter_test.exs @@ -0,0 +1,30 @@ +defmodule BDS.FrontmatterTest do + use ExUnit.Case, async: true + + test "parse_document accepts CRLF frontmatter documents" do + contents = "---\r\ntitle: Hello\r\ntags:\r\n - elixir\r\n---\r\nBody\r\n" + + assert {:ok, %{fields: fields, body: body}} = BDS.Frontmatter.parse_document(contents) + assert fields["title"] == "Hello" + assert fields["tags"] == ["elixir"] + assert body == "Body" + end + + test "serialize_document roundtrips quoted strings with embedded quotes and escapes" do + fields = [ + {"title", "He said \"hi\" \\\\ there"}, + {"summary", "Ends with a quote\""}, + {"excerpt", "first line\nsecond line"} + ] + + contents = BDS.Frontmatter.serialize_document(fields, "Body") + + assert {:ok, %{fields: parsed_fields, body: body}} = + BDS.Frontmatter.parse_document(contents) + + assert parsed_fields["title"] == "He said \"hi\" \\\\ there" + assert parsed_fields["summary"] == "Ends with a quote\"" + assert parsed_fields["excerpt"] == "first line\nsecond line" + assert body == "Body" + end +end \ No newline at end of file