# lib/bds/ai/json_content.ex
defmodule BDS.AI.JsonContent do
  @moduledoc """
  Decodes JSON object payloads from model responses, tolerating the markdown
  code fences and surrounding prose that smaller (local) models often emit
  instead of bare JSON.

  `decode/1` tries three strategies in order and returns the first map found:

    1. the whole content parsed as JSON;
    2. the body of each markdown code fence, in document order;
    3. the span from the first `{` to the last `}` of the content.

  Only JSON objects count as a result; arrays and scalars are rejected.
  """

  # Matches one triple-backtick fence with an optional `json` language tag and
  # captures the fence body non-greedily. `i` makes the tag case-insensitive,
  # `s` lets `.` span newlines so multi-line bodies are captured.
  @fence_pattern ~r/```(?:json)?\s*\n?(.*?)```/is

  @doc """
  Extracts a JSON object from `content`.

  Returns the decoded map, or `nil` when `content` is not a binary or no JSON
  object can be recovered from it.
  """
  @spec decode(term()) :: map() | nil
  def decode(content) when is_binary(content) do
    decode_strict(content) || decode_fenced(content) || decode_embedded_object(content)
  end

  def decode(_content), do: nil

  # Parses `content` as-is. Anything that is not a JSON object (parse errors,
  # arrays, scalars) yields nil so the caller can fall through to the next
  # strategy with `||`.
  defp decode_strict(content) do
    case Jason.decode(content) do
      {:ok, decoded} when is_map(decoded) -> decoded
      _other -> nil
    end
  end

  # Tries every fenced block in document order and returns the first one that
  # yields an object. Looking past the first fence matters when a response
  # shows a non-JSON fence (a template, an example) before the actual payload.
  defp decode_fenced(content) do
    @fence_pattern
    |> Regex.scan(content, capture: :all_but_first)
    |> Enum.find_value(fn [inner] ->
      decode_strict(String.trim(inner)) || decode_embedded_object(inner)
    end)
  end

  # Last-resort strategy: decode the byte span from the first `{` to the last
  # `}`. Offsets from `:binary.match/2` and `:binary.matches/2` are byte
  # offsets, which is what `binary_part/3` expects.
  defp decode_embedded_object(content) do
    with {start, _length} <- :binary.match(content, "{"),
         {last, _length} when last > start <-
           content |> :binary.matches("}") |> List.last() do
      content
      |> binary_part(start, last - start + 1)
      |> decode_strict()
    else
      _no_object -> nil
    end
  end
end

# test/bds/ai/json_content_test.exs
defmodule BDS.AI.JsonContentTest do
  use ExUnit.Case, async: true

  alias BDS.AI.JsonContent

  describe "decode/1" do
    test "decodes a bare JSON object" do
      assert %{"title" => "Sunset"} = JsonContent.decode(~s({"title": "Sunset"}))
    end

    test "decodes a JSON object wrapped in a json markdown fence" do
      content = """
      ```json
      {
        "title": "Ahornblätter im Herbstlicht",
        "alt": "Nahaufnahme von Ahornblättern",
        "caption": "Einige Ahornblätter verfärben sich."
      }
      ```
      """

      assert %{
               "title" => "Ahornblätter im Herbstlicht",
               "alt" => "Nahaufnahme von Ahornblättern",
               "caption" => "Einige Ahornblätter verfärben sich."
             } = JsonContent.decode(content)
    end

    test "decodes a JSON object wrapped in an untagged markdown fence" do
      assert %{"language_code" => "de"} =
               JsonContent.decode("```\n{\"language_code\": \"de\"}\n```")
    end

    test "decodes a fenced JSON object with an uppercase language tag" do
      assert %{"slug" => "herbst"} = JsonContent.decode("```JSON\n{\"slug\": \"herbst\"}\n```")
    end

    test "decodes a fenced JSON object surrounded by prose" do
      content = """
      Here is the requested metadata:

      ```json
      {"title": "Herbst"}
      ```

      Let me know if you need anything else.
      """

      assert %{"title" => "Herbst"} = JsonContent.decode(content)
    end

    test "decodes a later fenced block when the first fence holds no JSON object" do
      content = """
      Template:

      ```
      {placeholder}
      ```

      Result:

      ```json
      {"title": "Herbst"}
      ```
      """

      assert %{"title" => "Herbst"} = JsonContent.decode(content)
    end

    test "decodes a bare JSON object surrounded by prose" do
      content = ~s(Sure! {"title": "Herbst", "alt": "Blätter"} Hope this helps.)

      assert %{"title" => "Herbst", "alt" => "Blätter"} = JsonContent.decode(content)
    end

    test "returns nil for content without a JSON object" do
      assert JsonContent.decode("This is not valid JSON") == nil
    end

    test "returns nil for a JSON array" do
      assert JsonContent.decode(~s([1, 2, 3])) == nil
    end

    test "returns nil for nil and non-binary input" do
      assert JsonContent.decode(nil) == nil
      assert JsonContent.decode(42) == nil
    end
  end
end