fix: implement real SSE streaming (TD-06)

This commit is contained in:
2026-06-11 16:37:08 +02:00
parent a5391e8e25
commit 9325de2db4
9 changed files with 991 additions and 15 deletions

View File

@@ -0,0 +1,164 @@
defmodule BDS.AI.ChatStreamingTest do
use ExUnit.Case, async: false
defmodule StreamingChatPlug do
  @moduledoc false

  # Stand-in for an OpenAI-style chat endpoint used as the HTTP peer in these
  # tests. A request whose JSON body carries `"stream": true` is answered as a
  # chunked `text/event-stream`; every other request gets one JSON completion.
  # The streaming scenario (`:short` or `:endless`) is read from the
  # `:chat_stream_scenario` application env on each request.
  @behaviour Plug

  import Plug.Conn

  @impl true
  def init(opts), do: opts

  @impl true
  def call(conn, _opts) do
    {:ok, body, conn} = read_body(conn)

    case Jason.decode!(body) do
      %{"stream" => true} -> stream_chat(conn)
      # Chat-title generation and other one-shot requests stay non-streaming.
      _one_shot -> send_completion(conn)
    end
  end

  # Replies with a single, non-streamed JSON completion.
  defp send_completion(conn) do
    completion = %{
      "choices" => [%{"message" => %{"content" => "Story Time"}}],
      "usage" => %{"prompt_tokens" => 1, "completion_tokens" => 1}
    }

    conn
    |> put_resp_content_type("application/json")
    |> send_resp(200, Jason.encode!(completion))
  end

  # Opens the chunked SSE response and hands off to the configured scenario.
  # An unknown scenario raises a CaseClauseError on purpose: it is a test bug.
  defp stream_chat(conn) do
    conn =
      conn
      |> put_resp_content_type("text/event-stream")
      |> send_chunked(200)

    case Application.get_env(:bds, :chat_stream_scenario, :short) do
      :short -> stream_short(conn)
      :endless -> stream_endless(conn)
    end
  end

  # Emits three content deltas, a usage-only chunk and the [DONE] sentinel,
  # stopping early if a chunk can no longer be written.
  defp stream_short(conn) do
    events =
      [
        delta_event(%{"content" => "Once"}),
        delta_event(%{"content" => " upon"}),
        delta_event(%{"content" => " a time"}),
        "data: " <>
          Jason.encode!(%{
            "choices" => [],
            "usage" => %{"prompt_tokens" => 9, "completion_tokens" => 4}
          }) <> "\n\n",
        "data: [DONE]\n\n"
      ]

    Enum.reduce_while(events, conn, fn event, conn ->
      case chunk(conn, event) do
        {:ok, conn} -> {:cont, conn}
        {:error, _reason} -> {:halt, conn}
      end
    end)
  end

  # Emits a "tick " delta every 50 ms until writing a chunk fails, then tells
  # the test process that the client side of the connection went away.
  defp stream_endless(conn) do
    case chunk(conn, delta_event(%{"content" => "tick "})) do
      {:ok, conn} ->
        Process.sleep(50)
        stream_endless(conn)

      {:error, _reason} ->
        send(test_pid(), :sse_client_disconnected)
        conn
    end
  end

  # Frames one chat-completion delta as an SSE `data:` event.
  defp delta_event(delta) do
    "data: " <> Jason.encode!(%{"choices" => [%{"delta" => delta}]}) <> "\n\n"
  end

  # Pid of the running test, published through the application env by `setup`.
  defp test_pid, do: Application.get_env(:bds, :chat_stream_test_pid)
end
# Boots the fake streaming endpoint on an ephemeral port, points the :online
# AI endpoint at it and starts a conversation for each test. Every application
# env key written here is restored or removed again in `on_exit`, so no
# per-test state (including the test pid) leaks into later tests.
setup do
  :ok = Ecto.Adapters.SQL.Sandbox.checkout(BDS.Repo)

  original_chat = Application.fetch_env(:bds, :chat)

  on_exit(fn ->
    Application.delete_env(:bds, :chat_stream_test_pid)
    Application.delete_env(:bds, :chat_stream_scenario)

    case original_chat do
      {:ok, value} -> Application.put_env(:bds, :chat, value)
      :error -> Application.delete_env(:bds, :chat)
    end
  end)

  Application.put_env(:bds, :chat_stream_test_pid, self())
  Application.put_env(:bds, :chat_stream_scenario, :short)

  # An emit interval of 0 is set so the tests can assert on every snapshot.
  Application.put_env(
    :bds,
    :chat,
    Keyword.merge(Application.get_env(:bds, :chat, []), stream_emit_interval_ms: 0)
  )

  server = start_supervised!({Bandit, plug: StreamingChatPlug, port: 0, startup_log: false})
  {:ok, {_address, port}} = ThousandIsland.listener_info(server)

  assert {:ok, _endpoint} =
           BDS.AI.put_endpoint(:online, %{
             url: "http://127.0.0.1:#{port}/v1",
             api_key: "sk-stream",
             model: "stream-model"
           })

  assert :ok = BDS.AI.set_airplane_mode(false)
  assert {:ok, conversation} = BDS.AI.start_chat(%{model: "stream-model"})

  {:ok, conversation: conversation}
end
# The :short scenario streams "Once", " upon", " a time"; each cumulative
# snapshot must already be in the mailbox once send_chat_message/3 returns.
test "incremental content events arrive before the final reply and persistence matches", %{
  conversation: conversation
} do
  conversation_id = conversation.id
  opts = [event_target: self()]

  assert {:ok, reply} = BDS.AI.send_chat_message(conversation_id, "tell me a story", opts)
  assert reply.assistant_message.content == "Once upon a time"

  for snapshot <- ["Once", "Once upon", "Once upon a time"] do
    assert_received {:chat_streaming_content, ^conversation_id, ^snapshot}
  end

  # The persisted assistant message carries the full text and the usage
  # figures from the stream's usage chunk.
  persisted = conversation_id |> BDS.AI.list_chat_messages() |> List.last()

  assert persisted.role == :assistant
  assert persisted.content == "Once upon a time"
  assert persisted.token_usage_input == 9
  assert persisted.token_usage_output == 4
end
# Uses the :endless scenario so the only way the request can end is through
# cancellation.
test "cancel_chat mid-stream aborts the HTTP request", %{conversation: conversation} do
  Application.put_env(:bds, :chat_stream_scenario, :endless)

  conversation_id = conversation.id
  listener = self()

  send_message = fn ->
    BDS.AI.send_chat_message(conversation_id, "stream forever", event_target: listener)
  end

  pending = Task.async(send_message)

  # Wait until tokens are actually flowing before cancelling.
  assert_receive {:chat_streaming_content, ^conversation_id, _content}, 2_000

  assert :ok = BDS.AI.cancel_chat(conversation_id)
  assert {:error, :cancelled} = Task.await(pending)

  # The server notices the closed connection — the request was truly aborted.
  assert_receive :sse_client_disconnected, 2_000
end
end

View File

@@ -0,0 +1,253 @@
defmodule BDS.AI.OpenAICompatibleRuntimeStreamingTest do
use ExUnit.Case, async: false
alias BDS.AI.OpenAICompatibleRuntime
defmodule SSEPlug do
  @moduledoc false

  # Fake OpenAI-compatible endpoint. The decoded request body is forwarded to
  # the test process as `{:endpoint_request, payload}`, and the reply is chosen
  # from the request's "model" together with its "stream" flag.
  @behaviour Plug

  import Plug.Conn

  @impl true
  def init(opts), do: opts

  @impl true
  def call(conn, _opts) do
    {:ok, body, conn} = read_body(conn)
    payload = Jason.decode!(body)
    send(test_pid(), {:endpoint_request, payload})
    respond(conn, payload["model"], payload)
  end

  # Streams an empty role delta, three content deltas, a usage-only chunk and
  # the [DONE] sentinel.
  defp respond(conn, "stream-content", %{"stream" => true}) do
    stream(conn, [
      delta_event(%{"role" => "assistant", "content" => ""}),
      delta_event(%{"content" => "Once"}),
      delta_event(%{"content" => " upon"}),
      delta_event(%{"content" => " a time"}),
      data_event(%{
        "choices" => [],
        "usage" => %{"prompt_tokens" => 7, "completion_tokens" => 3}
      }),
      "data: [DONE]\n\n"
    ])
  end

  # Streams one tool call whose JSON arguments are split over three fragments.
  defp respond(conn, "stream-tools", %{"stream" => true}) do
    stream(conn, [
      delta_event(%{
        "tool_calls" => [
          %{
            "index" => 0,
            "id" => "call-1",
            "function" => %{"name" => "search_posts", "arguments" => ""}
          }
        ]
      }),
      delta_event(%{
        "tool_calls" => [%{"index" => 0, "function" => %{"arguments" => "{\"query\":"}}]
      }),
      delta_event(%{
        "tool_calls" => [%{"index" => 0, "function" => %{"arguments" => "\"sun\"}"}}]
      }),
      "data: [DONE]\n\n"
    ])
  end

  # Answers a streaming request with an HTTP 503 instead of an event stream.
  defp respond(conn, "stream-error", %{"stream" => true}) do
    send_resp(conn, 503, ~s({"error":"overloaded"}))
  end

  # Simulates a provider that ignores the "stream" flag and answers with a
  # plain JSON completion.
  defp respond(conn, "ignores-stream", %{"stream" => true}) do
    send_completion(conn, "plain json despite stream", 5, 2)
  end

  # Default: any other model or a non-streaming request gets a JSON completion.
  defp respond(conn, _model, _payload) do
    send_completion(conn, "non-streaming reply", 1, 1)
  end

  # Sends a single JSON chat completion with the given content and usage.
  defp send_completion(conn, content, prompt_tokens, completion_tokens) do
    completion = %{
      "choices" => [%{"message" => %{"content" => content}}],
      "usage" => %{"prompt_tokens" => prompt_tokens, "completion_tokens" => completion_tokens}
    }

    conn
    |> put_resp_content_type("application/json")
    |> send_resp(200, Jason.encode!(completion))
  end

  # Frames one chat-completion delta as an SSE `data:` event.
  defp delta_event(delta), do: data_event(%{"choices" => [%{"delta" => delta}]})

  # Frames an arbitrary JSON payload as an SSE `data:` event.
  defp data_event(payload), do: "data: " <> Jason.encode!(payload) <> "\n\n"

  # Writes the events as a chunked text/event-stream response, stopping early
  # if a chunk can no longer be written.
  defp stream(conn, events) do
    conn =
      conn
      |> put_resp_content_type("text/event-stream")
      |> send_chunked(200)

    Enum.reduce_while(events, conn, fn event, conn ->
      case chunk(conn, event) do
        {:ok, conn} -> {:cont, conn}
        {:error, _reason} -> {:halt, conn}
      end
    end)
  end

  # Pid of the running test, published through the application env by `setup`.
  defp test_pid, do: Application.get_env(:bds, :sse_plug_test_pid)
end
# Starts the fake SSE endpoint on an ephemeral port and exposes its base URL
# to the tests. Every application env key written here is restored or removed
# again in `on_exit`, so the test pid does not leak into later tests.
setup do
  :ok = Ecto.Adapters.SQL.Sandbox.checkout(BDS.Repo)

  original_chat = Application.fetch_env(:bds, :chat)

  on_exit(fn ->
    Application.delete_env(:bds, :sse_plug_test_pid)

    case original_chat do
      {:ok, value} -> Application.put_env(:bds, :chat, value)
      :error -> Application.delete_env(:bds, :chat)
    end
  end)

  Application.put_env(:bds, :sse_plug_test_pid, self())

  # An emit interval of 0 is set so the tests can assert on every snapshot.
  Application.put_env(
    :bds,
    :chat,
    Keyword.merge(Application.get_env(:bds, :chat, []), stream_emit_interval_ms: 0)
  )

  server = start_supervised!({Bandit, plug: SSEPlug, port: 0, startup_log: false})
  {:ok, {_address, port}} = ThousandIsland.listener_info(server)

  {:ok, url: "http://127.0.0.1:#{port}/v1"}
end
# Builds a minimal :chat request for `model` with a single user turn.
defp chat_request(model) do
  user_turn = %{"role" => "user", "content" => "hello"}

  %{operation: :chat, model: model, max_output_tokens: 64, messages: [user_turn]}
end
# Returns an on_stream callback that relays each event to the calling test
# process as `{:stream_event, event}`.
defp stream_collector do
  receiver = self()
  &send(receiver, {:stream_event, &1})
end
test "generate streams cumulative content and returns the assembled response", %{url: url} do
  endpoint = %{url: url, api_key: "sk-test"}
  request = chat_request("stream-content")

  assert {:ok, response} =
           OpenAICompatibleRuntime.generate(endpoint, request, on_stream: stream_collector())

  # Final response: assembled text, no tool calls, usage from the usage chunk.
  assert response.content == "Once upon a time"
  assert response.tool_calls == []
  assert response.usage.input_tokens == 7
  assert response.usage.output_tokens == 3

  # The outgoing request asked for streaming and for usage to be included.
  assert_received {:endpoint_request, sent}
  assert sent["stream"] == true
  assert sent["stream_options"] == %{"include_usage" => true}

  # Every cumulative snapshot reached the callback.
  assert_received {:stream_event, %{content: "Once"}}
  assert_received {:stream_event, %{content: "Once upon"}}
  assert_received {:stream_event, %{content: "Once upon a time"}}
end
test "generate assembles tool calls streamed as fragments", %{url: url} do
  endpoint = %{url: url, api_key: "sk-test"}
  request = chat_request("stream-tools")
  expected_call = %{id: "call-1", name: "search_posts", arguments: %{"query" => "sun"}}

  assert {:ok, response} =
           OpenAICompatibleRuntime.generate(endpoint, request, on_stream: stream_collector())

  # A tool-call-only stream yields no text; the argument fragments are joined
  # and decoded into a map.
  assert response.content == nil
  assert response.tool_calls == [expected_call]
end
# The fake endpoint answers the "stream-error" model with HTTP 503.
test "an error status during streaming surfaces as a structured error", %{url: url} do
  endpoint = %{url: url, api_key: "sk-test"}
  request = chat_request("stream-error")

  outcome = OpenAICompatibleRuntime.generate(endpoint, request, on_stream: stream_collector())

  assert {:error, %{kind: :http_error, status: 503}} = outcome
end
# The "ignores-stream" model replies with plain JSON even though streaming
# was requested.
test "a provider that ignores the stream flag still produces a full response", %{url: url} do
  endpoint = %{url: url, api_key: "sk-test"}
  request = chat_request("ignores-stream")

  assert {:ok, response} =
           OpenAICompatibleRuntime.generate(endpoint, request, on_stream: stream_collector())

  assert response.content == "plain json despite stream"
  assert response.usage.input_tokens == 5
  assert response.usage.output_tokens == 2
end
test "streaming is skipped when disabled via config", %{url: url} do
  # Turn streaming off on top of whatever :chat config is currently in place.
  chat_config = :bds |> Application.get_env(:chat, []) |> Keyword.merge(streaming: false)
  Application.put_env(:bds, :chat, chat_config)

  endpoint = %{url: url, api_key: "sk-test"}
  request = chat_request("any-model")

  assert {:ok, %{content: "non-streaming reply"}} =
           OpenAICompatibleRuntime.generate(endpoint, request, on_stream: stream_collector())

  # No "stream" key went over the wire and the callback never fired.
  assert_received {:endpoint_request, sent}
  refute Map.has_key?(sent, "stream")
  refute_received {:stream_event, _event}
end
# Called with empty options, i.e. without an on_stream callback.
test "streaming requires an on_stream callback", %{url: url} do
  endpoint = %{url: url, api_key: "sk-test"}
  request = chat_request("any-model")

  result = OpenAICompatibleRuntime.generate(endpoint, request, [])
  assert {:ok, %{content: "non-streaming reply"}} = result

  assert_received {:endpoint_request, sent}
  refute Map.has_key?(sent, "stream")
end
# A :chat_title request must not ask for streaming even when a callback is
# supplied.
test "non-chat operations never stream", %{url: url} do
  endpoint = %{url: url, api_key: "sk-test"}

  title_request = %{
    operation: :chat_title,
    model: "any-model",
    max_output_tokens: 32,
    messages: [%{"role" => "user", "content" => "Topic: hello"}]
  }

  assert {:ok, %{content: "non-streaming reply"}} =
           OpenAICompatibleRuntime.generate(endpoint, title_request,
             on_stream: stream_collector()
           )

  assert_received {:endpoint_request, sent}
  refute Map.has_key?(sent, "stream")
end
end

200
test/bds/ai/sse_test.exs Normal file
View File

@@ -0,0 +1,200 @@
defmodule BDS.AI.SSETest do
use ExUnit.Case, async: true
alias BDS.AI.SSE
# Frames a JSON-encodable payload as one SSE `data:` event.
defp chunk_event(payload) do
  "data: #{Jason.encode!(payload)}\n\n"
end
# Chat-completion chunk whose only choice carries a content delta of `text`.
defp content_delta(text) do
  choice = %{"delta" => %{"content" => text}}
  %{"choices" => [choice]}
end
test "assembles content from deltas across separate chunks" do
  parser =
    ["Hel", "lo ", "world"]
    |> Enum.map(&chunk_event(content_delta(&1)))
    |> Enum.reduce(SSE.new(nil), fn raw, acc -> SSE.feed(acc, raw) end)
    |> SSE.feed("data: [DONE]\n\n")

  assert %{content: "Hello world", tool_calls: [], usage: nil} = SSE.finish(parser)
end
test "handles events split across arbitrary chunk boundaries" do
  raw =
    chunk_event(content_delta("alpha ")) <>
      chunk_event(content_delta("beta")) <> "data: [DONE]\n\n"

  # Feed the byte stream in 7-byte slices to exercise buffering.
  slices =
    raw
    |> :binary.bin_to_list()
    |> Enum.chunk_every(7)
    |> Enum.map(&:binary.list_to_bin/1)

  parser = Enum.reduce(slices, SSE.new(nil), fn slice, acc -> SSE.feed(acc, slice) end)

  assert %{content: "alpha beta"} = SSE.finish(parser)
end
test "supports CRLF line endings and data lines without a space" do
  # The first event has no space after "data:" and both events end in CRLF.
  first_event = "data:" <> Jason.encode!(content_delta("crlf")) <> "\r\n\r\n"
  wire = first_event <> "data: [DONE]\r\n\r\n"

  parser = SSE.new(nil) |> SSE.feed(wire)

  assert %{content: "crlf"} = SSE.finish(parser)
end
test "ignores comments, other fields, and undecodable data" do
  # One event mixes `event:` and `id:` fields with a valid data line.
  named_event = "event: message\nid: 7\n" <> chunk_event(content_delta("ok"))
  feeds = [": keep-alive\n\n", named_event, "data: not-json\n\n"]

  parser = Enum.reduce(feeds, SSE.new(nil), fn raw, acc -> SSE.feed(acc, raw) end)

  assert %{content: "ok"} = SSE.finish(parser)
end
test "stops processing after [DONE]" do
  feeds = [
    chunk_event(content_delta("kept")),
    "data: [DONE]\n\n",
    # Arrives after the sentinel, so it must not reach the content.
    chunk_event(content_delta(" dropped"))
  ]

  parser = Enum.reduce(feeds, SSE.new(nil), fn raw, acc -> SSE.feed(acc, raw) end)

  assert %{content: "kept"} = SSE.finish(parser)
end
test "finishes a trailing event that lacks the final blank line" do
  # No terminating "\n\n": the event is still buffered when finish/1 runs.
  unterminated = "data: " <> Jason.encode!(content_delta("tail"))

  parser = SSE.new(nil) |> SSE.feed(unterminated)

  assert %{content: "tail"} = SSE.finish(parser)
end
test "content is nil when the stream carried no content" do
  parser = SSE.new(nil) |> SSE.feed("data: [DONE]\n\n")

  assert %{content: nil} = SSE.finish(parser)
end
test "assembles tool calls from fragments in OpenAI wire shape" do
  # Wraps a list of tool-call fragments in the chunk envelope.
  tool_delta = fn calls -> %{"choices" => [%{"delta" => %{"tool_calls" => calls}}]} end

  opening =
    tool_delta.([
      %{
        "index" => 0,
        "id" => "call-1",
        "function" => %{"name" => "search_posts", "arguments" => ""}
      }
    ])

  # Continues call 0 and introduces a complete call 1 in the same chunk.
  middle =
    tool_delta.([
      %{"index" => 0, "function" => %{"arguments" => "{\"query\":"}},
      %{
        "index" => 1,
        "id" => "call-2",
        "function" => %{"name" => "count_posts", "arguments" => "{}"}
      }
    ])

  closing = tool_delta.([%{"index" => 0, "function" => %{"arguments" => "\"sun\"}"}}])

  parser =
    Enum.reduce([opening, middle, closing], SSE.new(nil), fn fragment, acc ->
      SSE.feed(acc, chunk_event(fragment))
    end)

  assert %{tool_calls: assembled} = SSE.finish(parser)

  assert assembled == [
           %{
             "id" => "call-1",
             "function" => %{"name" => "search_posts", "arguments" => ~s({"query":"sun"})}
           },
           %{"id" => "call-2", "function" => %{"name" => "count_posts", "arguments" => "{}"}}
         ]
end
test "captures usage from the final chunk" do
  usage_chunk = %{
    "choices" => [],
    "usage" => %{"prompt_tokens" => 7, "completion_tokens" => 2}
  }

  feeds = [chunk_event(content_delta("hi")), chunk_event(usage_chunk)]
  parser = Enum.reduce(feeds, SSE.new(nil), fn raw, acc -> SSE.feed(acc, raw) end)

  assert %{usage: %{"prompt_tokens" => 7, "completion_tokens" => 2}} = SSE.finish(parser)
end
test "emits cumulative content snapshots to the callback" do
  receiver = self()
  notify = fn event -> send(receiver, {:stream_event, event}) end

  # The parser state is not needed afterwards; only the callbacks matter.
  _parser =
    notify
    |> SSE.new(emit_interval_ms: 0)
    |> SSE.feed(chunk_event(content_delta("one")))
    |> SSE.feed(chunk_event(content_delta(" two")))

  assert_received {:stream_event, %{content: "one"}}
  assert_received {:stream_event, %{content: "one two"}}
end
test "throttles intermediate emissions but always emits the first delta" do
  receiver = self()
  notify = fn event -> send(receiver, {:stream_event, event}) end

  # A 60 s interval cannot elapse while the three deltas are fed.
  _parser =
    ["first", " second", " third"]
    |> Enum.map(&chunk_event(content_delta(&1)))
    |> Enum.reduce(SSE.new(notify, emit_interval_ms: 60_000), fn raw, acc ->
      SSE.feed(acc, raw)
    end)

  assert_received {:stream_event, %{content: "first"}}
  refute_received {:stream_event, _event}
end
test "tool-call-only streams emit no content events" do
  receiver = self()
  notify = fn event -> send(receiver, {:stream_event, event}) end

  tool_call = %{"index" => 0, "id" => "c", "function" => %{"name" => "n", "arguments" => "{}"}}
  tool_only_chunk = %{"choices" => [%{"delta" => %{"tool_calls" => [tool_call]}}]}

  _parser =
    notify
    |> SSE.new(emit_interval_ms: 0)
    |> SSE.feed(chunk_event(tool_only_chunk))

  refute_received {:stream_event, _event}
end
end