fix: implemented TD-07, chat await path with deadline

This commit is contained in:
2026-06-12 12:08:27 +02:00
parent 2e633922f9
commit 66938c23f2
6 changed files with 124 additions and 6 deletions

View File

@@ -312,7 +312,20 @@ airplane mode / local model / toast.
events arrive before the final `{:ok, reply}`; tool-call rounds still work;
cancellation mid-stream (TD-07) aborts the HTTP request.
### TD-07: Bound the chat await chain; end-to-end timeout & cancellation
### TD-07: Bound the chat await chain; end-to-end timeout & cancellation ✅ DONE (2026-06-12)
**Status: implemented.** `BDS.AI.Chat.send_chat_message/3` no longer waits
unboundedly on the supervised chat task: `await_chat_task/2` now applies a
global deadline derived from the configured per-request HTTP budget and the
bounded tool loop (`BDS.AI.HttpClient.request_timeout_ms() *
(chat_max_tool_rounds + 1) + config :bds, :chat, :await_timeout_margin_ms`). On
deadline expiry it returns `{:error, :chat_timeout}` and shuts the task down via
`Task.shutdown/2`, so the caller is released even if the runtime wedges.
`config/config.exs` now exposes `:await_timeout_margin_ms` under `:chat`. The
acceptance proof is a shutdown-aware blocking runtime test that asserts the
timeout result, verifies the task receives shutdown, and confirms the
conversation persists only the user message on timeout; existing cancellation
and streaming tests remain green.
**Context.** `BDS.AI.Chat.send_chat_message/3` blocks the caller (a LiveView
process) on a hand-rolled `await_chat_task/1` — a raw `receive` with **no

View File

@@ -70,10 +70,13 @@ config :bds, :scripting,
# streaming: chat completions use SSE when the provider supports it (set to
# false for OpenAI-compatible servers that reject the "stream" flag).
# stream_emit_interval_ms throttles how often streamed content reaches the UI.
# await_timeout_margin_ms is added on top of the per-request HTTP budget across
# the bounded tool-call loop, so the caller never waits forever.
config :bds, :chat,
max_tool_rounds: 10,
streaming: true,
stream_emit_interval_ms: 100
stream_emit_interval_ms: 100,
await_timeout_margin_ms: 5_000
config :bds, :embeddings,
backend: BDS.Embeddings.Backends.Neural,

View File

@@ -27,6 +27,7 @@ defmodule BDS.AI.Chat do
@title_max_output_tokens 256
@chat_title_max_length 30
@chat_max_tool_rounds 10
@chat_await_timeout_margin_ms 5_000
@default_context_window 128_000
@spec start_chat(map()) :: {:ok, map()} | {:error, Ecto.Changeset.t()}
@@ -208,7 +209,7 @@ defmodule BDS.AI.Chat do
send(task.pid, :sandbox_ready)
try do
await_chat_task(task)
await_chat_task(task, chat_await_timeout_ms())
after
InFlight.unregister(conversation.id)
end
@@ -756,9 +757,20 @@ defmodule BDS.AI.Chat do
# BoundedToolLoop: the tool-calling round count is capped by
# config.chat_max_tool_rounds (falling back to the built-in default).
defp chat_max_tool_rounds do
chat_config(:max_tool_rounds, @chat_max_tool_rounds)
end
defp chat_await_timeout_ms do
per_request_timeout_ms = BDS.AI.HttpClient.request_timeout_ms()
per_request_timeout_ms * (chat_max_tool_rounds() + 1) +
chat_config(:await_timeout_margin_ms, @chat_await_timeout_margin_ms)
end
defp chat_config(key, default) do
:bds
|> Application.get_env(:chat, [])
|> Keyword.get(:max_tool_rounds, @chat_max_tool_rounds)
|> Keyword.get(key, default)
end
defp chat_system_prompt(project_id, tools) do
@@ -853,7 +865,7 @@ defmodule BDS.AI.Chat do
:ok
end
defp await_chat_task(task) do
defp await_chat_task(task, timeout_ms) do
ref = task.ref
receive do
@@ -879,6 +891,10 @@ defmodule BDS.AI.Chat do
_other ->
{:error, :cancelled}
end
after
timeout_ms ->
_ = Task.shutdown(task, 100)
{:error, :chat_timeout}
end
end

View File

@@ -25,6 +25,14 @@ defmodule BDS.AI.HttpClient do
@default_get_max_retries 2
@default_retry_delay_ms 500
@spec request_timeout_ms() :: pos_integer()
def request_timeout_ms do
max(
config(:connect_timeout_ms, @default_connect_timeout_ms),
config(:receive_timeout_ms, @default_receive_timeout_ms)
)
end
@spec get(String.t(), %{String.t() => String.t()}) ::
{:ok, %{status: non_neg_integer(), headers: map(), body: binary()}} | {:error, term()}
def get(url, headers) when is_binary(url) and is_map(headers) do

View File

@@ -279,6 +279,24 @@ defmodule BDS.AITest do
end
end
defmodule ShutdownAwareBlockingRuntime do
def generate(endpoint, request, opts) do
Process.flag(:trap_exit, true)
test_pid = Keyword.fetch!(opts, :test_pid)
send(test_pid, {:blocking_runtime_started, endpoint, request, self()})
receive do
{:EXIT, _from, :shutdown} ->
send(test_pid, :blocking_runtime_shutdown)
exit(:shutdown)
after
5_000 ->
{:ok, %{content: "too late", usage: %{input_tokens: 1, output_tokens: 1}}}
end
end
end
# Always returns another tool call and never a final answer, so a chat would
# loop forever if the round count were not bounded.
defmodule LoopingToolRuntime do
@@ -1772,6 +1790,64 @@ defmodule BDS.AITest do
assert Enum.map(messages, & &1.role) == [:user]
end
@tag :chat_timeout
test "send_chat_message times out a stalled chat round and keeps persisted state consistent" do
original_chat_config = Application.get_env(:bds, :chat, [])
original_http_config = Application.get_env(:bds, BDS.AI.HttpClient, [])
Application.put_env(
:bds,
:chat,
original_chat_config
|> Keyword.put(:max_tool_rounds, 1)
|> Keyword.put(:await_timeout_margin_ms, 25)
)
Application.put_env(
:bds,
BDS.AI.HttpClient,
original_http_config
|> Keyword.put(:connect_timeout_ms, 50)
|> Keyword.put(:receive_timeout_ms, 50)
)
on_exit(fn ->
Application.put_env(:bds, :chat, original_chat_config)
Application.put_env(:bds, BDS.AI.HttpClient, original_http_config)
end)
assert {:ok, _endpoint} =
BDS.AI.put_endpoint(
:online,
%{
url: "https://api.example.test/v1",
api_key: "online-secret",
model: "gpt-4o-mini"
},
secret_backend: FakeSecretBackend
)
assert {:ok, conversation} = BDS.AI.start_chat(%{model: "gpt-4o-mini"})
started_at = System.monotonic_time(:millisecond)
assert {:error, :chat_timeout} =
BDS.AI.send_chat_message(conversation.id, "Please wait forever",
runtime: ShutdownAwareBlockingRuntime,
test_pid: self(),
secret_backend: FakeSecretBackend
)
elapsed_ms = System.monotonic_time(:millisecond) - started_at
assert elapsed_ms < 1_000
assert_receive {:blocking_runtime_started, _endpoint, %{operation: :chat}, _pid}, 500
assert_receive :blocking_runtime_shutdown, 500
messages = BDS.AI.list_chat_messages(conversation.id)
assert Enum.map(messages, & &1.role) == [:user]
end
test "get_surface_state and put_surface_state persist and restore surface UI state" do
assert {:ok, conversation} = BDS.AI.start_chat(%{title: "Surface State", model: "gpt-4.1"})

View File

@@ -1,5 +1,5 @@
defmodule BDS.WxrParserTest do
use ExUnit.Case, async: true
use ExUnit.Case, async: false
alias BDS.WxrParser
@@ -102,6 +102,8 @@ defmodule BDS.WxrParserTest do
</rss>
"""
_warmup = WxrParser.parse_xml(sample_wxr_xml())
atom_count_before = :erlang.system_info(:atom_count)
parsed = WxrParser.parse_xml(xml)
atom_count_after = :erlang.system_info(:atom_count)