fix: implemented TD-07, chat await path with deadline

2026-06-12 12:08:27 +02:00
parent 2e633922f9
commit 66938c23f2
6 changed files with 124 additions and 6 deletions
--- a/TECHDEBTS.md
+++ b/TECHDEBTS.md
@@ -312,7 +312,20 @@ airplane mode / local model / toast.
 events arrive before the final `{:ok, reply}`; tool-call rounds still work;
 cancellation mid-stream (TD-07) aborts the HTTP request.
-### TD-07: Bound the chat await chain; end-to-end timeout & cancellation
+### TD-07: Bound the chat await chain; end-to-end timeout & cancellation ✅ DONE (2026-06-12)
 **Status: implemented.** `BDS.AI.Chat.send_chat_message/3` no longer waits
 unboundedly on the supervised chat task: `await_chat_task/2` now applies a
 global deadline derived from the configured per-request HTTP budget and the
 bounded tool loop (`BDS.AI.HttpClient.request_timeout_ms() *
 (chat_max_tool_rounds + 1) + config :bds, :chat, :await_timeout_margin_ms`). On
 deadline expiry it returns `{:error, :chat_timeout}` and shuts the task down via
 `Task.shutdown/2`, so the caller is released even if the runtime wedges.
 `config/config.exs` now exposes `:await_timeout_margin_ms` under `:chat`. The
 acceptance proof is a shutdown-aware blocking runtime test that asserts the
 timeout result, verifies the task receives shutdown, and confirms the
 conversation persists only the user message on timeout; existing cancellation
 and streaming tests remain green.
 **Context.** `BDS.AI.Chat.send_chat_message/3` blocks the caller (a LiveView
 process) on a hand-rolled `await_chat_task/1` — a raw `receive` with **no
--- a/config/config.exs
+++ b/config/config.exs
@@ -70,10 +70,13 @@ config :bds, :scripting,
 # streaming: chat completions use SSE when the provider supports it (set to
 # false for OpenAI-compatible servers that reject the "stream" flag).
 # stream_emit_interval_ms throttles how often streamed content reaches the UI.
 # await_timeout_margin_ms is added on top of the per-request HTTP budget across
 # the bounded tool-call loop, so the caller never waits forever.
 config :bds, :chat,
  max_tool_rounds: 10,
  streaming: true,
-  stream_emit_interval_ms: 100
+  stream_emit_interval_ms: 100,
  await_timeout_margin_ms: 5_000
 config :bds, :embeddings,
  backend: BDS.Embeddings.Backends.Neural,
--- a/lib/bds/ai/chat.ex
+++ b/lib/bds/ai/chat.ex
@@ -27,6 +27,7 @@ defmodule BDS.AI.Chat do
  @title_max_output_tokens 256
  @chat_title_max_length 30
  @chat_max_tool_rounds 10
  @chat_await_timeout_margin_ms 5_000
  @default_context_window 128_000
  @spec start_chat(map()) :: {:ok, map()} | {:error, Ecto.Changeset.t()}
@@ -208,7 +209,7 @@ defmodule BDS.AI.Chat do
      send(task.pid, :sandbox_ready)
      try do
-        await_chat_task(task)
+        await_chat_task(task, chat_await_timeout_ms())
      after
        InFlight.unregister(conversation.id)
      end
@@ -756,9 +757,20 @@ defmodule BDS.AI.Chat do
  # BoundedToolLoop: the tool-calling round count is capped by
  # config.chat_max_tool_rounds (falling back to the built-in default).
  defp chat_max_tool_rounds do
    chat_config(:max_tool_rounds, @chat_max_tool_rounds)
  end
  defp chat_await_timeout_ms do
    per_request_timeout_ms = BDS.AI.HttpClient.request_timeout_ms()
    per_request_timeout_ms * (chat_max_tool_rounds() + 1) +
      chat_config(:await_timeout_margin_ms, @chat_await_timeout_margin_ms)
  end
  defp chat_config(key, default) do
    :bds
    |> Application.get_env(:chat, [])
-    |> Keyword.get(:max_tool_rounds, @chat_max_tool_rounds)
+    |> Keyword.get(key, default)
  end
  defp chat_system_prompt(project_id, tools) do
@@ -853,7 +865,7 @@ defmodule BDS.AI.Chat do
    :ok
  end
-  defp await_chat_task(task) do
+  defp await_chat_task(task, timeout_ms) do
    ref = task.ref
    receive do
@@ -879,6 +891,10 @@ defmodule BDS.AI.Chat do
          _other ->
            {:error, :cancelled}
        end
    after
      timeout_ms ->
        _ = Task.shutdown(task, 100)
        {:error, :chat_timeout}
    end
  end
--- a/lib/bds/ai/http_client.ex
+++ b/lib/bds/ai/http_client.ex
@@ -25,6 +25,14 @@ defmodule BDS.AI.HttpClient do
  @default_get_max_retries 2
  @default_retry_delay_ms 500
  @spec request_timeout_ms() :: pos_integer()
  def request_timeout_ms do
    max(
      config(:connect_timeout_ms, @default_connect_timeout_ms),
      config(:receive_timeout_ms, @default_receive_timeout_ms)
    )
  end
  @spec get(String.t(), %{String.t() => String.t()}) ::
          {:ok, %{status: non_neg_integer(), headers: map(), body: binary()}} | {:error, term()}
  def get(url, headers) when is_binary(url) and is_map(headers) do
--- a/test/bds/ai_test.exs
+++ b/test/bds/ai_test.exs
@@ -279,6 +279,24 @@ defmodule BDS.AITest do
    end
  end
  defmodule ShutdownAwareBlockingRuntime do
    def generate(endpoint, request, opts) do
      Process.flag(:trap_exit, true)
      test_pid = Keyword.fetch!(opts, :test_pid)
      send(test_pid, {:blocking_runtime_started, endpoint, request, self()})
      receive do
        {:EXIT, _from, :shutdown} ->
          send(test_pid, :blocking_runtime_shutdown)
          exit(:shutdown)
      after
        5_000 ->
          {:ok, %{content: "too late", usage: %{input_tokens: 1, output_tokens: 1}}}
      end
    end
  end
  # Always returns another tool call and never a final answer, so a chat would
  # loop forever if the round count were not bounded.
  defmodule LoopingToolRuntime do
@@ -1772,6 +1790,64 @@ defmodule BDS.AITest do
    assert Enum.map(messages, & &1.role) == [:user]
  end
  @tag :chat_timeout
  test "send_chat_message times out a stalled chat round and keeps persisted state consistent" do
    original_chat_config = Application.get_env(:bds, :chat, [])
    original_http_config = Application.get_env(:bds, BDS.AI.HttpClient, [])
    Application.put_env(
      :bds,
      :chat,
      original_chat_config
      |> Keyword.put(:max_tool_rounds, 1)
      |> Keyword.put(:await_timeout_margin_ms, 25)
    )
    Application.put_env(
      :bds,
      BDS.AI.HttpClient,
      original_http_config
      |> Keyword.put(:connect_timeout_ms, 50)
      |> Keyword.put(:receive_timeout_ms, 50)
    )
    on_exit(fn ->
      Application.put_env(:bds, :chat, original_chat_config)
      Application.put_env(:bds, BDS.AI.HttpClient, original_http_config)
    end)
    assert {:ok, _endpoint} =
             BDS.AI.put_endpoint(
               :online,
               %{
                 url: "https://api.example.test/v1",
                 api_key: "online-secret",
                 model: "gpt-4o-mini"
               },
               secret_backend: FakeSecretBackend
             )
    assert {:ok, conversation} = BDS.AI.start_chat(%{model: "gpt-4o-mini"})
    started_at = System.monotonic_time(:millisecond)
    assert {:error, :chat_timeout} =
             BDS.AI.send_chat_message(conversation.id, "Please wait forever",
               runtime: ShutdownAwareBlockingRuntime,
               test_pid: self(),
               secret_backend: FakeSecretBackend
             )
    elapsed_ms = System.monotonic_time(:millisecond) - started_at
    assert elapsed_ms < 1_000
    assert_receive {:blocking_runtime_started, _endpoint, %{operation: :chat}, _pid}, 500
    assert_receive :blocking_runtime_shutdown, 500
    messages = BDS.AI.list_chat_messages(conversation.id)
    assert Enum.map(messages, & &1.role) == [:user]
  end
  test "get_surface_state and put_surface_state persist and restore surface UI state" do
    assert {:ok, conversation} = BDS.AI.start_chat(%{title: "Surface State", model: "gpt-4.1"})
--- a/test/bds/wxr_parser_test.exs
+++ b/test/bds/wxr_parser_test.exs
@@ -1,5 +1,5 @@
 defmodule BDS.WxrParserTest do
-  use ExUnit.Case, async: true
+  use ExUnit.Case, async: false
  alias BDS.WxrParser
@@ -102,6 +102,8 @@ defmodule BDS.WxrParserTest do
    </rss>
    """
    _warmup = WxrParser.parse_xml(sample_wxr_xml())
    atom_count_before = :erlang.system_info(:atom_count)
    parsed = WxrParser.parse_xml(xml)
    atom_count_after = :erlang.system_info(:atom_count)