diff --git a/TECHDEBTS.md b/TECHDEBTS.md index b1f15b8..c8bfe0a 100644 --- a/TECHDEBTS.md +++ b/TECHDEBTS.md @@ -127,10 +127,20 @@ within the configured budget instead of hanging; dialyzer clean. --- -### TD-03: Fix `BDS.AI.InFlight` ETS ownership and creation race +### TD-03: Fix `BDS.AI.InFlight` ETS ownership and creation race ✅ DONE (2026-06-11) **Severity: High (correctness).** +**Status: implemented.** `BDS.AI.InFlight` is now a minimal GenServer whose +`init/1` creates the named table (`:named_table, :public, :set, +read_concurrency: true`); it is supervised in `BDS.Application` (before +anything that uses chat), so the table lives for the VM's lifetime and the +concurrent-first-use race is impossible by construction. The lazy `table/0` +creation path is deleted; `register/unregister/lookup` reference the named +table directly. `test/bds/ai/in_flight_test.exs` proves registrations survive +the death of the registering process and that the supervised process owns the +table. + **Context.** `lib/bds/ai/in_flight.ex` creates its named ETS table lazily in whichever process first calls `table/0`. Two defects: (1) the table is owned by that first caller — typically a transient LiveView or chat task — so when diff --git a/lib/bds/ai/in_flight.ex b/lib/bds/ai/in_flight.ex index 13e1e98..a5fe388 100644 --- a/lib/bds/ai/in_flight.ex +++ b/lib/bds/ai/in_flight.ex @@ -1,29 +1,38 @@ defmodule BDS.AI.InFlight do @moduledoc false + # Registry of in-flight chat tasks keyed by conversation id. The named ETS + # table is owned by this supervised GenServer (started from the application + # supervision tree), so registrations survive the exit of the registering + # process and there is no creation race between concurrent first callers. + use GenServer + @table :bds_ai_in_flight + def start_link(opts) do + GenServer.start_link(__MODULE__, opts, name: __MODULE__) + end + + @impl true + def init(_opts) do + table = :ets.new(@table, [:named_table, :public, :set, read_concurrency: true]) + {:ok, table} + end + def register(conversation_id, pid) when is_binary(conversation_id) and is_pid(pid) do - :ets.insert(table(), {conversation_id, pid}) + :ets.insert(@table, {conversation_id, pid}) :ok end def unregister(conversation_id) when is_binary(conversation_id) do - :ets.delete(table(), conversation_id) + :ets.delete(@table, conversation_id) :ok end def lookup(conversation_id) when is_binary(conversation_id) do - case :ets.lookup(table(), conversation_id) do + case :ets.lookup(@table, conversation_id) do [{^conversation_id, pid}] -> pid _other -> nil end end - - defp table do - case :ets.whereis(@table) do - :undefined -> :ets.new(@table, [:named_table, :public, :set, read_concurrency: true]) - table -> table - end - end end diff --git a/lib/bds/application.ex b/lib/bds/application.ex index 5af33b9..51b3fb4 100644 --- a/lib/bds/application.ex +++ b/lib/bds/application.ex @@ -32,6 +32,7 @@ defmodule BDS.Application do BDS.Repo, BDS.RepoBootstrap, BDS.Tasks, + BDS.AI.InFlight, BDS.Preview, BDS.Publishing, {Task.Supervisor, name: BDS.Tasks.TaskSupervisor}, diff --git a/test/bds/ai/in_flight_test.exs b/test/bds/ai/in_flight_test.exs new file mode 100644 index 0000000..df75968 --- /dev/null +++ b/test/bds/ai/in_flight_test.exs @@ -0,0 +1,39 @@ +defmodule BDS.AI.InFlightTest do + use ExUnit.Case, async: true + + alias BDS.AI.InFlight + + test "registrations survive the death of the registering process" do + conversation_id = unique_conversation_id() + target = self() + + {pid, ref} = + spawn_monitor(fn -> + InFlight.register(conversation_id, self()) + send(target, :registered) + end) + + assert_receive :registered + assert_receive {:DOWN, ^ref, :process, ^pid, _reason} + + assert InFlight.lookup(conversation_id) == pid + + assert InFlight.unregister(conversation_id) == :ok + assert InFlight.lookup(conversation_id) == nil + end + + test "lookup returns nil for unknown conversations" do + assert InFlight.lookup(unique_conversation_id()) == nil + end + + test "the named table is owned by the supervised InFlight process" do + owner = :ets.info(:bds_ai_in_flight, :owner) + + assert is_pid(owner) + assert owner == Process.whereis(InFlight) + end + + defp unique_conversation_id do + "in-flight-test-" <> Integer.to_string(System.unique_integer([:positive])) + end +end