From 0808b2705764589e0db422169c5b2eda6d2ba2d7 Mon Sep 17 00:00:00 2001 From: Chili Palmer Date: Mon, 11 May 2026 10:20:46 +0200 Subject: [PATCH] chore: moved from delay-based tests to deterministic gated server tests for chat --- .../-Users-gb-Projects-bDS2/memory/MEMORY.md | 3 +- .../memory/feedback_fix_all_failures.md | 12 +++-- .claude/settings.local.json | 3 +- test/bds/desktop/shell_live_test.exs | 51 ++++++++++++++----- 4 files changed, 50 insertions(+), 19 deletions(-) diff --git a/.claude/projects/-Users-gb-Projects-bDS2/memory/MEMORY.md b/.claude/projects/-Users-gb-Projects-bDS2/memory/MEMORY.md index d00060a..87bfc1e 100644 --- a/.claude/projects/-Users-gb-Projects-bDS2/memory/MEMORY.md +++ b/.claude/projects/-Users-gb-Projects-bDS2/memory/MEMORY.md @@ -1 +1,2 @@ -- [Fix all test failures](feedback_fix_all_failures.md) — Never dismiss failures as pre-existing; fix everything +- [Fix all test failures](feedback_fix_all_failures.md) — Never dismiss failures as pre-existing or flaky; investigate and fix +- [Debug targeted](feedback_targeted_debugging.md) — Analyze the code and fix; don't brute-force with repeated suite runs diff --git a/.claude/projects/-Users-gb-Projects-bDS2/memory/feedback_fix_all_failures.md b/.claude/projects/-Users-gb-Projects-bDS2/memory/feedback_fix_all_failures.md index 5b6b9cc..36e5a36 100644 --- a/.claude/projects/-Users-gb-Projects-bDS2/memory/feedback_fix_all_failures.md +++ b/.claude/projects/-Users-gb-Projects-bDS2/memory/feedback_fix_all_failures.md @@ -1,11 +1,13 @@ --- -name: Fix all test failures -description: Never dismiss test failures as pre-existing — if tests fail after changes, fix them +name: Fix all test failures including flaky ones +description: Never dismiss test failures as pre-existing or flaky — investigate root cause and stabilize type: feedback --- -All test failures after changes must be fixed, even if they appear unrelated. The test suite was clean before, so any failure is the responsibility of the current change. +All test failures must be fixed, even if they appear unrelated to current changes. The test suite was clean before, so any failure is my responsibility. -**Why:** The user confirmed the suite was green before. Dismissing failures as "pre-existing" is wrong and wastes time. +Flaky tests are deeper problems waiting to surface. Running a test in isolation and seeing it pass is never enough — must find out why it was flaky in the full suite run and make it stable. -**How to apply:** After making changes, if any test fails, investigate and fix it before reporting the task as done. Never stash/skip/ignore failures. +**Why:** Dismissing failures as "pre-existing" or "flaky" is wrong. Flaky tests indicate real issues (race conditions, test pollution, shared state) that will bite harder later. + +**How to apply:** After making changes, if any test fails: investigate the root cause, fix it, and verify it passes reliably in the full suite. Never stash, never skip, never re-run and hope. Never dismiss ordering-dependent failures — find and fix the shared state or race condition. diff --git a/.claude/settings.local.json b/.claude/settings.local.json index b6b2004..ec03357 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -6,7 +6,8 @@ "Bash(mix dialyzer *)", "Bash(mix ecto.migrate)", "Bash(git add *)", - "Bash(git push *)" + "Bash(git push *)", + "Bash(git -C /Users/gb/Projects/bDS2 status)" ] } } diff --git a/test/bds/desktop/shell_live_test.exs b/test/bds/desktop/shell_live_test.exs index e98979e..36946b5 100644 --- a/test/bds/desktop/shell_live_test.exs +++ b/test/bds/desktop/shell_live_test.exs @@ -443,15 +443,27 @@ defmodule BDS.Desktop.ShellLiveTest do def get(_url, _headers), do: {:error, :not_found} end - defmodule DelayedChatServer do + defmodule GatedChatServer do + @moduledoc false use Plug.Router import Phoenix.ConnTest, except: [post: 2] plug(:match) plug(:dispatch) + def hold_gate do + case GenServer.whereis(__MODULE__) do + nil -> :ok + pid -> Agent.stop(pid) + end + + Agent.start_link(fn -> :hold end, name: __MODULE__) + end + + def release_gate, do: Agent.update(__MODULE__, fn _ -> :release end) + post "/v1/chat/completions" do - Process.sleep(300) + wait_for_release() body = Jason.encode!(%{ @@ -467,6 +479,19 @@ defmodule BDS.Desktop.ShellLiveTest do match _ do send_resp(conn, 404, "not found") end + + defp wait_for_release do + case GenServer.whereis(__MODULE__) do + nil -> + Process.sleep(:infinity) + + _pid -> + case Agent.get(__MODULE__, & &1) do + :release -> :ok + :hold -> Process.sleep(50) && wait_for_release() + end + end + end end defmodule TitleChatServer do @@ -3915,9 +3940,10 @@ defmodule BDS.Desktop.ShellLiveTest do test "chat editor keeps previous surfaces visible while a new update surface streams" do assert :ok = AI.set_airplane_mode(false) + {:ok, _} = GatedChatServer.hold_gate() server = - start_supervised!({Bandit, plug: DelayedChatServer, port: 0, startup_log: false}) + start_supervised!({Bandit, plug: GatedChatServer, port: 0, startup_log: false}) {:ok, {_address, port}} = ThousandIsland.listener_info(server) @@ -4000,8 +4026,6 @@ defmodule BDS.Desktop.ShellLiveTest do view |> element("[data-testid='chat-abort-button']") |> render_click() - - Process.sleep(350) end test "chat editor hook reopens server-expanded A2UI surfaces after patches" do @@ -4356,9 +4380,10 @@ defmodule BDS.Desktop.ShellLiveTest do test "chat editor shows in-flight stop state and can abort a running turn" do assert :ok = AI.set_airplane_mode(false) + {:ok, _} = GatedChatServer.hold_gate() server = - start_supervised!({Bandit, plug: DelayedChatServer, port: 0, startup_log: false}) + start_supervised!({Bandit, plug: GatedChatServer, port: 0, startup_log: false}) {:ok, {_address, port}} = ThousandIsland.listener_info(server) @@ -4401,15 +4426,17 @@ defmodule BDS.Desktop.ShellLiveTest do refute html =~ ~s(data-testid="chat-abort-button") - Process.sleep(350) + GatedChatServer.release_gate() + Process.sleep(100) refute render(view) =~ "Delayed response" end test "chat editor keeps the in-flight user turn visible and disables input while streaming" do assert :ok = AI.set_airplane_mode(false) + {:ok, _} = GatedChatServer.hold_gate() server = - start_supervised!({Bandit, plug: DelayedChatServer, port: 0, startup_log: false}) + start_supervised!({Bandit, plug: GatedChatServer, port: 0, startup_log: false}) {:ok, {_address, port}} = ThousandIsland.listener_info(server) @@ -4483,15 +4510,17 @@ defmodule BDS.Desktop.ShellLiveTest do refute html =~ ~s(data-testid="chat-abort-button") - Process.sleep(350) + GatedChatServer.release_gate() + Process.sleep(100) refute render(view) =~ "Delayed response" end test "chat editor does not duplicate persisted turn artifacts while the request is still active" do assert :ok = AI.set_airplane_mode(false) + {:ok, _} = GatedChatServer.hold_gate() server = - start_supervised!({Bandit, plug: DelayedChatServer, port: 0, startup_log: false}) + start_supervised!({Bandit, plug: GatedChatServer, port: 0, startup_log: false}) {:ok, {_address, port}} = ThousandIsland.listener_info(server) @@ -4575,8 +4604,6 @@ defmodule BDS.Desktop.ShellLiveTest do view |> element("[data-testid='chat-abort-button']") |> render_click() - - Process.sleep(350) end test "translation validation route renders dedicated cards and fix controls", %{