chore: unit tests adaption to idea of gate test

chore: moved from delay-based tests to deterministic gated server tests
for chat
2026-05-11 10:21:00 +02:00 · 2026-05-11 10:20:46 +02:00
10 changed files with 66 additions and 19 deletions
--- a/.claude/projects/-Users-gb-Projects-bDS2/memory/MEMORY.md
+++ b/.claude/projects/-Users-gb-Projects-bDS2/memory/MEMORY.md
@@ -1 +1,2 @@
- [Fix all test failures](feedback_fix_all_failures.md) — Never dismiss failures as pre-existing; fix everything
+- [Fix all test failures](feedback_fix_all_failures.md) — Never dismiss failures as pre-existing or flaky; investigate and fix
+- [Debug targeted](feedback_targeted_debugging.md) — Analyze the code and fix; don't brute-force with repeated suite runs
--- a/.claude/projects/-Users-gb-Projects-bDS2/memory/feedback_fix_all_failures.md
+++ b/.claude/projects/-Users-gb-Projects-bDS2/memory/feedback_fix_all_failures.md
@@ -1,11 +1,13 @@
 ---
-name: Fix all test failures
-description: Never dismiss test failures as pre-existing — if tests fail after changes, fix them
+name: Fix all test failures including flaky ones
+description: Never dismiss test failures as pre-existing or flaky — investigate root cause and stabilize
 type: feedback
 ---

-All test failures after changes must be fixed, even if they appear unrelated. The test suite was clean before, so any failure is the responsibility of the current change.
+All test failures must be fixed, even if they appear unrelated to current changes. The test suite was clean before, so any failure is my responsibility.

-**Why:** The user confirmed the suite was green before. Dismissing failures as "pre-existing" is wrong and wastes time.
+Flaky tests are deeper problems waiting to surface. Running a test in isolation and seeing it pass is never enough — must find out why it was flaky in the full suite run and make it stable.

-**How to apply:** After making changes, if any test fails, investigate and fix it before reporting the task as done. Never stash/skip/ignore failures.
+**Why:** Dismissing failures as "pre-existing" or "flaky" is wrong. Flaky tests indicate real issues (race conditions, test pollution, shared state) that will bite harder later.
+
+**How to apply:** After making changes, if any test fails: investigate the root cause, fix it, and verify it passes reliably in the full suite. Never stash, never skip, never re-run and hope. Never dismiss ordering-dependent failures — find and fix the shared state or race condition.
--- a/.claude/projects/-Users-gb-Projects-bDS2/memory/feedback_targeted_debugging.md
+++ b/.claude/projects/-Users-gb-Projects-bDS2/memory/feedback_targeted_debugging.md
@@ -0,0 +1,11 @@
+---
+name: Debug targeted, don't brute-force
+description: When investigating flaky tests, analyze the code and fix — don't brute-force with repeated full suite runs
+type: feedback
+---
+
+If you already know which test is failing and why, fix it. Don't waste time running the full suite 20 times hoping to capture output.
+
+**Why:** It's slow, wasteful, and avoids thinking. Analyze the code, understand the race, fix it.
+
+**How to apply:** When a test fails, read the test, understand what it does, identify the root cause, and fix it. Only re-run to verify the fix, not to gather more data you already have.
--- a/.claude/settings.local.json
+++ b/.claude/settings.local.json
@@ -6,7 +6,8 @@
      "Bash(mix dialyzer *)",
      "Bash(mix ecto.migrate)",
      "Bash(git add *)",
-      "Bash(git push *)"
+      "Bash(git push *)",
+      "Bash(git -C /Users/gb/Projects/bDS2 status)"
    ]
  }
 }
--- a/test/bds/desktop/shell_live_test.exs
+++ b/test/bds/desktop/shell_live_test.exs
@@ -443,15 +443,27 @@ defmodule BDS.Desktop.ShellLiveTest do
    def get(_url, _headers), do: {:error, :not_found}
  end

-  defmodule DelayedChatServer do
+  defmodule GatedChatServer do
+    @moduledoc false
    use Plug.Router
    import Phoenix.ConnTest, except: [post: 2]

    plug(:match)
    plug(:dispatch)

+    def hold_gate do
+      case GenServer.whereis(__MODULE__) do
+        nil -> :ok
+        pid -> Agent.stop(pid)
+      end
+
+      Agent.start_link(fn -> :hold end, name: __MODULE__)
+    end
+
+    def release_gate, do: Agent.update(__MODULE__, fn _ -> :release end)
+
    post "/v1/chat/completions" do
-      Process.sleep(300)
+      wait_for_release()

      body =
        Jason.encode!(%{
@@ -467,6 +479,19 @@ defmodule BDS.Desktop.ShellLiveTest do
    match _ do
      send_resp(conn, 404, "not found")
    end
+
+    defp wait_for_release do
+      case GenServer.whereis(__MODULE__) do
+        nil ->
+          Process.sleep(:infinity)
+
+        _pid ->
+          case Agent.get(__MODULE__, & &1) do
+            :release -> :ok
+            :hold -> Process.sleep(50) && wait_for_release()
+          end
+      end
+    end
  end

  defmodule TitleChatServer do
@@ -3915,9 +3940,10 @@ defmodule BDS.Desktop.ShellLiveTest do

  test "chat editor keeps previous surfaces visible while a new update surface streams" do
    assert :ok = AI.set_airplane_mode(false)
+    {:ok, _} = GatedChatServer.hold_gate()

    server =
-      start_supervised!({Bandit, plug: DelayedChatServer, port: 0, startup_log: false})
+      start_supervised!({Bandit, plug: GatedChatServer, port: 0, startup_log: false})

    {:ok, {_address, port}} = ThousandIsland.listener_info(server)

@@ -4000,8 +4026,6 @@ defmodule BDS.Desktop.ShellLiveTest do
      view
      |> element("[data-testid='chat-abort-button']")
      |> render_click()
-
-    Process.sleep(350)
  end

  test "chat editor hook reopens server-expanded A2UI surfaces after patches" do
@@ -4356,9 +4380,10 @@ defmodule BDS.Desktop.ShellLiveTest do

  test "chat editor shows in-flight stop state and can abort a running turn" do
    assert :ok = AI.set_airplane_mode(false)
+    {:ok, _} = GatedChatServer.hold_gate()

    server =
-      start_supervised!({Bandit, plug: DelayedChatServer, port: 0, startup_log: false})
+      start_supervised!({Bandit, plug: GatedChatServer, port: 0, startup_log: false})

    {:ok, {_address, port}} = ThousandIsland.listener_info(server)

@@ -4401,15 +4426,17 @@ defmodule BDS.Desktop.ShellLiveTest do

    refute html =~ ~s(data-testid="chat-abort-button")

-    Process.sleep(350)
+    GatedChatServer.release_gate()
+    Process.sleep(100)
    refute render(view) =~ "Delayed response"
  end

  test "chat editor keeps the in-flight user turn visible and disables input while streaming" do
    assert :ok = AI.set_airplane_mode(false)
+    {:ok, _} = GatedChatServer.hold_gate()

    server =
-      start_supervised!({Bandit, plug: DelayedChatServer, port: 0, startup_log: false})
+      start_supervised!({Bandit, plug: GatedChatServer, port: 0, startup_log: false})

    {:ok, {_address, port}} = ThousandIsland.listener_info(server)

@@ -4483,15 +4510,17 @@ defmodule BDS.Desktop.ShellLiveTest do

    refute html =~ ~s(data-testid="chat-abort-button")

-    Process.sleep(350)
+    GatedChatServer.release_gate()
+    Process.sleep(100)
    refute render(view) =~ "Delayed response"
  end

  test "chat editor does not duplicate persisted turn artifacts while the request is still active" do
    assert :ok = AI.set_airplane_mode(false)
+    {:ok, _} = GatedChatServer.hold_gate()

    server =
-      start_supervised!({Bandit, plug: DelayedChatServer, port: 0, startup_log: false})
+      start_supervised!({Bandit, plug: GatedChatServer, port: 0, startup_log: false})

    {:ok, {_address, port}} = ThousandIsland.listener_info(server)

@@ -4575,8 +4604,6 @@ defmodule BDS.Desktop.ShellLiveTest do
      view
      |> element("[data-testid='chat-abort-button']")
      |> render_click()
-
-    Process.sleep(350)
  end

  test "translation validation route renders dedicated cards and fix controls", %{
--- a/tmp/BDS.TOCTOU.FileSystemTest/test-read_template_file-2-protocol-uses-atomic-read-reads-existing-template-0fbdd3d0/footer.liquid
+++ b/tmp/BDS.TOCTOU.FileSystemTest/test-read_template_file-2-protocol-uses-atomic-read-reads-existing-template-0fbdd3d0/footer.liquid
@@ -0,0 +1 @@
+FOOTER
--- a/tmp/BDS.TOCTOU.FileSystemTest/test-try_read-2-eliminates-TOCTOU-race-falls-through-to-next-root-path-when-first-is-missing-ac96bedd/b/partial.liquid
+++ b/tmp/BDS.TOCTOU.FileSystemTest/test-try_read-2-eliminates-TOCTOU-race-falls-through-to-next-root-path-when-first-is-missing-ac96bedd/b/partial.liquid
@@ -0,0 +1 @@
+FROM B
--- a/tmp/BDS.TOCTOU.FileSystemTest/test-try_read-2-eliminates-TOCTOU-race-first-root-path-wins-when-both-have-the-template-69386ef3/a/shared.liquid
+++ b/tmp/BDS.TOCTOU.FileSystemTest/test-try_read-2-eliminates-TOCTOU-race-first-root-path-wins-when-both-have-the-template-69386ef3/a/shared.liquid
@@ -0,0 +1 @@
+FROM A
--- a/tmp/BDS.TOCTOU.FileSystemTest/test-try_read-2-eliminates-TOCTOU-race-first-root-path-wins-when-both-have-the-template-69386ef3/b/shared.liquid
+++ b/tmp/BDS.TOCTOU.FileSystemTest/test-try_read-2-eliminates-TOCTOU-race-first-root-path-wins-when-both-have-the-template-69386ef3/b/shared.liquid
@@ -0,0 +1 @@
+FROM B
--- a/tmp/BDS.TOCTOU.FileSystemTest/test-try_read-2-eliminates-TOCTOU-race-reads-file-atomically-without-separate-existence-check-8a97013f/header.liquid
+++ b/tmp/BDS.TOCTOU.FileSystemTest/test-try_read-2-eliminates-TOCTOU-race-reads-file-atomically-without-separate-existence-check-8a97013f/header.liquid
@@ -0,0 +1 @@
+HEADER CONTENT
Author	SHA1	Message	Date
Chili Palmer	71fb99af16	chore: unit tests adaption to idea of gate test	2026-05-11 10:21:00 +02:00
Chili Palmer	0808b27057	chore: moved from delay-based tests to deterministic gated server tests for chat	2026-05-11 10:20:46 +02:00