Compare commits

...

2 Commits

10 changed files with 66 additions and 19 deletions

View File

@@ -1 +1,2 @@
- [Fix all test failures](feedback_fix_all_failures.md) — Never dismiss failures as pre-existing; fix everything - [Fix all test failures](feedback_fix_all_failures.md) — Never dismiss failures as pre-existing or flaky; investigate and fix
- [Debug targeted](feedback_targeted_debugging.md) — Analyze the code and fix; don't brute-force with repeated suite runs

View File

@@ -1,11 +1,13 @@
--- ---
name: Fix all test failures name: Fix all test failures including flaky ones
description: Never dismiss test failures as pre-existing — if tests fail after changes, fix them description: Never dismiss test failures as pre-existing or flaky — investigate root cause and stabilize
type: feedback type: feedback
--- ---
All test failures after changes must be fixed, even if they appear unrelated. The test suite was clean before, so any failure is the responsibility of the current change. All test failures must be fixed, even if they appear unrelated to current changes. The test suite was clean before, so any failure is my responsibility.
**Why:** The user confirmed the suite was green before. Dismissing failures as "pre-existing" is wrong and wastes time. Flaky tests are deeper problems waiting to surface. Running a test in isolation and seeing it pass is never enough — must find out why it was flaky in the full suite run and make it stable.
**How to apply:** After making changes, if any test fails, investigate and fix it before reporting the task as done. Never stash/skip/ignore failures. **Why:** Dismissing failures as "pre-existing" or "flaky" is wrong. Flaky tests indicate real issues (race conditions, test pollution, shared state) that will bite harder later.
**How to apply:** After making changes, if any test fails: investigate the root cause, fix it, and verify it passes reliably in the full suite. Never stash, never skip, never re-run and hope. Never dismiss ordering-dependent failures — find and fix the shared state or race condition.

View File

@@ -0,0 +1,11 @@
---
name: Debug targeted, don't brute-force
description: When investigating flaky tests, analyze the code and fix — don't brute-force with repeated full suite runs
type: feedback
---
If you already know which test is failing and why, fix it. Don't waste time running the full suite 20 times hoping to capture output.
**Why:** It's slow, wasteful, and avoids thinking. Analyze the code, understand the race, fix it.
**How to apply:** When a test fails, read the test, understand what it does, identify the root cause, and fix it. Only re-run to verify the fix, not to gather more data you already have.

View File

@@ -6,7 +6,8 @@
"Bash(mix dialyzer *)", "Bash(mix dialyzer *)",
"Bash(mix ecto.migrate)", "Bash(mix ecto.migrate)",
"Bash(git add *)", "Bash(git add *)",
"Bash(git push *)" "Bash(git push *)",
"Bash(git -C /Users/gb/Projects/bDS2 status)"
] ]
} }
} }

View File

@@ -443,15 +443,27 @@ defmodule BDS.Desktop.ShellLiveTest do
def get(_url, _headers), do: {:error, :not_found} def get(_url, _headers), do: {:error, :not_found}
end end
defmodule DelayedChatServer do defmodule GatedChatServer do
@moduledoc false
use Plug.Router use Plug.Router
import Phoenix.ConnTest, except: [post: 2] import Phoenix.ConnTest, except: [post: 2]
plug(:match) plug(:match)
plug(:dispatch) plug(:dispatch)
def hold_gate do
case GenServer.whereis(__MODULE__) do
nil -> :ok
pid -> Agent.stop(pid)
end
Agent.start_link(fn -> :hold end, name: __MODULE__)
end
def release_gate, do: Agent.update(__MODULE__, fn _ -> :release end)
post "/v1/chat/completions" do post "/v1/chat/completions" do
Process.sleep(300) wait_for_release()
body = body =
Jason.encode!(%{ Jason.encode!(%{
@@ -467,6 +479,19 @@ defmodule BDS.Desktop.ShellLiveTest do
match _ do match _ do
send_resp(conn, 404, "not found") send_resp(conn, 404, "not found")
end end
defp wait_for_release do
case GenServer.whereis(__MODULE__) do
nil ->
Process.sleep(:infinity)
_pid ->
case Agent.get(__MODULE__, & &1) do
:release -> :ok
:hold -> Process.sleep(50) && wait_for_release()
end
end
end
end end
defmodule TitleChatServer do defmodule TitleChatServer do
@@ -3915,9 +3940,10 @@ defmodule BDS.Desktop.ShellLiveTest do
test "chat editor keeps previous surfaces visible while a new update surface streams" do test "chat editor keeps previous surfaces visible while a new update surface streams" do
assert :ok = AI.set_airplane_mode(false) assert :ok = AI.set_airplane_mode(false)
{:ok, _} = GatedChatServer.hold_gate()
server = server =
start_supervised!({Bandit, plug: DelayedChatServer, port: 0, startup_log: false}) start_supervised!({Bandit, plug: GatedChatServer, port: 0, startup_log: false})
{:ok, {_address, port}} = ThousandIsland.listener_info(server) {:ok, {_address, port}} = ThousandIsland.listener_info(server)
@@ -4000,8 +4026,6 @@ defmodule BDS.Desktop.ShellLiveTest do
view view
|> element("[data-testid='chat-abort-button']") |> element("[data-testid='chat-abort-button']")
|> render_click() |> render_click()
Process.sleep(350)
end end
test "chat editor hook reopens server-expanded A2UI surfaces after patches" do test "chat editor hook reopens server-expanded A2UI surfaces after patches" do
@@ -4356,9 +4380,10 @@ defmodule BDS.Desktop.ShellLiveTest do
test "chat editor shows in-flight stop state and can abort a running turn" do test "chat editor shows in-flight stop state and can abort a running turn" do
assert :ok = AI.set_airplane_mode(false) assert :ok = AI.set_airplane_mode(false)
{:ok, _} = GatedChatServer.hold_gate()
server = server =
start_supervised!({Bandit, plug: DelayedChatServer, port: 0, startup_log: false}) start_supervised!({Bandit, plug: GatedChatServer, port: 0, startup_log: false})
{:ok, {_address, port}} = ThousandIsland.listener_info(server) {:ok, {_address, port}} = ThousandIsland.listener_info(server)
@@ -4401,15 +4426,17 @@ defmodule BDS.Desktop.ShellLiveTest do
refute html =~ ~s(data-testid="chat-abort-button") refute html =~ ~s(data-testid="chat-abort-button")
Process.sleep(350) GatedChatServer.release_gate()
Process.sleep(100)
refute render(view) =~ "Delayed response" refute render(view) =~ "Delayed response"
end end
test "chat editor keeps the in-flight user turn visible and disables input while streaming" do test "chat editor keeps the in-flight user turn visible and disables input while streaming" do
assert :ok = AI.set_airplane_mode(false) assert :ok = AI.set_airplane_mode(false)
{:ok, _} = GatedChatServer.hold_gate()
server = server =
start_supervised!({Bandit, plug: DelayedChatServer, port: 0, startup_log: false}) start_supervised!({Bandit, plug: GatedChatServer, port: 0, startup_log: false})
{:ok, {_address, port}} = ThousandIsland.listener_info(server) {:ok, {_address, port}} = ThousandIsland.listener_info(server)
@@ -4483,15 +4510,17 @@ defmodule BDS.Desktop.ShellLiveTest do
refute html =~ ~s(data-testid="chat-abort-button") refute html =~ ~s(data-testid="chat-abort-button")
Process.sleep(350) GatedChatServer.release_gate()
Process.sleep(100)
refute render(view) =~ "Delayed response" refute render(view) =~ "Delayed response"
end end
test "chat editor does not duplicate persisted turn artifacts while the request is still active" do test "chat editor does not duplicate persisted turn artifacts while the request is still active" do
assert :ok = AI.set_airplane_mode(false) assert :ok = AI.set_airplane_mode(false)
{:ok, _} = GatedChatServer.hold_gate()
server = server =
start_supervised!({Bandit, plug: DelayedChatServer, port: 0, startup_log: false}) start_supervised!({Bandit, plug: GatedChatServer, port: 0, startup_log: false})
{:ok, {_address, port}} = ThousandIsland.listener_info(server) {:ok, {_address, port}} = ThousandIsland.listener_info(server)
@@ -4575,8 +4604,6 @@ defmodule BDS.Desktop.ShellLiveTest do
view view
|> element("[data-testid='chat-abort-button']") |> element("[data-testid='chat-abort-button']")
|> render_click() |> render_click()
Process.sleep(350)
end end
test "translation validation route renders dedicated cards and fix controls", %{ test "translation validation route renders dedicated cards and fix controls", %{