Compare commits
2 Commits
ac4f5a3580
...
71fb99af16
| Author | SHA1 | Date | |
|---|---|---|---|
| 71fb99af16 | |||
| 0808b27057 |
@@ -1 +1,2 @@
|
|||||||
- [Fix all test failures](feedback_fix_all_failures.md) — Never dismiss failures as pre-existing; fix everything
|
- [Fix all test failures](feedback_fix_all_failures.md) — Never dismiss failures as pre-existing or flaky; investigate and fix
|
||||||
|
- [Debug targeted](feedback_targeted_debugging.md) — Analyze the code and fix; don't brute-force with repeated suite runs
|
||||||
|
|||||||
@@ -1,11 +1,13 @@
|
|||||||
---
|
---
|
||||||
name: Fix all test failures
|
name: Fix all test failures including flaky ones
|
||||||
description: Never dismiss test failures as pre-existing — if tests fail after changes, fix them
|
description: Never dismiss test failures as pre-existing or flaky — investigate root cause and stabilize
|
||||||
type: feedback
|
type: feedback
|
||||||
---
|
---
|
||||||
|
|
||||||
All test failures after changes must be fixed, even if they appear unrelated. The test suite was clean before, so any failure is the responsibility of the current change.
|
All test failures must be fixed, even if they appear unrelated to current changes. The test suite was clean before, so any failure is my responsibility.
|
||||||
|
|
||||||
**Why:** The user confirmed the suite was green before. Dismissing failures as "pre-existing" is wrong and wastes time.
|
Flaky tests are deeper problems waiting to surface. Running a test in isolation and seeing it pass is never enough — must find out why it was flaky in the full suite run and make it stable.
|
||||||
|
|
||||||
**How to apply:** After making changes, if any test fails, investigate and fix it before reporting the task as done. Never stash/skip/ignore failures.
|
**Why:** Dismissing failures as "pre-existing" or "flaky" is wrong. Flaky tests indicate real issues (race conditions, test pollution, shared state) that will bite harder later.
|
||||||
|
|
||||||
|
**How to apply:** After making changes, if any test fails: investigate the root cause, fix it, and verify it passes reliably in the full suite. Never stash, never skip, never re-run and hope. Never dismiss ordering-dependent failures — find and fix the shared state or race condition.
|
||||||
|
|||||||
@@ -0,0 +1,11 @@
|
|||||||
|
---
|
||||||
|
name: Debug targeted, don't brute-force
|
||||||
|
description: When investigating flaky tests, analyze the code and fix — don't brute-force with repeated full suite runs
|
||||||
|
type: feedback
|
||||||
|
---
|
||||||
|
|
||||||
|
If you already know which test is failing and why, fix it. Don't waste time running the full suite 20 times hoping to capture output.
|
||||||
|
|
||||||
|
**Why:** It's slow, wasteful, and avoids thinking. Analyze the code, understand the race, fix it.
|
||||||
|
|
||||||
|
**How to apply:** When a test fails, read the test, understand what it does, identify the root cause, and fix it. Only re-run to verify the fix, not to gather more data you already have.
|
||||||
@@ -6,7 +6,8 @@
|
|||||||
"Bash(mix dialyzer *)",
|
"Bash(mix dialyzer *)",
|
||||||
"Bash(mix ecto.migrate)",
|
"Bash(mix ecto.migrate)",
|
||||||
"Bash(git add *)",
|
"Bash(git add *)",
|
||||||
"Bash(git push *)"
|
"Bash(git push *)",
|
||||||
|
"Bash(git -C /Users/gb/Projects/bDS2 status)"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -443,15 +443,27 @@ defmodule BDS.Desktop.ShellLiveTest do
|
|||||||
def get(_url, _headers), do: {:error, :not_found}
|
def get(_url, _headers), do: {:error, :not_found}
|
||||||
end
|
end
|
||||||
|
|
||||||
defmodule DelayedChatServer do
|
defmodule GatedChatServer do
|
||||||
|
@moduledoc false
|
||||||
use Plug.Router
|
use Plug.Router
|
||||||
import Phoenix.ConnTest, except: [post: 2]
|
import Phoenix.ConnTest, except: [post: 2]
|
||||||
|
|
||||||
plug(:match)
|
plug(:match)
|
||||||
plug(:dispatch)
|
plug(:dispatch)
|
||||||
|
|
||||||
|
def hold_gate do
|
||||||
|
case GenServer.whereis(__MODULE__) do
|
||||||
|
nil -> :ok
|
||||||
|
pid -> Agent.stop(pid)
|
||||||
|
end
|
||||||
|
|
||||||
|
Agent.start_link(fn -> :hold end, name: __MODULE__)
|
||||||
|
end
|
||||||
|
|
||||||
|
def release_gate, do: Agent.update(__MODULE__, fn _ -> :release end)
|
||||||
|
|
||||||
post "/v1/chat/completions" do
|
post "/v1/chat/completions" do
|
||||||
Process.sleep(300)
|
wait_for_release()
|
||||||
|
|
||||||
body =
|
body =
|
||||||
Jason.encode!(%{
|
Jason.encode!(%{
|
||||||
@@ -467,6 +479,19 @@ defmodule BDS.Desktop.ShellLiveTest do
|
|||||||
match _ do
|
match _ do
|
||||||
send_resp(conn, 404, "not found")
|
send_resp(conn, 404, "not found")
|
||||||
end
|
end
|
||||||
|
|
||||||
|
defp wait_for_release do
|
||||||
|
case GenServer.whereis(__MODULE__) do
|
||||||
|
nil ->
|
||||||
|
Process.sleep(:infinity)
|
||||||
|
|
||||||
|
_pid ->
|
||||||
|
case Agent.get(__MODULE__, & &1) do
|
||||||
|
:release -> :ok
|
||||||
|
:hold -> Process.sleep(50) && wait_for_release()
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
defmodule TitleChatServer do
|
defmodule TitleChatServer do
|
||||||
@@ -3915,9 +3940,10 @@ defmodule BDS.Desktop.ShellLiveTest do
|
|||||||
|
|
||||||
test "chat editor keeps previous surfaces visible while a new update surface streams" do
|
test "chat editor keeps previous surfaces visible while a new update surface streams" do
|
||||||
assert :ok = AI.set_airplane_mode(false)
|
assert :ok = AI.set_airplane_mode(false)
|
||||||
|
{:ok, _} = GatedChatServer.hold_gate()
|
||||||
|
|
||||||
server =
|
server =
|
||||||
start_supervised!({Bandit, plug: DelayedChatServer, port: 0, startup_log: false})
|
start_supervised!({Bandit, plug: GatedChatServer, port: 0, startup_log: false})
|
||||||
|
|
||||||
{:ok, {_address, port}} = ThousandIsland.listener_info(server)
|
{:ok, {_address, port}} = ThousandIsland.listener_info(server)
|
||||||
|
|
||||||
@@ -4000,8 +4026,6 @@ defmodule BDS.Desktop.ShellLiveTest do
|
|||||||
view
|
view
|
||||||
|> element("[data-testid='chat-abort-button']")
|
|> element("[data-testid='chat-abort-button']")
|
||||||
|> render_click()
|
|> render_click()
|
||||||
|
|
||||||
Process.sleep(350)
|
|
||||||
end
|
end
|
||||||
|
|
||||||
test "chat editor hook reopens server-expanded A2UI surfaces after patches" do
|
test "chat editor hook reopens server-expanded A2UI surfaces after patches" do
|
||||||
@@ -4356,9 +4380,10 @@ defmodule BDS.Desktop.ShellLiveTest do
|
|||||||
|
|
||||||
test "chat editor shows in-flight stop state and can abort a running turn" do
|
test "chat editor shows in-flight stop state and can abort a running turn" do
|
||||||
assert :ok = AI.set_airplane_mode(false)
|
assert :ok = AI.set_airplane_mode(false)
|
||||||
|
{:ok, _} = GatedChatServer.hold_gate()
|
||||||
|
|
||||||
server =
|
server =
|
||||||
start_supervised!({Bandit, plug: DelayedChatServer, port: 0, startup_log: false})
|
start_supervised!({Bandit, plug: GatedChatServer, port: 0, startup_log: false})
|
||||||
|
|
||||||
{:ok, {_address, port}} = ThousandIsland.listener_info(server)
|
{:ok, {_address, port}} = ThousandIsland.listener_info(server)
|
||||||
|
|
||||||
@@ -4401,15 +4426,17 @@ defmodule BDS.Desktop.ShellLiveTest do
|
|||||||
|
|
||||||
refute html =~ ~s(data-testid="chat-abort-button")
|
refute html =~ ~s(data-testid="chat-abort-button")
|
||||||
|
|
||||||
Process.sleep(350)
|
GatedChatServer.release_gate()
|
||||||
|
Process.sleep(100)
|
||||||
refute render(view) =~ "Delayed response"
|
refute render(view) =~ "Delayed response"
|
||||||
end
|
end
|
||||||
|
|
||||||
test "chat editor keeps the in-flight user turn visible and disables input while streaming" do
|
test "chat editor keeps the in-flight user turn visible and disables input while streaming" do
|
||||||
assert :ok = AI.set_airplane_mode(false)
|
assert :ok = AI.set_airplane_mode(false)
|
||||||
|
{:ok, _} = GatedChatServer.hold_gate()
|
||||||
|
|
||||||
server =
|
server =
|
||||||
start_supervised!({Bandit, plug: DelayedChatServer, port: 0, startup_log: false})
|
start_supervised!({Bandit, plug: GatedChatServer, port: 0, startup_log: false})
|
||||||
|
|
||||||
{:ok, {_address, port}} = ThousandIsland.listener_info(server)
|
{:ok, {_address, port}} = ThousandIsland.listener_info(server)
|
||||||
|
|
||||||
@@ -4483,15 +4510,17 @@ defmodule BDS.Desktop.ShellLiveTest do
|
|||||||
|
|
||||||
refute html =~ ~s(data-testid="chat-abort-button")
|
refute html =~ ~s(data-testid="chat-abort-button")
|
||||||
|
|
||||||
Process.sleep(350)
|
GatedChatServer.release_gate()
|
||||||
|
Process.sleep(100)
|
||||||
refute render(view) =~ "Delayed response"
|
refute render(view) =~ "Delayed response"
|
||||||
end
|
end
|
||||||
|
|
||||||
test "chat editor does not duplicate persisted turn artifacts while the request is still active" do
|
test "chat editor does not duplicate persisted turn artifacts while the request is still active" do
|
||||||
assert :ok = AI.set_airplane_mode(false)
|
assert :ok = AI.set_airplane_mode(false)
|
||||||
|
{:ok, _} = GatedChatServer.hold_gate()
|
||||||
|
|
||||||
server =
|
server =
|
||||||
start_supervised!({Bandit, plug: DelayedChatServer, port: 0, startup_log: false})
|
start_supervised!({Bandit, plug: GatedChatServer, port: 0, startup_log: false})
|
||||||
|
|
||||||
{:ok, {_address, port}} = ThousandIsland.listener_info(server)
|
{:ok, {_address, port}} = ThousandIsland.listener_info(server)
|
||||||
|
|
||||||
@@ -4575,8 +4604,6 @@ defmodule BDS.Desktop.ShellLiveTest do
|
|||||||
view
|
view
|
||||||
|> element("[data-testid='chat-abort-button']")
|
|> element("[data-testid='chat-abort-button']")
|
||||||
|> render_click()
|
|> render_click()
|
||||||
|
|
||||||
Process.sleep(350)
|
|
||||||
end
|
end
|
||||||
|
|
||||||
test "translation validation route renders dedicated cards and fix controls", %{
|
test "translation validation route renders dedicated cards and fix controls", %{
|
||||||
|
|||||||
@@ -0,0 +1 @@
|
|||||||
|
FOOTER
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
FROM B
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
FROM A
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
FROM B
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
HEADER CONTENT
|
||||||
Reference in New Issue
Block a user