Compare commits
2 Commits
ac4f5a3580
...
71fb99af16
| Author | SHA1 | Date | |
|---|---|---|---|
| 71fb99af16 | |||
| 0808b27057 |
@@ -1 +1,2 @@
|
||||
- [Fix all test failures](feedback_fix_all_failures.md) — Never dismiss failures as pre-existing; fix everything
|
||||
- [Fix all test failures](feedback_fix_all_failures.md) — Never dismiss failures as pre-existing or flaky; investigate and fix
|
||||
- [Debug targeted](feedback_targeted_debugging.md) — Analyze the code and fix; don't brute-force with repeated suite runs
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
---
|
||||
name: Fix all test failures
|
||||
description: Never dismiss test failures as pre-existing — if tests fail after changes, fix them
|
||||
name: Fix all test failures including flaky ones
|
||||
description: Never dismiss test failures as pre-existing or flaky — investigate root cause and stabilize
|
||||
type: feedback
|
||||
---
|
||||
|
||||
All test failures after changes must be fixed, even if they appear unrelated. The test suite was clean before, so any failure is the responsibility of the current change.
|
||||
All test failures must be fixed, even if they appear unrelated to current changes. The test suite was clean before, so any failure is my responsibility.
|
||||
|
||||
**Why:** The user confirmed the suite was green before. Dismissing failures as "pre-existing" is wrong and wastes time.
|
||||
Flaky tests are deeper problems waiting to surface. Running a test in isolation and seeing it pass is never enough — must find out why it was flaky in the full suite run and make it stable.
|
||||
|
||||
**How to apply:** After making changes, if any test fails, investigate and fix it before reporting the task as done. Never stash/skip/ignore failures.
|
||||
**Why:** Dismissing failures as "pre-existing" or "flaky" is wrong. Flaky tests indicate real issues (race conditions, test pollution, shared state) that will bite harder later.
|
||||
|
||||
**How to apply:** After making changes, if any test fails: investigate the root cause, fix it, and verify it passes reliably in the full suite. Never stash, never skip, never re-run and hope. Never dismiss ordering-dependent failures — find and fix the shared state or race condition.
|
||||
|
||||
@@ -0,0 +1,11 @@
|
||||
---
|
||||
name: Debug targeted, don't brute-force
|
||||
description: When investigating flaky tests, analyze the code and fix — don't brute-force with repeated full suite runs
|
||||
type: feedback
|
||||
---
|
||||
|
||||
If you already know which test is failing and why, fix it. Don't waste time running the full suite 20 times hoping to capture output.
|
||||
|
||||
**Why:** It's slow, wasteful, and avoids thinking. Analyze the code, understand the race, fix it.
|
||||
|
||||
**How to apply:** When a test fails, read the test, understand what it does, identify the root cause, and fix it. Only re-run to verify the fix, not to gather more data you already have.
|
||||
@@ -6,7 +6,8 @@
|
||||
"Bash(mix dialyzer *)",
|
||||
"Bash(mix ecto.migrate)",
|
||||
"Bash(git add *)",
|
||||
"Bash(git push *)"
|
||||
"Bash(git push *)",
|
||||
"Bash(git -C /Users/gb/Projects/bDS2 status)"
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
@@ -443,15 +443,27 @@ defmodule BDS.Desktop.ShellLiveTest do
|
||||
def get(_url, _headers), do: {:error, :not_found}
|
||||
end
|
||||
|
||||
defmodule DelayedChatServer do
|
||||
defmodule GatedChatServer do
|
||||
@moduledoc false
|
||||
use Plug.Router
|
||||
import Phoenix.ConnTest, except: [post: 2]
|
||||
|
||||
plug(:match)
|
||||
plug(:dispatch)
|
||||
|
||||
def hold_gate do
|
||||
case GenServer.whereis(__MODULE__) do
|
||||
nil -> :ok
|
||||
pid -> Agent.stop(pid)
|
||||
end
|
||||
|
||||
Agent.start_link(fn -> :hold end, name: __MODULE__)
|
||||
end
|
||||
|
||||
def release_gate, do: Agent.update(__MODULE__, fn _ -> :release end)
|
||||
|
||||
post "/v1/chat/completions" do
|
||||
Process.sleep(300)
|
||||
wait_for_release()
|
||||
|
||||
body =
|
||||
Jason.encode!(%{
|
||||
@@ -467,6 +479,19 @@ defmodule BDS.Desktop.ShellLiveTest do
|
||||
match _ do
|
||||
send_resp(conn, 404, "not found")
|
||||
end
|
||||
|
||||
defp wait_for_release do
|
||||
case GenServer.whereis(__MODULE__) do
|
||||
nil ->
|
||||
Process.sleep(:infinity)
|
||||
|
||||
_pid ->
|
||||
case Agent.get(__MODULE__, & &1) do
|
||||
:release -> :ok
|
||||
:hold -> Process.sleep(50) && wait_for_release()
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
defmodule TitleChatServer do
|
||||
@@ -3915,9 +3940,10 @@ defmodule BDS.Desktop.ShellLiveTest do
|
||||
|
||||
test "chat editor keeps previous surfaces visible while a new update surface streams" do
|
||||
assert :ok = AI.set_airplane_mode(false)
|
||||
{:ok, _} = GatedChatServer.hold_gate()
|
||||
|
||||
server =
|
||||
start_supervised!({Bandit, plug: DelayedChatServer, port: 0, startup_log: false})
|
||||
start_supervised!({Bandit, plug: GatedChatServer, port: 0, startup_log: false})
|
||||
|
||||
{:ok, {_address, port}} = ThousandIsland.listener_info(server)
|
||||
|
||||
@@ -4000,8 +4026,6 @@ defmodule BDS.Desktop.ShellLiveTest do
|
||||
view
|
||||
|> element("[data-testid='chat-abort-button']")
|
||||
|> render_click()
|
||||
|
||||
Process.sleep(350)
|
||||
end
|
||||
|
||||
test "chat editor hook reopens server-expanded A2UI surfaces after patches" do
|
||||
@@ -4356,9 +4380,10 @@ defmodule BDS.Desktop.ShellLiveTest do
|
||||
|
||||
test "chat editor shows in-flight stop state and can abort a running turn" do
|
||||
assert :ok = AI.set_airplane_mode(false)
|
||||
{:ok, _} = GatedChatServer.hold_gate()
|
||||
|
||||
server =
|
||||
start_supervised!({Bandit, plug: DelayedChatServer, port: 0, startup_log: false})
|
||||
start_supervised!({Bandit, plug: GatedChatServer, port: 0, startup_log: false})
|
||||
|
||||
{:ok, {_address, port}} = ThousandIsland.listener_info(server)
|
||||
|
||||
@@ -4401,15 +4426,17 @@ defmodule BDS.Desktop.ShellLiveTest do
|
||||
|
||||
refute html =~ ~s(data-testid="chat-abort-button")
|
||||
|
||||
Process.sleep(350)
|
||||
GatedChatServer.release_gate()
|
||||
Process.sleep(100)
|
||||
refute render(view) =~ "Delayed response"
|
||||
end
|
||||
|
||||
test "chat editor keeps the in-flight user turn visible and disables input while streaming" do
|
||||
assert :ok = AI.set_airplane_mode(false)
|
||||
{:ok, _} = GatedChatServer.hold_gate()
|
||||
|
||||
server =
|
||||
start_supervised!({Bandit, plug: DelayedChatServer, port: 0, startup_log: false})
|
||||
start_supervised!({Bandit, plug: GatedChatServer, port: 0, startup_log: false})
|
||||
|
||||
{:ok, {_address, port}} = ThousandIsland.listener_info(server)
|
||||
|
||||
@@ -4483,15 +4510,17 @@ defmodule BDS.Desktop.ShellLiveTest do
|
||||
|
||||
refute html =~ ~s(data-testid="chat-abort-button")
|
||||
|
||||
Process.sleep(350)
|
||||
GatedChatServer.release_gate()
|
||||
Process.sleep(100)
|
||||
refute render(view) =~ "Delayed response"
|
||||
end
|
||||
|
||||
test "chat editor does not duplicate persisted turn artifacts while the request is still active" do
|
||||
assert :ok = AI.set_airplane_mode(false)
|
||||
{:ok, _} = GatedChatServer.hold_gate()
|
||||
|
||||
server =
|
||||
start_supervised!({Bandit, plug: DelayedChatServer, port: 0, startup_log: false})
|
||||
start_supervised!({Bandit, plug: GatedChatServer, port: 0, startup_log: false})
|
||||
|
||||
{:ok, {_address, port}} = ThousandIsland.listener_info(server)
|
||||
|
||||
@@ -4575,8 +4604,6 @@ defmodule BDS.Desktop.ShellLiveTest do
|
||||
view
|
||||
|> element("[data-testid='chat-abort-button']")
|
||||
|> render_click()
|
||||
|
||||
Process.sleep(350)
|
||||
end
|
||||
|
||||
test "translation validation route renders dedicated cards and fix controls", %{
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
FOOTER
|
||||
@@ -0,0 +1 @@
|
||||
FROM B
|
||||
@@ -0,0 +1 @@
|
||||
FROM A
|
||||
@@ -0,0 +1 @@
|
||||
FROM B
|
||||
@@ -0,0 +1 @@
|
||||
HEADER CONTENT
|
||||
Reference in New Issue
Block a user