test: D1-12 enforce BoundedToolLoop via config.chat_max_tool_rounds
This commit is contained in:
@@ -126,7 +126,7 @@ All reconciled to follow code. Specs must be self-consistent and match code.
|
|||||||
| D1-9 | ~~ExecuteTransform rule (pipeline, ordering, toast budget)~~ | script.allium:229-263 | **Resolved:** the `ExecuteTransform` rule had no engine — added `BDS.Scripts.Transforms.run/3` (+ `Scripts.list_transform_scripts/1` ordered by updated_at→slug→id and `Scripts.resolved_content/1`). The pipeline runs enabled project transforms sequentially on the blogmark candidate with a `{source="blogmark", url}` context, captures per-script errors without rolling back the last valid candidate (TransformPipelineContinuation), and enforces the toast budget (`transform_max_toasts_per_script`/`transform_max_toasts_total`/`transform_max_toast_length`, new config keys). 6 tests added (ordering, project/disabled scoping, continuation, context, per-script + total toast caps with truncation). Deep-link OS routing into this engine remains future work. |
|
| D1-9 | ~~ExecuteTransform rule (pipeline, ordering, toast budget)~~ | script.allium:229-263 | **Resolved:** the `ExecuteTransform` rule had no engine — added `BDS.Scripts.Transforms.run/3` (+ `Scripts.list_transform_scripts/1` ordered by updated_at→slug→id and `Scripts.resolved_content/1`). The pipeline runs enabled project transforms sequentially on the blogmark candidate with a `{source="blogmark", url}` context, captures per-script errors without rolling back the last valid candidate (TransformPipelineContinuation), and enforces the toast budget (`transform_max_toasts_per_script`/`transform_max_toasts_total`/`transform_max_toast_length`, new config keys). 6 tests added (ordering, project/disabled scoping, continuation, context, per-script + total toast caps with truncation). Deep-link OS routing into this engine remains future work. |
|
||||||
| D1-10 | ~~TransformPipelineContinuation~~ | script.allium:247-249 | **Resolved:** added focused test in `transforms_test.exs` — a failing *first* transform (no prior valid state) does not halt the pipeline: the original input survives, a later enabled transform still runs against it, and every failure is captured per-script in pipeline order tagged with its slug |
|
| D1-10 | ~~TransformPipelineContinuation~~ | script.allium:247-249 | **Resolved:** added focused test in `transforms_test.exs` — a failing *first* transform (no prior valid state) does not halt the pipeline: the original input survives, a later enabled transform still runs against it, and every failure is captured per-script in pipeline order tagged with its slug |
|
||||||
| D1-11 | ~~ChatContextTruncation invariant~~ | ai.allium:375-379 | **Resolved:** test added in `ai_test.exs` — a catalog model with a 2,000-token context window plus 40 large seeded turns forces truncation; the captured chat request keeps the system prompt as the first message, drops the oldest pairs first (surviving markers form a contiguous newest suffix, oldest absent), and always retains the newest user turn |
|
| D1-11 | ~~ChatContextTruncation invariant~~ | ai.allium:375-379 | **Resolved:** test added in `ai_test.exs` — a catalog model with a 2,000-token context window plus 40 large seeded turns forces truncation; the captured chat request keeps the system prompt as the first message, drops the oldest pairs first (surviving markers form a contiguous newest suffix, oldest absent), and always retains the newest user turn |
|
||||||
| D1-12 | BoundedToolLoop enforcement | ai.allium:381-385 | Write test: tool rounds bounded by chat_max_tool_rounds |
|
| D1-12 | ~~BoundedToolLoop enforcement~~ | ai.allium:381-385 | **Resolved:** the round cap is now read from `config.chat_max_tool_rounds` (`config :bds, :chat, max_tool_rounds: 10`) via `chat_max_tool_rounds/0` in chat.ex instead of a hardcoded attribute, matching the spec wording; test added in `ai_test.exs` — a `LoopingToolRuntime` that always returns another tool call (never a final answer) with `max_tool_rounds: 3` ends with `{:error, %{kind: :tool_loop_exhausted}}` after exactly 3 runtime calls (the `rounds_left == 0` round short-circuits before contacting the runtime) |
|
||||||
| D1-13 | DiscardPostChangesSideEffects | engine_side_effects.allium:99-104 | Write test: FTS updated after discard |
|
| D1-13 | DiscardPostChangesSideEffects | engine_side_effects.allium:99-104 | Write test: FTS updated after discard |
|
||||||
| D1-14 | ReplaceMediaFileSideEffects | engine_side_effects.allium:128-134 | Write test: file replaced, thumbnails regenerated |
|
| D1-14 | ReplaceMediaFileSideEffects | engine_side_effects.allium:128-134 | Write test: file replaced, thumbnails regenerated |
|
||||||
| D1-15 | Drag-and-drop image chain | action_patterns.allium:84-103 | Write integration test |
|
| D1-15 | Drag-and-drop image chain | action_patterns.allium:84-103 | Write integration test |
|
||||||
|
|||||||
@@ -63,6 +63,8 @@ config :bds, :scripting,
|
|||||||
transform_max_toasts_total: 20,
|
transform_max_toasts_total: 20,
|
||||||
transform_max_toast_length: 300
|
transform_max_toast_length: 300
|
||||||
|
|
||||||
|
# BoundedToolLoop: cap on the chat tool-calling round count. BDS.AI.Chat reads
# this key from the application environment via chat_max_tool_rounds/0.
config :bds, :chat, max_tool_rounds: 10
|
||||||
|
|
||||||
config :bds, :embeddings,
|
config :bds, :embeddings,
|
||||||
backend: BDS.Embeddings.Backends.Neural,
|
backend: BDS.Embeddings.Backends.Neural,
|
||||||
model_id: "Xenova/multilingual-e5-small",
|
model_id: "Xenova/multilingual-e5-small",
|
||||||
|
|||||||
@@ -411,7 +411,7 @@ defmodule BDS.AI.Chat do
|
|||||||
tools,
|
tools,
|
||||||
runtime,
|
runtime,
|
||||||
opts,
|
opts,
|
||||||
@chat_max_tool_rounds
|
chat_max_tool_rounds()
|
||||||
),
|
),
|
||||||
{:ok, reply} <-
|
{:ok, reply} <-
|
||||||
maybe_generate_chat_title(conversation.id, user_message.content, reply, opts) do
|
maybe_generate_chat_title(conversation.id, user_message.content, reply, opts) do
|
||||||
@@ -752,6 +752,14 @@ defmodule BDS.AI.Chat do
|
|||||||
ChatTools.available_specs(project_id, Catalog.model_capabilities(model))
|
ChatTools.available_specs(project_id, Catalog.model_capabilities(model))
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# BoundedToolLoop: the tool-calling round count is capped by
# config.chat_max_tool_rounds (`config :bds, :chat, max_tool_rounds: n`).
#
# The value is read from the application environment each time this function
# is called. Only a non-negative integer is accepted; a missing key, a negative
# number or any non-integer value falls back to the built-in default
# (@chat_max_tool_rounds) so that a misconfiguration cannot weaken the bound.
# NOTE(review): the tool loop itself is outside this block — presumably it
# counts the budget down towards 0, in which case a negative start value would
# never terminate; confirm against the loop implementation.
defp chat_max_tool_rounds do
  configured_rounds =
    :bds
    |> Application.get_env(:chat, [])
    |> Keyword.get(:max_tool_rounds)

  case configured_rounds do
    rounds when is_integer(rounds) and rounds >= 0 -> rounds
    _missing_or_invalid -> @chat_max_tool_rounds
  end
end
|
||||||
|
|
||||||
defp chat_system_prompt(project_id, tools) do
|
defp chat_system_prompt(project_id, tools) do
|
||||||
base = get_setting("ai.system_prompt") || @default_system_prompt
|
base = get_setting("ai.system_prompt") || @default_system_prompt
|
||||||
|
|
||||||
|
|||||||
@@ -280,6 +280,32 @@ defmodule BDS.AITest do
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Runtime stub for the bounded-tool-loop test: every request is answered with
# one more `blog_stats` tool call and never with a final answer, so a chat
# would loop forever if the round count were not bounded.
defmodule LoopingToolRuntime do
  # Constant token accounting attached to every stubbed response.
  @token_usage %{
    input_tokens: 1,
    output_tokens: 1,
    cache_read_tokens: 0,
    cache_write_tokens: 0
  }

  # Notifies `opts[:test_pid]` (required; raises KeyError when absent) with
  # `{:looping_request, endpoint, request}` and then returns an `{:ok, response}`
  # tuple whose response holds exactly one pending tool call.
  def generate(endpoint, request, opts) do
    opts
    |> Keyword.fetch!(:test_pid)
    |> send({:looping_request, endpoint, request})

    {:ok, %{tool_calls: [pending_tool_call()], usage: @token_usage}}
  end

  # Builds a tool call with a fresh id so consecutive rounds never share one.
  defp pending_tool_call do
    %{
      id: "call-loop-#{System.unique_integer([:positive])}",
      name: "blog_stats",
      arguments: %{}
    }
  end
end
|
||||||
|
|
||||||
setup do
|
setup do
|
||||||
:ok = Ecto.Adapters.SQL.Sandbox.checkout(BDS.Repo)
|
:ok = Ecto.Adapters.SQL.Sandbox.checkout(BDS.Repo)
|
||||||
:ok
|
:ok
|
||||||
@@ -1313,6 +1339,56 @@ defmodule BDS.AITest do
|
|||||||
assert Enum.max(kept_markers) - Enum.min(kept_markers) + 1 == length(kept_markers)
|
assert Enum.max(kept_markers) - Enum.min(kept_markers) + 1 == length(kept_markers)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
test "chat tool execution is bounded by config.chat_max_tool_rounds" do
  {:ok, project} = create_project_fixture("Tool Loop Chat")
  _fixtures = seed_project_content(project.id)

  # Override the round cap for this test only. The prior state is captured with
  # fetch_env/2 so that "key was unset" and "key was set" can be told apart and
  # restored faithfully (an unset key is deleted again rather than left as []).
  # NOTE(review): this mutates the global application environment — confirm the
  # test module does not run with `async: true`.
  original_chat_env = Application.fetch_env(:bds, :chat)

  previous_chat_config =
    case original_chat_env do
      {:ok, chat_config} -> chat_config
      :error -> []
    end

  max_rounds = 3

  Application.put_env(
    :bds,
    :chat,
    Keyword.put(previous_chat_config, :max_tool_rounds, max_rounds)
  )

  on_exit(fn ->
    case original_chat_env do
      {:ok, chat_config} -> Application.put_env(:bds, :chat, chat_config)
      :error -> Application.delete_env(:bds, :chat)
    end
  end)

  assert {:ok, _endpoint} =
           BDS.AI.put_endpoint(
             :online,
             %{
               url: "https://api.example.test/v1",
               api_key: "online-secret",
               model: "gpt-4o-mini"
             },
             secret_backend: FakeSecretBackend
           )

  assert :ok = BDS.AI.set_airplane_mode(false)

  # Explicit title skips title generation, so only chat rounds reach the runtime.
  assert {:ok, conversation} =
           BDS.AI.start_chat(%{title: "Tool Loop", model: "gpt-4o-mini"})

  # The runtime never stops calling tools, so the loop only ends because the
  # round budget is exhausted.
  assert {:error, %{kind: :tool_loop_exhausted}} =
           BDS.AI.send_chat_message(conversation.id, "loop forever please",
             runtime: LoopingToolRuntime,
             test_pid: self(),
             project_id: project.id,
             secret_backend: FakeSecretBackend
           )

  # Exactly max_rounds generate calls happen: the final (rounds_left == 0)
  # round short-circuits before contacting the runtime.
  request_count = drain_looping_requests(0)
  assert request_count == max_rounds
end
|
||||||
|
|
||||||
|
# Counts the `{:looping_request, endpoint, request}` messages currently queued
# in the calling process's mailbox, starting from `seen`. Never blocks: the
# zero timeout makes the receive give up as soon as no matching message is
# queued. Messages of any other shape are left in the mailbox.
defp drain_looping_requests(seen) do
  mailbox_drained? =
    receive do
      {:looping_request, _endpoint, _request} -> false
    after
      0 -> true
    end

  if mailbox_drained? do
    seen
  else
    drain_looping_requests(seen + 1)
  end
end
|
||||||
|
|
||||||
test "chat generates a short title after the first user turn using the title model" do
|
test "chat generates a short title after the first user turn using the title model" do
|
||||||
{:ok, project} = create_project_fixture("Title Chat")
|
{:ok, project} = create_project_fixture("Title Chat")
|
||||||
_fixtures = seed_project_content(project.id)
|
_fixtures = seed_project_content(project.id)
|
||||||
|
|||||||
Reference in New Issue
Block a user