test: D1-11 cover ChatContextTruncation invariant in chat requests
This commit is contained in:
@@ -125,7 +125,7 @@ All reconciled to follow code. Specs must be self-consistent and match code.
|
|||||||
| D1-8 | ~~MacroTimeout guarantee~~ | script.allium:94-95 | **Resolved:** added test in `api_test.exs` — an infinite-loop `render()` macro run with `max_reductions: :none` (forces the luerl sandbox onto its wall-clock path) and a 150ms `timeout` returns `{:error, :timeout}` and terminates within budget (<2s), proving the macro is killed near its budget rather than the default multi-minute script timeout |
|
| D1-8 | ~~MacroTimeout guarantee~~ | script.allium:94-95 | **Resolved:** added test in `api_test.exs` — an infinite-loop `render()` macro run with `max_reductions: :none` (forces the luerl sandbox onto its wall-clock path) and a 150ms `timeout` returns `{:error, :timeout}` and terminates within budget (<2s), proving the macro is killed near its budget rather than the default multi-minute script timeout |
|
||||||
| D1-9 | ~~ExecuteTransform rule (pipeline, ordering, toast budget)~~ | script.allium:229-263 | **Resolved:** the `ExecuteTransform` rule had no engine — added `BDS.Scripts.Transforms.run/3` (+ `Scripts.list_transform_scripts/1` ordered by updated_at→slug→id and `Scripts.resolved_content/1`). The pipeline runs enabled project transforms sequentially on the blogmark candidate with a `{source="blogmark", url}` context, captures per-script errors without rolling back the last valid candidate (TransformPipelineContinuation), and enforces the toast budget (`transform_max_toasts_per_script`/`transform_max_toasts_total`/`transform_max_toast_length`, new config keys). 6 tests added (ordering, project/disabled scoping, continuation, context, per-script + total toast caps with truncation). Deep-link OS routing into this engine remains future work. |
|
| D1-9 | ~~ExecuteTransform rule (pipeline, ordering, toast budget)~~ | script.allium:229-263 | **Resolved:** the `ExecuteTransform` rule had no engine — added `BDS.Scripts.Transforms.run/3` (+ `Scripts.list_transform_scripts/1` ordered by updated_at→slug→id and `Scripts.resolved_content/1`). The pipeline runs enabled project transforms sequentially on the blogmark candidate with a `{source="blogmark", url}` context, captures per-script errors without rolling back the last valid candidate (TransformPipelineContinuation), and enforces the toast budget (`transform_max_toasts_per_script`/`transform_max_toasts_total`/`transform_max_toast_length`, new config keys). 6 tests added (ordering, project/disabled scoping, continuation, context, per-script + total toast caps with truncation). Deep-link OS routing into this engine remains future work. |
|
||||||
| D1-10 | ~~TransformPipelineContinuation~~ | script.allium:247-249 | **Resolved:** added focused test in `transforms_test.exs` — a failing *first* transform (no prior valid state) does not halt the pipeline: the original input survives, a later enabled transform still runs against it, and every failure is captured per-script in pipeline order tagged with its slug |
|
| D1-10 | ~~TransformPipelineContinuation~~ | script.allium:247-249 | **Resolved:** added focused test in `transforms_test.exs` — a failing *first* transform (no prior valid state) does not halt the pipeline: the original input survives, a later enabled transform still runs against it, and every failure is captured per-script in pipeline order tagged with its slug |
|
||||||
| D1-11 | ChatContextTruncation invariant | ai.allium:375-379 | Write test: long chat history trimmed to context window |
|
| D1-11 | ~~ChatContextTruncation invariant~~ | ai.allium:375-379 | **Resolved:** test added in `ai_test.exs` — a catalog model with a 2,000-token context window plus 40 large seeded turns forces truncation; the captured chat request keeps the system prompt as the first message, drops the oldest pairs first (surviving markers form a contiguous newest suffix, oldest absent), and always retains the newest user turn |
|
||||||
| D1-12 | BoundedToolLoop enforcement | ai.allium:381-385 | Write test: tool rounds bounded by chat_max_tool_rounds |
|
| D1-12 | BoundedToolLoop enforcement | ai.allium:381-385 | Write test: tool rounds bounded by chat_max_tool_rounds |
|
||||||
| D1-13 | DiscardPostChangesSideEffects | engine_side_effects.allium:99-104 | Write test: FTS updated after discard |
|
| D1-13 | DiscardPostChangesSideEffects | engine_side_effects.allium:99-104 | Write test: FTS updated after discard |
|
||||||
| D1-14 | ReplaceMediaFileSideEffects | engine_side_effects.allium:128-134 | Write test: file replaced, thumbnails regenerated |
|
| D1-14 | ReplaceMediaFileSideEffects | engine_side_effects.allium:128-134 | Write test: file replaced, thumbnails regenerated |
|
||||||
|
|||||||
@@ -1209,6 +1209,110 @@ defmodule BDS.AITest do
|
|||||||
end)
|
end)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
test "chat request is truncated to the model context window, dropping oldest pairs and keeping the system prompt" do
  # Literals shared by several steps below.
  model_id = "tiny-ctx-model"
  question = "newest question please answer"

  {:ok, project} = create_project_fixture("Truncation Chat")
  _fixtures = seed_project_content(project.id)

  # Register a catalog provider and a model whose context window is small, so
  # the seeded history cannot fit. Tool calls are disabled to keep the exchange
  # to a single round: the captured request is then the chat call itself.
  provider_attrs = %{id: "test", name: "Test Provider", updated_at: Persistence.now_ms()}

  %BDS.AI.CatalogProvider{}
  |> BDS.AI.CatalogProvider.changeset(provider_attrs)
  |> Repo.insert!()

  model_attrs = %{
    provider: "test",
    model_id: model_id,
    name: "Tiny Context Model",
    supports_tool_calls: false,
    context_window: 2_000,
    max_input_tokens: 2_000,
    max_output_tokens: 256,
    updated_at: Persistence.now_ms()
  }

  %BDS.AI.Model{}
  |> BDS.AI.Model.changeset(model_attrs)
  |> Repo.insert!()

  endpoint_attrs = %{
    url: "https://api.example.test/v1",
    api_key: "online-secret",
    model: model_id
  }

  assert {:ok, _endpoint} =
           BDS.AI.put_endpoint(:online, endpoint_attrs, secret_backend: FakeSecretBackend)

  assert :ok = BDS.AI.set_airplane_mode(false)

  # Passing a title up front avoids a separate title-generation request, so the
  # chat request is the only one sent.
  assert {:ok, conversation} = BDS.AI.start_chat(%{title: "Truncation Test", model: model_id})

  # Seed alternating user/assistant turns (odd n = user, even n = assistant).
  # Each turn carries a numbered marker plus filler so the whole history
  # overshoots the context budget.
  seeded_count = 40
  base_time = Persistence.now_ms() - 1_000_000
  filler = String.duplicate("x", 380)

  Enum.each(1..seeded_count, fn n ->
    role =
      case rem(n, 2) do
        1 -> :user
        0 -> :assistant
      end

    padded = n |> Integer.to_string() |> String.pad_leading(4, "0")

    Repo.insert!(%BDS.AI.ChatMessage{
      conversation_id: conversation.id,
      role: role,
      content: "[[MARK-#{padded}]] #{filler}",
      # Strictly increasing timestamps fix the chronological order of the turns.
      created_at: base_time + n
    })
  end)

  send_opts = [
    runtime: FakeRuntime,
    test_pid: self(),
    project_id: project.id,
    secret_backend: FakeSecretBackend
  ]

  assert {:ok, _reply} = BDS.AI.send_chat_message(conversation.id, question, send_opts)

  assert_received {:runtime_request, _endpoint, request}
  assert request.operation == :chat

  [system_message | rest] = request.messages

  # The system prompt leads the request and is non-empty.
  assert system_message["role"] == "system"
  system_content = system_message["content"]
  assert is_binary(system_content)
  assert system_content != ""

  # It appears exactly once: nothing after the head is a system message.
  assert Enum.all?(rest, &(&1["role"] != "system"))

  # Truncation really happened: fewer messages than 40 seeded turns + 1 new turn.
  assert length(rest) <= seeded_count

  # The newest user turn is the final message of the request.
  newest_message = List.last(rest)
  assert newest_message["content"] =~ question

  # Collect the marker numbers that survived, in request order. Messages without
  # a marker (such as the new user turn) are filtered out by the pattern match.
  marker_pattern = ~r/\[\[MARK-(\d+)\]\]/

  kept_markers =
    for message <- rest,
        [_full, number] <- [Regex.run(marker_pattern, message["content"] || "")],
        do: String.to_integer(number)

  assert kept_markers != []

  # Oldest turns are dropped first: survivors are an ascending, gap-free run
  # that ends at the newest seeded turn and no longer includes the first one.
  {oldest_kept, newest_kept} = Enum.min_max(kept_markers)

  assert newest_kept == seeded_count
  assert oldest_kept > 1
  assert kept_markers == Enum.sort(kept_markers)
  assert newest_kept - oldest_kept + 1 == length(kept_markers)
end
|
||||||
|
|
||||||
test "chat generates a short title after the first user turn using the title model" do
|
test "chat generates a short title after the first user turn using the title model" do
|
||||||
{:ok, project} = create_project_fixture("Title Chat")
|
{:ok, project} = create_project_fixture("Title Chat")
|
||||||
_fixtures = seed_project_content(project.id)
|
_fixtures = seed_project_content(project.id)
|
||||||
|
|||||||
Reference in New Issue
Block a user