From 8bfc509472d723e7d96c8284f9cb30bc2d60b5e2 Mon Sep 17 00:00:00 2001 From: Chili Palmer Date: Fri, 29 May 2026 22:41:34 +0200 Subject: [PATCH] fix: D1-9 implement ExecuteTransform pipeline with ordering and toast budget --- SPECGAPS.md | 2 +- config/config.exs | 5 +- lib/bds/scripts.ex | 20 +++ lib/bds/scripts/transforms.ex | 133 +++++++++++++++ test/bds/scripts/transforms_test.exs | 240 +++++++++++++++++++++++++++ 5 files changed, 398 insertions(+), 2 deletions(-) create mode 100644 lib/bds/scripts/transforms.ex create mode 100644 test/bds/scripts/transforms_test.exs diff --git a/SPECGAPS.md b/SPECGAPS.md index 803d646..660f0c7 100644 --- a/SPECGAPS.md +++ b/SPECGAPS.md @@ -122,7 +122,7 @@ All reconciled to follow code. Specs must be self-consistent and match code. | D1-6 | ~~LiquidFilterSubset (4 standard + 2 custom)~~ | template.allium:191 | **Resolved:** added `LiquidParser.validate/1`, which parses with the restricted tag grammar then walks the AST to reject any filter outside the allowed set — 4 standard (`escape`, `url_encode`, `default`, `append`) + 3 custom (`i18n`, `markdown`, `slugify`). Wired into `validate_liquid` (publish gate) and MCP `validate_template` so unsupported filters are rejected even though Liquex would otherwise apply them as built-in standard filters. Spec corrected to 3 custom filters (bundled templates use `slugify`); 9 tests added (6 unsupported filters rejected, 3 supported filters accepted). | | D1-7 | ~~LiquidOperatorSubset~~ | template.allium:210 | **Resolved:** `LiquidParser.validate/1` now walks the parsed AST for `{:op, _}` nodes and rejects any comparison operator outside the allowed `==`/`>` subset (`!=`, `<`, `>=`, `<=`, `contains`), sharing the publish gate and MCP `validate_template` surface with the tag/filter checks; spec `LiquidOperatorSubset` annotated with enforcement note; 10 tests added (5 unsupported operators rejected at publish, 5 supported `==`/`>`/`and`/`or`/bare-truthy expressions accepted). | | D1-8 | ~~MacroTimeout guarantee~~ | script.allium:94-95 | **Resolved:** added test in `api_test.exs` — an infinite-loop `render()` macro run with `max_reductions: :none` (forces the luerl sandbox onto its wall-clock path) and a 150ms `timeout` returns `{:error, :timeout}` and terminates within budget (<2s), proving the macro is killed near its budget rather than the default multi-minute script timeout | -| D1-9 | ExecuteTransform rule (pipeline, ordering, toast budget) | script.allium:229-263 | Write test: transform pipeline executes in order, toast budget enforced | +| D1-9 | ~~ExecuteTransform rule (pipeline, ordering, toast budget)~~ | script.allium:229-263 | **Resolved:** the `ExecuteTransform` rule had no engine — added `BDS.Scripts.Transforms.run/3` (+ `Scripts.list_transform_scripts/1` ordered by updated_at→slug→id and `Scripts.resolved_content/1`). The pipeline runs enabled project transforms sequentially on the blogmark candidate with a `{source="blogmark", url}` context, captures per-script errors without rolling back the last valid candidate (TransformPipelineContinuation), and enforces the toast budget (`transform_max_toasts_per_script`/`transform_max_toasts_total`/`transform_max_toast_length`, new config keys). 6 tests added (ordering, project/disabled scoping, continuation, context, per-script + total toast caps with truncation). Deep-link OS routing into this engine remains future work. | | D1-10 | TransformPipelineContinuation | script.allium:247-249 | Write test: error in transform doesn't halt pipeline | | D1-11 | ChatContextTruncation invariant | ai.allium:375-379 | Write test: long chat history trimmed to context window | | D1-12 | BoundedToolLoop enforcement | ai.allium:381-385 | Write test: tool rounds bounded by chat_max_tool_rounds | diff --git a/config/config.exs b/config/config.exs index bbd14a2..58eeb03 100644 --- a/config/config.exs +++ b/config/config.exs @@ -58,7 +58,10 @@ config :bds, :scripting, timeout: 300_000, max_reductions: 5_000_000, job_timeout: :infinity, - job_max_reductions: :none + job_max_reductions: :none, + transform_max_toasts_per_script: 5, + transform_max_toasts_total: 20, + transform_max_toast_length: 300 config :bds, :embeddings, backend: BDS.Embeddings.Backends.Neural, diff --git a/lib/bds/scripts.ex b/lib/bds/scripts.ex index 7a57d7c..6537311 100644 --- a/lib/bds/scripts.ex +++ b/lib/bds/scripts.ex @@ -225,6 +225,26 @@ defmodule BDS.Scripts do end end + @doc """ + Returns the executable source for a script, reading the published file body + when the in-memory content is not loaded. + """ + @spec resolved_content(Script.t()) :: String.t() + def resolved_content(%Script{} = script), do: effective_script_content(script) + + @doc """ + Lists enabled `transform` scripts for a project in the deterministic order + the transform pipeline applies them: updated_at, then slug, then id. + """ + @spec list_transform_scripts(String.t()) :: [Script.t()] + def list_transform_scripts(project_id) when is_binary(project_id) do + Script + |> where([s], s.project_id == ^project_id and s.kind == :transform and s.enabled == true) + |> order_by([s], asc: s.updated_at, asc: s.slug, asc: s.id) + |> Repo.all() + |> Enum.map(&hydrate_script_content/1) + end + defp default_entrypoint(:macro), do: "render" defp default_entrypoint(_kind), do: "main" diff --git a/lib/bds/scripts/transforms.ex b/lib/bds/scripts/transforms.ex new file mode 100644 index 0000000..3a3742c --- /dev/null +++ b/lib/bds/scripts/transforms.ex @@ -0,0 +1,133 @@ +defmodule BDS.Scripts.Transforms do + @moduledoc """ + Runs the blogmark transform pipeline (spec: script.allium `ExecuteTransform`). + + Enabled `transform` scripts for a project are applied sequentially to a post + candidate produced by a `bds://new-post` blogmark deep link. Each transform + receives the current candidate plus a context describing the blogmark source + and origin URL, and returns the modified candidate. + + Guarantees enforced here: + + * `TransformTrigger` — each script receives the candidate plus + `{source = "blogmark", url = ...}` context. + * `TransformPipelineContinuation` — a transform error is captured per script + and does not roll back the last valid candidate; the pipeline continues. + * `TransformToastBudget` — at most `transform_max_toasts_per_script` toasts + are accepted from any one transform, with a total budget of + `transform_max_toasts_total`, each truncated to + `transform_max_toast_length` characters. + """ + + require Logger + + alias BDS.Scripts + alias BDS.Scripts.Script + alias BDS.Scripting + alias BDS.Scripting.Capabilities.Util + + @type data :: %{optional(String.t()) => term()} + @type result :: %{ + data: data(), + toasts: [String.t()], + errors: [%{slug: String.t() | nil, reason: term()}] + } + + @doc """ + Applies every enabled transform script for `project_id` to `data` in order. + + Returns `{:ok, %{data:, toasts:, errors:}}` where `data` is the final + candidate, `toasts` are the budget-enforced messages accepted across the + pipeline, and `errors` records any transforms that failed. + """ + @spec run(String.t(), data(), keyword()) :: {:ok, result()} + def run(project_id, data, opts \\ []) + when is_binary(project_id) and is_map(data) and is_list(opts) do + context = %{"source" => "blogmark", "url" => Map.get(data, "url")} + transforms = Scripts.list_transform_scripts(project_id) + + initial = %{data: data, toasts: [], errors: [], toast_total: 0} + + final = + Enum.reduce(transforms, initial, fn script, acc -> + apply_transform(project_id, script, context, acc, opts) + end) + + {:ok, + %{data: final.data, toasts: Enum.reverse(final.toasts), errors: Enum.reverse(final.errors)}} + end + + defp apply_transform(_project_id, %Script{entrypoint: entry}, _context, acc, _opts) + when entry in [nil, ""] do + acc + end + + defp apply_transform(project_id, %Script{} = script, context, acc, opts) do + source = Scripts.resolved_content(script) + + case Scripting.execute_project_script( + project_id, + source, + script.entrypoint, + [acc.data, context], + opts + ) do + {:ok, returned} -> + {next_data, raw_toasts} = split_return(Util.normalize_input(returned), acc.data) + accept_toasts(%{acc | data: next_data}, raw_toasts) + + {:error, reason} -> + Logger.warning("transform #{script.slug} failed: #{inspect(reason)}") + %{acc | errors: [%{slug: script.slug, reason: reason} | acc.errors]} + end + end + + # A transform may return either the candidate map directly, or a wrapper + # `{ data = , toasts = [...] }`. Blogmark candidates never carry a + # nested "data" map, so the wrapper shape is unambiguous. + defp split_return(%{"data" => %{} = inner} = wrapper, _previous) do + {inner, toast_list(Map.get(wrapper, "toasts"))} + end + + defp split_return(returned, _previous) when is_map(returned), do: {returned, []} + defp split_return(_returned, previous), do: {previous, []} + + defp toast_list(list) when is_list(list), do: Enum.filter(list, &is_binary/1) + defp toast_list(_other), do: [] + + defp accept_toasts(acc, raw_toasts) do + per_script_max = config(:transform_max_toasts_per_script, 5) + total_max = config(:transform_max_toasts_total, 20) + max_length = config(:transform_max_toast_length, 300) + + raw_toasts + |> Enum.take(per_script_max) + |> Enum.reduce(acc, fn message, inner_acc -> + if inner_acc.toast_total >= total_max do + inner_acc + else + truncated = truncate(message, max_length) + + %{ + inner_acc + | toasts: [truncated | inner_acc.toasts], + toast_total: inner_acc.toast_total + 1 + } + end + end) + end + + defp truncate(message, max_length) do + if String.length(message) > max_length do + String.slice(message, 0, max_length) + else + message + end + end + + defp config(key, default) do + :bds + |> Application.fetch_env!(:scripting) + |> Keyword.get(key, default) + end +end diff --git a/test/bds/scripts/transforms_test.exs b/test/bds/scripts/transforms_test.exs new file mode 100644 index 0000000..a6cc1b0 --- /dev/null +++ b/test/bds/scripts/transforms_test.exs @@ -0,0 +1,240 @@ +defmodule BDS.Scripts.TransformsTest do + use ExUnit.Case, async: false + + alias BDS.Scripts + alias BDS.Scripts.Transforms + + setup do + :ok = Ecto.Adapters.SQL.Sandbox.checkout(BDS.Repo) + Ecto.Adapters.SQL.Sandbox.mode(BDS.Repo, {:shared, self()}) + + temp_dir = + Path.join(System.tmp_dir!(), "bds-transforms-#{System.unique_integer([:positive])}") + + File.mkdir_p!(temp_dir) + on_exit(fn -> File.rm_rf(temp_dir) end) + + {:ok, project} = BDS.Projects.create_project(%{name: "Transforms", data_path: temp_dir}) + + %{project: project} + end + + defp transform(project_id, title, content, opts \\ []) do + {:ok, script} = + Scripts.create_script(%{ + project_id: project_id, + title: title, + kind: :transform, + content: content, + entrypoint: Keyword.get(opts, :entrypoint, "main") + }) + + script = + case Keyword.get(opts, :enabled, true) do + true -> + script + + false -> + {:ok, s} = Scripts.update_script(script.id, %{enabled: false}) + s + end + + script + end + + test "runs enabled transforms in deterministic order (updated_at, slug, id)", %{ + project: project + } do + # Each transform appends its marker to content so we can read execution order. + transform(project.id, "Bravo", """ + function main(data, _ctx) + data.content = data.content .. "B" + return data + end + """) + + # Ensure distinct updated_at ordering by spacing out creation. + Process.sleep(5) + + transform(project.id, "Alpha", """ + function main(data, _ctx) + data.content = data.content .. "A" + return data + end + """) + + data = %{ + "title" => "t", + "content" => "", + "tags" => [], + "categories" => [], + "url" => "http://x" + } + + assert {:ok, result} = Transforms.run(project.id, data) + # Bravo created first (earlier updated_at) so runs before Alpha. + assert result.data["content"] == "BA" + assert result.errors == [] + end + + test "disabled transforms and transforms from other projects are skipped", %{project: project} do + {:ok, other} = + BDS.Projects.create_project(%{ + name: "Other", + data_path: + Path.join( + System.tmp_dir!(), + "bds-transforms-other-#{System.unique_integer([:positive])}" + ) + }) + + transform( + project.id, + "Disabled", + """ + function main(data, _ctx) + data.content = data.content .. "D" + return data + end + """, enabled: false) + + transform(other.id, "Foreign", """ + function main(data, _ctx) + data.content = data.content .. "F" + return data + end + """) + + transform(project.id, "Enabled", """ + function main(data, _ctx) + data.content = data.content .. "E" + return data + end + """) + + data = %{ + "title" => "t", + "content" => "", + "tags" => [], + "categories" => [], + "url" => "http://x" + } + + assert {:ok, result} = Transforms.run(project.id, data) + assert result.data["content"] == "E" + end + + test "pipeline continues after a failing transform, keeping last valid state", %{ + project: project + } do + transform(project.id, "First", """ + function main(data, _ctx) + data.content = data.content .. "1" + return data + end + """) + + Process.sleep(5) + + transform(project.id, "Boom", """ + function main(_data, _ctx) + error("boom") + end + """) + + Process.sleep(5) + + transform(project.id, "Third", """ + function main(data, _ctx) + data.content = data.content .. "3" + return data + end + """) + + data = %{ + "title" => "t", + "content" => "", + "tags" => [], + "categories" => [], + "url" => "http://x" + } + + assert {:ok, result} = Transforms.run(project.id, data) + # Boom's failure does not roll back "1" and does not stop "3". + assert result.data["content"] == "13" + assert [%{reason: _}] = result.errors + end + + test "receives blogmark context with source and originating url", %{project: project} do + transform(project.id, "Ctx", """ + function main(data, ctx) + data.content = ctx.source .. "|" .. ctx.url + return data + end + """) + + data = %{ + "title" => "t", + "content" => "", + "tags" => [], + "categories" => [], + "url" => "http://example.com/a" + } + + assert {:ok, result} = Transforms.run(project.id, data) + assert result.data["content"] == "blogmark|http://example.com/a" + end + + test "per-script toast budget caps and truncates toasts", %{project: project} do + long = String.duplicate("x", 500) + + transform(project.id, "Noisy", """ + function main(data, _ctx) + local toasts = {} + for i = 1, 10 do toasts[i] = "#{long}" end + return { data = data, toasts = toasts } + end + """) + + data = %{ + "title" => "t", + "content" => "", + "tags" => [], + "categories" => [], + "url" => "http://x" + } + + assert {:ok, result} = Transforms.run(project.id, data) + # max 5 per script + assert length(result.toasts) == 5 + # truncated to 300 chars + assert Enum.all?(result.toasts, &(String.length(&1) == 300)) + end + + test "total toast budget caps across the whole pipeline", %{project: project} do + body = """ + function main(data, _ctx) + local toasts = {} + for i = 1, 5 do toasts[i] = "msg" end + return { data = data, toasts = toasts } + end + """ + + # 5 transforms x 5 toasts each = 25 emitted, total budget is 20. + for i <- 1..5 do + transform(project.id, "T#{i}", body) + Process.sleep(3) + end + + data = %{ + "title" => "t", + "content" => "", + "tags" => [], + "categories" => [], + "url" => "http://x" + } + + assert {:ok, result} = Transforms.run(project.id, data) + assert length(result.toasts) == 20 + end +end