From 8bfc509472d723e7d96c8284f9cb30bc2d60b5e2 Mon Sep 17 00:00:00 2001
From: Chili Palmer <hugoms@me.com>
Date: Fri, 29 May 2026 22:41:34 +0200
Subject: [PATCH] fix: D1-9 implement ExecuteTransform pipeline with ordering
 and toast budget

---
 SPECGAPS.md                          |   2 +-
 config/config.exs                    |   5 +-
 lib/bds/scripts.ex                   |  20 +++
 lib/bds/scripts/transforms.ex        | 133 +++++++++++++++
 test/bds/scripts/transforms_test.exs | 240 +++++++++++++++++++++++++++
 5 files changed, 398 insertions(+), 2 deletions(-)
 create mode 100644 lib/bds/scripts/transforms.ex
 create mode 100644 test/bds/scripts/transforms_test.exs

diff --git a/SPECGAPS.md b/SPECGAPS.md
index 803d646..660f0c7 100644
--- a/SPECGAPS.md
+++ b/SPECGAPS.md
@@ -122,7 +122,7 @@ All reconciled to follow code. Specs must be self-consistent and match code.
 | D1-6 | ~~LiquidFilterSubset (4 standard + 2 custom)~~ | template.allium:191 | **Resolved:** added `LiquidParser.validate/1`, which parses with the restricted tag grammar then walks the AST to reject any filter outside the allowed set — 4 standard (`escape`, `url_encode`, `default`, `append`) + 3 custom (`i18n`, `markdown`, `slugify`). Wired into `validate_liquid` (publish gate) and MCP `validate_template` so unsupported filters are rejected even though Liquex would otherwise apply them as built-in standard filters. Spec corrected to 3 custom filters (bundled templates use `slugify`); 9 tests added (6 unsupported filters rejected, 3 supported filters accepted). |
 | D1-7 | ~~LiquidOperatorSubset~~ | template.allium:210 | **Resolved:** `LiquidParser.validate/1` now walks the parsed AST for `{:op, _}` nodes and rejects any comparison operator outside the allowed `==`/`>` subset (`!=`, `<`, `>=`, `<=`, `contains`), sharing the publish gate and MCP `validate_template` surface with the tag/filter checks; spec `LiquidOperatorSubset` annotated with enforcement note; 10 tests added (5 unsupported operators rejected at publish, 5 supported `==`/`>`/`and`/`or`/bare-truthy expressions accepted). |
 | D1-8 | ~~MacroTimeout guarantee~~ | script.allium:94-95 | **Resolved:** added test in `api_test.exs` — an infinite-loop `render()` macro run with `max_reductions: :none` (forces the luerl sandbox onto its wall-clock path) and a 150ms `timeout` returns `{:error, :timeout}` and terminates within budget (<2s), proving the macro is killed near its budget rather than the default multi-minute script timeout |
-| D1-9 | ExecuteTransform rule (pipeline, ordering, toast budget) | script.allium:229-263 | Write test: transform pipeline executes in order, toast budget enforced |
+| D1-9 | ~~ExecuteTransform rule (pipeline, ordering, toast budget)~~ | script.allium:229-263 | **Resolved:** the `ExecuteTransform` rule had no engine — added `BDS.Scripts.Transforms.run/3` (+ `Scripts.list_transform_scripts/1` ordered by updated_at→slug→id and `Scripts.resolved_content/1`). The pipeline runs enabled project transforms sequentially on the blogmark candidate with a `{source="blogmark", url}` context, captures per-script errors without rolling back the last valid candidate (TransformPipelineContinuation), and enforces the toast budget (`transform_max_toasts_per_script`/`transform_max_toasts_total`/`transform_max_toast_length`, new config keys). 6 tests added (ordering, project/disabled scoping, continuation, context, per-script + total toast caps with truncation). Deep-link OS routing into this engine remains future work. |
 | D1-10 | TransformPipelineContinuation | script.allium:247-249 | Write test: error in transform doesn't halt pipeline |
 | D1-11 | ChatContextTruncation invariant | ai.allium:375-379 | Write test: long chat history trimmed to context window |
 | D1-12 | BoundedToolLoop enforcement | ai.allium:381-385 | Write test: tool rounds bounded by chat_max_tool_rounds |
diff --git a/config/config.exs b/config/config.exs
index bbd14a2..58eeb03 100644
--- a/config/config.exs
+++ b/config/config.exs
@@ -58,7 +58,10 @@ config :bds, :scripting,
   timeout: 300_000,
   max_reductions: 5_000_000,
   job_timeout: :infinity,
-  job_max_reductions: :none
+  job_max_reductions: :none,
+  transform_max_toasts_per_script: 5,
+  transform_max_toasts_total: 20,
+  transform_max_toast_length: 300
 
 config :bds, :embeddings,
   backend: BDS.Embeddings.Backends.Neural,
diff --git a/lib/bds/scripts.ex b/lib/bds/scripts.ex
index 7a57d7c..6537311 100644
--- a/lib/bds/scripts.ex
+++ b/lib/bds/scripts.ex
@@ -225,6 +225,26 @@ defmodule BDS.Scripts do
     end
   end
 
+  @doc """
+  Returns the executable source for a script, reading the published file body
+  when the in-memory content is not loaded.
+  """
+  @spec resolved_content(Script.t()) :: String.t()
+  def resolved_content(%Script{} = script), do: effective_script_content(script)
+
+  @doc """
+  Lists enabled `transform` scripts for a project in the deterministic order
+  the transform pipeline applies them: updated_at, then slug, then id.
+  """
+  @spec list_transform_scripts(String.t()) :: [Script.t()]
+  def list_transform_scripts(project_id) when is_binary(project_id) do
+    Script
+    |> where([s], s.project_id == ^project_id and s.kind == :transform and s.enabled == true)
+    |> order_by([s], asc: s.updated_at, asc: s.slug, asc: s.id)
+    |> Repo.all()
+    |> Enum.map(&hydrate_script_content/1)
+  end
+
   defp default_entrypoint(:macro), do: "render"
   defp default_entrypoint(_kind), do: "main"
 
diff --git a/lib/bds/scripts/transforms.ex b/lib/bds/scripts/transforms.ex
new file mode 100644
index 0000000..3a3742c
--- /dev/null
+++ b/lib/bds/scripts/transforms.ex
@@ -0,0 +1,133 @@
+defmodule BDS.Scripts.Transforms do
+  @moduledoc """
+  Runs the blogmark transform pipeline (spec: script.allium `ExecuteTransform`).
+
+  Enabled `transform` scripts for a project are applied sequentially to a post
+  candidate produced by a `bds://new-post` blogmark deep link. Each transform
+  receives the current candidate plus a context describing the blogmark source
+  and origin URL, and returns the modified candidate.
+
+  Guarantees enforced here:
+
+    * `TransformTrigger` — each script receives the candidate plus
+      `{source = "blogmark", url = ...}` context.
+    * `TransformPipelineContinuation` — a transform error is captured per script
+      and does not roll back the last valid candidate; the pipeline continues.
+    * `TransformToastBudget` — at most `transform_max_toasts_per_script` toasts
+      are accepted from any one transform, with a total budget of
+      `transform_max_toasts_total`, each truncated to
+      `transform_max_toast_length` characters.
+  """
+
+  require Logger
+
+  alias BDS.Scripts
+  alias BDS.Scripts.Script
+  alias BDS.Scripting
+  alias BDS.Scripting.Capabilities.Util
+
+  @type data :: %{optional(String.t()) => term()}
+  @type result :: %{
+          data: data(),
+          toasts: [String.t()],
+          errors: [%{slug: String.t() | nil, reason: term()}]
+        }
+
+  @doc """
+  Applies every enabled transform script for `project_id` to `data` in order.
+
+  Returns `{:ok, %{data:, toasts:, errors:}}` where `data` is the final
+  candidate, `toasts` are the budget-enforced messages accepted across the
+  pipeline, and `errors` records any transforms that failed.
+  """
+  @spec run(String.t(), data(), keyword()) :: {:ok, result()}
+  def run(project_id, data, opts \\ [])
+      when is_binary(project_id) and is_map(data) and is_list(opts) do
+    context = %{"source" => "blogmark", "url" => Map.get(data, "url")}
+    transforms = Scripts.list_transform_scripts(project_id)
+
+    initial = %{data: data, toasts: [], errors: [], toast_total: 0}
+
+    final =
+      Enum.reduce(transforms, initial, fn script, acc ->
+        apply_transform(project_id, script, context, acc, opts)
+      end)
+
+    {:ok,
+     %{data: final.data, toasts: Enum.reverse(final.toasts), errors: Enum.reverse(final.errors)}}
+  end
+
+  defp apply_transform(_project_id, %Script{entrypoint: entry}, _context, acc, _opts)
+       when entry in [nil, ""] do
+    acc
+  end
+
+  defp apply_transform(project_id, %Script{} = script, context, acc, opts) do
+    source = Scripts.resolved_content(script)
+
+    case Scripting.execute_project_script(
+           project_id,
+           source,
+           script.entrypoint,
+           [acc.data, context],
+           opts
+         ) do
+      {:ok, returned} ->
+        {next_data, raw_toasts} = split_return(Util.normalize_input(returned), acc.data)
+        accept_toasts(%{acc | data: next_data}, raw_toasts)
+
+      {:error, reason} ->
+        Logger.warning("transform #{script.slug} failed: #{inspect(reason)}")
+        %{acc | errors: [%{slug: script.slug, reason: reason} | acc.errors]}
+    end
+  end
+
+  # A transform may return either the candidate map directly, or a wrapper
+  # `{ data = <candidate>, toasts = [...] }`. Blogmark candidates never carry a
+  # nested "data" map, so the wrapper shape is unambiguous.
+  defp split_return(%{"data" => %{} = inner} = wrapper, _previous) do
+    {inner, toast_list(Map.get(wrapper, "toasts"))}
+  end
+
+  defp split_return(returned, _previous) when is_map(returned), do: {returned, []}
+  defp split_return(_returned, previous), do: {previous, []}
+
+  defp toast_list(list) when is_list(list), do: Enum.filter(list, &is_binary/1)
+  defp toast_list(_other), do: []
+
+  defp accept_toasts(acc, raw_toasts) do
+    per_script_max = config(:transform_max_toasts_per_script, 5)
+    total_max = config(:transform_max_toasts_total, 20)
+    max_length = config(:transform_max_toast_length, 300)
+
+    raw_toasts
+    |> Enum.take(per_script_max)
+    |> Enum.reduce(acc, fn message, inner_acc ->
+      if inner_acc.toast_total >= total_max do
+        inner_acc
+      else
+        truncated = truncate(message, max_length)
+
+        %{
+          inner_acc
+          | toasts: [truncated | inner_acc.toasts],
+            toast_total: inner_acc.toast_total + 1
+        }
+      end
+    end)
+  end
+
+  defp truncate(message, max_length) do
+    if String.length(message) > max_length do
+      String.slice(message, 0, max_length)
+    else
+      message
+    end
+  end
+
+  defp config(key, default) do
+    :bds
+    |> Application.fetch_env!(:scripting)
+    |> Keyword.get(key, default)
+  end
+end
diff --git a/test/bds/scripts/transforms_test.exs b/test/bds/scripts/transforms_test.exs
new file mode 100644
index 0000000..a6cc1b0
--- /dev/null
+++ b/test/bds/scripts/transforms_test.exs
@@ -0,0 +1,240 @@
+defmodule BDS.Scripts.TransformsTest do
+  use ExUnit.Case, async: false
+
+  alias BDS.Scripts
+  alias BDS.Scripts.Transforms
+
+  setup do
+    :ok = Ecto.Adapters.SQL.Sandbox.checkout(BDS.Repo)
+    Ecto.Adapters.SQL.Sandbox.mode(BDS.Repo, {:shared, self()})
+
+    temp_dir =
+      Path.join(System.tmp_dir!(), "bds-transforms-#{System.unique_integer([:positive])}")
+
+    File.mkdir_p!(temp_dir)
+    on_exit(fn -> File.rm_rf(temp_dir) end)
+
+    {:ok, project} = BDS.Projects.create_project(%{name: "Transforms", data_path: temp_dir})
+
+    %{project: project}
+  end
+
+  defp transform(project_id, title, content, opts \\ []) do
+    {:ok, script} =
+      Scripts.create_script(%{
+        project_id: project_id,
+        title: title,
+        kind: :transform,
+        content: content,
+        entrypoint: Keyword.get(opts, :entrypoint, "main")
+      })
+
+    script =
+      case Keyword.get(opts, :enabled, true) do
+        true ->
+          script
+
+        false ->
+          {:ok, s} = Scripts.update_script(script.id, %{enabled: false})
+          s
+      end
+
+    script
+  end
+
+  test "runs enabled transforms in deterministic order (updated_at, slug, id)", %{
+    project: project
+  } do
+    # Each transform appends its marker to content so we can read execution order.
+    transform(project.id, "Bravo", """
+    function main(data, _ctx)
+      data.content = data.content .. "B"
+      return data
+    end
+    """)
+
+    # Ensure distinct updated_at ordering by spacing out creation.
+    Process.sleep(5)
+
+    transform(project.id, "Alpha", """
+    function main(data, _ctx)
+      data.content = data.content .. "A"
+      return data
+    end
+    """)
+
+    data = %{
+      "title" => "t",
+      "content" => "",
+      "tags" => [],
+      "categories" => [],
+      "url" => "http://x"
+    }
+
+    assert {:ok, result} = Transforms.run(project.id, data)
+    # Bravo created first (earlier updated_at) so runs before Alpha.
+    assert result.data["content"] == "BA"
+    assert result.errors == []
+  end
+
+  test "disabled transforms and transforms from other projects are skipped", %{project: project} do
+    {:ok, other} =
+      BDS.Projects.create_project(%{
+        name: "Other",
+        data_path:
+          Path.join(
+            System.tmp_dir!(),
+            "bds-transforms-other-#{System.unique_integer([:positive])}"
+          )
+      })
+
+    transform(
+      project.id,
+      "Disabled",
+      """
+      function main(data, _ctx)
+        data.content = data.content .. "D"
+        return data
+      end
+      """, enabled: false)
+
+    transform(other.id, "Foreign", """
+    function main(data, _ctx)
+      data.content = data.content .. "F"
+      return data
+    end
+    """)
+
+    transform(project.id, "Enabled", """
+    function main(data, _ctx)
+      data.content = data.content .. "E"
+      return data
+    end
+    """)
+
+    data = %{
+      "title" => "t",
+      "content" => "",
+      "tags" => [],
+      "categories" => [],
+      "url" => "http://x"
+    }
+
+    assert {:ok, result} = Transforms.run(project.id, data)
+    assert result.data["content"] == "E"
+  end
+
+  test "pipeline continues after a failing transform, keeping last valid state", %{
+    project: project
+  } do
+    transform(project.id, "First", """
+    function main(data, _ctx)
+      data.content = data.content .. "1"
+      return data
+    end
+    """)
+
+    Process.sleep(5)
+
+    transform(project.id, "Boom", """
+    function main(_data, _ctx)
+      error("boom")
+    end
+    """)
+
+    Process.sleep(5)
+
+    transform(project.id, "Third", """
+    function main(data, _ctx)
+      data.content = data.content .. "3"
+      return data
+    end
+    """)
+
+    data = %{
+      "title" => "t",
+      "content" => "",
+      "tags" => [],
+      "categories" => [],
+      "url" => "http://x"
+    }
+
+    assert {:ok, result} = Transforms.run(project.id, data)
+    # Boom's failure does not roll back "1" and does not stop "3".
+    assert result.data["content"] == "13"
+    assert [%{reason: _}] = result.errors
+  end
+
+  test "receives blogmark context with source and originating url", %{project: project} do
+    transform(project.id, "Ctx", """
+    function main(data, ctx)
+      data.content = ctx.source .. "|" .. ctx.url
+      return data
+    end
+    """)
+
+    data = %{
+      "title" => "t",
+      "content" => "",
+      "tags" => [],
+      "categories" => [],
+      "url" => "http://example.com/a"
+    }
+
+    assert {:ok, result} = Transforms.run(project.id, data)
+    assert result.data["content"] == "blogmark|http://example.com/a"
+  end
+
+  test "per-script toast budget caps and truncates toasts", %{project: project} do
+    long = String.duplicate("x", 500)
+
+    transform(project.id, "Noisy", """
+    function main(data, _ctx)
+      local toasts = {}
+      for i = 1, 10 do toasts[i] = "#{long}" end
+      return { data = data, toasts = toasts }
+    end
+    """)
+
+    data = %{
+      "title" => "t",
+      "content" => "",
+      "tags" => [],
+      "categories" => [],
+      "url" => "http://x"
+    }
+
+    assert {:ok, result} = Transforms.run(project.id, data)
+    # max 5 per script
+    assert length(result.toasts) == 5
+    # truncated to 300 chars
+    assert Enum.all?(result.toasts, &(String.length(&1) == 300))
+  end
+
+  test "total toast budget caps across the whole pipeline", %{project: project} do
+    body = """
+    function main(data, _ctx)
+      local toasts = {}
+      for i = 1, 5 do toasts[i] = "msg" end
+      return { data = data, toasts = toasts }
+    end
+    """
+
+    # 5 transforms x 5 toasts each = 25 emitted, total budget is 20.
+    for i <- 1..5 do
+      transform(project.id, "T#{i}", body)
+      Process.sleep(3)
+    end
+
+    data = %{
+      "title" => "t",
+      "content" => "",
+      "tags" => [],
+      "categories" => [],
+      "url" => "http://x"
+    }
+
+    assert {:ok, result} = Transforms.run(project.id, data)
+    assert length(result.toasts) == 20
+  end
+end