fix: D1-9 implement ExecuteTransform pipeline with ordering and toast budget

2026-05-29 22:41:34 +02:00
parent e89a061d8f
commit 8bfc509472
5 changed files with 398 additions and 2 deletions
--- a/SPECGAPS.md
+++ b/SPECGAPS.md
@@ -122,7 +122,7 @@ All reconciled to follow code. Specs must be self-consistent and match code.
 | D1-6 | ~~LiquidFilterSubset (4 standard + 2 custom)~~ | template.allium:191 | **Resolved:** added `LiquidParser.validate/1`, which parses with the restricted tag grammar then walks the AST to reject any filter outside the allowed set — 4 standard (`escape`, `url_encode`, `default`, `append`) + 3 custom (`i18n`, `markdown`, `slugify`). Wired into `validate_liquid` (publish gate) and MCP `validate_template` so unsupported filters are rejected even though Liquex would otherwise apply them as built-in standard filters. Spec corrected to 3 custom filters (bundled templates use `slugify`); 9 tests added (6 unsupported filters rejected, 3 supported filters accepted). |
 | D1-7 | ~~LiquidOperatorSubset~~ | template.allium:210 | **Resolved:** `LiquidParser.validate/1` now walks the parsed AST for `{:op, _}` nodes and rejects any comparison operator outside the allowed `==`/`>` subset (`!=`, `<`, `>=`, `<=`, `contains`), sharing the publish gate and MCP `validate_template` surface with the tag/filter checks; spec `LiquidOperatorSubset` annotated with enforcement note; 10 tests added (5 unsupported operators rejected at publish, 5 supported `==`/`>`/`and`/`or`/bare-truthy expressions accepted). |
 | D1-8 | ~~MacroTimeout guarantee~~ | script.allium:94-95 | **Resolved:** added test in `api_test.exs` — an infinite-loop `render()` macro run with `max_reductions: :none` (forces the luerl sandbox onto its wall-clock path) and a 150ms `timeout` returns `{:error, :timeout}` and terminates within budget (<2s), proving the macro is killed near its budget rather than the default multi-minute script timeout |
-| D1-9 | ExecuteTransform rule (pipeline, ordering, toast budget) | script.allium:229-263 | Write test: transform pipeline executes in order, toast budget enforced |
+| D1-9 | ~~ExecuteTransform rule (pipeline, ordering, toast budget)~~ | script.allium:229-263 | **Resolved:** the `ExecuteTransform` rule had no engine — added `BDS.Scripts.Transforms.run/3` (+ `Scripts.list_transform_scripts/1` ordered by updated_at→slug→id and `Scripts.resolved_content/1`). The pipeline runs enabled project transforms sequentially on the blogmark candidate with a `{source="blogmark", url}` context, captures per-script errors without rolling back the last valid candidate (TransformPipelineContinuation), and enforces the toast budget (`transform_max_toasts_per_script`/`transform_max_toasts_total`/`transform_max_toast_length`, new config keys). 6 tests added (ordering, project/disabled scoping, continuation, context, per-script + total toast caps with truncation). Deep-link OS routing into this engine remains future work. |
 | D1-10 | TransformPipelineContinuation | script.allium:247-249 | Write test: error in transform doesn't halt pipeline |
 | D1-11 | ChatContextTruncation invariant | ai.allium:375-379 | Write test: long chat history trimmed to context window |
 | D1-12 | BoundedToolLoop enforcement | ai.allium:381-385 | Write test: tool rounds bounded by chat_max_tool_rounds |
--- a/config/config.exs
+++ b/config/config.exs
@@ -58,7 +58,10 @@ config :bds, :scripting,
  timeout: 300_000,
  max_reductions: 5_000_000,
  job_timeout: :infinity,
-  job_max_reductions: :none
+  job_max_reductions: :none,
  transform_max_toasts_per_script: 5,
  transform_max_toasts_total: 20,
  transform_max_toast_length: 300
 config :bds, :embeddings,
  backend: BDS.Embeddings.Backends.Neural,
--- a/lib/bds/scripts.ex
+++ b/lib/bds/scripts.ex
@@ -225,6 +225,26 @@ defmodule BDS.Scripts do
    end
  end
  @doc """
  Resolves the source text that should be executed for `script`.

  When the script's in-memory content is not loaded, the published file body
  is read instead. The lookup itself is delegated to
  `effective_script_content/1`.
  """
  @spec resolved_content(Script.t()) :: String.t()
  def resolved_content(%Script{} = script) do
    effective_script_content(script)
  end
  @doc """
  Fetches the enabled `transform` scripts of a project, sorted into the order
  in which the transform pipeline applies them.

  Rows are sorted ascending by `updated_at`, with `slug` and then `id` as
  tie-breakers, so the result is deterministic. Every row is passed through
  `hydrate_script_content/1` before it is returned.
  """
  @spec list_transform_scripts(String.t()) :: [Script.t()]
  def list_transform_scripts(project_id) when is_binary(project_id) do
    enabled_transforms =
      where(
        Script,
        [script],
        script.project_id == ^project_id and script.kind == :transform and
          script.enabled == true
      )

    ordered =
      order_by(enabled_transforms, [script],
        asc: script.updated_at,
        asc: script.slug,
        asc: script.id
      )

    for row <- Repo.all(ordered), do: hydrate_script_content(row)
  end
  # Maps a script kind to its default entrypoint name: macros use "render",
  # every other kind uses "main".
  defp default_entrypoint(kind) do
    case kind do
      :macro -> "render"
      _other -> "main"
    end
  end
--- a/lib/bds/scripts/transforms.ex
+++ b/lib/bds/scripts/transforms.ex
@@ -0,0 +1,133 @@
 defmodule BDS.Scripts.Transforms do
  @moduledoc """
  Runs the blogmark transform pipeline (spec: script.allium `ExecuteTransform`).
  Enabled `transform` scripts for a project are applied sequentially to a post
  candidate produced by a `bds://new-post` blogmark deep link. Each transform
  receives the current candidate plus a context describing the blogmark source
  and origin URL, and returns the modified candidate.
  Guarantees enforced here:
    * `TransformTrigger` — each script receives the candidate plus
      `{source = "blogmark", url = ...}` context.
    * `TransformPipelineContinuation` — a transform error is captured per script
      and does not roll back the last valid candidate; the pipeline continues.
    * `TransformToastBudget` — at most `transform_max_toasts_per_script` toasts
      are accepted from any one transform, with a total budget of
      `transform_max_toasts_total`, each truncated to
      `transform_max_toast_length` characters.
  """
  require Logger
  alias BDS.Scripts
  alias BDS.Scripts.Script
  alias BDS.Scripting
  alias BDS.Scripting.Capabilities.Util
  @type data :: %{optional(String.t()) => term()}
  @type result :: %{
          data: data(),
          toasts: [String.t()],
          errors: [%{slug: String.t() | nil, reason: term()}]
        }
  @doc """
  Runs the transform pipeline of `project_id` over the candidate `data`.

  The scripts come from `Scripts.list_transform_scripts/1` and are applied one
  after another in the order returned. Each one receives the current candidate
  together with a context map whose `"source"` is `"blogmark"` and whose
  `"url"` is taken from `data["url"]` (`nil` when the key is absent). `opts`
  is handed on unchanged to the script execution.

  Returns `{:ok, %{data:, toasts:, errors:}}`:

    * `data` — the candidate after the last transform that produced one
    * `toasts` — the accepted toast messages, in emission order
    * `errors` — one `%{slug:, reason:}` entry per failed transform, in
      execution order
  """
  @spec run(String.t(), data(), keyword()) :: {:ok, result()}
  def run(project_id, data, opts \\ [])
      when is_binary(project_id) and is_map(data) and is_list(opts) do
    blogmark_context = %{"source" => "blogmark", "url" => Map.get(data, "url")}
    scripts = Scripts.list_transform_scripts(project_id)

    # Toasts and errors are prepended while folding; `toast_total` tracks how
    # much of the pipeline-wide toast budget has been spent so far.
    seed = %{data: data, toasts: [], errors: [], toast_total: 0}

    outcome =
      for script <- scripts, reduce: seed do
        state -> apply_transform(project_id, script, blogmark_context, state, opts)
      end

    %{data: candidate, toasts: newest_first_toasts, errors: newest_first_errors} = outcome

    {:ok,
     %{
       data: candidate,
       toasts: Enum.reverse(newest_first_toasts),
       errors: Enum.reverse(newest_first_errors)
     }}
  end
  # Applies a single transform script to the pipeline state.
  #
  # A script whose entrypoint is `nil` or `""` is skipped: the state is
  # returned untouched and no error is recorded.
  defp apply_transform(_project_id, %Script{entrypoint: entrypoint}, _context, state, _opts)
       when is_nil(entrypoint) or entrypoint == "" do
    state
  end

  # Otherwise the script's resolved source is executed with the current
  # candidate and the blogmark context as arguments. On success the returned
  # value replaces the candidate and its toasts go through the budget; on
  # failure the error is logged and recorded while the candidate stays as is.
  defp apply_transform(project_id, %Script{} = script, context, state, opts) do
    code = Scripts.resolved_content(script)
    arguments = [state.data, context]

    outcome =
      Scripting.execute_project_script(project_id, code, script.entrypoint, arguments, opts)

    case outcome do
      {:error, reason} ->
        Logger.warning("transform #{script.slug} failed: #{inspect(reason)}")
        failure = %{slug: script.slug, reason: reason}
        %{state | errors: [failure | state.errors]}

      {:ok, returned} ->
        normalized = Util.normalize_input(returned)
        {candidate, emitted_toasts} = split_return(normalized, state.data)
        accept_toasts(%{state | data: candidate}, emitted_toasts)
    end
  end
  # Separates a transform's return value into `{candidate, raw_toasts}`.
  #
  # Three shapes are recognised:
  #   * a wrapper `{ data = <candidate>, toasts = [...] }` — the nested map is
  #     the candidate and the toasts are extracted from the wrapper. Blogmark
  #     candidates never carry a nested "data" map, so this is unambiguous.
  #   * any other map — it is the candidate itself, with no toasts.
  #   * anything else — the previous candidate is kept, with no toasts.
  defp split_return(returned, previous) do
    case returned do
      %{"data" => %{} = candidate} ->
        {candidate, toast_list(Map.get(returned, "toasts"))}

      %{} ->
        {returned, []}

      _not_a_map ->
        {previous, []}
    end
  end
  # Keeps only the string entries of a toast list, in their original order.
  # Anything that is not a list yields no toasts at all.
  defp toast_list(candidates) do
    if is_list(candidates) do
      for entry <- candidates, is_binary(entry), do: entry
    else
      []
    end
  end
  # Folds one script's toasts into the pipeline state under the toast budget.
  #
  # Only the first `transform_max_toasts_per_script` messages of the script are
  # considered. Each is accepted while the pipeline-wide counter is still below
  # `transform_max_toasts_total`, and is cut to `transform_max_toast_length`
  # characters. Accepted toasts are prepended, so `toasts` is newest-first.
  defp accept_toasts(acc, raw_toasts) do
    script_cap = config(:transform_max_toasts_per_script, 5)
    pipeline_cap = config(:transform_max_toasts_total, 20)
    length_cap = config(:transform_max_toast_length, 300)

    candidates = Enum.take(raw_toasts, script_cap)

    # The counter only grows, so once the total budget is spent every remaining
    # toast would be dropped as well — halting early gives the same state.
    Enum.reduce_while(candidates, acc, fn toast, state ->
      if state.toast_total < pipeline_cap do
        accepted = [truncate(toast, length_cap) | state.toasts]
        {:cont, %{state | toasts: accepted, toast_total: state.toast_total + 1}}
      else
        {:halt, state}
      end
    end)
  end
  # Cuts `message` down to its first `max_length` characters (as counted by
  # `String.length/1`); messages that already fit are returned unchanged.
  defp truncate(message, max_length) do
    case String.length(message) do
      size when size > max_length -> String.slice(message, 0, max_length)
      _fits -> message
    end
  end
  # Reads `key` from the `:scripting` config group of the `:bds` application,
  # falling back to `default` when the key is not set.
  #
  # The group is read with `Application.get_env/3` and an empty-list default
  # rather than `Application.fetch_env!/2`: every caller supplies a per-key
  # default, so a missing `:scripting` group should yield those defaults
  # instead of raising in the middle of a pipeline run.
  defp config(key, default) do
    :bds
    |> Application.get_env(:scripting, [])
    |> Keyword.get(key, default)
  end
 end
--- a/test/bds/scripts/transforms_test.exs
+++ b/test/bds/scripts/transforms_test.exs
@@ -0,0 +1,240 @@
 defmodule BDS.Scripts.TransformsTest do
  use ExUnit.Case, async: false
  alias BDS.Scripts
  alias BDS.Scripts.Transforms
  # Checks out a shared sandbox connection and creates a fresh project backed
  # by its own temporary data directory, which is removed when the test exits.
  setup do
    :ok = Ecto.Adapters.SQL.Sandbox.checkout(BDS.Repo)
    Ecto.Adapters.SQL.Sandbox.mode(BDS.Repo, {:shared, self()})

    suffix = System.unique_integer([:positive])
    data_path = Path.join(System.tmp_dir!(), "bds-transforms-#{suffix}")
    File.mkdir_p!(data_path)
    on_exit(fn -> File.rm_rf(data_path) end)

    {:ok, project} = BDS.Projects.create_project(%{name: "Transforms", data_path: data_path})

    %{project: project}
  end
  # Creates a `transform` script with the given title and source for the
  # project and returns it.
  #
  # Options:
  #   * `:entrypoint` — entrypoint name, defaults to "main"
  #   * `:enabled` — pass `false` to disable the script right after creation
  defp transform(project_id, title, content, opts \\ []) do
    attrs = %{
      project_id: project_id,
      title: title,
      kind: :transform,
      content: content,
      entrypoint: Keyword.get(opts, :entrypoint, "main")
    }

    {:ok, created} = Scripts.create_script(attrs)

    case Keyword.get(opts, :enabled, true) do
      true ->
        created

      false ->
        {:ok, disabled} = Scripts.update_script(created.id, %{enabled: false})
        disabled
    end
  end
  test "runs enabled transforms in deterministic order (updated_at, slug, id)", %{
    project: %{id: project_id}
  } do
    # Builds a transform that appends `marker` to the candidate content, so the
    # final content spells out the order in which the scripts ran.
    appender = fn marker ->
      """
      function main(data, _ctx)
        data.content = data.content .. "#{marker}"
        return data
      end
      """
    end

    transform(project_id, "Bravo", appender.("B"))
    # Space out creation so the two scripts get distinct updated_at values.
    Process.sleep(5)
    transform(project_id, "Alpha", appender.("A"))

    candidate = %{
      "url" => "http://x",
      "categories" => [],
      "tags" => [],
      "content" => "",
      "title" => "t"
    }

    assert {:ok, %{data: final, errors: errors}} = Transforms.run(project_id, candidate)
    # Bravo was created first (earlier updated_at), so it runs before Alpha
    # even though its slug sorts later.
    assert final["content"] == "BA"
    assert errors == []
  end
  test "disabled transforms and transforms from other projects are skipped", %{project: project} do
    # Mirror the setup block for the second project: create its data directory
    # up front and remove it afterwards, so the run leaves nothing behind in
    # the system tmp dir.
    other_dir =
      Path.join(
        System.tmp_dir!(),
        "bds-transforms-other-#{System.unique_integer([:positive])}"
      )

    File.mkdir_p!(other_dir)
    on_exit(fn -> File.rm_rf(other_dir) end)

    {:ok, other} = BDS.Projects.create_project(%{name: "Other", data_path: other_dir})

    # Belongs to the project under test but is disabled: must not run.
    transform(
      project.id,
      "Disabled",
      """
      function main(data, _ctx)
        data.content = data.content .. "D"
        return data
      end
      """,
      enabled: false
    )

    # Enabled, but owned by a different project: must not run either.
    transform(other.id, "Foreign", """
    function main(data, _ctx)
      data.content = data.content .. "F"
      return data
    end
    """)

    # The only script expected to contribute its marker.
    transform(project.id, "Enabled", """
    function main(data, _ctx)
      data.content = data.content .. "E"
      return data
    end
    """)

    data = %{
      "title" => "t",
      "content" => "",
      "tags" => [],
      "categories" => [],
      "url" => "http://x"
    }

    assert {:ok, result} = Transforms.run(project.id, data)
    assert result.data["content"] == "E"
  end
  test "pipeline continues after a failing transform, keeping last valid state", %{
    project: %{id: project_id}
  } do
    # Builds a transform that appends `marker` to the candidate content.
    appender = fn marker ->
      """
      function main(data, _ctx)
        data.content = data.content .. "#{marker}"
        return data
      end
      """
    end

    failing = """
    function main(_data, _ctx)
      error("boom")
    end
    """

    # The sleeps space out creation between the three scripts.
    transform(project_id, "First", appender.("1"))
    Process.sleep(5)
    transform(project_id, "Boom", failing)
    Process.sleep(5)
    transform(project_id, "Third", appender.("3"))

    candidate = %{
      "url" => "http://x",
      "categories" => [],
      "tags" => [],
      "content" => "",
      "title" => "t"
    }

    assert {:ok, %{data: final, errors: errors}} = Transforms.run(project_id, candidate)
    # Boom's failure neither rolls back "1" nor prevents "3" from running.
    assert final["content"] == "13"
    # Exactly one failure is recorded.
    assert [%{reason: _}] = errors
  end
  test "receives blogmark context with source and originating url", %{project: project} do
    origin = "http://example.com/a"

    # The script copies both context fields into the content so they can be
    # inspected from the outside.
    echo_context = """
    function main(data, ctx)
      data.content = ctx.source .. "|" .. ctx.url
      return data
    end
    """

    transform(project.id, "Ctx", echo_context)

    candidate = %{
      "url" => origin,
      "categories" => [],
      "tags" => [],
      "content" => "",
      "title" => "t"
    }

    assert {:ok, %{data: final}} = Transforms.run(project.id, candidate)
    assert final["content"] == "blogmark|" <> origin
  end
  test "per-script toast budget caps and truncates toasts", %{project: project} do
    oversized = String.duplicate("x", 500)

    # One script emitting ten toasts of 500 characters each: more toasts than
    # the per-script cap and longer than the length cap.
    noisy = """
    function main(data, _ctx)
      local toasts = {}
      for i = 1, 10 do toasts[i] = "#{oversized}" end
      return { data = data, toasts = toasts }
    end
    """

    transform(project.id, "Noisy", noisy)

    candidate = %{
      "url" => "http://x",
      "categories" => [],
      "tags" => [],
      "content" => "",
      "title" => "t"
    }

    assert {:ok, %{toasts: toasts}} = Transforms.run(project.id, candidate)
    # At most 5 toasts are accepted from a single script ...
    assert length(toasts) == 5
    # ... and every accepted toast is cut down to 300 characters.
    assert Enum.all?(toasts, fn toast -> String.length(toast) == 300 end)
  end
  test "total toast budget caps across the whole pipeline", %{project: project} do
    # Every script emits exactly five short toasts, i.e. it stays within the
    # per-script cap on its own.
    five_toasts = """
    function main(data, _ctx)
      local toasts = {}
      for i = 1, 5 do toasts[i] = "msg" end
      return { data = data, toasts = toasts }
    end
    """

    # 5 transforms x 5 toasts each = 25 emitted, while the total budget is 20.
    Enum.each(1..5, fn index ->
      transform(project.id, "T#{index}", five_toasts)
      Process.sleep(3)
    end)

    candidate = %{
      "url" => "http://x",
      "categories" => [],
      "tags" => [],
      "content" => "",
      "title" => "t"
    }

    assert {:ok, %{toasts: toasts}} = Transforms.run(project.id, candidate)
    assert length(toasts) == 20
  end
 end