perf: batch CPU embedding inference and add A1-14c Apple GPU (EMLX) spec gap

2026-05-29 14:43:39 +02:00
parent a1004d72bf
commit 744f7543d7
10 changed files with 275 additions and 75 deletions
--- a/lib/bds/embeddings/backend.ex
+++ b/lib/bds/embeddings/backend.ex
@@ -3,4 +3,15 @@ defmodule BDS.Embeddings.Backend do

  @callback model_info() :: %{model_id: String.t(), dimensions: pos_integer()}
  @callback embed(String.t(), keyword()) :: {:ok, [number()]} | {:error, term()}
+
+  @doc """
+  Embeds a list of texts in a single call, returning one vector per text.
+
+  Backends that can amortise work across inputs (e.g. running the neural model
+  on a batched tensor) should implement this. On success, `{:ok, vectors}` is
+  aligned with the input list. Optional — callers fall back to repeated `embed/2`.
+  """
+  @callback embed_many([String.t()], keyword()) :: {:ok, [[number()]]} | {:error, term()}
+
+  @optional_callbacks embed_many: 2
 end