fix: A1-14 real neural embeddings via Bumblebee multilingual-e5-small with Float32 BLOB vector cache

This commit is contained in:
2026-05-29 14:04:51 +02:00
parent 489d787306
commit a1004d72bf
16 changed files with 310 additions and 21 deletions

View File

@@ -1,5 +1,13 @@
defmodule BDS.Embeddings.Backends.InApp do
@moduledoc false
@moduledoc """
Deterministic lexical embedding stub.
This backend does NOT satisfy the `RealNeuralModel` invariant — it projects
stemmed tokens and bigrams into a sparse hashed vector. It exists only as an
offline, dependency-free fallback for tests and environments where the neural
model (see `BDS.Embeddings.Backends.Neural`) cannot be loaded. Production and
development use the neural backend.
"""
@behaviour BDS.Embeddings.Backend

View File

@@ -0,0 +1,104 @@
defmodule BDS.Embeddings.Backends.Neural do
  @moduledoc """
  On-device neural embedding backend backed by Bumblebee and EXLA.

  Covers the `RealNeuralModel` and `ModelCaching` invariants described in
  `specs/embedding.allium`: vectors come from the multilingual-e5-small
  transformer itself (weights from `intfloat/multilingual-e5-small`, reported
  under the `Xenova/multilingual-e5-small` identifier), not from a lexical
  approximation.

    * The serving is built lazily, on the first embedding request handled by
      this process; nothing is loaded in `init/1`.
    * Model files (~100 MB) are fetched from the Hugging Face Hub on first use
      and kept in the Bumblebee on-disk cache, so they survive restarts and
      project switches.
    * Inputs follow the e5 convention: each text is prefixed with `"query: "`,
      token states are mean-pooled, and the result is L2-normalised.

  All requests go through a single named GenServer, so embeddings are computed
  one at a time.
  """

  @behaviour BDS.Embeddings.Backend

  use GenServer

  @query_prefix "query: "
  @embed_timeout :timer.minutes(2)
  @default_model_id "Xenova/multilingual-e5-small"
  @default_model_repo "intfloat/multilingual-e5-small"
  @default_dimensions 384

  # ── Client API ────────────────────────────────────────────────────────────

  @doc """
  Child specification that starts the backend through `start_link/1`.
  """
  def child_spec(opts) do
    start = {__MODULE__, :start_link, [opts]}
    %{id: __MODULE__, start: start}
  end

  @doc """
  Starts the backend process, registered under the module name.
  """
  def start_link(opts \\ []) do
    GenServer.start_link(__MODULE__, opts, name: __MODULE__)
  end

  # Reports the model identifier and vector width, each overridable through
  # the `:embeddings` application environment of `:bds`.
  @impl BDS.Embeddings.Backend
  def model_info do
    settings = config()
    model_id = Keyword.get(settings, :model_id, @default_model_id)
    dimensions = Keyword.get(settings, :dimensions, @default_dimensions)

    %{model_id: model_id, dimensions: dimensions}
  end

  # Embeds `text` by delegating to the backend process. Returns whatever the
  # server replies (`{:ok, floats}` or `{:error, reason}`); an exit of the
  # call itself (server not running, timeout, crash) is turned into
  # `{:error, {:embedding_backend_unavailable, reason}}`.
  @impl BDS.Embeddings.Backend
  def embed(text, _opts) when is_binary(text) do
    try do
      GenServer.call(__MODULE__, {:embed, @query_prefix <> text}, @embed_timeout)
    catch
      :exit, reason -> {:error, {:embedding_backend_unavailable, reason}}
    end
  end

  # ── Server callbacks ──────────────────────────────────────────────────────

  # The serving slot starts empty; it is filled on the first `:embed` call.
  @impl GenServer
  def init(_opts) do
    {:ok, %{serving: nil}}
  end

  # Makes sure the serving exists, runs it on the already-prefixed text and
  # replies with the flat list of floats. Any exception raised on the way is
  # reported to the caller as `{:error, message}` and the state is left as it
  # was before the call.
  @impl GenServer
  def handle_call({:embed, text}, _from, state) do
    try do
      case ensure_serving(state) do
        {:ok, %{serving: serving} = ready_state} ->
          {:reply, {:ok, compute_embedding(serving, text)}, ready_state}

        {:error, _reason} = failure ->
          {:reply, failure, state}
      end
    rescue
      exception ->
        {:reply, {:error, Exception.message(exception)}, state}
    end
  end

  # ── Internals ─────────────────────────────────────────────────────────────

  # Runs the serving on one text and flattens the resulting tensor.
  defp compute_embedding(serving, text) do
    %{embedding: tensor} = Nx.Serving.run(serving, text)
    Nx.to_flat_list(tensor)
  end

  # Builds the serving when the slot is still empty; a failed build is passed
  # back unchanged so the next request tries again.
  defp ensure_serving(%{serving: nil} = state) do
    with {:ok, serving} <- build_serving() do
      {:ok, %{state | serving: serving}}
    end
  end

  defp ensure_serving(state), do: {:ok, state}

  # Loads model and tokenizer from the configured Hugging Face repository and
  # wraps them in a text-embedding serving. A load failure is returned as-is.
  defp build_serving do
    repository = {:hf, Keyword.get(config(), :model_repo, @default_model_repo)}

    with {:ok, model} <- Bumblebee.load_model(repository),
         {:ok, tokenizer} <- Bumblebee.load_tokenizer(repository) do
      {:ok, Bumblebee.Text.text_embedding(model, tokenizer, serving_options())}
    end
  end

  # Options for the serving: mean pooling plus L2 normalisation, compiled with
  # EXLA for one sequence of up to 512 tokens at a time.
  defp serving_options do
    [
      output_pool: :mean_pooling,
      output_attribute: :hidden_state,
      embedding_processor: :l2_norm,
      compile: [batch_size: 1, sequence_length: 512],
      defn_options: [compiler: EXLA]
    ]
  end

  # Embedding settings are read at call time so runtime configuration changes
  # are picked up.
  defp config do
    Application.get_env(:bds, :embeddings, [])
  end
end

View File

@@ -192,8 +192,14 @@ defmodule BDS.Embeddings.Index do
Path.join(Path.dirname(snapshot_path), "embeddings.index.json")
end
# Vectors are stored as a packed little-endian Float32 BLOB; see
# BDS.Embeddings and the VectorCacheInDb invariant in embedding.allium.
# Turns a stored embedding back into a list of floats.
#
#   * `nil` and the empty binary both stand for "no vector" and decode to `[]`.
#   * Any other binary is read as consecutive little-endian 32-bit floats.
#
# There is deliberately no catch-all clause that JSON-decodes the value: placed
# ahead of the binary clauses it would match every non-nil input, leaving them
# unreachable and handing raw Float32 bytes to the JSON decoder.
defp decode_vector(nil), do: []
defp decode_vector(<<>>), do: []

defp decode_vector(binary) when is_binary(binary) do
  for <<value::float-32-little <- binary>>, do: value
end
defp cosine_similarity([], _other), do: 0.0
defp cosine_similarity(_vector, []), do: 0.0

View File

@@ -12,7 +12,9 @@ defmodule BDS.Embeddings.Key do
belongs_to :project, BDS.Projects.Project, type: :string
field :content_hash, :string
field :vector, :string
# Packed little-endian Float32 BLOB (dimensions * 4 bytes), per the
# VectorCacheInDb invariant in specs/embedding.allium.
field :vector, :binary
end
def changeset(key, attrs) do