fix: A1-14 real neural embeddings via Bumblebee multilingual-e5-small with Float32 BLOB vector cache
This commit is contained in:
@@ -38,13 +38,22 @@ defmodule BDS.Application do
|
||||
BDS.Scripting.JobStore,
|
||||
{Task.Supervisor, name: BDS.Scripting.TaskSupervisor},
|
||||
BDS.Scripting.JobSupervisor
|
||||
| desktop_children(current_env())
|
||||
]
|
||||
] ++ embedding_children() ++ desktop_children(current_env())
|
||||
|
||||
opts = [strategy: :one_for_one, name: BDS.Supervisor]
|
||||
Supervisor.start_link(children, opts)
|
||||
end
|
||||
|
||||
# Extra supervision-tree children required by the configured embedding
# backend. The neural backend is a supervised GenServer that loads its model
# lazily on the first embedding request, so it is started only when it is the
# configured backend; every other backend contributes no children.
defp embedding_children do
  embeddings_config = Application.get_env(:bds, :embeddings, [])

  if embeddings_config[:backend] == BDS.Embeddings.Backends.Neural do
    [BDS.Embeddings.Backends.Neural]
  else
    []
  end
end
|
||||
|
||||
# Resolves the environment name used to pick desktop children: a runtime
# `:current_env_override` application setting wins when it is set to a truthy
# value; otherwise the environment captured at compile time is used.
defp current_env do
  case Application.get_env(:bds, :current_env_override) do
    unset when unset in [nil, false] -> @compiled_env
    override -> override
  end
end
|
||||
|
||||
@@ -217,7 +217,7 @@ defmodule BDS.Embeddings do
|
||||
post_id: post.id,
|
||||
project_id: post.project_id,
|
||||
content_hash: content_hash,
|
||||
vector: Jason.encode!(vector)
|
||||
vector: encode_vector(vector)
|
||||
})
|
||||
|> Repo.insert_or_update()
|
||||
|
||||
@@ -256,7 +256,7 @@ defmodule BDS.Embeddings do
|
||||
else
|
||||
{:ok, vector} = embed_text(raw_text, post.language)
|
||||
label = if existing_key, do: existing_key.label, else: next_label
|
||||
{:upsert, [label, post.id, post.project_id, content_hash, Jason.encode!(vector)]}
|
||||
{:upsert, [label, post.id, post.project_id, content_hash, encode_vector(vector)]}
|
||||
end
|
||||
end
|
||||
|
||||
@@ -655,7 +655,9 @@ defmodule BDS.Embeddings do
|
||||
end
|
||||
|
||||
# Embeds `raw_text` with the configured backend and returns that backend's
# result unchanged.
#
# The text is handed over as-is: per-backend preprocessing (e5 "query: "
# prefix, pooling, normalisation) is the backend's responsibility — see
# BDS.Embeddings.Backends.Neural. Prefixing here as well would embed
# "query: query: …" and cost a second, discarded backend call per text.
defp embed_text(raw_text, language) do
  configured_backend().embed(raw_text, language: language)
end
|
||||
|
||||
defp rebuild_snapshot(project_id) do
|
||||
@@ -726,8 +728,22 @@ defmodule BDS.Embeddings do
|
||||
|
||||
# Lowercase hexadecimal SHA-256 digest of `text`.
defp hash_text(text) do
  :sha256
  |> :crypto.hash(text)
  |> Base.encode16(case: :lower)
end
|
||||
|
||||
# Serialises an embedding into the persisted cache format: a packed
# little-endian Float32 BLOB of `dimensions` * 4 bytes (1536 bytes for
# multilingual-e5-small), per the VectorCacheInDb invariant in
# specs/embedding.allium.
defp encode_vector(values) when is_list(values) do
  values
  |> Enum.map(fn component -> <<float32(component)::float-32-little>> end)
  |> IO.iodata_to_binary()
end

# Coerces a vector component to a float; integers are widened, floats pass
# through untouched.
defp float32(value) when is_integer(value), do: value * 1.0
defp float32(value) when is_float(value), do: value
|
||||
|
||||
# Decodes a stored vector BLOB back into a list of floats.
#
# `nil` (no vector stored) and the empty binary both decode to `[]`. Any other
# binary is read as consecutive little-endian Float32 values, mirroring the
# packed layout written by encode_vector/1.
#
# There is deliberately no JSON clause: a catch-all `decode_vector(vector)`
# placed ahead of the binary clauses would shadow them and hand packed floats
# to the JSON parser.
defp decode_vector(nil), do: []
defp decode_vector(<<>>), do: []

defp decode_vector(binary) when is_binary(binary) do
  for <<value::float-32-little <- binary>>, do: value
end
|
||||
|
||||
defp cosine_similarity([], _other), do: 0.0
|
||||
defp cosine_similarity(_vector, []), do: 0.0
|
||||
|
||||
@@ -1,5 +1,13 @@
|
||||
defmodule BDS.Embeddings.Backends.InApp do
|
||||
@moduledoc false
|
||||
@moduledoc """
|
||||
Deterministic lexical embedding stub.
|
||||
|
||||
This backend does NOT satisfy the `RealNeuralModel` invariant — it projects
|
||||
stemmed tokens and bigrams into a sparse hashed vector. It exists only as an
|
||||
offline, dependency-free fallback for tests and environments where the neural
|
||||
model (see `BDS.Embeddings.Backends.Neural`) cannot be loaded. Production and
|
||||
development use the neural backend.
|
||||
"""
|
||||
|
||||
@behaviour BDS.Embeddings.Backend
|
||||
|
||||
|
||||
104
lib/bds/embeddings/backends/neural.ex
Normal file
104
lib/bds/embeddings/backends/neural.ex
Normal file
@@ -0,0 +1,104 @@
|
||||
defmodule BDS.Embeddings.Backends.Neural do
  @moduledoc """
  Real on-device neural embedding backend.

  Implements the `RealNeuralModel` and `ModelCaching` invariants from
  `specs/embedding.allium`: embeddings are produced by the actual
  multilingual-e5-small transformer (the `intfloat/multilingual-e5-small`
  weights behind the `Xenova/multilingual-e5-small` identifier) via
  Bumblebee + EXLA, never by a lexical approximation.

    * Lazy-loaded — the model pipeline is built on the first embedding
      request, not at application startup.
    * Model files (~100 MB) are downloaded from the Hugging Face Hub on
      first use and cached on disk (Bumblebee cache dir), persisting across
      sessions and project switches.
    * Text preprocessing follows the e5 convention: every input is prefixed
      with `"query: "`, pooled with mean pooling over the attention mask, and
      L2-normalised. This is what makes cross-language semantic similarity
      work.

  All requests are served by a single named GenServer. A failed model load
  leaves the server unloaded, so the next request retries the load; a failed
  inference keeps the already-loaded pipeline.
  """

  @behaviour BDS.Embeddings.Backend

  use GenServer

  @query_prefix "query: "
  @embed_timeout :timer.minutes(2)

  @default_model_id "Xenova/multilingual-e5-small"
  @default_model_repo "intfloat/multilingual-e5-small"
  @default_dimensions 384

  # --- Client API -----------------------------------------------------------

  @doc false
  def child_spec(opts) do
    %{id: __MODULE__, start: {__MODULE__, :start_link, [opts]}}
  end

  @doc """
  Starts the backend process, registered under the module name.

  `opts` are accepted for supervisor compatibility and are not used.
  """
  def start_link(opts \\ []) do
    GenServer.start_link(__MODULE__, opts, name: __MODULE__)
  end

  @doc """
  Returns the model identifier and vector dimensionality.

  Both values come from the `:embeddings` configuration of the `:bds`
  application and fall back to the multilingual-e5-small defaults.
  """
  @impl BDS.Embeddings.Backend
  def model_info do
    config = config()

    %{
      model_id: Keyword.get(config, :model_id, @default_model_id),
      dimensions: Keyword.get(config, :dimensions, @default_dimensions)
    }
  end

  @doc """
  Embeds `text`, prefixed with the e5 `"query: "` marker.

  Returns `{:ok, vector}` with the embedding as a flat list, or
  `{:error, reason}` when the model cannot be loaded or inference fails. If
  the backend process is not running, crashes, or does not answer within the
  call timeout, the result is
  `{:error, {:embedding_backend_unavailable, exit_reason}}`.
  """
  @impl BDS.Embeddings.Backend
  def embed(text, _opts) when is_binary(text) do
    GenServer.call(__MODULE__, {:embed, @query_prefix <> text}, @embed_timeout)
  catch
    :exit, reason -> {:error, {:embedding_backend_unavailable, reason}}
  end

  # --- Server callbacks -----------------------------------------------------

  @impl GenServer
  def init(_opts), do: {:ok, %{serving: nil}}

  @impl GenServer
  def handle_call({:embed, text}, _from, state) do
    case ensure_serving(state) do
      {:ok, ready_state} ->
        # Reply with the *loaded* state even when inference fails, so one bad
        # request never throws away the expensive model pipeline.
        {:reply, run_embedding(ready_state.serving, text), ready_state}

      {:error, _reason} = error ->
        # Loading failed: stay unloaded so the next request retries.
        {:reply, error, state}
    end
  end

  # --- Internals ------------------------------------------------------------

  # Builds the serving on first use; afterwards the cached one is reused.
  # Exceptions raised while loading are converted to `{:error, message}`.
  defp ensure_serving(%{serving: nil} = state) do
    case build_serving() do
      {:ok, serving} -> {:ok, %{state | serving: serving}}
      {:error, _reason} = error -> error
    end
  rescue
    exception -> {:error, Exception.message(exception)}
  end

  defp ensure_serving(state), do: {:ok, state}

  # Runs one inference and flattens the embedding tensor to a list.
  # Exceptions raised by the serving are converted to `{:error, message}`.
  defp run_embedding(serving, text) do
    %{embedding: tensor} = Nx.Serving.run(serving, text)
    {:ok, Nx.to_flat_list(tensor)}
  rescue
    exception -> {:error, Exception.message(exception)}
  end

  # Loads model and tokenizer from the configured Hugging Face repository and
  # wires them into a mean-pooled, L2-normalised text-embedding serving.
  defp build_serving do
    repo = {:hf, Keyword.get(config(), :model_repo, @default_model_repo)}

    with {:ok, loaded_model} <- Bumblebee.load_model(repo),
         {:ok, tokenizer} <- Bumblebee.load_tokenizer(repo) do
      serving =
        Bumblebee.Text.text_embedding(loaded_model, tokenizer,
          output_pool: :mean_pooling,
          output_attribute: :hidden_state,
          embedding_processor: :l2_norm,
          compile: [batch_size: 1, sequence_length: 512],
          defn_options: [compiler: EXLA]
        )

      {:ok, serving}
    end
  end

  defp config, do: Application.get_env(:bds, :embeddings, [])
end
|
||||
@@ -192,8 +192,14 @@ defmodule BDS.Embeddings.Index do
|
||||
Path.join(Path.dirname(snapshot_path), "embeddings.index.json")
|
||||
end
|
||||
|
||||
# Vectors are stored as a packed little-endian Float32 BLOB; see
|
||||
# BDS.Embeddings and the VectorCacheInDb invariant in embedding.allium.
|
||||
# Decodes a stored vector BLOB into a list of floats.
#
# `nil` and the empty binary both decode to `[]`; any other binary is read as
# consecutive little-endian Float32 values.
#
# There is deliberately no JSON clause: a catch-all `decode_vector(vector)`
# placed ahead of the binary clauses would shadow them and hand packed floats
# to the JSON parser.
defp decode_vector(nil), do: []
defp decode_vector(<<>>), do: []

defp decode_vector(binary) when is_binary(binary) do
  for <<value::float-32-little <- binary>>, do: value
end
|
||||
|
||||
defp cosine_similarity([], _other), do: 0.0
|
||||
defp cosine_similarity(_vector, []), do: 0.0
|
||||
|
||||
@@ -12,7 +12,9 @@ defmodule BDS.Embeddings.Key do
|
||||
belongs_to :project, BDS.Projects.Project, type: :string
|
||||
|
||||
field :content_hash, :string
|
||||
field :vector, :string
|
||||
# Packed little-endian Float32 BLOB (dimensions * 4 bytes), per the
|
||||
# VectorCacheInDb invariant in specs/embedding.allium.
|
||||
field :vector, :binary
|
||||
end
|
||||
|
||||
def changeset(key, attrs) do
|
||||
|
||||
Reference in New Issue
Block a user