fix: A1-14 real neural embeddings via Bumblebee multilingual-e5-small with Float32 BLOB vector cache

This commit is contained in:
2026-05-29 14:04:51 +02:00
parent 489d787306
commit a1004d72bf
16 changed files with 310 additions and 21 deletions

View File

@@ -192,8 +192,14 @@ defmodule BDS.Embeddings.Index do
Path.join(Path.dirname(snapshot_path), "embeddings.index.json")
end
# Vectors are stored as a packed little-endian Float32 BLOB; see
# BDS.Embeddings and the VectorCacheInDb invariant in embedding.allium.
#
# Decodes a stored vector into a list of floats:
#   * `nil` and the empty binary decode to `[]`;
#   * any other binary is read four bytes at a time as little-endian
#     32-bit floats, in storage order.
#
# The former catch-all clause that called `Jason.decode!/1` is gone: it was
# listed before the binary clauses, so it shadowed them and would have tried
# to parse the packed float bytes as JSON.
defp decode_vector(nil), do: []
defp decode_vector(<<>>), do: []

defp decode_vector(binary) when is_binary(binary) do
  # Trailing bytes that do not fill a whole 32-bit float are not emitted.
  for <<value::float-32-little <- binary>>, do: value
end
# An empty vector on either side yields a similarity of 0.0.
defp cosine_similarity([], _other), do: 0.0
defp cosine_similarity(_vector, []), do: 0.0