fix: A1-14 real neural embeddings via Bumblebee multilingual-e5-small with Float32 BLOB vector cache
This commit is contained in:
@@ -61,10 +61,15 @@ config :bds, :scripting,
|
||||
job_max_reductions: :none
|
||||
|
||||
config :bds, :embeddings,
|
||||
backend: BDS.Embeddings.Backends.InApp,
|
||||
backend: BDS.Embeddings.Backends.Neural,
|
||||
model_id: "Xenova/multilingual-e5-small",
|
||||
model_repo: "intfloat/multilingual-e5-small",
|
||||
dimensions: 384
|
||||
|
||||
# Cache downloaded model files under the app data directory so they persist
|
||||
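# across sessions (ModelCaching invariant). Overridden at runtime in prod.
# NOTE(review): Bumblebee itself documents BUMBLEBEE_CACHE_DIR / the `:cache_dir`
# repository option — confirm the Neural backend reads this key and passes it on.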
|
||||
config :bumblebee, :cache_dir, Path.expand("../priv/data/models", __DIR__)
|
||||
|
||||
config :logger, :console,
|
||||
format: "$time $metadata[$level] $message\n",
|
||||
metadata: [:request_id]
|
||||
|
||||
@@ -8,4 +8,9 @@ if config_env() == :prod do
|
||||
config :bds, BDS.Repo,
|
||||
database: database_path,
|
||||
pool_size: String.to_integer(System.get_env("POOL_SIZE") || "1")
|
||||
|
||||
# Persist downloaded embedding model files in a "models" directory next to the
# database file, unless BDS_MODEL_CACHE_DIR points somewhere else.
|
||||
config :bumblebee, :cache_dir,
|
||||
System.get_env("BDS_MODEL_CACHE_DIR") ||
|
||||
Path.join(Path.dirname(Path.expand(database_path)), "models")
|
||||
end
|
||||
|
||||
@@ -8,3 +8,11 @@ config :bds, BDS.Repo,
|
||||
busy_timeout: 15_000
|
||||
|
||||
config :logger, level: :warning
|
||||
|
||||
# Tests use the deterministic lexical stub backend so the suite stays offline
|
||||
# and never downloads the ~100 MB neural model.
|
||||
config :bds, :embeddings,
|
||||
backend: BDS.Embeddings.Backends.InApp,
|
||||
model_id: "Xenova/multilingual-e5-small",
|
||||
model_repo: "intfloat/multilingual-e5-small",
|
||||
dimensions: 384
|
||||
|
||||
Reference in New Issue
Block a user