perf: batch CPU embedding inference and add A1-14c Apple GPU (EMLX) spec gap

This commit is contained in:
2026-05-29 14:43:39 +02:00
parent a1004d72bf
commit 744f7543d7
10 changed files with 275 additions and 75 deletions

View File

@@ -64,7 +64,11 @@ config :bds, :embeddings,
backend: BDS.Embeddings.Backends.Neural,
model_id: "Xenova/multilingual-e5-small",
model_repo: "intfloat/multilingual-e5-small",
dimensions: 384
dimensions: 384,
# Inference is batched: batch_size texts per compiled run, truncated to
# sequence_length tokens. Tuning these trades throughput against memory.
batch_size: 16,
sequence_length: 256
# Cache downloaded model files under the app data directory so they persist
# across sessions (ModelCaching invariant). Overridden at runtime in prod.

View File

@@ -15,4 +15,6 @@ config :bds, :embeddings,
backend: BDS.Embeddings.Backends.InApp,
model_id: "Xenova/multilingual-e5-small",
model_repo: "intfloat/multilingual-e5-small",
dimensions: 384
dimensions: 384,
batch_size: 16,
sequence_length: 256