fix: A1-14c run embedding model on Apple GPU via EMLX with EXLA-CPU fallback

This commit is contained in:
2026-05-29 16:26:33 +02:00
parent d03d033548
commit 84b91750fb
7 changed files with 112 additions and 12 deletions

View File

@@ -68,7 +68,10 @@ config :bds, :embeddings,
# Inference is batched: batch_size texts per compiled run, truncated to
# sequence_length tokens. Tuning these trades throughput against memory.
batch_size: 16,
-sequence_length: 256
+sequence_length: 256,
+# Hardware acceleration: :auto prefers the Apple GPU (EMLX/Metal) on Apple
+# Silicon and falls back to EXLA-CPU elsewhere. Force with :emlx or :exla.
+accelerator: :auto
# Cache downloaded model files under the app data directory so they persist
# across sessions (ModelCaching invariant). Overridden at runtime in prod.