chore: update allium spec to clarify the embedding model requirement

2026-05-28 22:21:36 +02:00
parent e58d68e73e
commit fceb995c7c
1 changed files with 8 additions and 0 deletions
--- a/specs/embedding.allium
+++ b/specs/embedding.allium
@@ -213,6 +213,14 @@ invariant VectorCacheInDb {
    -- Enables instant reload without re-embedding
 }

+invariant RealNeuralModel {
+    -- Embeddings MUST be produced by the actual ONNX neural model (multilingual-e5-small),
+    -- not by lexical approximations (TF-IDF, bag-of-words, hash projections).
+    -- Cross-language semantic similarity is a primary requirement:
+    -- posts in different languages about the same topic must produce similar vectors.
+    -- Lexical approximations cannot meet this requirement; only the trained multilingual transformer model can.
+}
+
 invariant ModelCaching {
    -- Model files (~100 MB) downloaded from Hugging Face Hub on first use
    -- Cached in app data directory, persists across sessions