perf: A1-14b replace O(n^2) embedding snapshot with hnswlib HNSW index and debounced persistence

This commit is contained in:
2026-05-29 15:36:13 +02:00
parent 744f7543d7
commit 61ff2a77c0
12 changed files with 474 additions and 287 deletions

View File

@@ -63,7 +63,7 @@ value EmbeddingVector {
-- ─── Entities ───────────────────────────────────────────────
entity EmbeddingKey {
label: Integer -- HNSW label for USearch
label: Integer -- HNSW node label / id
post: post/Post
content_hash: String -- SHA-256 of "{title}\n\n{content}"
vector: EmbeddingVector
@@ -75,9 +75,11 @@ entity DismissedDuplicatePair {
-- IDs stored in canonical order (sorted) for dedup
}
-- ─── USearch HNSW Index ─────────────────────────────────────
-- ─── HNSW Index ─────────────────────────────────────────────
config {
-- HNSW approximate-nearest-neighbour index (hnswlib). USearch has no Elixir
-- binding; hnswlib provides the same HNSW algorithm and parameters.
model_id: String = "Xenova/multilingual-e5-small"
embedding_dimensions: Integer = 384
hnsw_metric: String = "cosine"
@@ -86,7 +88,8 @@ config {
hnsw_expansion_search: Integer = 64 -- efSearch
debounce_persist: Duration = 5.seconds
-- Index file: {userData}/projects/{projectId}/embeddings.usearch
-- Key mapping is persisted alongside the embedding records
-- Key mapping (label → post_id) persisted in a sidecar (.meta.json) next
-- to the index file, plus the source-of-truth rows in embedding_keys
batch_size: Integer = 16 -- texts per batched inference run
sequence_length: Integer = 256 -- max tokens per input (truncated)
}
@@ -112,7 +115,7 @@ rule EmbedPost {
let existing = EmbeddingKey{post: post}
if not exists existing or existing.content_hash != hash:
-- Compute embedding vector via local model
-- Upsert into USearch index + embedding_keys DB table
-- Upsert into HNSW index + embedding_keys DB table
-- Debounced index save (5s)
ensures: EmbeddingKeyUpdated(post)
}
@@ -151,9 +154,9 @@ rule IndexUnindexed {
rule FindSimilar {
when: FindSimilarRequested(post, limit)
requires: semantic_similarity_enabled
-- HNSW approximate nearest neighbor search via USearch
-- HNSW approximate nearest neighbor search (hnswlib)
-- Searches index for (limit + 1) neighbors, excludes self
-- Converts USearch cosine distance to similarity: max(0, 1 - distance)
-- Converts HNSW cosine distance to similarity: max(0, 1 - distance)
-- Returns ranked list sorted by descending similarity
ensures: SimilarPostsResult(post, ranked_matches)
}
@@ -162,7 +165,7 @@ rule ComputeSimilarities {
when: ComputeSimilaritiesRequested(source_post, target_post_ids)
requires: semantic_similarity_enabled
-- Exact pairwise cosine similarity between source vector and each target vector
-- Uses in-memory vector cache, NOT USearch search
-- Uses in-memory vector cache, NOT the HNSW index
-- Returns map of post_id -> similarity score
-- Used by InsertPostLinkModal to rank FTS search results
ensures: SimilarityScoresResult(source_post, scores)
@@ -207,7 +210,7 @@ invariant ContentHashSkipsUnchanged {
}
invariant DebouncedPersistence {
-- USearch index persistence is debounced at 5 seconds
-- HNSW index persistence is debounced at 5 seconds
-- Prevents excessive disk I/O during bulk operations
-- Index also force-saved on project switch and app shutdown
}
@@ -234,8 +237,9 @@ invariant NativeAcceleratedExecution {
-- inference pass and inputs are truncated to a bounded sequence_length, so
-- (re)indexing many posts is not serialised one document at a time.
-- Current implementation: Bumblebee + EXLA, which is native CPU on Apple
-- Silicon (XLA has no Metal backend). Apple GPU acceleration via EMLX/MLX
-- is tracked as a follow-up (SPECGAPS A1-14c).
-- Silicon (XLA has no Metal backend); neighbour search is HNSW (hnswlib).
-- Apple GPU acceleration via EMLX/MLX is tracked as a follow-up
-- (SPECGAPS A1-14c).
}
invariant ModelCaching {
@@ -245,7 +249,7 @@ invariant ModelCaching {
}
invariant ProjectIsolation {
-- Each project has its own USearch index file and embedding_keys rows
-- Each project has its own HNSW index file and embedding_keys rows
-- On project switch: save current index, load new project's index
-- Model pipeline shared across projects (not reloaded)
}