Files
bDS2/specs/search.allium

168 lines
5.0 KiB
Plaintext

-- allium: 1
-- bDS Full-Text Search
-- Scope: core (Wave 1 — in-app full-text search with Snowball stemmers)
-- Distilled from: src/main/engine/PostEngine.ts (FTS methods),
-- MediaEngine.ts (FTS methods), stemmer.ts
use "./post.allium" as post
use "./media.allium" as media
-- Surface through which a SearchOperator asks for a full-text search.
-- It provides one request trigger per searchable kind (posts and media).
-- Each trigger carries the query text and a filters value; the SearchPosts
-- and SearchMedia rules in this file are the ones that react to them.
surface SearchControlSurface {
facing _: SearchOperator
provides:
SearchPostsRequested(query, filters)
SearchMediaRequested(query, filters)
}
-- Surface facing the SearchRuntime that announces an item whose search
-- index row must be rebuilt. The IndexPost and IndexMedia rules in this
-- file react to these triggers.
-- NOTE(review): both triggers are named SearchIndexUpdated and differ only
-- in their parameter name (post vs. media). Confirm the trigger can be
-- disambiguated by argument type; otherwise IndexPost and IndexMedia may
-- both match the same event.
surface SearchIndexRuntimeSurface {
facing _: SearchRuntime
provides:
SearchIndexUpdated(post)
SearchIndexUpdated(media)
}
-- Value describing a language for stemming purposes.
value StemmerLanguage {
-- Snowball stemmers via library (Stemex)
-- Languages with a Snowball algorithm get real stemming;
-- others pass through unstemmed
-- Applied to both indexing and query processing
-- Language code for this stemmer language.
-- NOTE(review): the code format (e.g. ISO 639-1) is not stated in this
-- file — confirm against the stemmer implementation.
code: String
}
-- Surface that exposes a StemmerLanguage to observers.
-- The only attribute made visible is the language's code.
surface StemmerLanguageSurface {
context language: StemmerLanguage
exposes:
language.code
}
-- Search index row for a single post. The IndexPost rule in this file
-- creates it, storing already-stemmed text in every String field.
entity PostSearchIndex {
-- FTS5 virtual table with per-field stemmed columns
-- Each field is stemmed independently; translations are
-- stemmed with their own language stemmer and appended
-- to the corresponding field
-- The post this row indexes.
post: post/Post
-- title, excerpt, content: IndexPost joins the post's own stemmed text
-- with the stemmed text of each translation for the same field.
title: String
excerpt: String
content: String
-- tags, categories: IndexPost joins the post's values with a single
-- space and stems the result with the post's language only.
tags: String
categories: String
}
-- Search index row for a single media item. The IndexMedia rule in this
-- file creates it, storing already-stemmed text in every String field.
entity MediaSearchIndex {
-- FTS5 virtual table with per-field stemmed columns
-- Each field is stemmed independently; translations are
-- stemmed with their own language stemmer and appended
-- to the corresponding field
-- The media item this row indexes.
media: media/Media
-- title, alt, caption: IndexMedia joins the media item's own stemmed
-- text with the stemmed text of each translation for the same field.
title: String
alt: String
caption: String
-- original_name: stemmed with the media item's language only;
-- IndexMedia does not merge translations into it.
original_name: String
-- tags: joined with a single space, then stemmed with the media item's
-- language only.
tags: String
}
-- Invariant: the stemmer used must follow the language of the text, both
-- when content is indexed and when a query is processed.
invariant CrossLanguageStemming {
-- Search index uses Snowball stemmer matched to content language
-- A post in German is stemmed with the German stemmer
-- Translations are stemmed with their respective language stemmers
-- Query-time stemming matches the index language
-- NOTE(review): SearchPosts and SearchMedia in this file stem the query
-- with detect_language(query); confirm that this is what "matches the
-- index language" is meant to require.
}
-- Answers a post search request with full-text matches from PostSearchIndex.
rule SearchPosts {
    when: SearchPostsRequested(query, filters)
    -- Optional filters accepted alongside the query:
    --   status, tags, categories, language, missingTranslationLanguage,
    --   year, month, and a from/to date range
    -- The outcome is a paginated result set that also reports a total count.
    -- The query is stemmed for its detected language before matching.
    let query_language = detect_language(query)
    let stemmed = stem(query, query_language)
    let hits = search_fts(PostSearchIndex, stemmed, filters)
    -- Paging window (offset/limit) is echoed back from the filters.
    ensures: SearchResults(
        posts: hits,
        total: hits.count,
        offset: filters.offset,
        limit: filters.limit
    )
}
-- Answers a media search request with full-text matches from MediaSearchIndex.
rule SearchMedia {
    when: SearchMediaRequested(query, filters)
    -- Optional filters accepted alongside the query:
    --   language, tags, year, month, and a from/to date range
    -- The outcome is a paginated result set that also reports a total count.
    -- The query is stemmed for its detected language before matching.
    let query_language = detect_language(query)
    let stemmed = stem(query, query_language)
    let hits = search_fts(MediaSearchIndex, stemmed, filters)
    -- Paging window (offset/limit) is echoed back from the filters.
    ensures: SearchResults(
        media: hits,
        total: hits.count,
        offset: filters.offset,
        limit: filters.limit
    )
}
-- Rebuilds the PostSearchIndex row for a post whose index was signalled stale.
rule IndexPost {
    when: SearchIndexUpdated(post)
    -- FTS5 rows are not updated in place: the existing row is removed and
    -- a new one is created.
    -- Every field is stemmed on its own. The post's text uses the post's
    -- language; each translation uses its own language, and the stemmed
    -- pieces are joined into the same field.
    let primary_language = post.language
    let post_translations = post.translations
    let stemmed_title = join_stemmed(
        stem(post.title, primary_language),
        for translation in post_translations: stem(translation.title, translation.language)
    )
    let stemmed_excerpt = join_stemmed(
        stem(post.excerpt, primary_language),
        for translation in post_translations: stem(translation.excerpt, translation.language)
    )
    let stemmed_content = join_stemmed(
        stem(post.content, primary_language),
        for translation in post_translations: stem(translation.content, translation.language)
    )
    -- Tags and categories are space-joined and stemmed with the post's
    -- language only.
    let stemmed_tags = stem(join(post.tags, " "), primary_language)
    let stemmed_categories = stem(join(post.categories, " "), primary_language)
    -- Delete first, then insert the replacement row.
    ensures: not exists PostSearchIndex{post: post}
    ensures: PostSearchIndex.created(
        post: post,
        title: stemmed_title,
        excerpt: stemmed_excerpt,
        content: stemmed_content,
        tags: stemmed_tags,
        categories: stemmed_categories
    )
}
-- Rebuilds the MediaSearchIndex row for a media item whose index was
-- signalled stale.
rule IndexMedia {
    when: SearchIndexUpdated(media)
    -- FTS5 rows are not updated in place: the existing row is removed and
    -- a new one is created.
    -- Every field is stemmed on its own. The media item's text uses the
    -- item's language; each translation uses its own language, and the
    -- stemmed pieces are joined into the same field.
    let primary_language = media.language
    let media_translations = media.translations
    let stemmed_title = join_stemmed(
        stem(media.title, primary_language),
        for translation in media_translations: stem(translation.title, translation.language)
    )
    let stemmed_alt = join_stemmed(
        stem(media.alt, primary_language),
        for translation in media_translations: stem(translation.alt, translation.language)
    )
    let stemmed_caption = join_stemmed(
        stem(media.caption, primary_language),
        for translation in media_translations: stem(translation.caption, translation.language)
    )
    -- The original file name and the space-joined tags are stemmed with
    -- the media item's language only.
    let stemmed_original_name = stem(media.original_name, primary_language)
    let stemmed_tags = stem(join(media.tags, " "), primary_language)
    -- Delete first, then insert the replacement row.
    ensures: not exists MediaSearchIndex{media: media}
    ensures: MediaSearchIndex.created(
        media: media,
        title: stemmed_title,
        alt: stemmed_alt,
        caption: stemmed_caption,
        original_name: stemmed_original_name,
        tags: stemmed_tags
    )
}