168 lines
5.0 KiB
Plaintext
168 lines
5.0 KiB
Plaintext
-- allium: 1
|
|
-- bDS Full-Text Search
|
|
-- Scope: core (Wave 1 — in-app full-text search with Snowball stemmers)
|
|
-- Distilled from: src/main/engine/PostEngine.ts (FTS methods),
|
|
-- MediaEngine.ts (FTS methods), stemmer.ts
|
|
|
|
use "./post.allium" as post
|
|
use "./media.allium" as media
|
|
|
|
surface SearchControlSurface {
    -- Entry points offered to the search operator. Each request carries
    -- a query and a set of filters.
    facing _: SearchOperator

    provides:
        SearchPostsRequested(query, filters)
        SearchMediaRequested(query, filters)
}
|
|
|
|
surface SearchIndexRuntimeSurface {
    -- Triggers offered to the search runtime for refreshing index entries.
    -- NOTE(review): both entries share the name SearchIndexUpdated and
    -- differ only in the argument label; confirm how a post update and a
    -- media update are told apart by the rules listening for this trigger.
    facing _: SearchRuntime

    provides:
        SearchIndexUpdated(post)
        SearchIndexUpdated(media)
}
|
|
|
|
value StemmerLanguage {
    -- Stemming is delegated to a Snowball stemmer library (Stemex).
    -- A language that has a Snowball algorithm gets real stemming;
    -- text in any other language passes through unstemmed.
    -- The same stemming is applied when indexing and when processing
    -- a query.
    code: String
}
|
|
|
|
surface StemmerLanguageSurface {
    -- Exposes the code of the StemmerLanguage supplied as context.
    context language: StemmerLanguage

    exposes:
        language.code
}
|
|
|
|
entity PostSearchIndex {
    -- Backed by an FTS5 virtual table whose columns hold stemmed text,
    -- one column per field.
    -- Fields are stemmed independently of one another. Translated text
    -- is stemmed with the stemmer for its own language and appended to
    -- the corresponding field.
    post: post/Post
    title: String
    excerpt: String
    content: String
    tags: String
    categories: String
}
|
|
|
|
entity MediaSearchIndex {
    -- Backed by an FTS5 virtual table whose columns hold stemmed text,
    -- one column per field.
    -- Fields are stemmed independently of one another. Translated text
    -- is stemmed with the stemmer for its own language and appended to
    -- the corresponding field.
    media: media/Media
    title: String
    alt: String
    caption: String
    original_name: String
    tags: String
}
|
|
|
|
invariant CrossLanguageStemming {
    -- The search index is built with the Snowball stemmer that matches
    -- the language of the content.
    -- Example: a post written in German is stemmed with the German stemmer.
    -- Every translation is stemmed with the stemmer for its own language.
    -- Stemming applied at query time matches the index language.
}
|
|
|
|
rule SearchPosts {
    when: SearchPostsRequested(query, filters)

    -- Full-text search over posts. Optional filters:
    --   status, tags, categories, language, missingTranslationLanguage,
    --   year, month, date range (from/to)
    -- The result is paginated and carries a total count.
    -- NOTE(review): `posts` and `total` are both derived from the same
    -- search_fts result; confirm whether that result is the full match
    -- set or only the requested page.

    -- The query is stemmed with the stemmer for its detected language.
    let query_language = detect_language(query)
    let stemmed = stem(query, query_language)
    let hits = search_fts(PostSearchIndex, stemmed, filters)

    ensures: SearchResults(
        posts: hits,
        total: hits.count,
        offset: filters.offset,
        limit: filters.limit
    )
}
|
|
|
|
rule SearchMedia {
    when: SearchMediaRequested(query, filters)

    -- Full-text search over media. Optional filters:
    --   language, tags, year, month, date range (from/to)
    -- The result is paginated and carries a total count.
    -- NOTE(review): `media` and `total` are both derived from the same
    -- search_fts result; confirm whether that result is the full match
    -- set or only the requested page.

    -- The query is stemmed with the stemmer for its detected language.
    let query_language = detect_language(query)
    let stemmed = stem(query, query_language)
    let hits = search_fts(MediaSearchIndex, stemmed, filters)

    ensures: SearchResults(
        media: hits,
        total: hits.count,
        offset: filters.offset,
        limit: filters.limit
    )
}
|
|
|
|
rule IndexPost {
    when: SearchIndexUpdated(post)

    -- Re-indexing is delete-and-reinsert: FTS5 rows are not updated in place.
    -- title, excerpt and content are each built from the post's own text,
    -- stemmed in the post's language, joined with every translation's text
    -- stemmed in that translation's language.
    -- tags and categories are joined with a single space and stemmed in the
    -- post's language only.

    let primary_language = post.language
    let translated = post.translations

    let stemmed_title = join_stemmed(
        stem(post.title, primary_language),
        for tr in translated: stem(tr.title, tr.language)
    )
    let stemmed_excerpt = join_stemmed(
        stem(post.excerpt, primary_language),
        for tr in translated: stem(tr.excerpt, tr.language)
    )
    let stemmed_content = join_stemmed(
        stem(post.content, primary_language),
        for tr in translated: stem(tr.content, tr.language)
    )
    let stemmed_tags = stem(join(post.tags, " "), primary_language)
    let stemmed_categories = stem(join(post.categories, " "), primary_language)

    -- Drop any existing entry for this post, then create the fresh one.
    ensures: not exists PostSearchIndex{post: post}
    ensures: PostSearchIndex.created(
        post: post,
        title: stemmed_title,
        excerpt: stemmed_excerpt,
        content: stemmed_content,
        tags: stemmed_tags,
        categories: stemmed_categories
    )
}
|
|
|
|
rule IndexMedia {
    when: SearchIndexUpdated(media)

    -- Re-indexing is delete-and-reinsert: FTS5 rows are not updated in place.
    -- title, alt and caption are each built from the media item's own text,
    -- stemmed in the item's language, joined with every translation's text
    -- stemmed in that translation's language.
    -- original_name is stemmed in the item's language only; tags are joined
    -- with a single space and stemmed in the item's language only.

    let primary_language = media.language
    let translated = media.translations

    let stemmed_title = join_stemmed(
        stem(media.title, primary_language),
        for tr in translated: stem(tr.title, tr.language)
    )
    let stemmed_alt = join_stemmed(
        stem(media.alt, primary_language),
        for tr in translated: stem(tr.alt, tr.language)
    )
    let stemmed_caption = join_stemmed(
        stem(media.caption, primary_language),
        for tr in translated: stem(tr.caption, tr.language)
    )
    let stemmed_original_name = stem(media.original_name, primary_language)
    let stemmed_tags = stem(join(media.tags, " "), primary_language)

    -- Drop any existing entry for this media item, then create the fresh one.
    ensures: not exists MediaSearchIndex{media: media}
    ensures: MediaSearchIndex.created(
        media: media,
        title: stemmed_title,
        alt: stemmed_alt,
        caption: stemmed_caption,
        original_name: stemmed_original_name,
        tags: stemmed_tags
    )
}
|