feat: search

This commit is contained in:
2026-04-23 16:57:51 +02:00
parent b6255122a9
commit 5d16a89d1c
4 changed files with 659 additions and 0 deletions

View File

@@ -7,6 +7,7 @@ defmodule BDS.Media do
alias BDS.Media.Translation alias BDS.Media.Translation
alias BDS.Projects alias BDS.Projects
alias BDS.Repo alias BDS.Repo
alias BDS.Search
alias BDS.Sidecar alias BDS.Sidecar
def import_media(attrs) do def import_media(attrs) do
@@ -52,6 +53,7 @@ defmodule BDS.Media do
:ok = File.cp(source_path, destination) :ok = File.cp(source_path, destination)
:ok = write_sidecar(project, media) :ok = write_sidecar(project, media)
:ok = ensure_thumbnails(project, media) :ok = ensure_thumbnails(project, media)
:ok = Search.sync_media(media)
media media
end) end)
|> case do |> case do
@@ -86,6 +88,7 @@ defmodule BDS.Media do
|> Repo.update!() |> Repo.update!()
:ok = write_sidecar(project, updated_media) :ok = write_sidecar(project, updated_media)
:ok = Search.sync_media(updated_media)
updated_media updated_media
end) end)
|> case do |> case do
@@ -113,6 +116,7 @@ defmodule BDS.Media do
end) end)
Repo.delete!(media) Repo.delete!(media)
:ok = Search.delete_media(media.id)
{:ok, :deleted} {:ok, :deleted}
end end
end end
@@ -149,6 +153,7 @@ defmodule BDS.Media do
|> Repo.insert_or_update!() |> Repo.insert_or_update!()
:ok = write_translation_sidecar(project, media, updated_translation) :ok = write_translation_sidecar(project, media, updated_translation)
:ok = Search.sync_media(media.id)
updated_translation updated_translation
end) end)
|> case do |> case do
@@ -244,6 +249,7 @@ defmodule BDS.Media do
|> Media.changeset(attrs) |> Media.changeset(attrs)
|> Repo.insert_or_update!() |> Repo.insert_or_update!()
|> tap(fn reloaded_media -> ensure_thumbnails(project, reloaded_media) end) |> tap(fn reloaded_media -> ensure_thumbnails(project, reloaded_media) end)
|> tap(&Search.sync_media/1)
end end
defp write_sidecar(project, media) do defp write_sidecar(project, media) do
@@ -316,6 +322,8 @@ defmodule BDS.Media do
updated_at: now updated_at: now
}) })
|> Repo.insert_or_update!() |> Repo.insert_or_update!()
:ok = Search.sync_media(media.id)
end end
end end

View File

@@ -7,6 +7,7 @@ defmodule BDS.Posts do
alias BDS.Posts.Post alias BDS.Posts.Post
alias BDS.Projects alias BDS.Projects
alias BDS.Repo alias BDS.Repo
alias BDS.Search
alias BDS.Slug alias BDS.Slug
def create_post(attrs) do def create_post(attrs) do
@@ -42,6 +43,14 @@ defmodule BDS.Posts do
published_excerpt: nil published_excerpt: nil
}) })
|> Repo.insert() |> Repo.insert()
|> case do
{:ok, post} ->
:ok = Search.sync_post(post)
{:ok, post}
error ->
error
end
end end
def update_post(post_id, attrs) do def update_post(post_id, attrs) do
@@ -61,6 +70,14 @@ defmodule BDS.Posts do
post post
|> Post.changeset(updates) |> Post.changeset(updates)
|> Repo.update() |> Repo.update()
|> case do
{:ok, updated_post} ->
:ok = Search.sync_post(updated_post)
{:ok, updated_post}
error ->
error
end
else else
{:error, changeset} -> {:error, changeset} {:error, changeset} -> {:error, changeset}
end end
@@ -92,6 +109,14 @@ defmodule BDS.Posts do
updated_at: updated_at updated_at: updated_at
}) })
|> Repo.update() |> Repo.update()
|> case do
{:ok, updated_post} ->
:ok = Search.sync_post(updated_post)
{:ok, updated_post}
error ->
error
end
end end
end end
@@ -116,6 +141,7 @@ defmodule BDS.Posts do
%Post{} = post -> %Post{} = post ->
delete_post_file(post) delete_post_file(post)
Repo.delete!(post) Repo.delete!(post)
:ok = Search.delete_post(post.id)
{:ok, :deleted} {:ok, :deleted}
end end
end end
@@ -129,6 +155,14 @@ defmodule BDS.Posts do
post post
|> Post.changeset(%{status: :archived, updated_at: System.system_time(:second)}) |> Post.changeset(%{status: :archived, updated_at: System.system_time(:second)})
|> Repo.update() |> Repo.update()
|> case do
{:ok, updated_post} ->
:ok = Search.sync_post(updated_post)
{:ok, updated_post}
error ->
error
end
%Post{} = post -> %Post{} = post ->
{:error, {:error,
@@ -345,6 +379,7 @@ defmodule BDS.Posts do
post post
|> Post.changeset(attrs) |> Post.changeset(attrs)
|> Repo.insert_or_update!() |> Repo.insert_or_update!()
|> tap(&Search.sync_post/1)
end end
defp parse_post_status(status) when is_atom(status), do: status defp parse_post_status(status) when is_atom(status), do: status

389
lib/bds/search.ex Normal file
View File

@@ -0,0 +1,389 @@
defmodule BDS.Search do
@moduledoc false
import Ecto.Query
alias BDS.Media.Media
alias BDS.Media.Translation, as: MediaTranslation
alias BDS.Posts.Post
alias BDS.Projects
alias BDS.Repo
@doc """
Searches the posts of `project_id` for `query` and applies `filters`.

Candidate posts are loaded in candidate order, filtered in memory, and then
paginated. Returns `{:ok, map}` where the map holds the requested page under
`:posts`, the number of posts that passed the filters under `:total`, and the
effective `:offset` and `:limit`.
"""
def search_posts(project_id, query, filters \\ %{}) do
  normalized = normalize_filters(filters)
  candidate_ids = candidate_post_ids(project_id, query)

  matching_posts =
    candidate_ids
    |> load_posts_in_order()
    |> filter_posts(normalized)

  page = %{
    posts: paginate(matching_posts, normalized),
    total: length(matching_posts),
    offset: normalized.offset,
    limit: normalized.limit
  }

  {:ok, page}
end
@doc """
Searches the media of `project_id` for `query`.

Of `filters`, only `:offset` and `:limit` are applied here. Returns
`{:ok, map}` where the map holds the requested page under `:media`, the number
of candidates under `:total`, and the effective `:offset` and `:limit`.
"""
def search_media(project_id, query, filters \\ %{}) do
  normalized = normalize_filters(filters)
  candidate_ids = candidate_media_ids(project_id, query)
  found_media = load_media_in_order(candidate_ids)

  page = %{
    media: paginate(found_media, normalized),
    total: length(found_media),
    offset: normalized.offset,
    limit: normalized.limit
  }

  {:ok, page}
end
@doc """
Rebuilds the full-text index rows for every post and media item of `project_id`.

Existing index rows of the project's posts and media are deleted first, then
each post and each media item is indexed again. Returns `:ok`.
"""
def reindex_project(project_id) do
  Repo.query!(
    "DELETE FROM posts_fts WHERE post_id IN (SELECT id FROM posts WHERE project_id = ?)",
    [project_id]
  )

  Repo.query!(
    "DELETE FROM media_fts WHERE media_id IN (SELECT id FROM media WHERE project_id = ?)",
    [project_id]
  )

  project_posts = Repo.all(from p in Post, where: p.project_id == ^project_id)
  Enum.each(project_posts, fn post -> sync_post(post) end)

  project_media = Repo.all(from m in Media, where: m.project_id == ^project_id)
  Enum.each(project_media, fn media -> sync_media(media) end)

  :ok
end
@doc """
Replaces the full-text index row of a post.

Accepts either a `%Post{}` or a post id. Given an id, the post is looked up
first; when it no longer exists its index row is removed instead. Returns `:ok`.
"""
def sync_post(%Post{id: post_id} = post) do
  # Drop the previous row first so the insert below never duplicates a post.
  delete_post(post_id)

  {title, excerpt, content, tags, categories} = post_index_fields(post)
  params = [post_id, title, excerpt, content, tags, categories]

  Repo.query!(
    "INSERT INTO posts_fts (post_id, title, excerpt, content, tags, categories) VALUES (?, ?, ?, ?, ?, ?)",
    params
  )

  :ok
end

def sync_post(post_id) when is_binary(post_id) do
  with %Post{} = post <- Repo.get(Post, post_id) do
    sync_post(post)
  else
    nil -> delete_post(post_id)
  end
end
@doc """
Removes the full-text index row of a post, given a `%Post{}` or a post id.

Returns `:ok`.
"""
def delete_post(%Post{} = post), do: delete_post(post.id)

def delete_post(post_id) when is_binary(post_id) do
  _result = Repo.query!("DELETE FROM posts_fts WHERE post_id = ?", [post_id])
  :ok
end
@doc """
Replaces the full-text index row of a media item.

Accepts either a `%Media{}` or a media id. Given an id, the media item is
looked up first; when it no longer exists its index row is removed instead.
Returns `:ok`.
"""
def sync_media(%Media{id: media_id} = media) do
  # Drop the previous row first so the insert below never duplicates an item.
  delete_media(media_id)

  {title, alt, caption, original_name, tags} = media_index_fields(media)
  params = [media_id, title, alt, caption, original_name, tags]

  Repo.query!(
    "INSERT INTO media_fts (media_id, title, alt, caption, original_name, tags) VALUES (?, ?, ?, ?, ?, ?)",
    params
  )

  :ok
end

def sync_media(media_id) when is_binary(media_id) do
  with %Media{} = media <- Repo.get(Media, media_id) do
    sync_media(media)
  else
    nil -> delete_media(media_id)
  end
end
@doc """
Removes the full-text index row of a media item, given a `%Media{}` or a media id.

Returns `:ok`.
"""
def delete_media(%Media{} = media), do: delete_media(media.id)

def delete_media(media_id) when is_binary(media_id) do
  _result = Repo.query!("DELETE FROM media_fts WHERE media_id = ?", [media_id])
  :ok
end
# Returns the ids of the project's posts that are candidates for `query`, in
# result order.
#
# * Blank query: every post of the project, ordered by `created_at` descending
#   with `id` as tie-breaker. The explicit ordering keeps offset/limit
#   pagination stable; without it the row order is unspecified.
# * Otherwise: rows of `posts_fts` matching `query`, ordered by `bm25` and then
#   by the index rowid.
#
# NOTE(review): `query` is handed to MATCH unchanged; malformed full-text syntax
# presumably makes `Repo.query!` raise — confirm and consider sanitising upstream.
defp candidate_post_ids(project_id, query) do
  if blank_query?(query) do
    Repo.all(
      from post in Post,
        where: post.project_id == ^project_id,
        order_by: [desc: post.created_at, asc: post.id],
        select: post.id
    )
  else
    result =
      Repo.query!(
        """
        SELECT posts_fts.post_id
        FROM posts_fts
        JOIN posts ON posts.id = posts_fts.post_id
        WHERE posts.project_id = ? AND posts_fts MATCH ?
        ORDER BY bm25(posts_fts), posts_fts.rowid
        """,
        [project_id, query]
      )

    Enum.map(result.rows, fn [post_id] -> post_id end)
  end
end
# Returns the ids of the project's media that are candidates for `query`, in
# result order.
#
# * Blank query: every media item of the project, ordered by `id`. The explicit
#   ordering keeps offset/limit pagination stable; without it the row order is
#   unspecified.
# * Otherwise: rows of `media_fts` matching `query`, ordered by `bm25` and then
#   by the index rowid.
#
# NOTE(review): `query` is handed to MATCH unchanged; malformed full-text syntax
# presumably makes `Repo.query!` raise — confirm and consider sanitising upstream.
defp candidate_media_ids(project_id, query) do
  if blank_query?(query) do
    Repo.all(
      from media in Media,
        where: media.project_id == ^project_id,
        order_by: [asc: media.id],
        select: media.id
    )
  else
    result =
      Repo.query!(
        """
        SELECT media_fts.media_id
        FROM media_fts
        JOIN media ON media.id = media_fts.media_id
        WHERE media.project_id = ? AND media_fts MATCH ?
        ORDER BY bm25(media_fts), media_fts.rowid
        """,
        [project_id, query]
      )

    Enum.map(result.rows, fn [media_id] -> media_id end)
  end
end
# Loads the posts for `post_ids` and returns them in the order of `post_ids`.
# Ids without a matching row are skipped; an empty id list issues no query.
defp load_posts_in_order([]), do: []

defp load_posts_in_order(post_ids) do
  loaded = Repo.all(from p in Post, where: p.id in ^post_ids)
  lookup = for post <- loaded, into: %{}, do: {post.id, post}

  Enum.flat_map(post_ids, fn id ->
    case Map.fetch(lookup, id) do
      {:ok, post} -> [post]
      :error -> []
    end
  end)
end
# Loads the media items for `media_ids` and returns them in the order of
# `media_ids`. Ids without a matching row are skipped; an empty id list issues
# no query.
defp load_media_in_order([]), do: []

defp load_media_in_order(media_ids) do
  loaded = Repo.all(from m in Media, where: m.id in ^media_ids)
  lookup = for media <- loaded, into: %{}, do: {media.id, media}

  Enum.flat_map(media_ids, fn id ->
    case Map.fetch(lookup, id) do
      {:ok, media} -> [media]
      :error -> []
    end
  end)
end
# Keeps the posts that satisfy every normalized filter, preserving input order.
# Translation languages are only queried when a missing-translation language
# was requested as a binary.
defp filter_posts(posts, filters) do
  wanted_language = filters.missing_translation_language

  languages_by_post =
    case is_binary(wanted_language) do
      true -> post_translation_languages(posts)
      false -> %{}
    end

  for post <- posts,
      matches_status?(post, filters.status),
      matches_overlap?(post.tags, filters.tags),
      matches_overlap?(post.categories, filters.categories),
      matches_exact?(post.language, filters.language),
      matches_year?(post, filters.year),
      matches_month?(post, filters.month),
      matches_from?(post, filters.from),
      matches_to?(post, filters.to),
      matches_missing_translation?(post, wanted_language, languages_by_post),
      do: post
end
# True when no status filter is set, or when the post's status and the filter
# have the same string form (so atoms and strings compare equal).
defp matches_status?(post, status) do
  is_nil(status) or to_string(post.status) == to_string(status)
end
# True when nothing is required, or when `values` (nil counts as empty) shares
# at least one element with `required_values`.
defp matches_overlap?(_values, []), do: true

defp matches_overlap?(values, required_values) do
  present = MapSet.new(values || [])
  required = MapSet.new(required_values)

  MapSet.size(MapSet.intersection(present, required)) > 0
end
# True when no expected value is given, or when `value` equals it.
defp matches_exact?(value, expected) do
  is_nil(expected) or value == expected
end
# True when no year filter is set, or when the post's `created_at` (Unix
# seconds, converted with `DateTime.from_unix!/1`) falls in `year`.
defp matches_year?(post, year) do
  is_nil(year) or DateTime.from_unix!(post.created_at).year == year
end
# True when no month filter is set, or when the post's `created_at` (Unix
# seconds, converted with `DateTime.from_unix!/1`) falls in `month`.
defp matches_month?(post, month) do
  is_nil(month) or DateTime.from_unix!(post.created_at).month == month
end
# True when no lower bound is set, or when the post was created at or after it.
defp matches_from?(post, from_unix) do
  is_nil(from_unix) or post.created_at >= from_unix
end
# True when no upper bound is set, or when the post was created at or before it.
defp matches_to?(post, to_unix) do
  is_nil(to_unix) or post.created_at <= to_unix
end
# Decides whether `post` should be listed as lacking a translation into `language`.
#
# * No language requested: every post passes.
# * Posts flagged `do_not_translate` never count as missing a translation.
# * A post whose own language is the requested one is not missing it either:
#   the post itself is the text in that language.
# * Otherwise the post passes when `language` is absent from the languages
#   recorded for its id in `translation_languages` (a map of post id to a list
#   of languages).
defp matches_missing_translation?(_post, nil, _translation_languages), do: true

defp matches_missing_translation?(%Post{do_not_translate: true}, _language, _translation_languages),
  do: false

# The repeated `language` variable makes this clause match only on equality.
defp matches_missing_translation?(%Post{language: language}, language, _translation_languages),
  do: false

defp matches_missing_translation?(post, language, translation_languages) do
  language not in Map.get(translation_languages, post.id, [])
end
# Returns a map of post id to the list of languages that have a row in
# `post_translations` for that post. Posts without translations are absent
# from the map; an empty post list issues no query.
defp post_translation_languages([]), do: %{}

defp post_translation_languages(posts) do
  ids = for post <- posts, do: post.id

  # One bound parameter per id; only the placeholder list is interpolated.
  in_list = "?" |> List.duplicate(length(ids)) |> Enum.join(",")

  result =
    Repo.query!(
      "SELECT translation_for, language FROM post_translations WHERE translation_for IN (#{in_list})",
      ids
    )

  Enum.group_by(result.rows, &hd/1, fn [_translation_for, language] -> language end)
end
# Returns the page of `items` described by `filters.offset` and `filters.limit`.
defp paginate(items, filters) do
  remaining = Enum.drop(items, filters.offset)
  Enum.take(remaining, filters.limit)
end
# Builds the `{title, excerpt, content, tags, categories}` texts indexed for a
# post. Title, excerpt and content combine the post's own text with the text of
# all its translations; tags and categories are space-separated.
defp post_index_fields(post) do
  translations = post_translations(post.id)
  translated = fn key -> for translation <- translations, do: Map.get(translation, key) end

  title = join_text([post.title | translated.("title")])
  excerpt = join_text([post.excerpt | translated.("excerpt")])

  own_content = post_content(post)

  translated_content =
    for translation <- translations, do: translation_content(post.project_id, translation)

  content = join_text([own_content | translated_content])

  {
    title,
    excerpt,
    content,
    Enum.join(post.tags || [], " "),
    Enum.join(post.categories || [], " ")
  }
end
# Builds the `{title, alt, caption, original_name, tags}` texts indexed for a
# media item. Title, alt and caption combine the item's own text with the text
# of all its translations; tags are space-separated.
defp media_index_fields(media) do
  translations =
    Repo.all(from t in MediaTranslation, where: t.translation_for == ^media.id)

  combined = fn own_value, field ->
    join_text([own_value | Enum.map(translations, &Map.fetch!(&1, field))])
  end

  title = combined.(media.title, :title)
  alt = combined.(media.alt, :alt)
  caption = combined.(media.caption, :caption)

  {title, alt, caption, media.original_name || "", Enum.join(media.tags || [], " ")}
end
# Loads the translations of a post from `post_translations` as string-keyed
# maps with the keys "language", "title", "excerpt", "content", "status" and
# "file_path".
defp post_translations(post_id) do
  columns = ["language", "title", "excerpt", "content", "status", "file_path"]

  result =
    Repo.query!(
      "SELECT language, title, excerpt, content, status, file_path FROM post_translations WHERE translation_for = ?",
      [post_id]
    )

  # The six-element pattern keeps the row shape asserted, as each selected
  # column is paired with its name by position.
  Enum.map(result.rows, fn [_, _, _, _, _, _] = row ->
    columns |> Enum.zip(row) |> Map.new()
  end)
end
# Returns the body text to index for a post: the stored `content` when it is a
# binary, otherwise the markdown body of the post's file below the project data
# directory, otherwise an empty string.
defp post_content(post) do
  case post do
    %Post{content: content} when is_binary(content) ->
      content

    %Post{project_id: project_id, file_path: file_path}
    when is_binary(file_path) and file_path != "" ->
      data_dir =
        project_id
        |> Projects.get_project!()
        |> Projects.project_data_dir()

      markdown_body_from_file(Path.join(data_dir, file_path))

    _other ->
      ""
  end
end
# Returns the body text to index for a translation map: its "content" when that
# is a binary, otherwise — only for status "published" with a non-empty
# "file_path" — the markdown body of that file below the project data
# directory, otherwise an empty string.
defp translation_content(project_id, translation) do
  case translation do
    %{"content" => content} when is_binary(content) ->
      content

    %{"status" => "published", "file_path" => file_path}
    when is_binary(file_path) and file_path != "" ->
      data_dir =
        project_id
        |> Projects.get_project!()
        |> Projects.project_data_dir()

      markdown_body_from_file(Path.join(data_dir, file_path))

    _other ->
      ""
  end
end
# Reads `path` and returns the text after the first "\n---\n" separator with
# trailing newlines removed. Without a separator the whole file is returned;
# an unreadable file yields an empty string.
defp markdown_body_from_file(path) do
  with {:ok, contents} <- File.read(path) do
    parts = String.split(contents, "\n---\n", parts: 2)

    if match?([_, _], parts) do
      parts |> List.last() |> String.trim_trailing("\n")
    else
      contents
    end
  else
    {:error, _reason} -> ""
  end
end
# Joins the values with newlines, skipping nil and empty-string entries.
defp join_text(values) do
  kept = for value <- values, value not in [nil, ""], do: value
  Enum.join(kept, "\n")
end
# Converts the caller's filter map (atom or string keys) into a map that has
# every filter key set. Offset defaults to 0 and limit to 50.
defp normalize_filters(filters) do
  raw = fn key -> attr(filters, key) end

  %{
    status: raw.(:status),
    tags: normalize_list_filter(raw.(:tags)),
    categories: normalize_list_filter(raw.(:categories)),
    language: raw.(:language),
    missing_translation_language: raw.(:missing_translation_language),
    year: normalize_integer(raw.(:year)),
    month: normalize_integer(raw.(:month)),
    from: normalize_timestamp(raw.(:from), :start),
    to: normalize_timestamp(raw.(:to), :end),
    offset: normalize_non_negative_integer(raw.(:offset), 0),
    limit: normalize_non_negative_integer(raw.(:limit), 50)
  }
end
# Normalizes a list-valued filter: nil becomes an empty list, a list loses its
# nil entries, and any other value is wrapped in a one-element list.
defp normalize_list_filter(value) do
  cond do
    is_nil(value) -> []
    is_list(value) -> for entry <- value, not is_nil(entry), do: entry
    true -> [value]
  end
end
# Normalizes an integer-valued filter.
#
# Integers are returned unchanged and strings that consist solely of an integer
# are parsed. Everything else — nil, partially numeric strings, and values of
# other types such as floats or atoms — yields nil, so an unusable filter value
# is ignored rather than raising a FunctionClauseError.
defp normalize_integer(value) when is_integer(value), do: value

defp normalize_integer(value) when is_binary(value) do
  case Integer.parse(value) do
    {integer, ""} -> integer
    _ -> nil
  end
end

defp normalize_integer(_value), do: nil
# Normalizes a pagination value (offset or limit) to a non-negative integer.
#
# Non-negative integers are returned unchanged and strings that consist solely
# of a non-negative integer are parsed. Everything else yields `default`: nil,
# unparsable strings, values of other types, and — importantly — negative
# numbers, which would otherwise make `Enum.drop/2` and `Enum.take/2` operate
# from the end of the result list.
defp normalize_non_negative_integer(value, _default) when is_integer(value) and value >= 0,
  do: value

defp normalize_non_negative_integer(value, default) when is_binary(value) do
  case Integer.parse(value) do
    {integer, ""} when integer >= 0 -> integer
    _ -> default
  end
end

defp normalize_non_negative_integer(_value, default), do: default
# Normalizes a date-range bound to Unix seconds.
#
# Integers are taken as Unix seconds and returned unchanged. ISO 8601 date
# strings ("YYYY-MM-DD") are expanded to the first second of that day in UTC
# for `:start` and to 23:59:59 UTC for any other position. Everything else —
# nil, unparsable strings, and values of other types — yields nil, so an
# unusable bound is ignored rather than raising a FunctionClauseError.
defp normalize_timestamp(value, _position) when is_integer(value), do: value

defp normalize_timestamp(value, position) when is_binary(value) do
  case Date.from_iso8601(value) do
    {:ok, date} ->
      time = if position == :start, do: ~T[00:00:00], else: ~T[23:59:59]

      date
      |> DateTime.new!(time, "Etc/UTC")
      |> DateTime.to_unix()

    {:error, _reason} ->
      nil
  end
end

defp normalize_timestamp(_value, _position), do: nil
# True when the query is nil, empty, or only whitespace once converted to a string.
defp blank_query?(nil), do: true
defp blank_query?(query), do: query |> to_string() |> String.trim() == ""
# Reads `key` from a map that may use atom or string keys. The atom key wins
# whenever it is present, even with a nil value; the string form of the key is
# the fallback; nil is returned when neither is present.
defp attr(attrs, key) do
  with :error <- Map.fetch(attrs, key),
       :error <- Map.fetch(attrs, Atom.to_string(key)) do
    nil
  else
    {:ok, value} -> value
  end
end
end

227
test/bds/search_test.exs Normal file
View File

@@ -0,0 +1,227 @@
defmodule BDS.SearchTest do
use ExUnit.Case, async: false
alias BDS.Repo
# Per-test fixture: checks out a sandboxed database connection, creates a
# unique temporary data directory (removed again on exit) and a project that
# uses it. Exposes `project` and `temp_dir` to each test.
setup do
:ok = Ecto.Adapters.SQL.Sandbox.checkout(BDS.Repo)
temp_dir = Path.join(System.tmp_dir!(), "bds-search-#{System.unique_integer([:positive])}")
File.mkdir_p!(temp_dir)
on_exit(fn -> File.rm_rf(temp_dir) end)
{:ok, project} = BDS.Projects.create_project(%{name: "Search", data_path: temp_dir})
%{project: project, temp_dir: temp_dir}
end
# End-to-end check that post writes keep the index current: create, publish,
# archive, update and delete are each followed by a search that must reflect
# the change. Also covers the status, tag/category and language filters and
# offset/limit pagination.
test "search_posts indexes writes, supports filters and pagination, and removes deleted posts", %{project: project} do
# Three posts that all contain "nebula"/"Galaxy" but differ in status,
# tags, categories and language.
assert {:ok, draft_post} =
BDS.Posts.create_post(%{
project_id: project.id,
title: "Galaxy Draft",
content: "alpha nebula body",
tags: ["space", "draft"],
categories: ["astronomy"],
language: "en"
})
assert {:ok, published_post} =
BDS.Posts.create_post(%{
project_id: project.id,
title: "Galaxy Published",
content: "alpha nebula published",
tags: ["space", "published"],
categories: ["astronomy"],
language: "de"
})
assert {:ok, published_post} = BDS.Posts.publish_post(published_post.id)
assert {:ok, archived_post} =
BDS.Posts.create_post(%{
project_id: project.id,
title: "Galaxy Archived",
content: "alpha nebula archive",
tags: ["space", "archived"],
categories: ["history"],
language: "en"
})
assert {:ok, archived_post} = BDS.Posts.archive_post(archived_post.id)
# Status filter: only the draft remains; default offset/limit are 0 and 50.
assert {:ok, results} = BDS.Search.search_posts(project.id, "nebula", %{status: :draft})
assert results.total == 1
assert results.offset == 0
assert results.limit == 50
assert Enum.map(results.posts, & &1.id) == [draft_post.id]
# Tag and category filters combine: the archived post is in "history".
assert {:ok, tag_results} = BDS.Search.search_posts(project.id, "galaxy", %{tags: ["space"], categories: ["astronomy"]})
assert tag_results.total == 2
assert Enum.sort(Enum.map(tag_results.posts, & &1.id)) == Enum.sort([draft_post.id, published_post.id])
assert {:ok, language_results} = BDS.Search.search_posts(project.id, "galaxy", %{language: "de"})
assert Enum.map(language_results.posts, & &1.id) == [published_post.id]
# Pagination: total counts all matches, the page holds only `limit` posts.
assert {:ok, paged_results} = BDS.Search.search_posts(project.id, "galaxy", %{limit: 1, offset: 1})
assert paged_results.total == 3
assert paged_results.offset == 1
assert paged_results.limit == 1
assert length(paged_results.posts) == 1
# Renaming the draft must drop the old title from the index.
assert {:ok, updated_post} = BDS.Posts.update_post(draft_post.id, %{title: "Comet Draft"})
assert {:ok, empty_results} = BDS.Search.search_posts(project.id, "Galaxy Draft", %{})
assert empty_results.total == 0
assert {:ok, updated_results} = BDS.Search.search_posts(project.id, "Comet Draft", %{})
assert Enum.map(updated_results.posts, & &1.id) == [updated_post.id]
# Deleting a post must remove it from search results.
assert {:ok, :deleted} = BDS.Posts.delete_post(archived_post.id)
assert {:ok, deleted_results} = BDS.Search.search_posts(project.id, "Galaxy Archived", %{})
assert deleted_results.total == 0
end
# Checks that text of a post translation becomes searchable after
# `reindex_project/1`, and that the missing-translation filter keeps a post
# that has no translation in the requested language.
test "search_posts includes translation text after reindexing", %{project: project} do
assert {:ok, post} =
BDS.Posts.create_post(%{
project_id: project.id,
title: "Canonical",
content: "root body",
language: "en"
})
now = System.system_time(:second)
# The translation row is inserted with raw SQL, so nothing has indexed it
# before the explicit reindex below.
Repo.query!(
"""
INSERT INTO post_translations (
id, project_id, translation_for, language, title, excerpt, content, status,
created_at, updated_at, published_at, file_path, checksum
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""",
[
Ecto.UUID.generate(),
project.id,
post.id,
"fr",
"Bonjour galaxie",
"Resume",
"contenu lunaire",
"draft",
now,
now,
nil,
"",
nil
]
)
assert :ok = BDS.Search.reindex_project(project.id)
# "lunaire" only occurs in the French translation's content.
assert {:ok, results} = BDS.Search.search_posts(project.id, "lunaire", %{})
assert Enum.map(results.posts, & &1.id) == [post.id]
# Only a "fr" translation exists, so the post is missing "de".
assert {:ok, missing_translation_results} =
BDS.Search.search_posts(project.id, "Canonical", %{missing_translation_language: "de"})
assert Enum.map(missing_translation_results.posts, & &1.id) == [post.id]
end
# End-to-end check that media writes keep the index current: import,
# translation upsert, update and delete are each followed by a search that
# must reflect the change.
test "search_media indexes metadata, includes translation text, and removes deleted media", %{project: project, temp_dir: temp_dir} do
source_path = Path.join(temp_dir, "hero.txt")
File.write!(source_path, "hero")
assert {:ok, media} =
BDS.Media.import_media(%{
project_id: project.id,
source_path: source_path,
title: "Aurora asset",
alt: "Orbit illustration",
caption: "Captioned item",
tags: ["space"]
})
assert {:ok, _translation} =
BDS.Media.upsert_media_translation(media.id, "de", %{
title: "Weltraum Titel",
alt: "Orbit auf Deutsch",
caption: "Beschriftung"
})
# "Orbit" occurs in the item's own alt text and in the translated alt text.
assert {:ok, results} = BDS.Search.search_media(project.id, "Orbit", %{})
assert Enum.map(results.media, & &1.id) == [media.id]
# "Weltraum" only occurs in the German translation's title.
assert {:ok, translated_results} = BDS.Search.search_media(project.id, "Weltraum", %{})
assert Enum.map(translated_results.media, & &1.id) == [media.id]
# Renaming must drop the old title from the index.
assert {:ok, updated_media} = BDS.Media.update_media(media.id, %{title: "Renamed asset"})
assert {:ok, old_results} = BDS.Search.search_media(project.id, "Aurora", %{})
assert old_results.total == 0
assert {:ok, new_results} = BDS.Search.search_media(project.id, "Renamed", %{})
assert Enum.map(new_results.media, & &1.id) == [updated_media.id]
# Deleting the item must remove it from search results.
assert {:ok, :deleted} = BDS.Media.delete_media(media.id)
assert {:ok, deleted_results} = BDS.Search.search_media(project.id, "Renamed", %{})
assert deleted_results.total == 0
end
# Checks that rebuilding posts and media from files on disk also indexes them:
# a post file and a media file with its `.meta` sidecar are written directly
# into the project's data directory, the rebuild functions are run, and both
# items must then be found by search under the ids given in the files.
test "rebuild operations repopulate the search index from filesystem truth", %{project: project, temp_dir: temp_dir} do
posts_dir = Path.join([temp_dir, "posts", "2026", "04"])
File.mkdir_p!(posts_dir)
# Post file: frontmatter between "---" lines, followed by the body.
File.write!(
Path.join(posts_dir, "filesystem-post.md"),
[
"---",
"id: search-post-from-file",
"title: File Search Post",
"slug: filesystem-post",
"status: published",
"language: en",
"created_at: 1711843200",
"updated_at: 1711929600",
"published_at: 1712016000",
"tags:",
" - filesystem",
"categories:",
" - imports",
"---",
"starlight filesystem body",
""
]
|> Enum.join("\n")
)
media_dir = Path.join([temp_dir, "media", "2026", "04"])
File.mkdir_p!(media_dir)
File.write!(Path.join(media_dir, "filesystem.txt"), "media body")
# Sidecar metadata for the media file above.
File.write!(
Path.join(media_dir, "filesystem.txt.meta"),
[
"id: search-media-from-file",
"original_name: filesystem.txt",
"mime_type: text/plain",
"size: 10",
"title: File Search Media",
"alt: imported alt",
"caption: imported caption",
"language: en",
"created_at: 1711843200",
"updated_at: 1711929600",
"tags:",
" - filesystem",
""
]
|> Enum.join("\n")
)
assert {:ok, _posts} = BDS.Posts.rebuild_posts_from_files(project.id)
assert {:ok, _media} = BDS.Media.rebuild_media_from_files(project.id)
# "starlight" only occurs in the post file's body.
assert {:ok, post_results} = BDS.Search.search_posts(project.id, "starlight", %{})
assert Enum.map(post_results.posts, & &1.id) == ["search-post-from-file"]
# "imported" occurs in the sidecar's alt and caption.
assert {:ok, media_results} = BDS.Search.search_media(project.id, "imported", %{})
assert Enum.map(media_results.media, & &1.id) == ["search-media-from-file"]
end
end