From 5d16a89d1c7738b6aa4dbf72da8734469a04fe3b Mon Sep 17 00:00:00 2001
From: Chili Palmer
Date: Thu, 23 Apr 2026 16:57:51 +0200
Subject: [PATCH] feat: search

---
 lib/bds/media.ex         |   8 +
 lib/bds/posts.ex         |  35 ++++
 lib/bds/search.ex        | 405 +++++++++++++++++++++++++++++++++++++++
 test/bds/search_test.exs | 242 +++++++++++++++++++++++
 4 files changed, 690 insertions(+)
 create mode 100644 lib/bds/search.ex
 create mode 100644 test/bds/search_test.exs

diff --git a/lib/bds/media.ex b/lib/bds/media.ex
index 813454f..afc761e 100644
--- a/lib/bds/media.ex
+++ b/lib/bds/media.ex
@@ -7,6 +7,7 @@ defmodule BDS.Media do
   alias BDS.Media.Translation
   alias BDS.Projects
   alias BDS.Repo
+  alias BDS.Search
   alias BDS.Sidecar
 
   def import_media(attrs) do
@@ -52,6 +53,7 @@ defmodule BDS.Media do
       :ok = File.cp(source_path, destination)
       :ok = write_sidecar(project, media)
       :ok = ensure_thumbnails(project, media)
+      :ok = Search.sync_media(media)
       media
     end)
     |> case do
@@ -86,6 +88,7 @@ defmodule BDS.Media do
         |> Repo.update!()
 
       :ok = write_sidecar(project, updated_media)
+      :ok = Search.sync_media(updated_media)
       updated_media
     end)
     |> case do
@@ -113,6 +116,7 @@ defmodule BDS.Media do
         end)
 
         Repo.delete!(media)
+        :ok = Search.delete_media(media.id)
         {:ok, :deleted}
     end
   end
@@ -149,6 +153,7 @@ defmodule BDS.Media do
         |> Repo.insert_or_update!()
 
       :ok = write_translation_sidecar(project, media, updated_translation)
+      :ok = Search.sync_media(media.id)
       updated_translation
     end)
     |> case do
@@ -244,6 +249,7 @@ defmodule BDS.Media do
     |> Media.changeset(attrs)
     |> Repo.insert_or_update!()
     |> tap(fn reloaded_media -> ensure_thumbnails(project, reloaded_media) end)
+    |> tap(&Search.sync_media/1)
   end
 
   defp write_sidecar(project, media) do
@@ -316,6 +322,8 @@ defmodule BDS.Media do
         updated_at: now
       })
       |> Repo.insert_or_update!()
+
+      :ok = Search.sync_media(media.id)
     end
   end
 
diff --git a/lib/bds/posts.ex b/lib/bds/posts.ex
index 629c34f..eb91c08 100644
--- a/lib/bds/posts.ex
+++ b/lib/bds/posts.ex
@@ -7,6 +7,7 @@ defmodule BDS.Posts do
   alias BDS.Posts.Post
   alias BDS.Projects
   alias BDS.Repo
+  alias BDS.Search
   alias BDS.Slug
 
   def create_post(attrs) do
@@ -42,6 +43,14 @@ defmodule BDS.Posts do
       published_excerpt: nil
     })
     |> Repo.insert()
+    |> case do
+      {:ok, post} ->
+        :ok = Search.sync_post(post)
+        {:ok, post}
+
+      error ->
+        error
+    end
   end
 
   def update_post(post_id, attrs) do
@@ -61,6 +70,14 @@ defmodule BDS.Posts do
       post
       |> Post.changeset(updates)
       |> Repo.update()
+      |> case do
+        {:ok, updated_post} ->
+          :ok = Search.sync_post(updated_post)
+          {:ok, updated_post}
+
+        error ->
+          error
+      end
     else
       {:error, changeset} -> {:error, changeset}
     end
@@ -92,6 +109,14 @@ defmodule BDS.Posts do
         updated_at: updated_at
       })
       |> Repo.update()
+      |> case do
+        {:ok, updated_post} ->
+          :ok = Search.sync_post(updated_post)
+          {:ok, updated_post}
+
+        error ->
+          error
+      end
     end
   end
 
@@ -116,6 +141,7 @@ defmodule BDS.Posts do
       %Post{} = post ->
         delete_post_file(post)
         Repo.delete!(post)
+        :ok = Search.delete_post(post.id)
         {:ok, :deleted}
     end
   end
@@ -129,6 +155,14 @@ defmodule BDS.Posts do
         post
         |> Post.changeset(%{status: :archived, updated_at: System.system_time(:second)})
         |> Repo.update()
+        |> case do
+          {:ok, updated_post} ->
+            :ok = Search.sync_post(updated_post)
+            {:ok, updated_post}
+
+          error ->
+            error
+        end
 
       %Post{} = post ->
         {:error,
@@ -345,6 +379,7 @@ defmodule BDS.Posts do
     post
     |> Post.changeset(attrs)
     |> Repo.insert_or_update!()
+    |> tap(&Search.sync_post/1)
   end
 
   defp parse_post_status(status) when is_atom(status), do: status
diff --git a/lib/bds/search.ex b/lib/bds/search.ex
new file mode 100644
index 0000000..7b42293
--- /dev/null
+++ b/lib/bds/search.ex
@@ -0,0 +1,405 @@
+defmodule BDS.Search do
+  @moduledoc false
+
+  import Ecto.Query
+
+  alias BDS.Media.Media
+  alias BDS.Media.Translation, as: MediaTranslation
+  alias BDS.Posts.Post
+  alias BDS.Projects
+  alias BDS.Repo
+
+  # Full-text search over a project's posts. A blank query lists every post of
+  # the project; `filters` accepts atom or string keys (status, tags, categories,
+  # language, missing_translation_language, year, month, from, to, offset, limit).
+  # `total` counts matches after filtering and before pagination.
+  def search_posts(project_id, query, filters \\ %{}) do
+    filters = normalize_filters(filters)
+
+    posts =
+      project_id
+      |> candidate_post_ids(query)
+      |> load_posts_in_order()
+      |> filter_posts(filters)
+
+    {:ok,
+     %{
+       posts: paginate(posts, filters),
+       total: length(posts),
+       offset: filters.offset,
+       limit: filters.limit
+     }}
+  end
+
+  # Full-text search over a project's media. Only `offset` and `limit` are
+  # applied from `filters`; the post-specific filters are ignored here.
+  def search_media(project_id, query, filters \\ %{}) do
+    filters = normalize_filters(filters)
+
+    media_items =
+      project_id
+      |> candidate_media_ids(query)
+      |> load_media_in_order()
+
+    {:ok,
+     %{
+       media: paginate(media_items, filters),
+       total: length(media_items),
+       offset: filters.offset,
+       limit: filters.limit
+     }}
+  end
+
+  # Drops and rebuilds the FTS rows of every post and media item in the project.
+  def reindex_project(project_id) do
+    Repo.query!("DELETE FROM posts_fts WHERE post_id IN (SELECT id FROM posts WHERE project_id = ?)", [project_id])
+    Repo.query!("DELETE FROM media_fts WHERE media_id IN (SELECT id FROM media WHERE project_id = ?)", [project_id])
+
+    Repo.all(from post in Post, where: post.project_id == ^project_id)
+    |> Enum.each(&sync_post/1)
+
+    Repo.all(from media in Media, where: media.project_id == ^project_id)
+    |> Enum.each(&sync_media/1)
+
+    :ok
+  end
+
+  # Replaces the FTS row for a post (delete, then insert). Given an id, the post
+  # is loaded first and only the delete runs when it no longer exists.
+  def sync_post(%Post{} = post) do
+    delete_post(post.id)
+
+    {title, excerpt, content, tags, categories} = post_index_fields(post)
+
+    Repo.query!(
+      "INSERT INTO posts_fts (post_id, title, excerpt, content, tags, categories) VALUES (?, ?, ?, ?, ?, ?)",
+      [post.id, title, excerpt, content, tags, categories]
+    )
+
+    :ok
+  end
+
+  def sync_post(post_id) when is_binary(post_id) do
+    case Repo.get(Post, post_id) do
+      nil -> delete_post(post_id)
+      post -> sync_post(post)
+    end
+  end
+
+  def delete_post(%Post{id: post_id}), do: delete_post(post_id)
+
+  def delete_post(post_id) when is_binary(post_id) do
+    Repo.query!("DELETE FROM posts_fts WHERE post_id = ?", [post_id])
+    :ok
+  end
+
+  # Replaces the FTS row for a media item; accepts a struct or an id.
+  def sync_media(%Media{} = media) do
+    delete_media(media.id)
+
+    {title, alt, caption, original_name, tags} = media_index_fields(media)
+
+    Repo.query!(
+      "INSERT INTO media_fts (media_id, title, alt, caption, original_name, tags) VALUES (?, ?, ?, ?, ?, ?)",
+      [media.id, title, alt, caption, original_name, tags]
+    )
+
+    :ok
+  end
+
+  def sync_media(media_id) when is_binary(media_id) do
+    case Repo.get(Media, media_id) do
+      nil -> delete_media(media_id)
+      media -> sync_media(media)
+    end
+  end
+
+  def delete_media(%Media{id: media_id}), do: delete_media(media_id)
+
+  def delete_media(media_id) when is_binary(media_id) do
+    Repo.query!("DELETE FROM media_fts WHERE media_id = ?", [media_id])
+    :ok
+  end
+
+  defp candidate_post_ids(project_id, query) do
+    if blank_query?(query) do
+      Repo.all(from post in Post, where: post.project_id == ^project_id, select: post.id)
+    else
+      Repo.query!(
+        """
+        SELECT posts_fts.post_id
+        FROM posts_fts
+        JOIN posts ON posts.id = posts_fts.post_id
+        WHERE posts.project_id = ? AND posts_fts MATCH ?
+        ORDER BY bm25(posts_fts), posts_fts.rowid
+        """,
+        [project_id, query]
+      ).rows
+      |> Enum.map(fn [post_id] -> post_id end)
+    end
+  end
+
+  defp candidate_media_ids(project_id, query) do
+    if blank_query?(query) do
+      Repo.all(from media in Media, where: media.project_id == ^project_id, select: media.id)
+    else
+      Repo.query!(
+        """
+        SELECT media_fts.media_id
+        FROM media_fts
+        JOIN media ON media.id = media_fts.media_id
+        WHERE media.project_id = ? AND media_fts MATCH ?
+        ORDER BY bm25(media_fts), media_fts.rowid
+        """,
+        [project_id, query]
+      ).rows
+      |> Enum.map(fn [media_id] -> media_id end)
+    end
+  end
+
+  defp load_posts_in_order([]), do: []
+
+  defp load_posts_in_order(post_ids) do
+    posts_by_id =
+      Repo.all(from post in Post, where: post.id in ^post_ids)
+      |> Map.new(&{&1.id, &1})
+
+    Enum.map(post_ids, &Map.get(posts_by_id, &1))
+    |> Enum.reject(&is_nil/1)
+  end
+
+  defp load_media_in_order([]), do: []
+
+  defp load_media_in_order(media_ids) do
+    media_by_id =
+      Repo.all(from media in Media, where: media.id in ^media_ids)
+      |> Map.new(&{&1.id, &1})
+
+    Enum.map(media_ids, &Map.get(media_by_id, &1))
+    |> Enum.reject(&is_nil/1)
+  end
+
+  defp filter_posts(posts, filters) do
+    translation_languages =
+      if is_binary(filters.missing_translation_language) do
+        post_translation_languages(posts)
+      else
+        %{}
+      end
+
+    Enum.filter(posts, fn post ->
+      matches_status?(post, filters.status) and
+        matches_overlap?(post.tags, filters.tags) and
+        matches_overlap?(post.categories, filters.categories) and
+        matches_exact?(post.language, filters.language) and
+        matches_year?(post, filters.year) and
+        matches_month?(post, filters.month) and
+        matches_from?(post, filters.from) and
+        matches_to?(post, filters.to) and
+        matches_missing_translation?(post, filters.missing_translation_language, translation_languages)
+    end)
+  end
+
+  defp matches_status?(_post, nil), do: true
+  defp matches_status?(post, status), do: to_string(post.status) == to_string(status)
+
+  defp matches_overlap?(_values, []), do: true
+
+  defp matches_overlap?(values, required_values) do
+    not MapSet.disjoint?(MapSet.new(values || []), MapSet.new(required_values))
+  end
+
+  defp matches_exact?(_value, nil), do: true
+  defp matches_exact?(value, expected), do: value == expected
+
+  defp matches_year?(_post, nil), do: true
+  defp matches_year?(post, year), do: DateTime.from_unix!(post.created_at).year == year
+
+  defp matches_month?(_post, nil), do: true
+  defp matches_month?(post, month), do: DateTime.from_unix!(post.created_at).month == month
+
+  defp matches_from?(_post, nil), do: true
+  defp matches_from?(post, from_unix), do: post.created_at >= from_unix
+
+  defp matches_to?(_post, nil), do: true
+  defp matches_to?(post, to_unix), do: post.created_at <= to_unix
+
+  defp matches_missing_translation?(_post, nil, _translation_languages), do: true
+  defp matches_missing_translation?(%Post{do_not_translate: true}, _language, _translation_languages), do: false
+
+  defp matches_missing_translation?(post, language, translation_languages) do
+    language not in Map.get(translation_languages, post.id, [])
+  end
+
+  defp post_translation_languages([]), do: %{}
+
+  defp post_translation_languages(posts) do
+    post_ids = Enum.map(posts, & &1.id)
+    placeholders = Enum.map_join(post_ids, ",", fn _ -> "?" end)
+
+    Repo.query!(
+      "SELECT translation_for, language FROM post_translations WHERE translation_for IN (#{placeholders})",
+      post_ids
+    ).rows
+    |> Enum.group_by(fn [post_id, _language] -> post_id end, fn [_post_id, language] -> language end)
+  end
+
+  defp paginate(items, filters) do
+    items
+    |> Enum.drop(filters.offset)
+    |> Enum.take(filters.limit)
+  end
+
+  defp post_index_fields(post) do
+    translations = post_translations(post.id)
+
+    title = [post.title | Enum.map(translations, &Map.get(&1, "title"))] |> join_text()
+    excerpt = [post.excerpt | Enum.map(translations, &Map.get(&1, "excerpt"))] |> join_text()
+
+    content =
+      [post_content(post) | Enum.map(translations, &translation_content(post.project_id, &1))]
+      |> join_text()
+
+    tags = Enum.join(post.tags || [], " ")
+    categories = Enum.join(post.categories || [], " ")
+
+    {title, excerpt, content, tags, categories}
+  end
+
+  defp media_index_fields(media) do
+    translations =
+      Repo.all(from translation in MediaTranslation, where: translation.translation_for == ^media.id)
+
+    title = [media.title | Enum.map(translations, & &1.title)] |> join_text()
+    alt = [media.alt | Enum.map(translations, & &1.alt)] |> join_text()
+    caption = [media.caption | Enum.map(translations, & &1.caption)] |> join_text()
+    original_name = media.original_name || ""
+    tags = Enum.join(media.tags || [], " ")
+
+    {title, alt, caption, original_name, tags}
+  end
+
+  defp post_translations(post_id) do
+    Repo.query!(
+      "SELECT language, title, excerpt, content, status, file_path FROM post_translations WHERE translation_for = ?",
+      [post_id]
+    ).rows
+    |> Enum.map(fn [language, title, excerpt, content, status, file_path] ->
+      %{
+        "language" => language,
+        "title" => title,
+        "excerpt" => excerpt,
+        "content" => content,
+        "status" => status,
+        "file_path" => file_path
+      }
+    end)
+  end
+
+  defp post_content(%Post{content: content}) when is_binary(content), do: content
+
+  defp post_content(%Post{project_id: project_id, file_path: file_path}) when is_binary(file_path) and file_path != "" do
+    project_id
+    |> Projects.get_project!()
+    |> Projects.project_data_dir()
+    |> Path.join(file_path)
+    |> markdown_body_from_file()
+  end
+
+  defp post_content(_post), do: ""
+
+  defp translation_content(_project_id, %{"content" => content}) when is_binary(content), do: content
+
+  defp translation_content(project_id, %{"status" => "published", "file_path" => file_path}) when is_binary(file_path) and file_path != "" do
+    project_id
+    |> Projects.get_project!()
+    |> Projects.project_data_dir()
+    |> Path.join(file_path)
+    |> markdown_body_from_file()
+  end
+
+  defp translation_content(_project_id, _translation), do: ""
+
+  defp markdown_body_from_file(path) do
+    case File.read(path) do
+      {:ok, contents} ->
+        case String.split(contents, "\n---\n", parts: 2) do
+          [_frontmatter, body] -> String.trim_trailing(body, "\n")
+          _parts -> contents
+        end
+
+      {:error, _reason} ->
+        ""
+    end
+  end
+
+  defp join_text(values) do
+    values
+    |> Enum.reject(&(&1 in [nil, ""]))
+    |> Enum.join("\n")
+  end
+
+  defp normalize_filters(filters) do
+    %{
+      status: attr(filters, :status),
+      tags: normalize_list_filter(attr(filters, :tags)),
+      categories: normalize_list_filter(attr(filters, :categories)),
+      language: attr(filters, :language),
+      missing_translation_language: attr(filters, :missing_translation_language),
+      year: normalize_integer(attr(filters, :year)),
+      month: normalize_integer(attr(filters, :month)),
+      from: normalize_timestamp(attr(filters, :from), :start),
+      to: normalize_timestamp(attr(filters, :to), :end),
+      offset: normalize_non_negative_integer(attr(filters, :offset), 0),
+      limit: normalize_non_negative_integer(attr(filters, :limit), 50)
+    }
+  end
+
+  defp normalize_list_filter(nil), do: []
+  defp normalize_list_filter(value) when is_list(value), do: Enum.reject(value, &is_nil/1)
+  defp normalize_list_filter(value), do: [value]
+
+  defp normalize_integer(nil), do: nil
+  defp normalize_integer(value) when is_integer(value), do: value
+
+  defp normalize_integer(value) when is_binary(value) do
+    case Integer.parse(value) do
+      {integer, ""} -> integer
+      _ -> nil
+    end
+  end
+
+  # Accepts integers or integer strings; anything missing, unparsable, or
+  # negative falls back to `default` so Enum.drop/take never receive a negative
+  # count (which would slice from the end of the list).
+  defp normalize_non_negative_integer(value, default) do
+    case normalize_integer(value) do
+      integer when is_integer(integer) and integer >= 0 -> integer
+      _other -> default
+    end
+  end
+
+  defp normalize_timestamp(nil, _position), do: nil
+  defp normalize_timestamp(value, _position) when is_integer(value), do: value
+
+  defp normalize_timestamp(value, position) when is_binary(value) do
+    case Date.from_iso8601(value) do
+      {:ok, date} ->
+        time = if position == :start, do: ~T[00:00:00], else: ~T[23:59:59]
+        {:ok, datetime} = DateTime.new(date, time, "Etc/UTC")
+        DateTime.to_unix(datetime)
+
+      {:error, _reason} -> nil
+    end
+  end
+
+  defp blank_query?(query), do: query in [nil, ""] or String.trim(to_string(query)) == ""
+
+  defp attr(attrs, key) do
+    cond do
+      Map.has_key?(attrs, key) -> Map.get(attrs, key)
+      Map.has_key?(attrs, Atom.to_string(key)) -> Map.get(attrs, Atom.to_string(key))
+      true -> nil
+    end
+  end
+end
diff --git a/test/bds/search_test.exs b/test/bds/search_test.exs
new file mode 100644
index 0000000..10913ff
--- /dev/null
+++ b/test/bds/search_test.exs
@@ -0,0 +1,242 @@
+defmodule BDS.SearchTest do
+  use ExUnit.Case, async: false
+
+  alias BDS.Repo
+
+  setup do
+    :ok = Ecto.Adapters.SQL.Sandbox.checkout(BDS.Repo)
+    temp_dir = Path.join(System.tmp_dir!(), "bds-search-#{System.unique_integer([:positive])}")
+    File.mkdir_p!(temp_dir)
+    on_exit(fn -> File.rm_rf(temp_dir) end)
+
+    {:ok, project} = BDS.Projects.create_project(%{name: "Search", data_path: temp_dir})
+    %{project: project, temp_dir: temp_dir}
+  end
+
+  test "search_posts indexes writes, supports filters and pagination, and removes deleted posts", %{project: project} do
+    assert {:ok, draft_post} =
+             BDS.Posts.create_post(%{
+               project_id: project.id,
+               title: "Galaxy Draft",
+               content: "alpha nebula body",
+               tags: ["space", "draft"],
+               categories: ["astronomy"],
+               language: "en"
+             })
+
+    assert {:ok, published_post} =
+             BDS.Posts.create_post(%{
+               project_id: project.id,
+               title: "Galaxy Published",
+               content: "alpha nebula published",
+               tags: ["space", "published"],
+               categories: ["astronomy"],
+               language: "de"
+             })
+
+    assert {:ok, published_post} = BDS.Posts.publish_post(published_post.id)
+
+    assert {:ok, archived_post} =
+             BDS.Posts.create_post(%{
+               project_id: project.id,
+               title: "Galaxy Archived",
+               content: "alpha nebula archive",
+               tags: ["space", "archived"],
+               categories: ["history"],
+               language: "en"
+             })
+
+    assert {:ok, archived_post} = BDS.Posts.archive_post(archived_post.id)
+
+    assert {:ok, results} = BDS.Search.search_posts(project.id, "nebula", %{status: :draft})
+    assert results.total == 1
+    assert results.offset == 0
+    assert results.limit == 50
+    assert Enum.map(results.posts, & &1.id) == [draft_post.id]
+
+    assert {:ok, tag_results} = BDS.Search.search_posts(project.id, "galaxy", %{tags: ["space"], categories: ["astronomy"]})
+    assert tag_results.total == 2
+    assert Enum.sort(Enum.map(tag_results.posts, & &1.id)) == Enum.sort([draft_post.id, published_post.id])
+
+    assert {:ok, language_results} = BDS.Search.search_posts(project.id, "galaxy", %{language: "de"})
+    assert Enum.map(language_results.posts, & &1.id) == [published_post.id]
+
+    assert {:ok, paged_results} = BDS.Search.search_posts(project.id, "galaxy", %{limit: 1, offset: 1})
+    assert paged_results.total == 3
+    assert paged_results.offset == 1
+    assert paged_results.limit == 1
+    assert length(paged_results.posts) == 1
+
+    assert {:ok, updated_post} = BDS.Posts.update_post(draft_post.id, %{title: "Comet Draft"})
+    assert {:ok, empty_results} = BDS.Search.search_posts(project.id, "Galaxy Draft", %{})
+    assert empty_results.total == 0
+
+    assert {:ok, updated_results} = BDS.Search.search_posts(project.id, "Comet Draft", %{})
+    assert Enum.map(updated_results.posts, & &1.id) == [updated_post.id]
+
+    assert {:ok, :deleted} = BDS.Posts.delete_post(archived_post.id)
+    assert {:ok, deleted_results} = BDS.Search.search_posts(project.id, "Galaxy Archived", %{})
+    assert deleted_results.total == 0
+  end
+
+  test "search_posts includes translation text after reindexing", %{project: project} do
+    assert {:ok, post} =
+             BDS.Posts.create_post(%{
+               project_id: project.id,
+               title: "Canonical",
+               content: "root body",
+               language: "en"
+             })
+
+    now = System.system_time(:second)
+
+    Repo.query!(
+      """
+      INSERT INTO post_translations (
+        id, project_id, translation_for, language, title, excerpt, content, status,
+        created_at, updated_at, published_at, file_path, checksum
+      ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+      """,
+      [
+        Ecto.UUID.generate(),
+        project.id,
+        post.id,
+        "fr",
+        "Bonjour galaxie",
+        "Resume",
+        "contenu lunaire",
+        "draft",
+        now,
+        now,
+        nil,
+        "",
+        nil
+      ]
+    )
+
+    assert :ok = BDS.Search.reindex_project(project.id)
+
+    assert {:ok, results} = BDS.Search.search_posts(project.id, "lunaire", %{})
+    assert Enum.map(results.posts, & &1.id) == [post.id]
+
+    assert {:ok, missing_translation_results} =
+             BDS.Search.search_posts(project.id, "Canonical", %{missing_translation_language: "de"})
+
+    assert Enum.map(missing_translation_results.posts, & &1.id) == [post.id]
+  end
+
+  test "search_media indexes metadata, includes translation text, and removes deleted media", %{project: project, temp_dir: temp_dir} do
+    source_path = Path.join(temp_dir, "hero.txt")
+    File.write!(source_path, "hero")
+
+    assert {:ok, media} =
+             BDS.Media.import_media(%{
+               project_id: project.id,
+               source_path: source_path,
+               title: "Aurora asset",
+               alt: "Orbit illustration",
+               caption: "Captioned item",
+               tags: ["space"]
+             })
+
+    assert {:ok, _translation} =
+             BDS.Media.upsert_media_translation(media.id, "de", %{
+               title: "Weltraum Titel",
+               alt: "Orbit auf Deutsch",
+               caption: "Beschriftung"
+             })
+
+    assert {:ok, results} = BDS.Search.search_media(project.id, "Orbit", %{})
+    assert Enum.map(results.media, & &1.id) == [media.id]
+
+    assert {:ok, translated_results} = BDS.Search.search_media(project.id, "Weltraum", %{})
+    assert Enum.map(translated_results.media, & &1.id) == [media.id]
+
+    assert {:ok, updated_media} = BDS.Media.update_media(media.id, %{title: "Renamed asset"})
+    assert {:ok, old_results} = BDS.Search.search_media(project.id, "Aurora", %{})
+    assert old_results.total == 0
+
+    assert {:ok, new_results} = BDS.Search.search_media(project.id, "Renamed", %{})
+    assert Enum.map(new_results.media, & &1.id) == [updated_media.id]
+
+    assert {:ok, :deleted} = BDS.Media.delete_media(media.id)
+    assert {:ok, deleted_results} = BDS.Search.search_media(project.id, "Renamed", %{})
+    assert deleted_results.total == 0
+  end
+
+  test "rebuild operations repopulate the search index from filesystem truth", %{project: project, temp_dir: temp_dir} do
+    posts_dir = Path.join([temp_dir, "posts", "2026", "04"])
+    File.mkdir_p!(posts_dir)
+
+    File.write!(
+      Path.join(posts_dir, "filesystem-post.md"),
+      [
+        "---",
+        "id: search-post-from-file",
+        "title: File Search Post",
+        "slug: filesystem-post",
+        "status: published",
+        "language: en",
+        "created_at: 1711843200",
+        "updated_at: 1711929600",
+        "published_at: 1712016000",
+        "tags:",
+        " - filesystem",
+        "categories:",
+        " - imports",
+        "---",
+        "starlight filesystem body",
+        ""
+      ]
+      |> Enum.join("\n")
+    )
+
+    media_dir = Path.join([temp_dir, "media", "2026", "04"])
+    File.mkdir_p!(media_dir)
+    File.write!(Path.join(media_dir, "filesystem.txt"), "media body")
+
+    File.write!(
+      Path.join(media_dir, "filesystem.txt.meta"),
+      [
+        "id: search-media-from-file",
+        "original_name: filesystem.txt",
+        "mime_type: text/plain",
+        "size: 10",
+        "title: File Search Media",
+        "alt: imported alt",
+        "caption: imported caption",
+        "language: en",
+        "created_at: 1711843200",
+        "updated_at: 1711929600",
+        "tags:",
+        " - filesystem",
+        ""
+      ]
+      |> Enum.join("\n")
+    )
+
+    assert {:ok, _posts} = BDS.Posts.rebuild_posts_from_files(project.id)
+    assert {:ok, _media} = BDS.Media.rebuild_media_from_files(project.id)
+
+    assert {:ok, post_results} = BDS.Search.search_posts(project.id, "starlight", %{})
+    assert Enum.map(post_results.posts, & &1.id) == ["search-post-from-file"]
+
+    assert {:ok, media_results} = BDS.Search.search_media(project.id, "imported", %{})
+    assert Enum.map(media_results.media, & &1.id) == ["search-media-from-file"]
+  end
+
+  test "search_posts falls back to default pagination for negative offset and limit", %{project: project} do
+    assert {:ok, post} =
+             BDS.Posts.create_post(%{
+               project_id: project.id,
+               title: "Pagination Post",
+               content: "pagination body",
+               language: "en"
+             })
+
+    assert {:ok, results} = BDS.Search.search_posts(project.id, "pagination", %{offset: -1, limit: "-5"})
+    assert results.offset == 0
+    assert results.limit == 50
+    assert Enum.map(results.posts, & &1.id) == [post.id]
+  end
+end