From 4cf0f5281b0bbe8557a6b2a3c770103552df5e98 Mon Sep 17 00:00:00 2001 From: Chili Palmer Date: Sat, 2 May 2026 10:33:19 +0200 Subject: [PATCH] feat: fill missing translations implemented --- lib/bds/bounded_atoms.ex | 1 + lib/bds/desktop/shell_commands.ex | 40 +++ lib/bds/desktop/shell_live.ex | 2 +- lib/bds/posts.ex | 13 + lib/bds/posts/auto_translation.ex | 304 +++++++++++++++++++++-- test/bds/bounded_atoms_test.exs | 1 + test/bds/desktop/shell_commands_test.exs | 196 +++++++++++++++ test/bds/desktop_test.exs | 4 +- 8 files changed, 533 insertions(+), 28 deletions(-) diff --git a/lib/bds/bounded_atoms.ex b/lib/bds/bounded_atoms.ex index 0728e1a..9cd26d1 100644 --- a/lib/bds/bounded_atoms.ex +++ b/lib/bds/bounded_atoms.ex @@ -48,6 +48,7 @@ defmodule BDS.BoundedAtoms do :metadata_diff, :regenerate_calendar, :validate_translations, + :fill_missing_translations, :find_duplicates, :generate_sitemap, :validate_site, diff --git a/lib/bds/desktop/shell_commands.ex b/lib/bds/desktop/shell_commands.ex index 142423f..b0c8b67 100644 --- a/lib/bds/desktop/shell_commands.ex +++ b/lib/bds/desktop/shell_commands.ex @@ -365,6 +365,33 @@ defmodule BDS.Desktop.ShellCommands do ) end + defp dispatch("fill_missing_translations", project, _params) do + with {:ok, metadata} <- Metadata.get_project_metadata(project.id) do + if translation_fill_enabled?(metadata) do + queue_task( + project, + "fill_missing_translations", + "Fill Missing Translations", + "AI", + fn report -> + {:ok, result} = Posts.fill_missing_translations(project.id, on_progress: report) + Map.put(result, :project_id, project.id) + end + ) + else + {:ok, + %{ + kind: "output", + action: "fill_missing_translations", + title: "Fill Missing Translations", + message: "All translations are up to date", + project_id: project.id, + level: "info" + }} + end + end + end + defp dispatch("find_duplicates", project, _params) do queue_task(project, "find_duplicates", "Find Duplicate Posts", "Embeddings", fn report -> {:ok, pairs} = Embeddings.find_duplicates(project.id, on_progress: report) @@ -421,6 +448,19 @@ defmodule BDS.Desktop.ShellCommands do end end + defp translation_fill_enabled?(metadata) do + ([Map.get(metadata, :main_language)] ++ Map.get(metadata, :blog_languages, [])) + |> Enum.map(fn language -> + language + |> to_string() + |> String.trim() + |> String.downcase() + end) + |> Enum.reject(&(&1 == "")) + |> Enum.uniq() + |> length() > 1 + end + defp rebuild_database_steps(project) do [ %{ diff --git a/lib/bds/desktop/shell_live.ex b/lib/bds/desktop/shell_live.ex index 8e55464..0166020 100644 --- a/lib/bds/desktop/shell_live.ex +++ b/lib/bds/desktop/shell_live.ex @@ -108,7 +108,7 @@ defmodule BDS.Desktop.ShellLive do |> MapSet.union(MapSet.new([:open_in_browser, :open_data_folder])) |> MapSet.union(MapSet.new([:preview_post, :rebuild_database, :reindex_text])) |> MapSet.union(MapSet.new([:rebuild_embedding_index, :metadata_diff, :regenerate_calendar])) - |> MapSet.union(MapSet.new([:validate_translations, :find_duplicates])) + |> MapSet.union(MapSet.new([:validate_translations, :fill_missing_translations, :find_duplicates])) |> MapSet.union(MapSet.new([:generate_sitemap, :validate_site, :upload_site])) end diff --git a/lib/bds/posts.ex b/lib/bds/posts.ex index e62c4cd..fad9121 100644 --- a/lib/bds/posts.ex +++ b/lib/bds/posts.ex @@ -471,6 +471,19 @@ defmodule BDS.Posts do }} defdelegate fix_invalid_translations(report), to: TranslationValidation, as: :fix_invalid + @spec fill_missing_translations(String.t(), rebuild_opts()) :: + {:ok, + %{ + translated_posts: non_neg_integer(), + translated_media: non_neg_integer(), + failed_count: non_neg_integer(), + warned_count: non_neg_integer(), + nothing_to_do: boolean() + }} + defdelegate fill_missing_translations(project_id, opts \\ []), + to: AutoTranslation, + as: :fill_missing + @spec rewrite_published_post(String.t()) :: :ok def rewrite_published_post(post_id) do post = Repo.get!(Post, post_id) diff --git a/lib/bds/posts/auto_translation.ex b/lib/bds/posts/auto_translation.ex index 2d7be79..fc274cd 100644 --- a/lib/bds/posts/auto_translation.ex +++ b/lib/bds/posts/auto_translation.ex @@ -6,6 +6,7 @@ defmodule BDS.Posts.AutoTranslation do alias BDS.AI alias BDS.Media alias BDS.Metadata + alias BDS.Posts alias BDS.Posts.Post alias BDS.Posts.PostMedia alias BDS.Posts.Translation @@ -33,25 +34,141 @@ defmodule BDS.Posts.AutoTranslation do :ok end + @doc """ + Fill missing translations for published posts and their linked media. + + This mirrors the legacy batch workflow: only published posts are scanned, + posts marked `do_not_translate` are skipped, generated post translations are + auto-published, and linked media translations are created for any remaining + configured languages. + """ + @spec fill_missing(String.t(), keyword()) :: + {:ok, + %{ + translated_posts: non_neg_integer(), + translated_media: non_neg_integer(), + failed_count: non_neg_integer(), + warned_count: non_neg_integer(), + nothing_to_do: boolean() + }} + def fill_missing(project_id, opts \\ []) when is_binary(project_id) and is_list(opts) do + on_progress = Keyword.get(opts, :on_progress) + + with {:ok, metadata} <- Metadata.get_project_metadata(project_id) do + languages = configured_languages(metadata) + + if length(languages) <= 1 do + report_progress(on_progress, 1.0, "All translations are up to date") + + {:ok, + %{ + translated_posts: 0, + translated_media: 0, + failed_count: 0, + warned_count: 0, + nothing_to_do: true + }} + else + report_progress(on_progress, 0.0, "Scanning published posts") + + published_posts = + Repo.all( + from post in Post, + where: post.project_id == ^project_id and post.status == :published, + order_by: [asc: post.created_at, asc: post.slug] + ) + + post_languages = existing_post_languages(project_id) + + post_items = + published_posts + |> Enum.reject(& &1.do_not_translate) + |> Enum.flat_map(fn post -> + post + |> missing_languages(metadata, Map.get(post_languages, post.id, MapSet.new())) + |> Enum.map(&%{post: post, language: &1}) + end) + + report_progress(on_progress, 0.1, "Scanning linked media") + + media_items = collect_missing_media_items(published_posts, metadata, languages) + total_items = length(post_items) + length(media_items) + + if total_items == 0 do + report_progress(on_progress, 1.0, "All translations are up to date") + + {:ok, + %{ + translated_posts: 0, + translated_media: 0, + failed_count: 0, + warned_count: 0, + nothing_to_do: true + }} + else + report_progress( + on_progress, + 0.15, + "Found #{length(post_items)} posts and #{length(media_items)} media to translate" + ) + + {summary, completed} = + Enum.reduce(post_items, {empty_fill_summary(), 0}, fn %{post: post, language: language}, + {summary, completed} -> + report_fill_item_progress( + on_progress, + completed, + total_items, + "Translating \"#{post.title}\" to #{language}" + ) + + next_summary = + case translate_post(post, language, auto_publish: true) do + {:ok, _translation} -> Map.update!(summary, :translated_posts, &(&1 + 1)) + {:error, _reason} -> Map.update!(summary, :failed_count, &(&1 + 1)) + end + + {next_summary, completed + 1} + end) + + {summary, _completed} = + Enum.reduce(media_items, {summary, completed}, fn %{media_id: media_id, language: language}, + {summary, completed} -> + report_fill_item_progress( + on_progress, + completed, + total_items, + "Translating media #{String.slice(media_id, 0, 8)} to #{language}" + ) + + next_summary = + case translate_media(media_id, language) do + {:ok, _translation} -> Map.update!(summary, :translated_media, &(&1 + 1)) + {:error, _reason} -> Map.update!(summary, :failed_count, &(&1 + 1)) + end + + {next_summary, completed + 1} + end) + + final_summary = Map.put(summary, :nothing_to_do, false) + report_progress(on_progress, 1.0, completion_message(final_summary)) + {:ok, final_summary} + end + end + end + end + @doc false def missing_languages(%Post{} = post, metadata) do - source_language = normalize_language(post.language || metadata.main_language) - - configured_languages = - ([metadata.main_language] ++ (metadata.blog_languages || [])) - |> Enum.map(&normalize_language/1) - |> Enum.reject(&(&1 in [nil, ""])) - |> Enum.uniq() - existing_languages = Repo.all( from translation in Translation, where: translation.translation_for == ^post.id, select: translation.language ) + |> MapSet.new() - configured_languages - |> Enum.reject(&(&1 == source_language or &1 in existing_languages)) + missing_languages(post, metadata, existing_languages) end defp queue_post(%Post{} = post, language) do @@ -61,14 +178,7 @@ defmodule BDS.Posts.AutoTranslation do fn report -> report.(0.05, "Translating post to #{language}") - with {:ok, translation} <- AI.translate_post(post.id, language, ai_opts()), - {:ok, saved_translation} <- - BDS.Posts.upsert_post_translation(post.id, language, %{ - title: translation.title, - excerpt: translation.excerpt, - content: translation.content, - auto_generated: true - }) do + with {:ok, saved_translation} <- translate_post(post, language) do report.(0.85, "Post translation saved") :ok = queue_media_cascade(post, language) report.(1.0, "Post translation complete") @@ -101,13 +211,7 @@ defmodule BDS.Posts.AutoTranslation do fn report -> report.(0.05, "Translating media to #{language}") - with {:ok, translation} <- AI.translate_media(media_id, language, ai_opts()), - {:ok, saved_translation} <- - Media.upsert_media_translation(media_id, language, %{ - title: translation.title, - alt: translation.alt, - caption: translation.caption - }) do + with {:ok, saved_translation} <- translate_media(media_id, language) do report.(1.0, "Media translation complete") %{media_id: media_id, translation_id: saved_translation.id, language: language} else @@ -141,6 +245,156 @@ defmodule BDS.Posts.AutoTranslation do |> Keyword.get(:auto_translation_ai_opts, []) end + defp configured_languages(metadata) do + ([Map.get(metadata, :main_language)] ++ Map.get(metadata, :blog_languages, [])) + |> Enum.map(&normalize_language/1) + |> Enum.reject(&(&1 in [nil, ""])) + |> Enum.uniq() + end + + defp existing_post_languages(project_id) do + Repo.all( + from translation in Translation, + where: translation.project_id == ^project_id, + select: {translation.translation_for, translation.language} + ) + |> Enum.reduce(%{}, fn {post_id, language}, acc -> + Map.update(acc, post_id, MapSet.new([language]), &MapSet.put(&1, language)) + end) + end + + defp collect_missing_media_items(published_posts, metadata, languages) do + linked_media_ids = + published_posts + |> Enum.reject(& &1.do_not_translate) + |> Enum.flat_map(&linked_media_ids(&1.id)) + |> Enum.uniq() + + media_by_id = + Repo.all(from media in Media.Media, where: media.id in ^linked_media_ids) + |> Map.new(&{&1.id, &1}) + + media_languages = existing_media_languages(linked_media_ids) + + Enum.flat_map(linked_media_ids, fn media_id -> + case Map.get(media_by_id, media_id) do + nil -> + [] + + media -> + source_language = normalize_language(media.language || metadata.main_language) + existing_languages = Map.get(media_languages, media_id, MapSet.new()) + + languages + |> Enum.reject(&(&1 == source_language or MapSet.member?(existing_languages, &1))) + |> Enum.map(&%{media_id: media_id, language: &1}) + end + end) + end + + defp existing_media_languages(media_ids) do + Repo.all( + from translation in Media.Translation, + where: translation.translation_for in ^media_ids, + select: {translation.translation_for, translation.language} + ) + |> Enum.reduce(%{}, fn {media_id, language}, acc -> + Map.update(acc, media_id, MapSet.new([language]), &MapSet.put(&1, language)) + end) + end + + defp empty_fill_summary do + %{ + translated_posts: 0, + translated_media: 0, + failed_count: 0, + warned_count: 0, + nothing_to_do: false + } + end + + defp completion_message(summary) do + extras = + [] + |> maybe_add_completion_detail(summary.failed_count, "failed") + |> maybe_add_completion_detail(summary.warned_count, "warnings") + + if extras == [] do + "Done" + else + "Done (#{Enum.join(extras, ", ")})" + end + end + + defp maybe_add_completion_detail(details, 0, _label), do: details + + defp maybe_add_completion_detail(details, count, label) do + details ++ ["#{count} #{label}"] + end + + defp report_fill_item_progress(on_progress, completed, total_items, message) do + progress = 0.15 + completed / total_items * 0.85 + report_progress(on_progress, progress, message) + end + + defp report_progress(on_progress, value, message) when is_function(on_progress, 2) do + on_progress.(value, message) + end + + defp report_progress(_on_progress, _value, _message), do: :ok + + defp missing_languages(%Post{} = post, metadata, existing_languages) do + source_language = normalize_language(post.language || metadata.main_language) + + configured_languages(metadata) + |> Enum.reject(&(&1 == source_language or MapSet.member?(existing_languages, &1))) + end + + defp translate_post(%Post{} = post, language, opts \\ []) do + auto_publish? = Keyword.get(opts, :auto_publish, false) + content = Posts.editor_body(post) + + if String.trim(content) == "" do + {:error, :no_content_to_translate} + else + with {:ok, translation} <- + AI.translate_post( + %{title: post.title || "", excerpt: post.excerpt || "", content: content}, + language, + ai_opts() + ), + {:ok, saved_translation} <- + Posts.upsert_post_translation(post.id, language, %{ + title: translation.title, + excerpt: translation.excerpt, + content: translation.content, + auto_generated: true + }), + {:ok, published_translation} <- + maybe_publish_post_translation(post.id, language, saved_translation, auto_publish?) do + {:ok, published_translation} + end + end + end + + defp maybe_publish_post_translation(_post_id, _language, saved_translation, false), + do: {:ok, saved_translation} + + defp maybe_publish_post_translation(post_id, language, _saved_translation, true), + do: Posts.publish_post_translation(post_id, language) + + defp translate_media(media_id, language) do + with {:ok, translation} <- AI.translate_media(media_id, language, ai_opts()), + {:ok, saved_translation} <- + Media.upsert_media_translation(media_id, language, %{ + title: translation.title, + alt: translation.alt, + caption: translation.caption + }) do + {:ok, saved_translation} + end + end + defp configured? do mode = if AI.airplane_mode?(), do: :airplane, else: :online diff --git a/test/bds/bounded_atoms_test.exs b/test/bds/bounded_atoms_test.exs index f4992b0..14553c5 100644 --- a/test/bds/bounded_atoms_test.exs +++ b/test/bds/bounded_atoms_test.exs @@ -27,6 +27,7 @@ defmodule BDS.BoundedAtomsTest do {"rebuild_embedding_index", :rebuild_embedding_index}, {"metadata_diff", :metadata_diff}, {"validate_translations", :validate_translations}, + {"fill_missing_translations", :fill_missing_translations}, {"find_duplicates", :find_duplicates}, {"generate_sitemap", :generate_sitemap}, {"validate_site", :validate_site}, diff --git a/test/bds/desktop/shell_commands_test.exs b/test/bds/desktop/shell_commands_test.exs index cee8c83..b728c56 100644 --- a/test/bds/desktop/shell_commands_test.exs +++ b/test/bds/desktop/shell_commands_test.exs @@ -1,7 +1,53 @@ defmodule BDS.Desktop.ShellCommandsTest do use ExUnit.Case, async: false + alias BDS.AI alias BDS.Desktop.ShellCommands + alias BDS.Media + alias BDS.Metadata + alias BDS.Posts + alias BDS.Repo + + defmodule FakeRuntime do + def generate(_endpoint, request, opts) do + test_pid = Keyword.fetch!(opts, :test_pid) + send(test_pid, {:runtime_request, request.operation}) + + case request.operation do + :translate_post -> + {:ok, + %{ + json: %{ + "title" => "Hallo Welt", + "excerpt" => "Kurze Zusammenfassung", + "content" => "# Hallo Welt\n\nUbersetzter Inhalt" + }, + usage: %{ + input_tokens: 22, + output_tokens: 14, + cache_read_tokens: 0, + cache_write_tokens: 0 + } + }} + + :translate_media -> + {:ok, + %{ + json: %{ + "title" => "Medientitel", + "alt" => "Medien Alt", + "caption" => "Medien Beschriftung" + }, + usage: %{ + input_tokens: 12, + output_tokens: 10, + cache_read_tokens: 0, + cache_write_tokens: 0 + } + }} + end + end + end defmodule SlowEmbeddingBackend do @behaviour BDS.Embeddings.Backend @@ -132,6 +178,137 @@ defmodule BDS.Desktop.ShellCommandsTest do ] = completed.result.payload.invalid_filesystem_files end + test "fill_missing_translations queues a tracked AI task and publishes missing post and media translations", + %{project: project, temp_dir: temp_dir} do + assert {:ok, post} = + Posts.create_post(%{ + project_id: project.id, + title: "Hello", + excerpt: "English summary", + content: "World body", + language: "en" + }) + + media_source = Path.join(temp_dir, "source-image.txt") + File.write!(media_source, "image bytes") + + assert {:ok, media} = + Media.import_media(%{ + project_id: project.id, + source_path: media_source, + title: "Image title", + alt: "Image alt", + caption: "Image caption", + language: "en" + }) + + assert {:ok, _link} = Media.link_media_to_post(media.id, post.id) + assert {:ok, _published_post} = Posts.publish_post(post.id) + + configure_auto_translation_test_runtime() + + assert {:ok, _metadata} = + Metadata.update_project_metadata(project.id, %{ + main_language: "en", + blog_languages: ["en", "de"] + }) + + assert {:ok, result} = ShellCommands.execute("fill_missing_translations") + + assert result.kind == "task_queued" + assert result.action == "fill_missing_translations" + assert is_binary(result.task_id) + + completed = wait_for_task(result.task_id, &(&1.status == :completed and is_map(&1.result)), 5_000) + + assert completed.group_name == "AI" + assert completed.result.project_id == project.id + assert completed.result.translated_posts == 1 + assert completed.result.translated_media == 1 + assert completed.result.failed_count == 0 + + translation = Repo.get_by!(BDS.Posts.Translation, translation_for: post.id, language: "de") + assert translation.status == :published + assert translation.content == nil + assert is_binary(translation.file_path) + assert File.exists?(Path.join(temp_dir, translation.file_path)) + + media_translation = + Repo.get_by!(BDS.Media.Translation, translation_for: media.id, language: "de") + + assert media_translation.title == "Medientitel" + assert media_translation.alt == "Medien Alt" + assert media_translation.caption == "Medien Beschriftung" + assert File.exists?(Path.join(temp_dir, media.file_path <> ".de.meta")) + + assert_received {:runtime_request, :translate_post} + assert_received {:runtime_request, :translate_media} + end + + test "fill_missing_translations returns a no-op output when only one language is configured", + %{project: project} do + assert {:ok, _metadata} = + Metadata.update_project_metadata(project.id, %{ + main_language: "en", + blog_languages: ["en"] + }) + + assert {:ok, result} = ShellCommands.execute("fill_missing_translations") + + assert result.kind == "output" + assert result.action == "fill_missing_translations" + assert result.message == "All translations are up to date" + assert BDS.Tasks.list_tasks() == [] + end + + test "fill_missing_translations uses the media canonical language when choosing missing media targets", + %{project: project, temp_dir: temp_dir} do + assert {:ok, post} = + Posts.create_post(%{ + project_id: project.id, + title: "Hallo Welt", + excerpt: "Deutsche Zusammenfassung", + content: "Deutscher Inhalt", + language: "de" + }) + + media_source = Path.join(temp_dir, "english-media.txt") + File.write!(media_source, "image bytes") + + assert {:ok, media} = + Media.import_media(%{ + project_id: project.id, + source_path: media_source, + title: "English image", + alt: "English alt", + caption: "English caption", + language: "en" + }) + + assert {:ok, _link} = Media.link_media_to_post(media.id, post.id) + assert {:ok, _published_post} = Posts.publish_post(post.id) + + configure_auto_translation_test_runtime() + + assert {:ok, _metadata} = + Metadata.update_project_metadata(project.id, %{ + main_language: "de", + blog_languages: ["de", "en"] + }) + + assert {:ok, result} = ShellCommands.execute("fill_missing_translations") + completed = wait_for_task(result.task_id, &(&1.status == :completed and is_map(&1.result)), 5_000) + + assert completed.result.translated_posts == 1 + assert completed.result.translated_media == 1 + assert Repo.get_by(BDS.Media.Translation, translation_for: media.id, language: "en") == nil + + media_translation = + Repo.get_by!(BDS.Media.Translation, translation_for: media.id, language: "de") + + assert media_translation.title == "Medientitel" + end + test "validate_site queues a tracked validation task and returns the report as an editor payload" do assert {:ok, result} = ShellCommands.execute("validate_site") @@ -643,4 +820,23 @@ defmodule BDS.Desktop.ShellCommandsTest do wait_for_named_task(name, matcher, timeout - 20) end end + + defp configure_auto_translation_test_runtime do + assert {:ok, _endpoint} = + AI.put_endpoint(:online, %{ + url: "https://api.example.test/v1", + api_key: "online-secret", + model: "gpt-4o-mini" + }) + + assert :ok = AI.set_airplane_mode(false) + assert :ok = AI.put_model_preference(:title, "gpt-4.1-mini") + + Application.put_env(:bds, :posts, + auto_translation_ai_opts: [ + runtime: FakeRuntime, + test_pid: self() + ] + ) + end end diff --git a/test/bds/desktop_test.exs b/test/bds/desktop_test.exs index cb15aa9..39d6369 100644 --- a/test/bds/desktop_test.exs +++ b/test/bds/desktop_test.exs @@ -131,7 +131,7 @@ defmodule BDS.DesktopTest do assert menu_item(groups, :metadata_diff).shortcut == nil end - test "prod forwarded menu surface is covered by the shell dispatcher except unresolved filler action" do + test "prod forwarded menu surface is covered by the shell dispatcher" do forwarded_actions = BDS.Desktop.MenuBar.groups(dev_mode?: false) |> Enum.flat_map(fn group -> @@ -146,7 +146,7 @@ defmodule BDS.DesktopTest do |> MapSet.difference(BDS.Desktop.ShellLive.supported_menu_actions()) |> Enum.sort() - assert unsupported_actions == [:fill_missing_translations] + assert unsupported_actions == [] end test "native menu quit requests app-owned shutdown" do