feat: fill missing translations implemented

This commit is contained in:
2026-05-02 10:33:19 +02:00
parent 24f114c24e
commit 4cf0f5281b
8 changed files with 533 additions and 28 deletions

View File

@@ -48,6 +48,7 @@ defmodule BDS.BoundedAtoms do
:metadata_diff,
:regenerate_calendar,
:validate_translations,
:fill_missing_translations,
:find_duplicates,
:generate_sitemap,
:validate_site,

View File

@@ -365,6 +365,33 @@ defmodule BDS.Desktop.ShellCommands do
)
end
# Handles the "fill_missing_translations" shell command.
#
# When the project metadata lists more than one distinct language the work is
# queued as a tracked task in the "AI" group; otherwise an informational
# "output" payload is returned and no task is created. Any result of
# `Metadata.get_project_metadata/1` other than `{:ok, metadata}` is returned
# unchanged.
defp dispatch("fill_missing_translations", project, _params) do
  case Metadata.get_project_metadata(project.id) do
    {:ok, metadata} ->
      if translation_fill_enabled?(metadata) do
        run_fill = fn report ->
          {:ok, result} = Posts.fill_missing_translations(project.id, on_progress: report)
          Map.put(result, :project_id, project.id)
        end

        queue_task(
          project,
          "fill_missing_translations",
          "Fill Missing Translations",
          "AI",
          run_fill
        )
      else
        noop_output = %{
          kind: "output",
          action: "fill_missing_translations",
          title: "Fill Missing Translations",
          message: "All translations are up to date",
          project_id: project.id,
          level: "info"
        }

        {:ok, noop_output}
      end

    other ->
      other
  end
end
defp dispatch("find_duplicates", project, _params) do
queue_task(project, "find_duplicates", "Find Duplicate Posts", "Embeddings", fn report ->
{:ok, pairs} = Embeddings.find_duplicates(project.id, on_progress: report)
@@ -421,6 +448,19 @@ defmodule BDS.Desktop.ShellCommands do
end
end
# Returns true when the metadata names at least two distinct languages.
#
# Languages are taken from `:main_language` plus `:blog_languages`, compared
# after trimming and downcasing; blank entries are ignored.
defp translation_fill_enabled?(metadata) do
  # `List.wrap/1` turns a stored `nil` into `[]`. A plain `++ nil` would build
  # an improper list and make `Enum.map/2` raise.
  blog_languages = List.wrap(Map.get(metadata, :blog_languages))

  distinct_languages =
    [Map.get(metadata, :main_language) | blog_languages]
    |> Enum.map(fn language ->
      language
      |> to_string()
      |> String.trim()
      |> String.downcase()
    end)
    |> Enum.reject(&(&1 == ""))
    |> Enum.uniq()

  # Two or more entries, without walking the whole list as `length/1` does.
  match?([_, _ | _], distinct_languages)
end
defp rebuild_database_steps(project) do
[
%{

View File

@@ -108,7 +108,7 @@ defmodule BDS.Desktop.ShellLive do
|> MapSet.union(MapSet.new([:open_in_browser, :open_data_folder]))
|> MapSet.union(MapSet.new([:preview_post, :rebuild_database, :reindex_text]))
|> MapSet.union(MapSet.new([:rebuild_embedding_index, :metadata_diff, :regenerate_calendar]))
|> MapSet.union(MapSet.new([:validate_translations, :find_duplicates]))
|> MapSet.union(MapSet.new([:validate_translations, :fill_missing_translations, :find_duplicates]))
|> MapSet.union(MapSet.new([:generate_sitemap, :validate_site, :upload_site]))
end

View File

@@ -471,6 +471,19 @@ defmodule BDS.Posts do
}}
defdelegate fix_invalid_translations(report), to: TranslationValidation, as: :fix_invalid
# Public entry point for the batch "fill missing translations" workflow.
# The call is forwarded unchanged to `AutoTranslation.fill_missing/2`; `opts`
# defaults to `[]`. The spec describes the `{:ok, summary}` result with its
# per-kind counters and the `nothing_to_do` flag.
@spec fill_missing_translations(String.t(), rebuild_opts()) ::
{:ok,
%{
translated_posts: non_neg_integer(),
translated_media: non_neg_integer(),
failed_count: non_neg_integer(),
warned_count: non_neg_integer(),
nothing_to_do: boolean()
}}
defdelegate fill_missing_translations(project_id, opts \\ []),
to: AutoTranslation,
as: :fill_missing
@spec rewrite_published_post(String.t()) :: :ok
def rewrite_published_post(post_id) do
post = Repo.get!(Post, post_id)

View File

@@ -6,6 +6,7 @@ defmodule BDS.Posts.AutoTranslation do
alias BDS.AI
alias BDS.Media
alias BDS.Metadata
alias BDS.Posts
alias BDS.Posts.Post
alias BDS.Posts.PostMedia
alias BDS.Posts.Translation
@@ -33,25 +34,141 @@ defmodule BDS.Posts.AutoTranslation do
:ok
end
@doc """
Fills missing translations for published posts and their linked media.

This mirrors the legacy batch workflow: only published posts are scanned,
posts marked `do_not_translate` are skipped, generated post translations are
auto-published, and linked media translations are created for any remaining
configured languages.

## Options

  * `:on_progress` - optional two-arity function called with a progress
    fraction and a status message.

## Returns

`{:ok, summary}` holding the translated, failed and warned counters.
`nothing_to_do` is `true` when fewer than two languages are configured or no
translation is missing. Items that fail with `{:error, reason}` are counted in
`failed_count` and do not stop the batch. A result of
`Metadata.get_project_metadata/1` other than `{:ok, metadata}` is returned
unchanged.
"""
@spec fill_missing(String.t(), keyword()) ::
        {:ok,
         %{
           translated_posts: non_neg_integer(),
           translated_media: non_neg_integer(),
           failed_count: non_neg_integer(),
           warned_count: non_neg_integer(),
           nothing_to_do: boolean()
         }}
def fill_missing(project_id, opts \\ []) when is_binary(project_id) and is_list(opts) do
  on_progress = Keyword.get(opts, :on_progress)

  with {:ok, metadata} <- Metadata.get_project_metadata(project_id) do
    case configured_languages(metadata) do
      # At least two languages: there may be something to translate.
      [_, _ | _] = languages ->
        fill_missing_for_languages(project_id, metadata, languages, on_progress)

      _zero_or_one_language ->
        nothing_to_fill(on_progress)
    end
  end
end

# Reports completion and returns the all-zero summary flagged as nothing to do.
defp nothing_to_fill(on_progress) do
  report_progress(on_progress, 1.0, "All translations are up to date")
  {:ok, %{empty_fill_summary() | nothing_to_do: true}}
end

# Scans published posts and their linked media for missing languages, then
# hands the resulting work items to the translation loop.
defp fill_missing_for_languages(project_id, metadata, languages, on_progress) do
  report_progress(on_progress, 0.0, "Scanning published posts")

  published_posts =
    Repo.all(
      from post in Post,
        where: post.project_id == ^project_id and post.status == :published,
        order_by: [asc: post.created_at, asc: post.slug]
    )

  post_languages = existing_post_languages(project_id)

  post_items =
    published_posts
    |> Enum.reject(& &1.do_not_translate)
    |> Enum.flat_map(fn post ->
      post
      |> missing_languages(metadata, Map.get(post_languages, post.id, MapSet.new()))
      |> Enum.map(&%{post: post, language: &1})
    end)

  report_progress(on_progress, 0.1, "Scanning linked media")
  media_items = collect_missing_media_items(published_posts, metadata, languages)

  case {post_items, media_items} do
    {[], []} -> nothing_to_fill(on_progress)
    _work -> translate_fill_items(post_items, media_items, on_progress)
  end
end

# Translates every post item, then every media item, reporting progress before
# each one and tallying the outcome into the summary.
defp translate_fill_items(post_items, media_items, on_progress) do
  post_count = length(post_items)
  media_count = length(media_items)
  total_items = post_count + media_count

  report_progress(
    on_progress,
    0.15,
    "Found #{post_count} posts and #{media_count} media to translate"
  )

  # Posts come first so the shared index matches the original ordering.
  work = Enum.map(post_items, &{:post, &1}) ++ Enum.map(media_items, &{:media, &1})

  summary =
    work
    |> Enum.with_index()
    |> Enum.reduce(empty_fill_summary(), fn {item, completed}, summary ->
      report_fill_item_progress(on_progress, completed, total_items, fill_item_message(item))

      case run_fill_item(item) do
        {counter, {:ok, _translation}} -> Map.update!(summary, counter, &(&1 + 1))
        {_counter, {:error, _reason}} -> Map.update!(summary, :failed_count, &(&1 + 1))
      end
    end)
    |> Map.put(:nothing_to_do, false)

  report_progress(on_progress, 1.0, completion_message(summary))
  {:ok, summary}
end

# Progress message shown while a single work item is being translated.
defp fill_item_message({:post, %{post: post, language: language}}),
  do: "Translating \"#{post.title}\" to #{language}"

defp fill_item_message({:media, %{media_id: media_id, language: language}}),
  do: "Translating media #{String.slice(media_id, 0, 8)} to #{language}"

# Runs one work item and pairs the result with the counter a success bumps.
defp run_fill_item({:post, %{post: post, language: language}}),
  do: {:translated_posts, translate_post(post, language, auto_publish: true)}

defp run_fill_item({:media, %{media_id: media_id, language: language}}),
  do: {:translated_media, translate_media(media_id, language)}
@doc false
# Returns the configured languages `post` still lacks a translation for.
# Loads the languages already stored for the post and defers the comparison to
# `missing_languages/3`.
def missing_languages(%Post{} = post, metadata) do
  existing_languages =
    Repo.all(
      from translation in Translation,
        where: translation.translation_for == ^post.id,
        select: translation.language
    )
    |> MapSet.new()

  missing_languages(post, metadata, existing_languages)
end
defp queue_post(%Post{} = post, language) do
@@ -61,14 +178,7 @@ defmodule BDS.Posts.AutoTranslation do
fn report ->
report.(0.05, "Translating post to #{language}")
with {:ok, translation} <- AI.translate_post(post.id, language, ai_opts()),
{:ok, saved_translation} <-
BDS.Posts.upsert_post_translation(post.id, language, %{
title: translation.title,
excerpt: translation.excerpt,
content: translation.content,
auto_generated: true
}) do
with {:ok, saved_translation} <- translate_post(post, language) do
report.(0.85, "Post translation saved")
:ok = queue_media_cascade(post, language)
report.(1.0, "Post translation complete")
@@ -101,13 +211,7 @@ defmodule BDS.Posts.AutoTranslation do
fn report ->
report.(0.05, "Translating media to #{language}")
with {:ok, translation} <- AI.translate_media(media_id, language, ai_opts()),
{:ok, saved_translation} <-
Media.upsert_media_translation(media_id, language, %{
title: translation.title,
alt: translation.alt,
caption: translation.caption
}) do
with {:ok, saved_translation} <- translate_media(media_id, language) do
report.(1.0, "Media translation complete")
%{media_id: media_id, translation_id: saved_translation.id, language: language}
else
@@ -141,6 +245,156 @@ defmodule BDS.Posts.AutoTranslation do
|> Keyword.get(:auto_translation_ai_opts, [])
end
# Returns the distinct, normalized languages configured in `metadata`:
# `:main_language` first, then `:blog_languages`, with nil and blank entries
# dropped.
defp configured_languages(metadata) do
  # `List.wrap/1` keeps a stored `nil` from producing an improper list, which
  # would make `Enum.map/2` raise.
  [Map.get(metadata, :main_language) | List.wrap(Map.get(metadata, :blog_languages))]
  |> Enum.map(&normalize_language/1)
  |> Enum.reject(&(&1 in [nil, ""]))
  |> Enum.uniq()
end
# Builds a map from post id to the `MapSet` of languages that already have a
# translation row in the given project.
defp existing_post_languages(project_id) do
  language_pairs =
    from translation in Translation,
      where: translation.project_id == ^project_id,
      select: {translation.translation_for, translation.language}

  language_pairs
  |> Repo.all()
  |> Enum.group_by(&elem(&1, 0), &elem(&1, 1))
  |> Map.new(fn {post_id, languages} -> {post_id, MapSet.new(languages)} end)
end
# Lists `%{media_id: ..., language: ...}` work items for media linked to the
# translatable published posts. A language is missing for a media record when
# it is neither the record's own language (falling back to the project's main
# language) nor already present among its stored translations. Linked ids with
# no media record are ignored.
defp collect_missing_media_items(published_posts, metadata, languages) do
  media_ids =
    published_posts
    |> Enum.reject(& &1.do_not_translate)
    |> Enum.flat_map(&linked_media_ids(&1.id))
    |> Enum.uniq()

  records_by_id =
    Repo.all(from media in Media.Media, where: media.id in ^media_ids)
    |> Map.new(&{&1.id, &1})

  translated_by_id = existing_media_languages(media_ids)

  Enum.flat_map(media_ids, fn media_id ->
    case Map.fetch(records_by_id, media_id) do
      {:ok, media} ->
        source_language = normalize_language(media.language || metadata.main_language)
        already_translated = Map.get(translated_by_id, media_id, MapSet.new())

        for language <- languages,
            language != source_language,
            not MapSet.member?(already_translated, language),
            do: %{media_id: media_id, language: language}

      :error ->
        []
    end
  end)
end
# Builds a map from media id to the `MapSet` of languages that already have a
# stored media translation, restricted to the given ids.
defp existing_media_languages(media_ids) do
  language_pairs =
    from translation in Media.Translation,
      where: translation.translation_for in ^media_ids,
      select: {translation.translation_for, translation.language}

  language_pairs
  |> Repo.all()
  |> Enum.group_by(&elem(&1, 0), &elem(&1, 1))
  |> Map.new(fn {media_id, languages} -> {media_id, MapSet.new(languages)} end)
end
# Starting summary for a fill run: every counter at zero and `nothing_to_do`
# set to false.
defp empty_fill_summary do
  [:translated_posts, :translated_media, :failed_count, :warned_count]
  |> Map.new(&{&1, 0})
  |> Map.put(:nothing_to_do, false)
end
# Final progress message: "Done", followed by the failed and warning counts in
# parentheses when either is non-zero.
defp completion_message(summary) do
  []
  |> maybe_add_completion_detail(summary.failed_count, "failed")
  |> maybe_add_completion_detail(summary.warned_count, "warnings")
  |> case do
    [] -> "Done"
    details -> "Done (#{Enum.join(details, ", ")})"
  end
end

# Appends "<count> <label>" to `details` unless the count is the integer zero.
defp maybe_add_completion_detail(details, count, label) do
  if count === 0 do
    details
  else
    details ++ ["#{count} #{label}"]
  end
end
# Reports progress for one work item. The first 15% of the bar is reserved for
# scanning; the remaining 85% is spread evenly over `total_items`.
defp report_fill_item_progress(on_progress, completed, total_items, message) do
  fraction_done = completed / total_items
  report_progress(on_progress, 0.15 + fraction_done * 0.85, message)
end

# Calls `on_progress` when it is a two-arity function; otherwise does nothing
# and returns `:ok`.
defp report_progress(on_progress, value, message) do
  if is_function(on_progress, 2) do
    on_progress.(value, message)
  else
    :ok
  end
end
# Configured languages the post still needs: everything except the post's own
# language (falling back to the project's main language) and the languages in
# `existing_languages`.
defp missing_languages(%Post{} = post, metadata, existing_languages) do
  source_language = normalize_language(post.language || metadata.main_language)

  for language <- configured_languages(metadata),
      language != source_language,
      not MapSet.member?(existing_languages, language),
      do: language
end
# Translates a post into `language` and stores the result as an auto-generated
# translation. With `auto_publish: true` the stored translation is published
# as well. Returns `{:error, :no_content_to_translate}` when the editor body
# is blank; any failing step's result is returned as-is.
defp translate_post(%Post{} = post, language, opts \\ []) do
  publish? = Keyword.get(opts, :auto_publish, false)
  body = Posts.editor_body(post)

  case String.trim(body) do
    "" ->
      {:error, :no_content_to_translate}

    _non_blank ->
      source = %{title: post.title || "", excerpt: post.excerpt || "", content: body}

      with {:ok, translated} <- AI.translate_post(source, language, ai_opts()),
           {:ok, saved} <-
             Posts.upsert_post_translation(post.id, language, %{
               title: translated.title,
               excerpt: translated.excerpt,
               content: translated.content,
               auto_generated: true
             }) do
        maybe_publish_post_translation(post.id, language, saved, publish?)
      end
  end
end
# Publishes the translation when the last argument is `true`; when it is
# `false` the already-saved translation is returned wrapped in `{:ok, _}`.
defp maybe_publish_post_translation(post_id, language, _saved_translation, true) do
  Posts.publish_post_translation(post_id, language)
end

defp maybe_publish_post_translation(_post_id, _language, saved_translation, false) do
  {:ok, saved_translation}
end
# Translates a media record's title, alt text and caption into `language` and
# upserts the stored media translation. A failing step's result is returned
# as-is.
defp translate_media(media_id, language) do
  with {:ok, translated} <- AI.translate_media(media_id, language, ai_opts()) do
    attrs = %{
      title: translated.title,
      alt: translated.alt,
      caption: translated.caption
    }

    case Media.upsert_media_translation(media_id, language, attrs) do
      {:ok, saved} -> {:ok, saved}
      failure -> failure
    end
  end
end
defp configured? do
mode = if AI.airplane_mode?(), do: :airplane, else: :online

View File

@@ -27,6 +27,7 @@ defmodule BDS.BoundedAtomsTest do
{"rebuild_embedding_index", :rebuild_embedding_index},
{"metadata_diff", :metadata_diff},
{"validate_translations", :validate_translations},
{"fill_missing_translations", :fill_missing_translations},
{"find_duplicates", :find_duplicates},
{"generate_sitemap", :generate_sitemap},
{"validate_site", :validate_site},

View File

@@ -1,7 +1,53 @@
defmodule BDS.Desktop.ShellCommandsTest do
use ExUnit.Case, async: false
alias BDS.AI
alias BDS.Desktop.ShellCommands
alias BDS.Media
alias BDS.Metadata
alias BDS.Posts
alias BDS.Repo
defmodule FakeRuntime do
  # Test double for the AI runtime. It notifies the pid passed as `:test_pid`
  # about each requested operation and answers with canned German content for
  # the `:translate_post` and `:translate_media` operations.
  def generate(_endpoint, request, opts) do
    test_pid = Keyword.fetch!(opts, :test_pid)
    operation = request.operation
    send(test_pid, {:runtime_request, operation})

    {json, input_tokens, output_tokens} =
      case operation do
        :translate_post ->
          {%{
             "title" => "Hallo Welt",
             "excerpt" => "Kurze Zusammenfassung",
             "content" => "# Hallo Welt\n\nUbersetzter Inhalt"
           }, 22, 14}

        :translate_media ->
          {%{
             "title" => "Medientitel",
             "alt" => "Medien Alt",
             "caption" => "Medien Beschriftung"
           }, 12, 10}
      end

    {:ok,
     %{
       json: json,
       usage: %{
         input_tokens: input_tokens,
         output_tokens: output_tokens,
         cache_read_tokens: 0,
         cache_write_tokens: 0
       }
     }}
  end
end
defmodule SlowEmbeddingBackend do
@behaviour BDS.Embeddings.Backend
@@ -132,6 +178,137 @@ defmodule BDS.Desktop.ShellCommandsTest do
] = completed.result.payload.invalid_filesystem_files
end
# End-to-end check of the "fill_missing_translations" command for an English
# post with one linked English media item in an en/de project.
test "fill_missing_translations queues a tracked AI task and publishes missing post and media translations",
%{project: project, temp_dir: temp_dir} do
# Arrange: an English post ...
assert {:ok, post} =
Posts.create_post(%{
project_id: project.id,
title: "Hello",
excerpt: "English summary",
content: "World body",
language: "en"
})
# ... and an English media item imported from a scratch file.
media_source = Path.join(temp_dir, "source-image.txt")
File.write!(media_source, "image bytes")
assert {:ok, media} =
Media.import_media(%{
project_id: project.id,
source_path: media_source,
title: "Image title",
alt: "Image alt",
caption: "Image caption",
language: "en"
})
# Link the media to the post, then publish the post.
assert {:ok, _link} = Media.link_media_to_post(media.id, post.id)
assert {:ok, _published_post} = Posts.publish_post(post.id)
# Route AI calls to FakeRuntime and configure a second language ("de").
configure_auto_translation_test_runtime()
assert {:ok, _metadata} =
Metadata.update_project_metadata(project.id, %{
main_language: "en",
blog_languages: ["en", "de"]
})
# Act: the command must answer immediately with a queued-task payload.
assert {:ok, result} = ShellCommands.execute("fill_missing_translations")
assert result.kind == "task_queued"
assert result.action == "fill_missing_translations"
assert is_binary(result.task_id)
# Wait up to 5 seconds for the task to complete with a map result.
completed = wait_for_task(result.task_id, &(&1.status == :completed and is_map(&1.result)), 5_000)
assert completed.group_name == "AI"
assert completed.result.project_id == project.id
assert completed.result.translated_posts == 1
assert completed.result.translated_media == 1
assert completed.result.failed_count == 0
# The German post translation is published: its row has no content and
# points at a file that exists under temp_dir.
translation = Repo.get_by!(BDS.Posts.Translation, translation_for: post.id, language: "de")
assert translation.status == :published
assert translation.content == nil
assert is_binary(translation.file_path)
assert File.exists?(Path.join(temp_dir, translation.file_path))
# The German media translation holds FakeRuntime's canned values and a
# ".de.meta" file exists next to the media file.
media_translation =
Repo.get_by!(BDS.Media.Translation, translation_for: media.id, language: "de")
assert media_translation.title == "Medientitel"
assert media_translation.alt == "Medien Alt"
assert media_translation.caption == "Medien Beschriftung"
assert File.exists?(Path.join(temp_dir, media.file_path <> ".de.meta"))
# FakeRuntime reported both operations to this test process.
assert_received {:runtime_request, :translate_post}
assert_received {:runtime_request, :translate_media}
end
# With a single configured language the command must answer with an "output"
# payload and must not queue any task.
test "fill_missing_translations returns a no-op output when only one language is configured",
%{project: project} do
# Only "en" is configured, both as main language and as blog language.
assert {:ok, _metadata} =
Metadata.update_project_metadata(project.id, %{
main_language: "en",
blog_languages: ["en"]
})
assert {:ok, result} = ShellCommands.execute("fill_missing_translations")
assert result.kind == "output"
assert result.action == "fill_missing_translations"
assert result.message == "All translations are up to date"
# No task was created for the no-op.
assert BDS.Tasks.list_tasks() == []
end
# The missing target language for media is derived from the media's own
# language, not from the post or the project's main language: a German post
# with English media in a de/en project needs an "en" post translation but a
# "de" media translation.
test "fill_missing_translations uses the media canonical language when choosing missing media targets",
%{project: project, temp_dir: temp_dir} do
# Arrange: a German post ...
assert {:ok, post} =
Posts.create_post(%{
project_id: project.id,
title: "Hallo Welt",
excerpt: "Deutsche Zusammenfassung",
content: "Deutscher Inhalt",
language: "de"
})
# ... with a linked media item whose own language is English.
media_source = Path.join(temp_dir, "english-media.txt")
File.write!(media_source, "image bytes")
assert {:ok, media} =
Media.import_media(%{
project_id: project.id,
source_path: media_source,
title: "English image",
alt: "English alt",
caption: "English caption",
language: "en"
})
assert {:ok, _link} = Media.link_media_to_post(media.id, post.id)
assert {:ok, _published_post} = Posts.publish_post(post.id)
# Route AI calls to FakeRuntime; German is the main language, English second.
configure_auto_translation_test_runtime()
assert {:ok, _metadata} =
Metadata.update_project_metadata(project.id, %{
main_language: "de",
blog_languages: ["de", "en"]
})
assert {:ok, result} = ShellCommands.execute("fill_missing_translations")
completed = wait_for_task(result.task_id, &(&1.status == :completed and is_map(&1.result)), 5_000)
assert completed.result.translated_posts == 1
assert completed.result.translated_media == 1
# No "en" translation is created for media that is already English ...
assert Repo.get_by(BDS.Media.Translation, translation_for: media.id, language: "en") == nil
# ... while the "de" translation exists with FakeRuntime's canned title.
media_translation =
Repo.get_by!(BDS.Media.Translation, translation_for: media.id, language: "de")
assert media_translation.title == "Medientitel"
end
test "validate_site queues a tracked validation task and returns the report as an editor payload" do
assert {:ok, result} = ShellCommands.execute("validate_site")
@@ -643,4 +820,23 @@ defmodule BDS.Desktop.ShellCommandsTest do
wait_for_named_task(name, matcher, timeout - 20)
end
end
# Prepares the AI configuration used by the fill-missing-translations tests:
# registers an online endpoint, turns airplane mode off, sets the `:title`
# model preference, and points the auto-translation AI options at FakeRuntime
# with the calling process as `:test_pid`.
defp configure_auto_translation_test_runtime do
  online_endpoint = %{
    url: "https://api.example.test/v1",
    api_key: "online-secret",
    model: "gpt-4o-mini"
  }

  assert {:ok, _endpoint} = AI.put_endpoint(:online, online_endpoint)
  assert :ok = AI.set_airplane_mode(false)
  assert :ok = AI.put_model_preference(:title, "gpt-4.1-mini")

  ai_opts = [runtime: FakeRuntime, test_pid: self()]
  Application.put_env(:bds, :posts, auto_translation_ai_opts: ai_opts)
end
end

View File

@@ -131,7 +131,7 @@ defmodule BDS.DesktopTest do
assert menu_item(groups, :metadata_diff).shortcut == nil
end
test "prod forwarded menu surface is covered by the shell dispatcher except unresolved filler action" do
test "prod forwarded menu surface is covered by the shell dispatcher" do
forwarded_actions =
BDS.Desktop.MenuBar.groups(dev_mode?: false)
|> Enum.flat_map(fn group ->
@@ -146,7 +146,7 @@ defmodule BDS.DesktopTest do
|> MapSet.difference(BDS.Desktop.ShellLive.supported_menu_actions())
|> Enum.sort()
assert unsupported_actions == [:fill_missing_translations]
assert unsupported_actions == []
end
test "native menu quit requests app-owned shutdown" do