Files
bDS2/lib/bds/posts/auto_translation.ex
2026-05-04 06:18:06 +02:00

450 lines
14 KiB
Elixir

defmodule BDS.Posts.AutoTranslation do
@moduledoc false
import Ecto.Query
alias BDS.AI
alias BDS.Media
alias BDS.Metadata
alias BDS.Posts
alias BDS.Posts.Post
alias BDS.Posts.PostMedia
alias BDS.Posts.Translation
alias BDS.Repo
alias BDS.Tasks
@doc """
Queues one background auto-translation task per configured language that the
post does not have a translation for yet.

Nothing is queued when the post is flagged `do_not_translate`, when
`configured?/0` reports no usable AI endpoint, or when the project metadata
lookup does not return `{:ok, metadata}`. The result is `:ok` in every case.
"""
@spec maybe_schedule(Post.t()) :: :ok
def maybe_schedule(%Post{do_not_translate: true}), do: :ok

def maybe_schedule(%Post{} = post) do
  if configured?() do
    case Metadata.get_project_metadata(post.project_id) do
      {:ok, metadata} ->
        Enum.each(missing_languages(post, metadata), fn language ->
          queue_post(post, language)
        end)

      _other ->
        :ok
    end
  end

  :ok
end
@doc """
Fill missing translations for published posts and their linked media.

This mirrors the legacy batch workflow: only published posts are scanned,
posts marked `do_not_translate` are skipped, generated post translations are
auto-published, and linked media translations are created for any remaining
configured languages.

## Options

  * `:on_progress` - optional 2-arity function invoked as
    `on_progress.(fraction, message)`. Any other value is ignored.

## Returns

`{:ok, summary}`. `summary.nothing_to_do` is `true` when fewer than two
languages are configured or when no post/media translation is missing.
`warned_count` is reported but never incremented by this module.

When `Metadata.get_project_metadata/1` does not return `{:ok, metadata}`, its
return value is passed through to the caller unchanged.
"""
@spec fill_missing(String.t(), keyword()) ::
        {:ok,
         %{
           translated_posts: non_neg_integer(),
           translated_media: non_neg_integer(),
           failed_count: non_neg_integer(),
           warned_count: non_neg_integer(),
           nothing_to_do: boolean()
         }}
        | {:error, term()}
def fill_missing(project_id, opts \\ []) when is_binary(project_id) and is_list(opts) do
  on_progress = Keyword.get(opts, :on_progress)

  with {:ok, metadata} <- Metadata.get_project_metadata(project_id) do
    case configured_languages(metadata) do
      # At least two languages: a source plus one or more targets.
      [_, _ | _] = languages -> run_fill(project_id, metadata, languages, on_progress)
      _zero_or_one -> nothing_to_fill(on_progress)
    end
  end
end

# Reports completion and returns the all-zero summary flagged `nothing_to_do`.
defp nothing_to_fill(on_progress) do
  report_progress(on_progress, 1.0, "All translations are up to date")
  {:ok, %{empty_fill_summary() | nothing_to_do: true}}
end

# Scans published posts and their media, then translates whatever is missing.
defp run_fill(project_id, metadata, languages, on_progress) do
  report_progress(on_progress, 0.0, "Scanning published posts")

  published_posts =
    Repo.all(
      from post in Post,
        where: post.project_id == ^project_id and post.status == :published,
        order_by: [asc: post.created_at, asc: post.slug]
    )

  post_items = missing_post_items(project_id, published_posts, metadata)

  report_progress(on_progress, 0.1, "Scanning linked media")
  media_items = collect_missing_media_items(published_posts, metadata, languages)

  if post_items == [] and media_items == [] do
    nothing_to_fill(on_progress)
  else
    translate_fill_items(post_items, media_items, on_progress)
  end
end

# Builds one `%{post: post, language: language}` entry per missing post translation.
defp missing_post_items(project_id, published_posts, metadata) do
  post_languages = existing_post_languages(project_id)

  for post <- published_posts,
      not post.do_not_translate,
      existing = Map.get(post_languages, post.id, MapSet.new()),
      language <- missing_languages(post, metadata, existing) do
    %{post: post, language: language}
  end
end

# Translates posts first, then media, sharing one progress counter across both.
defp translate_fill_items(post_items, media_items, on_progress) do
  post_count = length(post_items)
  media_count = length(media_items)
  total_items = post_count + media_count

  report_progress(
    on_progress,
    0.15,
    "Found #{post_count} posts and #{media_count} media to translate"
  )

  tagged_items = Enum.map(post_items, &{:post, &1}) ++ Enum.map(media_items, &{:media, &1})

  summary =
    tagged_items
    |> Enum.with_index()
    |> Enum.reduce(empty_fill_summary(), fn {item, completed}, summary ->
      report_fill_item_progress(on_progress, completed, total_items, fill_item_message(item))
      Map.update!(summary, translate_fill_item(item), &(&1 + 1))
    end)

  final_summary = %{summary | nothing_to_do: false}
  report_progress(on_progress, 1.0, completion_message(final_summary))
  {:ok, final_summary}
end

# Progress message shown just before an item is translated.
defp fill_item_message({:post, %{post: post, language: language}}),
  do: "Translating \"#{post.title}\" to #{language}"

defp fill_item_message({:media, %{media_id: media_id, language: language}}),
  do: "Translating media #{String.slice(media_id, 0, 8)} to #{language}"

# Translates one item and returns the summary counter that should be bumped.
defp translate_fill_item({:post, %{post: post, language: language}}) do
  case translate_post(post, language, auto_publish: true) do
    {:ok, _translation} -> :translated_posts
    {:error, _reason} -> :failed_count
  end
end

defp translate_fill_item({:media, %{media_id: media_id, language: language}}) do
  case translate_media(media_id, language) do
    {:ok, _translation} -> :translated_media
    {:error, _reason} -> :failed_count
  end
end
@doc false
# Arity-2 entry point: loads the languages this post already has translations
# for, then delegates to the arity-3 filter.
def missing_languages(%Post{} = post, metadata) do
  translated_query =
    from row in Translation,
      where: row.translation_for == ^post.id,
      select: row.language

  already_translated = translated_query |> Repo.all() |> MapSet.new()

  missing_languages(post, metadata, already_translated)
end
# Submits a background task that translates `post` into `language` and, once
# the translation is saved, queues translations for the post's linked media.
# The task result is a map of ids on success or the `{:error, reason}` tuple
# from `translate_post/3`. Always returns `:ok`; the submit result is discarded.
defp queue_post(%Post{} = post, language) do
  job = fn progress ->
    progress.(0.05, "Translating post to #{language}")

    with {:ok, translation} <- translate_post(post, language) do
      progress.(0.85, "Post translation saved")
      :ok = queue_media_cascade(post, language)
      progress.(1.0, "Post translation complete")
      %{post_id: post.id, translation_id: translation.id, language: language}
    else
      {:error, reason} -> {:error, reason}
    end
  end

  Tasks.submit_task("Auto-translate Post to #{language}", job, task_attrs(post))
  :ok
end
# Queues a media translation task for every media item linked to `post` that
# still needs a translation into `language`. Each item is checked and queued
# before the next one is looked at.
defp queue_media_cascade(%Post{} = post, language) do
  for media_id <- linked_media_ids(post.id), media_needed?(media_id, language) do
    queue_media(post, media_id, language)
  end

  :ok
end
# Submits a background task that translates one media item into `language`.
# The task result is a map of ids on success or the `{:error, reason}` tuple
# from `translate_media/2`. Always returns `:ok`; the submit result is discarded.
defp queue_media(%Post{} = post, media_id, language) do
  job = fn progress ->
    progress.(0.05, "Translating media to #{language}")

    with {:ok, translation} <- translate_media(media_id, language) do
      progress.(1.0, "Media translation complete")
      %{media_id: media_id, translation_id: translation.id, language: language}
    else
      {:error, reason} -> {:error, reason}
    end
  end

  Tasks.submit_task("Auto-translate Media to #{language}", job, task_attrs(post))
  :ok
end
# Returns true when the media item exists, has a non-blank source language
# different from `language`, and has no stored translation for `language` yet.
#
# The media's language is normalized (trimmed, lower-cased) before comparing,
# matching `collect_missing_media_items/3` and `translate_media/2`. Without
# this, a stored "EN" or " en " would count as different from the target "en"
# and the item would be translated into its own language.
defp media_needed?(media_id, language) do
  case Repo.get(Media.Media, media_id) do
    %Media.Media{language: raw_language} ->
      source_language = normalize_language(raw_language)

      # `and` short-circuits, so the query only runs when a translation could be needed.
      source_language != "" and
        source_language != normalize_language(language) and
        not Repo.exists?(
          from translation in Media.Translation,
            where: translation.translation_for == ^media_id and translation.language == ^language
        )

    _other ->
      false
  end
end
# Task attributes shared by all auto-translation tasks: grouped by the post's
# project id under the group name "AI".
defp task_attrs(%Post{project_id: project_id}) do
  %{group_name: "AI", group_id: project_id}
end
# Reads the `:auto_translation_ai_opts` keyword list from the `:posts` entry
# of the `:bds` application environment; defaults to `[]` at both levels.
defp ai_opts do
  posts_config = Application.get_env(:bds, :posts, [])
  Keyword.get(posts_config, :auto_translation_ai_opts, [])
end
# Returns the project's languages (main language first, then blog languages),
# normalized, with blanks removed and duplicates dropped.
#
# `List.wrap/1` is used because the `Map.get/3` default only applies when the
# key is absent. An explicit `blog_languages: nil` would otherwise build an
# improper list and crash in `Enum.map/2`.
defp configured_languages(metadata) do
  main_language = Map.get(metadata, :main_language)
  blog_languages = metadata |> Map.get(:blog_languages) |> List.wrap()

  [main_language | blog_languages]
  |> Enum.map(&normalize_language/1)
  |> Enum.reject(&(&1 in [nil, ""]))
  |> Enum.uniq()
end
# Loads every post translation of the project and returns a map from post id
# (`translation_for`) to the `MapSet` of languages already translated.
defp existing_post_languages(project_id) do
  rows =
    Repo.all(
      from t in Translation,
        where: t.project_id == ^project_id,
        select: {t.translation_for, t.language}
    )

  rows
  |> Enum.group_by(
    fn {post_id, _language} -> post_id end,
    fn {_post_id, language} -> language end
  )
  |> Map.new(fn {post_id, languages} -> {post_id, MapSet.new(languages)} end)
end
# Builds one `%{media_id: id, language: language}` entry for every missing
# media translation among the media linked to translatable published posts.
# The source language is the media's own language, falling back to the
# project's main language. Linked ids with no media row are skipped.
defp collect_missing_media_items(published_posts, metadata, languages) do
  media_ids =
    published_posts
    |> Enum.reject(fn post -> post.do_not_translate end)
    |> Enum.flat_map(fn post -> linked_media_ids(post.id) end)
    |> Enum.uniq()

  media_query = from media in Media.Media, where: media.id in ^media_ids

  media_by_id =
    media_query
    |> Repo.all()
    |> Map.new(fn media -> {media.id, media} end)

  translated_by_media = existing_media_languages(media_ids)

  Enum.flat_map(media_ids, fn media_id ->
    case Map.fetch(media_by_id, media_id) do
      {:ok, media} ->
        source_language = normalize_language(media.language || metadata.main_language)
        translated = Map.get(translated_by_media, media_id, MapSet.new())

        for language <- languages,
            language != source_language,
            not MapSet.member?(translated, language),
            do: %{media_id: media_id, language: language}

      :error ->
        []
    end
  end)
end
# Loads the stored translations for the given media ids and returns a map
# from media id (`translation_for`) to the `MapSet` of translated languages.
defp existing_media_languages(media_ids) do
  rows =
    Repo.all(
      from t in Media.Translation,
        where: t.translation_for in ^media_ids,
        select: {t.translation_for, t.language}
    )

  for {media_id, language} <- rows, reduce: %{} do
    by_media ->
      Map.update(by_media, media_id, MapSet.new([language]), &MapSet.put(&1, language))
  end
end
# Starting summary for a fill run: every counter at zero, `nothing_to_do` false.
defp empty_fill_summary do
  counters =
    Map.new(
      [:translated_posts, :translated_media, :failed_count, :warned_count],
      fn key -> {key, 0} end
    )

  Map.put(counters, :nothing_to_do, false)
end
# Final progress message: "Done", optionally followed by the non-zero
# failure and warning counts in parentheses.
defp completion_message(summary) do
  details =
    Enum.reduce(
      [{summary.failed_count, "failed"}, {summary.warned_count, "warnings"}],
      [],
      fn {count, label}, acc -> maybe_add_completion_detail(acc, count, label) end
    )

  case details do
    [] -> "Done"
    _some -> "Done (#{Enum.join(details, ", ")})"
  end
end
# Appends "<count> <label>" to `details` unless `count` is the integer 0.
defp maybe_add_completion_detail(details, count, label) do
  case count do
    0 -> details
    _nonzero -> details ++ ["#{count} #{label}"]
  end
end
# Maps per-item progress onto the 0.15..1.0 band (the first 15% is used by
# the scanning phase) and forwards it to the progress callback.
defp report_fill_item_progress(on_progress, completed, total_items, message) do
  fraction_done = completed / total_items
  report_progress(on_progress, 0.15 + fraction_done * 0.85, message)
end
# Invokes the progress callback when it is a 2-arity function and returns its
# result; any other value (e.g. nil) is ignored and `:ok` is returned.
defp report_progress(on_progress, value, message) do
  if is_function(on_progress, 2) do
    on_progress.(value, message)
  else
    :ok
  end
end
# Configured languages that are neither the post's source language (its own
# language, falling back to the project's main language) nor already present
# in `existing_languages`.
defp missing_languages(%Post{} = post, metadata, existing_languages) do
  source_language = normalize_language(post.language || metadata.main_language)

  for language <- configured_languages(metadata),
      language != source_language,
      not MapSet.member?(existing_languages, language),
      do: language
end
# Translates a post into `language` via the AI backend, stores the result as
# an auto-generated translation and, with `auto_publish: true`, publishes it.
#
# Returns `{:error, :no_content_to_translate}` when the editor body is blank.
# Otherwise returns the result of the first step that does not yield
# `{:ok, _}`, or the result of the final (maybe-)publish step.
defp translate_post(%Post{} = post, language, opts \\ []) do
  auto_publish? = Keyword.get(opts, :auto_publish, false)
  content = Posts.editor_body(post)
  source_language = normalize_language(post.language)

  case String.trim(content) do
    "" ->
      {:error, :no_content_to_translate}

    _nonblank ->
      payload = %{title: post.title || "", excerpt: post.excerpt || "", content: content}
      request_opts = Keyword.put(ai_opts(), :source_language, source_language)

      with {:ok, translated} <- AI.translate_post(payload, language, request_opts),
           {:ok, saved} <-
             Posts.upsert_post_translation(post.id, language, %{
               title: translated.title,
               excerpt: translated.excerpt,
               content: translated.content,
               auto_generated: true
             }) do
        maybe_publish_post_translation(post.id, language, saved, auto_publish?)
      end
  end
end
# With the flag `true`, publishes the stored translation and returns the
# publish result; with `false`, wraps the already-saved translation in `:ok`.
defp maybe_publish_post_translation(post_id, language, _saved_translation, true) do
  Posts.publish_post_translation(post_id, language)
end

defp maybe_publish_post_translation(_post_id, _language, saved_translation, false) do
  {:ok, saved_translation}
end
# Translates a media item's title, alt text and caption into `language` via
# the AI backend and stores the result. The source language passed to the AI
# is the media's normalized language, or "" when the media row is missing.
# Returns the AI error unchanged, otherwise the upsert result.
defp translate_media(media_id, language) do
  media = Repo.get(Media.Media, media_id)

  source_language =
    if is_nil(media), do: "", else: normalize_language(media.language)

  request_opts = Keyword.put(ai_opts(), :source_language, source_language)

  with {:ok, translated} <- AI.translate_media(media_id, language, request_opts) do
    Media.upsert_media_translation(media_id, language, %{
      title: translated.title,
      alt: translated.alt,
      caption: translated.caption
    })
  end
end
# True when the AI endpoint for the current mode has a non-empty URL and
# model and, outside airplane mode, also a present API key.
defp configured? do
  mode = if AI.airplane_mode?(), do: :airplane, else: :online

  with {:ok, %{url: url, model: model} = endpoint}
       when is_binary(url) and url != "" and is_binary(model) and model != "" <-
         AI.get_endpoint(mode) do
    mode == :airplane or present?(Map.get(endpoint, :api_key))
  else
    _other -> false
  end
end
# Ids of the media linked to a post, ordered by `sort_order` and then by
# media id.
defp linked_media_ids(post_id) do
  links_query =
    from link in PostMedia,
      where: link.post_id == ^post_id,
      order_by: [asc: link.sort_order, asc: link.media_id],
      select: link.media_id

  Repo.all(links_query)
end
# Canonical form of a language code: nil becomes "", anything else is
# stringified, trimmed and lower-cased.
defp normalize_language(nil), do: ""

defp normalize_language(language) do
  trimmed = String.trim(to_string(language))
  String.downcase(trimmed)
end
# nil and blank (empty or whitespace-only) strings are not present;
# every other value is.
defp present?(nil), do: false
defp present?(value) when is_binary(value), do: String.trim(value) != ""
defp present?(_value), do: true
end