diff --git a/CODESMELL.md b/CODESMELL.md index 6e22d23..9379c75 100644 --- a/CODESMELL.md +++ b/CODESMELL.md @@ -452,7 +452,18 @@ Total: 2245 lines now live in focused submodules; the remaining 647 in `BDS.Gene | `SessionUtil` | 49 | Workbench-session restore, project-name picker, task-result tracking | Coordinator (`shell_live.ex`) now 1545 lines containing only `mount/3`, `render/1`, `handle_event/3`, `handle_info/2` clauses, plus thin dispatchers and small editor-assign helpers. -- ⏳ `BDS.Posts` (1781). +- ⏳ `BDS.Posts` (1781 → 569, 68% reduction). Submodules extracted under `lib/bds/posts/`: + + | Module | Lines | Responsibility | + |---|---|---| + | `Slugs` | 86 | `available/3`, `unique_for_title/3` (exposed on `BDS.Posts` as `slug_available/3` and `unique_slug_for_title/3`), `unique`, `unique_for_import`, `default_source` | + | `AutoTranslation` | 176 | `maybe_schedule/1`, missing-language detection, post + cascading media auto-translate task scheduling | + | `FileSync` | 146 | Post/translation relative-path computation, frontmatter serialization, body extraction, on-disk delete | + | `TranslationValidation` | 464 | `validate/2`, `fix_invalid/1`, invalid DB/FS issue classification, legacy report fields, canonical-language helpers, markdown-file recursion | + | `RebuildFromFiles` | 320 | `rebuild_posts_from_files/2`, `import_orphan_post_file/2`, `import_orphan_post_translation_file/2`, `parse_rebuild_file`, `upsert_post_from_file`, `upsert_post_from_rebuild_file`, `upsert_post_translation_from_rebuild_file`, `progress_callback/1`, `report_rebuild_started/3`, `report_rebuild_progress/4`, `parse_post_status`, `parse_translation_status` | + | `Translations` | 279 | `publish_post_translation/2`, `list_post_translations/1`, `upsert_post_translation/3`, `delete_post_translation/1`, `sync_post_translation_from_file/1`, `rewrite_published_post_translation/1`, `publish_translation/2`, `publish_post_translations/1`, `normalize_translation_updates`, `maybe_reopen_source_post_for_manual_translation` | + + Public API on `BDS.Posts` 
preserved via `defdelegate` for: `slug_available/3`, `unique_slug_for_title/3`, `validate_translations/2`, `fix_invalid_translations/1`, `rebuild_posts_from_files/2`, `import_orphan_post_file/2`, `import_orphan_post_translation_file/2`, `publish_post_translation/2`, `list_post_translations/1`, `upsert_post_translation/3`, `delete_post_translation/1`, `sync_post_translation_from_file/1`, `rewrite_published_post_translation/1`. Remaining clusters in posts.ex are core CRUD (`create_post`, `update_post`, `publish_post`, `delete_post`, `archive_post`, `discard_post_changes`, `sync_post_from_file`, `rewrite_published_post`, `editor_body`), small stats (`dashboard_stats`, `post_counts_by_year_month`, ~40 lines extractable), and `rebuild_post_links` (~22 lines). Stats could be split next, but ~569 lines is a reasonable steady state. - ⏳ `BDS.AI` (1711). - ⏳ `BDS.MCP` (677). diff --git a/lib/bds/posts.ex b/lib/bds/posts.ex index 874f047..8efb8af 100644 --- a/lib/bds/posts.ex +++ b/lib/bds/posts.ex @@ -3,24 +3,34 @@ defmodule BDS.Posts do import Ecto.Query - alias BDS.DocumentFields - alias BDS.Frontmatter alias BDS.Embeddings - alias BDS.AI alias BDS.Media - alias BDS.Metadata alias BDS.Persistence alias BDS.PostLinks + alias BDS.Posts.AutoTranslation + alias BDS.Posts.FileSync alias BDS.Posts.Link alias BDS.Posts.Post alias BDS.Posts.PostMedia + alias BDS.Posts.RebuildFromFiles + alias BDS.Posts.Slugs alias BDS.Posts.Translation + alias BDS.Posts.Translations + alias BDS.Posts.TranslationValidation alias BDS.Projects - alias BDS.Rebuild alias BDS.Repo alias BDS.Search alias BDS.Slug - alias BDS.Tasks + + import FileSync, + only: [ + post_relative_path: 2, + publishable_post_body: 3, + published_post_body: 2, + read_markdown_body: 1, + serialize_post_file: 2, + delete_post_file: 1 + ] @typedoc "An attribute map that may use atom or string keys." 
@type attrs :: %{optional(atom()) => term(), optional(String.t()) => term()} @@ -55,14 +65,14 @@ defmodule BDS.Posts do now = Persistence.now_ms() project_id = attr(attrs, :project_id) title = normalize_title(attr(attrs, :title)) - base_slug = title |> default_slug_source() |> Slug.slugify() + base_slug = title |> Slugs.default_source() |> Slug.slugify() %Post{} |> Post.changeset(%{ id: Ecto.UUID.generate(), project_id: project_id, title: title, - slug: unique_slug(project_id, base_slug), + slug: Slugs.unique(project_id, base_slug), excerpt: attr(attrs, :excerpt), content: attr(attrs, :content), status: :draft, @@ -88,7 +98,7 @@ defmodule BDS.Posts do {:ok, post} -> :ok = Embeddings.sync_post(post) :ok = Search.sync_post(post) - :ok = maybe_schedule_auto_translations(post) + :ok = AutoTranslation.maybe_schedule(post) {:ok, post} error -> @@ -127,7 +137,7 @@ defmodule BDS.Posts do :ok = Embeddings.sync_post(updated_post) :ok = PostLinks.sync_post_links(updated_post) :ok = Search.sync_post(updated_post) - :ok = maybe_schedule_auto_translations(updated_post) + :ok = AutoTranslation.maybe_schedule(updated_post) {:ok, updated_post} error -> @@ -149,7 +159,7 @@ defmodule BDS.Posts do %Post{} = post -> project = Projects.get_project!(post.project_id) published_at = post.published_at || Persistence.now_ms() - relative_path = build_post_relative_path(post.slug, post.created_at) + relative_path = post_relative_path(post.slug, post.created_at) full_path = Path.join(Projects.project_data_dir(project), relative_path) updated_at = Persistence.now_ms() body = publishable_post_body(post, full_path, project) @@ -172,7 +182,7 @@ defmodule BDS.Posts do |> case do {:ok, updated_post} -> :ok = Embeddings.sync_post(updated_post) - :ok = publish_post_translations(updated_post) + :ok = Translations.publish_post_translations(updated_post) :ok = PostLinks.sync_post_links(updated_post) :ok = Search.sync_post(updated_post) {:ok, updated_post} @@ -184,56 +194,7 @@ defmodule BDS.Posts do end 
@spec rebuild_posts_from_files(String.t(), rebuild_opts()) :: {:ok, [Post.t()]} - def rebuild_posts_from_files(project_id, opts \\ []) do - project = Projects.get_project!(project_id) - on_progress = progress_callback(opts) - - rebuild_files = - project - |> Projects.project_data_dir() - |> Path.join("posts") - |> list_matching_files("*.md") - |> Rebuild.parallel_map(&parse_rebuild_file(project, &1)) - - total_files = length(rebuild_files) - :ok = report_rebuild_started(on_progress, total_files, "post files") - - {translation_files, post_files} = Enum.split_with(rebuild_files, &translation_rebuild_file?/1) - - posts = - post_files - |> Enum.with_index(1) - |> Enum.map(fn {file, index} -> - post = upsert_post_from_rebuild_file(project_id, file, sync_search: false, sync_embeddings: false) - :ok = report_rebuild_progress(on_progress, index, total_files, "post files") - post - end) - - translation_files - |> Enum.with_index(length(post_files) + 1) - |> Enum.each(fn {file, index} -> - upsert_post_translation_from_rebuild_file(project_id, file, sync_search: false) - :ok = report_rebuild_progress(on_progress, index, total_files, "post files") - end) - - if Keyword.get(opts, :reindex_search, true) do - :ok = report_rebuild_phase(on_progress, 0.97, "Refreshing post search index") - :ok = - Search.reindex_posts(project_id, - on_progress: scaled_progress_reporter(on_progress, 0.97, 0.99) - ) - end - - if Keyword.get(opts, :rebuild_embeddings, true) do - :ok = report_rebuild_phase(on_progress, 0.99, "Refreshing post embeddings") - {:ok, _rebuilt_post_ids} = - Embeddings.rebuild_project(project_id, - on_progress: scaled_progress_reporter(on_progress, 0.99, 1.0) - ) - end - - {:ok, posts} - end + defdelegate rebuild_posts_from_files(project_id, opts \\ []), to: RebuildFromFiles @spec discard_post_changes(String.t()) :: {:ok, Post.t()} | {:error, :not_found} @@ -250,7 +211,7 @@ defmodule BDS.Posts do full_path = Path.join(Projects.project_data_dir(project), post.file_path) if 
File.exists?(full_path) do - restored_post = upsert_post_from_file(post.project_id, project, full_path) + restored_post = RebuildFromFiles.upsert_post_from_file(post.project_id, project, full_path) :ok = PostLinks.sync_post_links(restored_post) {:ok, restored_post} else @@ -298,7 +259,7 @@ defmodule BDS.Posts do full_path = Path.join(Projects.project_data_dir(project), post.file_path) if File.exists?(full_path) do - repaired_post = upsert_post_from_file(post.project_id, project, full_path) + repaired_post = RebuildFromFiles.upsert_post_from_file(post.project_id, project, full_path) :ok = PostLinks.sync_post_links(repaired_post) {:ok, repaired_post} else @@ -309,102 +270,19 @@ defmodule BDS.Posts do @spec sync_post_translation_from_file(String.t()) :: {:ok, Translation.t()} | {:error, :not_found} - def sync_post_translation_from_file(translation_id) do - case Repo.get(Translation, translation_id) do - nil -> - {:error, :not_found} - - %Translation{file_path: file_path} when file_path in [nil, ""] -> - {:error, :not_found} - - %Translation{} = translation -> - project = Projects.get_project!(translation.project_id) - full_path = Path.join(Projects.project_data_dir(project), translation.file_path) - - if File.exists?(full_path) do - rebuild_file = parse_rebuild_file(project, full_path) - {:ok, upsert_post_translation_from_rebuild_file(translation.project_id, rebuild_file, sync_search: true)} - else - {:error, :not_found} - end - end - end + defdelegate sync_post_translation_from_file(translation_id), to: Translations @spec rewrite_published_post_translation(String.t()) :: {:ok, Translation.t()} | {:error, :not_found} - def rewrite_published_post_translation(translation_id) do - case Repo.get(Translation, translation_id) do - nil -> - {:error, :not_found} - - %Translation{file_path: file_path, status: status} = translation - when file_path not in [nil, ""] and status == :published -> - post = Repo.get!(Post, translation.translation_for) - :ok = 
publish_translation(post, translation) - {:ok, Repo.get!(Translation, translation_id)} - - %Translation{} -> - {:error, :not_found} - end - end + defdelegate rewrite_published_post_translation(translation_id), to: Translations @spec import_orphan_post_file(String.t(), String.t()) :: {:ok, Post.t()} | {:error, :not_found | :unsupported_file} - def import_orphan_post_file(project_id, relative_path) do - project = Projects.get_project!(project_id) - full_path = Path.join(Projects.project_data_dir(project), relative_path) - - if File.exists?(full_path) do - rebuild_file = parse_rebuild_file(project, full_path) - - if translation_rebuild_file?(rebuild_file) do - {:error, :unsupported_file} - else - fields = - rebuild_file.fields - |> Map.put("id", unique_post_id(Map.get(rebuild_file.fields, "id"))) - |> Map.put("slug", unique_slug_for_import(project_id, Map.fetch!(rebuild_file.fields, "slug"))) - - {:ok, upsert_post_from_rebuild_file(project_id, %{rebuild_file | fields: fields})} - end - else - {:error, :not_found} - end - end + defdelegate import_orphan_post_file(project_id, relative_path), to: RebuildFromFiles @spec import_orphan_post_translation_file(String.t(), String.t()) :: {:ok, Translation.t()} | {:error, :not_found | :unsupported_file | :conflict} - def import_orphan_post_translation_file(project_id, relative_path) do - project = Projects.get_project!(project_id) - full_path = Path.join(Projects.project_data_dir(project), relative_path) - - if File.exists?(full_path) do - rebuild_file = parse_rebuild_file(project, full_path) - - if translation_rebuild_file?(rebuild_file) do - source_post_id = Map.fetch!(rebuild_file.fields, "translationFor") - language = normalize_language(Map.fetch!(rebuild_file.fields, "language")) - - case Repo.get(Post, source_post_id) do - nil -> - {:error, :not_found} - - %Post{} = post -> - if normalize_language(post.language) == language or - Repo.get_by(Translation, translation_for: source_post_id, language: language) do - {:error, 
:conflict} - else - fields = Map.put(rebuild_file.fields, "id", Ecto.UUID.generate()) - {:ok, upsert_post_translation_from_rebuild_file(project_id, %{rebuild_file | fields: fields}, sync_search: true)} - end - end - else - {:error, :unsupported_file} - end - else - {:error, :not_found} - end - end + defdelegate import_orphan_post_translation_file(project_id, relative_path), to: RebuildFromFiles @spec delete_post(String.t()) :: {:ok, :deleted} | {:error, :not_found} def delete_post(post_id) do @@ -413,7 +291,14 @@ defmodule BDS.Posts do {:error, :not_found} %Post{} = post -> - linked_media_ids = linked_media_ids(post.id) + linked_media_ids = + Repo.all( + from pm in PostMedia, + where: pm.post_id == ^post.id, + order_by: [asc: pm.sort_order, asc: pm.media_id], + select: pm.media_id + ) + delete_post_file(post) :ok = Embeddings.remove_post(post.id) :ok = PostLinks.delete_post_links(post.id) @@ -460,56 +345,17 @@ defmodule BDS.Posts do @spec publish_post_translation(String.t(), String.t() | atom()) :: {:ok, Translation.t()} | {:error, :not_found | term()} - def publish_post_translation(post_id, language) do - normalized_language = language |> to_string() |> String.trim() |> String.downcase() - - case Repo.get_by(Translation, translation_for: post_id, language: normalized_language) do - nil -> - {:error, :not_found} - - %Translation{} -> - with {:ok, _post} <- publish_post(post_id), - %Translation{} = translation <- Repo.get_by(Translation, translation_for: post_id, language: normalized_language) do - {:ok, translation} - else - nil -> {:error, :not_found} - error -> error - end - end - end + defdelegate publish_post_translation(post_id, language), to: Translations @spec slug_available(String.t(), String.t(), String.t() | nil) :: boolean() - def slug_available(project_id, slug, exclude_post_id \\ nil) do - normalized_slug = slug |> to_string() |> String.trim() - - query = - from(post in Post, - where: post.project_id == ^project_id and post.slug == ^normalized_slug, - 
select: post.id, - limit: 1 - ) - - case Repo.one(query) do - nil -> true - ^exclude_post_id -> true - _other -> false - end - end + defdelegate slug_available(project_id, slug, exclude_post_id \\ nil), + to: Slugs, + as: :available @spec unique_slug_for_title(String.t(), String.t(), String.t() | nil) :: String.t() - def unique_slug_for_title(project_id, title, exclude_post_id \\ nil) do - base_slug = title |> default_slug_source() |> Slug.slugify() - - if slug_available(project_id, base_slug, exclude_post_id) do - base_slug - else - Stream.iterate(2, &(&1 + 1)) - |> Enum.find_value(fn counter -> - candidate = "#{base_slug}-#{counter}" - if slug_available(project_id, candidate, exclude_post_id), do: candidate, else: nil - end) - end - end + defdelegate unique_slug_for_title(project_id, title, exclude_post_id \\ nil), + to: Slugs, + as: :unique_for_title @spec dashboard_stats(String.t()) :: dashboard_stats() def dashboard_stats(project_id) do @@ -554,7 +400,7 @@ defmodule BDS.Posts do @spec rebuild_post_links(String.t(), rebuild_opts()) :: :ok def rebuild_post_links(project_id, opts \\ []) do post_ids = Repo.all(from(post in Post, where: post.project_id == ^project_id, select: post.id)) - on_progress = progress_callback(opts) + on_progress = RebuildFromFiles.progress_callback(opts) Repo.delete_all( from(link in Link, @@ -564,162 +410,30 @@ defmodule BDS.Posts do posts = Repo.all(from(post in Post, where: post.project_id == ^project_id, order_by: [asc: post.created_at])) total_posts = length(posts) - :ok = report_rebuild_started(on_progress, total_posts, "post links") + :ok = RebuildFromFiles.report_rebuild_started(on_progress, total_posts, "post links") posts |> Enum.with_index(1) |> Enum.each(fn {post, index} -> PostLinks.sync_post_links(post) - :ok = report_rebuild_progress(on_progress, index, total_posts, "post links") + :ok = RebuildFromFiles.report_rebuild_progress(on_progress, index, total_posts, "post links") end) :ok end @spec 
list_post_translations(String.t()) :: {:ok, [Translation.t()]} - def list_post_translations(post_id) do - {:ok, - Repo.all( - from translation in Translation, - where: translation.translation_for == ^post_id, - order_by: [asc: translation.language] - )} - end + defdelegate list_post_translations(post_id), to: Translations @spec upsert_post_translation(String.t(), String.t() | atom(), attrs()) :: {:ok, Translation.t()} | {:error, :not_found | Ecto.Changeset.t()} - def upsert_post_translation(post_id, language, attrs) do - case Repo.get(Post, post_id) do - nil -> - {:error, :not_found} - - %Post{do_not_translate: true} = post -> - {:error, - post - |> Post.changeset(%{}) - |> Ecto.Changeset.add_error( - :do_not_translate, - "cannot add translations when do_not_translate is true" - )} - - %Post{} = post -> - now = Persistence.now_ms() - normalized_language = normalize_language(language) - - translation = - Repo.get_by(Translation, translation_for: post.id, language: normalized_language) || - %Translation{} - - updates = - normalize_translation_updates(post, translation, normalized_language, attrs, now) - - translation - |> Translation.changeset(updates) - |> Repo.insert_or_update() - |> case do - {:ok, saved_translation} -> - {:ok, _post} = maybe_reopen_source_post_for_manual_translation(post, attrs) - :ok = Search.sync_post(post.id) - {:ok, saved_translation} - - error -> - error - end - end - end + defdelegate upsert_post_translation(post_id, language, attrs), to: Translations @spec delete_post_translation(String.t()) :: {:ok, :deleted} | {:error, :not_found} - def delete_post_translation(translation_id) do - case Repo.get(Translation, translation_id) do - nil -> - {:error, :not_found} - - %Translation{} = translation -> - :ok = delete_translation_file(translation) - Repo.delete!(translation) - :ok = Search.sync_post(translation.translation_for) - {:ok, :deleted} - end - end + defdelegate delete_post_translation(translation_id), to: Translations @spec 
validate_translations(String.t(), rebuild_opts()) :: {:ok, translation_validation_report()} - def validate_translations(project_id, opts \\ []) do - project = Projects.get_project!(project_id) - {:ok, metadata} = Metadata.get_project_metadata(project_id) - on_progress = progress_callback(opts) - - source_posts = - Repo.all( - from post in Post, - where: post.project_id == ^project_id, - order_by: [asc: post.created_at, asc: post.slug] - ) - - source_post_map = Map.new(source_posts, &{&1.id, &1}) - - translation_rows = - Repo.all( - from translation in Translation, - where: translation.project_id == ^project_id, - order_by: [asc: translation.translation_for, asc: translation.language, asc: translation.id] - ) - - project_data_dir = Projects.project_data_dir(project) - - markdown_files = - project_data_dir - |> Path.join("posts") - |> list_markdown_files_recursive() - - total_items = length(translation_rows) + length(markdown_files) - :ok = report_rebuild_started(on_progress, total_items, "translations") - - invalid_database_rows = - translation_rows - |> Enum.with_index(1) - |> Enum.flat_map(fn {translation, index} -> - :ok = report_rebuild_progress(on_progress, index, total_items, "translations") - - case invalid_database_translation_issue(translation, source_post_map, metadata) do - nil -> [] - issue -> [issue] - end - end) - |> Enum.sort_by(&translation_validation_issue_sort_key/1) - - {checked_filesystem_file_count, invalid_filesystem_files} = - markdown_files - |> Enum.with_index(length(translation_rows) + 1) - |> Enum.reduce({0, []}, fn {file_path, index}, {count, issues} -> - :ok = report_rebuild_progress(on_progress, index, total_items, "translations") - - case invalid_filesystem_translation_issue(file_path, source_post_map, metadata) do - {:ok, nil} -> - {count + 1, issues} - - {:ok, issue} -> - {count + 1, [issue | issues]} - - :skip -> - {count, issues} - end - end) - - missing = legacy_missing_translation_entries(source_posts, translation_rows, metadata) 
- orphan_files = legacy_orphan_translation_files(invalid_filesystem_files, project_data_dir) - do_not_translate_posts = legacy_do_not_translate_posts(source_posts) - - {:ok, - %{ - checked_database_row_count: length(translation_rows), - checked_filesystem_file_count: checked_filesystem_file_count, - invalid_database_rows: invalid_database_rows, - invalid_filesystem_files: Enum.reverse(invalid_filesystem_files) |> Enum.sort_by(&translation_validation_issue_sort_key/1), - missing: missing, - orphan_files: orphan_files, - do_not_translate_posts: do_not_translate_posts - }} - end + defdelegate validate_translations(project_id, opts \\ []), to: TranslationValidation, as: :validate @spec fix_invalid_translations(map()) :: {:ok, @@ -728,41 +442,7 @@ defmodule BDS.Posts do deleted_files: non_neg_integer(), flushed_translations: non_neg_integer() }} - def fix_invalid_translations(report) when is_map(report) do - normalized_report = normalize_translation_validation_report(report) - - {deleted_database_rows, flushed_translations, synced_post_ids} = - Enum.reduce(normalized_report.invalid_database_rows, {0, 0, MapSet.new()}, fn issue, {deleted, flushed, synced_ids} -> - case fix_invalid_database_translation(issue) do - {:deleted, post_id} -> - {deleted + 1, flushed, maybe_put_synced_post(synced_ids, post_id)} - - {:flushed, post_id} -> - {deleted, flushed + 1, maybe_put_synced_post(synced_ids, post_id)} - - :noop -> - {deleted, flushed, synced_ids} - end - end) - - deleted_files = - Enum.reduce(normalized_report.invalid_filesystem_files, 0, fn issue, count -> - if delete_translation_validation_file(issue.file_path) do - count + 1 - else - count - end - end) - - Enum.each(synced_post_ids, &Search.sync_post/1) - - {:ok, - %{ - deleted_database_rows: deleted_database_rows, - deleted_files: deleted_files, - flushed_translations: flushed_translations - }} - end + defdelegate fix_invalid_translations(report), to: TranslationValidation, as: :fix_invalid @spec 
rewrite_published_post(String.t()) :: :ok def rewrite_published_post(post_id) do @@ -858,55 +538,9 @@ defmodule BDS.Posts do ) end - defp unique_slug(project_id, base_slug) do - normalized = if base_slug == "", do: "untitled", else: base_slug - - if slug_available?(project_id, normalized) do - normalized - else - find_unique_slug(project_id, normalized, 2) - end - end - - defp find_unique_slug(project_id, base_slug, suffix) do - candidate = "#{base_slug}-#{suffix}" - - if slug_available?(project_id, candidate) do - candidate - else - find_unique_slug(project_id, base_slug, suffix + 1) - end - end - - defp slug_available?(project_id, slug) do - not Repo.exists?( - from post in Post, where: post.project_id == ^project_id and post.slug == ^slug - ) - end - defp maybe_put(map, _key, nil), do: map defp maybe_put(map, key, value), do: Map.put(map, key, value) - defp unique_slug_for_import(project_id, slug) do - normalized = default_slug_source(slug) |> Slug.slugify() - - if slug_available?(project_id, normalized) do - normalized - else - find_unique_slug(project_id, normalized, 2) - end - end - - defp unique_post_id(nil), do: Ecto.UUID.generate() - - defp unique_post_id(id) do - if Repo.get(Post, id) || Repo.get(Translation, id) do - Ecto.UUID.generate() - else - id - end - end - defp normalize_title(nil), do: "" defp normalize_title(title), do: title @@ -914,512 +548,6 @@ defmodule BDS.Posts do if has_attr?(attrs, :title), do: normalize_title(attr(attrs, :title)), else: nil end - defp default_slug_source(""), do: "untitled" - defp default_slug_source(title), do: title - - defp build_post_relative_path(slug, created_at) do - datetime = Persistence.from_unix_ms!(created_at) - year = Integer.to_string(datetime.year) - month = datetime.month |> Integer.to_string() |> String.pad_leading(2, "0") - Path.join(["posts", year, month, "#{slug}.md"]) - end - - defp publishable_post_body(%Post{content: content}, _full_path, _project) - when is_binary(content), do: content - - defp 
publishable_post_body(%Post{file_path: file_path} = post, full_path, project) do - source_path = - if file_path in [nil, ""] do - full_path - else - Path.join(Projects.project_data_dir(project), file_path) - end - - published_post_body(post, source_path) - end - - defp serialize_post_file(post, published_at) do - Frontmatter.serialize_document( - [ - {"id", post.id}, - {"title", post.title}, - {"slug", post.slug}, - {"excerpt", post.excerpt}, - {"status", :published}, - {"author", post.author}, - {"language", post.language}, - {"doNotTranslate", post.do_not_translate}, - {"templateSlug", post.template_slug}, - {"createdAt", post.created_at}, - {"updatedAt", post.updated_at}, - {"publishedAt", published_at}, - {"tags", post.tags || []}, - {"categories", post.categories || []} - ], - post.content - ) - end - - defp published_post_body(%Post{content: content}, _full_path) when is_binary(content), - do: content - - defp published_post_body(_post, full_path), do: read_markdown_body(full_path) - - defp read_markdown_body(path) do - case File.read(path) do - {:ok, contents} -> - case String.split(contents, "\n---\n", parts: 2) do - [_frontmatter, body] -> String.trim_trailing(body, "\n") - _parts -> "" - end - - {:error, _reason} -> - "" - end - end - - defp upsert_post_from_file(project_id, project, path) do - rebuild_file = parse_rebuild_file(project, path) - upsert_post_from_rebuild_file(project_id, rebuild_file) - end - - defp upsert_post_from_rebuild_file(project_id, rebuild_file, opts \\ []) do - fields = rebuild_file.fields - now = Persistence.now_ms() - - attrs = %{ - id: DocumentFields.get(fields, "id") || Ecto.UUID.generate(), - project_id: project_id, - title: DocumentFields.get(fields, "title") || "", - slug: DocumentFields.fetch!(fields, "slug"), - excerpt: Map.get(fields, "excerpt"), - content: nil, - status: parse_post_status(DocumentFields.get(fields, "status", "published")), - author: Map.get(fields, "author"), - created_at: DocumentFields.get(fields, 
"createdAt", now), - updated_at: DocumentFields.get(fields, "updatedAt", now), - published_at: DocumentFields.get(fields, "publishedAt"), - file_path: rebuild_file.relative_path, - checksum: nil, - tags: Map.get(fields, "tags", []), - categories: Map.get(fields, "categories", []), - template_slug: DocumentFields.get(fields, "templateSlug"), - language: Map.get(fields, "language"), - do_not_translate: DocumentFields.get(fields, "doNotTranslate", false), - published_title: nil, - published_content: nil, - published_tags: nil, - published_categories: nil, - published_excerpt: nil - } - - post = - Repo.get(Post, attrs.id) || - Repo.get_by(Post, project_id: project_id, file_path: rebuild_file.relative_path) || - Repo.get_by(Post, project_id: project_id, slug: attrs.slug) || %Post{} - - post = - post - |> Post.changeset(attrs) - |> Repo.insert_or_update!() - - if Keyword.get(opts, :sync_search, true) do - :ok = Search.sync_post(post) - end - - if Keyword.get(opts, :sync_embeddings, true) do - :ok = Embeddings.sync_post(post) - end - - post - end - - defp upsert_post_translation_from_rebuild_file(project_id, rebuild_file, opts) do - fields = rebuild_file.fields - source_post_id = DocumentFields.fetch!(fields, "translationFor") - source_post = Repo.get_by!(Post, project_id: project_id, id: source_post_id) - now = Persistence.now_ms() - language = normalize_language(DocumentFields.fetch!(fields, "language")) - - translation = - Repo.get_by(Translation, translation_for: source_post_id, language: language) || %Translation{} - - attrs = %{ - id: DocumentFields.get(fields, "id") || Ecto.UUID.generate(), - project_id: project_id, - translation_for: source_post_id, - language: language, - title: DocumentFields.get(fields, "title") || "", - excerpt: Map.get(fields, "excerpt"), - content: nil, - status: parse_translation_status(DocumentFields.get(fields, "status", "published")), - created_at: DocumentFields.get(fields, "createdAt", source_post.created_at || now), - updated_at: 
DocumentFields.get(fields, "updatedAt", source_post.updated_at || source_post.created_at || now), - published_at: DocumentFields.get(fields, "publishedAt", source_post.published_at), - file_path: rebuild_file.relative_path, - checksum: nil - } - - translation - |> Translation.changeset(attrs) - |> Repo.insert_or_update!() - |> tap(fn _translation -> - if Keyword.get(opts, :sync_search, true) do - :ok = Search.sync_post(source_post_id) - end - end) - end - - defp parse_post_status(status) when is_atom(status), do: status - defp parse_post_status(status), do: String.to_existing_atom(status) - - defp parse_translation_status(status) when is_atom(status), do: status - defp parse_translation_status(status), do: String.to_existing_atom(status) - - defp parse_rebuild_file(project, path) do - contents = File.read!(path) - {:ok, %{fields: fields}} = Frontmatter.parse_document(contents) - - %{ - path: path, - relative_path: Path.relative_to(path, Projects.project_data_dir(project)), - fields: fields - } - end - - defp translation_rebuild_file?(%{fields: fields}) do - DocumentFields.has_key?(fields, "translationFor") and not DocumentFields.has_key?(fields, "slug") - end - - defp list_matching_files(dir, pattern) do - if File.dir?(dir) do - Path.join([dir, "**", pattern]) - |> Path.wildcard() - |> Enum.sort() - else - [] - end - end - - defp delete_post_file(%Post{project_id: _project_id, file_path: file_path}) - when file_path in [nil, ""], do: :ok - - defp delete_post_file(%Post{} = post) do - project = Projects.get_project!(post.project_id) - full_path = Path.join(Projects.project_data_dir(project), post.file_path) - - case File.rm(full_path) do - :ok -> :ok - {:error, :enoent} -> :ok - {:error, reason} -> {:error, reason} - end - end - - defp normalize_translation_updates(post, %Translation{} = translation, language, attrs, now) do - requested_status = - case attr(attrs, :status) do - nil -> nil - status -> parse_translation_status(status) - end - - updates = - %{} - |> 
maybe_put(:title, attr(attrs, :title)) - |> maybe_put(:excerpt, attr(attrs, :excerpt)) - |> maybe_put(:content, attr(attrs, :content)) - - reopened? = - translation.status == :published and translation_content_change?(translation, updates) - - status = if(reopened?, do: :draft, else: requested_status || translation.status || :draft) - - %{ - id: translation.id || Ecto.UUID.generate(), - project_id: post.project_id, - translation_for: post.id, - language: language, - title: Map.get(updates, :title, translation.title), - excerpt: Map.get(updates, :excerpt, translation.excerpt), - content: Map.get(updates, :content, translation.content), - status: status, - created_at: translation.created_at || now, - updated_at: now, - published_at: translation.published_at || if(status == :published, do: now, else: nil), - file_path: translation.file_path || "", - checksum: translation.checksum - } - end - - defp translation_content_change?(translation, updates) do - Enum.any?([:title, :excerpt, :content], fn field -> - case Map.fetch(updates, field) do - {:ok, value} -> value != Map.get(translation, field) - :error -> false - end - end) - end - - defp publish_post_translations(%Post{} = post) do - Repo.all(from translation in Translation, where: translation.translation_for == ^post.id) - |> Enum.each(fn translation -> - if translation.status == :draft do - publish_translation(post, translation) - end - end) - - :ok - end - - defp publish_translation(%Post{} = post, %Translation{} = translation) do - project = Projects.get_project!(post.project_id) - published_at = translation.published_at || Persistence.now_ms() - relative_path = build_translation_relative_path(post, translation.language) - full_path = Path.join(Projects.project_data_dir(project), relative_path) - updated_at = Persistence.now_ms() - body = publishable_translation_body(translation, full_path) - - :ok = - Persistence.atomic_write( - full_path, - serialize_translation_file( - %{translation | updated_at: updated_at, 
content: body}, - published_at - ) - ) - - translation - |> Translation.changeset(%{ - status: :published, - published_at: published_at, - file_path: relative_path, - content: nil, - updated_at: updated_at - }) - |> Repo.update!() - - :ok - end - - defp build_translation_relative_path(post, language) do - datetime = Persistence.from_unix_ms!(post.created_at) - year = Integer.to_string(datetime.year) - month = datetime.month |> Integer.to_string() |> String.pad_leading(2, "0") - Path.join(["posts", year, month, "#{post.slug}.#{language}.md"]) - end - - defp serialize_translation_file(translation, published_at) do - Frontmatter.serialize_document( - [ - {"id", translation.id}, - {"translationFor", translation.translation_for}, - {"language", translation.language}, - {"title", translation.title}, - {"excerpt", translation.excerpt}, - {"status", :published}, - {"createdAt", translation.created_at}, - {"updatedAt", translation.updated_at}, - {"publishedAt", published_at} - ], - translation.content - ) - end - - defp publishable_translation_body(%Translation{content: content}, _full_path) - when is_binary(content), do: content - - defp publishable_translation_body(_translation, full_path) do - case File.read(full_path) do - {:ok, contents} -> - case String.split(contents, "\n---\n", parts: 2) do - [_frontmatter, body] -> String.trim_trailing(body, "\n") - _parts -> "" - end - - {:error, _reason} -> - "" - end - end - - defp delete_translation_file(%Translation{project_id: _project_id, file_path: file_path}) - when file_path in [nil, ""], do: :ok - - defp delete_translation_file(%Translation{} = translation) do - project = Projects.get_project!(translation.project_id) - full_path = Path.join(Projects.project_data_dir(project), translation.file_path) - - case File.rm(full_path) do - :ok -> :ok - {:error, :enoent} -> :ok - {:error, reason} -> {:error, reason} - end - end - - defp maybe_reopen_source_post_for_manual_translation(%Post{} = post, attrs) do - if attr(attrs, 
:auto_generated) == true or post.status != :published or post.file_path in [nil, ""] do - {:ok, post} - else - project = Projects.get_project!(post.project_id) - full_path = Path.join(Projects.project_data_dir(project), post.file_path) - restored_content = published_post_body(post, full_path) - - post - |> Post.changeset(%{ - status: :draft, - content: restored_content, - updated_at: Persistence.now_ms() - }) - |> Repo.update() - end - end - - defp maybe_schedule_auto_translations(%Post{do_not_translate: true}), do: :ok - - defp maybe_schedule_auto_translations(%Post{} = post) do - with true <- auto_translation_configured?(), - {:ok, metadata} <- Metadata.get_project_metadata(post.project_id) do - post - |> missing_auto_translation_languages(metadata) - |> Enum.each(&queue_post_auto_translation(post, &1)) - else - _other -> :ok - end - - :ok - end - - defp missing_auto_translation_languages(%Post{} = post, metadata) do - source_language = normalize_language(post.language || metadata.main_language) - - configured_languages = - ([metadata.main_language] ++ (metadata.blog_languages || [])) - |> Enum.map(&normalize_language/1) - |> Enum.reject(&(&1 in [nil, ""])) - |> Enum.uniq() - - existing_languages = - Repo.all( - from translation in Translation, - where: translation.translation_for == ^post.id, - select: translation.language - ) - - configured_languages - |> Enum.reject(&(&1 == source_language or &1 in existing_languages)) - end - - defp queue_post_auto_translation(%Post{} = post, language) do - _ = - Tasks.submit_task( - "Auto-translate Post to #{language}", - fn report -> - report.(0.05, "Translating post to #{language}") - - with {:ok, translation} <- AI.translate_post(post.id, language, auto_translation_ai_opts()), - {:ok, saved_translation} <- - upsert_post_translation(post.id, language, %{ - title: translation.title, - excerpt: translation.excerpt, - content: translation.content, - auto_generated: true - }) do - report.(0.85, "Post translation saved") - :ok 
= queue_media_translation_cascade(post, language) - report.(1.0, "Post translation complete") - %{post_id: post.id, translation_id: saved_translation.id, language: language} - else - {:error, reason} -> {:error, reason} - end - end, - auto_translation_task_attrs(post) - ) - - :ok - end - - defp queue_media_translation_cascade(%Post{} = post, language) do - linked_media_ids(post.id) - |> Enum.each(fn media_id -> - if media_translation_needed?(media_id, language) do - queue_media_translation(post, media_id, language) - end - end) - - :ok - end - - defp queue_media_translation(%Post{} = post, media_id, language) do - _ = - Tasks.submit_task( - "Auto-translate Media to #{language}", - fn report -> - report.(0.05, "Translating media to #{language}") - - with {:ok, translation} <- AI.translate_media(media_id, language, auto_translation_ai_opts()), - {:ok, saved_translation} <- - Media.upsert_media_translation(media_id, language, %{ - title: translation.title, - alt: translation.alt, - caption: translation.caption - }) do - report.(1.0, "Media translation complete") - %{media_id: media_id, translation_id: saved_translation.id, language: language} - else - {:error, reason} -> {:error, reason} - end - end, - auto_translation_task_attrs(post) - ) - - :ok - end - - defp media_translation_needed?(media_id, language) do - case Repo.get(Media.Media, media_id) do - %Media.Media{language: source_language} when source_language not in [nil, ""] and source_language != language -> - not Repo.exists?( - from translation in Media.Translation, - where: translation.translation_for == ^media_id and translation.language == ^language - ) - - _other -> - false - end - end - - defp auto_translation_task_attrs(%Post{} = post) do - %{ - group_id: post.project_id, - group_name: "AI" - } - end - - defp auto_translation_ai_opts do - Application.get_env(:bds, :posts, []) - |> Keyword.get(:auto_translation_ai_opts, []) - end - - defp auto_translation_configured? 
do - mode = if AI.airplane_mode?(), do: :airplane, else: :online - - case AI.get_endpoint(mode) do - {:ok, %{url: url, model: model} = endpoint} - when is_binary(url) and url != "" and is_binary(model) and model != "" -> - mode == :airplane or present?(Map.get(endpoint, :api_key)) - - _other -> - false - end - end - - defp linked_media_ids(post_id) do - Repo.all( - from pm in PostMedia, - where: pm.post_id == ^post_id, - order_by: [asc: pm.sort_order, asc: pm.media_id], - select: pm.media_id - ) - end - defp sync_deleted_post_media_sidecar(media_id) do case Media.sync_media_sidecar(media_id) do :ok -> :ok @@ -1427,303 +555,6 @@ defmodule BDS.Posts do end end - defp present?(value) when is_binary(value), do: String.trim(value) != "" - defp present?(value), do: not is_nil(value) - - defp list_markdown_files_recursive(dir) do - ["*.md", "*.markdown", "*.mdx"] - |> Enum.flat_map(&list_matching_files(dir, &1)) - |> Enum.uniq() - |> Enum.sort() - end - - defp invalid_database_translation_issue(%Translation{} = translation, source_post_map, metadata) do - source_post = Map.get(source_post_map, translation.translation_for) - normalized_language = normalize_language(translation.language) - - cond do - is_nil(source_post) -> - translation_validation_issue(%{ - issue: "missing-source-post", - translation_id: translation.id, - translation_for: translation.translation_for, - translation_language: normalized_language, - title: translation.title, - file_path: blank_to_nil(translation.file_path) - }) - - canonical_translation_language?(source_post, normalized_language, metadata) -> - translation_validation_issue(%{ - issue: "same-language-as-canonical", - translation_id: translation.id, - translation_for: translation.translation_for, - canonical_language: canonical_translation_language(source_post, metadata), - translation_language: normalized_language, - title: translation.title, - file_path: blank_to_nil(translation.file_path) - }) - - source_post.do_not_translate -> - 
translation_validation_issue(%{ - issue: "do-not-translate-has-translations", - translation_id: translation.id, - translation_for: translation.translation_for, - translation_language: normalized_language, - title: translation.title, - file_path: blank_to_nil(translation.file_path) - }) - - translation.status == :published and present?(translation.content) -> - translation_validation_issue(%{ - issue: "content-in-database", - translation_id: translation.id, - translation_for: translation.translation_for, - translation_language: normalized_language, - title: translation.title, - file_path: blank_to_nil(translation.file_path) - }) - - true -> - nil - end - end - - defp invalid_filesystem_translation_issue(file_path, source_post_map, metadata) do - with {:ok, contents} <- File.read(file_path), - {:ok, %{fields: fields}} <- Frontmatter.parse_document(contents), - true <- translation_rebuild_file?(%{fields: fields}) do - translation_for = DocumentFields.get(fields, "translationFor") - source_post = Map.get(source_post_map, translation_for) - normalized_language = normalize_language(DocumentFields.get(fields, "language")) - title = DocumentFields.get(fields, "title") - - issue = - cond do - is_nil(source_post) -> - translation_validation_issue(%{ - issue: "missing-source-post", - translation_for: translation_for, - translation_language: normalized_language, - title: title, - file_path: file_path - }) - - canonical_translation_language?(source_post, normalized_language, metadata) -> - translation_validation_issue(%{ - issue: "same-language-as-canonical", - translation_for: translation_for, - canonical_language: canonical_translation_language(source_post, metadata), - translation_language: normalized_language, - title: title, - file_path: file_path - }) - - source_post.do_not_translate -> - translation_validation_issue(%{ - issue: "do-not-translate-has-translations", - translation_for: translation_for, - translation_language: normalized_language, - title: title, - 
file_path: file_path - }) - - true -> - nil - end - - {:ok, issue} - else - false -> :skip - _other -> :skip - end - end - - defp normalize_translation_validation_report(report) do - %{ - checked_database_row_count: map_value(report, :checked_database_row_count, 0), - checked_filesystem_file_count: map_value(report, :checked_filesystem_file_count, 0), - invalid_database_rows: - report - |> map_value(:invalid_database_rows, []) - |> Enum.map(&normalize_translation_validation_issue/1), - invalid_filesystem_files: - report - |> map_value(:invalid_filesystem_files, []) - |> Enum.map(&normalize_translation_validation_issue/1) - } - end - - defp legacy_missing_translation_entries(source_posts, translation_rows, metadata) do - configured_languages = - ([Map.get(metadata, :main_language)] ++ Map.get(metadata, :blog_languages, [])) - |> Enum.map(&normalize_language/1) - |> Enum.reject(&(&1 in [nil, ""])) - |> Enum.uniq() - - existing_languages_by_post = - Enum.reduce(translation_rows, %{}, fn translation, acc -> - Map.update( - acc, - translation.translation_for, - MapSet.new([normalize_language(translation.language)]), - &MapSet.put(&1, normalize_language(translation.language)) - ) - end) - - source_posts - |> Enum.filter(&(&1.status == :published and not &1.do_not_translate)) - |> Enum.flat_map(fn post -> - canonical_language = canonical_translation_language(post, metadata) - existing_languages = Map.get(existing_languages_by_post, post.id, MapSet.new()) - - configured_languages - |> Enum.reject(&(&1 == canonical_language or MapSet.member?(existing_languages, &1))) - |> Enum.map(&%{post_id: post.id, language: &1}) - end) - |> Enum.sort_by(&{&1.post_id, &1.language}) - end - - defp legacy_orphan_translation_files(invalid_filesystem_files, project_data_dir) do - invalid_filesystem_files - |> Enum.filter(&(Map.get(&1, :issue) == "missing-source-post")) - |> Enum.map(fn issue -> - issue - |> Map.get(:file_path) - |> relative_project_data_path(project_data_dir) - end) - |> 
Enum.reject(&is_nil/1) - |> Enum.sort() - end - - defp legacy_do_not_translate_posts(source_posts) do - source_posts - |> Enum.filter(&(&1.status == :published and &1.do_not_translate)) - |> Enum.map(& &1.id) - |> Enum.sort() - end - - defp normalize_translation_validation_issue(issue) when is_map(issue) do - %{ - issue: map_value(issue, :issue), - translation_id: blank_to_nil(map_value(issue, :translation_id)), - translation_for: map_value(issue, :translation_for), - canonical_language: blank_to_nil(map_value(issue, :canonical_language)), - translation_language: map_value(issue, :translation_language), - title: blank_to_nil(map_value(issue, :title)), - file_path: blank_to_nil(map_value(issue, :file_path)) - } - end - - defp fix_invalid_database_translation(%{issue: "content-in-database", translation_id: translation_id}) - when is_binary(translation_id) do - case Repo.get(Translation, translation_id) do - %Translation{} = translation -> - case Repo.get(Post, translation.translation_for) do - %Post{} = post -> - :ok = publish_translation(post, translation) - {:flushed, translation.translation_for} - - nil -> - :noop - end - - nil -> - :noop - end - end - - defp fix_invalid_database_translation(%{translation_id: translation_id, translation_for: translation_for}) - when is_binary(translation_id) do - case Repo.get(Translation, translation_id) do - %Translation{} = translation -> - Repo.delete!(translation) - {:deleted, translation_for} - - nil -> - :noop - end - end - - defp fix_invalid_database_translation(_issue), do: :noop - - defp delete_translation_validation_file(file_path) when file_path in [nil, ""], do: false - - defp delete_translation_validation_file(file_path) do - case File.rm(file_path) do - :ok -> true - {:error, :enoent} -> false - {:error, _reason} -> false - end - end - - defp translation_validation_issue(attrs) do - %{ - issue: Map.get(attrs, :issue), - translation_id: Map.get(attrs, :translation_id), - translation_for: Map.get(attrs, 
:translation_for), - canonical_language: Map.get(attrs, :canonical_language), - translation_language: Map.get(attrs, :translation_language), - title: Map.get(attrs, :title), - file_path: Map.get(attrs, :file_path) - } - end - - defp translation_validation_issue_sort_key(issue) do - [Map.get(issue, :translation_for), Map.get(issue, :translation_id), Map.get(issue, :file_path)] - |> Enum.map(&to_string(&1 || "")) - |> Enum.join(":") - end - - defp canonical_translation_language(source_post, metadata) do - language = normalize_language(source_post.language) - - if language == "" do - normalize_language(Map.get(metadata, :main_language)) - else - language - end - end - - defp canonical_translation_language?(source_post, language, metadata) do - canonical_language = canonical_translation_language(source_post, metadata) - canonical_language != "" and canonical_language == normalize_language(language) - end - - defp map_value(map, key, default \\ nil) when is_map(map) do - Map.get(map, key, Map.get(map, Atom.to_string(key), default)) - end - - defp blank_to_nil(value) when is_binary(value) do - case String.trim(value) do - "" -> nil - trimmed -> trimmed - end - end - - defp blank_to_nil(value), do: value - - defp relative_project_data_path(nil, _project_data_dir), do: nil - - defp relative_project_data_path(file_path, project_data_dir) do - case Path.relative_to(file_path, project_data_dir) do - relative_path when relative_path == file_path -> file_path - relative_path -> relative_path - end - end - - defp maybe_put_synced_post(set, post_id) when is_binary(post_id) and post_id != "", do: MapSet.put(set, post_id) - defp maybe_put_synced_post(set, _post_id), do: set - - defp normalize_language(nil), do: "" - - defp normalize_language(language) do - language - |> to_string() - |> String.downcase() - |> String.split("-", parts: 2) - |> hd() - end - defp has_attr?(attrs, key) do Map.has_key?(attrs, key) or Map.has_key?(attrs, Atom.to_string(key)) end @@ -1735,47 +566,4 @@ 
defmodule BDS.Posts do true -> nil end end - - defp progress_callback(opts) do - case Keyword.get(opts, :on_progress) do - callback when is_function(callback, 2) -> callback - _other -> nil - end - end - - defp scaled_progress_reporter(nil, _start_value, _end_value), do: nil - - defp scaled_progress_reporter(report, start_value, end_value) when is_function(report, 2) do - fn value, message -> - scaled_value = start_value + (end_value - start_value) * value - report.(scaled_value, message) - end - end - - defp report_rebuild_started(nil, _total, _label), do: :ok - - defp report_rebuild_started(callback, 0, label) do - callback.(1.0, "No #{label} found") - :ok - end - - defp report_rebuild_started(callback, total, label) do - callback.(0.05, "Rebuilding #{label} (0/#{total})") - :ok - end - - defp report_rebuild_progress(nil, _current, _total, _label), do: :ok - defp report_rebuild_progress(_callback, _current, 0, _label), do: :ok - - defp report_rebuild_progress(callback, current, total, label) do - callback.(0.05 + 0.95 * (current / total), "Rebuilding #{label} (#{current}/#{total})") - :ok - end - - defp report_rebuild_phase(nil, _progress, _message), do: :ok - - defp report_rebuild_phase(callback, progress, message) do - callback.(progress, message) - :ok - end end diff --git a/lib/bds/posts/auto_translation.ex b/lib/bds/posts/auto_translation.ex new file mode 100644 index 0000000..8fbd1d5 --- /dev/null +++ b/lib/bds/posts/auto_translation.ex @@ -0,0 +1,176 @@ +defmodule BDS.Posts.AutoTranslation do + @moduledoc false + + import Ecto.Query + + alias BDS.AI + alias BDS.Media + alias BDS.Metadata + alias BDS.Posts.Post + alias BDS.Posts.PostMedia + alias BDS.Posts.Translation + alias BDS.Repo + alias BDS.Tasks + + @doc """ + Schedule background auto-translation tasks for any missing target languages. + + Returns `:ok` even when nothing is scheduled (offline mode, no metadata, etc.). 
+ """ + @spec maybe_schedule(Post.t()) :: :ok + def maybe_schedule(%Post{do_not_translate: true}), do: :ok + + def maybe_schedule(%Post{} = post) do + with true <- configured?(), + {:ok, metadata} <- Metadata.get_project_metadata(post.project_id) do + post + |> missing_languages(metadata) + |> Enum.each(&queue_post(post, &1)) + else + _other -> :ok + end + + :ok + end + + @doc false + def missing_languages(%Post{} = post, metadata) do + source_language = normalize_language(post.language || metadata.main_language) + + configured_languages = + ([metadata.main_language] ++ (metadata.blog_languages || [])) + |> Enum.map(&normalize_language/1) + |> Enum.reject(&(&1 in [nil, ""])) + |> Enum.uniq() + + existing_languages = + Repo.all( + from translation in Translation, + where: translation.translation_for == ^post.id, + select: translation.language + ) + + configured_languages + |> Enum.reject(&(&1 == source_language or &1 in existing_languages)) + end + + defp queue_post(%Post{} = post, language) do + _ = + Tasks.submit_task( + "Auto-translate Post to #{language}", + fn report -> + report.(0.05, "Translating post to #{language}") + + with {:ok, translation} <- AI.translate_post(post.id, language, ai_opts()), + {:ok, saved_translation} <- + BDS.Posts.upsert_post_translation(post.id, language, %{ + title: translation.title, + excerpt: translation.excerpt, + content: translation.content, + auto_generated: true + }) do + report.(0.85, "Post translation saved") + :ok = queue_media_cascade(post, language) + report.(1.0, "Post translation complete") + %{post_id: post.id, translation_id: saved_translation.id, language: language} + else + {:error, reason} -> {:error, reason} + end + end, + task_attrs(post) + ) + + :ok + end + + defp queue_media_cascade(%Post{} = post, language) do + linked_media_ids(post.id) + |> Enum.each(fn media_id -> + if media_needed?(media_id, language) do + queue_media(post, media_id, language) + end + end) + + :ok + end + + defp queue_media(%Post{} = 
post, media_id, language) do + _ = + Tasks.submit_task( + "Auto-translate Media to #{language}", + fn report -> + report.(0.05, "Translating media to #{language}") + + with {:ok, translation} <- AI.translate_media(media_id, language, ai_opts()), + {:ok, saved_translation} <- + Media.upsert_media_translation(media_id, language, %{ + title: translation.title, + alt: translation.alt, + caption: translation.caption + }) do + report.(1.0, "Media translation complete") + %{media_id: media_id, translation_id: saved_translation.id, language: language} + else + {:error, reason} -> {:error, reason} + end + end, + task_attrs(post) + ) + + :ok + end + + defp media_needed?(media_id, language) do + case Repo.get(Media.Media, media_id) do + %Media.Media{language: source_language} when source_language not in [nil, ""] and source_language != language -> + not Repo.exists?( + from translation in Media.Translation, + where: translation.translation_for == ^media_id and translation.language == ^language + ) + + _other -> + false + end + end + + defp task_attrs(%Post{} = post), do: %{group_id: post.project_id, group_name: "AI"} + + defp ai_opts do + Application.get_env(:bds, :posts, []) + |> Keyword.get(:auto_translation_ai_opts, []) + end + + defp configured? 
do + mode = if AI.airplane_mode?(), do: :airplane, else: :online + + case AI.get_endpoint(mode) do + {:ok, %{url: url, model: model} = endpoint} + when is_binary(url) and url != "" and is_binary(model) and model != "" -> + mode == :airplane or present?(Map.get(endpoint, :api_key)) + + _other -> + false + end + end + + defp linked_media_ids(post_id) do + Repo.all( + from pm in PostMedia, + where: pm.post_id == ^post_id, + order_by: [asc: pm.sort_order, asc: pm.media_id], + select: pm.media_id + ) + end + + defp normalize_language(nil), do: "" + + defp normalize_language(language) do + language + |> to_string() + |> String.trim() + |> String.downcase() + end + + defp present?(value) when is_binary(value), do: String.trim(value) != "" + defp present?(value), do: not is_nil(value) +end diff --git a/lib/bds/posts/file_sync.ex b/lib/bds/posts/file_sync.ex new file mode 100644 index 0000000..9d45e1f --- /dev/null +++ b/lib/bds/posts/file_sync.ex @@ -0,0 +1,146 @@ +defmodule BDS.Posts.FileSync do + @moduledoc false + + alias BDS.Frontmatter + alias BDS.Persistence + alias BDS.Posts.Post + alias BDS.Posts.Translation + alias BDS.Projects + + @doc "Compute the canonical relative path for a published post." + @spec post_relative_path(String.t(), integer()) :: String.t() + def post_relative_path(slug, created_at) do + datetime = Persistence.from_unix_ms!(created_at) + year = Integer.to_string(datetime.year) + month = datetime.month |> Integer.to_string() |> String.pad_leading(2, "0") + Path.join(["posts", year, month, "#{slug}.md"]) + end + + @doc "Compute the canonical relative path for a translation file." 
+ @spec translation_relative_path(Post.t(), String.t()) :: String.t() + def translation_relative_path(post, language) do + datetime = Persistence.from_unix_ms!(post.created_at) + year = Integer.to_string(datetime.year) + month = datetime.month |> Integer.to_string() |> String.pad_leading(2, "0") + Path.join(["posts", year, month, "#{post.slug}.#{language}.md"]) + end + + @doc "Resolve the body to publish for a post, falling back to its existing file." + @spec publishable_post_body(Post.t(), String.t(), term()) :: String.t() + def publishable_post_body(%Post{content: content}, _full_path, _project) + when is_binary(content), do: content + + def publishable_post_body(%Post{file_path: file_path} = post, full_path, project) do + source_path = + if file_path in [nil, ""] do + full_path + else + Path.join(Projects.project_data_dir(project), file_path) + end + + published_post_body(post, source_path) + end + + @doc "Read the body of a previously-published post (DB content first, file fallback)." + @spec published_post_body(Post.t(), String.t()) :: String.t() + def published_post_body(%Post{content: content}, _full_path) when is_binary(content), + do: content + + def published_post_body(_post, full_path), do: read_markdown_body(full_path) + + @doc "Read the body section (after frontmatter) from a markdown file on disk." + @spec read_markdown_body(String.t()) :: String.t() + def read_markdown_body(path) do + case File.read(path) do + {:ok, contents} -> + case String.split(contents, "\n---\n", parts: 2) do + [_frontmatter, body] -> String.trim_trailing(body, "\n") + _parts -> "" + end + + {:error, _reason} -> + "" + end + end + + @doc "Serialize a post to a frontmatter+body string for the published file." 
+ @spec serialize_post_file(Post.t(), integer()) :: String.t() + def serialize_post_file(post, published_at) do + Frontmatter.serialize_document( + [ + {"id", post.id}, + {"title", post.title}, + {"slug", post.slug}, + {"excerpt", post.excerpt}, + {"status", :published}, + {"author", post.author}, + {"language", post.language}, + {"doNotTranslate", post.do_not_translate}, + {"templateSlug", post.template_slug}, + {"createdAt", post.created_at}, + {"updatedAt", post.updated_at}, + {"publishedAt", published_at}, + {"tags", post.tags || []}, + {"categories", post.categories || []} + ], + post.content + ) + end + + @doc "Serialize a translation row to a frontmatter+body string." + @spec serialize_translation_file(Translation.t(), integer()) :: String.t() + def serialize_translation_file(translation, published_at) do + Frontmatter.serialize_document( + [ + {"id", translation.id}, + {"translationFor", translation.translation_for}, + {"language", translation.language}, + {"title", translation.title}, + {"excerpt", translation.excerpt}, + {"status", :published}, + {"createdAt", translation.created_at}, + {"updatedAt", translation.updated_at}, + {"publishedAt", published_at} + ], + translation.content + ) + end + + @doc "Resolve the body of a translation, falling back to its existing file." + @spec publishable_translation_body(Translation.t(), String.t()) :: String.t() + def publishable_translation_body(%Translation{content: content}, _full_path) + when is_binary(content), do: content + + def publishable_translation_body(_translation, full_path) do + read_markdown_body(full_path) + end + + @doc "Delete a published post's file on disk (no-op if it has none)." 
+ @spec delete_post_file(Post.t()) :: :ok | {:error, term()} + def delete_post_file(%Post{file_path: file_path}) when file_path in [nil, ""], do: :ok + + def delete_post_file(%Post{} = post) do + project = Projects.get_project!(post.project_id) + full_path = Path.join(Projects.project_data_dir(project), post.file_path) + rm_quiet(full_path) + end + + @doc "Delete a translation's file on disk (no-op if it has none)." + @spec delete_translation_file(Translation.t()) :: :ok | {:error, term()} + def delete_translation_file(%Translation{file_path: file_path}) when file_path in [nil, ""], + do: :ok + + def delete_translation_file(%Translation{} = translation) do + project = Projects.get_project!(translation.project_id) + full_path = Path.join(Projects.project_data_dir(project), translation.file_path) + rm_quiet(full_path) + end + + defp rm_quiet(full_path) do + case File.rm(full_path) do + :ok -> :ok + {:error, :enoent} -> :ok + {:error, reason} -> {:error, reason} + end + end +end diff --git a/lib/bds/posts/rebuild_from_files.ex b/lib/bds/posts/rebuild_from_files.ex new file mode 100644 index 0000000..c6a5684 --- /dev/null +++ b/lib/bds/posts/rebuild_from_files.ex @@ -0,0 +1,320 @@ +defmodule BDS.Posts.RebuildFromFiles do + @moduledoc false + + alias BDS.DocumentFields + alias BDS.Embeddings + alias BDS.Frontmatter + alias BDS.Persistence + alias BDS.Posts.Post + alias BDS.Posts.Slugs + alias BDS.Posts.Translation + alias BDS.Posts.TranslationValidation + alias BDS.Projects + alias BDS.Rebuild + alias BDS.Repo + alias BDS.Search + + @spec rebuild_posts_from_files(String.t(), keyword()) :: {:ok, [Post.t()]} + def rebuild_posts_from_files(project_id, opts \\ []) do + project = Projects.get_project!(project_id) + on_progress = progress_callback(opts) + + rebuild_files = + project + |> Projects.project_data_dir() + |> Path.join("posts") + |> TranslationValidation.list_matching_files("*.md") + |> Rebuild.parallel_map(&parse_rebuild_file(project, &1)) + + total_files = 
length(rebuild_files) + :ok = report_rebuild_started(on_progress, total_files, "post files") + + {translation_files, post_files} = + Enum.split_with(rebuild_files, &TranslationValidation.translation_rebuild_file?/1) + + posts = + post_files + |> Enum.with_index(1) + |> Enum.map(fn {file, index} -> + post = + upsert_post_from_rebuild_file(project_id, file, + sync_search: false, + sync_embeddings: false + ) + + :ok = report_rebuild_progress(on_progress, index, total_files, "post files") + post + end) + + translation_files + |> Enum.with_index(length(post_files) + 1) + |> Enum.each(fn {file, index} -> + upsert_post_translation_from_rebuild_file(project_id, file, sync_search: false) + :ok = report_rebuild_progress(on_progress, index, total_files, "post files") + end) + + if Keyword.get(opts, :reindex_search, true) do + :ok = report_rebuild_phase(on_progress, 0.97, "Refreshing post search index") + + :ok = + Search.reindex_posts(project_id, + on_progress: scaled_progress_reporter(on_progress, 0.97, 0.99) + ) + end + + if Keyword.get(opts, :rebuild_embeddings, true) do + :ok = report_rebuild_phase(on_progress, 0.99, "Refreshing post embeddings") + + {:ok, _rebuilt_post_ids} = + Embeddings.rebuild_project(project_id, + on_progress: scaled_progress_reporter(on_progress, 0.99, 1.0) + ) + end + + {:ok, posts} + end + + @spec import_orphan_post_file(String.t(), String.t()) :: + {:ok, Post.t()} | {:error, :not_found | :unsupported_file} + def import_orphan_post_file(project_id, relative_path) do + project = Projects.get_project!(project_id) + full_path = Path.join(Projects.project_data_dir(project), relative_path) + + if File.exists?(full_path) do + rebuild_file = parse_rebuild_file(project, full_path) + + if TranslationValidation.translation_rebuild_file?(rebuild_file) do + {:error, :unsupported_file} + else + fields = + rebuild_file.fields + |> Map.put("id", unique_post_id(Map.get(rebuild_file.fields, "id"))) + |> Map.put( + "slug", + Slugs.unique_for_import(project_id, 
Map.fetch!(rebuild_file.fields, "slug")) + ) + + {:ok, upsert_post_from_rebuild_file(project_id, %{rebuild_file | fields: fields})} + end + else + {:error, :not_found} + end + end + + @spec import_orphan_post_translation_file(String.t(), String.t()) :: + {:ok, Translation.t()} | {:error, :not_found | :unsupported_file | :conflict} + def import_orphan_post_translation_file(project_id, relative_path) do + project = Projects.get_project!(project_id) + full_path = Path.join(Projects.project_data_dir(project), relative_path) + + if File.exists?(full_path) do + rebuild_file = parse_rebuild_file(project, full_path) + + if TranslationValidation.translation_rebuild_file?(rebuild_file) do + source_post_id = Map.fetch!(rebuild_file.fields, "translationFor") + language = TranslationValidation.normalize_language(Map.fetch!(rebuild_file.fields, "language")) + + case Repo.get(Post, source_post_id) do + nil -> + {:error, :not_found} + + %Post{} = post -> + if TranslationValidation.normalize_language(post.language) == language or + Repo.get_by(Translation, translation_for: source_post_id, language: language) do + {:error, :conflict} + else + fields = Map.put(rebuild_file.fields, "id", Ecto.UUID.generate()) + + {:ok, + upsert_post_translation_from_rebuild_file( + project_id, + %{rebuild_file | fields: fields}, + sync_search: true + )} + end + end + else + {:error, :unsupported_file} + end + else + {:error, :not_found} + end + end + + @doc false + def upsert_post_from_file(project_id, project, path) do + rebuild_file = parse_rebuild_file(project, path) + upsert_post_from_rebuild_file(project_id, rebuild_file) + end + + @doc false + def upsert_post_from_rebuild_file(project_id, rebuild_file, opts \\ []) do + fields = rebuild_file.fields + now = Persistence.now_ms() + + attrs = %{ + id: DocumentFields.get(fields, "id") || Ecto.UUID.generate(), + project_id: project_id, + title: DocumentFields.get(fields, "title") || "", + slug: DocumentFields.fetch!(fields, "slug"), + excerpt: 
Map.get(fields, "excerpt"), + content: nil, + status: parse_post_status(DocumentFields.get(fields, "status", "published")), + author: Map.get(fields, "author"), + created_at: DocumentFields.get(fields, "createdAt", now), + updated_at: DocumentFields.get(fields, "updatedAt", now), + published_at: DocumentFields.get(fields, "publishedAt"), + file_path: rebuild_file.relative_path, + checksum: nil, + tags: Map.get(fields, "tags", []), + categories: Map.get(fields, "categories", []), + template_slug: DocumentFields.get(fields, "templateSlug"), + language: Map.get(fields, "language"), + do_not_translate: DocumentFields.get(fields, "doNotTranslate", false), + published_title: nil, + published_content: nil, + published_tags: nil, + published_categories: nil, + published_excerpt: nil + } + + post = + Repo.get(Post, attrs.id) || + Repo.get_by(Post, project_id: project_id, file_path: rebuild_file.relative_path) || + Repo.get_by(Post, project_id: project_id, slug: attrs.slug) || %Post{} + + post = + post + |> Post.changeset(attrs) + |> Repo.insert_or_update!() + + if Keyword.get(opts, :sync_search, true) do + :ok = Search.sync_post(post) + end + + if Keyword.get(opts, :sync_embeddings, true) do + :ok = Embeddings.sync_post(post) + end + + post + end + + @doc false + def upsert_post_translation_from_rebuild_file(project_id, rebuild_file, opts) do + fields = rebuild_file.fields + source_post_id = DocumentFields.fetch!(fields, "translationFor") + source_post = Repo.get_by!(Post, project_id: project_id, id: source_post_id) + now = Persistence.now_ms() + language = TranslationValidation.normalize_language(DocumentFields.fetch!(fields, "language")) + + translation = + Repo.get_by(Translation, translation_for: source_post_id, language: language) || + %Translation{} + + attrs = %{ + id: DocumentFields.get(fields, "id") || Ecto.UUID.generate(), + project_id: project_id, + translation_for: source_post_id, + language: language, + title: DocumentFields.get(fields, "title") || "", + 
excerpt: Map.get(fields, "excerpt"), + content: nil, + status: parse_translation_status(DocumentFields.get(fields, "status", "published")), + created_at: DocumentFields.get(fields, "createdAt", source_post.created_at || now), + updated_at: + DocumentFields.get( + fields, + "updatedAt", + source_post.updated_at || source_post.created_at || now + ), + published_at: DocumentFields.get(fields, "publishedAt", source_post.published_at), + file_path: rebuild_file.relative_path, + checksum: nil + } + + translation + |> Translation.changeset(attrs) + |> Repo.insert_or_update!() + |> tap(fn _translation -> + if Keyword.get(opts, :sync_search, true) do + :ok = Search.sync_post(source_post_id) + end + end) + end +

# Reads `path`, parses its frontmatter and returns the absolute path, the path
# relative to the project's data directory, and the parsed fields.
# Raises if the file cannot be read or the document does not parse to `{:ok, _}`.
@doc false
def parse_rebuild_file(project, path) do
  {:ok, %{fields: fields}} = path |> File.read!() |> Frontmatter.parse_document()
  data_dir = Projects.project_data_dir(project)

  %{path: path, relative_path: Path.relative_to(path, data_dir), fields: fields}
end

# Atoms pass through; anything else must name an already-existing atom.
@doc false
def parse_post_status(status) do
  if is_atom(status), do: status, else: String.to_existing_atom(status)
end

# Same contract as `parse_post_status/1`, for translation rows.
@doc false
def parse_translation_status(status) do
  if is_atom(status), do: status, else: String.to_existing_atom(status)
end

# Returns the `:on_progress` option only when it is a 2-arity function.
@doc false
def progress_callback(opts) do
  callback = Keyword.get(opts, :on_progress)
  if is_function(callback, 2), do: callback, else: nil
end

# Emits the initial progress message; a no-op when there is no callback.
@doc false
def report_rebuild_started(callback, total, label) do
  cond do
    is_nil(callback) ->
      :ok

    # Strict comparison so only the integer 0 counts as "nothing to do".
    total === 0 ->
      callback.(1.0, "No #{label} found")
      :ok

    true ->
      callback.(0.05, "Rebuilding #{label} (0/#{total})")
      :ok
  end
end

+ @doc false + def report_rebuild_progress(nil, _current, _total, _label), do: :ok + def report_rebuild_progress(_callback, _current, 0, _label), do: :ok + + def report_rebuild_progress(callback, current, total, label) do +
callback.(0.05 + 0.95 * (current / total), "Rebuilding #{label} (#{current}/#{total})") + :ok + end +

# Wraps a 2-arity reporter so a 0..1 value is mapped onto start_value..end_value.
defp scaled_progress_reporter(nil, _start_value, _end_value), do: nil

defp scaled_progress_reporter(report, start_value, end_value) when is_function(report, 2) do
  &report.(start_value + (end_value - start_value) * &1, &2)
end

# Forwards one progress update when a callback is present; always returns :ok.
defp report_rebuild_phase(callback, progress, message) do
  if not is_nil(callback), do: callback.(progress, message)
  :ok
end

# Keeps `id` unless it is nil or already used by a post or translation row.
defp unique_post_id(id) do
  cond do
    is_nil(id) -> Ecto.UUID.generate()
    Repo.get(Post, id) || Repo.get(Translation, id) -> Ecto.UUID.generate()
    true -> id
  end
end
+end diff --git a/lib/bds/posts/slugs.ex b/lib/bds/posts/slugs.ex new file mode 100644 index 0000000..89d8a15 --- /dev/null +++ b/lib/bds/posts/slugs.ex @@ -0,0 +1,86 @@ +defmodule BDS.Posts.Slugs do + @moduledoc false + + import Ecto.Query + + alias BDS.Posts.Post + alias BDS.Repo + alias BDS.Slug +

# True when no post in the project holds the trimmed slug, or when the post
# found is `exclude_post_id` itself.
@spec available(String.t(), String.t(), String.t() | nil) :: boolean()
def available(project_id, slug, exclude_post_id \\ nil) do
  normalized_slug = String.trim(to_string(slug))

  holder_id =
    Post
    |> where([post], post.project_id == ^project_id and post.slug == ^normalized_slug)
    |> select([post], post.id)
    |> limit(1)
    |> Repo.one()

  is_nil(holder_id) or holder_id == exclude_post_id
end

# Slugifies the title (or "untitled" for an empty one) and appends -2, -3, ...
# until `available/3` accepts the candidate.
@spec unique_for_title(String.t(), String.t(), String.t() | nil) :: String.t()
def unique_for_title(project_id, title, exclude_post_id \\ nil) do
  base_slug = Slug.slugify(default_source(title))

  if available(project_id, base_slug, exclude_post_id) do
    base_slug
  else
    2
    |> Stream.iterate(&(&1 + 1))
    |> Stream.map(&"#{base_slug}-#{&1}")
    |> Enum.find(&available(project_id, &1, exclude_post_id))
  end
end

@doc "Pick a free slug, falling back to `untitled` for blank input."
@spec unique(String.t(), String.t()) :: String.t()
def unique(project_id, base_slug) do
  normalized = if base_slug == "", do: "untitled", else: base_slug

  if available?(project_id, normalized) do
    normalized
  else
    find_unique(project_id, normalized, 2)
  end
end

@doc """
Pick a free slug for an imported post by re-slugifying the source value.

Delegates to `unique/2` after slugifying, so a source that slugifies to an
empty string gets the same `untitled` fallback instead of an empty slug.
"""
@spec unique_for_import(String.t(), String.t()) :: String.t()
def unique_for_import(project_id, slug) do
  unique(project_id, slug |> default_source() |> Slug.slugify())
end

@doc "Returns `untitled` for an empty source string, otherwise the source unchanged."
@spec default_source(String.t()) :: String.t()
def default_source(""), do: "untitled"
def default_source(title), do: title

# Tries base_slug-2, base_slug-3, ... and returns the first free candidate.
defp find_unique(project_id, base_slug, suffix) do
  candidate = "#{base_slug}-#{suffix}"

  if available?(project_id, candidate) do
    candidate
  else
    find_unique(project_id, base_slug, suffix + 1)
  end
end

# True when no post in the project already uses `slug` (compared as given).
defp available?(project_id, slug) do
  not Repo.exists?(
    from post in Post, where: post.project_id == ^project_id and post.slug == ^slug
  )
end
+end diff --git a/lib/bds/posts/translation_validation.ex b/lib/bds/posts/translation_validation.ex new file mode 100644 index 0000000..921056f --- /dev/null +++ b/lib/bds/posts/translation_validation.ex @@ -0,0 +1,464 @@ +defmodule BDS.Posts.TranslationValidation do + @moduledoc false + + import Ecto.Query + + alias BDS.DocumentFields + alias BDS.Frontmatter + alias BDS.Metadata + alias BDS.Posts.Post + alias BDS.Posts.RebuildFromFiles + alias BDS.Posts.Translation + alias BDS.Posts.Translations + alias BDS.Projects + alias BDS.Repo + alias BDS.Search + + @type report :: %{ + required(:checked_database_row_count) => non_neg_integer(), + required(:checked_filesystem_file_count) => non_neg_integer(), + required(:invalid_database_rows) => [map()], +
required(:invalid_filesystem_files) => [map()], + required(:missing) => [map()], + required(:orphan_files) => [String.t()], + required(:do_not_translate_posts) => [String.t()] + } + + @doc """ + Validate translation rows + on-disk translation files for a project. + + The result map preserves both the modern invalid-item shape + (`invalid_database_rows`, `invalid_filesystem_files`, etc.) and the legacy + summary fields (`missing`, `orphan_files`, `do_not_translate_posts`). + """ + @spec validate(String.t(), keyword()) :: {:ok, report()} + def validate(project_id, opts \\ []) do + project = Projects.get_project!(project_id) + {:ok, metadata} = Metadata.get_project_metadata(project_id) + on_progress = RebuildFromFiles.progress_callback(opts) + + source_posts = + Repo.all( + from post in Post, + where: post.project_id == ^project_id, + order_by: [asc: post.created_at, asc: post.slug] + ) + + source_post_map = Map.new(source_posts, &{&1.id, &1}) + + translation_rows = + Repo.all( + from translation in Translation, + where: translation.project_id == ^project_id, + order_by: [asc: translation.translation_for, asc: translation.language, asc: translation.id] + ) + + project_data_dir = Projects.project_data_dir(project) + + markdown_files = + project_data_dir + |> Path.join("posts") + |> list_markdown_files_recursive() + + total_items = length(translation_rows) + length(markdown_files) + :ok = RebuildFromFiles.report_rebuild_started(on_progress, total_items, "translations") + + invalid_database_rows = + translation_rows + |> Enum.with_index(1) + |> Enum.flat_map(fn {translation, index} -> + :ok = RebuildFromFiles.report_rebuild_progress(on_progress, index, total_items, "translations") + + case invalid_database_translation_issue(translation, source_post_map, metadata) do + nil -> [] + issue -> [issue] + end + end) + |> Enum.sort_by(&issue_sort_key/1) + + {checked_filesystem_file_count, invalid_filesystem_files} = + markdown_files + |> Enum.with_index(length(translation_rows) 
+ 1) + |> Enum.reduce({0, []}, fn {file_path, index}, {count, issues} -> + :ok = RebuildFromFiles.report_rebuild_progress(on_progress, index, total_items, "translations") + + case invalid_filesystem_translation_issue(file_path, source_post_map, metadata) do + {:ok, nil} -> {count + 1, issues} + {:ok, issue} -> {count + 1, [issue | issues]} + :skip -> {count, issues} + end + end) + + missing = legacy_missing_entries(source_posts, translation_rows, metadata) + orphan_files = legacy_orphan_files(invalid_filesystem_files, project_data_dir) + do_not_translate_posts = legacy_do_not_translate_posts(source_posts) + + {:ok, + %{ + checked_database_row_count: length(translation_rows), + checked_filesystem_file_count: checked_filesystem_file_count, + invalid_database_rows: invalid_database_rows, + invalid_filesystem_files: + invalid_filesystem_files |> Enum.reverse() |> Enum.sort_by(&issue_sort_key/1), + missing: missing, + orphan_files: orphan_files, + do_not_translate_posts: do_not_translate_posts + }} + end + + @doc "Apply fixes for the issues described in a validation `report`." 
@spec fix_invalid(map()) ::
        {:ok,
         %{
           deleted_database_rows: non_neg_integer(),
           deleted_files: non_neg_integer(),
           flushed_translations: non_neg_integer()
         }}
def fix_invalid(report) when is_map(report) do
  %{invalid_database_rows: database_issues, invalid_filesystem_files: file_issues} =
    normalize_report(report)

  # Database rows are repaired first, then files are removed, then search is
  # re-synced for every post whose rows were touched.
  row_outcomes = Enum.map(database_issues, &fix_invalid_database_row/1)

  deleted_database_rows = Enum.count(row_outcomes, &match?({:deleted, _post_id}, &1))
  flushed_translations = Enum.count(row_outcomes, &match?({:flushed, _post_id}, &1))

  # `:noop` outcomes carry no post id and are skipped by the tuple pattern.
  synced_post_ids =
    for {_outcome, post_id} <- row_outcomes, reduce: MapSet.new() do
      ids -> maybe_put_synced_post(ids, post_id)
    end

  deleted_files = Enum.count(file_issues, &delete_validation_file(&1.file_path))

  Enum.each(synced_post_ids, &Search.sync_post/1)

  {:ok,
   %{
     deleted_database_rows: deleted_database_rows,
     deleted_files: deleted_files,
     flushed_translations: flushed_translations
   }}
end

@doc "True if the parsed rebuild file represents a translation (`translationFor` set, no `slug`)."
@spec translation_rebuild_file?(map()) :: boolean()
def translation_rebuild_file?(%{fields: fields}) do
  has_source? = DocumentFields.has_key?(fields, "translationFor")
  has_source? and not DocumentFields.has_key?(fields, "slug")
end

@doc "Recursively list `.md`/`.markdown`/`.mdx` files under `dir`."
@spec list_markdown_files_recursive(String.t()) :: [String.t()]
def list_markdown_files_recursive(dir) do
  files =
    for pattern <- ["*.md", "*.markdown", "*.mdx"],
        file <- list_matching_files(dir, pattern),
        uniq: true do
      file
    end

  Enum.sort(files)
end

@doc "List files in `dir` matching `pattern` (recursive glob)."
@spec list_matching_files(String.t(), String.t()) :: [String.t()]
def list_matching_files(dir, pattern) do
  case File.dir?(dir) do
    true -> [dir, "**", pattern] |> Path.join() |> Path.wildcard() |> Enum.sort()
    false -> []
  end
end

@doc false
def normalize_language(value), do: do_normalize_language(value)

# ----- internals -----

# Classifies one translation row. Checks run in priority order: missing source
# post, same language as the canonical post, source flagged do-not-translate,
# published row still carrying content. Returns an issue map or nil.
defp invalid_database_translation_issue(%Translation{} = translation, source_post_map, metadata) do
  source_post = Map.get(source_post_map, translation.translation_for)
  normalized_language = do_normalize_language(translation.language)

  kind =
    cond do
      is_nil(source_post) ->
        "missing-source-post"

      canonical_language?(source_post, normalized_language, metadata) ->
        "same-language-as-canonical"

      source_post.do_not_translate ->
        "do-not-translate-has-translations"

      translation.status == :published and present?(translation.content) ->
        "content-in-database"

      true ->
        nil
    end

  case kind do
    nil ->
      nil

    _found ->
      # Only the same-language issue reports the canonical language; the
      # other kinds leave it nil.
      canonical =
        if kind == "same-language-as-canonical" do
          canonical_language(source_post, metadata)
        end

      issue(%{
        issue: kind,
        translation_id: translation.id,
        translation_for: translation.translation_for,
        canonical_language: canonical,
        translation_language: normalized_language,
        title: translation.title,
        file_path: blank_to_nil(translation.file_path)
      })
  end
end

+ defp invalid_filesystem_translation_issue(file_path, source_post_map,
metadata) do + with {:ok, contents} <- File.read(file_path), + {:ok, %{fields: fields}} <- Frontmatter.parse_document(contents), + true <- translation_rebuild_file?(%{fields: fields}) do + translation_for = DocumentFields.get(fields, "translationFor") + source_post = Map.get(source_post_map, translation_for) + normalized_language = do_normalize_language(DocumentFields.get(fields, "language")) + title = DocumentFields.get(fields, "title") + + result = + cond do + is_nil(source_post) -> + issue(%{ + issue: "missing-source-post", + translation_for: translation_for, + translation_language: normalized_language, + title: title, + file_path: file_path + }) + + canonical_language?(source_post, normalized_language, metadata) -> + issue(%{ + issue: "same-language-as-canonical", + translation_for: translation_for, + canonical_language: canonical_language(source_post, metadata), + translation_language: normalized_language, + title: title, + file_path: file_path + }) + + source_post.do_not_translate -> + issue(%{ + issue: "do-not-translate-has-translations", + translation_for: translation_for, + translation_language: normalized_language, + title: title, + file_path: file_path + }) + + true -> + nil + end + + {:ok, result} + else + false -> :skip + _other -> :skip + end + end + + defp normalize_report(report) do + %{ + checked_database_row_count: map_value(report, :checked_database_row_count, 0), + checked_filesystem_file_count: map_value(report, :checked_filesystem_file_count, 0), + invalid_database_rows: + report |> map_value(:invalid_database_rows, []) |> Enum.map(&normalize_issue/1), + invalid_filesystem_files: + report |> map_value(:invalid_filesystem_files, []) |> Enum.map(&normalize_issue/1) + } + end + + defp legacy_missing_entries(source_posts, translation_rows, metadata) do + configured_languages = + ([Map.get(metadata, :main_language)] ++ Map.get(metadata, :blog_languages, [])) + |> Enum.map(&do_normalize_language/1) + |> Enum.reject(&(&1 in [nil, ""])) + |> 
Enum.uniq() + + existing_languages_by_post = + Enum.reduce(translation_rows, %{}, fn translation, acc -> + Map.update( + acc, + translation.translation_for, + MapSet.new([do_normalize_language(translation.language)]), + &MapSet.put(&1, do_normalize_language(translation.language)) + ) + end) + + source_posts + |> Enum.filter(&(&1.status == :published and not &1.do_not_translate)) + |> Enum.flat_map(fn post -> + canonical = canonical_language(post, metadata) + existing_languages = Map.get(existing_languages_by_post, post.id, MapSet.new()) + + configured_languages + |> Enum.reject(&(&1 == canonical or MapSet.member?(existing_languages, &1))) + |> Enum.map(&%{post_id: post.id, language: &1}) + end) + |> Enum.sort_by(&{&1.post_id, &1.language}) + end +

# Sorted file paths of the "missing-source-post" file issues, each passed
# through `relative_project_data_path/2`; entries without a path are dropped.
defp legacy_orphan_files(invalid_filesystem_files, project_data_dir) do
  invalid_filesystem_files
  |> Enum.flat_map(fn
    %{issue: "missing-source-post"} = entry ->
      case relative_project_data_path(Map.get(entry, :file_path), project_data_dir) do
        nil -> []
        path -> [path]
      end

    _other ->
      []
  end)
  |> Enum.sort()
end

# Sorted ids of published posts flagged do-not-translate.
defp legacy_do_not_translate_posts(source_posts) do
  ids =
    for %{status: :published, do_not_translate: flagged?, id: id} <- source_posts,
        flagged? do
      id
    end

  Enum.sort(ids)
end

# Rebuilds an issue map with atom keys from a map that may use atom or string
# keys. The id, canonical language, title and path go through `blank_to_nil/1`.
defp normalize_issue(issue) when is_map(issue) do
  read = fn key -> map_value(issue, key) end

  %{
    issue: read.(:issue),
    translation_id: blank_to_nil(read.(:translation_id)),
    translation_for: read.(:translation_for),
    canonical_language: blank_to_nil(read.(:canonical_language)),
    translation_language: read.(:translation_language),
    title: blank_to_nil(read.(:title)),
    file_path: blank_to_nil(read.(:file_path))
  }
end

+ defp fix_invalid_database_row(%{issue: "content-in-database", translation_id: translation_id}) + when is_binary(translation_id) do + case Repo.get(Translation, translation_id) do + %Translation{} = translation -> + case Repo.get(Post,
translation.translation_for) do + %Post{} = post -> + :ok = Translations.publish_translation(post, translation) + {:flushed, translation.translation_for} + + nil -> + :noop + end + + nil -> + :noop + end + end +

# Any other issue that carries a translation id: delete the row if it exists.
defp fix_invalid_database_row(%{translation_id: translation_id, translation_for: translation_for})
     when is_binary(translation_id) do
  with %Translation{} = translation <- Repo.get(Translation, translation_id) do
    Repo.delete!(translation)
    {:deleted, translation_for}
  else
    nil -> :noop
  end
end

defp fix_invalid_database_row(_issue), do: :noop

# True only when a file was actually removed; blank paths and any `File.rm/1`
# error count as "not deleted".
defp delete_validation_file(file_path) when file_path in [nil, ""], do: false
defp delete_validation_file(file_path), do: File.rm(file_path) == :ok

# Builds the full issue shape; keys absent from `attrs` become nil.
defp issue(attrs) do
  Map.new(
    [
      :issue,
      :translation_id,
      :translation_for,
      :canonical_language,
      :translation_language,
      :title,
      :file_path
    ],
    &{&1, Map.get(attrs, &1)}
  )
end

# Sort key "translation_for:translation_id:file_path", with nil rendered as "".
defp issue_sort_key(issue) do
  [:translation_for, :translation_id, :file_path]
  |> Enum.map_join(":", fn key -> to_string(Map.get(issue, key) || "") end)
end

# The post's own normalized language, or the project's main language when the
# post has none.
defp canonical_language(source_post, metadata) do
  case do_normalize_language(source_post.language) do
    "" -> do_normalize_language(Map.get(metadata, :main_language))
    language -> language
  end
end

defp canonical_language?(source_post, language, metadata) do
  case canonical_language(source_post, metadata) do
    "" -> false
    canonical -> canonical == do_normalize_language(language)
  end
end

# Lowercases and keeps only the part before the first "-"; nil becomes "".
defp do_normalize_language(nil), do: ""

defp do_normalize_language(language) do
  [primary | _region] =
    language
    |> to_string()
    |> String.downcase()
    |> String.split("-", parts: 2)

  primary
end

+ defp
map_value(map, key, default \\ nil) when is_map(map) do + Map.get(map, key, Map.get(map, Atom.to_string(key), default)) + end +

# Trims binaries and turns blank ones into nil; other values pass through.
defp blank_to_nil(value) when is_binary(value) do
  trimmed = String.trim(value)
  if trimmed == "", do: nil, else: trimmed
end

defp blank_to_nil(value), do: value

# `Path.relative_to/2` already returns the path unchanged when it is not under
# the data directory, so no extra fallback branch is needed.
defp relative_project_data_path(nil, _project_data_dir), do: nil

defp relative_project_data_path(file_path, project_data_dir),
  do: Path.relative_to(file_path, project_data_dir)

# Adds `post_id` to the set only when it is a non-empty binary.
defp maybe_put_synced_post(set, post_id) do
  if is_binary(post_id) and post_id != "" do
    MapSet.put(set, post_id)
  else
    set
  end
end

# nil and whitespace-only binaries are "not present"; everything else is.
defp present?(nil), do: false
defp present?(value) when is_binary(value), do: String.trim(value) != ""
defp present?(_value), do: true
+end diff --git a/lib/bds/posts/translations.ex b/lib/bds/posts/translations.ex new file mode 100644 index 0000000..ba48199 --- /dev/null +++ b/lib/bds/posts/translations.ex @@ -0,0 +1,279 @@ +defmodule BDS.Posts.Translations do + @moduledoc false + + import Ecto.Query + + alias BDS.Persistence + alias BDS.Posts + alias BDS.Posts.FileSync + alias BDS.Posts.Post + alias BDS.Posts.RebuildFromFiles + alias BDS.Posts.Translation + alias BDS.Projects + alias BDS.Repo + alias BDS.Search + + @type attrs :: %{optional(atom()) => term(), optional(String.t()) => term()} + + @spec publish_post_translation(String.t(), String.t() | atom()) :: + {:ok, Translation.t()} | {:error, :not_found | term()} + def publish_post_translation(post_id, language) do + normalized_language = language |> to_string() |> String.trim() |> String.downcase() + + case Repo.get_by(Translation, translation_for: post_id, language: normalized_language) do + nil -> + {:error, :not_found} + + %Translation{} -> + with {:ok, _post} <- Posts.publish_post(post_id), + %Translation{} = translation <- + Repo.get_by(Translation,
translation_for: post_id, language: normalized_language) do + {:ok, translation} + else + nil -> {:error, :not_found} + error -> error + end + end + end +

# Lists every translation row of a post, ordered by language.
@spec list_post_translations(String.t()) :: {:ok, [Translation.t()]}
def list_post_translations(post_id) do
  translations =
    Translation
    |> where([translation], translation.translation_for == ^post_id)
    |> order_by([translation], asc: translation.language)
    |> Repo.all()

  {:ok, translations}
end

# Creates or updates the translation of `post_id` for `language`.
# Posts flagged `do_not_translate` are rejected with an error changeset. After a
# successful save the source post may be reopened as a draft (see
# `maybe_reopen_source_post_for_manual_translation/2`) and search is re-synced.
@spec upsert_post_translation(String.t(), String.t() | atom(), attrs()) ::
        {:ok, Translation.t()} | {:error, :not_found | Ecto.Changeset.t()}
def upsert_post_translation(post_id, language, attrs) do
  case Repo.get(Post, post_id) do
    nil ->
      {:error, :not_found}

    %Post{do_not_translate: true} = post ->
      changeset =
        post
        |> Post.changeset(%{})
        |> Ecto.Changeset.add_error(
          :do_not_translate,
          "cannot add translations when do_not_translate is true"
        )

      {:error, changeset}

    %Post{} = post ->
      now = Persistence.now_ms()
      normalized_language = normalize_language(language)

      existing =
        Repo.get_by(Translation, translation_for: post.id, language: normalized_language) ||
          %Translation{}

      changeset =
        Translation.changeset(
          existing,
          normalize_translation_updates(post, existing, normalized_language, attrs, now)
        )

      # A failed insert/update falls through `with` and is returned unchanged.
      with {:ok, saved_translation} <- Repo.insert_or_update(changeset) do
        {:ok, _post} = maybe_reopen_source_post_for_manual_translation(post, attrs)
        :ok = Search.sync_post(post.id)
        {:ok, saved_translation}
      end
  end
end

# Removes the translation's file, then its row, then re-syncs search for the
# source post.
@spec delete_post_translation(String.t()) :: {:ok, :deleted} | {:error, :not_found}
def delete_post_translation(translation_id) do
  with %Translation{} = translation <- Repo.get(Translation, translation_id) do
    :ok = FileSync.delete_translation_file(translation)
    Repo.delete!(translation)
    :ok = Search.sync_post(translation.translation_for)
    {:ok, :deleted}
  else
    nil -> {:error, :not_found}
  end
end

+ @spec
sync_post_translation_from_file(String.t()) :: + {:ok, Translation.t()} | {:error, :not_found} +
# Re-reads the translation's file from disk and upserts the row from it.
# A missing row, a blank `file_path` or a missing file all yield
# `{:error, :not_found}`.
def sync_post_translation_from_file(translation_id) do
  with %Translation{file_path: file_path} = translation when file_path not in [nil, ""] <-
         Repo.get(Translation, translation_id),
       project = Projects.get_project!(translation.project_id),
       full_path = Path.join(Projects.project_data_dir(project), file_path),
       true <- File.exists?(full_path) do
    rebuild_file = RebuildFromFiles.parse_rebuild_file(project, full_path)

    {:ok,
     RebuildFromFiles.upsert_post_translation_from_rebuild_file(
       translation.project_id,
       rebuild_file,
       sync_search: true
     )}
  else
    _missing -> {:error, :not_found}
  end
end

# Rewrites the on-disk file of an already published translation and returns
# the reloaded row. Rows that are unpublished or have no file path are treated
# as not found.
@spec rewrite_published_post_translation(String.t()) ::
        {:ok, Translation.t()} | {:error, :not_found}
def rewrite_published_post_translation(translation_id) do
  case Repo.get(Translation, translation_id) do
    %Translation{status: :published, file_path: file_path} = translation
    when file_path not in [nil, ""] ->
      source_post = Repo.get!(Post, translation.translation_for)
      :ok = publish_translation(source_post, translation)
      {:ok, Repo.get!(Translation, translation_id)}

    _other ->
      {:error, :not_found}
  end
end

# Publishes every draft translation of `post`; other statuses are left alone.
@doc false
def publish_post_translations(%Post{} = post) do
  Translation
  |> where([translation], translation.translation_for == ^post.id)
  |> Repo.all()
  |> Enum.filter(&(&1.status == :draft))
  |> Enum.each(&publish_translation(post, &1))

  :ok
end

+ @doc false + def publish_translation(%Post{} = post, %Translation{} = translation) do + project = Projects.get_project!(post.project_id) + published_at = translation.published_at || Persistence.now_ms() + relative_path = FileSync.translation_relative_path(post, translation.language) + full_path =
Path.join(Projects.project_data_dir(project), relative_path) + updated_at = Persistence.now_ms() + body = FileSync.publishable_translation_body(translation, full_path) + + :ok = + Persistence.atomic_write( + full_path, + FileSync.serialize_translation_file( + %{translation | updated_at: updated_at, content: body}, + published_at + ) + ) + + translation + |> Translation.changeset(%{ + status: :published, + published_at: published_at, + file_path: relative_path, + content: nil, + updated_at: updated_at + }) + |> Repo.update!() + + :ok + end +

# Builds the attribute map for the translation changeset.
# A published translation whose title, excerpt or content changes is forced
# back to :draft; otherwise the requested status, the current status, or
# :draft is used, in that order.
defp normalize_translation_updates(post, %Translation{} = translation, language, attrs, now) do
  # A missing status stays nil; `with` returns the non-matching nil as-is.
  requested_status =
    with status when not is_nil(status) <- attr(attrs, :status) do
      RebuildFromFiles.parse_translation_status(status)
    end

  # Only fields actually supplied (non-nil) count as updates.
  updates =
    Enum.reduce([:title, :excerpt, :content], %{}, fn field, acc ->
      maybe_put(acc, field, attr(attrs, field))
    end)

  reopened? =
    translation.status == :published and translation_content_change?(translation, updates)

  status =
    if reopened? do
      :draft
    else
      requested_status || translation.status || :draft
    end

  first_published_at = if status == :published, do: now, else: nil

  %{
    id: translation.id || Ecto.UUID.generate(),
    project_id: post.project_id,
    translation_for: post.id,
    language: language,
    title: Map.get(updates, :title, translation.title),
    excerpt: Map.get(updates, :excerpt, translation.excerpt),
    content: Map.get(updates, :content, translation.content),
    status: status,
    created_at: translation.created_at || now,
    updated_at: now,
    published_at: translation.published_at || first_published_at,
    file_path: translation.file_path || "",
    checksum: translation.checksum
  }
end

# True when any supplied title/excerpt/content differs from the stored value.
defp translation_content_change?(translation, updates) do
  Enum.any?([:title, :excerpt, :content], fn field ->
    Map.has_key?(updates, field) and Map.fetch!(updates, field) != Map.get(translation, field)
  end)
end

+ defp
maybe_reopen_source_post_for_manual_translation(%Post{} = post, attrs) do + if attr(attrs, :auto_generated) == true or post.status != :published or + post.file_path in [nil, ""] do + {:ok, post} + else + project = Projects.get_project!(post.project_id) + full_path = Path.join(Projects.project_data_dir(project), post.file_path) + restored_content = FileSync.published_post_body(post, full_path) + + post + |> Post.changeset(%{ + status: :draft, + content: restored_content, + updated_at: Persistence.now_ms() + }) + |> Repo.update() + end + end +

# Lowercases and keeps only the part before the first "-"; nil becomes "".
defp normalize_language(nil), do: ""

defp normalize_language(language) do
  [primary | _region] =
    language
    |> to_string()
    |> String.downcase()
    |> String.split("-", parts: 2)

  primary
end

# Puts `value` under `key` unless it is nil.
defp maybe_put(map, key, value) do
  if is_nil(value), do: map, else: Map.put(map, key, value)
end

# Reads `key` from attrs, preferring the atom key and falling back to its
# string form; nil when neither key is present.
defp attr(attrs, key) do
  case Map.fetch(attrs, key) do
    {:ok, value} -> value
    :error -> Map.get(attrs, Atom.to_string(key))
  end
end
+end