chore: posts.ex also refactored
Co-authored-by: Copilot <copilot@github.com>
This commit is contained in:
464
lib/bds/posts/translation_validation.ex
Normal file
464
lib/bds/posts/translation_validation.ex
Normal file
@@ -0,0 +1,464 @@
|
||||
defmodule BDS.Posts.TranslationValidation do
|
||||
@moduledoc false
|
||||
|
||||
import Ecto.Query
|
||||
|
||||
alias BDS.DocumentFields
|
||||
alias BDS.Frontmatter
|
||||
alias BDS.Metadata
|
||||
alias BDS.Posts.Post
|
||||
alias BDS.Posts.RebuildFromFiles
|
||||
alias BDS.Posts.Translation
|
||||
alias BDS.Posts.Translations
|
||||
alias BDS.Projects
|
||||
alias BDS.Repo
|
||||
alias BDS.Search
|
||||
|
||||
# Shape of the map that `validate/2` returns inside its `{:ok, _}` tuple.
# Every key is required.
@type report :: %{
        checked_database_row_count: non_neg_integer(),
        checked_filesystem_file_count: non_neg_integer(),
        invalid_database_rows: [map()],
        invalid_filesystem_files: [map()],
        missing: [map()],
        orphan_files: [String.t()],
        do_not_translate_posts: [String.t()]
      }
|
||||
|
||||
@doc """
Checks a project's translation rows and on-disk translation files.

Loads every post and translation row of `project_id`, lists the markdown
files under the project's `posts` directory, and inspects each row and file
for problems. Progress is reported through the callback that
`RebuildFromFiles.progress_callback/1` derives from `opts`; database rows are
reported first, files afterwards, against a shared total.

The returned map carries the modern invalid-item fields
(`invalid_database_rows`, `invalid_filesystem_files` and the two `checked_*`
counters) as well as the legacy summary fields (`missing`, `orphan_files`,
`do_not_translate_posts`).

Files that cannot be read, cannot be parsed, or are not translation files are
skipped and do not count towards `checked_filesystem_file_count`.
"""
@spec validate(String.t(), keyword()) :: {:ok, report()}
def validate(project_id, opts \\ []) do
  project = Projects.get_project!(project_id)
  # Assertive match: anything other than `{:ok, metadata}` crashes here.
  {:ok, metadata} = Metadata.get_project_metadata(project_id)
  on_progress = RebuildFromFiles.progress_callback(opts)

  posts_query =
    from p in Post,
      where: p.project_id == ^project_id,
      order_by: [asc: p.created_at, asc: p.slug]

  source_posts = Repo.all(posts_query)
  source_post_map = Map.new(source_posts, fn post -> {post.id, post} end)

  rows_query =
    from t in Translation,
      where: t.project_id == ^project_id,
      order_by: [asc: t.translation_for, asc: t.language, asc: t.id]

  translation_rows = Repo.all(rows_query)
  row_count = length(translation_rows)

  project_data_dir = Projects.project_data_dir(project)
  markdown_files = list_markdown_files_recursive(Path.join(project_data_dir, "posts"))

  total_items = row_count + length(markdown_files)
  :ok = RebuildFromFiles.report_rebuild_started(on_progress, total_items, "translations")

  invalid_database_rows =
    translation_rows
    |> Enum.with_index(1)
    |> Enum.map(fn {row, position} ->
      :ok =
        RebuildFromFiles.report_rebuild_progress(
          on_progress,
          position,
          total_items,
          "translations"
        )

      invalid_database_translation_issue(row, source_post_map, metadata)
    end)
    |> Enum.reject(&is_nil/1)
    |> Enum.sort_by(&issue_sort_key/1)

  # File positions continue after the database rows so progress is monotonic.
  file_results =
    markdown_files
    |> Enum.with_index(row_count + 1)
    |> Enum.map(fn {path, position} ->
      :ok =
        RebuildFromFiles.report_rebuild_progress(
          on_progress,
          position,
          total_items,
          "translations"
        )

      invalid_filesystem_translation_issue(path, source_post_map, metadata)
    end)

  # `:skip` results are neither counted nor reported.
  checked_filesystem_file_count = Enum.count(file_results, &match?({:ok, _}, &1))
  invalid_filesystem_files = for {:ok, found} <- file_results, not is_nil(found), do: found

  summary = %{
    checked_database_row_count: row_count,
    checked_filesystem_file_count: checked_filesystem_file_count,
    invalid_database_rows: invalid_database_rows,
    invalid_filesystem_files: Enum.sort_by(invalid_filesystem_files, &issue_sort_key/1),
    missing: legacy_missing_entries(source_posts, translation_rows, metadata),
    orphan_files: legacy_orphan_files(invalid_filesystem_files, project_data_dir),
    do_not_translate_posts: legacy_do_not_translate_posts(source_posts)
  }

  {:ok, summary}
end
|
||||
|
||||
@doc """
Repairs the problems listed in a validation `report`.

The report is normalized first, so atom or string keys are both accepted.
Each invalid database row is either deleted or flushed (see
`fix_invalid_database_row/1`), each invalid file with a non-blank path is
removed from disk, and finally `Search.sync_post/1` runs once for every
distinct post id that a database fix touched.

Returns the number of deleted rows, deleted files, and flushed translations.
"""
@spec fix_invalid(map()) ::
        {:ok,
         %{
           deleted_database_rows: non_neg_integer(),
           deleted_files: non_neg_integer(),
           flushed_translations: non_neg_integer()
         }}
def fix_invalid(report) when is_map(report) do
  %{invalid_database_rows: row_issues, invalid_filesystem_files: file_issues} =
    normalize_report(report)

  # Apply every database fix first, then derive the counters from the outcomes.
  outcomes = Enum.map(row_issues, &fix_invalid_database_row/1)

  deleted_database_rows = Enum.count(outcomes, &match?({:deleted, _}, &1))
  flushed_translations = Enum.count(outcomes, &match?({:flushed, _}, &1))

  synced_post_ids =
    Enum.reduce(outcomes, MapSet.new(), fn
      :noop, ids -> ids
      {_action, post_id}, ids -> maybe_put_synced_post(ids, post_id)
    end)

  deleted_files = Enum.count(file_issues, &delete_validation_file(&1.file_path))

  Enum.each(synced_post_ids, &Search.sync_post/1)

  {:ok,
   %{
     deleted_database_rows: deleted_database_rows,
     deleted_files: deleted_files,
     flushed_translations: flushed_translations
   }}
end
|
||||
|
||||
@doc """
Tells whether a parsed rebuild file is a translation.

A file counts as a translation when its fields contain `translationFor` and
do not contain `slug`.
"""
@spec translation_rebuild_file?(map()) :: boolean()
def translation_rebuild_file?(%{fields: fields}) do
  references_source? = DocumentFields.has_key?(fields, "translationFor")

  # `and` short-circuits, so `slug` is only looked up for candidate files.
  references_source? and not DocumentFields.has_key?(fields, "slug")
end
|
||||
|
||||
@doc """
Returns the sorted, de-duplicated `.md`, `.markdown` and `.mdx` files found
anywhere under `dir`.
"""
@spec list_markdown_files_recursive(String.t()) :: [String.t()]
def list_markdown_files_recursive(dir) do
  found =
    for pattern <- ["*.md", "*.markdown", "*.mdx"],
        path <- list_matching_files(dir, pattern),
        uniq: true do
      path
    end

  Enum.sort(found)
end
|
||||
|
||||
@doc """
Returns the sorted paths under `dir` (at any depth) whose file name matches
the glob `pattern`. Yields `[]` when `dir` is not a directory.
"""
@spec list_matching_files(String.t(), String.t()) :: [String.t()]
def list_matching_files(dir, pattern) do
  case File.dir?(dir) do
    true ->
      # "**" lets the glob descend into sub-directories.
      [dir, "**", pattern]
      |> Path.join()
      |> Path.wildcard()
      |> Enum.sort()

    false ->
      []
  end
end
|
||||
|
||||
# Public entry point to the module's language normalization; it simply
# forwards to the private `do_normalize_language/1`.
@doc false
def normalize_language(value) do
  do_normalize_language(value)
end
|
||||
|
||||
# ----- internals -----
|
||||
|
||||
# Inspects one translation row and returns an issue map describing the first
# problem found, or `nil` when the row is fine. Checks run in this order:
#
#   1. the referenced source post is not in `source_post_map`
#   2. the translation language equals the source post's canonical language
#   3. the source post is flagged `do_not_translate`
#   4. the row is published yet has non-blank `content`
defp invalid_database_translation_issue(%Translation{} = translation, source_post_map, metadata) do
  source_post = Map.get(source_post_map, translation.translation_for)
  language = do_normalize_language(translation.language)

  # Attributes shared by every issue raised for this row.
  base = %{
    translation_id: translation.id,
    translation_for: translation.translation_for,
    translation_language: language,
    title: translation.title,
    file_path: blank_to_nil(translation.file_path)
  }

  cond do
    is_nil(source_post) ->
      issue(Map.put(base, :issue, "missing-source-post"))

    canonical_language?(source_post, language, metadata) ->
      base
      |> Map.put(:issue, "same-language-as-canonical")
      |> Map.put(:canonical_language, canonical_language(source_post, metadata))
      |> issue()

    source_post.do_not_translate ->
      issue(Map.put(base, :issue, "do-not-translate-has-translations"))

    translation.status == :published and present?(translation.content) ->
      issue(Map.put(base, :issue, "content-in-database"))

    true ->
      nil
  end
end
|
||||
|
||||
# Inspects one markdown file on disk.
#
# Returns `:skip` when the file cannot be read, cannot be parsed, or is not a
# translation file. Otherwise returns `{:ok, issue_map}` for the first problem
# found, or `{:ok, nil}` when the file is fine. The checks mirror the first
# three database checks: missing source post, same language as the canonical
# one, and a source post flagged `do_not_translate`.
defp invalid_filesystem_translation_issue(file_path, source_post_map, metadata) do
  with {:ok, contents} <- File.read(file_path),
       {:ok, %{fields: fields}} <- Frontmatter.parse_document(contents),
       true <- translation_rebuild_file?(%{fields: fields}) do
    translation_for = DocumentFields.get(fields, "translationFor")
    source_post = Map.get(source_post_map, translation_for)
    language = do_normalize_language(DocumentFields.get(fields, "language"))

    # Attributes shared by every issue raised for this file.
    base = %{
      translation_for: translation_for,
      translation_language: language,
      title: DocumentFields.get(fields, "title"),
      file_path: file_path
    }

    found =
      cond do
        is_nil(source_post) ->
          issue(Map.put(base, :issue, "missing-source-post"))

        canonical_language?(source_post, language, metadata) ->
          base
          |> Map.put(:issue, "same-language-as-canonical")
          |> Map.put(:canonical_language, canonical_language(source_post, metadata))
          |> issue()

        source_post.do_not_translate ->
          issue(Map.put(base, :issue, "do-not-translate-has-translations"))

        true ->
          nil
      end

    {:ok, found}
  else
    # Unreadable, unparsable, or non-translation files are all ignored.
    _ -> :skip
  end
end
|
||||
|
||||
# Rebuilds the modern part of a report with atom keys. Values are read via
# `map_value/3`, so atom-keyed and string-keyed input both work; missing
# counters default to 0 and missing issue lists to []. The legacy summary
# fields are not carried over.
defp normalize_report(report) do
  issues_under = fn key ->
    report
    |> map_value(key, [])
    |> Enum.map(&normalize_issue/1)
  end

  %{
    checked_database_row_count: map_value(report, :checked_database_row_count, 0),
    checked_filesystem_file_count: map_value(report, :checked_filesystem_file_count, 0),
    invalid_database_rows: issues_under.(:invalid_database_rows),
    invalid_filesystem_files: issues_under.(:invalid_filesystem_files)
  }
end
|
||||
|
||||
# Builds the legacy `missing` list: one `%{post_id: _, language: _}` entry for
# every configured language that a published, translatable post lacks.
#
# Configured languages are the metadata's `:main_language` plus its
# `:blog_languages`, normalized, with blanks and duplicates removed. A post's
# own canonical language and any language it already has a translation row for
# are not reported. The result is sorted by post id, then language.
defp legacy_missing_entries(source_posts, translation_rows, metadata) do
  # `List.wrap/1` guards against `:blog_languages` being present but `nil`.
  # `Map.get/3` only falls back to its default when the key is absent, and
  # `list ++ nil` would build an improper list that `Enum.map/2` rejects.
  extra_languages = List.wrap(Map.get(metadata, :blog_languages))

  configured_languages =
    [Map.get(metadata, :main_language) | extra_languages]
    |> Enum.map(&do_normalize_language/1)
    |> Enum.reject(&(&1 in [nil, ""]))
    |> Enum.uniq()

  # post id => set of normalized languages that already have a translation row
  existing_languages_by_post =
    Enum.reduce(translation_rows, %{}, fn translation, acc ->
      language = do_normalize_language(translation.language)

      Map.update(
        acc,
        translation.translation_for,
        MapSet.new([language]),
        &MapSet.put(&1, language)
      )
    end)

  source_posts
  |> Enum.filter(&(&1.status == :published and not &1.do_not_translate))
  |> Enum.flat_map(fn post ->
    canonical = canonical_language(post, metadata)
    existing_languages = Map.get(existing_languages_by_post, post.id, MapSet.new())

    configured_languages
    |> Enum.reject(&(&1 == canonical or MapSet.member?(existing_languages, &1)))
    |> Enum.map(&%{post_id: post.id, language: &1})
  end)
  |> Enum.sort_by(&{&1.post_id, &1.language})
end
|
||||
|
||||
# Builds the legacy `orphan_files` list: the sorted paths, made relative to
# `project_data_dir` where possible, of every file issue whose kind is
# "missing-source-post". Issues without a file path are dropped.
defp legacy_orphan_files(invalid_filesystem_files, project_data_dir) do
  invalid_filesystem_files
  |> Enum.flat_map(fn entry ->
    with "missing-source-post" <- Map.get(entry, :issue),
         path when not is_nil(path) <-
           relative_project_data_path(Map.get(entry, :file_path), project_data_dir) do
      [path]
    else
      _ -> []
    end
  end)
  |> Enum.sort()
end
|
||||
|
||||
# Builds the legacy `do_not_translate_posts` list: the sorted ids of posts
# that are published and flagged `do_not_translate`.
defp legacy_do_not_translate_posts(source_posts) do
  flagged_ids =
    for post <- source_posts,
        post.status == :published and post.do_not_translate,
        do: post.id

  Enum.sort(flagged_ids)
end
|
||||
|
||||
# Rebuilds one issue map with atom keys, reading each value through
# `map_value/2` so string-keyed input is accepted too. The issue kind, source
# post id and translation language are copied verbatim; the remaining fields
# go through `blank_to_nil/1`, which trims strings and turns blanks into nil.
defp normalize_issue(issue) when is_map(issue) do
  verbatim =
    Map.new([:issue, :translation_for, :translation_language], fn key ->
      {key, map_value(issue, key)}
    end)

  cleaned =
    Map.new([:translation_id, :canonical_language, :title, :file_path], fn key ->
      {key, blank_to_nil(map_value(issue, key))}
    end)

  Map.merge(verbatim, cleaned)
end
|
||||
|
||||
# Applies the fix for a single database issue and reports what happened:
#
#   * `{:flushed, post_id}` - a "content-in-database" row was handed to
#     `Translations.publish_translation/2` (presumably this writes the content
#     out to its file; confirm in that module)
#   * `{:deleted, post_id}` - any other issue kind: the row was deleted
#   * `:noop` - nothing was changed (row or post no longer exists, or the
#     issue carries no binary translation id)
defp fix_invalid_database_row(%{issue: "content-in-database", translation_id: translation_id})
     when is_binary(translation_id) do
  with %Translation{} = row <- Repo.get(Translation, translation_id),
       %Post{} = post <- Repo.get(Post, row.translation_for) do
    # Assertive match: a failed publish crashes instead of being counted.
    :ok = Translations.publish_translation(post, row)
    {:flushed, row.translation_for}
  else
    nil -> :noop
  end
end

defp fix_invalid_database_row(%{translation_id: translation_id, translation_for: translation_for})
     when is_binary(translation_id) do
  with %Translation{} = row <- Repo.get(Translation, translation_id) do
    Repo.delete!(row)
    # The post id comes from the issue, not from the row that was deleted.
    {:deleted, translation_for}
  else
    nil -> :noop
  end
end

defp fix_invalid_database_row(_issue), do: :noop
|
||||
|
||||
# Removes `file_path` from disk. Returns `true` only when a file was actually
# deleted; a nil or empty path, a missing file, and any other removal error
# all yield `false`.
defp delete_validation_file(nil), do: false
defp delete_validation_file(""), do: false
defp delete_validation_file(file_path), do: File.rm(file_path) == :ok
|
||||
|
||||
# Builds an issue map with the full, fixed set of keys. Keys that `attrs`
# does not provide are set to nil; any extra keys in `attrs` are ignored.
defp issue(attrs) do
  issue_keys = [
    :issue,
    :translation_id,
    :translation_for,
    :canonical_language,
    :translation_language,
    :title,
    :file_path
  ]

  Map.new(issue_keys, fn key -> {key, Map.get(attrs, key)} end)
end
|
||||
|
||||
# Builds the string used to order issues: the source post id, translation id
# and file path joined with ":". Missing or nil parts become empty strings.
defp issue_sort_key(issue) do
  # `Enum.map_join/3` joins in a single pass, without the intermediate list
  # that `Enum.map/2 |> Enum.join/2` would build.
  Enum.map_join([:translation_for, :translation_id, :file_path], ":", fn key ->
    to_string(Map.get(issue, key) || "")
  end)
end
|
||||
|
||||
# Returns the normalized language of `source_post`, falling back to the
# metadata's normalized `:main_language` when the post has none.
defp canonical_language(source_post, metadata) do
  case do_normalize_language(source_post.language) do
    "" -> do_normalize_language(Map.get(metadata, :main_language))
    own_language -> own_language
  end
end
|
||||
|
||||
# True when `language`, once normalized, equals the post's canonical language.
# A post without any canonical language never matches.
defp canonical_language?(source_post, language, metadata) do
  case canonical_language(source_post, metadata) do
    "" -> false
    canonical -> canonical == do_normalize_language(language)
  end
end
|
||||
|
||||
# Reduces a language tag to its lower-case base language.
#
# `nil` becomes "". Any other value is stringified, stripped of surrounding
# whitespace, lower-cased, and cut at the first "-" or "_", so "en", " EN ",
# "en-US" and "en_US" all normalize to "en".
defp do_normalize_language(nil), do: ""

defp do_normalize_language(language) do
  language
  |> to_string()
  # Trim so padded values still match and whitespace-only values count as
  # blank, in line with `blank_to_nil/1` and `present?/1`.
  |> String.trim()
  |> String.downcase()
  # Accept both BCP 47 ("en-US") and POSIX locale ("en_US") separators.
  |> String.split(["-", "_"], parts: 2)
  |> hd()
end
|
||||
|
||||
# Reads `key` from `map`, accepting either the atom key or its string form.
# The atom key wins when both are present (even if its value is nil);
# `default` is returned only when neither key exists.
defp map_value(map, key, default \\ nil) when is_map(map) do
  string_key = Atom.to_string(key)

  case map do
    %{^key => value} -> value
    %{^string_key => value} -> value
    _ -> default
  end
end
|
||||
|
||||
# Trims a string and returns nil when nothing is left; otherwise returns the
# trimmed string. Non-string values pass through unchanged.
defp blank_to_nil(value) when is_binary(value) do
  trimmed = String.trim(value)
  if trimmed == "", do: nil, else: trimmed
end

defp blank_to_nil(value), do: value
|
||||
|
||||
# Expresses `file_path` relative to `project_data_dir`. A nil path stays nil.
# `Path.relative_to/2` already hands back the path untouched when it does not
# live under the given directory, so no extra fallback branch is needed.
defp relative_project_data_path(nil, _project_data_dir), do: nil

defp relative_project_data_path(file_path, project_data_dir),
  do: Path.relative_to(file_path, project_data_dir)
|
||||
|
||||
# Adds `post_id` to `set` when it is a non-empty string; any other value
# leaves the set as it was.
defp maybe_put_synced_post(set, post_id) do
  if is_binary(post_id) and post_id != "" do
    MapSet.put(set, post_id)
  else
    set
  end
end
|
||||
|
||||
# True for any non-nil value, except strings that are empty or contain only
# whitespace.
defp present?(nil), do: false
defp present?(value) when is_binary(value), do: String.trim(value) != ""
defp present?(_value), do: true
|
||||
end
|
||||
Reference in New Issue
Block a user