fix: website validation now with proper progress and speed

This commit is contained in:
2026-04-27 18:43:02 +02:00
parent 8322d3cb3a
commit f6f3c0c255
2 changed files with 554 additions and 81 deletions

View File

@@ -68,19 +68,30 @@ defmodule BDS.Generation do
def validate_site(project_id, sections, opts) when is_binary(project_id) and is_list(sections) and is_list(opts) do def validate_site(project_id, sections, opts) when is_binary(project_id) and is_list(sections) and is_list(opts) do
with {:ok, plan} <- plan_generation(project_id, sections) do with {:ok, plan} <- plan_generation(project_id, sections) do
on_progress = progress_callback(opts) on_progress = progress_callback(opts)
data = generation_data(plan)
generated_file_updated_at = generated_file_updated_at_map(project_id)
:ok = report_validation_progress(on_progress, 0.0, "Collecting sitemap URLs...") :ok = report_validation_progress(on_progress, 0.0, "Collecting sitemap URLs...")
sitemap_content = data =
plan generation_data(plan,
|> build_validation_route_paths(data, on_progress) on_snapshot_progress: fn stage, current, total ->
|> Enum.map(&url_for_output(plan.base_url, &1)) report_validation_snapshot_progress(on_progress, stage, current, total)
|> render_sitemap() end
)
generated_file_updated_at = generated_file_updated_at_map(project_id)
additional_languages = additional_languages(plan)
published_route_posts = suppress_subtree_translation_variants(data.published_route_posts, additional_languages)
{sitemap_content, sitemap_to_write, additional_expected_paths, additional_post_timestamp_checks} =
build_validation_sitemap_artifacts(
plan,
data,
published_route_posts,
generated_file_updated_at,
on_progress
)
{:ok, sitemap_write} = {:ok, sitemap_write} =
write_generated_file(project_id, "sitemap.xml", sitemap_content) write_generated_file(project_id, "sitemap.xml", sitemap_to_write)
:ok = report_validation_progress(on_progress, 0.5, "Comparing sitemap to html pages...") :ok = report_validation_progress(on_progress, 0.5, "Comparing sitemap to html pages...")
@@ -93,11 +104,10 @@ defmodule BDS.Generation do
post_timestamp_checks: post_timestamp_checks:
build_post_timestamp_checks( build_post_timestamp_checks(
data.project_data_dir, data.project_data_dir,
plan.language, published_route_posts,
data.published_posts,
flattened_generation_translations(data.translations_by_post),
generated_file_updated_at generated_file_updated_at
) ) ++ additional_post_timestamp_checks,
additional_expected_paths: additional_expected_paths
}) })
completion_message = completion_message =
@@ -152,11 +162,29 @@ defmodule BDS.Generation do
:ok :ok
end end
# Translates snapshot-loading progress into the opening slice of the overall
# validation progress value.
#
#   * `:posts` covers 0.0..0.18
#   * `:translations` covers 0.18..0.30
#
# A `nil` callback or a non-positive `total` is a no-op. Any other stage is not
# handled and raises `FunctionClauseError`. Always returns `:ok` otherwise.
defp report_validation_snapshot_progress(nil, _stage, _current, _total), do: :ok

defp report_validation_snapshot_progress(_callback, _stage, _current, total) when total <= 0, do: :ok

defp report_validation_snapshot_progress(callback, stage, current, total)
     when stage in [:posts, :translations] do
  progress =
    case stage do
      :posts -> min(0.18, current / total * 0.18)
      :translations -> 0.18 + min(0.12, current / total * 0.12)
    end

  callback.(progress, "Collecting sitemap URLs... #{current}/#{total}")
  :ok
end
defp report_validation_collection_progress(nil, _current, _total), do: :ok defp report_validation_collection_progress(nil, _current, _total), do: :ok
defp report_validation_collection_progress(_callback, _current, total) when total <= 0, do: :ok defp report_validation_collection_progress(_callback, _current, total) when total <= 0, do: :ok
defp report_validation_collection_progress(callback, current, total) do defp report_validation_collection_progress(callback, current, total) do
progress = min(0.49, current / total * 0.5) progress = min(0.49, 0.30 + current / total * 0.19)
callback.(progress, "Collecting sitemap URLs... #{current}/#{total}") callback.(progress, "Collecting sitemap URLs... #{current}/#{total}")
:ok :ok
end end
@@ -336,10 +364,11 @@ defmodule BDS.Generation do
:ok :ok
end end
defp generation_data(plan) do defp generation_data(plan, opts \\ []) do
project = Projects.get_project!(plan.project_id) project = Projects.get_project!(plan.project_id)
project_data_dir = Projects.project_data_dir(project) project_data_dir = Projects.project_data_dir(project)
list_excluded_categories = excluded_list_categories(plan) list_excluded_categories = excluded_list_categories(plan)
on_snapshot_progress = Keyword.get(opts, :on_snapshot_progress)
published_candidates = published_candidates =
Repo.all( Repo.all(
@@ -355,9 +384,14 @@ defmodule BDS.Generation do
order_by: [desc: post.created_at, desc: post.published_at, asc: post.slug] order_by: [desc: post.created_at, desc: post.published_at, asc: post.slug]
) )
post_snapshot_candidates = published_candidates ++ draft_candidates
snapshots_by_id = snapshots_by_id =
(published_candidates ++ draft_candidates) post_snapshot_candidates
|> Enum.reduce(%{}, fn post, acc -> |> Enum.with_index(1)
|> Enum.reduce(%{}, fn {post, index}, acc ->
:ok = report_snapshot_stage_progress(on_snapshot_progress, :posts, index, length(post_snapshot_candidates))
case published_post_snapshot(project_data_dir, post) do case published_post_snapshot(project_data_dir, post) do
nil -> acc nil -> acc
snapshot -> Map.put(acc, post.id, snapshot) snapshot -> Map.put(acc, post.id, snapshot)
@@ -383,7 +417,12 @@ defmodule BDS.Generation do
|> Enum.sort_by(&{-(&1.created_at || 0), -(&1.published_at || 0), to_string(&1.slug)}) |> Enum.sort_by(&{-(&1.created_at || 0), -(&1.published_at || 0), to_string(&1.slug)})
{published_route_posts, translations_by_post} = {published_route_posts, translations_by_post} =
build_generation_route_posts(plan.project_id, project_data_dir, published_posts) build_generation_route_posts(
plan.project_id,
project_data_dir,
published_posts,
on_snapshot_progress
)
%{ %{
project: project, project: project,
@@ -481,7 +520,7 @@ defmodule BDS.Generation do
end end
end end
defp build_generation_route_posts(project_id, project_data_dir, published_posts) do defp build_generation_route_posts(project_id, project_data_dir, published_posts, on_snapshot_progress) do
source_post_ids = Enum.map(published_posts, & &1.id) source_post_ids = Enum.map(published_posts, & &1.id)
translation_candidates = translation_candidates =
@@ -494,7 +533,10 @@ defmodule BDS.Generation do
translations_by_post = translations_by_post =
translation_candidates translation_candidates
|> Enum.reduce(%{}, fn translation, acc -> |> Enum.with_index(1)
|> Enum.reduce(%{}, fn {translation, index}, acc ->
:ok = report_snapshot_stage_progress(on_snapshot_progress, :translations, index, length(translation_candidates))
case published_translation_snapshot(project_data_dir, translation) do case published_translation_snapshot(project_data_dir, translation) do
nil -> acc nil -> acc
snapshot -> Map.update(acc, translation.translation_for, [snapshot], &[snapshot | &1]) snapshot -> Map.update(acc, translation.translation_for, [snapshot], &[snapshot | &1])
@@ -675,25 +717,209 @@ defmodule BDS.Generation do
core_outputs ++ page_outputs ++ single_outputs ++ archive_outputs ++ sitemap ++ pagefind_outputs ++ asset_outputs core_outputs ++ page_outputs ++ single_outputs ++ archive_outputs ++ sitemap ++ pagefind_outputs ++ asset_outputs
end end
defp build_validation_route_paths(plan, data, on_progress) do defp build_validation_sitemap_artifacts(
_ = data plan,
data,
published_route_posts,
generated_file_updated_at,
on_progress
) do
main_paths =
build_validation_route_paths(
plan,
published_route_posts,
data.published_list_posts,
data.post_index,
nil
)
route_paths = additional_language_sets =
plan Enum.map(additional_languages(plan), fn language ->
|> build_outputs() language_posts = Enum.reject(data.published_posts, &truthy_flag?(Map.get(&1, :do_not_translate)))
|> Enum.map(&elem(&1, 0)) language_list_posts = Enum.reject(data.published_list_posts, &truthy_flag?(Map.get(&1, :do_not_translate)))
|> Enum.filter(&route_html_path?/1) language_post_index = build_generation_post_index(language_list_posts)
total_route_count = length(route_paths) {language,
language_posts,
build_validation_route_paths(plan, language_posts, language_list_posts, language_post_index, language)}
end)
route_paths all_collection_paths =
main_paths ++ Enum.flat_map(additional_language_sets, fn {_language, _posts, paths} -> paths end)
total_route_count = max(length(all_collection_paths), 1)
all_collection_paths
|> Enum.with_index(1) |> Enum.with_index(1)
|> Enum.map(fn {relative_path, index} -> |> Enum.each(fn {_relative_path, index} ->
:ok = report_validation_collection_progress(on_progress, index, total_route_count) :ok = report_validation_collection_progress(on_progress, index, total_route_count)
relative_path end)
sitemap_content =
main_paths
|> Enum.map(&url_for_output(plan.base_url, &1))
|> render_sitemap()
additional_expected_paths =
additional_language_sets
|> Enum.flat_map(fn {_language, _posts, paths} -> paths end)
|> Enum.map(&relative_path_to_url_path/1)
additional_post_timestamp_checks =
additional_language_sets
|> Enum.flat_map(fn {language, posts, _paths} ->
build_language_post_timestamp_checks(
data.project_data_dir,
language,
posts,
generated_file_updated_at
)
end)
sitemap_to_write =
case additional_languages(plan) do
[] -> sitemap_content
languages ->
render_multi_language_sitemap(
plan,
Enum.reject(data.published_posts, &truthy_flag?(Map.get(&1, :do_not_translate))),
Enum.filter(data.published_posts, &truthy_flag?(Map.get(&1, :do_not_translate))),
data.published_list_posts,
data.post_index,
languages
)
end
{sitemap_content, sitemap_to_write, additional_expected_paths, additional_post_timestamp_checks}
end
# Collects every HTML route path validation expects for one language.
#
# `route_language` is `nil` for the main language or a language code for a
# language subtree. Each helper returns `[]` when its section is not part of
# `plan.sections`. The combined list is de-duplicated, keeping first occurrences.
defp build_validation_route_paths(plan, route_posts, published_list_posts, post_index, route_language) do
  core = core_route_paths(plan, published_list_posts, route_language)
  pages = page_route_paths(plan, route_posts, route_language)
  singles = single_route_paths(plan, route_posts, route_language)
  categories = category_route_paths(plan, post_index.posts_by_category, route_language)
  tags = tag_route_paths(plan, post_index.posts_by_tag, route_language)
  dates = date_route_paths(plan, post_index, route_language)

  Enum.uniq(List.flatten([core, pages, singles, categories, tags, dates]))
end
# Root index paths (page 1 and any further pagination pages) for the given
# language, or `[]` when the `:core` section is not being validated.
defp core_route_paths(plan, published_list_posts, route_language) do
  case :core in plan.sections do
    true ->
      root_route_paths(route_language, length(published_list_posts), plan.max_posts_per_page)

    false ->
      []
  end
end
# Output paths of standalone pages: posts whose categories include "page".
# Only produced when the `:core` section is selected; a post with `nil`
# categories is treated as having none.
defp page_route_paths(plan, route_posts, route_language) do
  if Enum.member?(plan.sections, :core) do
    for post <- route_posts, Enum.member?(post.categories || [], "page") do
      page_output_path(post.slug, route_language)
    end
  else
    []
  end
end
# One output path per route post, or `[]` when the `:single` section is not
# selected.
defp single_route_paths(plan, route_posts, route_language) do
  if :single not in plan.sections do
    []
  else
    for post <- route_posts, do: route_post_output_path(post, route_language)
  end
end
# Paginated archive paths for every category, or `[]` when the `:category`
# section is not selected. `posts_by_category` yields `{category, posts}` pairs.
defp category_route_paths(plan, posts_by_category, route_language) do
  archive_pages = fn {category, posts} ->
    paginated_archive_paths(
      route_language,
      ["category", Slug.slugify(category)],
      length(posts),
      plan.max_posts_per_page
    )
  end

  if :category in plan.sections, do: Enum.flat_map(posts_by_category, archive_pages), else: []
end
# Paginated archive paths for every tag, or `[]` when the `:tag` section is not
# selected. `posts_by_tag` yields `{tag, posts}` pairs.
defp tag_route_paths(plan, posts_by_tag, route_language) do
  archive_pages = fn {tag, posts} ->
    paginated_archive_paths(
      route_language,
      ["tag", Slug.slugify(tag)],
      length(posts),
      plan.max_posts_per_page
    )
  end

  if :tag in plan.sections, do: Enum.flat_map(posts_by_tag, archive_pages), else: []
end
# Paginated archive paths for the year, year/month and year/month/day
# archives (in that order), or `[]` when the `:date` section is not selected.
#
# Year keys are integers; month and day keys are "/"-separated strings that are
# split back into their path segments.
defp date_route_paths(plan, post_index, route_language) do
  archive_pages = fn segments, posts ->
    paginated_archive_paths(route_language, segments, length(posts), plan.max_posts_per_page)
  end

  if :date not in plan.sections do
    []
  else
    by_year =
      Enum.flat_map(post_index.posts_by_year, fn {year, posts} ->
        archive_pages.([Integer.to_string(year)], posts)
      end)

    by_month =
      Enum.flat_map(post_index.posts_by_year_month, fn {key, posts} ->
        [year, month] = String.split(key, "/", parts: 2)
        archive_pages.([year, month], posts)
      end)

    by_day =
      Enum.flat_map(post_index.posts_by_year_month_day, fn {key, posts} ->
        [year, month, day] = String.split(key, "/", parts: 3)
        archive_pages.([year, month, day], posts)
      end)

    by_year ++ by_month ++ by_day
  end
end
# Output path of a post: the plain path when no route language is given
# (`nil` or ""), otherwise the path inside that language's subtree.
defp route_post_output_path(post, route_language) when route_language in [nil, ""],
  do: post_output_path(post)

defp route_post_output_path(post, route_language),
  do: post_output_path(post, route_language)
# Drops route posts that are translation variants (they carry a binary
# `:translation_source_slug`) written in one of `additional_languages`.
# All other posts are kept in their original order.
defp suppress_subtree_translation_variants(route_posts, additional_languages) do
  subtree_languages = MapSet.new(additional_languages)

  subtree_variant? = fn post ->
    is_binary(Map.get(post, :translation_source_slug)) and
      MapSet.member?(subtree_languages, to_string(Map.get(post, :language)))
  end

  Enum.filter(route_posts, &(not subtree_variant?.(&1)))
end
# A flag counts as set for every value except `false` and `nil`.
defp truthy_flag?(value) when value in [false, nil], do: false
defp truthy_flag?(_value), do: true
defp disk_generated_files(project_id) do defp disk_generated_files(project_id) do
project = Projects.get_project!(project_id) project = Projects.get_project!(project_id)
html_root = output_path(project, "") html_root = output_path(project, "")
@@ -1019,6 +1245,22 @@ defmodule BDS.Generation do
end) end)
end end
# Lists the output path of every page of one paginated archive.
#
# `segments` are the archive's path segments; `total_items` and
# `max_posts_per_page` are handed to `page_count/2` to obtain the number of
# pages. Pages are numbered from 1.
#
# The range carries an explicit `//1` step on purpose: a bare `1..0` is a
# *descending* range in Elixir, so a page count of 0 would otherwise produce
# paths for pages 1 and 0. With the step, a zero page count yields no paths.
defp paginated_archive_paths(route_language, segments, total_items, max_posts_per_page) do
  total_pages = page_count(total_items, max_posts_per_page)

  for page_number <- 1..total_pages//1 do
    archive_path(route_language, segments, page_number)
  end
end
# Lists the output path of every page of the root index for a language.
#
# `total_items` and `max_posts_per_page` are handed to `page_count/2` to obtain
# the number of pages. Pages are numbered from 1.
#
# The range carries an explicit `//1` step on purpose: a bare `1..0` is a
# *descending* range in Elixir, so a page count of 0 would otherwise produce
# paths for pages 1 and 0. With the step, a zero page count yields no paths.
defp root_route_paths(route_language, total_items, max_posts_per_page) do
  total_pages = page_count(total_items, max_posts_per_page)

  for page_number <- 1..total_pages//1 do
    root_output_path(route_language, page_number)
  end
end
defp root_output_path(nil, 1), do: "index.html" defp root_output_path(nil, 1), do: "index.html"
defp root_output_path("", 1), do: "index.html" defp root_output_path("", 1), do: "index.html"
defp root_output_path(route_language, 1), do: Path.join(route_language, "index.html") defp root_output_path(route_language, 1), do: Path.join(route_language, "index.html")
@@ -1080,6 +1322,14 @@ defmodule BDS.Generation do
end end
end end
# Forwards `(stage, current, total)` to the optional snapshot progress
# callback. Does nothing when no callback was supplied or when `total` is not
# positive. Always returns `:ok`, whatever the callback returns.
defp report_snapshot_stage_progress(callback, stage, current, total) do
  if is_nil(callback) or total <= 0 do
    :ok
  else
    callback.(stage, current, total)
    :ok
  end
end
defp build_single_outputs( defp build_single_outputs(
project_id, project_id,
main_language, main_language,
@@ -1272,6 +1522,199 @@ defmodule BDS.Generation do
"<urlset>#{entries}</urlset>" "<urlset>#{entries}</urlset>"
end end
# Renders the combined sitemap XML for a site with additional languages.
#
# Every `<url>` entry uses the main-language URL as its `<loc>` and carries
# hreflang alternates for the languages that apply to it. Entries are emitted
# in this order: root index, root pagination pages (2..n), translatable posts,
# do-not-translate posts (main language only), standalone pages (posts in the
# "page" category with a non-empty slug), year / month / day archives (keys
# descending), then categories and tags (keys ascending).
#
# Archive-style entries share one `lastmod`, taken from
# `latest_post_updated_at_iso/1`; post and page entries use the post's own
# `updated_at`.
defp render_multi_language_sitemap(
       plan,
       translatable_posts,
       do_not_translate_posts,
       published_list_posts,
       post_index,
       additional_languages
     ) do
  all_languages = [plan.language | additional_languages]
  latest_post_updated_at = latest_post_updated_at_iso(published_list_posts)

  # Entry for index/archive style URLs: shared lastmod, all languages.
  archive_entry = fn url_path, changefreq, priority ->
    render_multi_language_sitemap_url(
      url_for_path(plan.base_url, url_path),
      latest_post_updated_at,
      changefreq,
      priority,
      build_hreflang_links(plan.base_url, url_path, plan.language, all_languages)
    )
  end

  # Entry for a URL backed by a single post: lastmod comes from the post.
  post_entry = fn url_path, post, changefreq, priority, languages ->
    render_multi_language_sitemap_url(
      url_for_path(plan.base_url, url_path),
      unix_ms_to_iso8601(post.updated_at),
      changefreq,
      priority,
      build_hreflang_links(plan.base_url, url_path, plan.language, languages)
    )
  end

  root_entries = [archive_entry.("/", "daily", "1.0")]

  pagination_entries =
    for page_number <- root_pagination_pages(length(published_list_posts), plan.max_posts_per_page) do
      archive_entry.("/page/#{page_number}", "daily", "0.9")
    end

  translatable_entries =
    for post <- translatable_posts do
      post_path = relative_path_to_url_path(post_output_path(post))
      post_entry.(post_path, post, "monthly", "0.8", all_languages)
    end

  untranslated_entries =
    for post <- do_not_translate_posts do
      post_path = relative_path_to_url_path(post_output_path(post))
      post_entry.(post_path, post, "monthly", "0.8", [plan.language])
    end

  page_entries =
    for post <- translatable_posts ++ do_not_translate_posts,
        "page" in (post.categories || []) and to_string(post.slug) != "" do
      page_path = relative_path_to_url_path(page_output_path(post.slug, nil))

      languages =
        if truthy_flag?(Map.get(post, :do_not_translate)), do: [plan.language], else: all_languages

      post_entry.(page_path, post, "weekly", "0.7", languages)
    end

  year_entries =
    post_index.posts_by_year
    |> Enum.sort_by(&elem(&1, 0), :desc)
    |> Enum.map(fn {year, _posts} -> archive_entry.("/#{year}", "monthly", "0.5") end)

  month_entries =
    post_index.posts_by_year_month
    |> Enum.sort_by(&elem(&1, 0), :desc)
    |> Enum.map(fn {year_month, _posts} -> archive_entry.("/#{year_month}", "monthly", "0.5") end)

  day_entries =
    post_index.posts_by_year_month_day
    |> Enum.sort_by(&elem(&1, 0), :desc)
    |> Enum.map(fn {year_month_day, _posts} ->
      archive_entry.("/#{year_month_day}", "monthly", "0.4")
    end)

  category_entries =
    post_index.posts_by_category
    |> Enum.sort_by(&elem(&1, 0))
    |> Enum.map(fn {category, _posts} ->
      archive_entry.("/category/#{Slug.slugify(category)}", "weekly", "0.6")
    end)

  tag_entries =
    post_index.posts_by_tag
    |> Enum.sort_by(&elem(&1, 0))
    |> Enum.map(fn {tag, _posts} ->
      archive_entry.("/tag/#{Slug.slugify(tag)}", "weekly", "0.6")
    end)

  urls =
    Enum.concat([
      root_entries,
      pagination_entries,
      translatable_entries,
      untranslated_entries,
      page_entries,
      year_entries,
      month_entries,
      day_entries,
      category_entries,
      tag_entries
    ])

  Enum.join(
    [
      "<?xml version=\"1.0\" encoding=\"UTF-8\"?>",
      "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" xmlns:xhtml=\"http://www.w3.org/1999/xhtml\">",
      Enum.join(urls, "\n"),
      "</urlset>",
      ""
    ],
    "\n"
  )
end
# ISO 8601 `lastmod` shared by index and archive sitemap entries: the
# `updated_at` of the first post in the list, or the current UTC time when the
# list is empty.
# NOTE(review): this takes the head of the list, so it is only the "latest"
# post if the caller passes posts newest-first — confirm the ordering.
defp latest_post_updated_at_iso([]), do: DateTime.to_iso8601(DateTime.utc_now())

defp latest_post_updated_at_iso([first_post | _remaining]),
  do: unix_ms_to_iso8601(first_post.updated_at)
# Page numbers of the root index beyond the first page (2..total_pages), or
# `[]` when there is at most one page.
defp root_pagination_pages(total_items, max_posts_per_page) do
  total_pages = page_count(total_items, max_posts_per_page)

  if total_pages > 1, do: Enum.to_list(2..total_pages), else: []
end
# Formats a timestamp as an ISO 8601 string. `nil` falls back to the current
# UTC time; any other value is converted with `Persistence.from_unix_ms!/1`.
defp unix_ms_to_iso8601(value) do
  timestamp =
    if is_nil(value), do: DateTime.utc_now(), else: Persistence.from_unix_ms!(value)

  DateTime.to_iso8601(timestamp)
end
# Builds the sitemap URL for a site path. The path always gets a trailing
# slash; with a `nil` base URL only the path is returned, otherwise it is
# appended to the base URL stripped of trailing slashes.
defp url_for_path(base_url, path) do
  case base_url do
    nil -> ensure_trailing_slash(path)
    url -> String.trim_trailing(url, "/") <> ensure_trailing_slash(path)
  end
end
# Normalizes the path with `normalize_url_path/1` and appends "/" unless the
# normalized path is already the root "/".
defp ensure_trailing_slash(path) do
  case normalize_url_path(path) do
    "/" -> "/"
    normalized_path -> normalized_path <> "/"
  end
end
# Builds the `<xhtml:link rel="alternate">` lines for one sitemap entry: one
# per language, followed by an `x-default` link.
#
# The main language links to `url_path` itself; every other language links to
# the same path prefixed with `/<language>`. The `x-default` link always points
# at the unprefixed path.
defp build_hreflang_links(base_url, url_path, main_language, languages) do
  alternates =
    for language <- languages do
      localized_path =
        if language != main_language do
          normalize_url_path("/#{language}#{url_path}")
        else
          url_path
        end

      href = url_for_path(base_url, localized_path)
      " <xhtml:link rel=\"alternate\" hreflang=\"#{xml_escape(language)}\" href=\"#{xml_escape(href)}\" />"
    end

  default_href = url_for_path(base_url, url_path)

  x_default =
    " <xhtml:link rel=\"alternate\" hreflang=\"x-default\" href=\"#{xml_escape(default_href)}\" />"

  alternates ++ [x_default]
end
# Renders one `<url>` element of the multi-language sitemap as a
# newline-joined string. `loc` and `lastmod` are XML-escaped here;
# `changefreq` and `priority` are inserted as given. `hreflang_links` are
# pre-rendered lines placed just before the closing tag.
defp render_multi_language_sitemap_url(loc, lastmod, changefreq, priority, hreflang_links) do
  opening = [
    " <url>",
    " <loc>#{xml_escape(loc)}</loc>",
    " <lastmod>#{xml_escape(lastmod)}</lastmod>",
    " <changefreq>#{changefreq}</changefreq>",
    " <priority>#{priority}</priority>"
  ]

  alternates = Enum.join(hreflang_links, "\n")

  Enum.join(opening ++ [alternates, " </url>"], "\n")
end
defp sitemap_route_output?("404.html"), do: false defp sitemap_route_output?("404.html"), do: false
defp sitemap_route_output?("feed.xml"), do: false defp sitemap_route_output?("feed.xml"), do: false
defp sitemap_route_output?("atom.xml"), do: false defp sitemap_route_output?("atom.xml"), do: false
@@ -1561,59 +2004,37 @@ defmodule BDS.Generation do
|> then(fn {:ok, files} -> Map.new(files, &{&1.relative_path, &1.updated_at}) end) |> then(fn {:ok, files} -> Map.new(files, &{&1.relative_path, &1.updated_at}) end)
end end
defp build_post_timestamp_checks( defp build_post_timestamp_checks(project_data_dir, published_route_posts, generated_file_updated_at) do
project_data_dir, Enum.map(published_route_posts, fn post ->
main_language, relative_path = post_output_path(post)
%{
post_url_path: relative_path_to_url_path(relative_path),
post_file_path:
source_full_path(
project_data_dir,
Map.get(post, :translation_file_path) || Map.get(post, :file_path)
),
generated_updated_at_ms: Map.get(generated_file_updated_at, relative_path, 0)
}
end)
end
defp build_language_post_timestamp_checks(
project_data_dir,
language,
published_posts, published_posts,
published_translations,
generated_file_updated_at generated_file_updated_at
) do ) do
translations_by_post_language = Enum.map(published_posts, fn post ->
Map.new(published_translations, fn translation -> relative_path = post_output_path(post, language)
{{translation.translation_for, translation.language}, translation}
end)
post_by_id = Map.new(published_posts, &{&1.id, &1}) %{
post_url_path: relative_path_to_url_path(relative_path),
canonical_checks = post_file_path: source_full_path(project_data_dir, Map.get(post, :file_path)),
Enum.map(published_posts, fn post -> generated_updated_at_ms: Map.get(generated_file_updated_at, relative_path, 0)
canonical_variant = Map.get(translations_by_post_language, {post.id, main_language}, post) }
relative_path = post_output_path(post) end)
%{
post_url_path: relative_path_to_url_path(relative_path),
post_file_path: source_full_path(project_data_dir, canonical_variant.file_path),
generated_updated_at_ms: Map.get(generated_file_updated_at, relative_path, 0)
}
end)
translation_checks =
Enum.flat_map(published_posts, fn post ->
post_variant =
if post.language == main_language do
[]
else
[{post.language, post}]
end
translation_variants =
published_translations
|> Enum.filter(&(&1.translation_for == post.id and &1.language != main_language))
|> Enum.map(&{&1.language, &1})
Enum.map(post_variant ++ translation_variants, fn {language, variant} ->
canonical_post = Map.get(post_by_id, post.id, post)
relative_path = post_output_path(canonical_post, language)
%{
post_url_path: relative_path_to_url_path(relative_path),
post_file_path: source_full_path(project_data_dir, variant.file_path),
generated_updated_at_ms: Map.get(generated_file_updated_at, relative_path, 0)
}
end)
end)
canonical_checks ++ translation_checks
end end
defp source_full_path(_project_data_dir, file_path) when file_path in [nil, ""], do: nil defp source_full_path(_project_data_dir, file_path) when file_path in [nil, ""], do: nil
@@ -1631,7 +2052,12 @@ defmodule BDS.Generation do
params.sitemap_xml params.sitemap_xml
|> extract_sitemap_locs() |> extract_sitemap_locs()
|> Enum.map(&sitemap_loc_to_project_path(&1, params.base_url)) |> Enum.map(&sitemap_loc_to_project_path(&1, params.base_url))
|> MapSet.new() |> Enum.reduce(MapSet.new(), &MapSet.put(&2, normalize_url_path(&1)))
|> then(fn expected_paths ->
Enum.reduce(Map.get(params, :additional_expected_paths, []), expected_paths, fn path, acc ->
MapSet.put(acc, normalize_url_path(path))
end)
end)
{existing_html_path_set, zero_byte_html_path_set} = {existing_html_path_set, zero_byte_html_path_set} =
collect_html_index_paths(index_paths, params.html_dir, params.on_progress, total_compare_steps) collect_html_index_paths(index_paths, params.html_dir, params.on_progress, total_compare_steps)

View File

@@ -409,6 +409,7 @@ defmodule BDS.GenerationTest do
events = collect_validate_progress_events() events = collect_validate_progress_events()
assert {0.0, "Collecting sitemap URLs..."} in events assert {0.0, "Collecting sitemap URLs..."} in events
assert Enum.any?(events, fn assert Enum.any?(events, fn
{value, message} {value, message}
when is_number(value) and value > 0.0 and value < 0.5 and when is_number(value) and value > 0.0 and value < 0.5 and
@@ -982,6 +983,52 @@ defmodule BDS.GenerationTest do
) )
end end
# End-to-end check for a two-language blog ("en" main, "de" additional):
# after generating the site, validation must report no missing, extra or
# stale paths, and the sitemap written by validate_site must contain the
# German hreflang alternate under the /de/ subtree.
test "validate_site follows old language subtree expectations and combined sitemap output", %{
project: project,
temp_dir: temp_dir
} do
# Configure a public URL with a path prefix and enable "de" next to the main language.
assert {:ok, _metadata} =
Metadata.update_project_metadata(project.id, %{
public_url: "https://example.com/blog",
main_language: "en",
blog_languages: ["en", "de"]
})
assert {:ok, post} =
Posts.create_post(%{
project_id: project.id,
title: "Localized Post",
content: "Canonical body",
language: "en"
})
# Pin the timestamps so the dated URL asserted below (2026/04/15) is deterministic.
# NOTE(review): DateTime.to_unix/1 returns seconds, while BDS.Generation feeds
# post.updated_at to a unix-milliseconds helper — confirm the unit stored on Post.
created_at = DateTime.to_unix(~U[2026-04-15 12:00:00Z])
Repo.update_all(from(p in BDS.Posts.Post, where: p.id == ^post.id),
set: [created_at: created_at, updated_at: created_at]
)
# Add the German translation before publishing the post.
assert {:ok, _translation} =
Posts.upsert_post_translation(post.id, "de", %{
title: "Lokalisierter Beitrag",
content: "Deutscher Inhalt"
})
assert {:ok, _published_post} = Posts.publish_post(post.id)
# Generate first, then validate the same sections against the generated output.
assert {:ok, _result} = BDS.Generation.generate_site(project.id, [:core, :single])
assert {:ok, report} = BDS.Generation.validate_site(project.id, [:core, :single])
# A freshly generated site must validate cleanly.
assert report.missing_url_paths == []
assert report.extra_url_paths == []
assert report.updated_post_url_paths == []
# The sitemap on disk must be the combined multi-language one: German alternate
# under /de/, and no "<slug>.de" style path.
sitemap_xml = File.read!(Path.join([temp_dir, "html", "sitemap.xml"]))
assert sitemap_xml =~ "hreflang=\"de\""
assert sitemap_xml =~ "https://example.com/blog/de/2026/04/15/localized-post/"
refute sitemap_xml =~ "localized-post.de"
end
test "generation and validation include old-app pagination and day archive routes", %{ test "generation and validation include old-app pagination and day archive routes", %{
project: project, project: project,
temp_dir: temp_dir temp_dir: temp_dir