fix: more work on site validation

This commit is contained in:
2026-04-27 15:53:18 +02:00
parent 02abd5de1f
commit 8d3d6a6c91
8 changed files with 351 additions and 38 deletions

View File

@@ -172,7 +172,7 @@ defmodule BDS.Desktop.MenuBar do
defp item_label(:validate_translations), do: "Validate Translations"
defp item_label(:fill_missing_translations), do: "Fill Missing Translations"
defp item_label(:find_duplicates), do: "Find Duplicate Posts"
defp item_label(:generate_sitemap), do: "Generate Sitemap"
defp item_label(:generate_sitemap), do: "Generate Site"
defp item_label(:validate_site), do: "Validate Site"
defp item_label(:upload_site), do: "Upload Site"
defp item_label(:about), do: "About"

View File

@@ -191,7 +191,7 @@ defmodule BDS.Desktop.ShellCommands do
end
defp dispatch("generate_sitemap", project, _params) do
queue_task(project, "generate_sitemap", "Generate Sitemap", "Generation", fn report ->
queue_task(project, "generate_sitemap", "Generate Site", "Generation", fn report ->
{:ok, generation} = Generation.generate_site(project.id, @site_sections, on_progress: report)
report.(1.0, "Generated site output")
%{project_id: project.id, sections: generation.sections, generated_count: length(generation.generated_files)}
@@ -201,7 +201,6 @@ defmodule BDS.Desktop.ShellCommands do
defp dispatch("validate_site", project, _params) do
queue_task(project, "validate_site", "Validate Site", "Validation", fn report ->
{:ok, validation} = Generation.validate_site(project.id, @site_sections, on_progress: report)
report.(1.0, "Site validation complete")
site_validation_result(project.id, validation)
end)
end

View File

@@ -64,34 +64,35 @@ defmodule BDS.Generation do
def validate_site(project_id, sections, opts) when is_binary(project_id) and is_list(sections) and is_list(opts) do
with {:ok, plan} <- plan_generation(project_id, sections) do
expected_outputs = build_outputs(plan)
expected_output_map = Map.new(expected_outputs)
on_progress = progress_callback(opts)
total_outputs = length(expected_outputs)
project = Projects.get_project!(project_id)
project_data_dir = Projects.project_data_dir(project)
published_posts = list_published_posts(project_id)
published_translations = list_published_translations(project_id)
generated_file_updated_at = generated_file_updated_at_map(project_id)
:ok = report_generation_started(on_progress, total_outputs, "generated files")
:ok = report_validation_progress(on_progress, 0.0, "Collecting sitemap URLs...")
Enum.each(1..total_outputs, fn index ->
:ok = report_generation_progress(on_progress, index, total_outputs, "generated files")
end)
sitemap_content = Map.fetch!(expected_output_map, "sitemap.xml")
sitemap_content =
plan
|> build_validation_route_paths(published_posts, published_translations, on_progress)
|> Enum.map(&url_for_output(plan.base_url, &1))
|> render_sitemap()
{:ok, sitemap_write} =
write_generated_file(project_id, "sitemap.xml", sitemap_content)
:ok = report_validation_progress(on_progress, 0.5, "Comparing sitemap to html pages...")
diff_result =
compare_sitemap_to_html(%{
sitemap_xml: sitemap_content,
base_url: plan.base_url,
html_dir: output_path(project, ""),
on_progress: on_progress,
post_timestamp_checks:
build_post_timestamp_checks(
project_id,
project_data_dir,
plan.language,
published_posts,
published_translations,
@@ -99,6 +100,11 @@ defmodule BDS.Generation do
)
})
completion_message =
"Validation complete (#{length(diff_result.missing_url_paths)} missing, #{length(diff_result.extra_url_paths)} extra, #{length(diff_result.updated_post_url_paths)} updated)"
:ok = report_validation_progress(on_progress, 1.0, completion_message)
{:ok,
%{
sitemap_path: output_path(project, "sitemap.xml"),
@@ -139,6 +145,22 @@ defmodule BDS.Generation do
:ok
end
defp report_validation_progress(nil, _progress, _message), do: :ok
defp report_validation_progress(callback, progress, message) do
callback.(progress, message)
:ok
end
defp report_validation_collection_progress(nil, _current, _total), do: :ok
defp report_validation_collection_progress(_callback, _current, total) when total <= 0, do: :ok
defp report_validation_collection_progress(callback, current, total) do
progress = min(0.49, current / total * 0.5)
callback.(progress, "Collecting sitemap URLs... #{current}/#{total}")
:ok
end
def apply_validation(project_id, sections) when is_binary(project_id) and is_list(sections) do
with {:ok, plan} <- plan_generation(project_id, sections) do
expected_outputs = build_outputs(plan)
@@ -373,6 +395,140 @@ defmodule BDS.Generation do
core_outputs ++ single_outputs ++ archive_outputs ++ sitemap ++ pagefind_outputs ++ asset_outputs
end
defp build_validation_route_paths(plan, published_posts, published_translations, on_progress) do
route_paths = [
core_route_paths(plan),
single_route_paths(plan, published_posts, published_translations),
category_route_paths(plan, published_posts),
tag_route_paths(plan, published_posts),
date_route_paths(plan, published_posts)
]
total_route_count =
route_paths
|> Enum.map(&length/1)
|> Enum.sum()
route_paths
|> List.flatten()
|> Enum.with_index(1)
|> Enum.map(fn {relative_path, index} ->
:ok = report_validation_collection_progress(on_progress, index, total_route_count)
relative_path
end)
end
defp core_route_paths(plan) do
if :core in plan.sections do
["index.html"] ++
(plan.blog_languages
|> Enum.reject(&(&1 == plan.language))
|> Enum.map(&Path.join(&1, "index.html")))
else
[]
end
end
defp single_route_paths(plan, published_posts, published_translations) do
if :single in plan.sections do
post_by_id = Map.new(published_posts, &{&1.id, &1})
translation_paths =
Enum.flat_map(published_posts, fn post ->
post_variant =
if post.language == plan.language do
[]
else
[post_output_path(post, post.language)]
end
translation_variant_paths =
published_translations
|> Enum.filter(&(&1.translation_for == post.id and &1.language != plan.language))
|> Enum.map(fn translation ->
canonical_post = Map.get(post_by_id, post.id, post)
post_output_path(canonical_post, translation.language)
end)
post_variant ++ translation_variant_paths
end)
Enum.map(published_posts, &post_output_path/1) ++ translation_paths
else
[]
end
end
defp category_route_paths(plan, published_posts) do
if :category in plan.sections do
published_posts
|> Enum.flat_map(fn post -> Enum.map(post.categories || [], &{&1, post}) end)
|> Enum.group_by(fn {category, _post} -> category end, fn {_category, post} -> post end)
|> Enum.flat_map(fn {category, posts} ->
category_slug = Slug.slugify(category)
posts
|> Enum.chunk_every(max(plan.max_posts_per_page, 1))
|> Enum.with_index(1)
|> Enum.flat_map(fn {_page_posts, page_number} ->
Enum.map(plan.blog_languages, fn language ->
archive_path(route_language(plan.language, language), ["category", category_slug], page_number)
end)
end)
end)
else
[]
end
end
defp tag_route_paths(plan, published_posts) do
if :tag in plan.sections do
published_posts
|> Enum.flat_map(fn post -> Enum.map(post.tags || [], &{&1, post}) end)
|> Enum.group_by(fn {tag, _post} -> tag end, fn {_tag, post} -> post end)
|> Enum.flat_map(fn {tag, posts} ->
tag_slug = Slug.slugify(tag)
posts
|> Enum.chunk_every(max(plan.max_posts_per_page, 1))
|> Enum.with_index(1)
|> Enum.flat_map(fn {_page_posts, page_number} ->
Enum.map(plan.blog_languages, fn language ->
archive_path(route_language(plan.language, language), ["tag", tag_slug], page_number)
end)
end)
end)
else
[]
end
end
defp date_route_paths(plan, published_posts) do
if :date in plan.sections do
year_paths =
published_posts
|> Enum.group_by(&year_key(&1.created_at))
|> Enum.flat_map(fn {year, _posts} ->
Enum.map(plan.blog_languages, fn language ->
archive_path(route_language(plan.language, language), [year], 1)
end)
end)
month_paths =
published_posts
|> Enum.group_by(&month_key(&1.created_at))
|> Enum.flat_map(fn {{year, month}, _posts} ->
Enum.map(plan.blog_languages, fn language ->
archive_path(route_language(plan.language, language), [year, month], 1)
end)
end)
year_paths ++ month_paths
else
[]
end
end
defp disk_generated_files(project_id) do
project = Projects.get_project!(project_id)
html_root = output_path(project, "")
@@ -1128,14 +1284,11 @@ defmodule BDS.Generation do
defp generated_file_updated_at_map(project_id) do
project_id
|> list_generated_files()
|> case do
{:ok, files} -> Map.new(files, &{&1.relative_path, &1.updated_at})
_other -> %{}
end
|> then(fn {:ok, files} -> Map.new(files, &{&1.relative_path, &1.updated_at}) end)
end
defp build_post_timestamp_checks(
project_id,
project_data_dir,
main_language,
published_posts,
published_translations,
@@ -1155,7 +1308,7 @@ defmodule BDS.Generation do
%{
post_url_path: relative_path_to_url_path(relative_path),
post_file_path: source_full_path(project_id, canonical_variant.file_path),
post_file_path: source_full_path(project_data_dir, canonical_variant.file_path),
generated_updated_at_ms: Map.get(generated_file_updated_at, relative_path, 0)
}
end)
@@ -1180,7 +1333,7 @@ defmodule BDS.Generation do
%{
post_url_path: relative_path_to_url_path(relative_path),
post_file_path: source_full_path(project_id, variant.file_path),
post_file_path: source_full_path(project_data_dir, variant.file_path),
generated_updated_at_ms: Map.get(generated_file_updated_at, relative_path, 0)
}
end)
@@ -1189,21 +1342,25 @@ defmodule BDS.Generation do
canonical_checks ++ translation_checks
end
defp source_full_path(_project_id, file_path) when file_path in [nil, ""], do: nil
defp source_full_path(_project_data_dir, file_path) when file_path in [nil, ""], do: nil
defp source_full_path(project_id, file_path) do
project = Projects.get_project!(project_id)
Path.join(Projects.project_data_dir(project), file_path)
defp source_full_path(project_data_dir, file_path) do
Path.join(project_data_dir, file_path)
end
defp compare_sitemap_to_html(params) do
post_timestamp_checks = Map.get(params, :post_timestamp_checks, [])
index_paths = Path.wildcard(Path.join(params.html_dir, "**/index.html"))
total_compare_steps = max(length(index_paths) + length(post_timestamp_checks), 1)
expected_path_set =
params.sitemap_xml
|> extract_sitemap_locs()
|> Enum.map(&sitemap_loc_to_project_path(&1, params.base_url))
|> MapSet.new()
{existing_html_path_set, zero_byte_html_path_set} = collect_html_index_paths(params.html_dir)
{existing_html_path_set, zero_byte_html_path_set} =
collect_html_index_paths(index_paths, params.html_dir, params.on_progress, total_compare_steps)
missing_url_paths =
expected_path_set
@@ -1224,9 +1381,16 @@ defmodule BDS.Generation do
|> Enum.sort()
updated_post_url_paths =
params
|> Map.get(:post_timestamp_checks, [])
|> Enum.reduce(MapSet.new(), fn check, acc ->
post_timestamp_checks
|> Enum.with_index(1)
|> Enum.reduce(MapSet.new(), fn {check, index}, acc ->
:ok =
report_validation_compare_progress(
params.on_progress,
length(index_paths) + index,
total_compare_steps
)
normalized_url_path = normalize_url_path(check.post_url_path)
cond do
@@ -1297,10 +1461,12 @@ defmodule BDS.Generation do
end
end
defp collect_html_index_paths(html_dir) do
index_paths = Path.wildcard(Path.join(html_dir, "**/index.html"))
defp collect_html_index_paths(index_paths, html_dir, on_progress, total_compare_steps) do
index_paths
|> Enum.with_index(1)
|> Enum.reduce({MapSet.new(), MapSet.new()}, fn {path, index}, {existing, zero_byte} ->
:ok = report_validation_compare_progress(on_progress, index, total_compare_steps)
Enum.reduce(index_paths, {MapSet.new(), MapSet.new()}, fn path, {existing, zero_byte} ->
relative_dir =
path
|> Path.relative_to(html_dir)
@@ -1320,6 +1486,15 @@ defmodule BDS.Generation do
end)
end
defp report_validation_compare_progress(nil, _current, _total), do: :ok
defp report_validation_compare_progress(_callback, _current, total) when total <= 0, do: :ok
defp report_validation_compare_progress(callback, current, total) do
progress = min(0.99, 0.5 + current / total * 0.49)
callback.(progress, "Comparing sitemap to html pages... #{current}/#{total}")
:ok
end
defp normalize_url_path(nil), do: "/"
defp normalize_url_path(url_path) do
@@ -1533,7 +1708,7 @@ defmodule BDS.Generation do
case Regex.run(~r|^/([a-z]{2,3})(/.*)?$|, path) do
[_, language, suffix] ->
if language in additional_languages do
{language, normalize_url_path(suffix || "/")}
{language, normalize_url_path(suffix)}
else
{nil, path}
end

View File

@@ -33,7 +33,7 @@ defmodule BDS.Rendering.Filters do
value
|> to_string()
|> replace_built_in_macros(language, context)
|> Earmark.as_html!()
|> render_markdown_html()
|> rewrite_rendered_html_urls(canonical_post_paths || %{}, canonical_media_paths || %{})
end
@@ -94,10 +94,19 @@ defmodule BDS.Rendering.Filters do
defp parse_macro_params(""), do: %{}
defp parse_macro_params(raw_params) do
Regex.scan(~r/(\w+)=(?:"([^"]*)"|'([^']*)'|([^\s\]]+))/, raw_params)
|> Enum.reduce(%{}, fn [_match, key, double_quoted, single_quoted, bare], acc ->
value = Enum.find([double_quoted, single_quoted, bare], &(&1 not in [nil, ""])) || ""
Map.put(acc, key, value)
Regex.scan(~r/(\w+)=(?:"([^"]*)"|'([^']*)'|([^\s\]]+))/, raw_params, capture: :all_but_first)
|> Enum.reduce(%{}, fn captures, acc ->
case captures do
[key, value] ->
Map.put(acc, key, value)
[key, double_quoted, single_quoted, bare] ->
value = Enum.find([double_quoted, single_quoted, bare], &(&1 not in [nil, ""])) || ""
Map.put(acc, key, value)
_other ->
acc
end
end)
end
@@ -120,6 +129,13 @@ defmodule BDS.Rendering.Filters do
_error -> ""
end
defp render_markdown_html(markdown) do
case Earmark.as_html(markdown) do
{:ok, html, _messages} -> html
{:error, html, _messages} -> html
end
end
defp rewrite_rendered_html_urls(html, canonical_post_paths, canonical_media_paths) do
html
|> rewrite_attribute(