fix: more work on site validation

This commit is contained in:
2026-04-27 15:53:18 +02:00
parent 02abd5de1f
commit 8d3d6a6c91
8 changed files with 351 additions and 38 deletions

View File

@@ -27,6 +27,7 @@ This document provides context and best practices for GitHub Copilot when workin
- we have an allium spec in the specs/ folder. you must weed the specs against built code to make sure you follow the spec.
- when changing the spec, validate the spec with the available command line tool.
- you MUST run tests with command line tools at least once to capture compile errors in tests, do not use the integrated testing of vscode, as that blocks on compile errors
- you MUST run build, test and check dialyzer messages and you MUST treet warnings as errors and fix them. we want clean builds, clean tests and clean dialyzer results
---

View File

@@ -172,7 +172,7 @@ defmodule BDS.Desktop.MenuBar do
defp item_label(:validate_translations), do: "Validate Translations"
defp item_label(:fill_missing_translations), do: "Fill Missing Translations"
defp item_label(:find_duplicates), do: "Find Duplicate Posts"
defp item_label(:generate_sitemap), do: "Generate Sitemap"
defp item_label(:generate_sitemap), do: "Generate Site"
defp item_label(:validate_site), do: "Validate Site"
defp item_label(:upload_site), do: "Upload Site"
defp item_label(:about), do: "About"

View File

@@ -191,7 +191,7 @@ defmodule BDS.Desktop.ShellCommands do
end
defp dispatch("generate_sitemap", project, _params) do
queue_task(project, "generate_sitemap", "Generate Sitemap", "Generation", fn report ->
queue_task(project, "generate_sitemap", "Generate Site", "Generation", fn report ->
{:ok, generation} = Generation.generate_site(project.id, @site_sections, on_progress: report)
report.(1.0, "Generated site output")
%{project_id: project.id, sections: generation.sections, generated_count: length(generation.generated_files)}
@@ -201,7 +201,6 @@ defmodule BDS.Desktop.ShellCommands do
defp dispatch("validate_site", project, _params) do
queue_task(project, "validate_site", "Validate Site", "Validation", fn report ->
{:ok, validation} = Generation.validate_site(project.id, @site_sections, on_progress: report)
report.(1.0, "Site validation complete")
site_validation_result(project.id, validation)
end)
end

View File

@@ -64,34 +64,35 @@ defmodule BDS.Generation do
def validate_site(project_id, sections, opts) when is_binary(project_id) and is_list(sections) and is_list(opts) do
with {:ok, plan} <- plan_generation(project_id, sections) do
expected_outputs = build_outputs(plan)
expected_output_map = Map.new(expected_outputs)
on_progress = progress_callback(opts)
total_outputs = length(expected_outputs)
project = Projects.get_project!(project_id)
project_data_dir = Projects.project_data_dir(project)
published_posts = list_published_posts(project_id)
published_translations = list_published_translations(project_id)
generated_file_updated_at = generated_file_updated_at_map(project_id)
:ok = report_generation_started(on_progress, total_outputs, "generated files")
:ok = report_validation_progress(on_progress, 0.0, "Collecting sitemap URLs...")
Enum.each(1..total_outputs, fn index ->
:ok = report_generation_progress(on_progress, index, total_outputs, "generated files")
end)
sitemap_content = Map.fetch!(expected_output_map, "sitemap.xml")
sitemap_content =
plan
|> build_validation_route_paths(published_posts, published_translations, on_progress)
|> Enum.map(&url_for_output(plan.base_url, &1))
|> render_sitemap()
{:ok, sitemap_write} =
write_generated_file(project_id, "sitemap.xml", sitemap_content)
:ok = report_validation_progress(on_progress, 0.5, "Comparing sitemap to html pages...")
diff_result =
compare_sitemap_to_html(%{
sitemap_xml: sitemap_content,
base_url: plan.base_url,
html_dir: output_path(project, ""),
on_progress: on_progress,
post_timestamp_checks:
build_post_timestamp_checks(
project_id,
project_data_dir,
plan.language,
published_posts,
published_translations,
@@ -99,6 +100,11 @@ defmodule BDS.Generation do
)
})
completion_message =
"Validation complete (#{length(diff_result.missing_url_paths)} missing, #{length(diff_result.extra_url_paths)} extra, #{length(diff_result.updated_post_url_paths)} updated)"
:ok = report_validation_progress(on_progress, 1.0, completion_message)
{:ok,
%{
sitemap_path: output_path(project, "sitemap.xml"),
@@ -139,6 +145,22 @@ defmodule BDS.Generation do
:ok
end
defp report_validation_progress(nil, _progress, _message), do: :ok
defp report_validation_progress(callback, progress, message) do
callback.(progress, message)
:ok
end
defp report_validation_collection_progress(nil, _current, _total), do: :ok
defp report_validation_collection_progress(_callback, _current, total) when total <= 0, do: :ok
defp report_validation_collection_progress(callback, current, total) do
progress = min(0.49, current / total * 0.5)
callback.(progress, "Collecting sitemap URLs... #{current}/#{total}")
:ok
end
def apply_validation(project_id, sections) when is_binary(project_id) and is_list(sections) do
with {:ok, plan} <- plan_generation(project_id, sections) do
expected_outputs = build_outputs(plan)
@@ -373,6 +395,140 @@ defmodule BDS.Generation do
core_outputs ++ single_outputs ++ archive_outputs ++ sitemap ++ pagefind_outputs ++ asset_outputs
end
defp build_validation_route_paths(plan, published_posts, published_translations, on_progress) do
route_paths = [
core_route_paths(plan),
single_route_paths(plan, published_posts, published_translations),
category_route_paths(plan, published_posts),
tag_route_paths(plan, published_posts),
date_route_paths(plan, published_posts)
]
total_route_count =
route_paths
|> Enum.map(&length/1)
|> Enum.sum()
route_paths
|> List.flatten()
|> Enum.with_index(1)
|> Enum.map(fn {relative_path, index} ->
:ok = report_validation_collection_progress(on_progress, index, total_route_count)
relative_path
end)
end
defp core_route_paths(plan) do
if :core in plan.sections do
["index.html"] ++
(plan.blog_languages
|> Enum.reject(&(&1 == plan.language))
|> Enum.map(&Path.join(&1, "index.html")))
else
[]
end
end
defp single_route_paths(plan, published_posts, published_translations) do
if :single in plan.sections do
post_by_id = Map.new(published_posts, &{&1.id, &1})
translation_paths =
Enum.flat_map(published_posts, fn post ->
post_variant =
if post.language == plan.language do
[]
else
[post_output_path(post, post.language)]
end
translation_variant_paths =
published_translations
|> Enum.filter(&(&1.translation_for == post.id and &1.language != plan.language))
|> Enum.map(fn translation ->
canonical_post = Map.get(post_by_id, post.id, post)
post_output_path(canonical_post, translation.language)
end)
post_variant ++ translation_variant_paths
end)
Enum.map(published_posts, &post_output_path/1) ++ translation_paths
else
[]
end
end
defp category_route_paths(plan, published_posts) do
if :category in plan.sections do
published_posts
|> Enum.flat_map(fn post -> Enum.map(post.categories || [], &{&1, post}) end)
|> Enum.group_by(fn {category, _post} -> category end, fn {_category, post} -> post end)
|> Enum.flat_map(fn {category, posts} ->
category_slug = Slug.slugify(category)
posts
|> Enum.chunk_every(max(plan.max_posts_per_page, 1))
|> Enum.with_index(1)
|> Enum.flat_map(fn {_page_posts, page_number} ->
Enum.map(plan.blog_languages, fn language ->
archive_path(route_language(plan.language, language), ["category", category_slug], page_number)
end)
end)
end)
else
[]
end
end
defp tag_route_paths(plan, published_posts) do
if :tag in plan.sections do
published_posts
|> Enum.flat_map(fn post -> Enum.map(post.tags || [], &{&1, post}) end)
|> Enum.group_by(fn {tag, _post} -> tag end, fn {_tag, post} -> post end)
|> Enum.flat_map(fn {tag, posts} ->
tag_slug = Slug.slugify(tag)
posts
|> Enum.chunk_every(max(plan.max_posts_per_page, 1))
|> Enum.with_index(1)
|> Enum.flat_map(fn {_page_posts, page_number} ->
Enum.map(plan.blog_languages, fn language ->
archive_path(route_language(plan.language, language), ["tag", tag_slug], page_number)
end)
end)
end)
else
[]
end
end
defp date_route_paths(plan, published_posts) do
if :date in plan.sections do
year_paths =
published_posts
|> Enum.group_by(&year_key(&1.created_at))
|> Enum.flat_map(fn {year, _posts} ->
Enum.map(plan.blog_languages, fn language ->
archive_path(route_language(plan.language, language), [year], 1)
end)
end)
month_paths =
published_posts
|> Enum.group_by(&month_key(&1.created_at))
|> Enum.flat_map(fn {{year, month}, _posts} ->
Enum.map(plan.blog_languages, fn language ->
archive_path(route_language(plan.language, language), [year, month], 1)
end)
end)
year_paths ++ month_paths
else
[]
end
end
defp disk_generated_files(project_id) do
project = Projects.get_project!(project_id)
html_root = output_path(project, "")
@@ -1128,14 +1284,11 @@ defmodule BDS.Generation do
defp generated_file_updated_at_map(project_id) do
project_id
|> list_generated_files()
|> case do
{:ok, files} -> Map.new(files, &{&1.relative_path, &1.updated_at})
_other -> %{}
end
|> then(fn {:ok, files} -> Map.new(files, &{&1.relative_path, &1.updated_at}) end)
end
defp build_post_timestamp_checks(
project_id,
project_data_dir,
main_language,
published_posts,
published_translations,
@@ -1155,7 +1308,7 @@ defmodule BDS.Generation do
%{
post_url_path: relative_path_to_url_path(relative_path),
post_file_path: source_full_path(project_id, canonical_variant.file_path),
post_file_path: source_full_path(project_data_dir, canonical_variant.file_path),
generated_updated_at_ms: Map.get(generated_file_updated_at, relative_path, 0)
}
end)
@@ -1180,7 +1333,7 @@ defmodule BDS.Generation do
%{
post_url_path: relative_path_to_url_path(relative_path),
post_file_path: source_full_path(project_id, variant.file_path),
post_file_path: source_full_path(project_data_dir, variant.file_path),
generated_updated_at_ms: Map.get(generated_file_updated_at, relative_path, 0)
}
end)
@@ -1189,21 +1342,25 @@ defmodule BDS.Generation do
canonical_checks ++ translation_checks
end
defp source_full_path(_project_id, file_path) when file_path in [nil, ""], do: nil
defp source_full_path(_project_data_dir, file_path) when file_path in [nil, ""], do: nil
defp source_full_path(project_id, file_path) do
project = Projects.get_project!(project_id)
Path.join(Projects.project_data_dir(project), file_path)
defp source_full_path(project_data_dir, file_path) do
Path.join(project_data_dir, file_path)
end
defp compare_sitemap_to_html(params) do
post_timestamp_checks = Map.get(params, :post_timestamp_checks, [])
index_paths = Path.wildcard(Path.join(params.html_dir, "**/index.html"))
total_compare_steps = max(length(index_paths) + length(post_timestamp_checks), 1)
expected_path_set =
params.sitemap_xml
|> extract_sitemap_locs()
|> Enum.map(&sitemap_loc_to_project_path(&1, params.base_url))
|> MapSet.new()
{existing_html_path_set, zero_byte_html_path_set} = collect_html_index_paths(params.html_dir)
{existing_html_path_set, zero_byte_html_path_set} =
collect_html_index_paths(index_paths, params.html_dir, params.on_progress, total_compare_steps)
missing_url_paths =
expected_path_set
@@ -1224,9 +1381,16 @@ defmodule BDS.Generation do
|> Enum.sort()
updated_post_url_paths =
params
|> Map.get(:post_timestamp_checks, [])
|> Enum.reduce(MapSet.new(), fn check, acc ->
post_timestamp_checks
|> Enum.with_index(1)
|> Enum.reduce(MapSet.new(), fn {check, index}, acc ->
:ok =
report_validation_compare_progress(
params.on_progress,
length(index_paths) + index,
total_compare_steps
)
normalized_url_path = normalize_url_path(check.post_url_path)
cond do
@@ -1297,10 +1461,12 @@ defmodule BDS.Generation do
end
end
defp collect_html_index_paths(html_dir) do
index_paths = Path.wildcard(Path.join(html_dir, "**/index.html"))
defp collect_html_index_paths(index_paths, html_dir, on_progress, total_compare_steps) do
index_paths
|> Enum.with_index(1)
|> Enum.reduce({MapSet.new(), MapSet.new()}, fn {path, index}, {existing, zero_byte} ->
:ok = report_validation_compare_progress(on_progress, index, total_compare_steps)
Enum.reduce(index_paths, {MapSet.new(), MapSet.new()}, fn path, {existing, zero_byte} ->
relative_dir =
path
|> Path.relative_to(html_dir)
@@ -1320,6 +1486,15 @@ defmodule BDS.Generation do
end)
end
defp report_validation_compare_progress(nil, _current, _total), do: :ok
defp report_validation_compare_progress(_callback, _current, total) when total <= 0, do: :ok
defp report_validation_compare_progress(callback, current, total) do
progress = min(0.99, 0.5 + current / total * 0.49)
callback.(progress, "Comparing sitemap to html pages... #{current}/#{total}")
:ok
end
defp normalize_url_path(nil), do: "/"
defp normalize_url_path(url_path) do
@@ -1533,7 +1708,7 @@ defmodule BDS.Generation do
case Regex.run(~r|^/([a-z]{2,3})(/.*)?$|, path) do
[_, language, suffix] ->
if language in additional_languages do
{language, normalize_url_path(suffix || "/")}
{language, normalize_url_path(suffix)}
else
{nil, path}
end

View File

@@ -33,7 +33,7 @@ defmodule BDS.Rendering.Filters do
value
|> to_string()
|> replace_built_in_macros(language, context)
|> Earmark.as_html!()
|> render_markdown_html()
|> rewrite_rendered_html_urls(canonical_post_paths || %{}, canonical_media_paths || %{})
end
@@ -94,10 +94,19 @@ defmodule BDS.Rendering.Filters do
defp parse_macro_params(""), do: %{}
defp parse_macro_params(raw_params) do
Regex.scan(~r/(\w+)=(?:"([^"]*)"|'([^']*)'|([^\s\]]+))/, raw_params)
|> Enum.reduce(%{}, fn [_match, key, double_quoted, single_quoted, bare], acc ->
value = Enum.find([double_quoted, single_quoted, bare], &(&1 not in [nil, ""])) || ""
Map.put(acc, key, value)
Regex.scan(~r/(\w+)=(?:"([^"]*)"|'([^']*)'|([^\s\]]+))/, raw_params, capture: :all_but_first)
|> Enum.reduce(%{}, fn captures, acc ->
case captures do
[key, value] ->
Map.put(acc, key, value)
[key, double_quoted, single_quoted, bare] ->
value = Enum.find([double_quoted, single_quoted, bare], &(&1 not in [nil, ""])) || ""
Map.put(acc, key, value)
_other ->
acc
end
end)
end
@@ -120,6 +129,13 @@ defmodule BDS.Rendering.Filters do
_error -> ""
end
defp render_markdown_html(markdown) do
case Earmark.as_html(markdown) do
{:ok, html, _messages} -> html
{:error, html, _messages} -> html
end
end
defp rewrite_rendered_html_urls(html, canonical_post_paths, canonical_media_paths) do
html
|> rewrite_attribute(

View File

@@ -94,6 +94,8 @@ defmodule BDS.Desktop.ShellCommandsTest do
completed = wait_for_task(result.task_id, &(&1.status == :completed and is_map(&1.result)))
assert completed.group_name == "Validation"
assert is_binary(completed.message)
assert String.starts_with?(completed.message, "Validation complete (")
assert completed.result.kind == "open_editor"
assert completed.result.route == "site_validation"
assert is_map(completed.result.payload.summary)

View File

@@ -89,7 +89,7 @@ defmodule BDS.DesktopTest do
assert menu_item(groups, :toggle_assistant_sidebar).native_label == "Toggle Assistant Sidebar\tCTRL+\\"
assert menu_item(groups, :publish_selected).native_label == "Publish Selected\tCTRL+SHIFT+P"
assert menu_item(groups, :preview_post).native_label == "Preview Post\tCTRL+SHIFT+V"
assert menu_item(groups, :generate_sitemap).native_label == "Generate Sitemap\tCTRL+R"
assert menu_item(groups, :generate_sitemap).native_label == "Generate Site\tCTRL+R"
assert menu_item(groups, :validate_site).native_label == "Validate Site\tCTRL+SHIFT+L"
assert menu_item(groups, :upload_site).native_label == "Upload Site\tCTRL+SHIFT+U"
assert menu_item(groups, :metadata_diff).shortcut == nil

View File

@@ -2,6 +2,7 @@ defmodule BDS.GenerationTest do
use ExUnit.Case, async: false
import Ecto.Query
import ExUnit.CaptureIO
alias BDS.Media
alias BDS.Metadata
@@ -352,6 +353,116 @@ defmodule BDS.GenerationTest do
assert post_html =~ "Language"
end
test "validate_site does not crash on unknown macros with quoted params", %{
project: project
} do
assert {:ok, _metadata} =
Metadata.update_project_metadata(project.id, %{
public_url: "https://example.com/blog",
main_language: "en",
blog_languages: ["en"]
})
assert {:ok, post} =
Posts.create_post(%{
project_id: project.id,
title: "Gallery Macro Post",
content: "[[gallery link=\"file\"]]",
language: "en"
})
assert {:ok, _published_post} = Posts.publish_post(post.id)
assert {:ok, _result} = BDS.Generation.generate_site(project.id, [:core, :single])
assert {:ok, report} = BDS.Generation.validate_site(project.id, [:core, :single])
assert report.missing_url_paths == []
assert report.extra_url_paths == []
end
test "validate_site reports old-app phase progress", %{project: project} do
assert {:ok, _metadata} =
Metadata.update_project_metadata(project.id, %{
public_url: "https://example.com/blog",
main_language: "en",
blog_languages: ["en"]
})
assert {:ok, post} =
Posts.create_post(%{
project_id: project.id,
title: "Progress Post",
content: "Progress body",
language: "en"
})
assert {:ok, _published_post} = Posts.publish_post(post.id)
parent = self()
on_progress = fn value, message ->
send(parent, {:validate_progress, value, message})
end
assert {:ok, _report} =
BDS.Generation.validate_site(project.id, [:core, :single], on_progress: on_progress)
events = collect_validate_progress_events()
assert {0.0, "Collecting sitemap URLs..."} in events
assert Enum.any?(events, fn
{value, message}
when is_number(value) and value > 0.0 and value < 0.5 and
is_binary(message) ->
String.starts_with?(message, "Collecting sitemap URLs")
_other ->
false
end)
assert {0.5, "Comparing sitemap to html pages..."} in events
assert Enum.any?(events, fn
{value, message}
when is_number(value) and value > 0.5 and value < 1.0 and is_binary(message) ->
String.starts_with?(message, "Comparing sitemap to html pages")
_other ->
false
end)
assert Enum.any?(events, fn
{1.0, message} -> String.starts_with?(message, "Validation complete (")
_other -> false
end)
end
test "validate_site does not emit markdown parser warnings for unclosed fenced blocks", %{
project: project
} do
assert {:ok, _metadata} =
Metadata.update_project_metadata(project.id, %{
public_url: "https://example.com/blog",
main_language: "en",
blog_languages: ["en"]
})
assert {:ok, post} =
Posts.create_post(%{
project_id: project.id,
title: "Malformed Markdown",
content: "```elixir\nIO.puts(:broken)",
language: "en"
})
assert {:ok, _published_post} = Posts.publish_post(post.id)
stderr =
capture_io(:stderr, fn ->
assert {:ok, _report} = BDS.Generation.validate_site(project.id, [:core, :single])
end)
assert stderr == ""
end
test "generation falls back to bundled default templates when the project has no template files or template rows",
%{project: project, temp_dir: temp_dir} do
File.rm_rf!(Path.join(temp_dir, "templates"))
@@ -891,4 +1002,13 @@ defmodule BDS.GenerationTest do
"/" <> cleaned
end
end
defp collect_validate_progress_events(acc \\ []) do
receive do
{:validate_progress, value, message} ->
collect_validate_progress_events([{value, message} | acc])
after
0 -> Enum.reverse(acc)
end
end
end