From 8d3d6a6c9112220cb19f326faaf291ceec66b7e9 Mon Sep 17 00:00:00 2001 From: Chili Palmer Date: Mon, 27 Apr 2026 15:53:18 +0200 Subject: [PATCH] fix: more work on site validation --- AGENTS.md | 1 + lib/bds/desktop/menu_bar.ex | 2 +- lib/bds/desktop/shell_commands.ex | 3 +- lib/bds/generation.ex | 233 ++++++++++++++++++++--- lib/bds/rendering/filters.ex | 26 ++- test/bds/desktop/shell_commands_test.exs | 2 + test/bds/desktop_test.exs | 2 +- test/bds/generation_test.exs | 120 ++++++++++++ 8 files changed, 351 insertions(+), 38 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index a3555bc..f5c885b 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -27,6 +27,7 @@ This document provides context and best practices for GitHub Copilot when workin - we have an allium spec in the specs/ folder. you must weed the specs against built code to make sure you follow the spec. - when changing the spec, validate the spec with the available command line tool. - you MUST run tests with command line tools at least once to capture compile errors in tests, do not use the integrated testing of vscode, as that blocks on compile errors +- you MUST run build, test and check dialyzer messages and you MUST treet warnings as errors and fix them. we want clean builds, clean tests and clean dialyzer results --- diff --git a/lib/bds/desktop/menu_bar.ex b/lib/bds/desktop/menu_bar.ex index db79376..e4eefd8 100644 --- a/lib/bds/desktop/menu_bar.ex +++ b/lib/bds/desktop/menu_bar.ex @@ -172,7 +172,7 @@ defmodule BDS.Desktop.MenuBar do defp item_label(:validate_translations), do: "Validate Translations" defp item_label(:fill_missing_translations), do: "Fill Missing Translations" defp item_label(:find_duplicates), do: "Find Duplicate Posts" - defp item_label(:generate_sitemap), do: "Generate Sitemap" + defp item_label(:generate_sitemap), do: "Generate Site" defp item_label(:validate_site), do: "Validate Site" defp item_label(:upload_site), do: "Upload Site" defp item_label(:about), do: "About" diff --git a/lib/bds/desktop/shell_commands.ex b/lib/bds/desktop/shell_commands.ex index f4c918b..4347290 100644 --- a/lib/bds/desktop/shell_commands.ex +++ b/lib/bds/desktop/shell_commands.ex @@ -191,7 +191,7 @@ defmodule BDS.Desktop.ShellCommands do end defp dispatch("generate_sitemap", project, _params) do - queue_task(project, "generate_sitemap", "Generate Sitemap", "Generation", fn report -> + queue_task(project, "generate_sitemap", "Generate Site", "Generation", fn report -> {:ok, generation} = Generation.generate_site(project.id, @site_sections, on_progress: report) report.(1.0, "Generated site output") %{project_id: project.id, sections: generation.sections, generated_count: length(generation.generated_files)} @@ -201,7 +201,6 @@ defmodule BDS.Desktop.ShellCommands do defp dispatch("validate_site", project, _params) do queue_task(project, "validate_site", "Validate Site", "Validation", fn report -> {:ok, validation} = Generation.validate_site(project.id, @site_sections, on_progress: report) - report.(1.0, "Site validation complete") site_validation_result(project.id, validation) end) end diff --git a/lib/bds/generation.ex b/lib/bds/generation.ex index 0702d11..53f09d5 100644 --- a/lib/bds/generation.ex +++ b/lib/bds/generation.ex @@ -64,34 +64,35 @@ defmodule BDS.Generation do def validate_site(project_id, sections, opts) when is_binary(project_id) and is_list(sections) and is_list(opts) do with {:ok, plan} <- plan_generation(project_id, sections) do - expected_outputs = build_outputs(plan) - expected_output_map = Map.new(expected_outputs) on_progress = progress_callback(opts) - total_outputs = length(expected_outputs) project = Projects.get_project!(project_id) + project_data_dir = Projects.project_data_dir(project) published_posts = list_published_posts(project_id) published_translations = list_published_translations(project_id) generated_file_updated_at = generated_file_updated_at_map(project_id) - :ok = report_generation_started(on_progress, total_outputs, "generated files") + :ok = report_validation_progress(on_progress, 0.0, "Collecting sitemap URLs...") - Enum.each(1..total_outputs, fn index -> - :ok = report_generation_progress(on_progress, index, total_outputs, "generated files") - end) - - sitemap_content = Map.fetch!(expected_output_map, "sitemap.xml") + sitemap_content = + plan + |> build_validation_route_paths(published_posts, published_translations, on_progress) + |> Enum.map(&url_for_output(plan.base_url, &1)) + |> render_sitemap() {:ok, sitemap_write} = write_generated_file(project_id, "sitemap.xml", sitemap_content) + :ok = report_validation_progress(on_progress, 0.5, "Comparing sitemap to html pages...") + diff_result = compare_sitemap_to_html(%{ sitemap_xml: sitemap_content, base_url: plan.base_url, html_dir: output_path(project, ""), + on_progress: on_progress, post_timestamp_checks: build_post_timestamp_checks( - project_id, + project_data_dir, plan.language, published_posts, published_translations, @@ -99,6 +100,11 @@ defmodule BDS.Generation do ) }) + completion_message = + "Validation complete (#{length(diff_result.missing_url_paths)} missing, #{length(diff_result.extra_url_paths)} extra, #{length(diff_result.updated_post_url_paths)} updated)" + + :ok = report_validation_progress(on_progress, 1.0, completion_message) + {:ok, %{ sitemap_path: output_path(project, "sitemap.xml"), @@ -139,6 +145,22 @@ defmodule BDS.Generation do :ok end + defp report_validation_progress(nil, _progress, _message), do: :ok + + defp report_validation_progress(callback, progress, message) do + callback.(progress, message) + :ok + end + + defp report_validation_collection_progress(nil, _current, _total), do: :ok + defp report_validation_collection_progress(_callback, _current, total) when total <= 0, do: :ok + + defp report_validation_collection_progress(callback, current, total) do + progress = min(0.49, current / total * 0.5) + callback.(progress, "Collecting sitemap URLs... #{current}/#{total}") + :ok + end + def apply_validation(project_id, sections) when is_binary(project_id) and is_list(sections) do with {:ok, plan} <- plan_generation(project_id, sections) do expected_outputs = build_outputs(plan) @@ -373,6 +395,140 @@ defmodule BDS.Generation do core_outputs ++ single_outputs ++ archive_outputs ++ sitemap ++ pagefind_outputs ++ asset_outputs end + defp build_validation_route_paths(plan, published_posts, published_translations, on_progress) do + route_paths = [ + core_route_paths(plan), + single_route_paths(plan, published_posts, published_translations), + category_route_paths(plan, published_posts), + tag_route_paths(plan, published_posts), + date_route_paths(plan, published_posts) + ] + + total_route_count = + route_paths + |> Enum.map(&length/1) + |> Enum.sum() + + route_paths + |> List.flatten() + |> Enum.with_index(1) + |> Enum.map(fn {relative_path, index} -> + :ok = report_validation_collection_progress(on_progress, index, total_route_count) + relative_path + end) + end + + defp core_route_paths(plan) do + if :core in plan.sections do + ["index.html"] ++ + (plan.blog_languages + |> Enum.reject(&(&1 == plan.language)) + |> Enum.map(&Path.join(&1, "index.html"))) + else + [] + end + end + + defp single_route_paths(plan, published_posts, published_translations) do + if :single in plan.sections do + post_by_id = Map.new(published_posts, &{&1.id, &1}) + + translation_paths = + Enum.flat_map(published_posts, fn post -> + post_variant = + if post.language == plan.language do + [] + else + [post_output_path(post, post.language)] + end + + translation_variant_paths = + published_translations + |> Enum.filter(&(&1.translation_for == post.id and &1.language != plan.language)) + |> Enum.map(fn translation -> + canonical_post = Map.get(post_by_id, post.id, post) + post_output_path(canonical_post, translation.language) + end) + + post_variant ++ translation_variant_paths + end) + + Enum.map(published_posts, &post_output_path/1) ++ translation_paths + else + [] + end + end + + defp category_route_paths(plan, published_posts) do + if :category in plan.sections do + published_posts + |> Enum.flat_map(fn post -> Enum.map(post.categories || [], &{&1, post}) end) + |> Enum.group_by(fn {category, _post} -> category end, fn {_category, post} -> post end) + |> Enum.flat_map(fn {category, posts} -> + category_slug = Slug.slugify(category) + + posts + |> Enum.chunk_every(max(plan.max_posts_per_page, 1)) + |> Enum.with_index(1) + |> Enum.flat_map(fn {_page_posts, page_number} -> + Enum.map(plan.blog_languages, fn language -> + archive_path(route_language(plan.language, language), ["category", category_slug], page_number) + end) + end) + end) + else + [] + end + end + + defp tag_route_paths(plan, published_posts) do + if :tag in plan.sections do + published_posts + |> Enum.flat_map(fn post -> Enum.map(post.tags || [], &{&1, post}) end) + |> Enum.group_by(fn {tag, _post} -> tag end, fn {_tag, post} -> post end) + |> Enum.flat_map(fn {tag, posts} -> + tag_slug = Slug.slugify(tag) + + posts + |> Enum.chunk_every(max(plan.max_posts_per_page, 1)) + |> Enum.with_index(1) + |> Enum.flat_map(fn {_page_posts, page_number} -> + Enum.map(plan.blog_languages, fn language -> + archive_path(route_language(plan.language, language), ["tag", tag_slug], page_number) + end) + end) + end) + else + [] + end + end + + defp date_route_paths(plan, published_posts) do + if :date in plan.sections do + year_paths = + published_posts + |> Enum.group_by(&year_key(&1.created_at)) + |> Enum.flat_map(fn {year, _posts} -> + Enum.map(plan.blog_languages, fn language -> + archive_path(route_language(plan.language, language), [year], 1) + end) + end) + + month_paths = + published_posts + |> Enum.group_by(&month_key(&1.created_at)) + |> Enum.flat_map(fn {{year, month}, _posts} -> + Enum.map(plan.blog_languages, fn language -> + archive_path(route_language(plan.language, language), [year, month], 1) + end) + end) + + year_paths ++ month_paths + else + [] + end + end + defp disk_generated_files(project_id) do project = Projects.get_project!(project_id) html_root = output_path(project, "") @@ -1128,14 +1284,11 @@ defmodule BDS.Generation do defp generated_file_updated_at_map(project_id) do project_id |> list_generated_files() - |> case do - {:ok, files} -> Map.new(files, &{&1.relative_path, &1.updated_at}) - _other -> %{} - end + |> then(fn {:ok, files} -> Map.new(files, &{&1.relative_path, &1.updated_at}) end) end defp build_post_timestamp_checks( - project_id, + project_data_dir, main_language, published_posts, published_translations, @@ -1155,7 +1308,7 @@ defmodule BDS.Generation do %{ post_url_path: relative_path_to_url_path(relative_path), - post_file_path: source_full_path(project_id, canonical_variant.file_path), + post_file_path: source_full_path(project_data_dir, canonical_variant.file_path), generated_updated_at_ms: Map.get(generated_file_updated_at, relative_path, 0) } end) @@ -1180,7 +1333,7 @@ defmodule BDS.Generation do %{ post_url_path: relative_path_to_url_path(relative_path), - post_file_path: source_full_path(project_id, variant.file_path), + post_file_path: source_full_path(project_data_dir, variant.file_path), generated_updated_at_ms: Map.get(generated_file_updated_at, relative_path, 0) } end) @@ -1189,21 +1342,25 @@ defmodule BDS.Generation do canonical_checks ++ translation_checks end - defp source_full_path(_project_id, file_path) when file_path in [nil, ""], do: nil + defp source_full_path(_project_data_dir, file_path) when file_path in [nil, ""], do: nil - defp source_full_path(project_id, file_path) do - project = Projects.get_project!(project_id) - Path.join(Projects.project_data_dir(project), file_path) + defp source_full_path(project_data_dir, file_path) do + Path.join(project_data_dir, file_path) end defp compare_sitemap_to_html(params) do + post_timestamp_checks = Map.get(params, :post_timestamp_checks, []) + index_paths = Path.wildcard(Path.join(params.html_dir, "**/index.html")) + total_compare_steps = max(length(index_paths) + length(post_timestamp_checks), 1) + expected_path_set = params.sitemap_xml |> extract_sitemap_locs() |> Enum.map(&sitemap_loc_to_project_path(&1, params.base_url)) |> MapSet.new() - {existing_html_path_set, zero_byte_html_path_set} = collect_html_index_paths(params.html_dir) + {existing_html_path_set, zero_byte_html_path_set} = + collect_html_index_paths(index_paths, params.html_dir, params.on_progress, total_compare_steps) missing_url_paths = expected_path_set @@ -1224,9 +1381,16 @@ defmodule BDS.Generation do |> Enum.sort() updated_post_url_paths = - params - |> Map.get(:post_timestamp_checks, []) - |> Enum.reduce(MapSet.new(), fn check, acc -> + post_timestamp_checks + |> Enum.with_index(1) + |> Enum.reduce(MapSet.new(), fn {check, index}, acc -> + :ok = + report_validation_compare_progress( + params.on_progress, + length(index_paths) + index, + total_compare_steps + ) + normalized_url_path = normalize_url_path(check.post_url_path) cond do @@ -1297,10 +1461,12 @@ defmodule BDS.Generation do end end - defp collect_html_index_paths(html_dir) do - index_paths = Path.wildcard(Path.join(html_dir, "**/index.html")) + defp collect_html_index_paths(index_paths, html_dir, on_progress, total_compare_steps) do + index_paths + |> Enum.with_index(1) + |> Enum.reduce({MapSet.new(), MapSet.new()}, fn {path, index}, {existing, zero_byte} -> + :ok = report_validation_compare_progress(on_progress, index, total_compare_steps) - Enum.reduce(index_paths, {MapSet.new(), MapSet.new()}, fn path, {existing, zero_byte} -> relative_dir = path |> Path.relative_to(html_dir) @@ -1320,6 +1486,15 @@ defmodule BDS.Generation do end) end + defp report_validation_compare_progress(nil, _current, _total), do: :ok + defp report_validation_compare_progress(_callback, _current, total) when total <= 0, do: :ok + + defp report_validation_compare_progress(callback, current, total) do + progress = min(0.99, 0.5 + current / total * 0.49) + callback.(progress, "Comparing sitemap to html pages... #{current}/#{total}") + :ok + end + defp normalize_url_path(nil), do: "/" defp normalize_url_path(url_path) do @@ -1533,7 +1708,7 @@ defmodule BDS.Generation do case Regex.run(~r|^/([a-z]{2,3})(/.*)?$|, path) do [_, language, suffix] -> if language in additional_languages do - {language, normalize_url_path(suffix || "/")} + {language, normalize_url_path(suffix)} else {nil, path} end diff --git a/lib/bds/rendering/filters.ex b/lib/bds/rendering/filters.ex index ba28544..da3fc10 100644 --- a/lib/bds/rendering/filters.ex +++ b/lib/bds/rendering/filters.ex @@ -33,7 +33,7 @@ defmodule BDS.Rendering.Filters do value |> to_string() |> replace_built_in_macros(language, context) - |> Earmark.as_html!() + |> render_markdown_html() |> rewrite_rendered_html_urls(canonical_post_paths || %{}, canonical_media_paths || %{}) end @@ -94,10 +94,19 @@ defmodule BDS.Rendering.Filters do defp parse_macro_params(""), do: %{} defp parse_macro_params(raw_params) do - Regex.scan(~r/(\w+)=(?:"([^"]*)"|'([^']*)'|([^\s\]]+))/, raw_params) - |> Enum.reduce(%{}, fn [_match, key, double_quoted, single_quoted, bare], acc -> - value = Enum.find([double_quoted, single_quoted, bare], &(&1 not in [nil, ""])) || "" - Map.put(acc, key, value) + Regex.scan(~r/(\w+)=(?:"([^"]*)"|'([^']*)'|([^\s\]]+))/, raw_params, capture: :all_but_first) + |> Enum.reduce(%{}, fn captures, acc -> + case captures do + [key, value] -> + Map.put(acc, key, value) + + [key, double_quoted, single_quoted, bare] -> + value = Enum.find([double_quoted, single_quoted, bare], &(&1 not in [nil, ""])) || "" + Map.put(acc, key, value) + + _other -> + acc + end end) end @@ -120,6 +129,13 @@ defmodule BDS.Rendering.Filters do _error -> "" end + defp render_markdown_html(markdown) do + case Earmark.as_html(markdown) do + {:ok, html, _messages} -> html + {:error, html, _messages} -> html + end + end + defp rewrite_rendered_html_urls(html, canonical_post_paths, canonical_media_paths) do html |> rewrite_attribute( diff --git a/test/bds/desktop/shell_commands_test.exs b/test/bds/desktop/shell_commands_test.exs index 1cfade9..cd50ad2 100644 --- a/test/bds/desktop/shell_commands_test.exs +++ b/test/bds/desktop/shell_commands_test.exs @@ -94,6 +94,8 @@ defmodule BDS.Desktop.ShellCommandsTest do completed = wait_for_task(result.task_id, &(&1.status == :completed and is_map(&1.result))) assert completed.group_name == "Validation" + assert is_binary(completed.message) + assert String.starts_with?(completed.message, "Validation complete (") assert completed.result.kind == "open_editor" assert completed.result.route == "site_validation" assert is_map(completed.result.payload.summary) diff --git a/test/bds/desktop_test.exs b/test/bds/desktop_test.exs index 18325d5..0fb81d0 100644 --- a/test/bds/desktop_test.exs +++ b/test/bds/desktop_test.exs @@ -89,7 +89,7 @@ defmodule BDS.DesktopTest do assert menu_item(groups, :toggle_assistant_sidebar).native_label == "Toggle Assistant Sidebar\tCTRL+\\" assert menu_item(groups, :publish_selected).native_label == "Publish Selected\tCTRL+SHIFT+P" assert menu_item(groups, :preview_post).native_label == "Preview Post\tCTRL+SHIFT+V" - assert menu_item(groups, :generate_sitemap).native_label == "Generate Sitemap\tCTRL+R" + assert menu_item(groups, :generate_sitemap).native_label == "Generate Site\tCTRL+R" assert menu_item(groups, :validate_site).native_label == "Validate Site\tCTRL+SHIFT+L" assert menu_item(groups, :upload_site).native_label == "Upload Site\tCTRL+SHIFT+U" assert menu_item(groups, :metadata_diff).shortcut == nil diff --git a/test/bds/generation_test.exs b/test/bds/generation_test.exs index ec78207..c16e9bc 100644 --- a/test/bds/generation_test.exs +++ b/test/bds/generation_test.exs @@ -2,6 +2,7 @@ defmodule BDS.GenerationTest do use ExUnit.Case, async: false import Ecto.Query + import ExUnit.CaptureIO alias BDS.Media alias BDS.Metadata @@ -352,6 +353,116 @@ defmodule BDS.GenerationTest do assert post_html =~ "Language" end + test "validate_site does not crash on unknown macros with quoted params", %{ + project: project + } do + assert {:ok, _metadata} = + Metadata.update_project_metadata(project.id, %{ + public_url: "https://example.com/blog", + main_language: "en", + blog_languages: ["en"] + }) + + assert {:ok, post} = + Posts.create_post(%{ + project_id: project.id, + title: "Gallery Macro Post", + content: "[[gallery link=\"file\"]]", + language: "en" + }) + + assert {:ok, _published_post} = Posts.publish_post(post.id) + assert {:ok, _result} = BDS.Generation.generate_site(project.id, [:core, :single]) + + assert {:ok, report} = BDS.Generation.validate_site(project.id, [:core, :single]) + assert report.missing_url_paths == [] + assert report.extra_url_paths == [] + end + + test "validate_site reports old-app phase progress", %{project: project} do + assert {:ok, _metadata} = + Metadata.update_project_metadata(project.id, %{ + public_url: "https://example.com/blog", + main_language: "en", + blog_languages: ["en"] + }) + + assert {:ok, post} = + Posts.create_post(%{ + project_id: project.id, + title: "Progress Post", + content: "Progress body", + language: "en" + }) + + assert {:ok, _published_post} = Posts.publish_post(post.id) + + parent = self() + + on_progress = fn value, message -> + send(parent, {:validate_progress, value, message}) + end + + assert {:ok, _report} = + BDS.Generation.validate_site(project.id, [:core, :single], on_progress: on_progress) + + events = collect_validate_progress_events() + + assert {0.0, "Collecting sitemap URLs..."} in events + assert Enum.any?(events, fn + {value, message} + when is_number(value) and value > 0.0 and value < 0.5 and + is_binary(message) -> + String.starts_with?(message, "Collecting sitemap URLs") + + _other -> + false + end) + + assert {0.5, "Comparing sitemap to html pages..."} in events + assert Enum.any?(events, fn + {value, message} + when is_number(value) and value > 0.5 and value < 1.0 and is_binary(message) -> + String.starts_with?(message, "Comparing sitemap to html pages") + + _other -> + false + end) + + assert Enum.any?(events, fn + {1.0, message} -> String.starts_with?(message, "Validation complete (") + _other -> false + end) + end + + test "validate_site does not emit markdown parser warnings for unclosed fenced blocks", %{ + project: project + } do + assert {:ok, _metadata} = + Metadata.update_project_metadata(project.id, %{ + public_url: "https://example.com/blog", + main_language: "en", + blog_languages: ["en"] + }) + + assert {:ok, post} = + Posts.create_post(%{ + project_id: project.id, + title: "Malformed Markdown", + content: "```elixir\nIO.puts(:broken)", + language: "en" + }) + + assert {:ok, _published_post} = Posts.publish_post(post.id) + + stderr = + capture_io(:stderr, fn -> + assert {:ok, _report} = BDS.Generation.validate_site(project.id, [:core, :single]) + end) + + assert stderr == "" + end + test "generation falls back to bundled default templates when the project has no template files or template rows", %{project: project, temp_dir: temp_dir} do File.rm_rf!(Path.join(temp_dir, "templates")) @@ -891,4 +1002,13 @@ defmodule BDS.GenerationTest do "/" <> cleaned end end + + defp collect_validate_progress_events(acc \\ []) do + receive do + {:validate_progress, value, message} -> + collect_validate_progress_events([{value, message} | acc]) + after + 0 -> Enum.reverse(acc) + end + end end