defmodule BDS.Generation.Validation do
  @moduledoc false

  # Validation helpers for generated output:
  #
  #   * comparing the URL paths listed in a sitemap with the `index.html`
  #     files found under an HTML directory,
  #   * turning a list of URL paths into a "validation path plan" that records
  #     which categories, tags, archives and posts were requested,
  #   * deciding whether a generated relative output path is covered by a plan,
  #   * pruning empty directories below the HTML root.

  import BDS.Generation.Paths,
    only: [
      archive_route_segment: 1,
      local_date_parts!: 1,
      normalize_url_path: 1,
      relative_path_to_url_path: 1,
      url_path_to_relative_index_path: 1
    ]

  import BDS.Generation.Progress, only: [report_validation_compare_progress: 3]
  import BDS.Generation.Sitemap, only: [extract_locs: 1, loc_to_project_path: 2]

  alias BDS.Slug

  @doc """
  Builds a map from each generated file's `relative_path` to its `updated_at`.
  """
  @spec generated_file_updated_at_map([map()]) :: map()
  def generated_file_updated_at_map(generated_files) do
    Map.new(generated_files, &{&1.relative_path, &1.updated_at})
  end

  @doc """
  Builds one timestamp check per published post for the main output tree.

  The source file is the post's `:translation_file_path` when set, otherwise
  its `:file_path`. `generated_updated_at_ms` is looked up by the post's output
  path and defaults to `0`.
  """
  @spec build_post_timestamp_checks(String.t(), [map()], map()) :: [map()]
  def build_post_timestamp_checks(
        project_data_dir,
        published_route_posts,
        generated_file_updated_at
      ) do
    Enum.map(published_route_posts, fn post ->
      timestamp_check(
        project_data_dir,
        BDS.Generation.Paths.post_output_path(post),
        Map.get(post, :translation_file_path) || Map.get(post, :file_path),
        generated_file_updated_at
      )
    end)
  end

  @doc """
  Builds one timestamp check per published post for the output tree of
  `language`. Only the post's `:file_path` is used as the source file.
  """
  @spec build_language_post_timestamp_checks(String.t(), String.t(), [map()], map()) :: [map()]
  def build_language_post_timestamp_checks(
        project_data_dir,
        language,
        published_posts,
        generated_file_updated_at
      ) do
    Enum.map(published_posts, fn post ->
      timestamp_check(
        project_data_dir,
        BDS.Generation.Paths.post_output_path(post, language),
        Map.get(post, :file_path),
        generated_file_updated_at
      )
    end)
  end

  # Shapes a single timestamp check entry for the given output path.
  defp timestamp_check(project_data_dir, relative_path, source_file_path, generated_file_updated_at) do
    %{
      post_url_path: relative_path_to_url_path(relative_path),
      post_file_path: source_full_path(project_data_dir, source_file_path),
      generated_updated_at_ms: Map.get(generated_file_updated_at, relative_path, 0)
    }
  end

  # A missing or empty source path yields nil so the check is skipped later.
  defp source_full_path(_project_data_dir, file_path) when file_path in [nil, ""], do: nil
  defp source_full_path(project_data_dir, file_path), do: Path.join(project_data_dir, file_path)

  @doc """
  Compares the URL paths expected by the sitemap with the `index.html` files
  found under `params.html_dir`.

  Required keys in `params`: `:html_dir`, `:sitemap_xml`, `:base_url` and
  `:on_progress`. Optional keys: `:post_timestamp_checks` and
  `:additional_expected_paths` (both default to `[]`).

  Returns a map with:

    * `:missing_url_paths` - expected paths without a non-empty `index.html`,
    * `:extra_url_paths` - paths with an `index.html` (empty, unreadable or
      not) that are not expected,
    * `:updated_post_url_paths` - expected, existing post paths whose source
      file is newer than both the HTML file and the recorded generation time,
    * `:expected_url_count` and `:existing_html_url_count`.

  All path lists are sorted.
  """
  @spec compare_sitemap_to_html(map()) :: map()
  def compare_sitemap_to_html(params) do
    post_timestamp_checks = Map.get(params, :post_timestamp_checks, [])
    index_paths = Path.wildcard(Path.join(params.html_dir, "**/index.html"))

    # Computed once; the progress callback below needs it on every step.
    index_count = length(index_paths)
    total_compare_steps = max(index_count + length(post_timestamp_checks), 1)

    expected_path_set = expected_url_path_set(params)

    {existing_html_path_set, zero_byte_html_path_set} =
      collect_html_index_paths(
        index_paths,
        params.html_dir,
        params.on_progress,
        total_compare_steps
      )

    missing_path_set = MapSet.difference(expected_path_set, existing_html_path_set)

    extra_url_paths =
      existing_html_path_set
      |> MapSet.union(zero_byte_html_path_set)
      |> MapSet.difference(expected_path_set)
      |> Enum.sort()

    updated_post_url_paths =
      post_timestamp_checks
      |> Enum.with_index(1)
      |> Enum.reduce(MapSet.new(), fn {check, index}, acc ->
        :ok =
          report_validation_compare_progress(
            params.on_progress,
            index_count + index,
            total_compare_steps
          )

        url_path = normalize_url_path(check.post_url_path)

        cond do
          not MapSet.member?(expected_path_set, url_path) -> acc
          MapSet.member?(missing_path_set, url_path) -> acc
          check.post_file_path in [nil, ""] -> acc
          post_source_newer_than_html?(params.html_dir, url_path, check) -> MapSet.put(acc, url_path)
          true -> acc
        end
      end)
      |> Enum.sort()

    %{
      missing_url_paths: Enum.sort(missing_path_set),
      extra_url_paths: extra_url_paths,
      updated_post_url_paths: updated_post_url_paths,
      expected_url_count: MapSet.size(expected_path_set),
      existing_html_url_count: MapSet.size(existing_html_path_set)
    }
  end

  # Normalized URL paths from the sitemap plus any additional expected paths.
  defp expected_url_path_set(params) do
    sitemap_paths =
      params.sitemap_xml
      |> extract_locs()
      |> Enum.map(&loc_to_project_path(&1, params.base_url))

    sitemap_paths
    |> Enum.concat(Map.get(params, :additional_expected_paths, []))
    |> MapSet.new(&normalize_url_path/1)
  end

  # True when both files can be stat'ed and the post source was modified after
  # the later of the HTML file's mtime and the recorded generation timestamp.
  # Any stat failure counts as "not newer".
  defp post_source_newer_than_html?(html_dir, url_path, check) do
    html_path = Path.join(html_dir, url_path_to_relative_index_path(url_path))

    with {:ok, html_stat} <- File.stat(html_path, time: :posix),
         {:ok, post_stat} <- File.stat(check.post_file_path, time: :posix) do
      effective_generated_at_ms =
        max(mtime_ms(html_stat), check.generated_updated_at_ms || 0)

      mtime_ms(post_stat) > effective_generated_at_ms
    else
      _other -> false
    end
  end

  # Walks every index.html path, reporting progress, and splits the URL paths
  # into {non-empty files, empty-or-unreadable files}.
  defp collect_html_index_paths(index_paths, html_dir, on_progress, total_compare_steps) do
    index_paths
    |> Enum.with_index(1)
    |> Enum.reduce({MapSet.new(), MapSet.new()}, fn {path, index}, {existing, zero_byte} ->
      :ok = report_validation_compare_progress(on_progress, index, total_compare_steps)

      url_path =
        case path |> Path.relative_to(html_dir) |> Path.dirname() do
          "." -> "/"
          relative_dir -> normalize_url_path("/" <> relative_dir)
        end

      case File.stat(path) do
        {:ok, %{size: size}} when size > 0 ->
          {MapSet.put(existing, url_path), zero_byte}

        # Zero-byte files and stat errors are both treated as "not really there".
        _empty_or_error ->
          {existing, MapSet.put(zero_byte, url_path)}
      end
    end)
  end

  # Integer mtimes are treated as seconds; any other mtime is converted from an
  # Erlang datetime tuple interpreted as UTC.
  defp mtime_ms(%{mtime: mtime}) when is_integer(mtime), do: mtime * 1000

  defp mtime_ms(%{mtime: mtime}) do
    mtime
    |> NaiveDateTime.from_erl!()
    |> DateTime.from_naive!("Etc/UTC")
    |> DateTime.to_unix(:millisecond)
  end

  @doc """
  Returns the missing URL paths followed by the updated post URL paths of a
  comparison report. Absent keys count as empty lists.
  """
  @spec report_paths(map()) :: [String.t()]
  def report_paths(report) do
    Map.get(report, :missing_url_paths, []) ++ Map.get(report, :updated_post_url_paths, [])
  end

  @doc """
  Classifies each URL path into the main plan or into the plan of the
  additional language it is prefixed with.

  The result is the main plan with `:language_plans` set to a map from
  language to that language's plan.
  """
  @spec plan_validation_paths([String.t()], [String.t()]) :: map()
  def plan_validation_paths(paths, additional_languages) do
    {main_plan, language_plans} =
      Enum.reduce(paths, {empty_validation_path_plan(), %{}}, fn path, {plan, by_language} ->
        normalized_path = normalize_url_path(path)

        case extract_language_path(normalized_path, additional_languages) do
          {language, stripped_path} when is_binary(language) ->
            language_plan =
              by_language
              |> Map.get(language, empty_validation_path_plan())
              |> then(&classify_validation_path(stripped_path, &1))

            {plan, Map.put(by_language, language, language_plan)}

          {_no_language, _path} ->
            {classify_validation_path(normalized_path, plan), by_language}
        end
      end)

    Map.put(main_plan, :language_plans, language_plans)
  end

  @doc """
  Returns a plan that requests nothing.

  Note that `:requested_post_routes` starts as a list of route maps; it becomes
  a `MapSet` of route keys only after `build_targeted_validation_plan/2`.
  """
  @spec empty_validation_path_plan() :: map()
  def empty_validation_path_plan do
    %{
      request_root_routes: false,
      requires_fallback_section_render: false,
      requested_category_slugs: MapSet.new(),
      requested_tag_slugs: MapSet.new(),
      requested_years: MapSet.new(),
      requested_year_months: MapSet.new(),
      requested_post_routes: [],
      language_plans: %{}
    }
  end

  # Records a single URL path in the plan. Patterns are tried in order:
  # category, tag, post, year/month, year, root; anything else forces a
  # fallback section render.
  defp classify_validation_path(path, plan) do
    cond do
      captures = Regex.run(~r|^/category/([^/]+)(?:/page/\d+)?$|, path) ->
        [_, slug] = captures
        Map.update!(plan, :requested_category_slugs, &MapSet.put(&1, slug))

      captures = Regex.run(~r|^/tag/([^/]+)(?:/page/\d+)?$|, path) ->
        [_, slug] = captures
        Map.update!(plan, :requested_tag_slugs, &MapSet.put(&1, slug))

      captures = Regex.run(~r|^/(\d{4})/(\d{2})/(\d{2})/([^/]+)$|, path) ->
        [_, year, month, day, slug] = captures

        route = %{
          year: String.to_integer(year),
          month: String.to_integer(month),
          day: String.to_integer(day),
          slug: slug
        }

        Map.update!(plan, :requested_post_routes, &[route | &1])

      captures = Regex.run(~r|^/(\d{4})/(\d{2})(?:/page/\d+)?$|, path) ->
        [_, year, month] = captures
        Map.update!(plan, :requested_year_months, &MapSet.put(&1, "#{year}/#{month}"))

      captures = Regex.run(~r|^/(\d{4})(?:/page/\d+)?$|, path) ->
        [_, year] = captures
        Map.update!(plan, :requested_years, &MapSet.put(&1, String.to_integer(year)))

      path == "/" or Regex.match?(~r|^/page/\d+$|, path) ->
        %{plan | request_root_routes: true}

      true ->
        %{plan | requires_fallback_section_render: true}
    end
  end

  @doc """
  Expands a plan so that every requested post route also requests the
  archives that list it.

  When the plan requires a fallback section render it is returned unchanged
  (including its language plans). Otherwise:

    * `:requested_post_routes` becomes a `MapSet` of `"YYYY/MM/DD/slug"` keys,
    * every requested route adds its year, year/month and the root routes;
      routes matching a published post use the post's local creation date and
      also add the post's categories and tags,
    * requested category and tag slugs are restricted to those present on the
      published posts,
    * each language plan is expanded the same way.
  """
  @spec build_targeted_validation_plan(map(), [map()]) :: map()
  def build_targeted_validation_plan(initial_plan, published_posts) do
    if initial_plan.requires_fallback_section_render do
      initial_plan
    else
      available_category_slugs = slug_set(published_posts, & &1.categories)
      available_tag_slugs = slug_set(published_posts, & &1.tags)

      targeted_post_routes =
        MapSet.new(
          initial_plan.requested_post_routes,
          &route_key(&1.year, &1.month, &1.day, &1.slug)
        )

      enriched =
        Enum.reduce(
          initial_plan.requested_post_routes,
          %{initial_plan | requested_post_routes: targeted_post_routes},
          &enrich_plan_for_route(&2, &1, published_posts)
        )

      language_plans =
        Map.new(initial_plan.language_plans, fn {language, language_plan} ->
          {language, build_targeted_validation_plan(language_plan, published_posts)}
        end)

      %{
        enriched
        | requested_category_slugs:
            MapSet.intersection(enriched.requested_category_slugs, available_category_slugs),
          requested_tag_slugs:
            MapSet.intersection(enriched.requested_tag_slugs, available_tag_slugs),
          language_plans: language_plans
      }
    end
  end

  # Slugified set of all values returned by `field_fun` across the posts.
  defp slug_set(posts, field_fun) do
    posts
    |> Enum.flat_map(&(field_fun.(&1) || []))
    |> MapSet.new(&Slug.slugify/1)
  end

  # Adds the archives affected by one requested post route to the plan.
  defp enrich_plan_for_route(plan, route, published_posts) do
    case Enum.find(published_posts, &post_matches_route?(&1, route)) do
      nil ->
        # No published post at this route: fall back to the date in the route.
        request_date_archives(plan, route.year, route.month)

      post ->
        {year, month, _day} = local_date_parts!(post.created_at)

        plan
        |> Map.update!(:requested_category_slugs, &put_route_segments(&1, post.categories))
        |> Map.update!(:requested_tag_slugs, &put_route_segments(&1, post.tags))
        |> request_date_archives(year, month)
    end
  end

  # Requests the year archive, the year/month archive and the root routes.
  defp request_date_archives(plan, year, month) do
    plan
    |> Map.update!(:requested_years, &MapSet.put(&1, year))
    |> Map.update!(:requested_year_months, &MapSet.put(&1, route_month_key(year, month)))
    |> Map.put(:request_root_routes, true)
  end

  # Adds the archive route segment of every term (nil counts as none).
  defp put_route_segments(set, terms) do
    Enum.reduce(terms || [], set, &MapSet.put(&2, archive_route_segment(&1)))
  end

  defp post_matches_route?(post, route) do
    {year, month, day} = local_date_parts!(post.created_at)

    post.slug == route.slug and year == route.year and month == route.month and
      day == route.day
  end

  defp route_key(year, month, day, slug) do
    "#{year}/#{pad2(month)}/#{pad2(day)}/#{slug}"
  end

  defp route_month_key(year, month), do: "#{year}/#{pad2(month)}"

  # Zero-pads an integer to two digits.
  defp pad2(number), do: number |> Integer.to_string() |> String.pad_leading(2, "0")

  # Splits a normalized URL path into {language, remaining path} when its
  # first segment is one of the additional languages, else {nil, path}.
  #
  # The first segment is accepted purely by membership in
  # `additional_languages`, matching extract_relative_output_language/2, so
  # configured codes that are not 2-3 lowercase letters are recognised too.
  defp extract_language_path(path, additional_languages) do
    case String.split(path, "/", parts: 3) do
      ["", language | rest] when language != "" ->
        if language in additional_languages do
          case rest do
            [] -> {language, "/"}
            [suffix] -> {language, normalize_url_path("/" <> suffix)}
          end
        else
          {nil, path}
        end

      _other ->
        {nil, path}
    end
  end

  @doc """
  Returns whether the generated output at `relative_path` is covered by
  `targeted_plan`.

  Paths whose first segment is an additional language are checked against
  that language's plan; languages without a plan are checked against an empty
  plan.
  """
  @spec targeted_output?(String.t(), map(), String.t() | nil, [String.t()]) :: boolean()
  def targeted_output?(relative_path, targeted_plan, main_language, additional_languages) do
    {language, stripped_path} =
      extract_relative_output_language(relative_path, additional_languages)

    plan =
      case language do
        nil -> targeted_plan
        value -> Map.get(targeted_plan.language_plans, value, empty_validation_path_plan())
      end

    targeted_output_for_plan?(stripped_path, plan, main_language == language or is_nil(language))
  end

  # Splits a relative output path into {language, remaining path} when its
  # first segment is one of the additional languages, else {nil, path}.
  defp extract_relative_output_language(relative_path, additional_languages) do
    case String.split(relative_path, "/", trim: true) do
      [language | rest] ->
        if language in additional_languages do
          # Enum.join/2 rather than Path.join/1: the latter raises on an empty
          # list, which happens when the path is just the language segment.
          {language, Enum.join(rest, "/")}
        else
          {nil, relative_path}
        end

      _other ->
        {nil, relative_path}
    end
  end

  defp targeted_output_for_plan?(
         _relative_path,
         %{requires_fallback_section_render: true},
         _main?
       ),
       do: true

  # Pagination suffixes (`/page/N`) are accepted wherever
  # classify_validation_path/2 accepts them, so a paginated archive that was
  # requested is also recognised as targeted output.
  defp targeted_output_for_plan?(relative_path, plan, _main?) do
    cond do
      relative_path in ["index.html", "404.html", "feed.xml", "atom.xml"] ->
        plan.request_root_routes

      Regex.match?(~r|^page/\d+/index\.html$|, relative_path) ->
        plan.request_root_routes

      captures = Regex.run(~r|^category/([^/]+)(?:/page/\d+)?/index\.html$|, relative_path) ->
        [_, slug] = captures
        MapSet.member?(plan.requested_category_slugs, slug)

      captures = Regex.run(~r|^tag/([^/]+)(?:/page/\d+)?/index\.html$|, relative_path) ->
        [_, slug] = captures
        MapSet.member?(plan.requested_tag_slugs, slug)

      captures = Regex.run(~r|^(\d{4})/(\d{2})/(\d{2})/([^/]+)/index\.html$|, relative_path) ->
        [_, year, month, day, slug] = captures

        post_route_requested?(
          plan.requested_post_routes,
          route_key(
            String.to_integer(year),
            String.to_integer(month),
            String.to_integer(day),
            slug
          )
        )

      captures = Regex.run(~r|^(\d{4})/(\d{2})(?:/page/\d+)?/index\.html$|, relative_path) ->
        [_, year, month] = captures
        MapSet.member?(plan.requested_year_months, "#{year}/#{month}")

      captures = Regex.run(~r|^(\d{4})(?:/page/\d+)?/index\.html$|, relative_path) ->
        [_, year] = captures
        MapSet.member?(plan.requested_years, String.to_integer(year))

      true ->
        false
    end
  end

  # `requested_post_routes` is a MapSet of route keys in a targeted plan but a
  # list of route maps in an untargeted one (for example the empty plan used
  # for an unknown language); accept both instead of crashing on the list.
  defp post_route_requested?(%MapSet{} = routes, key), do: MapSet.member?(routes, key)

  defp post_route_requested?(routes, key) when is_list(routes) do
    Enum.any?(routes, fn
      %{year: year, month: month, day: day, slug: slug} ->
        route_key(year, month, day, slug) == key

      other ->
        other == key
    end)
  end

  @doc """
  Returns whether `relative_path` ends with `"index.html"`.
  """
  @spec route_html_path?(String.t()) :: boolean()
  def route_html_path?(relative_path), do: String.ends_with?(relative_path, "index.html")

  @doc """
  Removes `current_dir` if it is empty, then repeats with its parent, stopping
  at `html_root` or at the first directory that is not empty, cannot be listed
  or cannot be removed.

  Returns `{removed_count, dir}` where `dir` is the directory at which pruning
  stopped.
  """
  @spec prune_empty_parent_dirs(String.t(), String.t()) :: {non_neg_integer(), String.t()}
  def prune_empty_parent_dirs(current_dir, html_root) do
    with false <- Path.expand(current_dir) == Path.expand(html_root),
         {:ok, []} <- File.ls(current_dir),
         :ok <- File.rmdir(current_dir) do
      {count, last_dir} = prune_empty_parent_dirs(Path.dirname(current_dir), html_root)
      {count + 1, last_dir}
    else
      _other -> {0, current_dir}
    end
  end
end