Files
bDS2/lib/bds/generation/validation.ex
2026-05-01 08:57:48 +02:00

446 lines
16 KiB
Elixir

defmodule BDS.Generation.Validation do
@moduledoc false
import BDS.Generation.Paths,
only: [
archive_route_segment: 1,
local_date_parts!: 1,
normalize_url_path: 1,
relative_path_to_url_path: 1,
url_path_to_relative_index_path: 1
]
import BDS.Generation.Progress, only: [report_validation_compare_progress: 3]
import BDS.Generation.Sitemap, only: [extract_locs: 1, loc_to_project_path: 2]
alias BDS.Slug
@doc """
Indexes generated-file records by their relative output path.

Returns a map from each record's `:relative_path` to its `:updated_at` value. When
several records share a path, the last one in `generated_files` wins.
"""
@spec generated_file_updated_at_map([map()]) :: map()
def generated_file_updated_at_map(generated_files) do
  for generated_file <- generated_files, into: %{} do
    {generated_file.relative_path, generated_file.updated_at}
  end
end
@doc """
Builds one timestamp check per post in `published_route_posts`.

Each check is a map with:

  * `:post_url_path` - the URL path derived from the post's output path
  * `:post_file_path` - the post's source file joined onto `project_data_dir`; the
    `:translation_file_path` is preferred over `:file_path`, and the value is `nil`
    when neither is set
  * `:generated_updated_at_ms` - the entry of `generated_file_updated_at` for the
    post's output path, or `0` when there is none
"""
@spec build_post_timestamp_checks(String.t(), [map()], map()) :: [map()]
def build_post_timestamp_checks(project_data_dir, published_route_posts, generated_file_updated_at) do
  for post <- published_route_posts do
    output_path = BDS.Generation.Paths.post_output_path(post)
    source_file = Map.get(post, :translation_file_path) || Map.get(post, :file_path)

    %{
      post_url_path: relative_path_to_url_path(output_path),
      post_file_path: source_full_path(project_data_dir, source_file),
      generated_updated_at_ms: Map.get(generated_file_updated_at, output_path, 0)
    }
  end
end
@doc """
Builds one timestamp check per post for the output tree of `language`.

Same shape as the checks produced by `build_post_timestamp_checks/3`, except that the
output path is computed for `language` and the source file is always the post's
`:file_path` (`nil` when that is unset or empty).
"""
@spec build_language_post_timestamp_checks(String.t(), String.t(), [map()], map()) :: [map()]
def build_language_post_timestamp_checks(
      project_data_dir,
      language,
      published_posts,
      generated_file_updated_at
    ) do
  for post <- published_posts do
    output_path = BDS.Generation.Paths.post_output_path(post, language)
    source_file = Map.get(post, :file_path)

    %{
      post_url_path: relative_path_to_url_path(output_path),
      post_file_path: source_full_path(project_data_dir, source_file),
      generated_updated_at_ms: Map.get(generated_file_updated_at, output_path, 0)
    }
  end
end
# Resolves a post's source file against the project data directory.
# A missing (`nil`) or empty file path yields `nil` instead of a path.
defp source_full_path(_project_data_dir, nil), do: nil
defp source_full_path(_project_data_dir, ""), do: nil

defp source_full_path(project_data_dir, file_path) do
  Path.join(project_data_dir, file_path)
end
@doc """
Compares the URLs a sitemap promises with the `index.html` files found on disk.

`params` must contain `:sitemap_xml`, `:base_url`, `:html_dir` and `:on_progress`.
Optional keys are `:post_timestamp_checks` (default `[]`) and
`:additional_expected_paths` (default `[]`).

Returns a map with:

  * `:missing_url_paths` - sorted expected paths that have no non-empty `index.html`
  * `:extra_url_paths` - sorted paths that have an `index.html` on disk (including
    empty or unreadable ones) but are not expected
  * `:updated_post_url_paths` - sorted paths of posts whose source file is newer than
    both the generated HTML file and the recorded generation timestamp
  * `:expected_url_count` and `:existing_html_url_count` - sizes of the two path sets

Progress is reported once per `index.html` file and once per timestamp check.
"""
@spec compare_sitemap_to_html(map()) :: map()
def compare_sitemap_to_html(params) do
  post_timestamp_checks = Map.get(params, :post_timestamp_checks, [])
  index_paths = Path.wildcard(Path.join(params.html_dir, "**/index.html"))

  # Counted once: the timestamp loop below needs it as a progress offset on every step.
  index_count = length(index_paths)
  total_compare_steps = max(index_count + length(post_timestamp_checks), 1)

  expected_path_set =
    params.sitemap_xml
    |> extract_locs()
    |> Enum.map(&loc_to_project_path(&1, params.base_url))
    |> Enum.concat(Map.get(params, :additional_expected_paths, []))
    |> MapSet.new(&normalize_url_path/1)

  {existing_html_path_set, zero_byte_html_path_set} =
    collect_html_index_paths(index_paths, params.html_dir, params.on_progress, total_compare_steps)

  missing_url_paths =
    expected_path_set
    |> MapSet.difference(existing_html_path_set)
    |> MapSet.to_list()
    |> Enum.sort()

  extra_url_paths =
    existing_html_path_set
    |> MapSet.union(zero_byte_html_path_set)
    |> MapSet.difference(expected_path_set)
    |> MapSet.to_list()
    |> Enum.sort()

  # A post is only worth a timestamp comparison when it is expected and its HTML
  # exists; "expected and not missing" is exactly this intersection, and a set
  # lookup avoids scanning the missing list for every check.
  comparable_path_set = MapSet.intersection(expected_path_set, existing_html_path_set)

  updated_post_url_paths =
    post_timestamp_checks
    |> Enum.with_index(1)
    |> Enum.reduce(MapSet.new(), fn {check, index}, acc ->
      :ok =
        report_validation_compare_progress(
          params.on_progress,
          index_count + index,
          total_compare_steps
        )

      normalized_url_path = normalize_url_path(check.post_url_path)

      cond do
        not MapSet.member?(comparable_path_set, normalized_url_path) ->
          acc

        check.post_file_path in [nil, ""] ->
          acc

        true ->
          html_path =
            Path.join(params.html_dir, url_path_to_relative_index_path(normalized_url_path))

          # The generated file counts as being as new as the later of its on-disk
          # mtime and the recorded generation timestamp. Stat failures on either
          # side mean "not updated".
          with {:ok, html_stat} <- File.stat(html_path, time: :posix),
               {:ok, post_stat} <- File.stat(check.post_file_path, time: :posix),
               effective_generated_at_ms =
                 max(mtime_ms(html_stat), check.generated_updated_at_ms || 0),
               true <- mtime_ms(post_stat) > effective_generated_at_ms do
            MapSet.put(acc, normalized_url_path)
          else
            _other -> acc
          end
      end
    end)
    |> MapSet.to_list()
    |> Enum.sort()

  %{
    missing_url_paths: missing_url_paths,
    extra_url_paths: extra_url_paths,
    updated_post_url_paths: updated_post_url_paths,
    expected_url_count: MapSet.size(expected_path_set),
    existing_html_url_count: MapSet.size(existing_html_path_set)
  }
end
# Walks the discovered `index.html` files and splits their URL paths into two sets:
# `{non_empty, empty_or_unreadable}`. A file counts as non-empty only when it can be
# stat'ed and its size is greater than zero. Progress is reported before each file
# is inspected, using the file's 1-based position as the step number.
defp collect_html_index_paths(index_paths, html_dir, on_progress, total_compare_steps) do
  initial = {MapSet.new(), MapSet.new()}

  index_paths
  |> Enum.with_index(1)
  |> Enum.reduce(initial, fn {index_file, step}, {non_empty, empty_or_unreadable} ->
    :ok = report_validation_compare_progress(on_progress, step, total_compare_steps)

    # The directory holding index.html is the URL path; the root directory maps to "/".
    url_path =
      case Path.dirname(Path.relative_to(index_file, html_dir)) do
        "." -> "/"
        dir -> normalize_url_path("/" <> dir)
      end

    if match?({:ok, %{size: size}} when size > 0, File.stat(index_file)) do
      {MapSet.put(non_empty, url_path), empty_or_unreadable}
    else
      {non_empty, MapSet.put(empty_or_unreadable, url_path)}
    end
  end)
end
# Converts the `:mtime` of a file stat into milliseconds.
# An integer mtime is multiplied by 1000; any other value is treated as an Erlang
# `{{y, m, d}, {h, min, s}}` tuple and interpreted as UTC.
defp mtime_ms(%{mtime: mtime}) do
  if is_integer(mtime) do
    mtime * 1000
  else
    naive = NaiveDateTime.from_erl!(mtime)
    utc = DateTime.from_naive!(naive, "Etc/UTC")
    DateTime.to_unix(utc, :millisecond)
  end
end
@doc """
Lists the URL paths from a comparison report that need attention.

Returns the report's `:missing_url_paths` followed by its `:updated_post_url_paths`;
either key may be absent and then contributes nothing.
"""
@spec report_paths(map()) :: [String.t()]
def report_paths(report) do
  missing = Map.get(report, :missing_url_paths, [])
  updated = Map.get(report, :updated_post_url_paths, [])

  missing ++ updated
end
@doc """
Turns a list of URL paths into a validation plan.

Every path is normalized first. A path whose first segment is one of
`additional_languages` is classified into that language's own sub-plan (with the
language prefix removed); all other paths are classified into the main plan. The
sub-plans are returned under `:language_plans`, keyed by language.
"""
@spec plan_validation_paths([String.t()], [String.t()]) :: map()
def plan_validation_paths(paths, additional_languages) do
  initial = {empty_validation_path_plan(), %{}}

  {main_plan, language_plans} =
    Enum.reduce(paths, initial, fn path, {main_acc, by_language} ->
      normalized_path = normalize_url_path(path)

      case extract_language_path(normalized_path, additional_languages) do
        {language, stripped_path} when is_binary(language) ->
          updated_language_plan =
            by_language
            |> Map.get(language, empty_validation_path_plan())
            |> then(&classify_validation_path(stripped_path, &1))

          {main_acc, Map.put(by_language, language, updated_language_plan)}

        {_no_language, _path} ->
          {classify_validation_path(normalized_path, main_acc), by_language}
      end
    end)

  %{main_plan | language_plans: language_plans}
end
@doc """
Returns a validation plan that requests nothing.

Both flags are `false`, the four slug/year collections are empty `MapSet`s,
`:requested_post_routes` is an empty list and `:language_plans` is an empty map.
"""
@spec empty_validation_path_plan() :: map()
def empty_validation_path_plan do
  nothing_requested = MapSet.new()

  Map.new(
    request_root_routes: false,
    requires_fallback_section_render: false,
    requested_category_slugs: nothing_requested,
    requested_tag_slugs: nothing_requested,
    requested_years: nothing_requested,
    requested_year_months: nothing_requested,
    requested_post_routes: [],
    language_plans: %{}
  )
end
# Records a single normalized URL path in `plan`.
#
# Patterns are tried in this order, and the first match wins:
#   /category/<slug>[/page/N]  -> slug added to :requested_category_slugs
#   /tag/<slug>[/page/N]       -> slug added to :requested_tag_slugs
#   /YYYY/MM/DD/<slug>         -> route map prepended to :requested_post_routes
#   /YYYY/MM[/page/N]          -> "YYYY/MM" added to :requested_year_months
#   /YYYY[/page/N]             -> integer year added to :requested_years
#   / or /page/N               -> :request_root_routes set
# Anything else sets :requires_fallback_section_render.
defp classify_validation_path(path, plan) do
  cond do
    category = Regex.run(~r|^/category/([^/]+)(?:/page/\d+)?$|, path) ->
      [_whole, slug] = category
      update_in(plan.requested_category_slugs, &MapSet.put(&1, slug))

    tag = Regex.run(~r|^/tag/([^/]+)(?:/page/\d+)?$|, path) ->
      [_whole, slug] = tag
      update_in(plan.requested_tag_slugs, &MapSet.put(&1, slug))

    post = Regex.run(~r|^/(\d{4})/(\d{2})/(\d{2})/([^/]+)$|, path) ->
      [_whole, year, month, day, slug] = post

      route = %{
        year: String.to_integer(year),
        month: String.to_integer(month),
        day: String.to_integer(day),
        slug: slug
      }

      update_in(plan.requested_post_routes, &[route | &1])

    year_month = Regex.run(~r|^/(\d{4})/(\d{2})(?:/page/\d+)?$|, path) ->
      [_whole, year, month] = year_month
      update_in(plan.requested_year_months, &MapSet.put(&1, "#{year}/#{month}"))

    year_only = Regex.run(~r|^/(\d{4})(?:/page/\d+)?$|, path) ->
      [_whole, year] = year_only
      update_in(plan.requested_years, &MapSet.put(&1, String.to_integer(year)))

    path == "/" or Regex.match?(~r|^/page/\d+$|, path) ->
      %{plan | request_root_routes: true}

    true ->
      %{plan | requires_fallback_section_render: true}
  end
end
@doc """
Expands a plan from `plan_validation_paths/2` into the set of sections to re-render.

When `initial_plan` requires a fallback section render, its own fields are returned
unchanged (a full render makes targeting pointless), but the nested `:language_plans`
are still expanded so each language plan has the same shape as in the targeted case.

Otherwise:

  * `:requested_post_routes` becomes a `MapSet` of `"YYYY/MM/DD/slug"` keys
  * for every requested post route, the year and year-month archives and the root
    routes are requested too; when a published post matches the route, its categories
    and tags are requested as well and the archive dates come from the post's
    `created_at`
  * requested category and tag slugs are restricted to slugs that occur on at least
    one published post
  * every plan in `:language_plans` is expanded recursively with the same posts
"""
@spec build_targeted_validation_plan(map(), [map()]) :: map()
def build_targeted_validation_plan(initial_plan, published_posts) do
  if initial_plan.requires_fallback_section_render do
    # Language plans carry their own fallback flag, so they must be expanded even
    # when the main plan short-circuits; otherwise they keep a raw route list that
    # the MapSet lookups on targeted plans cannot handle.
    case initial_plan do
      %{language_plans: language_plans} ->
        %{initial_plan | language_plans: target_language_plans(language_plans, published_posts)}

      _no_language_plans ->
        initial_plan
    end
  else
    available_category_slugs =
      published_posts
      |> Enum.flat_map(&(&1.categories || []))
      |> MapSet.new(&Slug.slugify/1)

    available_tag_slugs =
      published_posts
      |> Enum.flat_map(&(&1.tags || []))
      |> MapSet.new(&Slug.slugify/1)

    targeted_post_routes =
      MapSet.new(initial_plan.requested_post_routes, fn route ->
        route_key(route.year, route.month, route.day, route.slug)
      end)

    enriched =
      Enum.reduce(
        initial_plan.requested_post_routes,
        %{initial_plan | requested_post_routes: targeted_post_routes},
        fn route, acc ->
          case Enum.find(published_posts, &post_matches_route?(&1, route)) do
            nil ->
              # No published post behind the route: still refresh the archives the
              # route's own date points at.
              acc
              |> update_in([:requested_years], &MapSet.put(&1, route.year))
              |> update_in(
                [:requested_year_months],
                &MapSet.put(&1, route_month_key(route.year, route.month))
              )
              |> Map.put(:request_root_routes, true)

            post ->
              {year, month, _day} = local_date_parts!(post.created_at)

              acc
              |> update_in([:requested_category_slugs], fn set ->
                Enum.reduce(post.categories || [], set, &MapSet.put(&2, archive_route_segment(&1)))
              end)
              |> update_in([:requested_tag_slugs], fn set ->
                Enum.reduce(post.tags || [], set, &MapSet.put(&2, archive_route_segment(&1)))
              end)
              |> update_in([:requested_years], &MapSet.put(&1, year))
              |> update_in([:requested_year_months], &MapSet.put(&1, route_month_key(year, month)))
              |> Map.put(:request_root_routes, true)
          end
        end
      )

    %{
      enriched
      | requested_category_slugs:
          MapSet.intersection(enriched.requested_category_slugs, available_category_slugs),
        requested_tag_slugs: MapSet.intersection(enriched.requested_tag_slugs, available_tag_slugs),
        language_plans: target_language_plans(initial_plan.language_plans, published_posts)
    }
  end
end

# Expands every per-language plan with the same published posts.
defp target_language_plans(language_plans, published_posts) do
  Map.new(language_plans, fn {language, language_plan} ->
    {language, build_targeted_validation_plan(language_plan, published_posts)}
  end)
end
# True when `post` has the route's slug and its `created_at`, split into local date
# parts, equals the route's year, month and day.
defp post_matches_route?(post, route) do
  {post_year, post_month, post_day} = local_date_parts!(post.created_at)

  post.slug == route.slug and
    {post_year, post_month, post_day} == {route.year, route.month, route.day}
end
# Builds the "YYYY/MM/DD/slug" key identifying a post route; month and day are
# left-padded with zeros to two characters.
defp route_key(year, month, day, slug) do
  padded_month = month |> Integer.to_string() |> String.pad_leading(2, "0")
  padded_day = day |> Integer.to_string() |> String.pad_leading(2, "0")

  "#{year}/#{padded_month}/#{padded_day}/#{slug}"
end
# Builds the "YYYY/MM" key identifying a monthly archive; the month is left-padded
# with zeros to two characters.
defp route_month_key(year, month) do
  padded_month = month |> Integer.to_string() |> String.pad_leading(2, "0")

  "#{year}/#{padded_month}"
end
# Splits a leading language segment off a URL path.
#
# Returns `{language, rest}` when the first segment is two or three lowercase letters
# and is listed in `additional_languages`; `rest` is the normalized remainder, or "/"
# when nothing follows the language. Otherwise returns `{nil, path}` unchanged.
defp extract_language_path(path, additional_languages) do
  with [_whole, language | maybe_suffix] <- Regex.run(~r|^/([a-z]{2,3})(/.*)?$|, path),
       true <- language in additional_languages do
    case maybe_suffix do
      [suffix] -> {language, normalize_url_path(suffix)}
      [] -> {language, "/"}
    end
  else
    _no_language -> {nil, path}
  end
end
@doc """
Tells whether a generated output file is covered by `targeted_plan`.

`relative_path` is first checked for a leading `additional_languages` segment. With
such a segment, the remainder of the path is tested against that language's entry in
`targeted_plan.language_plans` (an empty plan when there is none); without one, the
whole path is tested against `targeted_plan` itself.
"""
@spec targeted_output?(String.t(), map(), String.t() | nil, [String.t()]) :: boolean()
def targeted_output?(relative_path, targeted_plan, main_language, additional_languages) do
  {language, stripped_path} = extract_relative_output_language(relative_path, additional_languages)

  plan =
    if is_nil(language) do
      targeted_plan
    else
      Map.get(targeted_plan.language_plans, language, empty_validation_path_plan())
    end

  main? = is_nil(language) or main_language == language

  targeted_output_for_plan?(stripped_path, plan, main?)
end
# Splits a leading language directory off a relative output path.
#
# Returns `{language, rest}` when the first path segment is listed in
# `additional_languages`, where `rest` is the remaining segments joined with "/"
# (or "" when the path consists of the language segment alone). Otherwise returns
# `{nil, relative_path}` with the path untouched.
defp extract_relative_output_language(relative_path, additional_languages) do
  case String.split(relative_path, "/", trim: true) do
    [language | rest] ->
      cond do
        language not in additional_languages -> {nil, relative_path}
        # Path.join/1 only accepts non-empty lists, so a bare language segment
        # needs its own branch instead of raising.
        rest == [] -> {language, ""}
        true -> {language, Path.join(rest)}
      end

    [] ->
      {nil, relative_path}
  end
end
# Decides whether one output file (path relative to its language root) belongs to a
# section requested by `plan`.
#
# A plan that requires a fallback section render covers every file. Otherwise the
# file is matched against the same route shapes the planner recognises, including
# their optional "/page/N" pagination segment:
#   index.html, 404.html, feed.xml, atom.xml, page/N  -> plan.request_root_routes
#   category/<slug>[/page/N]                          -> slug requested?
#   tag/<slug>[/page/N]                               -> slug requested?
#   YYYY/MM/DD/<slug>                                 -> post route requested?
#   YYYY/MM[/page/N]                                  -> "YYYY/MM" requested?
#   YYYY[/page/N]                                     -> integer year requested?
# Any other file is not targeted. The third argument is currently unused.
defp targeted_output_for_plan?(_relative_path, %{requires_fallback_section_render: true}, _main?),
  do: true

defp targeted_output_for_plan?(relative_path, plan, _main?) do
  cond do
    relative_path in ["index.html", "404.html", "feed.xml", "atom.xml"] ->
      plan.request_root_routes

    Regex.match?(~r|^page/\d+/index\.html$|, relative_path) ->
      plan.request_root_routes

    category = Regex.run(~r|^category/([^/]+)(?:/page/\d+)?/index\.html$|, relative_path) ->
      [_whole, slug] = category
      MapSet.member?(plan.requested_category_slugs, slug)

    tag = Regex.run(~r|^tag/([^/]+)(?:/page/\d+)?/index\.html$|, relative_path) ->
      [_whole, slug] = tag
      MapSet.member?(plan.requested_tag_slugs, slug)

    post = Regex.run(~r|^(\d{4})/(\d{2})/(\d{2})/([^/]+)/index\.html$|, relative_path) ->
      [_whole, year, month, day, slug] = post

      key =
        route_key(String.to_integer(year), String.to_integer(month), String.to_integer(day), slug)

      post_route_requested?(plan.requested_post_routes, key)

    year_month = Regex.run(~r|^(\d{4})/(\d{2})(?:/page/\d+)?/index\.html$|, relative_path) ->
      [_whole, year, month] = year_month
      MapSet.member?(plan.requested_year_months, "#{year}/#{month}")

    year_only = Regex.run(~r|^(\d{4})(?:/page/\d+)?/index\.html$|, relative_path) ->
      [_whole, year] = year_only
      MapSet.member?(plan.requested_years, String.to_integer(year))

    true ->
      false
  end
end

# Looks a post route key up in a plan's requested routes. Targeted plans hold a
# MapSet of keys, while empty or not-yet-targeted plans still hold a list of route
# maps; both shapes are accepted so the lookup never raises on the latter.
defp post_route_requested?(%MapSet{} = routes, key), do: MapSet.member?(routes, key)

defp post_route_requested?(routes, key) when is_list(routes) do
  Enum.any?(routes, fn
    %{year: year, month: month, day: day, slug: slug} -> route_key(year, month, day, slug) == key
    other -> other == key
  end)
end
@doc """
Tells whether `relative_path` ends in `index.html`.

This is a plain suffix test on the whole string, so any path whose last characters
are `index.html` qualifies.
"""
@spec route_html_path?(String.t()) :: boolean()
def route_html_path?(relative_path) do
  String.ends_with?(relative_path, "index.html")
end
@doc """
Removes `current_dir` and then each of its ancestors for as long as they are empty,
stopping at `html_root`.

`html_root` itself is never removed, and a `current_dir` that does not lie inside
`html_root` is left untouched. Pruning also stops at the first directory that is not
empty, cannot be listed, or cannot be removed.

Returns `{removed_count, last_dir}`, where `last_dir` is the directory at which
pruning stopped.
"""
@spec prune_empty_parent_dirs(String.t(), String.t()) :: {non_neg_integer(), String.t()}
def prune_empty_parent_dirs(current_dir, html_root) do
  expanded_root = Path.expand(html_root)

  # Prefix used to recognise descendants; trimming first keeps a root of "/" as "/".
  root_prefix = String.trim_trailing(expanded_root, "/") <> "/"

  do_prune_empty_parent_dirs(current_dir, expanded_root, root_prefix)
end

# Recursive worker: the root is expanded once by the caller instead of per level.
defp do_prune_empty_parent_dirs(current_dir, expanded_root, root_prefix) do
  expanded_dir = Path.expand(current_dir)

  cond do
    expanded_dir == expanded_root ->
      {0, current_dir}

    # Without this guard a directory outside the root would never hit the stop
    # condition above and empty directories outside the HTML tree would be removed.
    not String.starts_with?(expanded_dir, root_prefix) ->
      {0, current_dir}

    true ->
      with {:ok, []} <- File.ls(current_dir),
           :ok <- File.rmdir(current_dir) do
        {count, last_dir} =
          do_prune_empty_parent_dirs(Path.dirname(current_dir), expanded_root, root_prefix)

        {count + 1, last_dir}
      else
        _not_prunable -> {0, current_dir}
      end
  end
end
end