chore: more god module refactor
Co-authored-by: Copilot <copilot@github.com>
This commit is contained in:
445
lib/bds/generation/validation.ex
Normal file
445
lib/bds/generation/validation.ex
Normal file
@@ -0,0 +1,445 @@
|
||||
defmodule BDS.Generation.Validation do
|
||||
@moduledoc false
|
||||
|
||||
import BDS.Generation.Paths,
|
||||
only: [
|
||||
archive_route_segment: 1,
|
||||
local_date_parts!: 1,
|
||||
normalize_url_path: 1,
|
||||
relative_path_to_url_path: 1,
|
||||
url_path_to_relative_index_path: 1
|
||||
]
|
||||
import BDS.Generation.Progress, only: [report_validation_compare_progress: 3]
|
||||
import BDS.Generation.Sitemap, only: [extract_locs: 1, loc_to_project_path: 2]
|
||||
|
||||
alias BDS.Slug
|
||||
|
||||
@doc """
Indexes generated-file records by their relative path.

Returns a map of `relative_path => updated_at`. When several records share
the same `relative_path`, the one appearing last in `generated_files` wins.
"""
@spec generated_file_updated_at_map([map()]) :: map()
def generated_file_updated_at_map(generated_files) do
  for file <- generated_files, into: %{} do
    {file.relative_path, file.updated_at}
  end
end
|
||||
|
||||
@doc """
Builds one timestamp-check entry per published post route.

Each entry is a map with:

  * `:post_url_path` - the URL path derived from the post's output path
  * `:post_file_path` - the post's source file joined onto `project_data_dir`;
    the `:translation_file_path` is preferred over `:file_path`, and the value
    is `nil` when neither is set
  * `:generated_updated_at_ms` - the value recorded for the output path in
    `generated_file_updated_at`, or `0` when there is no record
"""
@spec build_post_timestamp_checks(String.t(), [map()], map()) :: [map()]
def build_post_timestamp_checks(project_data_dir, published_route_posts, generated_file_updated_at) do
  for post <- published_route_posts do
    output_path = BDS.Generation.Paths.post_output_path(post)
    source_path = Map.get(post, :translation_file_path) || Map.get(post, :file_path)

    %{
      post_url_path: relative_path_to_url_path(output_path),
      post_file_path: source_full_path(project_data_dir, source_path),
      generated_updated_at_ms: Map.get(generated_file_updated_at, output_path, 0)
    }
  end
end
|
||||
|
||||
@doc """
Builds one timestamp-check entry per published post for a given `language`.

The output path is computed for `language`; the source file is always the
post's `:file_path` (joined onto `project_data_dir`, or `nil` when unset).
`:generated_updated_at_ms` is the value recorded for the output path in
`generated_file_updated_at`, or `0` when there is no record.
"""
@spec build_language_post_timestamp_checks(String.t(), String.t(), [map()], map()) :: [map()]
def build_language_post_timestamp_checks(
      project_data_dir,
      language,
      published_posts,
      generated_file_updated_at
    ) do
  for post <- published_posts do
    output_path = BDS.Generation.Paths.post_output_path(post, language)
    source_path = Map.get(post, :file_path)

    %{
      post_url_path: relative_path_to_url_path(output_path),
      post_file_path: source_full_path(project_data_dir, source_path),
      generated_updated_at_ms: Map.get(generated_file_updated_at, output_path, 0)
    }
  end
end
|
||||
|
||||
# Joins a source file path onto the project data directory.
# A missing path (`nil` or the empty string) yields `nil`.
defp source_full_path(_project_data_dir, nil), do: nil
defp source_full_path(_project_data_dir, ""), do: nil

defp source_full_path(project_data_dir, file_path) do
  Path.join(project_data_dir, file_path)
end
|
||||
|
||||
@doc """
Compares the URLs a sitemap promises against the `index.html` files on disk.

`params` must contain `:html_dir`, `:sitemap_xml`, `:base_url` and
`:on_progress`; `:post_timestamp_checks` and `:additional_expected_paths`
are optional and default to `[]`.

Returns a map with:

  * `:missing_url_paths` - expected URL paths with no non-empty `index.html`
  * `:extra_url_paths` - URL paths that have an `index.html` on disk (empty or
    unreadable ones included) but are not expected
  * `:updated_post_url_paths` - expected, existing post URL paths whose source
    file is newer than both the HTML file and the recorded generation time
  * `:expected_url_count` / `:existing_html_url_count` - set sizes

All three path lists are sorted. Progress is reported once per index file
and then once per timestamp check.
"""
@spec compare_sitemap_to_html(map()) :: map()
def compare_sitemap_to_html(params) do
  post_timestamp_checks = Map.get(params, :post_timestamp_checks, [])
  index_paths = Path.wildcard(Path.join(params.html_dir, "**/index.html"))
  # Computed once; the timestamp-check steps are numbered after the index files.
  index_count = length(index_paths)
  total_compare_steps = max(index_count + length(post_timestamp_checks), 1)

  expected_path_set = expected_url_path_set(params)

  {existing_html_path_set, zero_byte_html_path_set} =
    collect_html_index_paths(index_paths, params.html_dir, params.on_progress, total_compare_steps)

  missing_url_paths =
    expected_path_set
    |> MapSet.difference(existing_html_path_set)
    |> Enum.sort()

  extra_url_paths =
    existing_html_path_set
    |> MapSet.union(zero_byte_html_path_set)
    |> MapSet.difference(expected_path_set)
    |> Enum.sort()

  updated_post_url_paths =
    post_timestamp_checks
    |> Enum.with_index(index_count + 1)
    |> Enum.reduce(MapSet.new(), fn {check, step}, stale_paths ->
      :ok = report_validation_compare_progress(params.on_progress, step, total_compare_steps)

      url_path = normalize_url_path(check.post_url_path)

      # A path is only a candidate when it is expected and not missing, i.e.
      # present in both sets; set membership avoids scanning the missing list.
      if MapSet.member?(expected_path_set, url_path) and
           MapSet.member?(existing_html_path_set, url_path) and
           post_source_newer_than_html?(params.html_dir, url_path, check) do
        MapSet.put(stale_paths, url_path)
      else
        stale_paths
      end
    end)
    |> Enum.sort()

  %{
    missing_url_paths: missing_url_paths,
    extra_url_paths: extra_url_paths,
    updated_post_url_paths: updated_post_url_paths,
    expected_url_count: MapSet.size(expected_path_set),
    existing_html_url_count: MapSet.size(existing_html_path_set)
  }
end

# Normalized URL paths from the sitemap plus any additional expected paths.
defp expected_url_path_set(params) do
  params.sitemap_xml
  |> extract_locs()
  |> Enum.map(&loc_to_project_path(&1, params.base_url))
  |> Enum.concat(Map.get(params, :additional_expected_paths, []))
  |> MapSet.new(&normalize_url_path/1)
end

# True when the post source file was modified after both the generated HTML
# file's mtime and the recorded generation time. Checks without a source file,
# or where either file cannot be stat'ed, are never considered newer.
defp post_source_newer_than_html?(html_dir, url_path, check) do
  if is_nil(check.post_file_path) or check.post_file_path == "" do
    false
  else
    html_path = Path.join(html_dir, url_path_to_relative_index_path(url_path))

    with {:ok, html_stat} <- File.stat(html_path, time: :posix),
         {:ok, post_stat} <- File.stat(check.post_file_path, time: :posix) do
      effective_generated_at_ms = max(mtime_ms(html_stat), check.generated_updated_at_ms || 0)
      mtime_ms(post_stat) > effective_generated_at_ms
    else
      _other -> false
    end
  end
end
|
||||
|
||||
# Walks the given index.html files and sorts their URL paths into two sets:
# `{non_empty, empty_or_unreadable}`. A file counts as non-empty when
# File.stat/1 succeeds with a size above zero; zero-size files and files that
# cannot be stat'ed land in the second set. Progress is reported per file,
# with steps numbered from 1.
defp collect_html_index_paths(index_paths, html_dir, on_progress, total_compare_steps) do
  initial = {MapSet.new(), MapSet.new()}

  index_paths
  |> Enum.with_index(1)
  |> Enum.reduce(initial, fn {index_file, step}, {non_empty, empty_or_unreadable} ->
    :ok = report_validation_compare_progress(on_progress, step, total_compare_steps)

    url_path =
      case Path.dirname(Path.relative_to(index_file, html_dir)) do
        # index.html directly inside html_dir is the site root.
        "." -> "/"
        directory -> normalize_url_path("/" <> directory)
      end

    if match?({:ok, %{size: size}} when size > 0, File.stat(index_file)) do
      {MapSet.put(non_empty, url_path), empty_or_unreadable}
    else
      {non_empty, MapSet.put(empty_or_unreadable, url_path)}
    end
  end)
end
|
||||
|
||||
# Converts a stat's `mtime` to Unix milliseconds.
# An integer mtime is multiplied by 1000; any other value is treated as an
# Erlang datetime tuple and interpreted as UTC.
defp mtime_ms(%{mtime: seconds}) when is_integer(seconds), do: seconds * 1000

defp mtime_ms(%{mtime: erl_datetime}) do
  naive = NaiveDateTime.from_erl!(erl_datetime)
  utc = DateTime.from_naive!(naive, "Etc/UTC")
  DateTime.to_unix(utc, :millisecond)
end
|
||||
|
||||
@doc """
Lists the URL paths a comparison report flags for regeneration.

Returns the report's `:missing_url_paths` followed by its
`:updated_post_url_paths`; either key defaults to `[]` when absent.
"""
@spec report_paths(map()) :: [String.t()]
def report_paths(report) do
  missing = Map.get(report, :missing_url_paths, [])
  updated = Map.get(report, :updated_post_url_paths, [])

  missing ++ updated
end
|
||||
|
||||
@doc """
Turns a list of URL paths into a validation plan.

Each path is normalized and then classified. Paths whose first segment is
one of `additional_languages` are classified (without that prefix) into a
per-language plan; all other paths go into the main plan. The per-language
plans are returned under the main plan's `:language_plans` key.
"""
@spec plan_validation_paths([String.t()], [String.t()]) :: map()
def plan_validation_paths(paths, additional_languages) do
  initial = {empty_validation_path_plan(), %{}}

  {main_plan, language_plans} =
    Enum.reduce(paths, initial, fn path, {main_acc, languages_acc} ->
      normalized = normalize_url_path(path)

      case extract_language_path(normalized, additional_languages) do
        {language, language_path} when is_binary(language) ->
          updated_language_plan =
            languages_acc
            |> Map.get(language, empty_validation_path_plan())
            |> then(&classify_validation_path(language_path, &1))

          {main_acc, Map.put(languages_acc, language, updated_language_plan)}

        {_no_language, _path} ->
          {classify_validation_path(normalized, main_acc), languages_acc}
      end
    end)

  Map.put(main_plan, :language_plans, language_plans)
end
|
||||
|
||||
@doc """
Returns a validation plan that requests nothing.

Both flags are `false`, the slug/year/year-month collections are empty
`MapSet`s, `:requested_post_routes` is an empty list and `:language_plans`
is an empty map.
"""
@spec empty_validation_path_plan() :: map()
def empty_validation_path_plan do
  nothing_requested = MapSet.new()

  %{
    # Flags
    request_root_routes: false,
    requires_fallback_section_render: false,
    # Archive sections
    requested_category_slugs: nothing_requested,
    requested_tag_slugs: nothing_requested,
    requested_years: nothing_requested,
    requested_year_months: nothing_requested,
    # Individual posts and per-language sub-plans
    requested_post_routes: [],
    language_plans: %{}
  }
end
|
||||
|
||||
# Records a single normalized URL path in `plan`. Patterns are tried in this
# order, and the first match wins:
#
#   /category/<slug>[/page/N]  -> adds <slug> to requested_category_slugs
#   /tag/<slug>[/page/N]       -> adds <slug> to requested_tag_slugs
#   /YYYY/MM/DD/<slug>         -> prepends a route map to requested_post_routes
#   /YYYY/MM[/page/N]          -> adds "YYYY/MM" to requested_year_months
#   /YYYY[/page/N]             -> adds the integer year to requested_years
#   / or /page/N               -> sets request_root_routes
#   anything else              -> sets requires_fallback_section_render
defp classify_validation_path(path, plan) do
  cond do
    captures = Regex.run(~r|^/category/([^/]+)(?:/page/\d+)?$|, path) ->
      [_full, slug] = captures
      Map.update!(plan, :requested_category_slugs, &MapSet.put(&1, slug))

    captures = Regex.run(~r|^/tag/([^/]+)(?:/page/\d+)?$|, path) ->
      [_full, slug] = captures
      Map.update!(plan, :requested_tag_slugs, &MapSet.put(&1, slug))

    captures = Regex.run(~r|^/(\d{4})/(\d{2})/(\d{2})/([^/]+)$|, path) ->
      [_full, year, month, day, slug] = captures

      route = %{
        year: String.to_integer(year),
        month: String.to_integer(month),
        day: String.to_integer(day),
        slug: slug
      }

      Map.update!(plan, :requested_post_routes, &[route | &1])

    captures = Regex.run(~r|^/(\d{4})/(\d{2})(?:/page/\d+)?$|, path) ->
      [_full, year, month] = captures
      Map.update!(plan, :requested_year_months, &MapSet.put(&1, "#{year}/#{month}"))

    captures = Regex.run(~r|^/(\d{4})(?:/page/\d+)?$|, path) ->
      [_full, year] = captures
      Map.update!(plan, :requested_years, &MapSet.put(&1, String.to_integer(year)))

    path == "/" or Regex.match?(~r|^/page/\d+$|, path) ->
      %{plan | request_root_routes: true}

    true ->
      %{plan | requires_fallback_section_render: true}
  end
end
|
||||
|
||||
@doc """
Expands an initial validation plan into the plan used for targeted output.

A plan that requires a fallback section render is returned unchanged.
Otherwise:

  * `:requested_post_routes` is replaced by a `MapSet` of route keys
  * for every requested post route, the year and year/month archives and the
    root routes are requested as well; when a published post matches the
    route, the post's own date is used and its categories and tags are
    requested too, otherwise the date from the route is used
  * requested category and tag slugs are restricted to slugs that occur on
    the published posts
  * every plan under `:language_plans` is expanded the same way
"""
@spec build_targeted_validation_plan(map(), [map()]) :: map()
def build_targeted_validation_plan(initial_plan, published_posts) do
  if initial_plan.requires_fallback_section_render do
    initial_plan
  else
    available_category_slugs = published_term_slugs(published_posts, :categories)
    available_tag_slugs = published_term_slugs(published_posts, :tags)
    requested_routes = initial_plan.requested_post_routes

    route_keys =
      MapSet.new(requested_routes, fn route ->
        route_key(route.year, route.month, route.day, route.slug)
      end)

    seed_plan = %{initial_plan | requested_post_routes: route_keys}

    enriched =
      Enum.reduce(requested_routes, seed_plan, fn route, plan ->
        published_posts
        |> Enum.find(&post_matches_route?(&1, route))
        |> widen_plan_for_route(route, plan)
      end)

    language_plans =
      Map.new(initial_plan.language_plans, fn {language, language_plan} ->
        {language, build_targeted_validation_plan(language_plan, published_posts)}
      end)

    %{
      enriched
      | requested_category_slugs:
          MapSet.intersection(enriched.requested_category_slugs, available_category_slugs),
        requested_tag_slugs: MapSet.intersection(enriched.requested_tag_slugs, available_tag_slugs),
        language_plans: language_plans
    }
  end
end

# Slugified set of every term stored under `field` (:categories or :tags)
# across the published posts; a nil field counts as no terms.
defp published_term_slugs(published_posts, field) do
  for post <- published_posts, term <- Map.fetch!(post, field) || [], into: MapSet.new() do
    Slug.slugify(term)
  end
end

# No published post matches the route: request the archives named by the
# route itself, plus the root routes.
defp widen_plan_for_route(nil, route, plan) do
  plan
  |> Map.update!(:requested_years, &MapSet.put(&1, route.year))
  |> Map.update!(:requested_year_months, &MapSet.put(&1, route_month_key(route.year, route.month)))
  |> Map.put(:request_root_routes, true)
end

# A published post matches: request its categories and tags, the archives for
# the post's own date, and the root routes.
defp widen_plan_for_route(post, _route, plan) do
  {year, month, _day} = local_date_parts!(post.created_at)

  plan
  |> Map.update!(:requested_category_slugs, fn slugs ->
    Enum.reduce(post.categories || [], slugs, &MapSet.put(&2, archive_route_segment(&1)))
  end)
  |> Map.update!(:requested_tag_slugs, fn slugs ->
    Enum.reduce(post.tags || [], slugs, &MapSet.put(&2, archive_route_segment(&1)))
  end)
  |> Map.update!(:requested_years, &MapSet.put(&1, year))
  |> Map.update!(:requested_year_months, &MapSet.put(&1, route_month_key(year, month)))
  |> Map.put(:request_root_routes, true)
end
|
||||
|
||||
# True when the post's slug and the date parts derived from its `created_at`
# both equal the ones in `route`.
defp post_matches_route?(post, route) do
  {year, month, day} = local_date_parts!(post.created_at)
  same_date? = {year, month, day} == {route.year, route.month, route.day}

  same_date? and post.slug == route.slug
end
|
||||
|
||||
# Builds the "YEAR/MM/DD/slug" key for a post route; month and day are
# integers and get zero-padded to two digits.
defp route_key(year, month, day, slug) do
  two_digits = fn number ->
    number |> Integer.to_string() |> String.pad_leading(2, "0")
  end

  "#{year}/#{two_digits.(month)}/#{two_digits.(day)}/#{slug}"
end
|
||||
|
||||
# Builds the "YEAR/MM" key for a monthly archive; the integer month is
# zero-padded to two digits.
defp route_month_key(year, month) do
  padded_month =
    month
    |> Integer.to_string()
    |> String.pad_leading(2, "0")

  "#{year}/#{padded_month}"
end
|
||||
|
||||
# Splits a leading language segment off a URL path.
# Returns `{language, remaining_path}` when the first segment is two or three
# lowercase letters and is listed in `additional_languages`; the remaining
# path is "/" when nothing follows the language. Otherwise returns
# `{nil, path}` with the path untouched.
defp extract_language_path(path, additional_languages) do
  with [_full, language | suffix] <- Regex.run(~r|^/([a-z]{2,3})(/.*)?$|, path),
       true <- language in additional_languages do
    case suffix do
      [rest] -> {language, normalize_url_path(rest)}
      [] -> {language, "/"}
    end
  else
    _no_language -> {nil, path}
  end
end
|
||||
|
||||
@doc """
Tells whether a generated output file is covered by a targeted plan.

When `relative_path` starts with one of `additional_languages`, the decision
is made against that language's plan from `targeted_plan.language_plans`
(an empty plan when the language has none), using the path without the
language prefix. Otherwise the main plan is used.
"""
@spec targeted_output?(String.t(), map(), String.t() | nil, [String.t()]) :: boolean()
def targeted_output?(relative_path, targeted_plan, main_language, additional_languages) do
  {language, language_relative_path} =
    extract_relative_output_language(relative_path, additional_languages)

  {plan, main?} =
    if is_nil(language) do
      {targeted_plan, true}
    else
      language_plan = Map.get(targeted_plan.language_plans, language, empty_validation_path_plan())
      {language_plan, main_language == language}
    end

  targeted_output_for_plan?(language_relative_path, plan, main?)
end
|
||||
|
||||
# Splits a leading language directory off a relative output path.
# Returns `{language, rest_of_path}` when the first path segment is listed in
# `additional_languages`, otherwise `{nil, relative_path}` unchanged.
#
# A path that consists of the language segment alone (e.g. "fr" or "fr/")
# yields `{language, ""}`: Path.join/1 only accepts non-empty lists, so the
# empty remainder must be handled before joining.
defp extract_relative_output_language(relative_path, additional_languages) do
  case String.split(relative_path, "/", trim: true) do
    [language | rest] ->
      cond do
        language not in additional_languages -> {nil, relative_path}
        rest == [] -> {language, ""}
        true -> {language, Path.join(rest)}
      end

    [] ->
      {nil, relative_path}
  end
end
|
||||
|
||||
# Decides whether the output file at `relative_path` (language prefix already
# removed by the caller) is covered by `plan`.
#
# A plan that requires a fallback section render covers every output.
# Otherwise the path is matched against the known output layouts:
#
#   index.html, 404.html, feed.xml, atom.xml, page/N/index.html
#                                          -> plan.request_root_routes
#   category/<slug>[/page/N]/index.html    -> slug in requested_category_slugs
#   tag/<slug>[/page/N]/index.html         -> slug in requested_tag_slugs
#   YYYY/MM/DD/<slug>/index.html           -> route in requested_post_routes
#   YYYY/MM[/page/N]/index.html            -> "YYYY/MM" in requested_year_months
#   YYYY[/page/N]/index.html               -> year in requested_years
#
# Paginated variants are accepted for every archive kind so that this stays in
# step with classify_validation_path/2, which accepts the matching URLs.
# NOTE(review): pagination for tag/date/home outputs is inferred from
# classify_validation_path/2 - confirm the generator emits those files.
# Any other path is not targeted. The third argument is unused.
defp targeted_output_for_plan?(_relative_path, %{requires_fallback_section_render: true}, _main?), do: true

defp targeted_output_for_plan?(relative_path, plan, _main?) do
  cond do
    relative_path in ["index.html", "404.html", "feed.xml", "atom.xml"] ->
      plan.request_root_routes

    Regex.match?(~r|^page/\d+/index\.html$|, relative_path) ->
      plan.request_root_routes

    captures = Regex.run(~r|^category/([^/]+)(?:/page/\d+)?/index\.html$|, relative_path) ->
      [_full, slug] = captures
      MapSet.member?(plan.requested_category_slugs, slug)

    captures = Regex.run(~r|^tag/([^/]+)(?:/page/\d+)?/index\.html$|, relative_path) ->
      [_full, slug] = captures
      MapSet.member?(plan.requested_tag_slugs, slug)

    captures = Regex.run(~r|^(\d{4})/(\d{2})/(\d{2})/([^/]+)/index\.html$|, relative_path) ->
      [_full, year, month, day, slug] = captures

      post_route_requested?(plan.requested_post_routes, %{
        year: String.to_integer(year),
        month: String.to_integer(month),
        day: String.to_integer(day),
        slug: slug
      })

    captures = Regex.run(~r|^(\d{4})/(\d{2})(?:/page/\d+)?/index\.html$|, relative_path) ->
      [_full, year, month] = captures
      MapSet.member?(plan.requested_year_months, "#{year}/#{month}")

    captures = Regex.run(~r|^(\d{4})(?:/page/\d+)?/index\.html$|, relative_path) ->
      [_full, year] = captures
      MapSet.member?(plan.requested_years, String.to_integer(year))

    true ->
      false
  end
end

# `requested_post_routes` is a MapSet of route keys once a plan has been
# through build_targeted_validation_plan/2, but it is still a list of route
# maps in a plan that has not (empty_validation_path_plan/0, or a plan returned
# untouched because it requires a fallback render). Both shapes are accepted
# here; MapSet.member?/2 alone would raise on the list form.
defp post_route_requested?(%MapSet{} = route_keys, route) do
  MapSet.member?(route_keys, route_key(route.year, route.month, route.day, route.slug))
end

defp post_route_requested?(routes, route) when is_list(routes) do
  Enum.any?(routes, fn candidate ->
    Map.take(candidate, [:year, :month, :day, :slug]) == route
  end)
end
|
||||
|
||||
@doc """
Returns `true` when `relative_path` ends with `"index.html"`.
"""
@spec route_html_path?(String.t()) :: boolean()
def route_html_path?(relative_path) do
  route_file_suffix = "index.html"
  String.ends_with?(relative_path, route_file_suffix)
end
|
||||
|
||||
@doc """
Removes `current_dir` and then its ancestors for as long as they are empty.

Walking stops at `html_root` (compared after `Path.expand/1`), which is never
removed, or at the first directory that is not empty, cannot be listed or
cannot be removed.

Returns `{removed_count, stopped_at}` where `stopped_at` is the directory at
which the walk ended.
"""
@spec prune_empty_parent_dirs(String.t(), String.t()) :: {non_neg_integer(), String.t()}
def prune_empty_parent_dirs(current_dir, html_root) do
  if Path.expand(current_dir) == Path.expand(html_root) do
    {0, current_dir}
  else
    with {:ok, []} <- File.ls(current_dir),
         :ok <- File.rmdir(current_dir) do
      {removed, stopped_at} = prune_empty_parent_dirs(Path.dirname(current_dir), html_root)
      {removed + 1, stopped_at}
    else
      _not_prunable -> {0, current_dir}
    end
  end
end
|
||||
end
|
||||
Reference in New Issue
Block a user