fix: A1-12 functional client-side search with real PagefindUI and fragment index
This commit is contained in:
@@ -7,10 +7,25 @@ defmodule BDS.Generation.Pagefind do
|
||||
@typedoc "A (relative_path, content) generated file tuple."
|
||||
@type generated_file :: {String.t(), String.t()}
|
||||
|
||||
@assets_dir Application.app_dir(:bds, "priv/preview_assets/assets")
|
||||
@ui_js_path Path.join(@assets_dir, "pagefind-ui.js")
|
||||
@ui_css_path Path.join(@assets_dir, "pagefind-ui.css")
|
||||
|
||||
@external_resource @ui_js_path
|
||||
@external_resource @ui_css_path
|
||||
|
||||
@ui_js File.read!(@ui_js_path)
|
||||
@ui_css File.read!(@ui_css_path)
|
||||
|
||||
@doc """
|
||||
Build the per-language Pagefind index outputs (`pagefind/index.json`,
|
||||
`pagefind/pagefind-ui.js`, `pagefind/pagefind-ui.css`) for every blog
|
||||
language declared on the plan.
|
||||
|
||||
The fragment index records one entry per indexable page, where indexable
|
||||
means the page carries a `data-pagefind-body` region. Each entry stores the
|
||||
page URL, its title, and the body text scoped to that region — mirroring
|
||||
Pagefind's behaviour of ignoring content outside `data-pagefind-body`.
|
||||
"""
|
||||
@spec build_outputs(map(), [html_output()]) :: [generated_file()]
|
||||
def build_outputs(plan, html_outputs) do
|
||||
@@ -31,8 +46,8 @@ defmodule BDS.Generation.Pagefind do
|
||||
[
|
||||
{Path.join(prefix ++ ["index.json"]),
|
||||
Jason.encode!(%{"language" => language, "pages" => pages})},
|
||||
{Path.join(prefix ++ ["pagefind-ui.js"]), ui_js(language)},
|
||||
{Path.join(prefix ++ ["pagefind-ui.css"]), ui_css()}
|
||||
{Path.join(prefix ++ ["pagefind-ui.js"]), @ui_js},
|
||||
{Path.join(prefix ++ ["pagefind-ui.css"]), @ui_css}
|
||||
]
|
||||
end)
|
||||
end
|
||||
@@ -43,11 +58,14 @@ defmodule BDS.Generation.Pagefind do
|
||||
String.ends_with?(relative_path, ".html") and
|
||||
language_match?(relative_path, route_language, other_prefixes)
|
||||
end)
|
||||
|> Enum.map(fn {relative_path, content} ->
|
||||
%{
|
||||
"url" => "/" <> relative_path,
|
||||
"text" => text(content)
|
||||
}
|
||||
|> Enum.flat_map(fn {relative_path, content} ->
|
||||
case body_text(content) do
|
||||
nil ->
|
||||
[]
|
||||
|
||||
text ->
|
||||
[%{"url" => "/" <> relative_path, "title" => title(content), "text" => text}]
|
||||
end
|
||||
end)
|
||||
end
|
||||
|
||||
@@ -60,19 +78,94 @@ defmodule BDS.Generation.Pagefind do
|
||||
defp language_match?(relative_path, route_language, _other_prefixes),
|
||||
do: String.starts_with?(relative_path, route_language <> "/")
|
||||
|
||||
defp text(content) do
|
||||
content
|
||||
# Extract the indexable body text scoped to the data-pagefind-body element.
|
||||
# Returns nil when the page is not marked, so unmarked pages are excluded
|
||||
# from the index entirely (matching Pagefind semantics).
|
||||
defp body_text(content) do
  # First opening tag that carries the data-pagefind-body attribute; the
  # capture group is the element name (e.g. "main", "article").
  marker = ~r/<([a-zA-Z0-9]+)[^>]*\bdata-pagefind-body\b[^>]*>/

  with [{tag_at, tag_size}, {name_at, name_size}] <- Regex.run(marker, content, return: :index) do
    element = binary_part(content, name_at, name_size)

    # The region starts right after the opening tag and runs to its
    # balanced closing tag; markup is then reduced to plain text.
    content
    |> scoped_region(element, tag_at + tag_size)
    |> plain_text()
  else
    # Page is not marked: signal "not indexable" to the caller.
    _unmarked -> nil
  end
end
|
||||
|
||||
# Capture the inner HTML of the marked element by balancing same-tag
|
||||
# open/close pairs from the opening tag onward.
|
||||
defp scoped_region(content, tag, body_start) do
  # Everything that follows the marked element's opening tag.
  tail_size = byte_size(content) - body_start
  rest = binary_part(content, body_start, tail_size)

  openers = Regex.scan(Regex.compile!("<#{tag}\\b", "i"), rest, return: :index)
  closers = Regex.scan(Regex.compile!("</#{tag}\\s*>", "i"), rest, return: :index)

  # One {offset, :open | :close} event per same-named tag, in document order.
  events =
    (openers ++ closers)
    |> Enum.map(fn [{offset, _size}] -> {offset, event_kind(rest, offset, tag)} end)
    |> Enum.sort_by(fn {offset, _kind} -> offset end)

  # Cut at the closing tag that balances the marked element; when no such
  # tag is found the whole remainder is treated as the region.
  case balanced_close(events, 0) do
    nil -> rest
    offset -> binary_part(rest, 0, offset)
  end
end
|
||||
|
||||
# Classifies the tag found at byte offset `pos` of `rest` as a closing tag
# (`:close`, when the text there begins with "</") or an opening tag (`:open`).
defp event_kind(rest, pos, tag) do
  remaining = byte_size(rest) - pos
  window = min(2 + byte_size(tag), remaining)

  case binary_part(rest, pos, window) do
    "</" <> _name -> :close
    _other -> :open
  end
end
|
||||
|
||||
# Walks the ordered open/close events and returns the offset of the first
# close event seen while the nesting depth is 0, or nil if the events run
# out first. Each open event raises the depth by one and each close event
# seen at a non-zero depth lowers it by one.
defp balanced_close(events, depth) do
  outcome =
    Enum.reduce_while(events, depth, fn
      {offset, :close}, 0 -> {:halt, {:found, offset}}
      {_offset, :close}, level -> {:cont, level - 1}
      {_offset, :open}, level -> {:cont, level + 1}
    end)

  case outcome do
    {:found, offset} -> offset
    _unbalanced_level -> nil
  end
end
|
||||
|
||||
# Picks a page title: the text of the first <title> element, otherwise the
# text of the first <h1>, otherwise the empty string.
defp title(content) do
  candidates = [
    ~r/<title[^>]*>(.*?)<\/title>/si,
    ~r/<h1[^>]*>(.*?)<\/h1>/si
  ]

  Enum.find_value(candidates, "", fn pattern -> tag_text(content, pattern) end)
end
|
||||
|
||||
# Runs `regex` against `content` and returns its single capture as plain
# text. Yields nil when the regex does not match or the text is empty.
defp tag_text(content, regex) do
  regex
  |> Regex.run(content)
  |> case do
    [_whole, inner] -> nil_if_blank(plain_text(inner))
    _other -> nil
  end
end
|
||||
|
||||
# Maps the empty string to nil; any other value is returned untouched.
defp nil_if_blank(value) do
  if value == "", do: nil, else: value
end
|
||||
|
||||
# Reduces an HTML fragment to plain text: every tag becomes a space,
# entities are decoded, whitespace runs collapse to one space and the
# result is trimmed.
defp plain_text(html) do
  without_tags = String.replace(html, ~r/<[^>]+>/, " ")
  decoded = decode_entities(without_tags)
  collapsed = String.replace(decoded, ~r/\s+/u, " ")

  String.trim(collapsed)
end
|
||||
|
||||
# Builds a one-line script that assigns the JSON-encoded language to
# `window.bDSPagefind.language`.
defp ui_js(language) do
  encoded_language = Jason.encode!(language)

  "window.bDSPagefind = { language: " <> encoded_language <> " };\n"
end
|
||||
|
||||
defp ui_css do
|
||||
".pagefind-ui{display:block;}\n"
|
||||
# Decodes the small set of HTML entities expected in generated pages into
# their literal characters.
#
# Decoding happens in a single pass so that already-decoded output is never
# scanned again: "&amp;lt;" becomes the text "&lt;", not "<". A chain of
# sequential replaces that handles "&amp;" first would double-decode it.
#
# NOTE(review): the entity spellings are reconstructed. The literals in the
# reviewed view were already-decoded characters (replacing "&" with "&",
# "<" with "<", ...), which would be no-ops — confirm the exact set,
# especially the apostrophe and non-breaking-space forms, against the
# repository file.
defp decode_entities(text) do
  entities = %{
    "&amp;" => "&",
    "&lt;" => "<",
    "&gt;" => ">",
    "&quot;" => "\"",
    "&#39;" => "'",
    "&nbsp;" => " "
  }

  # The alternation lists exactly the map's keys, so the lookup cannot miss.
  Regex.replace(~r/&(?:amp|lt|gt|quot|#39|nbsp);/, text, fn entity ->
    Map.fetch!(entities, entity)
  end)
end
|
||||
|
||||
defp route_language(main_language, language) when main_language == language, do: nil
|
||||
|
||||
Reference in New Issue
Block a user