From 43a435f35da04cd37426d96c769b4fa893e0121f Mon Sep 17 00:00:00 2001 From: Chili Palmer Date: Mon, 11 May 2026 09:19:31 +0200 Subject: [PATCH] fix: derive pagefind language prefixes from project settings (CSM-025) Replace hardcoded ["de/", "fr/", "it/", "es/"] with prefixes computed from plan.blog_languages, so arbitrary language codes work correctly. Also mark CSM-024 as fixed (done in CSM-005). Co-Authored-By: Claude Opus 4.6 --- CODESMELL.md | 18 ++-- lib/bds/generation/pagefind.ex | 27 +++--- test/bds/csm025_hardcoded_languages_test.exs | 94 ++++++++++++++++++++ 3 files changed, 122 insertions(+), 17 deletions(-) create mode 100644 test/bds/csm025_hardcoded_languages_test.exs diff --git a/CODESMELL.md b/CODESMELL.md index 70967b3..3491290 100644 --- a/CODESMELL.md +++ b/CODESMELL.md @@ -386,16 +386,20 @@ --- -### CSM-024 — `Enum.reduce` with `acc.draft ++ [post]` (O(n²)) -- **File:** `lib/bds/ui/sidebar.ex:556-565` -- **Fix:** Use `Enum.group_by/3` or reverse-accumulate and `Enum.reverse`. +### ~~CSM-024 — `Enum.reduce` with `acc.draft ++ [post]` (O(n²))~~ ✅ FIXED +- **Fixed:** 2026-05-08 (as part of CSM-005) +- **What was done:** Replaced `acc.draft ++ [post]` with `Enum.group_by/2` in `group_posts/1`. See CSM-005 entry for details. --- -### CSM-025 — Hardcoded Language Prefixes -- **File:** `lib/bds/generation/pagefind.ex:48-54` -- **What:** `["de/", "fr/", "it/", "es/"]` hardcoded instead of derived from project settings. -- **Fix:** Derive from project settings (`mainLanguage` and supported languages). +### ~~CSM-025 — Hardcoded Language Prefixes~~ ✅ FIXED +- **Fixed:** 2026-05-11 +- **What was done:** + - Replaced hardcoded `["de/", "fr/", "it/", "es/"]` in `language_match?/2` with dynamically derived prefixes from `plan.blog_languages` and `plan.language`. + - `build_outputs/2` now computes `other_prefixes` by rejecting the main language from `blog_languages` and appending `"/"` to each. + - `pages_for_language/3` and `language_match?/3` now accept the computed prefixes as a parameter instead of using a hardcoded list. + - Works correctly with arbitrary language codes (e.g. `pt-br`, `zh-cn`, `ja`) that were not in the old hardcoded list. + - Added 5 tests in `test/bds/csm025_hardcoded_languages_test.exs`: source-level assertion for no hardcoded prefixes, main language exclusion, non-main language inclusion, arbitrary language codes, single-language blog. --- diff --git a/lib/bds/generation/pagefind.ex b/lib/bds/generation/pagefind.ex index 65841ce..b385fb5 100644 --- a/lib/bds/generation/pagefind.ex +++ b/lib/bds/generation/pagefind.ex @@ -14,11 +14,16 @@ defmodule BDS.Generation.Pagefind do """ @spec build_outputs(map(), [html_output()]) :: [generated_file()] def build_outputs(plan, html_outputs) do - plan.blog_languages - |> Enum.uniq() - |> Enum.flat_map(fn language -> + languages = Enum.uniq(plan.blog_languages) + + other_prefixes = + languages + |> Enum.reject(&(&1 == plan.language)) + |> Enum.map(&(&1 <> "/")) + + Enum.flat_map(languages, fn language -> route_language = route_language(plan.language, language) - pages = pages_for_language(html_outputs, route_language) + pages = pages_for_language(html_outputs, route_language, other_prefixes) prefix = if route_language in [nil, ""], do: ["pagefind"], else: [route_language, "pagefind"] @@ -32,10 +37,11 @@ defmodule BDS.Generation.Pagefind do end) end - defp pages_for_language(html_outputs, route_language) do + defp pages_for_language(html_outputs, route_language, other_prefixes) do html_outputs |> Enum.filter(fn {relative_path, _content} -> - String.ends_with?(relative_path, ".html") and language_match?(relative_path, route_language) + String.ends_with?(relative_path, ".html") and + language_match?(relative_path, route_language, other_prefixes) end) |> Enum.map(fn {relative_path, content} -> %{ @@ -45,12 +51,13 @@ defmodule BDS.Generation.Pagefind do end) end - defp language_match?(relative_path, nil), - do: not String.starts_with?(relative_path, ["de/", "fr/", "it/", "es/"]) + defp language_match?(relative_path, nil, other_prefixes), + do: not String.starts_with?(relative_path, other_prefixes) - defp language_match?(relative_path, ""), do: language_match?(relative_path, nil) + defp language_match?(relative_path, "", other_prefixes), + do: language_match?(relative_path, nil, other_prefixes) - defp language_match?(relative_path, route_language), + defp language_match?(relative_path, route_language, _other_prefixes), do: String.starts_with?(relative_path, route_language <> "/") defp text(content) do diff --git a/test/bds/csm025_hardcoded_languages_test.exs b/test/bds/csm025_hardcoded_languages_test.exs new file mode 100644 index 0000000..6ce4a63 --- /dev/null +++ b/test/bds/csm025_hardcoded_languages_test.exs @@ -0,0 +1,94 @@ +defmodule BDS.CSM025HardcodedLanguagesTest do + use ExUnit.Case, async: true + + alias BDS.Generation.Pagefind + + defp plan(main, languages) do + %{language: main, blog_languages: languages} + end + + defp html(path), do: {path, "

hello

"} + + describe "build_outputs/2 derives language prefixes from plan" do + test "no hardcoded language prefixes in source" do + source = File.read!("lib/bds/generation/pagefind.ex") + + refute source =~ ~s("de/"), + "language prefixes must not be hardcoded — derive from plan.blog_languages" + + refute source =~ ~s("fr/") + refute source =~ ~s("it/") + refute source =~ ~s("es/") + end + + test "main language index excludes other language paths" do + outputs = [ + html("index.html"), + html("about/index.html"), + html("ja/index.html"), + html("ko/index.html") + ] + + result = Pagefind.build_outputs(plan("en", ["en", "ja", "ko"]), outputs) + main_index = Enum.find(result, fn {path, _} -> path == "pagefind/index.json" end) + assert main_index + + {_, json} = main_index + decoded = Jason.decode!(json) + urls = Enum.map(decoded["pages"], & &1["url"]) + assert "/index.html" in urls + assert "/about/index.html" in urls + refute "/ja/index.html" in urls + refute "/ko/index.html" in urls + end + + test "non-main language index includes only its own paths" do + outputs = [ + html("index.html"), + html("ja/index.html"), + html("ja/about/index.html"), + html("ko/index.html") + ] + + result = Pagefind.build_outputs(plan("en", ["en", "ja", "ko"]), outputs) + ja_index = Enum.find(result, fn {path, _} -> path == "ja/pagefind/index.json" end) + assert ja_index + + {_, json} = ja_index + decoded = Jason.decode!(json) + urls = Enum.map(decoded["pages"], & &1["url"]) + assert "/ja/index.html" in urls + assert "/ja/about/index.html" in urls + refute "/index.html" in urls + refute "/ko/index.html" in urls + end + + test "works with arbitrary language codes not in old hardcoded list" do + outputs = [ + html("index.html"), + html("pt-br/index.html"), + html("zh-cn/index.html") + ] + + result = Pagefind.build_outputs(plan("en", ["en", "pt-br", "zh-cn"]), outputs) + main_index = Enum.find(result, fn {path, _} -> path == "pagefind/index.json" end) + {_, json} = main_index + decoded = Jason.decode!(json) + urls = Enum.map(decoded["pages"], & &1["url"]) + assert "/index.html" in urls + refute "/pt-br/index.html" in urls + refute "/zh-cn/index.html" in urls + end + + test "single language blog has no exclusion prefixes" do + outputs = [html("index.html"), html("about/index.html")] + + result = Pagefind.build_outputs(plan("en", ["en"]), outputs) + assert length(result) == 3 + + {_, json} = Enum.find(result, fn {path, _} -> path == "pagefind/index.json" end) + decoded = Jason.decode!(json) + assert length(decoded["pages"]) == 2 + end + end +end