From 4dd4781c5a2f4d660747cfc09f9b3432d1455df8 Mon Sep 17 00:00:00 2001 From: Chili Palmer Date: Fri, 12 Jun 2026 15:25:45 +0200 Subject: [PATCH] Rewrite OPML menu parsing on Saxy to stop xmerl atom interning --- TECHDEBTS.md | 9 ++++ lib/bds/menu.ex | 111 +++++++++++++++++++++++++++------------ test/bds/menu_test.exs | 116 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 202 insertions(+), 34 deletions(-) diff --git a/TECHDEBTS.md b/TECHDEBTS.md index 5c8a96f..7b77eda 100644 --- a/TECHDEBTS.md +++ b/TECHDEBTS.md @@ -5,6 +5,15 @@ antipatterns, build-vs-buy decisions, and architecture. Tasks are ordered in phases that make sense to execute sequentially; within a phase, tasks are independent unless noted. Phase 1 contains the top five findings. +**Revalidated 2026-06-12:** every entry's implementation claims were re-checked +against the code, and all quality gates re-run clean (compile +--warnings-as-errors, full test suite 1191 passed / 0 failed, credo --strict, +deps.audit, dialyzer). All 25 entries confirmed done. One adjacent observation +outside any entry's scope: `lib/bds/menu.ex` still parsed OPML with +`:xmerl_scan` (same atom-creation class as TD-05's WXR finding, lower exposure +since the file is project-local) — fixed 2026-06-12 by rewriting the OPML +parsing on Saxy with atom-safety tests, mirroring TD-05's approach. + ## How to work these tasks Every task must follow the project rules in `AGENTS.md`: diff --git a/lib/bds/menu.ex b/lib/bds/menu.ex index 063cac2..1810ecb 100644 --- a/lib/bds/menu.ex +++ b/lib/bds/menu.ex @@ -1,17 +1,70 @@ defmodule BDS.Menu do @moduledoc false - require Record - alias BDS.Persistence alias BDS.Projects - Record.defrecord(:xmlElement, Record.extract(:xmlElement, from_lib: "xmerl/include/xmerl.hrl")) + defmodule OpmlHandler do + @moduledoc false - Record.defrecord( - :xmlAttribute, - Record.extract(:xmlAttribute, from_lib: "xmerl/include/xmerl.hrl") - ) + @behaviour Saxy.Handler + + # Collects /opml/body/outline trees as {attrs_map, children} tuples without + # interning element or attribute names as atoms. Outlines outside the body + # (or separated from their outline parent by a foreign element) are pushed + # as :ignored frames so the stack stays balanced. + def handle_event(:start_document, _prolog, state), do: {:ok, state} + def handle_event(:end_document, _data, state), do: {:ok, state} + def handle_event(:characters, _chars, state), do: {:ok, state} + def handle_event(:cdata, _chars, state), do: {:ok, state} + + def handle_event(:start_element, {name, attributes}, state) do + state = + if name == "outline" do + frame = + if collect_outline?(state) do + {Map.new(attributes), []} + else + :ignored + end + + %{state | stack: [frame | state.stack]} + else + state + end + + {:ok, %{state | path: [name | state.path]}} + end + + def handle_event(:end_element, name, state) do + state = %{state | path: tl(state.path)} + state = if name == "outline", do: pop_outline(state), else: state + {:ok, state} + end + + defp collect_outline?(%{path: ["body", "opml"]}), do: true + + defp collect_outline?(%{path: ["outline" | _rest], stack: [{_attrs, _children} | _frames]}), + do: true + + defp collect_outline?(_state), do: false + + defp pop_outline(%{stack: [:ignored | rest]} = state), do: %{state | stack: rest} + + defp pop_outline(%{stack: [{attrs, children} | rest]} = state) do + outline = {attrs, Enum.reverse(children)} + + case rest do + [{parent_attrs, parent_children} | frames] -> + %{state | stack: [{parent_attrs, [outline | parent_children]} | frames]} + + _top_level -> + %{state | stack: rest, outlines: [outline | state.outlines]} + end + end + + defp pop_outline(state), do: state + end @valid_kinds [:page, :submenu, :category_archive, :home] @@ -135,27 +188,28 @@ defmodule BDS.Menu do end defp parse_opml(contents) do - {document, _rest} = :xmerl_scan.string(String.to_charlist(contents)) + case Saxy.parse_string(contents, OpmlHandler, %{path: [], stack: [], outlines: []}) do + {:ok, %{outlines: outlines}} -> + outlines + |> Enum.reverse() + |> Enum.map(&parse_outline/1) - :xmerl_xpath.string(~c"/opml/body/outline", document) - |> Enum.map(&parse_outline/1) + {:error, error} -> + raise RuntimeError, "Invalid OPML menu file: #{Exception.message(error)}" + end end - defp parse_outline(element) do - kind = element |> outline_kind() |> normalize_kind() + defp parse_outline({attrs, children}) do + kind = attrs |> outline_kind() |> normalize_kind() base = %{ kind: kind, - label: xml_attr(element, :text) || "", - slug: element |> outline_slug(kind) |> normalize_optional_string() + label: Map.get(attrs, "text") || "", + slug: attrs |> outline_slug(kind) |> normalize_optional_string() } - children = - :xmerl_xpath.string(~c"./outline", element) - |> Enum.map(&parse_outline/1) - if kind == :submenu do - Map.put(base, :children, children) + Map.put(base, :children, Enum.map(children, &parse_outline/1)) else base end @@ -172,13 +226,12 @@ defmodule BDS.Menu do ] end - defp outline_kind(element), do: xml_attr(element, :type) || xml_attr(element, :kind) + defp outline_kind(attrs), do: Map.get(attrs, "type") || Map.get(attrs, "kind") - defp outline_slug(element, :category_archive), - do: xml_attr(element, :categoryName) || xml_attr(element, :slug) + defp outline_slug(attrs, :category_archive), + do: Map.get(attrs, "categoryName") || Map.get(attrs, "slug") - defp outline_slug(element, :home), do: xml_attr(element, :pageSlug) || xml_attr(element, :slug) - defp outline_slug(element, _kind), do: xml_attr(element, :pageSlug) || xml_attr(element, :slug) + defp outline_slug(attrs, _kind), do: Map.get(attrs, "pageSlug") || Map.get(attrs, "slug") defp render_outline_kind(:category_archive), do: "category-archive" defp render_outline_kind(kind), do: to_string(kind) @@ -190,16 +243,6 @@ defmodule BDS.Menu do defp render_category_name(:category_archive, slug), do: slug defp render_category_name(_kind, _slug), do: nil - defp xml_attr(element, name) do - element - |> xmlElement(:attributes) - |> Enum.find_value(fn attribute -> - if xmlAttribute(attribute, :name) == name do - attribute |> xmlAttribute(:value) |> to_string() - end - end) - end - defp normalize_kind(kind) when is_atom(kind) and kind in @valid_kinds, do: kind defp normalize_kind(nil), do: :page diff --git a/test/bds/menu_test.exs b/test/bds/menu_test.exs index a72a2cb..c3e1cf7 100644 --- a/test/bds/menu_test.exs +++ b/test/bds/menu_test.exs @@ -96,4 +96,120 @@ defmodule BDS.MenuTest do } ] end + + test "get_menu parses legacy kind/slug attributes and drops children of non-submenu items", + %{project: project, temp_dir: temp_dir} do + write_menu_opml(temp_dir, [ + ~s( ), + ~s( ), + ~s( ), + ~s( ) + ]) + + assert {:ok, menu} = BDS.Menu.get_menu(project.id) + + assert menu.items == [ + %{kind: :home, label: "Home", slug: nil}, + %{kind: :page, label: "About", slug: "about"}, + %{kind: :category_archive, label: "Notes", slug: "notes"} + ] + end + + test "get_menu ignores outlines outside /opml/body and decodes XML entities", + %{project: project, temp_dir: temp_dir} do + File.mkdir_p!(Path.join(temp_dir, "meta")) + + File.write!( + Path.join([temp_dir, "meta", "menu.opml"]), + [ + ~s(), + ~s(), + ~s( ), + ~s( ), + ~s( ), + ~s( ), + ~s( ), + ~s( ), + ~s() + ] + |> Enum.join("\n") + ) + + assert {:ok, menu} = BDS.Menu.get_menu(project.id) + + assert menu.items == [ + %{kind: :home, label: "Home", slug: nil}, + %{kind: :page, label: "Q & A", slug: "q-and-a"} + ] + end + + test "get_menu raises on malformed OPML", %{project: project, temp_dir: temp_dir} do + File.mkdir_p!(Path.join(temp_dir, "meta")) + File.write!(Path.join([temp_dir, "meta", "menu.opml"]), " + BDS.Menu.get_menu(project.id) + end + end + + test "get_menu does not intern OPML element or attribute names as atoms", + %{project: project, temp_dir: temp_dir} do + unique_element = "untrusted_element_#{System.unique_integer([:positive])}" + unique_attr = "untrusted_attr_#{System.unique_integer([:positive])}" + + write_menu_opml(temp_dir, [ + ~s( <#{unique_element}>ignored), + ~s( ) + ]) + + assert {:ok, menu} = BDS.Menu.get_menu(project.id) + assert Enum.at(menu.items, 1) == %{kind: :page, label: "About", slug: "about"} + + assert_raise ArgumentError, fn -> String.to_existing_atom(unique_element) end + assert_raise ArgumentError, fn -> String.to_existing_atom(unique_attr) end + end + + test "get_menu keeps atom growth bounded for many unique attribute names", + %{project: project, temp_dir: temp_dir} do + unique_attrs = + Enum.map(1..250, fn index -> + "untrusted_bulk_attr_#{System.unique_integer([:positive])}_#{index}" + end) + + outlines = + Enum.map(unique_attrs, fn attr -> + ~s( ) + end) + + write_menu_opml(temp_dir, outlines) + assert {:ok, _warmup} = BDS.Menu.get_menu(project.id) + + atom_count_before = :erlang.system_info(:atom_count) + assert {:ok, menu} = BDS.Menu.get_menu(project.id) + atom_count_after = :erlang.system_info(:atom_count) + + assert length(menu.items) == 251 + assert atom_count_after - atom_count_before < 20 + + Enum.each(unique_attrs, fn attr -> + assert_raise ArgumentError, fn -> String.to_existing_atom(attr) end + end) + end + + defp write_menu_opml(temp_dir, body_lines) do + File.mkdir_p!(Path.join(temp_dir, "meta")) + + File.write!( + Path.join([temp_dir, "meta", "menu.opml"]), + [ + ~s(), + ~s(), + ~s( Menu), + ~s( ) + ] + |> Enum.concat(body_lines) + |> Enum.concat([~s( ), ~s()]) + |> Enum.join("\n") + ) + end end