Rewrite OPML menu parsing on Saxy to stop xmerl atom interning

This commit is contained in:
2026-06-12 15:25:45 +02:00
parent a00e4b85ac
commit 4dd4781c5a
3 changed files with 202 additions and 34 deletions

View File

@@ -5,6 +5,15 @@ antipatterns, build-vs-buy decisions, and architecture. Tasks are ordered in
phases that make sense to execute sequentially; within a phase, tasks are phases that make sense to execute sequentially; within a phase, tasks are
independent unless noted. Phase 1 contains the top five findings. independent unless noted. Phase 1 contains the top five findings.
**Revalidated 2026-06-12:** every entry's implementation claims were re-checked
against the code, and all quality gates re-run clean (compile
--warnings-as-errors, full test suite 1191 passed / 0 failed, credo --strict,
deps.audit, dialyzer). All 25 entries confirmed done. One adjacent observation
outside any entry's scope: `lib/bds/menu.ex` still parsed OPML with
`:xmerl_scan` (same atom-creation class as TD-05's WXR finding, lower exposure
since the file is project-local) — fixed 2026-06-12 by rewriting the OPML
parsing on Saxy with atom-safety tests, mirroring TD-05's approach.
## How to work these tasks ## How to work these tasks
Every task must follow the project rules in `AGENTS.md`: Every task must follow the project rules in `AGENTS.md`:

View File

@@ -1,17 +1,70 @@
defmodule BDS.Menu do defmodule BDS.Menu do
@moduledoc false @moduledoc false
require Record
alias BDS.Persistence alias BDS.Persistence
alias BDS.Projects alias BDS.Projects
Record.defrecord(:xmlElement, Record.extract(:xmlElement, from_lib: "xmerl/include/xmerl.hrl")) defmodule OpmlHandler do
@moduledoc false
Record.defrecord( @behaviour Saxy.Handler
:xmlAttribute,
Record.extract(:xmlAttribute, from_lib: "xmerl/include/xmerl.hrl") # Collects /opml/body/outline trees as {attrs_map, children} tuples without
) # interning element or attribute names as atoms. Outlines outside the body
# (or separated from their outline parent by a foreign element) are pushed
# as :ignored frames so the stack stays balanced.
def handle_event(:start_document, _prolog, state), do: {:ok, state}
def handle_event(:end_document, _data, state), do: {:ok, state}
def handle_event(:characters, _chars, state), do: {:ok, state}
def handle_event(:cdata, _chars, state), do: {:ok, state}
def handle_event(:start_element, {name, attributes}, state) do
state =
if name == "outline" do
frame =
if collect_outline?(state) do
{Map.new(attributes), []}
else
:ignored
end
%{state | stack: [frame | state.stack]}
else
state
end
{:ok, %{state | path: [name | state.path]}}
end
def handle_event(:end_element, name, state) do
state = %{state | path: tl(state.path)}
state = if name == "outline", do: pop_outline(state), else: state
{:ok, state}
end
defp collect_outline?(%{path: ["body", "opml"]}), do: true
defp collect_outline?(%{path: ["outline" | _rest], stack: [{_attrs, _children} | _frames]}),
do: true
defp collect_outline?(_state), do: false
defp pop_outline(%{stack: [:ignored | rest]} = state), do: %{state | stack: rest}
defp pop_outline(%{stack: [{attrs, children} | rest]} = state) do
outline = {attrs, Enum.reverse(children)}
case rest do
[{parent_attrs, parent_children} | frames] ->
%{state | stack: [{parent_attrs, [outline | parent_children]} | frames]}
_top_level ->
%{state | stack: rest, outlines: [outline | state.outlines]}
end
end
defp pop_outline(state), do: state
end
@valid_kinds [:page, :submenu, :category_archive, :home] @valid_kinds [:page, :submenu, :category_archive, :home]
@@ -135,27 +188,28 @@ defmodule BDS.Menu do
end end
defp parse_opml(contents) do defp parse_opml(contents) do
{document, _rest} = :xmerl_scan.string(String.to_charlist(contents)) case Saxy.parse_string(contents, OpmlHandler, %{path: [], stack: [], outlines: []}) do
{:ok, %{outlines: outlines}} ->
:xmerl_xpath.string(~c"/opml/body/outline", document) outlines
|> Enum.reverse()
|> Enum.map(&parse_outline/1) |> Enum.map(&parse_outline/1)
{:error, error} ->
raise RuntimeError, "Invalid OPML menu file: #{Exception.message(error)}"
end
end end
defp parse_outline(element) do defp parse_outline({attrs, children}) do
kind = element |> outline_kind() |> normalize_kind() kind = attrs |> outline_kind() |> normalize_kind()
base = %{ base = %{
kind: kind, kind: kind,
label: xml_attr(element, :text) || "", label: Map.get(attrs, "text") || "",
slug: element |> outline_slug(kind) |> normalize_optional_string() slug: attrs |> outline_slug(kind) |> normalize_optional_string()
} }
children =
:xmerl_xpath.string(~c"./outline", element)
|> Enum.map(&parse_outline/1)
if kind == :submenu do if kind == :submenu do
Map.put(base, :children, children) Map.put(base, :children, Enum.map(children, &parse_outline/1))
else else
base base
end end
@@ -172,13 +226,12 @@ defmodule BDS.Menu do
] ]
end end
defp outline_kind(element), do: xml_attr(element, :type) || xml_attr(element, :kind) defp outline_kind(attrs), do: Map.get(attrs, "type") || Map.get(attrs, "kind")
defp outline_slug(element, :category_archive), defp outline_slug(attrs, :category_archive),
do: xml_attr(element, :categoryName) || xml_attr(element, :slug) do: Map.get(attrs, "categoryName") || Map.get(attrs, "slug")
defp outline_slug(element, :home), do: xml_attr(element, :pageSlug) || xml_attr(element, :slug) defp outline_slug(attrs, _kind), do: Map.get(attrs, "pageSlug") || Map.get(attrs, "slug")
defp outline_slug(element, _kind), do: xml_attr(element, :pageSlug) || xml_attr(element, :slug)
defp render_outline_kind(:category_archive), do: "category-archive" defp render_outline_kind(:category_archive), do: "category-archive"
defp render_outline_kind(kind), do: to_string(kind) defp render_outline_kind(kind), do: to_string(kind)
@@ -190,16 +243,6 @@ defmodule BDS.Menu do
defp render_category_name(:category_archive, slug), do: slug defp render_category_name(:category_archive, slug), do: slug
defp render_category_name(_kind, _slug), do: nil defp render_category_name(_kind, _slug), do: nil
defp xml_attr(element, name) do
element
|> xmlElement(:attributes)
|> Enum.find_value(fn attribute ->
if xmlAttribute(attribute, :name) == name do
attribute |> xmlAttribute(:value) |> to_string()
end
end)
end
defp normalize_kind(kind) when is_atom(kind) and kind in @valid_kinds, do: kind defp normalize_kind(kind) when is_atom(kind) and kind in @valid_kinds, do: kind
defp normalize_kind(nil), do: :page defp normalize_kind(nil), do: :page

View File

@@ -96,4 +96,120 @@ defmodule BDS.MenuTest do
} }
] ]
end end
test "get_menu parses legacy kind/slug attributes and drops children of non-submenu items",
%{project: project, temp_dir: temp_dir} do
write_menu_opml(temp_dir, [
~s( <outline text="About" kind="page" slug="about">),
~s( <outline text="Nested" kind="page" slug="nested"/>),
~s( </outline>),
~s( <outline text="Notes" kind="category-archive" slug="notes"/>)
])
assert {:ok, menu} = BDS.Menu.get_menu(project.id)
assert menu.items == [
%{kind: :home, label: "Home", slug: nil},
%{kind: :page, label: "About", slug: "about"},
%{kind: :category_archive, label: "Notes", slug: "notes"}
]
end
test "get_menu ignores outlines outside /opml/body and decodes XML entities",
%{project: project, temp_dir: temp_dir} do
File.mkdir_p!(Path.join(temp_dir, "meta"))
File.write!(
Path.join([temp_dir, "meta", "menu.opml"]),
[
~s(<?xml version="1.0" encoding="UTF-8"?>),
~s(<opml version="2.0">),
~s( <head>),
~s( <outline text="Stray" type="page" pageSlug="stray"/>),
~s( </head>),
~s( <body>),
~s( <outline text="Q &amp; A" type="page" pageSlug="q-and-a"/>),
~s( </body>),
~s(</opml>)
]
|> Enum.join("\n")
)
assert {:ok, menu} = BDS.Menu.get_menu(project.id)
assert menu.items == [
%{kind: :home, label: "Home", slug: nil},
%{kind: :page, label: "Q & A", slug: "q-and-a"}
]
end
test "get_menu raises on malformed OPML", %{project: project, temp_dir: temp_dir} do
File.mkdir_p!(Path.join(temp_dir, "meta"))
File.write!(Path.join([temp_dir, "meta", "menu.opml"]), "<opml><body><outline")
assert_raise RuntimeError, fn ->
BDS.Menu.get_menu(project.id)
end
end
test "get_menu does not intern OPML element or attribute names as atoms",
%{project: project, temp_dir: temp_dir} do
unique_element = "untrusted_element_#{System.unique_integer([:positive])}"
unique_attr = "untrusted_attr_#{System.unique_integer([:positive])}"
write_menu_opml(temp_dir, [
~s( <#{unique_element}>ignored</#{unique_element}>),
~s( <outline text="About" type="page" pageSlug="about" #{unique_attr}="x"/>)
])
assert {:ok, menu} = BDS.Menu.get_menu(project.id)
assert Enum.at(menu.items, 1) == %{kind: :page, label: "About", slug: "about"}
assert_raise ArgumentError, fn -> String.to_existing_atom(unique_element) end
assert_raise ArgumentError, fn -> String.to_existing_atom(unique_attr) end
end
test "get_menu keeps atom growth bounded for many unique attribute names",
%{project: project, temp_dir: temp_dir} do
unique_attrs =
Enum.map(1..250, fn index ->
"untrusted_bulk_attr_#{System.unique_integer([:positive])}_#{index}"
end)
outlines =
Enum.map(unique_attrs, fn attr ->
~s( <outline text="Item" type="page" pageSlug="item" #{attr}="x"/>)
end)
write_menu_opml(temp_dir, outlines)
assert {:ok, _warmup} = BDS.Menu.get_menu(project.id)
atom_count_before = :erlang.system_info(:atom_count)
assert {:ok, menu} = BDS.Menu.get_menu(project.id)
atom_count_after = :erlang.system_info(:atom_count)
assert length(menu.items) == 251
assert atom_count_after - atom_count_before < 20
Enum.each(unique_attrs, fn attr ->
assert_raise ArgumentError, fn -> String.to_existing_atom(attr) end
end)
end
defp write_menu_opml(temp_dir, body_lines) do
File.mkdir_p!(Path.join(temp_dir, "meta"))
File.write!(
Path.join([temp_dir, "meta", "menu.opml"]),
[
~s(<?xml version="1.0" encoding="UTF-8"?>),
~s(<opml version="2.0">),
~s( <head><title>Menu</title></head>),
~s( <body>)
]
|> Enum.concat(body_lines)
|> Enum.concat([~s( </body>), ~s(</opml>)])
|> Enum.join("\n")
)
end
end end