Rewrite OPML menu parsing on Saxy to stop xmerl atom interning
This commit is contained in:
@@ -5,6 +5,15 @@ antipatterns, build-vs-buy decisions, and architecture. Tasks are ordered in
|
|||||||
phases that make sense to execute sequentially; within a phase, tasks are
|
phases that make sense to execute sequentially; within a phase, tasks are
|
||||||
independent unless noted. Phase 1 contains the top five findings.
|
independent unless noted. Phase 1 contains the top five findings.
|
||||||
|
|
||||||
|
**Revalidated 2026-06-12:** every entry's implementation claims were re-checked
|
||||||
|
against the code, and all quality gates re-run clean (compile
|
||||||
|
--warnings-as-errors, full test suite 1191 passed / 0 failed, credo --strict,
|
||||||
|
deps.audit, dialyzer). All 25 entries confirmed done. One adjacent observation
|
||||||
|
outside any entry's scope: `lib/bds/menu.ex` still parsed OPML with
|
||||||
|
`:xmerl_scan` (same atom-creation class as TD-05's WXR finding, lower exposure
|
||||||
|
since the file is project-local) — fixed 2026-06-12 by rewriting the OPML
|
||||||
|
parsing on Saxy with atom-safety tests, mirroring TD-05's approach.
|
||||||
|
|
||||||
## How to work these tasks
|
## How to work these tasks
|
||||||
|
|
||||||
Every task must follow the project rules in `AGENTS.md`:
|
Every task must follow the project rules in `AGENTS.md`:
|
||||||
|
|||||||
111
lib/bds/menu.ex
111
lib/bds/menu.ex
@@ -1,17 +1,70 @@
|
|||||||
defmodule BDS.Menu do
|
defmodule BDS.Menu do
|
||||||
@moduledoc false
|
@moduledoc false
|
||||||
|
|
||||||
require Record
|
|
||||||
|
|
||||||
alias BDS.Persistence
|
alias BDS.Persistence
|
||||||
alias BDS.Projects
|
alias BDS.Projects
|
||||||
|
|
||||||
Record.defrecord(:xmlElement, Record.extract(:xmlElement, from_lib: "xmerl/include/xmerl.hrl"))
|
defmodule OpmlHandler do
|
||||||
|
@moduledoc false
|
||||||
|
|
||||||
Record.defrecord(
|
@behaviour Saxy.Handler
|
||||||
:xmlAttribute,
|
|
||||||
Record.extract(:xmlAttribute, from_lib: "xmerl/include/xmerl.hrl")
|
# Collects /opml/body/outline trees as {attrs_map, children} tuples without
|
||||||
)
|
# interning element or attribute names as atoms. Outlines outside the body
|
||||||
|
# (or separated from their outline parent by a foreign element) are pushed
|
||||||
|
# as :ignored frames so the stack stays balanced.
|
||||||
|
def handle_event(:start_document, _prolog, state), do: {:ok, state}
|
||||||
|
def handle_event(:end_document, _data, state), do: {:ok, state}
|
||||||
|
def handle_event(:characters, _chars, state), do: {:ok, state}
|
||||||
|
def handle_event(:cdata, _chars, state), do: {:ok, state}
|
||||||
|
|
||||||
|
def handle_event(:start_element, {name, attributes}, state) do
|
||||||
|
state =
|
||||||
|
if name == "outline" do
|
||||||
|
frame =
|
||||||
|
if collect_outline?(state) do
|
||||||
|
{Map.new(attributes), []}
|
||||||
|
else
|
||||||
|
:ignored
|
||||||
|
end
|
||||||
|
|
||||||
|
%{state | stack: [frame | state.stack]}
|
||||||
|
else
|
||||||
|
state
|
||||||
|
end
|
||||||
|
|
||||||
|
{:ok, %{state | path: [name | state.path]}}
|
||||||
|
end
|
||||||
|
|
||||||
|
def handle_event(:end_element, name, state) do
|
||||||
|
state = %{state | path: tl(state.path)}
|
||||||
|
state = if name == "outline", do: pop_outline(state), else: state
|
||||||
|
{:ok, state}
|
||||||
|
end
|
||||||
|
|
||||||
|
defp collect_outline?(%{path: ["body", "opml"]}), do: true
|
||||||
|
|
||||||
|
defp collect_outline?(%{path: ["outline" | _rest], stack: [{_attrs, _children} | _frames]}),
|
||||||
|
do: true
|
||||||
|
|
||||||
|
defp collect_outline?(_state), do: false
|
||||||
|
|
||||||
|
defp pop_outline(%{stack: [:ignored | rest]} = state), do: %{state | stack: rest}
|
||||||
|
|
||||||
|
defp pop_outline(%{stack: [{attrs, children} | rest]} = state) do
|
||||||
|
outline = {attrs, Enum.reverse(children)}
|
||||||
|
|
||||||
|
case rest do
|
||||||
|
[{parent_attrs, parent_children} | frames] ->
|
||||||
|
%{state | stack: [{parent_attrs, [outline | parent_children]} | frames]}
|
||||||
|
|
||||||
|
_top_level ->
|
||||||
|
%{state | stack: rest, outlines: [outline | state.outlines]}
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
defp pop_outline(state), do: state
|
||||||
|
end
|
||||||
|
|
||||||
@valid_kinds [:page, :submenu, :category_archive, :home]
|
@valid_kinds [:page, :submenu, :category_archive, :home]
|
||||||
|
|
||||||
@@ -135,27 +188,28 @@ defmodule BDS.Menu do
|
|||||||
end
|
end
|
||||||
|
|
||||||
defp parse_opml(contents) do
|
defp parse_opml(contents) do
|
||||||
{document, _rest} = :xmerl_scan.string(String.to_charlist(contents))
|
case Saxy.parse_string(contents, OpmlHandler, %{path: [], stack: [], outlines: []}) do
|
||||||
|
{:ok, %{outlines: outlines}} ->
|
||||||
|
outlines
|
||||||
|
|> Enum.reverse()
|
||||||
|
|> Enum.map(&parse_outline/1)
|
||||||
|
|
||||||
:xmerl_xpath.string(~c"/opml/body/outline", document)
|
{:error, error} ->
|
||||||
|> Enum.map(&parse_outline/1)
|
raise RuntimeError, "Invalid OPML menu file: #{Exception.message(error)}"
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
defp parse_outline(element) do
|
defp parse_outline({attrs, children}) do
|
||||||
kind = element |> outline_kind() |> normalize_kind()
|
kind = attrs |> outline_kind() |> normalize_kind()
|
||||||
|
|
||||||
base = %{
|
base = %{
|
||||||
kind: kind,
|
kind: kind,
|
||||||
label: xml_attr(element, :text) || "",
|
label: Map.get(attrs, "text") || "",
|
||||||
slug: element |> outline_slug(kind) |> normalize_optional_string()
|
slug: attrs |> outline_slug(kind) |> normalize_optional_string()
|
||||||
}
|
}
|
||||||
|
|
||||||
children =
|
|
||||||
:xmerl_xpath.string(~c"./outline", element)
|
|
||||||
|> Enum.map(&parse_outline/1)
|
|
||||||
|
|
||||||
if kind == :submenu do
|
if kind == :submenu do
|
||||||
Map.put(base, :children, children)
|
Map.put(base, :children, Enum.map(children, &parse_outline/1))
|
||||||
else
|
else
|
||||||
base
|
base
|
||||||
end
|
end
|
||||||
@@ -172,13 +226,12 @@ defmodule BDS.Menu do
|
|||||||
]
|
]
|
||||||
end
|
end
|
||||||
|
|
||||||
defp outline_kind(element), do: xml_attr(element, :type) || xml_attr(element, :kind)
|
defp outline_kind(attrs), do: Map.get(attrs, "type") || Map.get(attrs, "kind")
|
||||||
|
|
||||||
defp outline_slug(element, :category_archive),
|
defp outline_slug(attrs, :category_archive),
|
||||||
do: xml_attr(element, :categoryName) || xml_attr(element, :slug)
|
do: Map.get(attrs, "categoryName") || Map.get(attrs, "slug")
|
||||||
|
|
||||||
defp outline_slug(element, :home), do: xml_attr(element, :pageSlug) || xml_attr(element, :slug)
|
defp outline_slug(attrs, _kind), do: Map.get(attrs, "pageSlug") || Map.get(attrs, "slug")
|
||||||
defp outline_slug(element, _kind), do: xml_attr(element, :pageSlug) || xml_attr(element, :slug)
|
|
||||||
|
|
||||||
defp render_outline_kind(:category_archive), do: "category-archive"
|
defp render_outline_kind(:category_archive), do: "category-archive"
|
||||||
defp render_outline_kind(kind), do: to_string(kind)
|
defp render_outline_kind(kind), do: to_string(kind)
|
||||||
@@ -190,16 +243,6 @@ defmodule BDS.Menu do
|
|||||||
defp render_category_name(:category_archive, slug), do: slug
|
defp render_category_name(:category_archive, slug), do: slug
|
||||||
defp render_category_name(_kind, _slug), do: nil
|
defp render_category_name(_kind, _slug), do: nil
|
||||||
|
|
||||||
defp xml_attr(element, name) do
|
|
||||||
element
|
|
||||||
|> xmlElement(:attributes)
|
|
||||||
|> Enum.find_value(fn attribute ->
|
|
||||||
if xmlAttribute(attribute, :name) == name do
|
|
||||||
attribute |> xmlAttribute(:value) |> to_string()
|
|
||||||
end
|
|
||||||
end)
|
|
||||||
end
|
|
||||||
|
|
||||||
defp normalize_kind(kind) when is_atom(kind) and kind in @valid_kinds, do: kind
|
defp normalize_kind(kind) when is_atom(kind) and kind in @valid_kinds, do: kind
|
||||||
|
|
||||||
defp normalize_kind(nil), do: :page
|
defp normalize_kind(nil), do: :page
|
||||||
|
|||||||
@@ -96,4 +96,120 @@ defmodule BDS.MenuTest do
|
|||||||
}
|
}
|
||||||
]
|
]
|
||||||
end
|
end
|
||||||
|
|
||||||
|
test "get_menu parses legacy kind/slug attributes and drops children of non-submenu items",
|
||||||
|
%{project: project, temp_dir: temp_dir} do
|
||||||
|
write_menu_opml(temp_dir, [
|
||||||
|
~s( <outline text="About" kind="page" slug="about">),
|
||||||
|
~s( <outline text="Nested" kind="page" slug="nested"/>),
|
||||||
|
~s( </outline>),
|
||||||
|
~s( <outline text="Notes" kind="category-archive" slug="notes"/>)
|
||||||
|
])
|
||||||
|
|
||||||
|
assert {:ok, menu} = BDS.Menu.get_menu(project.id)
|
||||||
|
|
||||||
|
assert menu.items == [
|
||||||
|
%{kind: :home, label: "Home", slug: nil},
|
||||||
|
%{kind: :page, label: "About", slug: "about"},
|
||||||
|
%{kind: :category_archive, label: "Notes", slug: "notes"}
|
||||||
|
]
|
||||||
|
end
|
||||||
|
|
||||||
|
test "get_menu ignores outlines outside /opml/body and decodes XML entities",
|
||||||
|
%{project: project, temp_dir: temp_dir} do
|
||||||
|
File.mkdir_p!(Path.join(temp_dir, "meta"))
|
||||||
|
|
||||||
|
File.write!(
|
||||||
|
Path.join([temp_dir, "meta", "menu.opml"]),
|
||||||
|
[
|
||||||
|
~s(<?xml version="1.0" encoding="UTF-8"?>),
|
||||||
|
~s(<opml version="2.0">),
|
||||||
|
~s( <head>),
|
||||||
|
~s( <outline text="Stray" type="page" pageSlug="stray"/>),
|
||||||
|
~s( </head>),
|
||||||
|
~s( <body>),
|
||||||
|
~s( <outline text="Q & A" type="page" pageSlug="q-and-a"/>),
|
||||||
|
~s( </body>),
|
||||||
|
~s(</opml>)
|
||||||
|
]
|
||||||
|
|> Enum.join("\n")
|
||||||
|
)
|
||||||
|
|
||||||
|
assert {:ok, menu} = BDS.Menu.get_menu(project.id)
|
||||||
|
|
||||||
|
assert menu.items == [
|
||||||
|
%{kind: :home, label: "Home", slug: nil},
|
||||||
|
%{kind: :page, label: "Q & A", slug: "q-and-a"}
|
||||||
|
]
|
||||||
|
end
|
||||||
|
|
||||||
|
test "get_menu raises on malformed OPML", %{project: project, temp_dir: temp_dir} do
|
||||||
|
File.mkdir_p!(Path.join(temp_dir, "meta"))
|
||||||
|
File.write!(Path.join([temp_dir, "meta", "menu.opml"]), "<opml><body><outline")
|
||||||
|
|
||||||
|
assert_raise RuntimeError, fn ->
|
||||||
|
BDS.Menu.get_menu(project.id)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
test "get_menu does not intern OPML element or attribute names as atoms",
|
||||||
|
%{project: project, temp_dir: temp_dir} do
|
||||||
|
unique_element = "untrusted_element_#{System.unique_integer([:positive])}"
|
||||||
|
unique_attr = "untrusted_attr_#{System.unique_integer([:positive])}"
|
||||||
|
|
||||||
|
write_menu_opml(temp_dir, [
|
||||||
|
~s( <#{unique_element}>ignored</#{unique_element}>),
|
||||||
|
~s( <outline text="About" type="page" pageSlug="about" #{unique_attr}="x"/>)
|
||||||
|
])
|
||||||
|
|
||||||
|
assert {:ok, menu} = BDS.Menu.get_menu(project.id)
|
||||||
|
assert Enum.at(menu.items, 1) == %{kind: :page, label: "About", slug: "about"}
|
||||||
|
|
||||||
|
assert_raise ArgumentError, fn -> String.to_existing_atom(unique_element) end
|
||||||
|
assert_raise ArgumentError, fn -> String.to_existing_atom(unique_attr) end
|
||||||
|
end
|
||||||
|
|
||||||
|
test "get_menu keeps atom growth bounded for many unique attribute names",
|
||||||
|
%{project: project, temp_dir: temp_dir} do
|
||||||
|
unique_attrs =
|
||||||
|
Enum.map(1..250, fn index ->
|
||||||
|
"untrusted_bulk_attr_#{System.unique_integer([:positive])}_#{index}"
|
||||||
|
end)
|
||||||
|
|
||||||
|
outlines =
|
||||||
|
Enum.map(unique_attrs, fn attr ->
|
||||||
|
~s( <outline text="Item" type="page" pageSlug="item" #{attr}="x"/>)
|
||||||
|
end)
|
||||||
|
|
||||||
|
write_menu_opml(temp_dir, outlines)
|
||||||
|
assert {:ok, _warmup} = BDS.Menu.get_menu(project.id)
|
||||||
|
|
||||||
|
atom_count_before = :erlang.system_info(:atom_count)
|
||||||
|
assert {:ok, menu} = BDS.Menu.get_menu(project.id)
|
||||||
|
atom_count_after = :erlang.system_info(:atom_count)
|
||||||
|
|
||||||
|
assert length(menu.items) == 251
|
||||||
|
assert atom_count_after - atom_count_before < 20
|
||||||
|
|
||||||
|
Enum.each(unique_attrs, fn attr ->
|
||||||
|
assert_raise ArgumentError, fn -> String.to_existing_atom(attr) end
|
||||||
|
end)
|
||||||
|
end
|
||||||
|
|
||||||
|
defp write_menu_opml(temp_dir, body_lines) do
|
||||||
|
File.mkdir_p!(Path.join(temp_dir, "meta"))
|
||||||
|
|
||||||
|
File.write!(
|
||||||
|
Path.join([temp_dir, "meta", "menu.opml"]),
|
||||||
|
[
|
||||||
|
~s(<?xml version="1.0" encoding="UTF-8"?>),
|
||||||
|
~s(<opml version="2.0">),
|
||||||
|
~s( <head><title>Menu</title></head>),
|
||||||
|
~s( <body>)
|
||||||
|
]
|
||||||
|
|> Enum.concat(body_lines)
|
||||||
|
|> Enum.concat([~s( </body>), ~s(</opml>)])
|
||||||
|
|> Enum.join("\n")
|
||||||
|
)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
Reference in New Issue
Block a user