fix: implement TD-05, replacement of XML parser

This commit is contained in:
2026-06-12 11:48:44 +02:00
parent eac6d543d2
commit e3a1010ae9
8 changed files with 670 additions and 300 deletions

View File

@@ -331,7 +331,7 @@ defmodule BDS.CSM005SQLFilteringTest do
# Helpers
# ---------------------------------------------------------------------------
defp create_post(project_id, opts \\ []) do
defp create_post(project_id, opts) do
title = Keyword.get(opts, :title, "Post #{System.unique_integer([:positive])}")
status = Keyword.get(opts, :status, :draft)
tags = Keyword.get(opts, :tags, [])

View File

@@ -187,6 +187,66 @@ defmodule BDS.ImportExecutionTest do
assert_received {:execution_progress, "complete", 1, 1, %{detail: "import_complete"}}
end
# Regression test: when a later post in the report cannot be written, the
# import must return an error and leave no posts behind in the database.
test "execute_import rolls back imported posts when a later post write fails", %{
  project: project
} do
  # First entry: a post flagged as "new".
  new_post = %{
    item_type: "post",
    post_type: "post",
    wp_id: 101,
    title: "Committed Too Early",
    slug: "committed-too-early",
    status: "new",
    author: nil,
    excerpt: nil,
    categories: [],
    tags: [],
    wp_status: "publish",
    content_markdown: "first body",
    content_checksum: sha256("first body"),
    created_at: "2024-05-01 12:00:00",
    updated_at: "2024-05-01 12:30:00",
    published_at: "2024-05-01 12:00:00"
  }

  # Second entry: a conflict resolved as "overwrite" whose existing_id is a
  # placeholder; the assertions below expect this write to fail.
  broken_overwrite = %{
    item_type: "post",
    post_type: "post",
    wp_id: 102,
    title: "Broken Overwrite",
    slug: "broken-overwrite",
    status: "conflict",
    resolution: "overwrite",
    existing_id: "missing-post-id",
    author: nil,
    excerpt: nil,
    categories: [],
    tags: [],
    wp_status: "publish",
    content_markdown: "second body",
    content_checksum: sha256("second body"),
    created_at: "2024-05-02 12:00:00",
    updated_at: "2024-05-02 12:30:00",
    published_at: "2024-05-02 12:00:00"
  }

  report = %{
    items: %{
      categories: [],
      tags: [],
      posts: [new_post, broken_overwrite],
      pages: [],
      media: []
    },
    details: %{posts: [], pages: [], media: []}
  }

  outcome =
    ImportExecution.execute_import(project.id, report, default_author: "Imported Author")

  assert {:error, %{message: failure_message}} = outcome
  assert failure_message =~ "not_found"

  # The first post must not survive the failed run.
  assert Repo.aggregate(Posts.Post, :count, :id) == 0
end
defp sha256(value) do
:sha256
|> :crypto.hash(value)

View File

@@ -802,6 +802,50 @@ defmodule BDS.PostsTest do
BDS.Posts.rebuild_posts_from_files(project.id)
end
# Regression test: a rebuild that fails on a translation file must return an
# error and leave the posts table empty.
test "rebuild_posts_from_files rolls back created posts when a later translation import fails", %{
  project: project
} do
  data_dir = BDS.Projects.project_data_dir(project)
  month_dir = Path.join([data_dir, "posts", "2026", "04"])

  # Front matter and body of the source-language post.
  source_lines = [
    "---",
    "id: canonical-post-id",
    "title: Chimera Source",
    "slug: chimera",
    "status: published",
    "language: de",
    "createdAt: 2024-03-30T21:20:00.000Z",
    "updatedAt: 2024-03-31T21:20:00.000Z",
    "publishedAt: 2024-04-01T21:20:00.000Z",
    "---",
    "Quelle",
    ""
  ]

  # Translation whose translationFor id differs from the source post's id above.
  translation_lines = [
    "---",
    "id: broken-translation-id",
    "translationFor: missing-source-post-id",
    "language: en",
    "title: Broken Translation",
    "---",
    "Translated body",
    ""
  ]

  File.mkdir_p!(month_dir)
  File.write!(Path.join(month_dir, "chimera.md"), Enum.join(source_lines, "\n"))
  File.write!(Path.join(month_dir, "chimera.en.md"), Enum.join(translation_lines, "\n"))

  rebuild_result = BDS.Posts.rebuild_posts_from_files(project.id)

  assert {:error, _reason} = rebuild_result
  assert BDS.Repo.aggregate(BDS.Posts.Post, :count, :id) == 0
end
test "rebuild_posts_from_files parses folded multiline title and slug scalars alongside translations" do
temp_dir =
Path.join(

View File

@@ -53,6 +53,31 @@ defmodule BDS.WxrParserTest do
end
end
# Guards against atom-table growth: parsing a document that contains a
# never-before-seen element name must not create an atom with that name.
test "parse_xml does not intern unknown element names as atoms" do
  # A unique suffix gives a name that no earlier code can have turned into an atom.
  suffix = System.unique_integer([:positive])
  element_name = "csm036_untrusted_#{suffix}"

  xml = """
  <?xml version="1.0" encoding="UTF-8"?>
  <rss version="2.0"
  xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
  xmlns:content="http://purl.org/rss/1.0/modules/content/"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
  xmlns:wp="http://wordpress.org/export/1.2/">
  <channel>
  <title>Legacy Blog</title>
  <#{element_name}>ignored payload</#{element_name}>
  </channel>
  </rss>
  """

  document = WxrParser.parse_xml(xml)

  assert document.site.title == "Legacy Blog"

  # String.to_existing_atom/1 raises ArgumentError when the atom does not exist.
  assert_raise ArgumentError, fn -> String.to_existing_atom(element_name) end
end
defp sample_wxr_xml do
"""
<?xml version="1.0" encoding="UTF-8"?>