feat: step 12 done

This commit is contained in:
2026-04-29 20:07:01 +02:00
parent 155fda8b81
commit f178b5b207
18 changed files with 3494 additions and 2 deletions

View File

@@ -0,0 +1,108 @@
defmodule BDS.Desktop.ImportShellLiveTest do
use ExUnit.Case, async: false
import Phoenix.ConnTest
import Phoenix.LiveViewTest
alias BDS.ImportDefinitions
alias BDS.Projects
@endpoint BDS.Desktop.Endpoint
setup do
  # Check out a sandboxed connection and put it in shared mode, owned by the
  # test process (presumably so the LiveView process can use it — confirm).
  :ok = Ecto.Adapters.SQL.Sandbox.checkout(BDS.Repo)
  Ecto.Adapters.SQL.Sandbox.mode(BDS.Repo, {:shared, self()})

  # Every test gets its own scratch directory, removed again on exit.
  suffix = System.unique_integer([:positive])
  temp_dir = Path.join(System.tmp_dir!(), "bds-import-shell-live-#{suffix}")
  File.mkdir_p!(temp_dir)
  on_exit(fn -> File.rm_rf(temp_dir) end)

  # Create the project backed by the scratch directory and make it active.
  {:ok, project} = Projects.create_project(%{name: "Import Shell", data_path: temp_dir})
  {:ok, _active_project} = Projects.set_active_project(project.id)

  [project: project, temp_dir: temp_dir]
end
test "opening an import definition renders the dedicated import analysis editor instead of the fallback shell frame",
     %{project: project, temp_dir: temp_dir} do
  uploads_dir = Path.join(temp_dir, "uploads")
  wxr_path = Path.join(temp_dir, "legacy.xml")

  # Seed a definition that already carries a cached analysis report.
  definition_attrs = %{
    project_id: project.id,
    name: "Legacy Import",
    wxr_file_path: wxr_path,
    uploads_folder_path: uploads_dir,
    last_analysis_result: wxr_path |> cached_report(uploads_dir) |> Jason.encode!()
  }

  assert {:ok, definition} = ImportDefinitions.create_definition(definition_attrs)

  {:ok, view, _html} = live_isolated(build_conn(), BDS.Desktop.ShellLive)
  _html = render_click(view, "select_view", %{"view" => "import"})

  # Open the definition through its sidebar entry and capture the rendered HTML.
  sidebar_item = "[data-testid='sidebar-open-item'][data-item-id='#{definition.id}']"
  html = view |> element(sidebar_item) |> render_click()

  expected_fragments = [
    ~s(data-testid="import-editor"),
    ~s(data-testid="import-editor-form"),
    "Legacy Import",
    "Uploads Folder",
    "WXR File",
    "Ready to import:",
    "Import 5 Items",
    "Post Slug Conflicts",
    "Analyze with..."
  ]

  for fragment <- expected_fragments do
    assert html =~ fragment
  end

  # The generic fallback frame must not be what got rendered.
  refute html =~ "Desktop workbench content routed through the Elixir shell."
end
# Builds the analysis report map that the test stores (JSON-encoded) as the
# definition's `last_analysis_result`. `wxr_path` is used for both
# `source_file` entries; `uploads_dir` is the base of the media item's
# `source_file`.
defp cached_report(wxr_path, uploads_dir) do
  # Stats shape shared by posts, pages and media: one new item, nothing else.
  single_new = %{new_count: 1, update_count: 0, conflict_count: 0, duplicate_count: 0}
  # Categories and tags share the same stats.
  taxonomy_stats = %{existing_count: 0, mapped_count: 0, new_count: 1}

  site_info = %{
    title: "Legacy Blog",
    url: "https://legacy.example",
    language: "en",
    source_file: wxr_path
  }

  slug_conflict = %{
    item_type: "post",
    item_name: "hello-world",
    resolution: "skip",
    source_title: "Hello World",
    existing_title: "Existing Hello"
  }

  posts = [
    %{item_type: "post", title: "Hello World", slug: "hello-world", status: "new"},
    %{item_type: "post", title: "Conflict Me", slug: "conflict-me", status: "conflict", resolution: "skip"}
  ]

  pages = [%{item_type: "page", title: "About", slug: "about", status: "new"}]

  media_item = %{
    item_type: "media",
    title: "Import Asset",
    filename: "import-asset.txt",
    relative_path: "2024/05/import-asset.txt",
    source_file: Path.join(uploads_dir, "2024/05/import-asset.txt"),
    status: "new"
  }

  %{
    source_file: wxr_path,
    site_info: site_info,
    post_stats: %{single_new | conflict_count: 1},
    page_stats: single_new,
    media_stats: Map.put(single_new, :missing_count, 0),
    category_stats: taxonomy_stats,
    tag_stats: taxonomy_stats,
    date_distribution: [%{year: 2024, post_count: 2, media_count: 1}],
    conflicts: [slug_conflict],
    macros: [%{name: "gallery", usage_count: 1, parameters: ["ids"], validation_status: "unknown"}],
    items: %{
      posts: posts,
      pages: pages,
      media: [media_item],
      categories: [%{name: "General", exists_in_project: false, mapped_to: nil}],
      tags: [%{name: "News", exists_in_project: false, mapped_to: nil}]
    }
  }
end
end

View File

@@ -0,0 +1,316 @@
defmodule BDS.ImportAnalysisTest do
use ExUnit.Case, async: false
alias BDS.ImportAnalysis
alias BDS.Media
alias BDS.Posts
alias BDS.Tags
setup do
  :ok = Ecto.Adapters.SQL.Sandbox.checkout(BDS.Repo)

  # Per-test scratch directory that doubles as the project's data path.
  suffix = System.unique_integer([:positive])
  temp_dir = Path.join(System.tmp_dir!(), "bds-import-analysis-#{suffix}")
  File.mkdir_p!(temp_dir)
  on_exit(fn -> File.rm_rf(temp_dir) end)

  {:ok, project} = BDS.Projects.create_project(%{name: "Import Analysis", data_path: temp_dir})

  [project: project, temp_dir: temp_dir]
end
test "analyze_wxr summarizes new items, date distribution, and macros", %{project: project, temp_dir: temp_dir} do
  # Put the attachment referenced by the export on disk under the uploads folder.
  uploads_dir = Path.join(temp_dir, "uploads")
  asset_dir = Path.join(uploads_dir, "2024/05")
  File.mkdir_p!(asset_dir)
  File.write!(Path.join(asset_dir, "import-asset.txt"), "legacy attachment")

  wxr_path = Path.join(temp_dir, "legacy.xml")
  File.write!(wxr_path, basic_wxr_xml())

  assert {:ok, report} = ImportAnalysis.analyze_wxr(project.id, wxr_path, uploads_dir)

  site_info = report.site_info
  assert site_info.title == "Legacy Blog"
  assert site_info.url == "https://legacy.example"
  assert site_info.language == "en"
  assert site_info.source_file == wxr_path

  # Everything in the export is new to the empty project.
  one_new = %{new_count: 1, update_count: 0, conflict_count: 0, duplicate_count: 0}
  assert report.post_stats == one_new
  assert report.page_stats == one_new
  assert report.media_stats == Map.put(one_new, :missing_count, 0)

  new_taxonomy = %{existing_count: 0, mapped_count: 0, new_count: 1}
  assert report.category_stats == new_taxonomy
  assert report.tag_stats == new_taxonomy

  assert Enum.any?(
           report.date_distribution,
           &(&1.year == 2024 and &1.post_count == 2 and &1.media_count == 1)
         )

  assert [%{name: "gallery", usage_count: 1, parameters: ["ids"], validation_status: "unknown"}] = report.macros
  assert report.conflicts == []

  items = report.items

  assert items.posts == [
           %{item_type: "post", title: "Hello World", slug: "hello-world", status: "new"}
         ]

  assert items.pages == [
           %{item_type: "page", title: "About", slug: "about", status: "new"}
         ]

  assert items.media == [
           %{
             item_type: "media",
             title: "Import Asset",
             filename: "import-asset.txt",
             relative_path: "2024/05/import-asset.txt",
             status: "new"
           }
         ]
end
test "analyze_wxr detects update, conflict, duplicate, existing taxonomy, and missing uploads",
     %{project: project, temp_dir: temp_dir} do
  # Pre-existing taxonomy with the same names the export declares.
  for tag_name <- ["General", "News"] do
    assert {:ok, _tag} = Tags.create_tag(%{project_id: project.id, name: tag_name})
  end

  # Pre-existing posts, created in the same order as the export lists them.
  existing_posts = [
    {"Update Me", "Update body"},
    {"Conflict Me", "Local body"},
    {"Existing Duplicate", "Duplicate body"}
  ]

  for {title, body} <- existing_posts do
    assert {:ok, _post} =
             Posts.create_post(%{
               project_id: project.id,
               title: title,
               content: body,
               checksum: sha256(body)
             })
  end

  existing_media_source = Path.join(temp_dir, "update-asset.txt")
  File.write!(existing_media_source, "shared bytes")

  media_attrs = %{
    project_id: project.id,
    source_path: existing_media_source,
    title: "Update Asset"
  }

  assert {:ok, _existing_media} = Media.import_media(media_attrs)

  wxr_path = Path.join(temp_dir, "conflicts.xml")
  File.write!(wxr_path, conflict_wxr_xml())

  # No uploads folder is supplied for this analysis run.
  assert {:ok, report} = ImportAnalysis.analyze_wxr(project.id, wxr_path, nil)

  nothing = %{new_count: 0, update_count: 0, conflict_count: 0, duplicate_count: 0}
  assert report.post_stats == %{nothing | update_count: 1, conflict_count: 1, duplicate_count: 1}
  assert report.page_stats == nothing
  assert report.media_stats == Map.put(nothing, :missing_count, 1)

  existing_taxonomy = %{existing_count: 1, mapped_count: 0, new_count: 0}
  assert report.category_stats == existing_taxonomy
  assert report.tag_stats == existing_taxonomy

  assert Enum.any?(
           report.conflicts,
           &(&1.item_type == "post" and &1.item_name == "conflict-me" and &1.resolution == "skip")
         )

  posts = report.items.posts
  assert Enum.any?(posts, fn post -> post.slug == "update-me" and post.status == "update" end)
  assert Enum.any?(posts, fn post -> post.slug == "conflict-me" and post.status == "conflict" end)
  assert Enum.any?(posts, fn post -> post.slug == "duplicate-me" and post.status == "duplicate" end)

  assert Enum.any?(report.items.media, fn media ->
           media.filename == "missing-asset.txt" and media.status == "missing"
         end)
end
# Returns the SHA-256 digest of `value` as a lowercase hexadecimal string.
defp sha256(value) do
  digest = :crypto.hash(:sha256, value)
  Base.encode16(digest, case: :lower)
end
# WXR (WordPress export) fixture for the "all new" analysis scenario.
#
# The channel declares one category ("General") and one tag ("News") and holds
# three items: a published post ("hello-world") whose body contains a
# `[gallery ids="1,2"]` shortcode, a published page ("about"), and an
# attachment whose URL ends in `uploads/2024/05/import-asset.txt`.
# All three items are dated May 2024.
defp basic_wxr_xml do
"""
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:wp="http://wordpress.org/export/1.2/">
<channel>
<title>Legacy Blog</title>
<link>https://legacy.example</link>
<description>Imported from the legacy desktop app</description>
<language>en</language>
<wp:category>
<wp:cat_name><![CDATA[General]]></wp:cat_name>
<wp:category_nicename>general</wp:category_nicename>
<wp:category_parent></wp:category_parent>
</wp:category>
<wp:tag>
<wp:tag_slug>news</wp:tag_slug>
<wp:tag_name><![CDATA[News]]></wp:tag_name>
</wp:tag>
<item>
<title>Hello World</title>
<link>https://legacy.example/2024/05/hello-world</link>
<pubDate>Wed, 01 May 2024 12:00:00 +0000</pubDate>
<dc:creator><![CDATA[Importer]]></dc:creator>
<content:encoded><![CDATA[<p>Hello world</p><p>[gallery ids="1,2"]</p>]]></content:encoded>
<excerpt:encoded><![CDATA[Legacy hello]]></excerpt:encoded>
<wp:post_id>101</wp:post_id>
<wp:post_date>2024-05-01 12:00:00</wp:post_date>
<wp:post_modified>2024-05-01 12:30:00</wp:post_modified>
<wp:post_name>hello-world</wp:post_name>
<wp:status>publish</wp:status>
<wp:post_type>post</wp:post_type>
<category domain="category" nicename="general"><![CDATA[General]]></category>
<category domain="post_tag" nicename="news"><![CDATA[News]]></category>
</item>
<item>
<title>About</title>
<link>https://legacy.example/about</link>
<pubDate>Thu, 02 May 2024 12:00:00 +0000</pubDate>
<dc:creator><![CDATA[Importer]]></dc:creator>
<content:encoded><![CDATA[<p>About page</p>]]></content:encoded>
<excerpt:encoded><![CDATA[]]></excerpt:encoded>
<wp:post_id>201</wp:post_id>
<wp:post_date>2024-05-02 12:00:00</wp:post_date>
<wp:post_modified>2024-05-02 12:30:00</wp:post_modified>
<wp:post_name>about</wp:post_name>
<wp:status>publish</wp:status>
<wp:post_type>page</wp:post_type>
<category domain="category" nicename="general"><![CDATA[General]]></category>
</item>
<item>
<title>Import Asset</title>
<link>https://legacy.example/wp-content/uploads/2024/05/import-asset.txt</link>
<pubDate>Fri, 03 May 2024 12:00:00 +0000</pubDate>
<dc:creator><![CDATA[Importer]]></dc:creator>
<content:encoded><![CDATA[Legacy text attachment]]></content:encoded>
<wp:post_id>301</wp:post_id>
<wp:post_parent>101</wp:post_parent>
<wp:post_name>import-asset</wp:post_name>
<wp:status>inherit</wp:status>
<wp:post_type>attachment</wp:post_type>
<wp:attachment_url><![CDATA[https://legacy.example/wp-content/uploads/2024/05/import-asset.txt]]></wp:attachment_url>
</item>
</channel>
</rss>
"""
end
# WXR (WordPress export) fixture for the update/conflict/duplicate scenario.
#
# The channel declares the "General" category and "News" tag and holds four
# items: three published posts with slugs "update-me", "conflict-me" and
# "duplicate-me", plus one attachment whose URL ends in
# `uploads/2024/05/missing-asset.txt`. The post bodies are
# "<p>Update body</p>", "<p>Incoming conflict body</p>" and
# "<p>Duplicate body</p>" respectively.
defp conflict_wxr_xml do
"""
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:wp="http://wordpress.org/export/1.2/">
<channel>
<title>Legacy Blog</title>
<link>https://legacy.example</link>
<description>Imported from the legacy desktop app</description>
<language>en</language>
<wp:category>
<wp:cat_name><![CDATA[General]]></wp:cat_name>
<wp:category_nicename>general</wp:category_nicename>
<wp:category_parent></wp:category_parent>
</wp:category>
<wp:tag>
<wp:tag_slug>news</wp:tag_slug>
<wp:tag_name><![CDATA[News]]></wp:tag_name>
</wp:tag>
<item>
<title>Update Me</title>
<link>https://legacy.example/update-me</link>
<pubDate>Wed, 01 May 2024 12:00:00 +0000</pubDate>
<dc:creator><![CDATA[Importer]]></dc:creator>
<content:encoded><![CDATA[<p>Update body</p>]]></content:encoded>
<excerpt:encoded><![CDATA[]]></excerpt:encoded>
<wp:post_id>401</wp:post_id>
<wp:post_date>2024-05-01 12:00:00</wp:post_date>
<wp:post_modified>2024-05-01 12:30:00</wp:post_modified>
<wp:post_name>update-me</wp:post_name>
<wp:status>publish</wp:status>
<wp:post_type>post</wp:post_type>
<category domain="category" nicename="general"><![CDATA[General]]></category>
<category domain="post_tag" nicename="news"><![CDATA[News]]></category>
</item>
<item>
<title>Conflict Me</title>
<link>https://legacy.example/conflict-me</link>
<pubDate>Thu, 02 May 2024 12:00:00 +0000</pubDate>
<dc:creator><![CDATA[Importer]]></dc:creator>
<content:encoded><![CDATA[<p>Incoming conflict body</p>]]></content:encoded>
<excerpt:encoded><![CDATA[]]></excerpt:encoded>
<wp:post_id>402</wp:post_id>
<wp:post_date>2024-05-02 12:00:00</wp:post_date>
<wp:post_modified>2024-05-02 12:30:00</wp:post_modified>
<wp:post_name>conflict-me</wp:post_name>
<wp:status>publish</wp:status>
<wp:post_type>post</wp:post_type>
<category domain="category" nicename="general"><![CDATA[General]]></category>
<category domain="post_tag" nicename="news"><![CDATA[News]]></category>
</item>
<item>
<title>Duplicate Me</title>
<link>https://legacy.example/duplicate-me</link>
<pubDate>Fri, 03 May 2024 12:00:00 +0000</pubDate>
<dc:creator><![CDATA[Importer]]></dc:creator>
<content:encoded><![CDATA[<p>Duplicate body</p>]]></content:encoded>
<excerpt:encoded><![CDATA[]]></excerpt:encoded>
<wp:post_id>403</wp:post_id>
<wp:post_date>2024-05-03 12:00:00</wp:post_date>
<wp:post_modified>2024-05-03 12:30:00</wp:post_modified>
<wp:post_name>duplicate-me</wp:post_name>
<wp:status>publish</wp:status>
<wp:post_type>post</wp:post_type>
<category domain="category" nicename="general"><![CDATA[General]]></category>
<category domain="post_tag" nicename="news"><![CDATA[News]]></category>
</item>
<item>
<title>Missing Asset</title>
<link>https://legacy.example/wp-content/uploads/2024/05/missing-asset.txt</link>
<pubDate>Sat, 04 May 2024 12:00:00 +0000</pubDate>
<dc:creator><![CDATA[Importer]]></dc:creator>
<content:encoded><![CDATA[Missing attachment]]></content:encoded>
<wp:post_id>404</wp:post_id>
<wp:post_parent>401</wp:post_parent>
<wp:post_name>missing-asset</wp:post_name>
<wp:status>inherit</wp:status>
<wp:post_type>attachment</wp:post_type>
<wp:attachment_url><![CDATA[https://legacy.example/wp-content/uploads/2024/05/missing-asset.txt]]></wp:attachment_url>
</item>
</channel>
</rss>
"""
end
end

View File

@@ -0,0 +1,57 @@
defmodule BDS.ImportDefinitionsTest do
use ExUnit.Case, async: false
alias BDS.ImportDefinitions
setup do
  :ok = Ecto.Adapters.SQL.Sandbox.checkout(BDS.Repo)

  # Per-test scratch directory that doubles as the project's data path.
  suffix = System.unique_integer([:positive])
  temp_dir = Path.join(System.tmp_dir!(), "bds-import-definitions-#{suffix}")
  File.mkdir_p!(temp_dir)
  on_exit(fn -> File.rm_rf(temp_dir) end)

  {:ok, project} = BDS.Projects.create_project(%{name: "Import Definitions", data_path: temp_dir})

  [project: project, temp_dir: temp_dir]
end
test "get, update, and delete round-trip import definition editor state", %{project: project, temp_dir: temp_dir} do
  uploads_folder_path = Path.join(temp_dir, "uploads")
  wxr_file_path = Path.join(temp_dir, "legacy.xml")
  # Passed in already encoded, so it is expected back byte-for-byte.
  initial_analysis = Jason.encode!(%{site_info: %{title: "Legacy Blog"}})

  assert {:ok, definition} =
           ImportDefinitions.create_definition(%{
             project_id: project.id,
             name: "Legacy Import",
             wxr_file_path: wxr_file_path,
             uploads_folder_path: uploads_folder_path,
             last_analysis_result: initial_analysis
           })

  fetched = ImportDefinitions.get_definition(definition.id)
  assert fetched.id == definition.id
  assert fetched.project_id == project.id
  assert fetched.name == "Legacy Import"
  assert fetched.wxr_file_path == wxr_file_path
  assert fetched.uploads_folder_path == uploads_folder_path
  assert fetched.last_analysis_result == initial_analysis

  renamed_wxr_path = Path.join(temp_dir, "renamed.xml")
  renamed_uploads_path = Path.join(temp_dir, "renamed-uploads")

  # The update passes the analysis result as a map, not a pre-encoded string.
  assert {:ok, updated} =
           ImportDefinitions.update_definition(definition.id, %{
             name: "Renamed Import",
             wxr_file_path: renamed_wxr_path,
             uploads_folder_path: renamed_uploads_path,
             last_analysis_result: %{site_info: %{title: "Renamed Blog"}, post_stats: %{new_count: 2}}
           })

  assert updated.name == "Renamed Import"
  assert updated.wxr_file_path == renamed_wxr_path
  assert updated.uploads_folder_path == renamed_uploads_path

  # Compare the decoded JSON rather than the raw string: the byte layout of an
  # encoded map depends on key iteration order, which is not part of the
  # contract being tested here.
  assert Jason.decode!(updated.last_analysis_result) == %{
           "site_info" => %{"title" => "Renamed Blog"},
           "post_stats" => %{"new_count" => 2}
         }

  assert [%{id: listed_id, title: "Renamed Import"}] = ImportDefinitions.list_definitions(project.id)
  assert listed_id == definition.id

  assert {:ok, :deleted} = ImportDefinitions.delete_definition(definition.id)
  assert ImportDefinitions.get_definition(definition.id) == nil
end
end

View File

@@ -0,0 +1,208 @@
defmodule BDS.ImportExecutionTest do
use ExUnit.Case, async: false
import Ecto.Query
alias BDS.ImportAnalysis
alias BDS.ImportExecution
alias BDS.Posts
alias BDS.Repo
alias BDS.Tags
setup do
  :ok = Ecto.Adapters.SQL.Sandbox.checkout(BDS.Repo)

  # Per-test scratch directory that doubles as the project's data path.
  suffix = System.unique_integer([:positive])
  temp_dir = Path.join(System.tmp_dir!(), "bds-import-execution-#{suffix}")
  File.mkdir_p!(temp_dir)
  on_exit(fn -> File.rm_rf(temp_dir) end)

  {:ok, project} = BDS.Projects.create_project(%{name: "Import Execution", data_path: temp_dir})

  [project: project, temp_dir: temp_dir]
end
test "execute_import creates tags, posts, pages, and media from the analysis report",
     %{project: project, temp_dir: temp_dir} do
  # Put the attachment referenced by the export on disk under the uploads folder.
  uploads_dir = Path.join(temp_dir, "uploads")
  asset_dir = Path.join(uploads_dir, "2024/05")
  File.mkdir_p!(asset_dir)
  File.write!(Path.join(asset_dir, "import-asset.txt"), "legacy attachment")

  wxr_path = Path.join(temp_dir, "legacy.xml")
  File.write!(wxr_path, basic_wxr_xml())

  assert {:ok, report} = ImportAnalysis.analyze_wxr(project.id, wxr_path, uploads_dir)

  import_opts = [uploads_folder_path: uploads_dir, default_author: "Imported Author"]
  assert {:ok, result} = ImportExecution.execute_import(project.id, report, import_opts)

  one_imported = %{imported: 1, skipped: 0, errors: 0}
  assert result.success
  assert result.tags == %{created: 2, skipped: 0}
  assert result.posts == one_imported
  assert result.pages == one_imported
  assert result.media == one_imported
  assert result.errors == []

  tag_names =
    project.id
    |> Tags.list_tags()
    |> Enum.map(fn tag -> tag.name end)
    |> Enum.sort()

  assert tag_names == ["General", "News"]

  posts =
    Posts.Post
    |> where([post], post.project_id == ^project.id)
    |> order_by([post], asc: post.slug)
    |> Repo.all()

  assert Enum.map(posts, fn post -> post.slug end) == ["about", "hello-world"]
  # The slug assertion above fixes both the count and the order.
  [about, hello_world] = posts

  hello_world_file = Path.join(temp_dir, hello_world.file_path)
  assert hello_world.status == :published
  assert hello_world.author == "Importer"
  assert hello_world.content == nil
  assert hello_world.file_path != ""
  assert File.exists?(hello_world_file)
  assert File.read!(hello_world_file) =~ "Hello World"

  assert about.status == :published
  assert about.content == nil
  assert "page" in about.categories

  imported_media =
    BDS.Media.Media
    |> where([media], media.project_id == ^project.id)
    |> Repo.one!()

  assert imported_media.original_name == "import-asset.txt"
  assert File.exists?(Path.join(temp_dir, imported_media.file_path))
end
test "execute_import skips conflicts by default and can import them with a new slug", %{project: project, temp_dir: temp_dir} do
  # Existing local post that the incoming "Conflict Me" item collides with.
  assert {:ok, _existing_post} =
           Posts.create_post(%{
             project_id: project.id,
             title: "Conflict Me",
             content: "Local body",
             checksum: sha256("Local body")
           })

  wxr_path = Path.join(temp_dir, "conflict.xml")
  File.write!(wxr_path, conflict_only_wxr_xml())

  assert {:ok, report} = ImportAnalysis.analyze_wxr(project.id, wxr_path, nil)

  # Every count and listing below is scoped to this test's project so rows
  # belonging to any other project can never influence the assertions.
  project_posts = from(post in Posts.Post, where: post.project_id == ^project.id)

  # First run: the report is executed unchanged, so the conflict is skipped.
  assert {:ok, skipped_result} = ImportExecution.execute_import(project.id, report, default_author: "Imported Author")
  assert skipped_result.posts == %{imported: 0, skipped: 1, errors: 0}
  assert Repo.aggregate(project_posts, :count, :id) == 1

  # Second run: flip the conflicting item's resolution to "import".
  conflicting_post = List.first(report.items.posts)
  import_report = put_in(report.items.posts, [%{conflicting_post | resolution: "import"}])

  assert {:ok, imported_result} = ImportExecution.execute_import(project.id, import_report, default_author: "Imported Author")
  assert imported_result.posts == %{imported: 1, skipped: 0, errors: 0}

  slugs = Repo.all(from post in project_posts, select: post.slug, order_by: [asc: post.slug])

  # The original slug is kept and the imported copy received a different one.
  assert length(slugs) == 2
  assert "conflict-me" in slugs
  assert Enum.any?(slugs, &(&1 != "conflict-me"))
end
# Returns the SHA-256 digest of `value` as a lowercase hexadecimal string.
defp sha256(value) do
  digest = :crypto.hash(:sha256, value)
  Base.encode16(digest, case: :lower)
end
# WXR (WordPress export) fixture for the end-to-end import scenario.
#
# The channel declares one category ("General") and one tag ("News") and holds
# three items: a published post ("hello-world", creator "Importer"), a
# published page ("about"), and an attachment whose URL ends in
# `uploads/2024/05/import-asset.txt`. The post body is plain
# "<p>Hello world</p>" with no shortcode.
defp basic_wxr_xml do
"""
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:wp="http://wordpress.org/export/1.2/">
<channel>
<title>Legacy Blog</title>
<link>https://legacy.example</link>
<description>Imported from the legacy desktop app</description>
<language>en</language>
<wp:category>
<wp:cat_name><![CDATA[General]]></wp:cat_name>
<wp:category_nicename>general</wp:category_nicename>
<wp:category_parent></wp:category_parent>
</wp:category>
<wp:tag>
<wp:tag_slug>news</wp:tag_slug>
<wp:tag_name><![CDATA[News]]></wp:tag_name>
</wp:tag>
<item>
<title>Hello World</title>
<link>https://legacy.example/2024/05/hello-world</link>
<pubDate>Wed, 01 May 2024 12:00:00 +0000</pubDate>
<dc:creator><![CDATA[Importer]]></dc:creator>
<content:encoded><![CDATA[<p>Hello world</p>]]></content:encoded>
<excerpt:encoded><![CDATA[Legacy hello]]></excerpt:encoded>
<wp:post_id>101</wp:post_id>
<wp:post_date>2024-05-01 12:00:00</wp:post_date>
<wp:post_modified>2024-05-01 12:30:00</wp:post_modified>
<wp:post_name>hello-world</wp:post_name>
<wp:status>publish</wp:status>
<wp:post_type>post</wp:post_type>
<category domain="category" nicename="general"><![CDATA[General]]></category>
<category domain="post_tag" nicename="news"><![CDATA[News]]></category>
</item>
<item>
<title>About</title>
<link>https://legacy.example/about</link>
<pubDate>Thu, 02 May 2024 12:00:00 +0000</pubDate>
<dc:creator><![CDATA[Importer]]></dc:creator>
<content:encoded><![CDATA[<p>About page</p>]]></content:encoded>
<excerpt:encoded><![CDATA[]]></excerpt:encoded>
<wp:post_id>201</wp:post_id>
<wp:post_date>2024-05-02 12:00:00</wp:post_date>
<wp:post_modified>2024-05-02 12:30:00</wp:post_modified>
<wp:post_name>about</wp:post_name>
<wp:status>publish</wp:status>
<wp:post_type>page</wp:post_type>
<category domain="category" nicename="general"><![CDATA[General]]></category>
</item>
<item>
<title>Import Asset</title>
<link>https://legacy.example/wp-content/uploads/2024/05/import-asset.txt</link>
<pubDate>Fri, 03 May 2024 12:00:00 +0000</pubDate>
<dc:creator><![CDATA[Importer]]></dc:creator>
<content:encoded><![CDATA[Legacy text attachment]]></content:encoded>
<wp:post_id>301</wp:post_id>
<wp:post_parent>101</wp:post_parent>
<wp:post_name>import-asset</wp:post_name>
<wp:status>inherit</wp:status>
<wp:post_type>attachment</wp:post_type>
<wp:attachment_url><![CDATA[https://legacy.example/wp-content/uploads/2024/05/import-asset.txt]]></wp:attachment_url>
</item>
</channel>
</rss>
"""
end
# WXR (WordPress export) fixture containing a single published post with the
# slug "conflict-me" and the body "<p>Incoming conflict body</p>".
# The channel declares no categories or tags and the item carries none.
defp conflict_only_wxr_xml do
"""
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:wp="http://wordpress.org/export/1.2/">
<channel>
<title>Legacy Blog</title>
<link>https://legacy.example</link>
<description>Imported from the legacy desktop app</description>
<language>en</language>
<item>
<title>Conflict Me</title>
<link>https://legacy.example/conflict-me</link>
<pubDate>Thu, 02 May 2024 12:00:00 +0000</pubDate>
<dc:creator><![CDATA[Importer]]></dc:creator>
<content:encoded><![CDATA[<p>Incoming conflict body</p>]]></content:encoded>
<excerpt:encoded><![CDATA[]]></excerpt:encoded>
<wp:post_id>402</wp:post_id>
<wp:post_date>2024-05-02 12:00:00</wp:post_date>
<wp:post_modified>2024-05-02 12:30:00</wp:post_modified>
<wp:post_name>conflict-me</wp:post_name>
<wp:status>publish</wp:status>
<wp:post_type>post</wp:post_type>
</item>
</channel>
</rss>
"""
end
end

View File

@@ -0,0 +1,111 @@
defmodule BDS.WxrParserTest do
use ExUnit.Case, async: true
alias BDS.WxrParser
test "parse_xml extracts site info, posts, pages, media, categories, and tags" do
  parsed = WxrParser.parse_xml(sample_wxr_xml())

  # Channel-level metadata.
  site = parsed.site
  assert site.title == "Legacy Blog"
  assert site.link == "https://legacy.example"
  assert site.description == "Imported from the legacy desktop app"
  assert site.language == "en"

  # Taxonomy declared at channel level.
  assert parsed.categories == [%{name: "General", slug: "general", parent: ""}]
  assert parsed.tags == [%{name: "News", slug: "news"}]

  # Items are split by post type; only the listed keys are checked.
  assert [
           %{
             wp_id: 101,
             title: "Hello World",
             slug: "hello-world",
             creator: "Importer",
             status: "publish",
             post_type: "post",
             categories: ["General"],
             tags: ["News"]
           }
         ] = parsed.posts

  assert [
           %{
             wp_id: 201,
             title: "About",
             slug: "about",
             post_type: "page",
             categories: ["General"],
             tags: []
           }
         ] = parsed.pages

  assert [media] = parsed.media

  expected_media_fields = [
    wp_id: 301,
    title: "Import Asset",
    filename: "import-asset.txt",
    relative_path: "2024/05/import-asset.txt",
    parent_id: 101,
    mime_type: "text/plain"
  ]

  for {field, expected} <- expected_media_fields do
    assert Map.fetch!(media, field) == expected
  end
end
test "parse_xml raises when the WXR file has no channel" do
  # An <rss> root with no <channel> child.
  channelless_xml = ~s(<rss version="2.0"></rss>)

  assert_raise RuntimeError, ~r/no <channel> element found/, fn ->
    WxrParser.parse_xml(channelless_xml)
  end
end
# WXR (WordPress export) fixture fed to the parser test.
#
# The channel declares one category ("General", empty parent) and one tag
# ("News") and holds three items: a published post (id 101, "hello-world")
# whose body contains inline markup and a `[gallery ids="1,2"]` shortcode,
# a published page (id 201, "about"), and an attachment (id 301, parent 101)
# whose URL ends in `uploads/2024/05/import-asset.txt`.
defp sample_wxr_xml do
"""
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:wp="http://wordpress.org/export/1.2/">
<channel>
<title>Legacy Blog</title>
<link>https://legacy.example</link>
<description>Imported from the legacy desktop app</description>
<language>en</language>
<wp:category>
<wp:cat_name><![CDATA[General]]></wp:cat_name>
<wp:category_nicename>general</wp:category_nicename>
<wp:category_parent></wp:category_parent>
</wp:category>
<wp:tag>
<wp:tag_slug>news</wp:tag_slug>
<wp:tag_name><![CDATA[News]]></wp:tag_name>
</wp:tag>
<item>
<title>Hello World</title>
<link>https://legacy.example/2024/05/hello-world</link>
<pubDate>Wed, 01 May 2024 12:00:00 +0000</pubDate>
<dc:creator><![CDATA[Importer]]></dc:creator>
<content:encoded><![CDATA[<p>Hello <strong>world</strong>.</p><p>[gallery ids="1,2"]</p>]]></content:encoded>
<excerpt:encoded><![CDATA[Legacy hello]]></excerpt:encoded>
<wp:post_id>101</wp:post_id>
<wp:post_date>2024-05-01 14:00:00</wp:post_date>
<wp:post_modified>2024-05-02 15:00:00</wp:post_modified>
<wp:post_name>hello-world</wp:post_name>
<wp:status>publish</wp:status>
<wp:post_type>post</wp:post_type>
<category domain="category" nicename="general"><![CDATA[General]]></category>
<category domain="post_tag" nicename="news"><![CDATA[News]]></category>
</item>
<item>
<title>About</title>
<link>https://legacy.example/about</link>
<pubDate>Thu, 02 May 2024 12:00:00 +0000</pubDate>
<dc:creator><![CDATA[Importer]]></dc:creator>
<content:encoded><![CDATA[<p>About page</p>]]></content:encoded>
<excerpt:encoded><![CDATA[]]></excerpt:encoded>
<wp:post_id>201</wp:post_id>
<wp:post_date>2024-05-02 12:00:00</wp:post_date>
<wp:post_modified>2024-05-02 12:30:00</wp:post_modified>
<wp:post_name>about</wp:post_name>
<wp:status>publish</wp:status>
<wp:post_type>page</wp:post_type>
<category domain="category" nicename="general"><![CDATA[General]]></category>
</item>
<item>
<title>Import Asset</title>
<link>https://legacy.example/wp-content/uploads/2024/05/import-asset.txt</link>
<pubDate>Fri, 03 May 2024 12:00:00 +0000</pubDate>
<dc:creator><![CDATA[Importer]]></dc:creator>
<content:encoded><![CDATA[Legacy text attachment]]></content:encoded>
<wp:post_id>301</wp:post_id>
<wp:post_parent>101</wp:post_parent>
<wp:post_name>import-asset</wp:post_name>
<wp:status>inherit</wp:status>
<wp:post_type>attachment</wp:post_type>
<wp:attachment_url><![CDATA[https://legacy.example/wp-content/uploads/2024/05/import-asset.txt]]></wp:attachment_url>
</item>
</channel>
</rss>
"""
end
end