# ExUnit test module for BDS.WxrParser.parse_xml/1 (runs with `async: true`).
#
# Tests in this module:
#
#   * "parse_xml extracts site info, posts, pages, media, categories, and tags"
#     Calls WxrParser.parse_xml/1 on the fixture returned by sample_wxr_xml/0 and
#     asserts on the returned value:
#       - parsed.site has title, link, description and language fields;
#       - parsed.categories is exactly one map with name/slug/parent keys;
#       - parsed.tags is exactly one map with name/slug keys;
#       - parsed.posts and parsed.pages are each pattern-matched against a
#         one-element list (so extra keys on the item are tolerated, but extra
#         items are not);
#       - parsed.media is a one-element list whose item exposes wp_id, title,
#         filename, relative_path, parent_id and mime_type.
#
#   * "parse_xml raises when the WXR file has no channel"
#     Asserts that parse_xml/1 raises RuntimeError with a message matching the
#     given regex.
#
# sample_wxr_xml/0 is a private helper that returns the fixture as a heredoc
# string.
#
# NOTE(review): as it appears here, the fixture contains no XML element markup
# (only text values and stray `]]>` terminators) and the whole module sits on
# one physical line. This looks like markup/whitespace was lost somewhere
# upstream -- confirm against the canonical copy of this file before editing.
# NOTE(review): the second test is named "no channel" but passes an empty
# string, and its regex reads "no element found"; presumably tag-like text was
# stripped from both the argument and the regex as well -- TODO confirm.
defmodule BDS.WxrParserTest do use ExUnit.Case, async: true alias BDS.WxrParser test "parse_xml extracts site info, posts, pages, media, categories, and tags" do parsed = WxrParser.parse_xml(sample_wxr_xml()) assert parsed.site.title == "Legacy Blog" assert parsed.site.link == "https://legacy.example" assert parsed.site.description == "Imported from the legacy desktop app" assert parsed.site.language == "en" assert parsed.categories == [%{name: "General", slug: "general", parent: ""}] assert parsed.tags == [%{name: "News", slug: "news"}] assert [ %{ wp_id: 101, title: "Hello World", slug: "hello-world", creator: "Importer", status: "publish", post_type: "post", categories: ["General"], tags: ["News"] } ] = parsed.posts assert [ %{ wp_id: 201, title: "About", slug: "about", post_type: "page", categories: ["General"], tags: [] } ] = parsed.pages assert [media] = parsed.media assert media.wp_id == 301 assert media.title == "Import Asset" assert media.filename == "import-asset.txt" assert media.relative_path == "2024/05/import-asset.txt" assert media.parent_id == 101 assert media.mime_type == "text/plain" end test "parse_xml raises when the WXR file has no channel" do assert_raise RuntimeError, ~r/no element found/, fn -> WxrParser.parse_xml("") end end defp sample_wxr_xml do """ Legacy Blog https://legacy.example Imported from the legacy desktop app en general news Hello World https://legacy.example/2024/05/hello-world Wed, 01 May 2024 12:00:00 +0000 Hello world.

[gallery ids="1,2"]

]]>
101 2024-05-01 14:00:00 2024-05-02 15:00:00 hello-world publish post
About https://legacy.example/about Thu, 02 May 2024 12:00:00 +0000 About page

]]>
201 2024-05-02 12:00:00 2024-05-02 12:30:00 about publish page
Import Asset https://legacy.example/wp-content/uploads/2024/05/import-asset.txt Fri, 03 May 2024 12:00:00 +0000 301 101 import-asset inherit attachment
""" end end