# Tests for BDS.ImportAnalysis.analyze_wxr, called here as
# analyze_wxr(project_id, wxr_path, uploads_dir) and, in the progress test,
# with a trailing `on_progress:` option. The case runs with `async: false`.
#
# setup:
#   * checks out an Ecto SQL sandbox connection for BDS.Repo;
#   * creates a uniquely named directory under System.tmp_dir!() and registers
#     an on_exit hook that removes it with File.rm_rf/1;
#   * creates a project named "Import Analysis" whose data_path is that
#     directory, and exposes `project` and `temp_dir` in the test context.
#
# Test 1 ("summarizes new items, date distribution, and macros"):
#   writes uploads/2024/05/import-asset.txt and legacy.xml (basic_wxr_xml/0),
#   then expects site_info (title, url, language, source_file), one new post,
#   one new page, one new media item, one new category and one new tag, a 2024
#   date_distribution row with post_count 2 and media_count 1, exactly one
#   "gallery" macro with parameter "ids" and validation_status "unknown", no
#   conflicts, and the exact item lists for posts, pages and media.
#
# Test 2 ("detects update, conflict, duplicate, existing taxonomy, and missing
# uploads"):
#   pre-creates tags "General" and "News", three posts whose checksum is the
#   SHA-256 of their content, and one imported media file; analyzes
#   conflict_wxr_xml/0 with `nil` as the uploads directory; expects post stats
#   of one update, one conflict and one duplicate, media missing_count 1,
#   existing_count 1 for both categories and tags, a "post" conflict named
#   "conflict-me" with resolution "skip", and matching per-item statuses.
#
# Test 3 ("reports legacy progress steps ..."):
#   passes an on_progress callback that sends {:analysis_progress, step, detail}
#   to self(), then checks five messages with assert_received.
#   NOTE(review): self() is evaluated when the callback runs, and assert_received
#   does not wait, so these assertions presumably rely on analyze_wxr invoking
#   on_progress in the test process before it returns - confirm. Capturing the
#   test pid before the call would remove that dependence.
#
# Helpers:
#   * sha256/1 - lowercase hex encoding of :crypto.hash(:sha256, value).
#   * basic_wxr_xml/0, conflict_wxr_xml/0 - heredoc fixtures written to disk as
#     the WXR input for the tests above.
#   NOTE(review): as the file reads here, the heredocs contain no XML element
#   tags (only text values and stray "]]>" markers) and the module is collapsed
#   onto a few physical lines. This looks like markup/whitespace lost in
#   extraction - confirm against the repository copy before editing fixtures.
defmodule BDS.ImportAnalysisTest do use ExUnit.Case, async: false alias BDS.ImportAnalysis alias BDS.Media alias BDS.Posts alias BDS.Tags setup do :ok = Ecto.Adapters.SQL.Sandbox.checkout(BDS.Repo) temp_dir = Path.join(System.tmp_dir!(), "bds-import-analysis-#{System.unique_integer([:positive])}") File.mkdir_p!(temp_dir) on_exit(fn -> File.rm_rf(temp_dir) end) {:ok, project} = BDS.Projects.create_project(%{name: "Import Analysis", data_path: temp_dir}) %{project: project, temp_dir: temp_dir} end test "analyze_wxr summarizes new items, date distribution, and macros", %{project: project, temp_dir: temp_dir} do uploads_dir = Path.join(temp_dir, "uploads") File.mkdir_p!(Path.join(uploads_dir, "2024/05")) File.write!(Path.join(uploads_dir, "2024/05/import-asset.txt"), "legacy attachment") wxr_path = Path.join(temp_dir, "legacy.xml") File.write!(wxr_path, basic_wxr_xml()) assert {:ok, report} = ImportAnalysis.analyze_wxr(project.id, wxr_path, uploads_dir) assert report.site_info.title == "Legacy Blog" assert report.site_info.url == "https://legacy.example" assert report.site_info.language == "en" assert report.site_info.source_file == wxr_path assert report.post_stats == %{new_count: 1, update_count: 0, conflict_count: 0, duplicate_count: 0} assert report.page_stats == %{new_count: 1, update_count: 0, conflict_count: 0, duplicate_count: 0} assert report.media_stats == %{ new_count: 1, update_count: 0, conflict_count: 0, duplicate_count: 0, missing_count: 0 } assert report.category_stats == %{existing_count: 0, mapped_count: 0, new_count: 1} assert report.tag_stats == %{existing_count: 0, mapped_count: 0, new_count: 1} assert Enum.any?(report.date_distribution, fn row -> row.year == 2024 and row.post_count == 2 and row.media_count == 1 end) assert [%{name: "gallery", usage_count: 1, parameters: ["ids"], validation_status: "unknown"}] = report.macros assert report.conflicts == [] assert report.items.posts == [ %{ title: "Hello World", slug: "hello-world", status: "new", 
item_type: "post" } ] assert report.items.pages == [ %{ title: "About", slug: "about", status: "new", item_type: "page" } ] assert report.items.media == [ %{ title: "Import Asset", filename: "import-asset.txt", relative_path: "2024/05/import-asset.txt", status: "new", item_type: "media" } ] end test "analyze_wxr detects update, conflict, duplicate, existing taxonomy, and missing uploads", %{project: project, temp_dir: temp_dir} do assert {:ok, _category} = Tags.create_tag(%{project_id: project.id, name: "General"}) assert {:ok, _tag} = Tags.create_tag(%{project_id: project.id, name: "News"}) assert {:ok, _update_post} = Posts.create_post(%{ project_id: project.id, title: "Update Me", content: "Update body", checksum: sha256("Update body") }) assert {:ok, _conflict_post} = Posts.create_post(%{ project_id: project.id, title: "Conflict Me", content: "Local body", checksum: sha256("Local body") }) assert {:ok, _duplicate_post} = Posts.create_post(%{ project_id: project.id, title: "Existing Duplicate", content: "Duplicate body", checksum: sha256("Duplicate body") }) existing_media_source = Path.join(temp_dir, "update-asset.txt") File.write!(existing_media_source, "shared bytes") assert {:ok, _existing_media} = Media.import_media(%{ project_id: project.id, source_path: existing_media_source, title: "Update Asset" }) wxr_path = Path.join(temp_dir, "conflicts.xml") File.write!(wxr_path, conflict_wxr_xml()) assert {:ok, report} = ImportAnalysis.analyze_wxr(project.id, wxr_path, nil) assert report.post_stats == %{new_count: 0, update_count: 1, conflict_count: 1, duplicate_count: 1} assert report.page_stats == %{new_count: 0, update_count: 0, conflict_count: 0, duplicate_count: 0} assert report.media_stats == %{ new_count: 0, update_count: 0, conflict_count: 0, duplicate_count: 0, missing_count: 1 } assert report.category_stats == %{existing_count: 1, mapped_count: 0, new_count: 0} assert report.tag_stats == %{existing_count: 1, mapped_count: 0, new_count: 0} assert 
Enum.any?(report.conflicts, fn conflict -> conflict.item_type == "post" and conflict.item_name == "conflict-me" and conflict.resolution == "skip" end) assert Enum.any?(report.items.posts, &(&1.slug == "update-me" and &1.status == "update")) assert Enum.any?(report.items.posts, &(&1.slug == "conflict-me" and &1.status == "conflict")) assert Enum.any?(report.items.posts, &(&1.slug == "duplicate-me" and &1.status == "duplicate")) assert Enum.any?(report.items.media, &(&1.filename == "missing-asset.txt" and &1.status == "missing")) end test "analyze_wxr reports legacy progress steps while building the import report", %{project: project, temp_dir: temp_dir} do uploads_dir = Path.join(temp_dir, "uploads") File.mkdir_p!(Path.join(uploads_dir, "2024/05")) File.write!(Path.join(uploads_dir, "2024/05/import-asset.txt"), "legacy attachment") wxr_path = Path.join(temp_dir, "legacy.xml") File.write!(wxr_path, basic_wxr_xml()) assert {:ok, _report} = ImportAnalysis.analyze_wxr(project.id, wxr_path, uploads_dir, on_progress: fn step, detail -> send(self(), {:analysis_progress, step, detail}) end ) assert_received {:analysis_progress, "Loading existing posts...", nil} assert_received {:analysis_progress, "Analyzing posts...", "1 posts to analyze"} assert_received {:analysis_progress, "Analyzing pages...", "1 pages to analyze"} assert_received {:analysis_progress, "Analyzing media files...", "1 media files to analyze"} assert_received {:analysis_progress, "Discovering macros...", nil} end defp sha256(value) do :sha256 |> :crypto.hash(value) |> Base.encode16(case: :lower) end defp basic_wxr_xml do """ Legacy Blog https://legacy.example Imported from the legacy desktop app en general news Hello World https://legacy.example/2024/05/hello-world Wed, 01 May 2024 12:00:00 +0000 Hello world

[gallery ids="1,2"]

]]>
101 2024-05-01 12:00:00 2024-05-01 12:30:00 hello-world publish post
About https://legacy.example/about Thu, 02 May 2024 12:00:00 +0000 About page

]]>
201 2024-05-02 12:00:00 2024-05-02 12:30:00 about publish page
Import Asset https://legacy.example/wp-content/uploads/2024/05/import-asset.txt Fri, 03 May 2024 12:00:00 +0000 301 101 import-asset inherit attachment
""" end defp conflict_wxr_xml do """ Legacy Blog https://legacy.example Imported from the legacy desktop app en general news Update Me https://legacy.example/update-me Wed, 01 May 2024 12:00:00 +0000 Update body

]]>
401 2024-05-01 12:00:00 2024-05-01 12:30:00 update-me publish post
Conflict Me https://legacy.example/conflict-me Thu, 02 May 2024 12:00:00 +0000 Incoming conflict body

]]>
402 2024-05-02 12:00:00 2024-05-02 12:30:00 conflict-me publish post
Duplicate Me https://legacy.example/duplicate-me Fri, 03 May 2024 12:00:00 +0000 Duplicate body

]]>
403 2024-05-03 12:00:00 2024-05-03 12:30:00 duplicate-me publish post
Missing Asset https://legacy.example/wp-content/uploads/2024/05/missing-asset.txt Sat, 04 May 2024 12:00:00 +0000 404 401 missing-asset inherit attachment
""" end end