/**
* WxrParser Unit Tests
*
* Tests the REAL WxrParser class with mocked filesystem.
* Following TDD best practices: mock external dependencies, test real implementation.
*/
import { describe, it, expect, beforeEach, vi } from 'vitest';
import { WxrParser } from '../../src/main/engine/WxrParser';
import type { WxrData } from '../../src/main/engine/WxrParser';
// Replace fs/promises with a stub module so the tests never touch the real disk.
// Only readFile is provided; each test programs its resolved/rejected value.
vi.mock('fs/promises', () => {
  const readFile = vi.fn();
  return { readFile };
});
// Minimal valid WXR XML for testing: a channel carrying only site metadata
// (title, link, description, language) and no items or taxonomy terms.
// NOTE(review): markup follows the standard WXR 1.2 layout — confirm element
// names against what WxrParser actually reads.
const MINIMAL_WXR = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
  xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
  xmlns:content="http://purl.org/rss/1.0/modules/content/"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
  xmlns:wp="http://wordpress.org/export/1.2/">
  <channel>
    <title>My Test Blog</title>
    <link>https://example.com</link>
    <description>A test blog</description>
    <language>en-US</language>
  </channel>
</rss>`;
// WXR with categories and tags at channel level.
// Two categories ("Web Development" is a child of "technology") and two tags;
// display names are wrapped in CDATA as WordPress exports them.
// NOTE(review): markup follows the standard WXR 1.2 layout — confirm element
// names against what WxrParser actually reads.
const WXR_WITH_TAXONOMIES = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
  xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
  xmlns:content="http://purl.org/rss/1.0/modules/content/"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
  xmlns:wp="http://wordpress.org/export/1.2/">
  <channel>
    <title>My Blog</title>
    <link>https://example.com</link>
    <description>Test</description>
    <language>en</language>
    <wp:category>
      <wp:term_id>1</wp:term_id>
      <wp:category_nicename>technology</wp:category_nicename>
      <wp:category_parent></wp:category_parent>
      <wp:cat_name><![CDATA[Technology]]></wp:cat_name>
    </wp:category>
    <wp:category>
      <wp:term_id>2</wp:term_id>
      <wp:category_nicename>web-dev</wp:category_nicename>
      <wp:category_parent>technology</wp:category_parent>
      <wp:cat_name><![CDATA[Web Development]]></wp:cat_name>
    </wp:category>
    <wp:tag>
      <wp:term_id>10</wp:term_id>
      <wp:tag_slug>javascript</wp:tag_slug>
      <wp:tag_name><![CDATA[JavaScript]]></wp:tag_name>
    </wp:tag>
    <wp:tag>
      <wp:term_id>11</wp:term_id>
      <wp:tag_slug>typescript</wp:tag_slug>
      <wp:tag_name><![CDATA[TypeScript]]></wp:tag_name>
    </wp:tag>
  </channel>
</rss>`;
// WXR with a single published post (id 42) carrying every field the tests read:
// creator, HTML content, excerpt, local + GMT creation/modification dates,
// one category and two tags.
// NOTE(review): markup follows the standard WXR 1.2 layout — confirm element
// names against what WxrParser actually reads.
const WXR_WITH_POST = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
  xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
  xmlns:content="http://purl.org/rss/1.0/modules/content/"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
  xmlns:wp="http://wordpress.org/export/1.2/">
  <channel>
    <title>My Blog</title>
    <link>https://example.com</link>
    <description>Test</description>
    <language>en</language>
    <item>
      <title>Hello World</title>
      <link>https://example.com/hello-world/</link>
      <pubDate>Mon, 15 Jan 2024 10:30:00 +0000</pubDate>
      <dc:creator><![CDATA[admin]]></dc:creator>
      <content:encoded><![CDATA[<p>Welcome to my blog. This is my first post.</p>]]></content:encoded>
      <excerpt:encoded><![CDATA[Welcome to my blog.]]></excerpt:encoded>
      <wp:post_id>42</wp:post_id>
      <wp:post_date>2024-01-15 10:30:00</wp:post_date>
      <wp:post_date_gmt>2024-01-15 10:30:00</wp:post_date_gmt>
      <wp:post_modified>2024-01-20 15:45:30</wp:post_modified>
      <wp:post_modified_gmt>2024-01-20 15:45:30</wp:post_modified_gmt>
      <wp:post_name>hello-world</wp:post_name>
      <wp:status>publish</wp:status>
      <wp:post_type>post</wp:post_type>
      <wp:post_parent>0</wp:post_parent>
      <category domain="category" nicename="uncategorized"><![CDATA[Uncategorized]]></category>
      <category domain="post_tag" nicename="intro"><![CDATA[Intro]]></category>
      <category domain="post_tag" nicename="welcome"><![CDATA[Welcome]]></category>
    </item>
  </channel>
</rss>`;
// WXR with a single published page (id 10). The item deliberately has no
// pubDate, post_date or post_modified elements so the "missing date" tests
// can assert null for those fields.
// NOTE(review): markup follows the standard WXR 1.2 layout and the heading
// level inside the content is assumed — confirm against WxrParser and the
// page-content assertion.
const WXR_WITH_PAGE = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
  xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
  xmlns:content="http://purl.org/rss/1.0/modules/content/"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
  xmlns:wp="http://wordpress.org/export/1.2/">
  <channel>
    <title>My Blog</title>
    <link>https://example.com</link>
    <description>Test</description>
    <language>en</language>
    <item>
      <title>About Me</title>
      <content:encoded><![CDATA[<h1>About</h1><p>This is the about page.</p>]]></content:encoded>
      <wp:post_id>10</wp:post_id>
      <wp:post_name>about</wp:post_name>
      <wp:status>publish</wp:status>
      <wp:post_type>page</wp:post_type>
      <wp:post_parent>0</wp:post_parent>
    </item>
  </channel>
</rss>`;
// WXR with a media attachment (id 100) that belongs to post 42, exposes its
// download URL, and records its uploads-relative path in _wp_attached_file.
// NOTE(review): markup follows the standard WXR 1.2 layout; the description is
// placed in content:encoded (where WordPress stores attachment descriptions) —
// confirm that this is the element WxrParser maps to `description`.
const WXR_WITH_MEDIA = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
  xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
  xmlns:content="http://purl.org/rss/1.0/modules/content/"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
  xmlns:wp="http://wordpress.org/export/1.2/">
  <channel>
    <title>My Blog</title>
    <link>https://example.com</link>
    <description>Test</description>
    <language>en</language>
    <item>
      <title>sunset-photo</title>
      <content:encoded><![CDATA[A beautiful sunset]]></content:encoded>
      <excerpt:encoded><![CDATA[]]></excerpt:encoded>
      <wp:post_id>100</wp:post_id>
      <wp:post_name>sunset-photo</wp:post_name>
      <wp:status>inherit</wp:status>
      <wp:post_type>attachment</wp:post_type>
      <wp:post_parent>42</wp:post_parent>
      <wp:attachment_url>https://example.com/wp-content/uploads/2024/01/sunset.jpg</wp:attachment_url>
      <wp:postmeta>
        <wp:meta_key>_wp_attached_file</wp:meta_key>
        <wp:meta_value>2024/01/sunset.jpg</wp:meta_value>
      </wp:postmeta>
    </item>
  </channel>
</rss>`;
// WXR with a media attachment (id 101) that carries a well-formed RFC 822
// pubDate, used to check that attachment publication dates are parsed.
// NOTE(review): markup follows the standard WXR 1.2 layout — confirm element
// names against what WxrParser actually reads.
const WXR_WITH_MEDIA_PUBDATE = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
  xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
  xmlns:content="http://purl.org/rss/1.0/modules/content/"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
  xmlns:wp="http://wordpress.org/export/1.2/">
  <channel>
    <title>My Blog</title>
    <link>https://example.com</link>
    <description>Test</description>
    <language>en</language>
    <item>
      <title>header-image</title>
      <pubDate>Fri, 05 Jan 2024 12:34:56 +0000</pubDate>
      <wp:post_id>101</wp:post_id>
      <wp:post_name>header-image</wp:post_name>
      <wp:status>inherit</wp:status>
      <wp:post_type>attachment</wp:post_type>
      <wp:post_parent>0</wp:post_parent>
      <wp:attachment_url>https://example.com/wp-content/uploads/2024/01/header.jpg</wp:attachment_url>
    </item>
  </channel>
</rss>`;
// WXR with one post (id 201) and one attachment (id 202) whose pubDate values
// are not parseable dates, used to check the null fallback for both item kinds.
// NOTE(review): markup follows the standard WXR 1.2 layout — confirm element
// names against what WxrParser actually reads.
const WXR_WITH_INVALID_PUBDATE = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
  xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
  xmlns:content="http://purl.org/rss/1.0/modules/content/"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
  xmlns:wp="http://wordpress.org/export/1.2/">
  <channel>
    <title>Dates Blog</title>
    <link>https://example.com</link>
    <description>Test</description>
    <language>en</language>
    <item>
      <title>Bad Date Post</title>
      <pubDate>not-a-date</pubDate>
      <content:encoded><![CDATA[<p>bad date</p>]]></content:encoded>
      <wp:post_id>201</wp:post_id>
      <wp:post_name>bad-date-post</wp:post_name>
      <wp:status>publish</wp:status>
      <wp:post_type>post</wp:post_type>
      <wp:post_parent>0</wp:post_parent>
    </item>
    <item>
      <title>Bad Date Media</title>
      <pubDate>also-not-a-date</pubDate>
      <wp:post_id>202</wp:post_id>
      <wp:post_name>bad-date-media</wp:post_name>
      <wp:status>inherit</wp:status>
      <wp:post_type>attachment</wp:post_type>
      <wp:post_parent>0</wp:post_parent>
      <wp:attachment_url>https://example.com/wp-content/uploads/2024/01/bad-date.jpg</wp:attachment_url>
    </item>
  </channel>
</rss>`;
// WXR with mixed content: posts, pages, and media.
// Channel declares one category ("News") and one tag ("Featured"); items are
// two posts (published + draft), one page, and one attachment owned by the page.
// Only the first post is assigned the category and tag.
// NOTE(review): markup follows the standard WXR 1.2 layout — confirm element
// names against what WxrParser actually reads.
const WXR_MIXED = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
  xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
  xmlns:content="http://purl.org/rss/1.0/modules/content/"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
  xmlns:wp="http://wordpress.org/export/1.2/">
  <channel>
    <title>Full Blog</title>
    <link>https://fullblog.com</link>
    <description>A full blog export</description>
    <language>de-DE</language>
    <wp:category>
      <wp:category_nicename>news</wp:category_nicename>
      <wp:category_parent></wp:category_parent>
      <wp:cat_name><![CDATA[News]]></wp:cat_name>
    </wp:category>
    <wp:tag>
      <wp:tag_slug>featured</wp:tag_slug>
      <wp:tag_name><![CDATA[Featured]]></wp:tag_name>
    </wp:tag>
    <item>
      <title>First Post</title>
      <pubDate>Tue, 02 Jan 2024 08:00:00 +0000</pubDate>
      <content:encoded><![CDATA[<p>First post content.</p>]]></content:encoded>
      <wp:post_id>1</wp:post_id>
      <wp:post_name>first-post</wp:post_name>
      <wp:status>publish</wp:status>
      <wp:post_type>post</wp:post_type>
      <wp:post_parent>0</wp:post_parent>
      <category domain="category" nicename="news"><![CDATA[News]]></category>
      <category domain="post_tag" nicename="featured"><![CDATA[Featured]]></category>
    </item>
    <item>
      <title>Second Post</title>
      <pubDate>Wed, 03 Jan 2024 09:00:00 +0000</pubDate>
      <content:encoded><![CDATA[<p>Second post content.</p>]]></content:encoded>
      <wp:post_id>2</wp:post_id>
      <wp:post_name>second-post</wp:post_name>
      <wp:status>draft</wp:status>
      <wp:post_type>post</wp:post_type>
      <wp:post_parent>0</wp:post_parent>
    </item>
    <item>
      <title>Contact</title>
      <content:encoded><![CDATA[<p>Contact us here.</p>]]></content:encoded>
      <wp:post_id>3</wp:post_id>
      <wp:post_name>contact</wp:post_name>
      <wp:status>publish</wp:status>
      <wp:post_type>page</wp:post_type>
      <wp:post_parent>0</wp:post_parent>
    </item>
    <item>
      <title>logo</title>
      <wp:post_id>4</wp:post_id>
      <wp:post_name>logo</wp:post_name>
      <wp:status>inherit</wp:status>
      <wp:post_type>attachment</wp:post_type>
      <wp:post_parent>3</wp:post_parent>
      <wp:attachment_url>https://fullblog.com/wp-content/uploads/2024/02/logo.png</wp:attachment_url>
    </item>
  </channel>
</rss>`;
// WXR with draft and trashed posts alongside a published one, so the tests can
// check that every status value is carried through rather than filtered out.
// NOTE(review): markup follows the standard WXR 1.2 layout — confirm element
// names against what WxrParser actually reads.
const WXR_WITH_STATUSES = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
  xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
  xmlns:content="http://purl.org/rss/1.0/modules/content/"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
  xmlns:wp="http://wordpress.org/export/1.2/">
  <channel>
    <title>Blog</title>
    <link>https://example.com</link>
    <description></description>
    <language>en</language>
    <item>
      <title>Published Post</title>
      <content:encoded><![CDATA[<p>Published</p>]]></content:encoded>
      <wp:post_id>1</wp:post_id>
      <wp:post_name>published-post</wp:post_name>
      <wp:status>publish</wp:status>
      <wp:post_type>post</wp:post_type>
      <wp:post_parent>0</wp:post_parent>
    </item>
    <item>
      <title>Draft Post</title>
      <content:encoded><![CDATA[<p>Draft</p>]]></content:encoded>
      <wp:post_id>2</wp:post_id>
      <wp:post_name>draft-post</wp:post_name>
      <wp:status>draft</wp:status>
      <wp:post_type>post</wp:post_type>
      <wp:post_parent>0</wp:post_parent>
    </item>
    <item>
      <title>Trashed Post</title>
      <content:encoded><![CDATA[<p>Trash</p>]]></content:encoded>
      <wp:post_id>3</wp:post_id>
      <wp:post_name>__trashed</wp:post_name>
      <wp:status>trash</wp:status>
      <wp:post_type>post</wp:post_type>
      <wp:post_parent>0</wp:post_parent>
    </item>
  </channel>
</rss>`;
/**
 * Test suite for WxrParser.
 *
 * `parseXml` is exercised synchronously against the in-file WXR fixtures;
 * `parseFile` is exercised against the mocked `fs/promises.readFile`.
 * A fresh parser instance is created before every test.
 */
describe('WxrParser', () => {
  let parser: WxrParser;

  beforeEach(() => {
    parser = new WxrParser();
  });

  describe('parseXml', () => {
    // --- Channel-level data -------------------------------------------------

    it('should parse minimal WXR and extract site info', () => {
      const result = parser.parseXml(MINIMAL_WXR);

      expect(result.site.title).toBe('My Test Blog');
      expect(result.site.link).toBe('https://example.com');
      expect(result.site.description).toBe('A test blog');
      expect(result.site.language).toBe('en-US');
    });

    it('should return empty arrays when no items exist', () => {
      const result = parser.parseXml(MINIMAL_WXR);

      // Every collection must be an empty array, never undefined/null.
      expect(result.posts).toEqual([]);
      expect(result.pages).toEqual([]);
      expect(result.media).toEqual([]);
      expect(result.categories).toEqual([]);
      expect(result.tags).toEqual([]);
    });

    it('should extract channel-level categories with parent relationships', () => {
      const result = parser.parseXml(WXR_WITH_TAXONOMIES);

      expect(result.categories).toHaveLength(2);
      // A top-level category reports an empty-string parent.
      expect(result.categories[0]).toEqual({
        name: 'Technology',
        slug: 'technology',
        parent: '',
      });
      // A child category references its parent by slug.
      expect(result.categories[1]).toEqual({
        name: 'Web Development',
        slug: 'web-dev',
        parent: 'technology',
      });
    });

    it('should extract channel-level tags', () => {
      const result = parser.parseXml(WXR_WITH_TAXONOMIES);

      expect(result.tags).toHaveLength(2);
      expect(result.tags[0]).toEqual({
        name: 'JavaScript',
        slug: 'javascript',
      });
      expect(result.tags[1]).toEqual({
        name: 'TypeScript',
        slug: 'typescript',
      });
    });

    // --- Posts and pages ----------------------------------------------------

    it('should parse a published post with all fields', () => {
      const result = parser.parseXml(WXR_WITH_POST);

      expect(result.posts).toHaveLength(1);
      const post = result.posts[0];
      expect(post.wpId).toBe(42);
      expect(post.title).toBe('Hello World');
      expect(post.slug).toBe('hello-world');
      // Content is expected verbatim, including its HTML markup.
      expect(post.content).toBe('<p>Welcome to my blog. This is my first post.</p>');
      expect(post.excerpt).toBe('Welcome to my blog.');
      expect(post.creator).toBe('admin');
      expect(post.status).toBe('publish');
      expect(post.postType).toBe('post');
      expect(post.categories).toEqual(['Uncategorized']);
      expect(post.tags).toEqual(['Intro', 'Welcome']);
      expect(post.pubDate).toBeInstanceOf(Date);
    });

    it('should extract postDate and postModified from WXR', () => {
      const result = parser.parseXml(WXR_WITH_POST);
      const post = result.posts[0];

      // postDate is the WordPress local creation date
      expect(post.postDate).toBeInstanceOf(Date);
      expect(post.postDate?.toISOString()).toBe('2024-01-15T10:30:00.000Z');
      // postModified is the WordPress local modification date
      expect(post.postModified).toBeInstanceOf(Date);
      expect(post.postModified?.toISOString()).toBe('2024-01-20T15:45:30.000Z');
    });

    it('should handle missing postDate and postModified gracefully', () => {
      const result = parser.parseXml(WXR_WITH_PAGE);
      const page = result.pages[0];

      // Page test data doesn't have post_date/post_modified
      expect(page.postDate).toBeNull();
      expect(page.postModified).toBeNull();
    });

    it('should parse a page and put it in pages array', () => {
      const result = parser.parseXml(WXR_WITH_PAGE);

      // A page must not leak into the posts collection.
      expect(result.posts).toHaveLength(0);
      expect(result.pages).toHaveLength(1);
      const page = result.pages[0];
      expect(page.wpId).toBe(10);
      expect(page.title).toBe('About Me');
      expect(page.slug).toBe('about');
      // NOTE(review): heading level assumed — keep in sync with the
      // WXR_WITH_PAGE fixture content.
      expect(page.content).toContain('<h1>About</h1>');
      expect(page.postType).toBe('page');
    });

    // --- Media --------------------------------------------------------------

    it('should parse a media attachment with URL and filename', () => {
      const result = parser.parseXml(WXR_WITH_MEDIA);

      // An attachment must not leak into the posts collection.
      expect(result.posts).toHaveLength(0);
      expect(result.media).toHaveLength(1);
      const media = result.media[0];
      expect(media.wpId).toBe(100);
      expect(media.title).toBe('sunset-photo');
      expect(media.url).toBe('https://example.com/wp-content/uploads/2024/01/sunset.jpg');
      expect(media.filename).toBe('sunset.jpg');
      expect(media.relativePath).toBe('2024/01/sunset.jpg');
      expect(media.parentId).toBe(42);
      expect(media.description).toBe('A beautiful sunset');
    });

    // --- Mixed exports ------------------------------------------------------

    it('should separate posts, pages, and media from mixed content', () => {
      const result = parser.parseXml(WXR_MIXED);

      expect(result.posts).toHaveLength(2);
      expect(result.pages).toHaveLength(1);
      expect(result.media).toHaveLength(1);
      expect(result.categories).toHaveLength(1);
      expect(result.tags).toHaveLength(1);
      // Items are expected in the order they appear in the fixture.
      expect(result.posts[0].title).toBe('First Post');
      expect(result.posts[1].title).toBe('Second Post');
      expect(result.pages[0].title).toBe('Contact');
      expect(result.media[0].title).toBe('logo');
    });

    it('should extract post categories and tags from item-level category elements', () => {
      const result = parser.parseXml(WXR_MIXED);

      const firstPost = result.posts[0];
      expect(firstPost.categories).toEqual(['News']);
      expect(firstPost.tags).toEqual(['Featured']);
      // Second post has no categories or tags
      const secondPost = result.posts[1];
      expect(secondPost.categories).toEqual([]);
      expect(secondPost.tags).toEqual([]);
    });

    it('should handle different post statuses', () => {
      const result = parser.parseXml(WXR_WITH_STATUSES);

      // Draft and trashed posts are returned alongside published ones.
      expect(result.posts).toHaveLength(3);
      expect(result.posts[0].status).toBe('publish');
      expect(result.posts[1].status).toBe('draft');
      expect(result.posts[2].status).toBe('trash');
    });

    // --- Media path / type derivation ---------------------------------------

    it('should extract relative path from media URL based on wp-content/uploads', () => {
      const result = parser.parseXml(WXR_WITH_MEDIA);
      const media = result.media[0];

      // The path after wp-content/uploads/
      expect(media.relativePath).toBe('2024/01/sunset.jpg');
    });

    it('should extract relative path from mixed content media', () => {
      const result = parser.parseXml(WXR_MIXED);
      const media = result.media[0];

      expect(media.relativePath).toBe('2024/02/logo.png');
      expect(media.filename).toBe('logo.png');
    });

    it('should handle empty content gracefully', () => {
      const result = parser.parseXml(WXR_WITH_MEDIA);

      // Media items in WXR often have empty excerpt
      const media = result.media[0];
      expect(media).toBeDefined();
    });

    it('should infer mime type from file extension', () => {
      const result = parser.parseXml(WXR_WITH_MEDIA);
      expect(result.media[0].mimeType).toBe('image/jpeg');

      const mixedResult = parser.parseXml(WXR_MIXED);
      expect(mixedResult.media[0].mimeType).toBe('image/png');
    });

    // --- pubDate handling ---------------------------------------------------

    it('should handle missing pubDate gracefully', () => {
      const result = parser.parseXml(WXR_WITH_PAGE);

      // Page has no pubDate element
      expect(result.pages[0].pubDate).toBeNull();
    });

    it('should parse valid RFC822 pubDate for media items', () => {
      const result = parser.parseXml(WXR_WITH_MEDIA_PUBDATE);

      expect(result.media).toHaveLength(1);
      expect(result.media[0].pubDate).toBeInstanceOf(Date);
      expect(result.media[0].pubDate?.toISOString()).toBe('2024-01-05T12:34:56.000Z');
    });

    it('should fallback to null for invalid pubDate nodes in post and media items', () => {
      const result = parser.parseXml(WXR_WITH_INVALID_PUBDATE);

      // The items themselves are still returned; only the date is nulled.
      expect(result.posts).toHaveLength(1);
      expect(result.media).toHaveLength(1);
      expect(result.posts[0].pubDate).toBeNull();
      expect(result.media[0].pubDate).toBeNull();
    });

    // --- Post/page parity ---------------------------------------------------

    it('should keep base fields parity between post and page parse branches', () => {
      const result = parser.parseXml(WXR_MIXED);
      const post = result.posts[0];
      const page = result.pages[0];

      // Both item kinds must populate the same base fields with usable values.
      expect(post.postType).toBe('post');
      expect(page.postType).toBe('page');
      expect(post.wpId).toBeGreaterThan(0);
      expect(page.wpId).toBeGreaterThan(0);
      expect(post.title).toBeTruthy();
      expect(page.title).toBeTruthy();
      expect(post.slug).toBeTruthy();
      expect(page.slug).toBeTruthy();
      expect(typeof post.content).toBe('string');
      expect(typeof page.content).toBe('string');
      expect(typeof post.excerpt).toBe('string');
      expect(typeof page.excerpt).toBe('string');
    });
  });

  describe('parseFile', () => {
    it('should read a file and parse its contents', async () => {
      // Importing the mocked module yields the vi.fn() stub registered via vi.mock.
      const fs = await import('fs/promises');
      vi.mocked(fs.readFile).mockResolvedValueOnce(WXR_WITH_POST);

      const result = await parser.parseFile('/path/to/export.xml');

      expect(fs.readFile).toHaveBeenCalledWith('/path/to/export.xml', 'utf-8');
      expect(result.posts).toHaveLength(1);
      expect(result.posts[0].title).toBe('Hello World');
    });

    it('should throw an error if the file cannot be read', async () => {
      const fs = await import('fs/promises');
      vi.mocked(fs.readFile).mockRejectedValueOnce(new Error('ENOENT'));

      // The read failure must surface to the caller rather than be swallowed.
      await expect(parser.parseFile('/nonexistent.xml')).rejects.toThrow('ENOENT');
    });
  });
});