/**
 * WxrParser Unit Tests
 *
 * Tests the REAL WxrParser class with mocked filesystem.
 * Following TDD best practices: mock external dependencies, test real implementation.
 *
 * NOTE(review): the fixtures below use the standard WXR 1.2 element names
 * (`wp:post_id`, `wp:post_name`, `content:encoded`, ...). Confirm they match
 * the elements WxrParser actually reads if a test fails unexpectedly.
 */

import { describe, it, expect, beforeEach, vi } from 'vitest';
import { WxrParser } from '../../src/main/engine/WxrParser';
import type { WxrData } from '../../src/main/engine/WxrParser';

// Mock fs/promises so parseFile never touches the real filesystem.
vi.mock('fs/promises', () => ({
  readFile: vi.fn(),
}));

// Minimal valid WXR XML for testing: channel metadata only, no items or terms.
const MINIMAL_WXR = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
  xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
  xmlns:content="http://purl.org/rss/1.0/modules/content/"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
  xmlns:wp="http://wordpress.org/export/1.2/">
<channel>
  <title>My Test Blog</title>
  <link>https://example.com</link>
  <description>A test blog</description>
  <language>en-US</language>
</channel>
</rss>
`;

// WXR with categories and tags at channel level.
// "Web Development" is a child of "Technology" (referenced by nicename).
const WXR_WITH_TAXONOMIES = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
  xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
  xmlns:content="http://purl.org/rss/1.0/modules/content/"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
  xmlns:wp="http://wordpress.org/export/1.2/">
<channel>
  <title>My Blog</title>
  <link>https://example.com</link>
  <description>Test</description>
  <language>en</language>
  <wp:category>
    <wp:term_id>1</wp:term_id>
    <wp:category_nicename>technology</wp:category_nicename>
    <wp:category_parent></wp:category_parent>
    <wp:cat_name><![CDATA[Technology]]></wp:cat_name>
  </wp:category>
  <wp:category>
    <wp:term_id>2</wp:term_id>
    <wp:category_nicename>web-dev</wp:category_nicename>
    <wp:category_parent>technology</wp:category_parent>
    <wp:cat_name><![CDATA[Web Development]]></wp:cat_name>
  </wp:category>
  <wp:tag>
    <wp:term_id>10</wp:term_id>
    <wp:tag_slug>javascript</wp:tag_slug>
    <wp:tag_name><![CDATA[JavaScript]]></wp:tag_name>
  </wp:tag>
  <wp:tag>
    <wp:term_id>11</wp:term_id>
    <wp:tag_slug>typescript</wp:tag_slug>
    <wp:tag_name><![CDATA[TypeScript]]></wp:tag_name>
  </wp:tag>
</channel>
</rss>
`;

// WXR with a single published post carrying every field the tests assert on:
// creator, content, excerpt, dates, one category and two tags.
const WXR_WITH_POST = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
  xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
  xmlns:content="http://purl.org/rss/1.0/modules/content/"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
  xmlns:wp="http://wordpress.org/export/1.2/">
<channel>
  <title>My Blog</title>
  <link>https://example.com</link>
  <description>Test</description>
  <language>en</language>
  <item>
    <title>Hello World</title>
    <link>https://example.com/hello-world/</link>
    <pubDate>Mon, 15 Jan 2024 10:30:00 +0000</pubDate>
    <dc:creator><![CDATA[admin]]></dc:creator>
    <content:encoded><![CDATA[<p>Welcome to my blog. This is my first post.</p>]]></content:encoded>
    <excerpt:encoded><![CDATA[Welcome to my blog.]]></excerpt:encoded>
    <wp:post_id>42</wp:post_id>
    <wp:post_date>2024-01-15 10:30:00</wp:post_date>
    <wp:post_date_gmt>2024-01-15 10:30:00</wp:post_date_gmt>
    <wp:post_modified>2024-01-20 15:45:30</wp:post_modified>
    <wp:post_modified_gmt>2024-01-20 15:45:30</wp:post_modified_gmt>
    <wp:post_name>hello-world</wp:post_name>
    <wp:status>publish</wp:status>
    <wp:post_type>post</wp:post_type>
    <wp:post_parent>0</wp:post_parent>
    <category domain="category" nicename="uncategorized"><![CDATA[Uncategorized]]></category>
    <category domain="post_tag" nicename="intro"><![CDATA[Intro]]></category>
    <category domain="post_tag" nicename="welcome"><![CDATA[Welcome]]></category>
  </item>
</channel>
</rss>
`;

// WXR with a page. Deliberately has no pubDate, wp:post_date or
// wp:post_modified so the "missing date" tests can assert null.
const WXR_WITH_PAGE = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
  xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
  xmlns:content="http://purl.org/rss/1.0/modules/content/"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
  xmlns:wp="http://wordpress.org/export/1.2/">
<channel>
  <title>My Blog</title>
  <link>https://example.com</link>
  <description>Test</description>
  <language>en</language>
  <item>
    <title>About Me</title>
    <dc:creator><![CDATA[admin]]></dc:creator>
    <content:encoded><![CDATA[<h2>About</h2><p>This is the about page.</p>]]></content:encoded>
    <excerpt:encoded><![CDATA[]]></excerpt:encoded>
    <wp:post_id>10</wp:post_id>
    <wp:post_name>about</wp:post_name>
    <wp:status>publish</wp:status>
    <wp:post_type>page</wp:post_type>
    <wp:post_parent>0</wp:post_parent>
  </item>
</channel>
</rss>
`;

// WXR with a media attachment whose parent is post 42.
const WXR_WITH_MEDIA = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
  xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
  xmlns:content="http://purl.org/rss/1.0/modules/content/"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
  xmlns:wp="http://wordpress.org/export/1.2/">
<channel>
  <title>My Blog</title>
  <link>https://example.com</link>
  <description>Test</description>
  <language>en</language>
  <item>
    <title>sunset-photo</title>
    <dc:creator><![CDATA[admin]]></dc:creator>
    <content:encoded><![CDATA[A beautiful sunset]]></content:encoded>
    <excerpt:encoded><![CDATA[]]></excerpt:encoded>
    <wp:post_id>100</wp:post_id>
    <wp:post_name>sunset-photo</wp:post_name>
    <wp:status>inherit</wp:status>
    <wp:post_type>attachment</wp:post_type>
    <wp:post_parent>42</wp:post_parent>
    <wp:attachment_url>https://example.com/wp-content/uploads/2024/01/sunset.jpg</wp:attachment_url>
    <wp:postmeta>
      <wp:meta_key>_wp_attached_file</wp:meta_key>
      <wp:meta_value>2024/01/sunset.jpg</wp:meta_value>
    </wp:postmeta>
  </item>
</channel>
</rss>
`;

// WXR with mixed content: two posts (one published, one draft), a page and
// a media attachment, plus one channel-level category and tag.
const WXR_MIXED = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
  xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
  xmlns:content="http://purl.org/rss/1.0/modules/content/"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
  xmlns:wp="http://wordpress.org/export/1.2/">
<channel>
  <title>Full Blog</title>
  <link>https://fullblog.com</link>
  <description>A full blog export</description>
  <language>de-DE</language>
  <wp:category>
    <wp:category_nicename>news</wp:category_nicename>
    <wp:category_parent></wp:category_parent>
    <wp:cat_name><![CDATA[News]]></wp:cat_name>
  </wp:category>
  <wp:tag>
    <wp:tag_slug>featured</wp:tag_slug>
    <wp:tag_name><![CDATA[Featured]]></wp:tag_name>
  </wp:tag>
  <item>
    <title>First Post</title>
    <pubDate>Tue, 02 Jan 2024 08:00:00 +0000</pubDate>
    <dc:creator><![CDATA[admin]]></dc:creator>
    <content:encoded><![CDATA[<p>First post content.</p>]]></content:encoded>
    <excerpt:encoded><![CDATA[]]></excerpt:encoded>
    <wp:post_id>1</wp:post_id>
    <wp:post_name>first-post</wp:post_name>
    <wp:status>publish</wp:status>
    <wp:post_type>post</wp:post_type>
    <wp:post_parent>0</wp:post_parent>
    <category domain="category" nicename="news"><![CDATA[News]]></category>
    <category domain="post_tag" nicename="featured"><![CDATA[Featured]]></category>
  </item>
  <item>
    <title>Second Post</title>
    <pubDate>Wed, 03 Jan 2024 09:00:00 +0000</pubDate>
    <dc:creator><![CDATA[admin]]></dc:creator>
    <content:encoded><![CDATA[<p>Second post content.</p>]]></content:encoded>
    <excerpt:encoded><![CDATA[]]></excerpt:encoded>
    <wp:post_id>2</wp:post_id>
    <wp:post_name>second-post</wp:post_name>
    <wp:status>draft</wp:status>
    <wp:post_type>post</wp:post_type>
    <wp:post_parent>0</wp:post_parent>
  </item>
  <item>
    <title>Contact</title>
    <dc:creator><![CDATA[admin]]></dc:creator>
    <content:encoded><![CDATA[<p>Contact us here.</p>]]></content:encoded>
    <excerpt:encoded><![CDATA[]]></excerpt:encoded>
    <wp:post_id>3</wp:post_id>
    <wp:post_name>contact</wp:post_name>
    <wp:status>publish</wp:status>
    <wp:post_type>page</wp:post_type>
    <wp:post_parent>0</wp:post_parent>
  </item>
  <item>
    <title>logo</title>
    <dc:creator><![CDATA[admin]]></dc:creator>
    <content:encoded><![CDATA[]]></content:encoded>
    <excerpt:encoded><![CDATA[]]></excerpt:encoded>
    <wp:post_id>4</wp:post_id>
    <wp:post_name>logo</wp:post_name>
    <wp:status>inherit</wp:status>
    <wp:post_type>attachment</wp:post_type>
    <wp:post_parent>3</wp:post_parent>
    <wp:attachment_url>https://fullblog.com/wp-content/uploads/2024/02/logo.png</wp:attachment_url>
  </item>
</channel>
</rss>
`;

// WXR with published, draft and trashed posts (in that order).
const WXR_WITH_STATUSES = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
  xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
  xmlns:content="http://purl.org/rss/1.0/modules/content/"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
  xmlns:wp="http://wordpress.org/export/1.2/">
<channel>
  <title>Blog</title>
  <link>https://example.com</link>
  <description></description>
  <language>en</language>
  <item>
    <title>Published Post</title>
    <dc:creator><![CDATA[admin]]></dc:creator>
    <content:encoded><![CDATA[<p>Published</p>]]></content:encoded>
    <excerpt:encoded><![CDATA[]]></excerpt:encoded>
    <wp:post_id>1</wp:post_id>
    <wp:post_name>published-post</wp:post_name>
    <wp:status>publish</wp:status>
    <wp:post_type>post</wp:post_type>
    <wp:post_parent>0</wp:post_parent>
  </item>
  <item>
    <title>Draft Post</title>
    <dc:creator><![CDATA[admin]]></dc:creator>
    <content:encoded><![CDATA[<p>Draft</p>]]></content:encoded>
    <excerpt:encoded><![CDATA[]]></excerpt:encoded>
    <wp:post_id>2</wp:post_id>
    <wp:post_name>draft-post</wp:post_name>
    <wp:status>draft</wp:status>
    <wp:post_type>post</wp:post_type>
    <wp:post_parent>0</wp:post_parent>
  </item>
  <item>
    <title>Trashed Post</title>
    <dc:creator><![CDATA[admin]]></dc:creator>
    <content:encoded><![CDATA[<p>Trash</p>]]></content:encoded>
    <excerpt:encoded><![CDATA[]]></excerpt:encoded>
    <wp:post_id>3</wp:post_id>
    <wp:post_name>__trashed</wp:post_name>
    <wp:status>trash</wp:status>
    <wp:post_type>post</wp:post_type>
    <wp:post_parent>0</wp:post_parent>
  </item>
</channel>
</rss>
`;

describe('WxrParser', () => {
  let parser: WxrParser;

  // A fresh parser per test keeps the cases independent of each other.
  beforeEach(() => {
    parser = new WxrParser();
  });

  describe('parseXml', () => {
    it('should parse minimal WXR and extract site info', () => {
      const result = parser.parseXml(MINIMAL_WXR);

      expect(result.site.title).toBe('My Test Blog');
      expect(result.site.link).toBe('https://example.com');
      expect(result.site.description).toBe('A test blog');
      expect(result.site.language).toBe('en-US');
    });

    it('should return empty arrays when no items exist', () => {
      const result = parser.parseXml(MINIMAL_WXR);

      expect(result.posts).toEqual([]);
      expect(result.pages).toEqual([]);
      expect(result.media).toEqual([]);
      expect(result.categories).toEqual([]);
      expect(result.tags).toEqual([]);
    });

    it('should extract channel-level categories with parent relationships', () => {
      const result = parser.parseXml(WXR_WITH_TAXONOMIES);

      expect(result.categories).toHaveLength(2);
      expect(result.categories[0]).toEqual({
        name: 'Technology',
        slug: 'technology',
        parent: '',
      });
      expect(result.categories[1]).toEqual({
        name: 'Web Development',
        slug: 'web-dev',
        parent: 'technology',
      });
    });

    it('should extract channel-level tags', () => {
      const result = parser.parseXml(WXR_WITH_TAXONOMIES);

      expect(result.tags).toHaveLength(2);
      expect(result.tags[0]).toEqual({
        name: 'JavaScript',
        slug: 'javascript',
      });
      expect(result.tags[1]).toEqual({
        name: 'TypeScript',
        slug: 'typescript',
      });
    });

    it('should parse a published post with all fields', () => {
      const result = parser.parseXml(WXR_WITH_POST);

      expect(result.posts).toHaveLength(1);
      const post = result.posts[0];
      expect(post.wpId).toBe(42);
      expect(post.title).toBe('Hello World');
      expect(post.slug).toBe('hello-world');
      expect(post.content).toBe('<p>Welcome to my blog. This is my first post.</p>');
      expect(post.excerpt).toBe('Welcome to my blog.');
      expect(post.creator).toBe('admin');
      expect(post.status).toBe('publish');
      expect(post.postType).toBe('post');
      expect(post.categories).toEqual(['Uncategorized']);
      expect(post.tags).toEqual(['Intro', 'Welcome']);
      expect(post.pubDate).toBeInstanceOf(Date);
    });

    it('should extract postDate and postModified from WXR', () => {
      const result = parser.parseXml(WXR_WITH_POST);
      const post = result.posts[0];

      // postDate is the WordPress local creation date
      expect(post.postDate).toBeInstanceOf(Date);
      expect(post.postDate?.toISOString()).toBe('2024-01-15T10:30:00.000Z');

      // postModified is the WordPress local modification date
      expect(post.postModified).toBeInstanceOf(Date);
      expect(post.postModified?.toISOString()).toBe('2024-01-20T15:45:30.000Z');
    });

    it('should handle missing postDate and postModified gracefully', () => {
      const result = parser.parseXml(WXR_WITH_PAGE);
      const page = result.pages[0];

      // Page test data doesn't have post_date/post_modified
      expect(page.postDate).toBeNull();
      expect(page.postModified).toBeNull();
    });

    it('should parse a page and put it in pages array', () => {
      const result = parser.parseXml(WXR_WITH_PAGE);

      expect(result.posts).toHaveLength(0);
      expect(result.pages).toHaveLength(1);
      const page = result.pages[0];
      expect(page.wpId).toBe(10);
      expect(page.title).toBe('About Me');
      expect(page.slug).toBe('about');
      expect(page.content).toContain('<h2>About</h2>');
      expect(page.postType).toBe('page');
    });

    it('should parse a media attachment with URL and filename', () => {
      const result = parser.parseXml(WXR_WITH_MEDIA);

      expect(result.posts).toHaveLength(0);
      expect(result.media).toHaveLength(1);
      const media = result.media[0];
      expect(media.wpId).toBe(100);
      expect(media.title).toBe('sunset-photo');
      expect(media.url).toBe('https://example.com/wp-content/uploads/2024/01/sunset.jpg');
      expect(media.filename).toBe('sunset.jpg');
      expect(media.relativePath).toBe('2024/01/sunset.jpg');
      expect(media.parentId).toBe(42);
      expect(media.description).toBe('A beautiful sunset');
    });

    it('should separate posts, pages, and media from mixed content', () => {
      const result = parser.parseXml(WXR_MIXED);

      expect(result.posts).toHaveLength(2);
      expect(result.pages).toHaveLength(1);
      expect(result.media).toHaveLength(1);
      expect(result.categories).toHaveLength(1);
      expect(result.tags).toHaveLength(1);

      expect(result.posts[0].title).toBe('First Post');
      expect(result.posts[1].title).toBe('Second Post');
      expect(result.pages[0].title).toBe('Contact');
      expect(result.media[0].title).toBe('logo');
    });

    it('should extract post categories and tags from item-level category elements', () => {
      const result = parser.parseXml(WXR_MIXED);

      const firstPost = result.posts[0];
      expect(firstPost.categories).toEqual(['News']);
      expect(firstPost.tags).toEqual(['Featured']);

      // Second post has no categories or tags
      const secondPost = result.posts[1];
      expect(secondPost.categories).toEqual([]);
      expect(secondPost.tags).toEqual([]);
    });

    it('should handle different post statuses', () => {
      const result = parser.parseXml(WXR_WITH_STATUSES);

      expect(result.posts).toHaveLength(3);
      expect(result.posts[0].status).toBe('publish');
      expect(result.posts[1].status).toBe('draft');
      expect(result.posts[2].status).toBe('trash');
    });

    it('should extract relative path from media URL based on wp-content/uploads', () => {
      const result = parser.parseXml(WXR_WITH_MEDIA);
      const media = result.media[0];

      // The relative path is the part of the URL after wp-content/uploads/
      expect(media.relativePath).toBe('2024/01/sunset.jpg');
    });

    it('should extract relative path from mixed content media', () => {
      const result = parser.parseXml(WXR_MIXED);
      const media = result.media[0];

      expect(media.relativePath).toBe('2024/02/logo.png');
      expect(media.filename).toBe('logo.png');
    });

    it('should handle empty content gracefully', () => {
      const result = parser.parseXml(WXR_WITH_MEDIA);

      // Media items in WXR often have empty excerpt
      const media = result.media[0];
      expect(media).toBeDefined();
    });

    it('should infer mime type from file extension', () => {
      const result = parser.parseXml(WXR_WITH_MEDIA);
      expect(result.media[0].mimeType).toBe('image/jpeg');

      const mixedResult = parser.parseXml(WXR_MIXED);
      expect(mixedResult.media[0].mimeType).toBe('image/png');
    });

    it('should handle missing pubDate gracefully', () => {
      const result = parser.parseXml(WXR_WITH_PAGE);

      // Page has no pubDate element
      expect(result.pages[0].pubDate).toBeNull();
    });
  });

  describe('parseFile', () => {
    it('should read a file and parse its contents', async () => {
      // Resolves to the vi.mock factory above, so readFile is a vi.fn().
      const fs = await import('fs/promises');
      vi.mocked(fs.readFile).mockResolvedValueOnce(WXR_WITH_POST);

      const result = await parser.parseFile('/path/to/export.xml');

      expect(fs.readFile).toHaveBeenCalledWith('/path/to/export.xml', 'utf-8');
      expect(result.posts).toHaveLength(1);
      expect(result.posts[0].title).toBe('Hello World');
    });

    it('should throw an error if the file cannot be read', async () => {
      const fs = await import('fs/promises');
      vi.mocked(fs.readFile).mockRejectedValueOnce(new Error('ENOENT'));

      await expect(parser.parseFile('/nonexistent.xml')).rejects.toThrow('ENOENT');
    });
  });
});