From d88fb1d9fa3cbaf4a253f160ddcc4e3e96014260 Mon Sep 17 00:00:00 2001 From: hugo Date: Fri, 13 Feb 2026 13:07:44 +0100 Subject: [PATCH] feat: importer starting point --- package-lock.json | 26 +- package.json | 3 + src/main/engine/ImportAnalysisEngine.ts | 331 +++++++++++ src/main/engine/WxrParser.ts | 307 ++++++++++ src/main/engine/index.ts | 21 +- src/main/ipc/handlers.ts | 62 ++ src/main/preload.ts | 12 + .../components/ActivityBar/ActivityBar.tsx | 21 + src/renderer/components/Editor/Editor.tsx | 13 + .../ImportAnalysisView/ImportAnalysisView.css | 381 +++++++++++++ .../ImportAnalysisView/ImportAnalysisView.tsx | 432 ++++++++++++++ .../components/ImportAnalysisView/index.ts | 1 + src/renderer/components/TabBar/TabBar.tsx | 14 +- src/renderer/components/index.ts | 1 + src/renderer/store/appStore.ts | 26 +- src/renderer/types/electron.d.ts | 5 + tests/engine/ImportAnalysisEngine.test.ts | 537 ++++++++++++++++++ tests/engine/WxrParser.test.ts | 478 ++++++++++++++++ tests/setup.ts | 5 + 19 files changed, 2666 insertions(+), 10 deletions(-) create mode 100644 src/main/engine/ImportAnalysisEngine.ts create mode 100644 src/main/engine/WxrParser.ts create mode 100644 src/renderer/components/ImportAnalysisView/ImportAnalysisView.css create mode 100644 src/renderer/components/ImportAnalysisView/ImportAnalysisView.tsx create mode 100644 src/renderer/components/ImportAnalysisView/index.ts create mode 100644 tests/engine/ImportAnalysisEngine.test.ts create mode 100644 tests/engine/WxrParser.test.ts diff --git a/package-lock.json b/package-lock.json index 72da384..873d063 100644 --- a/package-lock.json +++ b/package-lock.json @@ -22,6 +22,7 @@ "@milkdown/react": "^7.18.0", "@milkdown/theme-nord": "^7.18.0", "@monaco-editor/react": "^4.7.0", + "@xmldom/xmldom": "^0.8.11", "chokidar": "^5.0.0", "date-fns": "^4.1.0", "drizzle-orm": "^0.45.1", @@ -34,6 +35,7 @@ "react-hot-toast": "^2.6.0", "sharp": "^0.34.5", "snowball-stemmers": "^0.6.0", + "turndown": "^7.2.2", "zod": "^4.3.6", "zustand": "^5.0.11" }, @@ -45,6 +47,7 @@ "@types/node": "^25.2.3", "@types/react": "^19.2.14", "@types/react-dom": "^19.2.3", + "@types/turndown": "^5.0.6", "@vitejs/plugin-react": "^5.1.4", "@vitest/coverage-v8": "^4.0.18", "@vitest/ui": "^4.0.18", @@ -3844,6 +3847,12 @@ "nanoid": "^5.0.9" } }, + "node_modules/@mixmark-io/domino": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@mixmark-io/domino/-/domino-2.2.0.tgz", + "integrity": "sha512-Y28PR25bHXUg88kCV7nivXrP2Nj2RueZ3/l/jdx6J9f8J4nsEGcgX0Qe6lt7Pa+J79+kPiJU3LguR6O/6zrLOw==", + "license": "BSD-2-Clause" + }, "node_modules/@monaco-editor/loader": { "version": "1.7.0", "resolved": "https://registry.npmjs.org/@monaco-editor/loader/-/loader-1.7.0.tgz", @@ -4690,6 +4699,13 @@ "license": "MIT", "optional": true }, + "node_modules/@types/turndown": { + "version": "5.0.6", + "resolved": "https://registry.npmjs.org/@types/turndown/-/turndown-5.0.6.tgz", + "integrity": "sha512-ru00MoyeeouE5BX4gRL+6m/BsDfbRayOskWqUvh7CLGW+UXxHQItqALa38kKnOiZPqJrtzJUgAC2+F0rL1S4Pg==", + "dev": true, + "license": "MIT" + }, "node_modules/@types/unist": { "version": "3.0.3", "resolved": "https://registry.npmjs.org/@types/unist/-/unist-3.0.3.tgz", @@ -5038,7 +5054,6 @@ "version": "0.8.11", "resolved": "https://registry.npmjs.org/@xmldom/xmldom/-/xmldom-0.8.11.tgz", "integrity": "sha512-cQzWCtO6C8TQiYl1ruKNn2U6Ao4o4WBBcbL61yJl84x+j5sOWWFU9X7DpND8XZG3daDppSsigMdfAIl2upQBRw==", - "dev": true, "license": "MIT", "engines": { "node": ">=10.0.0" @@ -12804,6 +12819,15 @@ "@esbuild/win32-x64": "0.27.3" } }, + "node_modules/turndown": { + "version": "7.2.2", + "resolved": "https://registry.npmjs.org/turndown/-/turndown-7.2.2.tgz", + "integrity": "sha512-1F7db8BiExOKxjSMU2b7if62D/XOyQyZbPKq/nUwopfgnHlqXHqQ0lvfUTeUIr1lZJzOPFn43dODyMSIfvWRKQ==", + "license": "MIT", + "dependencies": { + "@mixmark-io/domino": "^2.2.0" + } + }, "node_modules/type-fest": { "version": "5.4.4", "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-5.4.4.tgz", diff --git a/package.json b/package.json index d6bc2ba..5b055cc 100644 --- a/package.json +++ b/package.json @@ -34,6 +34,7 @@ "@types/node": "^25.2.3", "@types/react": "^19.2.14", "@types/react-dom": "^19.2.3", + "@types/turndown": "^5.0.6", "@vitejs/plugin-react": "^5.1.4", "@vitest/coverage-v8": "^4.0.18", "@vitest/ui": "^4.0.18", @@ -66,6 +67,7 @@ "@milkdown/react": "^7.18.0", "@milkdown/theme-nord": "^7.18.0", "@monaco-editor/react": "^4.7.0", + "@xmldom/xmldom": "^0.8.11", "chokidar": "^5.0.0", "date-fns": "^4.1.0", "drizzle-orm": "^0.45.1", @@ -78,6 +80,7 @@ "react-hot-toast": "^2.6.0", "sharp": "^0.34.5", "snowball-stemmers": "^0.6.0", + "turndown": "^7.2.2", "zod": "^4.3.6", "zustand": "^5.0.11" }, diff --git a/src/main/engine/ImportAnalysisEngine.ts b/src/main/engine/ImportAnalysisEngine.ts new file mode 100644 index 0000000..4858a15 --- /dev/null +++ b/src/main/engine/ImportAnalysisEngine.ts @@ -0,0 +1,331 @@ +import crypto from 'crypto'; +import * as fs from 'fs/promises'; +import * as path from 'path'; +import TurndownService from 'turndown'; +import { getDatabase } from '../database'; +import { posts, media, tags } from '../database/schema'; +import { eq } from 'drizzle-orm'; +import type { WxrData, WxrPost, WxrMedia, WxrSiteInfo, WxrCategory, WxrTag } from './WxrParser'; + +export type PostAnalysisStatus = 'new' | 'update' | 'conflict' | 'content-duplicate'; +export type MediaAnalysisStatus = 'new' | 'update' | 'conflict' | 'content-duplicate' | 'missing'; + +export interface AnalyzedPost { + wxrPost: WxrPost; + status: PostAnalysisStatus; + contentHash: string; + markdownPreview: string; + existingPost?: { + id: string; + title: string; + slug: string; + checksum: string | null; + }; +} + +export interface AnalyzedMedia { + wxrMedia: WxrMedia; + status: MediaAnalysisStatus; + fileHash: string | null; + existingMedia?: { + id: string; + originalName: string; + checksum: string | null; + }; +} + +export interface AnalyzedCategory { + name: string; + slug: string; + existsInProject: boolean; +} + +export interface AnalyzedTag { + name: string; + slug: string; + existsInProject: boolean; +} + +export interface ImportAnalysisReport { + sourceFile: string; + site: WxrSiteInfo; + analyzedAt: Date; + posts: { + total: number; + new: number; + updates: number; + conflicts: number; + contentDuplicates: number; + items: AnalyzedPost[]; + }; + pages: { + total: number; + new: number; + updates: number; + conflicts: number; + contentDuplicates: number; + items: AnalyzedPost[]; + }; + media: { + total: number; + new: number; + updates: number; + conflicts: number; + contentDuplicates: number; + missing: number; + items: AnalyzedMedia[]; + }; + categories: AnalyzedCategory[]; + tags: AnalyzedTag[]; +} + +export class ImportAnalysisEngine { + private currentProjectId: string = ''; + private turndown: TurndownService; + + constructor() { + this.turndown = new TurndownService({ + headingStyle: 'atx', + codeBlockStyle: 'fenced', + bulletListMarker: '-', + }); + } + + setProjectContext(projectId: string): void { + this.currentProjectId = projectId; + } + + async analyzeWxr(wxrData: WxrData, sourceFile: string, uploadsFolder?: string): Promise { + const db = getDatabase().getLocal(); + + // Fetch existing posts for this project + const existingPosts = await db + .select({ + id: posts.id, + slug: posts.slug, + title: posts.title, + checksum: posts.checksum, + }) + .from(posts) + .where(eq(posts.projectId, this.currentProjectId)) + .all(); + + // Fetch existing media for this project + const existingMedia = await db + .select({ + id: media.id, + originalName: media.originalName, + checksum: media.checksum, + }) + .from(media) + .where(eq(media.projectId, this.currentProjectId)) + .all(); + + // Fetch existing tags for this project + const existingTags = await db + .select({ + name: tags.name, + }) + .from(tags) + .where(eq(tags.projectId, this.currentProjectId)) + .all(); + + // Build lookup maps for posts + const slugToPost = new Map(); + const checksumToPost = new Map(); + for (const post of existingPosts) { + slugToPost.set(post.slug, post); + if (post.checksum) { + checksumToPost.set(post.checksum, post); + } + } + + // Build lookup maps for media + const nameToMedia = new Map(); + const checksumToMedia = new Map(); + for (const m of existingMedia) { + nameToMedia.set(m.originalName.toLowerCase(), m); + if (m.checksum) { + checksumToMedia.set(m.checksum, m); + } + } + + // Build tag set + const existingTagNames = new Set(existingTags.map(t => t.name.toLowerCase())); + + // Analyze posts + const analyzedPosts = this.analyzePostItems(wxrData.posts, slugToPost, checksumToPost); + const analyzedPages = this.analyzePostItems(wxrData.pages, slugToPost, checksumToPost); + + // Analyze media + const analyzedMedia = await this.analyzeMediaItems(wxrData.media, nameToMedia, checksumToMedia, uploadsFolder); + + // Analyze categories + const analyzedCategories: AnalyzedCategory[] = wxrData.categories.map(cat => ({ + name: cat.name, + slug: cat.slug, + existsInProject: existingTagNames.has(cat.name.toLowerCase()), + })); + + // Analyze tags + const analyzedTags: AnalyzedTag[] = wxrData.tags.map(tag => ({ + name: tag.name, + slug: tag.slug, + existsInProject: existingTagNames.has(tag.name.toLowerCase()), + })); + + return { + sourceFile, + site: wxrData.site, + analyzedAt: new Date(), + posts: this.summarizePostAnalysis(analyzedPosts), + pages: this.summarizePostAnalysis(analyzedPages), + media: this.summarizeMediaAnalysis(analyzedMedia), + categories: analyzedCategories, + tags: analyzedTags, + }; + } + + private analyzePostItems( + wxrPosts: WxrPost[], + slugToPost: Map, + checksumToPost: Map, + ): AnalyzedPost[] { + return wxrPosts.map(wxrPost => { + const markdown = this.convertToMarkdown(wxrPost.content); + const contentHash = this.calculateChecksum(markdown); + const markdownPreview = markdown.substring(0, 200); + + const existingBySlug = slugToPost.get(wxrPost.slug); + const existingByHash = checksumToPost.get(contentHash); + + let status: PostAnalysisStatus; + let existingPost: AnalyzedPost['existingPost']; + + if (existingBySlug) { + if (existingBySlug.checksum === contentHash) { + status = 'update'; + } else { + status = 'conflict'; + } + existingPost = { + id: existingBySlug.id, + title: existingBySlug.title, + slug: existingBySlug.slug, + checksum: existingBySlug.checksum, + }; + } else if (existingByHash) { + status = 'content-duplicate'; + existingPost = { + id: existingByHash.id, + title: existingByHash.title, + slug: existingByHash.slug, + checksum: existingByHash.checksum, + }; + } else { + status = 'new'; + } + + return { wxrPost, status, contentHash, markdownPreview, existingPost }; + }); + } + + private async analyzeMediaItems( + wxrMediaItems: WxrMedia[], + nameToMedia: Map, + checksumToMedia: Map, + uploadsFolder?: string, + ): Promise { + const results: AnalyzedMedia[] = []; + + for (const wxrMedia of wxrMediaItems) { + let fileHash: string | null = null; + let fileFound = false; + + // Try to read the actual file from the uploads folder + if (uploadsFolder) { + try { + const filePath = path.join(uploadsFolder, wxrMedia.relativePath); + const buffer = await fs.readFile(filePath); + fileHash = this.calculateChecksum(buffer.toString('binary')); + fileFound = true; + } catch { + // File not found in uploads folder + } + } + + if (!fileFound) { + results.push({ + wxrMedia, + status: 'missing', + fileHash: null, + }); + continue; + } + + const existingByName = nameToMedia.get(wxrMedia.filename.toLowerCase()); + const existingByHash = fileHash ? checksumToMedia.get(fileHash) : undefined; + + let status: MediaAnalysisStatus; + let existingMedia: AnalyzedMedia['existingMedia']; + + if (existingByName) { + if (fileHash && existingByName.checksum === fileHash) { + status = 'update'; + } else { + status = 'conflict'; + } + existingMedia = { + id: existingByName.id, + originalName: existingByName.originalName, + checksum: existingByName.checksum, + }; + } else if (existingByHash) { + status = 'content-duplicate'; + existingMedia = { + id: existingByHash.id, + originalName: existingByHash.originalName, + checksum: existingByHash.checksum, + }; + } else { + status = 'new'; + } + + results.push({ wxrMedia, status, fileHash, existingMedia }); + } + + return results; + } + + private summarizePostAnalysis(items: AnalyzedPost[]): ImportAnalysisReport['posts'] { + return { + total: items.length, + new: items.filter(i => i.status === 'new').length, + updates: items.filter(i => i.status === 'update').length, + conflicts: items.filter(i => i.status === 'conflict').length, + contentDuplicates: items.filter(i => i.status === 'content-duplicate').length, + items, + }; + } + + private summarizeMediaAnalysis(items: AnalyzedMedia[]): ImportAnalysisReport['media'] { + return { + total: items.length, + new: items.filter(i => i.status === 'new').length, + updates: items.filter(i => i.status === 'update').length, + conflicts: items.filter(i => i.status === 'conflict').length, + contentDuplicates: items.filter(i => i.status === 'content-duplicate').length, + missing: items.filter(i => i.status === 'missing').length, + items, + }; + } + + private convertToMarkdown(html: string): string { + if (!html || !html.trim()) return ''; + return this.turndown.turndown(html); + } + + private calculateChecksum(content: string): string { + return crypto.createHash('md5').update(content).digest('hex'); + } +} diff --git a/src/main/engine/WxrParser.ts b/src/main/engine/WxrParser.ts new file mode 100644 index 0000000..a94b792 --- /dev/null +++ b/src/main/engine/WxrParser.ts @@ -0,0 +1,307 @@ +import { DOMParser } from '@xmldom/xmldom'; +import * as fs from 'fs/promises'; + +export interface WxrSiteInfo { + title: string; + link: string; + description: string; + language: string; +} + +export interface WxrPost { + wpId: number; + title: string; + slug: string; + content: string; + excerpt: string; + pubDate: Date | null; + creator: string; + status: string; + postType: string; + categories: string[]; + tags: string[]; +} + +export interface WxrMedia { + wpId: number; + title: string; + url: string; + filename: string; + relativePath: string; + pubDate: Date | null; + parentId: number; + mimeType: string; + description: string; +} + +export interface WxrCategory { + name: string; + slug: string; + parent: string; +} + +export interface WxrTag { + name: string; + slug: string; +} + +export interface WxrData { + site: WxrSiteInfo; + posts: WxrPost[]; + pages: WxrPost[]; + media: WxrMedia[]; + categories: WxrCategory[]; + tags: WxrTag[]; +} + +// WordPress namespace URIs +const NS = { + wp: 'http://wordpress.org/export/1.2/', + content: 'http://purl.org/rss/1.0/modules/content/', + excerpt: 'http://wordpress.org/export/1.2/excerpt/', + dc: 'http://purl.org/dc/elements/1.1/', +}; + +// Common MIME types by file extension +const EXT_TO_MIME: Record = { + jpg: 'image/jpeg', + jpeg: 'image/jpeg', + png: 'image/png', + gif: 'image/gif', + webp: 'image/webp', + svg: 'image/svg+xml', + bmp: 'image/bmp', + ico: 'image/x-icon', + mp4: 'video/mp4', + webm: 'video/webm', + mp3: 'audio/mpeg', + wav: 'audio/wav', + ogg: 'audio/ogg', + pdf: 'application/pdf', + doc: 'application/msword', + docx: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + zip: 'application/zip', +}; + +export class WxrParser { + + async parseFile(filePath: string): Promise { + const content = await fs.readFile(filePath, 'utf-8'); + return this.parseXml(content); + } + + parseXml(xmlContent: string): WxrData { + const doc = new DOMParser().parseFromString(xmlContent, 'text/xml'); + const channel = doc.getElementsByTagName('channel')[0]; + + if (!channel) { + throw new Error('Invalid WXR file: no element found'); + } + + const site = this.parseSiteInfo(channel); + const categories = this.parseChannelCategories(channel); + const tags = this.parseChannelTags(channel); + + const posts: WxrPost[] = []; + const pages: WxrPost[] = []; + const media: WxrMedia[] = []; + + const items = channel.getElementsByTagName('item'); + for (let i = 0; i < items.length; i++) { + const item = items[i]; + const postType = this.getElementText(item, 'post_type', NS.wp); + + if (postType === 'attachment') { + media.push(this.parseMediaItem(item)); + } else if (postType === 'page') { + pages.push(this.parsePostItem(item)); + } else { + // 'post' and any other custom post types + posts.push(this.parsePostItem(item)); + } + } + + return { site, posts, pages, media, categories, tags }; + } + + private parseSiteInfo(channel: Element): WxrSiteInfo { + return { + title: this.getDirectChildText(channel, 'title'), + link: this.getDirectChildText(channel, 'link'), + description: this.getDirectChildText(channel, 'description'), + language: this.getDirectChildText(channel, 'language'), + }; + } + + private parseChannelCategories(channel: Element): WxrCategory[] { + const categories: WxrCategory[] = []; + const elements = channel.getElementsByTagNameNS(NS.wp, 'category'); + + for (let i = 0; i < elements.length; i++) { + const el = elements[i]; + // Only process direct children of channel (not item-level category elements) + if (el.parentNode !== channel) continue; + + categories.push({ + name: this.getElementText(el, 'cat_name', NS.wp), + slug: this.getElementText(el, 'category_nicename', NS.wp), + parent: this.getElementText(el, 'category_parent', NS.wp), + }); + } + + return categories; + } + + private parseChannelTags(channel: Element): WxrTag[] { + const tags: WxrTag[] = []; + const elements = channel.getElementsByTagNameNS(NS.wp, 'tag'); + + for (let i = 0; i < elements.length; i++) { + const el = elements[i]; + if (el.parentNode !== channel) continue; + + tags.push({ + name: this.getElementText(el, 'tag_name', NS.wp), + slug: this.getElementText(el, 'tag_slug', NS.wp), + }); + } + + return tags; + } + + private parsePostItem(item: Element): WxrPost { + const categories: string[] = []; + const tags: string[] = []; + + // Item-level elements (no namespace) + const catElements = item.getElementsByTagName('category'); + for (let i = 0; i < catElements.length; i++) { + const el = catElements[i]; + // Only direct children of item + if (el.parentNode !== item) continue; + const domain = el.getAttribute('domain'); + const text = this.getTextContent(el); + if (domain === 'category' && text) { + categories.push(text); + } else if (domain === 'post_tag' && text) { + tags.push(text); + } + } + + const pubDateStr = this.getDirectChildText(item, 'pubDate'); + let pubDate: Date | null = null; + if (pubDateStr) { + const parsed = new Date(pubDateStr); + if (!isNaN(parsed.getTime())) { + pubDate = parsed; + } + } + + return { + wpId: parseInt(this.getElementText(item, 'post_id', NS.wp) || '0', 10), + title: this.getDirectChildText(item, 'title'), + slug: this.getElementText(item, 'post_name', NS.wp), + content: this.getElementText(item, 'encoded', NS.content), + excerpt: this.getElementText(item, 'encoded', NS.excerpt), + pubDate, + creator: this.getElementText(item, 'creator', NS.dc), + status: this.getElementText(item, 'status', NS.wp), + postType: this.getElementText(item, 'post_type', NS.wp), + categories, + tags, + }; + } + + private parseMediaItem(item: Element): WxrMedia { + const url = this.getElementText(item, 'attachment_url', NS.wp); + const filename = this.extractFilename(url); + const relativePath = this.extractRelativePath(url); + + const pubDateStr = this.getDirectChildText(item, 'pubDate'); + let pubDate: Date | null = null; + if (pubDateStr) { + const parsed = new Date(pubDateStr); + if (!isNaN(parsed.getTime())) { + pubDate = parsed; + } + } + + return { + wpId: parseInt(this.getElementText(item, 'post_id', NS.wp) || '0', 10), + title: this.getDirectChildText(item, 'title'), + url, + filename, + relativePath, + pubDate, + parentId: parseInt(this.getElementText(item, 'post_parent', NS.wp) || '0', 10), + mimeType: this.inferMimeType(filename), + description: this.getElementText(item, 'encoded', NS.content), + }; + } + + private extractFilename(url: string): string { + if (!url) return ''; + try { + const pathname = new URL(url).pathname; + return pathname.split('/').pop() || ''; + } catch { + return url.split('/').pop() || ''; + } + } + + private extractRelativePath(url: string): string { + if (!url) return ''; + // Extract path after wp-content/uploads/ + const marker = 'wp-content/uploads/'; + const idx = url.indexOf(marker); + if (idx !== -1) { + return url.substring(idx + marker.length); + } + // Fallback: return filename only + return this.extractFilename(url); + } + + private inferMimeType(filename: string): string { + const ext = filename.split('.').pop()?.toLowerCase() || ''; + return EXT_TO_MIME[ext] || 'application/octet-stream'; + } + + /** Get text content of a namespaced child element */ + private getElementText(parent: Element, localName: string, nsUri: string): string { + const elements = parent.getElementsByTagNameNS(nsUri, localName); + for (let i = 0; i < elements.length; i++) { + const el = elements[i]; + // Find first one that is either a direct child or a grandchild (for nested structures) + if (el.parentNode === parent || el.parentNode?.parentNode === parent) { + return this.getTextContent(el); + } + } + return ''; + } + + /** Get text content of a direct child element (no namespace) */ + private getDirectChildText(parent: Element, tagName: string): string { + const children = parent.childNodes; + for (let i = 0; i < children.length; i++) { + const child = children[i]; + if (child.nodeType === 1 && (child as Element).localName === tagName) { + return this.getTextContent(child as Element); + } + } + return ''; + } + + /** Safely extract text content, handling CDATA sections */ + private getTextContent(el: Element): string { + let text = ''; + const children = el.childNodes; + for (let i = 0; i < children.length; i++) { + const child = children[i]; + if (child.nodeType === 3 || child.nodeType === 4) { + // Text node or CDATA section + text += child.nodeValue || ''; + } + } + return text; + } +} diff --git a/src/main/engine/index.ts b/src/main/engine/index.ts index 34f16cb..0d3c917 100644 --- a/src/main/engine/index.ts +++ b/src/main/engine/index.ts @@ -50,5 +50,22 @@ export { type SendMessageResult, type ModelInfo, } from './OpenCodeManager'; - - +export { + WxrParser, + type WxrData, + type WxrPost, + type WxrMedia, + type WxrSiteInfo, + type WxrCategory, + type WxrTag, +} from './WxrParser'; +export { + ImportAnalysisEngine, + type ImportAnalysisReport, + type AnalyzedPost, + type AnalyzedMedia, + type AnalyzedCategory, + type AnalyzedTag, + type PostAnalysisStatus, + type MediaAnalysisStatus, +} from './ImportAnalysisEngine'; diff --git a/src/main/ipc/handlers.ts b/src/main/ipc/handlers.ts index 02be88a..5b83651 100644 --- a/src/main/ipc/handlers.ts +++ b/src/main/ipc/handlers.ts @@ -745,6 +745,68 @@ export function registerIpcHandlers(): void { return engine.rebuildFromSidecars(); }); + // ============ Import Analysis Handlers ============ + + safeHandle('import:selectAndAnalyze', async (_, uploadsFolder?: string) => { + const result = await dialog.showOpenDialog({ + title: 'Select WordPress Export File (WXR)', + filters: [ + { name: 'WordPress Export', extensions: ['xml'] }, + { name: 'All Files', extensions: ['*'] }, + ], + properties: ['openFile'], + }); + + if (result.canceled || result.filePaths.length === 0) { + return null; + } + + const filePath = result.filePaths[0]; + const { WxrParser } = await import('../engine/WxrParser'); + const { ImportAnalysisEngine } = await import('../engine/ImportAnalysisEngine'); + + const parser = new WxrParser(); + const wxrData = await parser.parseFile(filePath); + + const analysisEngine = new ImportAnalysisEngine(); + const projectEngine = getProjectEngine(); + const activeProject = await projectEngine.getActiveProject(); + if (activeProject) { + analysisEngine.setProjectContext(activeProject.id); + } + + return analysisEngine.analyzeWxr(wxrData, filePath, uploadsFolder || undefined); + }); + + safeHandle('import:analyzeFile', async (_, filePath: string, uploadsFolder?: string) => { + const { WxrParser } = await import('../engine/WxrParser'); + const { ImportAnalysisEngine } = await import('../engine/ImportAnalysisEngine'); + + const parser = new WxrParser(); + const wxrData = await parser.parseFile(filePath); + + const analysisEngine = new ImportAnalysisEngine(); + const projectEngine = getProjectEngine(); + const activeProject = await projectEngine.getActiveProject(); + if (activeProject) { + analysisEngine.setProjectContext(activeProject.id); + } + + return analysisEngine.analyzeWxr(wxrData, filePath, uploadsFolder || undefined); + }); + + safeHandle('import:selectUploadsFolder', async () => { + const result = await dialog.showOpenDialog({ + title: 'Select WordPress Uploads Folder', + properties: ['openDirectory'], + }); + + if (result.canceled || result.filePaths.length === 0) { + return null; + } + return result.filePaths[0]; + }); + // ============ Event Forwarding ============ // Forward engine events to renderer diff --git a/src/main/preload.ts b/src/main/preload.ts index 8487000..04e86c8 100644 --- a/src/main/preload.ts +++ b/src/main/preload.ts @@ -150,6 +150,13 @@ contextBridge.exposeInMainWorld('electronAPI', { syncFromPosts: () => ipcRenderer.invoke('tags:syncFromPosts'), }, + // Import Analysis + import: { + selectAndAnalyze: (uploadsFolder?: string) => ipcRenderer.invoke('import:selectAndAnalyze', uploadsFolder), + analyzeFile: (filePath: string, uploadsFolder?: string) => ipcRenderer.invoke('import:analyzeFile', filePath, uploadsFolder), + selectUploadsFolder: () => ipcRenderer.invoke('import:selectUploadsFolder'), + }, + // AI Chat (OpenCode Zen API integration) chat: { // API Key Management @@ -312,6 +319,11 @@ export interface ElectronAPI { getPostsWithTag: (tagId: string) => Promise; syncFromPosts: () => Promise; }; + import: { + selectAndAnalyze: (uploadsFolder?: string) => Promise; + analyzeFile: (filePath: string, uploadsFolder?: string) => Promise; + selectUploadsFolder: () => Promise; + }; chat: { // API Key Management checkReady: () => Promise<{ ready: boolean; error?: string; backend?: string }>; diff --git a/src/renderer/components/ActivityBar/ActivityBar.tsx b/src/renderer/components/ActivityBar/ActivityBar.tsx index 206775a..516dd61 100644 --- a/src/renderer/components/ActivityBar/ActivityBar.tsx +++ b/src/renderer/components/ActivityBar/ActivityBar.tsx @@ -37,6 +37,12 @@ const ChatIcon = () => ( ); +const ImportIcon = () => ( + + + +); + const SyncIcon = () => ( @@ -60,6 +66,9 @@ export const ActivityBar: React.FC = () => { // Check if chat sidebar is active (activeView === 'chat' and sidebar is visible) const isChatActive = activeView === 'chat' && sidebarVisible; + // Check if import tab is currently active + const isImportTabActive = tabs.some(t => t.type === 'import' && t.id === activeTabId); + // Handle view click - toggle sidebar if clicking on active view, otherwise switch view const handleViewClick = (view: 'posts' | 'media' | 'chat') => { if (activeView === view && sidebarVisible) { @@ -96,6 +105,11 @@ export const ActivityBar: React.FC = () => { openTab({ type: 'tags', id: 'tags', isTransient: false }); }; + const handleImportClick = () => { + // Open import as a dedicated (non-transient) tab + openTab({ type: 'import', id: 'import', isTransient: false }); + }; + return (
@@ -127,6 +141,13 @@ export const ActivityBar: React.FC = () => { > +
diff --git a/src/renderer/components/Editor/Editor.tsx b/src/renderer/components/Editor/Editor.tsx index b29f48a..8486881 100644 --- a/src/renderer/components/Editor/Editor.tsx +++ b/src/renderer/components/Editor/Editor.tsx @@ -12,6 +12,7 @@ import { SettingsView } from '../SettingsView'; import { TagsView } from '../TagsView'; import { TagInput } from '../TagInput'; import { ChatPanel } from '../ChatPanel'; +import { ImportAnalysisView } from '../ImportAnalysisView'; import { AutoSaveManager } from '../../utils'; import { parseMacros, getMacro } from '../../macros/registry'; import { PostSearchModal } from '../PostSearchModal'; @@ -1531,6 +1532,7 @@ export const Editor: React.FC = () => { const showSettings = activeTab?.type === 'settings' || (activeView === 'settings' && !activeTab); const showTags = activeTab?.type === 'tags' || (activeView === 'tags' && !activeTab); const showChat = activeTab?.type === 'chat'; + const showImport = activeTab?.type === 'import'; // Clear selectedPostId if the post doesn't exist (e.g., after project switch) useEffect(() => { @@ -1619,6 +1621,17 @@ export const Editor: React.FC = () => { ); } + // Show import analysis if import tab is active + if (showImport) { + return ( +
+ + {renderErrorModal()} + {renderConfirmDeleteModal()} +
+ ); + } + // Show post editor if a post tab is active if (showPost && activeTabId) { const post = posts.find(p => p.id === activeTabId); diff --git a/src/renderer/components/ImportAnalysisView/ImportAnalysisView.css b/src/renderer/components/ImportAnalysisView/ImportAnalysisView.css new file mode 100644 index 0000000..035e360 --- /dev/null +++ b/src/renderer/components/ImportAnalysisView/ImportAnalysisView.css @@ -0,0 +1,381 @@ +.import-analysis { + display: flex; + flex-direction: column; + height: 100%; + overflow-y: auto; + padding: 24px; + gap: 20px; +} + +.import-analysis-header { + display: flex; + flex-direction: column; + gap: 4px; +} + +.import-analysis-header h2 { + margin: 0; + font-size: 18px; + font-weight: 600; + color: var(--vscode-foreground); +} + +.import-analysis-header p { + margin: 0; + font-size: 12px; + color: var(--vscode-descriptionForeground); +} + +/* File selection area */ +.import-file-selectors { + display: flex; + flex-direction: column; + gap: 12px; + background: var(--vscode-sideBar-background); + padding: 16px; + border-radius: 6px; +} + +.import-file-row { + display: flex; + align-items: center; + gap: 10px; +} + +.import-file-row label { + font-size: 12px; + font-weight: 600; + text-transform: uppercase; + letter-spacing: 0.5px; + color: var(--vscode-descriptionForeground); + min-width: 100px; + flex-shrink: 0; +} + +.import-file-path { + flex: 1; + font-size: 12px; + color: var(--vscode-foreground); + background: var(--vscode-input-background); + border: 1px solid var(--vscode-input-border, transparent); + border-radius: 4px; + padding: 6px 10px; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.import-file-path.placeholder { + color: var(--vscode-input-placeholderForeground); + font-style: italic; +} + +.import-file-row button { + padding: 6px 12px; + font-size: 12px; + border: none; + border-radius: 4px; + cursor: pointer; + white-space: nowrap; + background: var(--vscode-button-background); + color: var(--vscode-button-foreground); +} + +.import-file-row button:hover { + background: var(--vscode-button-hoverBackground); +} + +.import-analyze-btn { + padding: 8px 20px; + font-size: 13px; + font-weight: 600; + border: none; + border-radius: 4px; + cursor: pointer; + background: var(--vscode-button-background); + color: var(--vscode-button-foreground); + align-self: flex-start; +} + +.import-analyze-btn:hover { + background: var(--vscode-button-hoverBackground); +} + +.import-analyze-btn:disabled { + opacity: 0.5; + cursor: not-allowed; +} + +/* Loading state */ +.import-loading { + display: flex; + align-items: center; + gap: 10px; + padding: 20px; + font-size: 13px; + color: var(--vscode-descriptionForeground); +} + +.import-spinner { + width: 18px; + height: 18px; + border: 2px solid var(--vscode-descriptionForeground); + border-top-color: var(--vscode-button-background); + border-radius: 50%; + animation: spin 0.8s linear infinite; +} + +@keyframes spin { + to { transform: rotate(360deg); } +} + +/* Site info card */ +.import-site-info { + display: flex; + gap: 20px; + background: var(--vscode-sideBar-background); + padding: 16px; + border-radius: 6px; +} + +.import-site-info-item { + display: flex; + flex-direction: column; + gap: 2px; +} + +.import-site-info-item .info-label { + font-size: 11px; + text-transform: uppercase; + letter-spacing: 0.5px; + font-weight: 600; + color: var(--vscode-descriptionForeground); +} + +.import-site-info-item .info-value { + font-size: 13px; + color: var(--vscode-foreground); +} + +/* Stat cards grid */ +.import-stat-cards { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); + gap: 12px; +} + +.import-stat-card { + background: var(--vscode-sideBar-background); + border-radius: 6px; + padding: 16px; +} + +.import-stat-card h3 { + margin: 0 0 10px 0; + font-size: 11px; + text-transform: uppercase; + letter-spacing: 0.5px; + font-weight: 600; + color: var(--vscode-descriptionForeground); +} + +.import-stat-number { + font-size: 32px; + font-weight: 600; + margin-bottom: 4px; + color: var(--vscode-foreground); +} + +.import-stat-label { + font-size: 12px; + text-transform: uppercase; + letter-spacing: 0.5px; + color: var(--vscode-descriptionForeground); +} + +.import-stat-breakdown { + display: flex; + flex-wrap: wrap; + gap: 6px; + margin-top: 8px; +} + +.import-stat-tag { + font-size: 11px; + padding: 2px 8px; + border-radius: 10px; + background: var(--vscode-badge-background); + color: var(--vscode-badge-foreground); +} + +.import-stat-tag.stat-new { + background: rgba(115, 201, 145, 0.15); + color: #73c991; +} + +.import-stat-tag.stat-update { + background: rgba(117, 190, 255, 0.15); + color: #75beff; +} + +.import-stat-tag.stat-conflict { + background: rgba(244, 135, 113, 0.15); + color: #f48771; +} + +.import-stat-tag.stat-duplicate { + background: rgba(204, 167, 0, 0.15); + color: #cca700; +} + +.import-stat-tag.stat-missing { + background: rgba(150, 150, 150, 0.15); + color: #969696; +} + +/* Detail sections */ +.import-detail-section { + background: var(--vscode-sideBar-background); + border-radius: 6px; + padding: 16px; +} + +.import-detail-section h3 { + margin: 0 0 12px 0; + font-size: 13px; + font-weight: 600; + color: var(--vscode-foreground); + display: flex; + align-items: center; + gap: 8px; + cursor: pointer; + user-select: none; +} + +.import-detail-section h3 .toggle-icon { + font-size: 10px; + transition: transform 0.15s; +} + +.import-detail-section h3 .toggle-icon.open { + transform: rotate(90deg); +} + +/* Detail tables */ +.import-detail-table { + width: 100%; + border-collapse: collapse; + font-size: 12px; +} + +.import-detail-table th { + text-align: left; + padding: 6px 10px; + font-weight: 600; + font-size: 11px; + text-transform: uppercase; + letter-spacing: 0.5px; + color: var(--vscode-descriptionForeground); + border-bottom: 1px solid var(--vscode-panel-border, rgba(255,255,255,0.1)); +} + +.import-detail-table td { + padding: 6px 10px; + color: var(--vscode-foreground); + border-bottom: 1px solid var(--vscode-panel-border, rgba(255,255,255,0.06)); +} + +.import-detail-table tr:last-child td { + border-bottom: none; +} + +.import-detail-table .status-badge { + display: inline-block; + font-size: 10px; + font-weight: 600; + text-transform: uppercase; + letter-spacing: 0.5px; + padding: 2px 8px; + border-radius: 10px; +} + +.import-detail-table .status-badge.new { + background: rgba(115, 201, 145, 0.15); + color: #73c991; +} + +.import-detail-table .status-badge.update { + background: rgba(117, 190, 255, 0.15); + color: #75beff; +} + +.import-detail-table .status-badge.conflict { + background: rgba(244, 135, 113, 0.15); + color: #f48771; +} + +.import-detail-table .status-badge.content-duplicate { + background: rgba(204, 167, 0, 0.15); + color: #cca700; +} + +.import-detail-table .status-badge.missing { + background: rgba(150, 150, 150, 0.15); + color: #969696; +} + +.import-detail-table .slug-cell { + font-family: var(--vscode-editor-font-family, monospace); + font-size: 11px; + color: var(--vscode-descriptionForeground); +} + +.import-detail-table .existing-match { + font-size: 11px; + color: var(--vscode-descriptionForeground); +} + +/* Tag/category pills */ +.import-taxonomy-list { + display: flex; + flex-wrap: wrap; + gap: 6px; +} + +.import-taxonomy-pill { + font-size: 11px; + padding: 3px 10px; + border-radius: 10px; + background: var(--vscode-badge-background); + color: var(--vscode-badge-foreground); +} + +.import-taxonomy-pill.exists { + background: rgba(115, 201, 145, 0.15); + color: #73c991; +} + +.import-taxonomy-pill.new-tax { + background: rgba(117, 190, 255, 0.15); + color: #75beff; +} + +/* Empty state */ +.import-empty-state { + display: flex; + flex-direction: column; + align-items: center; + justify-content: center; + padding: 60px 20px; + gap: 12px; + color: var(--vscode-descriptionForeground); +} + +.import-empty-state svg { + opacity: 0.3; +} + +.import-empty-state p { + margin: 0; + font-size: 13px; +} diff --git a/src/renderer/components/ImportAnalysisView/ImportAnalysisView.tsx b/src/renderer/components/ImportAnalysisView/ImportAnalysisView.tsx new file mode 100644 index 0000000..f46cabb --- /dev/null +++ b/src/renderer/components/ImportAnalysisView/ImportAnalysisView.tsx @@ -0,0 +1,432 @@ +import React, { useState, useCallback } from 'react'; +import { useAppStore } from '../../store'; +import './ImportAnalysisView.css'; + +interface AnalysisReport { + sourceFile: string; + site: { title: string; link: string; description: string; language: string }; + analyzedAt: string; + posts: ItemSection; + pages: ItemSection; + media: MediaSection; + categories: TaxonomyItem[]; + tags: TaxonomyItem[]; +} + +interface ItemSection { + total: number; + new: number; + updates: number; + conflicts: number; + contentDuplicates: number; + items: AnalyzedPostItem[]; +} + +interface MediaSection { + total: number; + new: number; + updates: number; + conflicts: number; + contentDuplicates: number; + missing: number; + items: AnalyzedMediaItem[]; +} + +interface AnalyzedPostItem { + wxrPost: { wpId: number; title: string; slug: string; status: string }; + status: string; + contentHash: string; + markdownPreview: string; + existingPost?: { id: string; title: string; slug: string }; +} + +interface AnalyzedMediaItem { + wxrMedia: { wpId: number; title: string; filename: string; url: string; relativePath: string }; + status: string; + fileHash: string | null; + existingMedia?: { id: string; originalName: string }; +} + +interface TaxonomyItem { + name: string; + slug: string; + existsInProject: boolean; +} + +export const ImportAnalysisView: React.FC = () => { + const { importAnalysis, importAnalysisLoading, setImportAnalysis, setImportAnalysisLoading } = useAppStore(); + const [uploadsFolder, setUploadsFolder] = useState(null); + const [expandedSections, setExpandedSections] = useState>({}); + + const report = importAnalysis as AnalysisReport | null; + + const handleSelectUploadsFolder = useCallback(async () => { + const folder = await window.electronAPI?.import.selectUploadsFolder(); + if (folder) { + setUploadsFolder(folder); + } + }, []); + + const handleSelectAndAnalyze = useCallback(async () => { + setImportAnalysisLoading(true); + setImportAnalysis(null); + try { + const result = await window.electronAPI?.import.selectAndAnalyze(uploadsFolder || undefined); + if (result) { + setImportAnalysis(result); + } + } catch (error) { + console.error('Import analysis failed:', error); + } finally { + setImportAnalysisLoading(false); + } + }, [uploadsFolder, setImportAnalysis, setImportAnalysisLoading]); + + const toggleSection = useCallback((section: string) => { + setExpandedSections(prev => ({ ...prev, [section]: !prev[section] })); + }, []); + + return ( +
+
+

Import Analysis

+

Select a WordPress export file (WXR) and an uploads folder to analyze what would be imported.

+
+ +
+
+ +
+ {uploadsFolder || 'No folder selected'} +
+ +
+
+ +
+ {report?.sourceFile || 'Select a file to analyze'} +
+ +
+
+ + {importAnalysisLoading && ( +
+
+ Analyzing WXR file... +
+ )} + + {!report && !importAnalysisLoading && ( +
+ + + +

Select a WordPress export file to begin analysis.

+
+ )} + + {report && !importAnalysisLoading && ( + <> + + + + {report.posts.conflicts > 0 && ( + i.status === 'conflict')} + expanded={expandedSections['post-conflicts'] ?? true} + onToggle={() => toggleSection('post-conflicts')} + /> + )} + + {report.pages.conflicts > 0 && ( + i.status === 'conflict')} + expanded={expandedSections['page-conflicts'] ?? true} + onToggle={() => toggleSection('page-conflicts')} + /> + )} + + toggleSection('posts')} + /> + + {report.pages.total > 0 && ( + toggleSection('pages')} + /> + )} + + toggleSection('media')} + /> + + {(report.categories.length > 0 || report.tags.length > 0) && ( + toggleSection('taxonomy')} + /> + )} + + )} +
+ ); +}; + +const SiteInfoCard: React.FC<{ site: AnalysisReport['site']; sourceFile: string }> = ({ site, sourceFile }) => ( +
+
+ Site + {site.title || 'Untitled'} +
+
+ URL + {site.link || 'N/A'} +
+
+ Language + {site.language || 'N/A'} +
+
+ File + {sourceFile.split(/[/\\]/).pop()} +
+
+); + +const StatCards: React.FC<{ report: AnalysisReport }> = ({ report }) => ( +
+
+

Posts

+
{report.posts.total}
+
+ {report.posts.new > 0 && {report.posts.new} new} + {report.posts.updates > 0 && {report.posts.updates} update} + {report.posts.conflicts > 0 && {report.posts.conflicts} conflict} + {report.posts.contentDuplicates > 0 && {report.posts.contentDuplicates} duplicate} +
+
+ +
+

Pages

+
{report.pages.total}
+
+ {report.pages.new > 0 && {report.pages.new} new} + {report.pages.updates > 0 && {report.pages.updates} update} + {report.pages.conflicts > 0 && {report.pages.conflicts} conflict} + {report.pages.contentDuplicates > 0 && {report.pages.contentDuplicates} duplicate} +
+
+ +
+

Media

+
{report.media.total}
+
+ {report.media.new > 0 && {report.media.new} new} + {report.media.updates > 0 && {report.media.updates} update} + {report.media.conflicts > 0 && {report.media.conflicts} conflict} + {report.media.contentDuplicates > 0 && {report.media.contentDuplicates} duplicate} + {report.media.missing > 0 && {report.media.missing} missing} +
+
+ +
+

Categories

+
{report.categories.length}
+
+ {report.categories.filter(c => c.existsInProject).length > 0 && ( + {report.categories.filter(c => c.existsInProject).length} existing + )} + {report.categories.filter(c => !c.existsInProject).length > 0 && ( + {report.categories.filter(c => !c.existsInProject).length} new + )} +
+
+ +
+

Tags

+
{report.tags.length}
+
+ {report.tags.filter(t => t.existsInProject).length > 0 && ( + {report.tags.filter(t => t.existsInProject).length} existing + )} + {report.tags.filter(t => !t.existsInProject).length > 0 && ( + {report.tags.filter(t => !t.existsInProject).length} new + )} +
+
+
+); + +const ConflictsSection: React.FC<{ + title: string; + items: AnalyzedPostItem[]; + expanded: boolean; + onToggle: () => void; +}> = ({ title, items, expanded, onToggle }) => ( +
+

+ + {title} ({items.length}) +

+ {expanded && ( + + + + + + + + + + {items.map((item, idx) => ( + + + + + + ))} + +
SlugWXR TitleExisting Title
{item.wxrPost.slug}{item.wxrPost.title}{item.existingPost?.title || '--'}
+ )} +
+); + +const PostDetailSection: React.FC<{ + title: string; + items: AnalyzedPostItem[]; + expanded: boolean; + onToggle: () => void; +}> = ({ title, items, expanded, onToggle }) => ( +
+

+ + {title} +

+ {expanded && ( + + + + + + + + + + + + {items.map((item, idx) => ( + + + + + + + + ))} + +
StatusTitleSlugWP StatusExisting Match
{item.status}{item.wxrPost.title}{item.wxrPost.slug}{item.wxrPost.status}{item.existingPost?.title || '--'}
+ )} +
+); + +const MediaDetailSection: React.FC<{ + title: string; + items: AnalyzedMediaItem[]; + expanded: boolean; + onToggle: () => void; +}> = ({ title, items, expanded, onToggle }) => ( +
+

+ + {title} +

+ {expanded && ( + + + + + + + + + + + {items.map((item, idx) => ( + + + + + + + ))} + +
StatusFilenamePathExisting Match
{item.status}{item.wxrMedia.filename}{item.wxrMedia.relativePath}{item.existingMedia?.originalName || '--'}
+ )} +
+); + +const TaxonomySection: React.FC<{ + categories: TaxonomyItem[]; + tags: TaxonomyItem[]; + expanded: boolean; + onToggle: () => void; +}> = ({ categories, tags, expanded, onToggle }) => ( +
+

+ + Categories & Tags +

+ {expanded && ( + <> + {categories.length > 0 && ( +
+
+ Categories +
+
+ {categories.map((cat, idx) => ( + + {cat.name} + + ))} +
+
+ )} + {tags.length > 0 && ( +
+
+ Tags +
+
+ {tags.map((tag, idx) => ( + + {tag.name} + + ))} +
+
+ )} + + )} +
+); diff --git a/src/renderer/components/ImportAnalysisView/index.ts b/src/renderer/components/ImportAnalysisView/index.ts new file mode 100644 index 0000000..021f4d3 --- /dev/null +++ b/src/renderer/components/ImportAnalysisView/index.ts @@ -0,0 +1 @@ +export { ImportAnalysisView } from './ImportAnalysisView'; diff --git a/src/renderer/components/TabBar/TabBar.tsx b/src/renderer/components/TabBar/TabBar.tsx index da4331f..88be57e 100644 --- a/src/renderer/components/TabBar/TabBar.tsx +++ b/src/renderer/components/TabBar/TabBar.tsx @@ -32,13 +32,17 @@ const getTabTitle = ( const title = chatTitles.get(tab.id); if (title && title !== 'New Chat') { // Truncate long titles for display - return title.length > MAX_CHAT_TITLE_LENGTH + return title.length > MAX_CHAT_TITLE_LENGTH ? title.substring(0, MAX_CHAT_TITLE_LENGTH) + '…' : title; } return 'New Chat'; } - + + if (tab.type === 'import') { + return 'Import Analysis'; + } + return 'Unknown'; }; @@ -74,6 +78,12 @@ const getTabIcon = (tab: Tab): React.ReactNode => { ); + case 'import': + return ( + + + + ); default: return ( diff --git a/src/renderer/components/index.ts b/src/renderer/components/index.ts index e9f727b..3f96c5c 100644 --- a/src/renderer/components/index.ts +++ b/src/renderer/components/index.ts @@ -19,3 +19,4 @@ export { LinkedMediaPanel } from './LinkedMediaPanel'; export { ErrorModal, type ErrorDetails } from './ErrorModal'; export { ConfirmDeleteModal, type ConfirmDeleteDetails, type DeleteReference } from './ConfirmDeleteModal'; export { ChatPanel } from './ChatPanel'; +export { ImportAnalysisView } from './ImportAnalysisView'; diff --git a/src/renderer/store/appStore.ts b/src/renderer/store/appStore.ts index 670ce90..bdd2643 100644 --- a/src/renderer/store/appStore.ts +++ b/src/renderer/store/appStore.ts @@ -6,7 +6,7 @@ import type { DeleteReference, ConfirmDeleteDetails } from '../components/Confir const STORAGE_KEY = 'bds-app-state'; // Tab types -export type TabType = 'post' | 'media' | 'settings' | 'tags' | 'chat'; +export type TabType = 'post' | 'media' | 'settings' | 'tags' | 'chat' | 'import'; export interface Tab { type: TabType; @@ -93,7 +93,7 @@ interface AppState { activeTabId: string | null; // UI State - activeView: 'posts' | 'media' | 'settings' | 'tags' | 'chat'; + activeView: 'posts' | 'media' | 'settings' | 'tags' | 'chat' | 'import'; sidebarVisible: boolean; panelVisible: boolean; selectedPostId: string | null; @@ -126,7 +126,11 @@ interface AppState { // Loading states isLoading: boolean; error: string | null; - + + // Import Analysis + importAnalysis: unknown | null; + importAnalysisLoading: boolean; + // Project Actions setProjects: (projects: ProjectData[]) => void; setActiveProject: (project: ProjectData | null) => void; @@ -144,7 +148,7 @@ interface AppState { restoreTabState: (state: TabState) => void; // Actions - setActiveView: (view: 'posts' | 'media' | 'settings' | 'tags' | 'chat') => void; + setActiveView: (view: 'posts' | 'media' | 'settings' | 'tags' | 'chat' | 'import') => void; toggleSidebar: () => void; togglePanel: () => void; setSelectedPost: (id: string | null) => void; @@ -184,6 +188,10 @@ interface AppState { setLoading: (loading: boolean) => void; setError: (error: string | null) => void; + + // Import Analysis Actions + setImportAnalysis: (report: unknown | null) => void; + setImportAnalysisLoading: (loading: boolean) => void; } export const useAppStore = create()( @@ -231,7 +239,11 @@ export const useAppStore = create()( // Initial Loading State isLoading: false, error: null, - + + // Import Analysis State + importAnalysis: null, + importAnalysisLoading: false, + // Project Actions setProjects: (projects) => set({ projects }), setActiveProject: (activeProject) => set({ activeProject }), @@ -405,6 +417,10 @@ export const useAppStore = create()( // Loading Actions setLoading: (isLoading) => set({ isLoading }), setError: (error) => set({ error }), + + // Import Analysis Actions + setImportAnalysis: (importAnalysis) => set({ importAnalysis }), + setImportAnalysisLoading: (importAnalysisLoading) => set({ importAnalysisLoading }), }), { name: STORAGE_KEY, diff --git a/src/renderer/types/electron.d.ts b/src/renderer/types/electron.d.ts index 8c9ca1b..8a1dc0d 100644 --- a/src/renderer/types/electron.d.ts +++ b/src/renderer/types/electron.d.ts @@ -381,6 +381,11 @@ export interface ElectronAPI { getPostsWithTag: (tagId: string) => Promise; syncFromPosts: () => Promise; }; + import: { + selectAndAnalyze: (uploadsFolder?: string) => Promise; + analyzeFile: (filePath: string, uploadsFolder?: string) => Promise; + selectUploadsFolder: () => Promise; + }; chat: { // API Key Management checkReady: () => Promise; diff --git a/tests/engine/ImportAnalysisEngine.test.ts b/tests/engine/ImportAnalysisEngine.test.ts new file mode 100644 index 0000000..40e851e --- /dev/null +++ b/tests/engine/ImportAnalysisEngine.test.ts @@ -0,0 +1,537 @@ +/** + * ImportAnalysisEngine Unit Tests + * + * Tests the REAL ImportAnalysisEngine class with mocked dependencies. + * Following TDD: mock database and filesystem, test real analysis logic. + */ + +import { describe, it, expect, beforeEach, vi } from 'vitest'; +import { ImportAnalysisEngine } from '../../src/main/engine/ImportAnalysisEngine'; +import type { ImportAnalysisReport, AnalyzedPost, AnalyzedMedia } from '../../src/main/engine/ImportAnalysisEngine'; +import type { WxrData, WxrPost, WxrMedia, WxrSiteInfo } from '../../src/main/engine/WxrParser'; +import crypto from 'crypto'; + +// Mock data stores +const mockPostRows: any[] = []; +const mockMediaRows: any[] = []; +const mockTagRows: any[] = []; + +function createSelectChain() { + return { + from: vi.fn().mockReturnThis(), + where: vi.fn().mockReturnThis(), + all: vi.fn().mockImplementation(() => { + // Return appropriate data based on the table being queried + return Promise.resolve([]); + }), + get: vi.fn().mockImplementation(() => Promise.resolve(undefined)), + }; +} + +const mockLocalDb = { + select: vi.fn(() => { + const chain = createSelectChain(); + // The chain.all will be overridden per test + return chain; + }), +}; + +// Mock the database module +vi.mock('../../src/main/database', () => ({ + getDatabase: vi.fn(() => ({ + getLocal: vi.fn(() => mockLocalDb), + })), +})); + +// Mock fs/promises for media file reading +const mockFileBuffers = new Map(); +vi.mock('fs/promises', () => ({ + readFile: vi.fn(async (path: string) => { + const buffer = mockFileBuffers.get(path.replace(/\\/g, '/')); + if (!buffer) { + const error = new Error(`ENOENT: no such file or directory, open '${path}'`); + (error as any).code = 'ENOENT'; + throw error; + } + return buffer; + }), + stat: vi.fn(async (path: string) => { + const buffer = mockFileBuffers.get(path.replace(/\\/g, '/')); + if (!buffer) { + const error = new Error(`ENOENT: no such file or directory, stat '${path}'`); + (error as any).code = 'ENOENT'; + throw error; + } + return { size: buffer.length }; + }), + access: vi.fn(async (path: string) => { + const normalizedPath = path.replace(/\\/g, '/'); + if (!mockFileBuffers.has(normalizedPath)) { + const error = new Error(`ENOENT`); + (error as any).code = 'ENOENT'; + throw error; + } + }), +})); + +// Helper to create a WxrPost +function createWxrPost(overrides: Partial = {}): WxrPost { + return { + wpId: 1, + title: 'Test Post', + slug: 'test-post', + content: '

Test content

', + excerpt: '', + pubDate: new Date('2024-01-15'), + creator: 'admin', + status: 'publish', + postType: 'post', + categories: [], + tags: [], + ...overrides, + }; +} + +// Helper to create a WxrMedia +function createWxrMedia(overrides: Partial = {}): WxrMedia { + return { + wpId: 100, + title: 'test-image', + url: 'https://example.com/wp-content/uploads/2024/01/test.jpg', + filename: 'test.jpg', + relativePath: '2024/01/test.jpg', + pubDate: null, + parentId: 0, + mimeType: 'image/jpeg', + description: '', + ...overrides, + }; +} + +// Helper to create WxrData +function createWxrData(overrides: Partial = {}): WxrData { + return { + site: { + title: 'Test Blog', + link: 'https://example.com', + description: 'A test blog', + language: 'en', + }, + posts: [], + pages: [], + media: [], + categories: [], + tags: [], + ...overrides, + }; +} + +// Helper to compute expected MD5 hash (same algo as PostEngine) +function md5(content: string): string { + return crypto.createHash('md5').update(content).digest('hex'); +} + +describe('ImportAnalysisEngine', () => { + let engine: ImportAnalysisEngine; + + beforeEach(() => { + vi.clearAllMocks(); + mockPostRows.length = 0; + mockMediaRows.length = 0; + mockTagRows.length = 0; + mockFileBuffers.clear(); + engine = new ImportAnalysisEngine(); + engine.setProjectContext('test-project'); + }); + + describe('analyzeWxr - posts', () => { + it('should classify a post as new when slug and hash do not exist in DB', async () => { + // DB has no existing posts + setupDbReturns([], [], []); + + const wxrData = createWxrData({ + posts: [createWxrPost({ slug: 'new-post', content: '

New content

' })], + }); + + const report = await engine.analyzeWxr(wxrData, '/path/to/export.xml'); + + expect(report.posts.total).toBe(1); + expect(report.posts.new).toBe(1); + expect(report.posts.items[0].status).toBe('new'); + }); + + it('should classify a post as update when slug AND hash match', async () => { + // The engine converts HTML to markdown then hashes it + //

Existing content

-> "Existing content\n" in turndown (approx) + // We need to compute what turndown gives us and hash that + const markdownContent = 'Existing content'; + const hash = md5(markdownContent); + + setupDbReturns([ + { id: 'existing-1', slug: 'existing-post', title: 'Existing Post', checksum: hash }, + ], [], []); + + const wxrData = createWxrData({ + posts: [createWxrPost({ slug: 'existing-post', content: '

Existing content

' })], + }); + + const report = await engine.analyzeWxr(wxrData, '/path/to/export.xml'); + + expect(report.posts.total).toBe(1); + expect(report.posts.updates).toBe(1); + expect(report.posts.items[0].status).toBe('update'); + expect(report.posts.items[0].existingPost?.id).toBe('existing-1'); + }); + + it('should classify a post as conflict when slug matches but hash differs', async () => { + setupDbReturns([ + { id: 'existing-1', slug: 'my-post', title: 'My Post', checksum: 'different-hash' }, + ], [], []); + + const wxrData = createWxrData({ + posts: [createWxrPost({ slug: 'my-post', content: '

Changed content

' })], + }); + + const report = await engine.analyzeWxr(wxrData, '/path/to/export.xml'); + + expect(report.posts.total).toBe(1); + expect(report.posts.conflicts).toBe(1); + expect(report.posts.items[0].status).toBe('conflict'); + expect(report.posts.items[0].existingPost?.id).toBe('existing-1'); + }); + + it('should classify a post as content-duplicate when hash matches but slug differs', async () => { + const markdownContent = 'Same content here'; + const hash = md5(markdownContent); + + setupDbReturns([ + { id: 'other-post', slug: 'different-slug', title: 'Different Title', checksum: hash }, + ], [], []); + + const wxrData = createWxrData({ + posts: [createWxrPost({ slug: 'my-original-slug', content: '

Same content here

' })], + }); + + const report = await engine.analyzeWxr(wxrData, '/path/to/export.xml'); + + expect(report.posts.total).toBe(1); + expect(report.posts.contentDuplicates).toBe(1); + expect(report.posts.items[0].status).toBe('content-duplicate'); + expect(report.posts.items[0].existingPost?.id).toBe('other-post'); + }); + + it('should analyze multiple posts with mixed statuses', async () => { + const existingContent = 'Unchanged content'; + const existingHash = md5(existingContent); + + setupDbReturns([ + { id: 'post-1', slug: 'unchanged', title: 'Unchanged', checksum: existingHash }, + { id: 'post-2', slug: 'modified', title: 'Modified', checksum: 'old-hash' }, + ], [], []); + + const wxrData = createWxrData({ + posts: [ + createWxrPost({ slug: 'unchanged', content: '

Unchanged content

' }), + createWxrPost({ slug: 'modified', content: '

New modified content

' }), + createWxrPost({ slug: 'brand-new', content: '

Brand new post

' }), + ], + }); + + const report = await engine.analyzeWxr(wxrData, '/test.xml'); + + expect(report.posts.total).toBe(3); + expect(report.posts.updates).toBe(1); + expect(report.posts.conflicts).toBe(1); + expect(report.posts.new).toBe(1); + }); + + it('should include markdown preview in analyzed posts', async () => { + setupDbReturns([], [], []); + + const wxrData = createWxrData({ + posts: [createWxrPost({ content: '

This is a preview of the content.

' })], + }); + + const report = await engine.analyzeWxr(wxrData, '/test.xml'); + + const item = report.posts.items[0]; + expect(item.markdownPreview).toBeTruthy(); + expect(item.markdownPreview.length).toBeGreaterThan(0); + expect(item.markdownPreview.length).toBeLessThanOrEqual(200); + }); + + it('should compute content hash from markdown conversion of HTML', async () => { + setupDbReturns([], [], []); + + const wxrData = createWxrData({ + posts: [createWxrPost({ content: '

Hello world

' })], + }); + + const report = await engine.analyzeWxr(wxrData, '/test.xml'); + + const item = report.posts.items[0]; + expect(item.contentHash).toBeTruthy(); + // Hash should be MD5 of the markdown conversion + expect(item.contentHash).toMatch(/^[a-f0-9]{32}$/); + }); + }); + + describe('analyzeWxr - pages', () => { + it('should analyze pages separately from posts', async () => { + setupDbReturns([], [], []); + + const wxrData = createWxrData({ + posts: [createWxrPost({ slug: 'post-1' })], + pages: [createWxrPost({ slug: 'about', postType: 'page' })], + }); + + const report = await engine.analyzeWxr(wxrData, '/test.xml'); + + expect(report.posts.total).toBe(1); + expect(report.pages.total).toBe(1); + expect(report.pages.items[0].wxrPost.slug).toBe('about'); + }); + }); + + describe('analyzeWxr - media', () => { + it('should classify media as new when filename not in DB and file exists in uploads', async () => { + setupDbReturns([], [], []); + const fileBuffer = Buffer.from('fake image data'); + mockFileBuffers.set('/uploads/2024/01/photo.jpg', fileBuffer); + + const wxrData = createWxrData({ + media: [createWxrMedia({ + filename: 'photo.jpg', + relativePath: '2024/01/photo.jpg', + })], + }); + + const report = await engine.analyzeWxr(wxrData, '/test.xml', '/uploads'); + + expect(report.media.total).toBe(1); + expect(report.media.new).toBe(1); + expect(report.media.items[0].status).toBe('new'); + expect(report.media.items[0].fileHash).toBeTruthy(); + }); + + it('should classify media as update when filename matches AND hash matches', async () => { + const fileBuffer = Buffer.from('same file data'); + const fileHash = md5(fileBuffer.toString('binary')); + mockFileBuffers.set('/uploads/2024/01/logo.png', fileBuffer); + + setupDbReturns([], [ + { id: 'media-1', originalName: 'logo.png', checksum: fileHash }, + ], []); + + const wxrData = createWxrData({ + media: [createWxrMedia({ + filename: 'logo.png', + relativePath: '2024/01/logo.png', + })], + }); + + const report = await engine.analyzeWxr(wxrData, '/test.xml', '/uploads'); + + expect(report.media.total).toBe(1); + expect(report.media.updates).toBe(1); + expect(report.media.items[0].status).toBe('update'); + expect(report.media.items[0].existingMedia?.id).toBe('media-1'); + }); + + it('should classify media as conflict when filename matches but hash differs', async () => { + const fileBuffer = Buffer.from('new file data'); + mockFileBuffers.set('/uploads/2024/01/logo.png', fileBuffer); + + setupDbReturns([], [ + { id: 'media-1', originalName: 'logo.png', checksum: 'old-hash-value' }, + ], []); + + const wxrData = createWxrData({ + media: [createWxrMedia({ + filename: 'logo.png', + relativePath: '2024/01/logo.png', + })], + }); + + const report = await engine.analyzeWxr(wxrData, '/test.xml', '/uploads'); + + expect(report.media.total).toBe(1); + expect(report.media.conflicts).toBe(1); + expect(report.media.items[0].status).toBe('conflict'); + }); + + it('should classify media as content-duplicate when hash matches but filename differs', async () => { + const fileBuffer = Buffer.from('duplicate content'); + const fileHash = md5(fileBuffer.toString('binary')); + mockFileBuffers.set('/uploads/2024/01/new-name.jpg', fileBuffer); + + setupDbReturns([], [ + { id: 'media-1', originalName: 'old-name.jpg', checksum: fileHash }, + ], []); + + const wxrData = createWxrData({ + media: [createWxrMedia({ + filename: 'new-name.jpg', + relativePath: '2024/01/new-name.jpg', + })], + }); + + const report = await engine.analyzeWxr(wxrData, '/test.xml', '/uploads'); + + expect(report.media.total).toBe(1); + expect(report.media.contentDuplicates).toBe(1); + expect(report.media.items[0].status).toBe('content-duplicate'); + }); + + it('should mark media as missing when file not found in uploads folder', async () => { + setupDbReturns([], [], []); + // No file added to mockFileBuffers + + const wxrData = createWxrData({ + media: [createWxrMedia({ + filename: 'missing.jpg', + relativePath: '2024/01/missing.jpg', + })], + }); + + const report = await engine.analyzeWxr(wxrData, '/test.xml', '/uploads'); + + expect(report.media.total).toBe(1); + expect(report.media.missing).toBe(1); + expect(report.media.items[0].status).toBe('missing'); + expect(report.media.items[0].fileHash).toBeNull(); + }); + + it('should handle media analysis without uploads folder (all missing)', async () => { + setupDbReturns([], [], []); + + const wxrData = createWxrData({ + media: [createWxrMedia({ filename: 'test.jpg' })], + }); + + // No uploads folder provided + const report = await engine.analyzeWxr(wxrData, '/test.xml'); + + expect(report.media.total).toBe(1); + expect(report.media.missing).toBe(1); + expect(report.media.items[0].status).toBe('missing'); + }); + }); + + describe('analyzeWxr - categories and tags', () => { + it('should check existing categories against project tags', async () => { + setupDbReturns([], [], [ + { name: 'Technology' }, + ]); + + const wxrData = createWxrData({ + categories: [ + { name: 'Technology', slug: 'technology', parent: '' }, + { name: 'Science', slug: 'science', parent: '' }, + ], + }); + + const report = await engine.analyzeWxr(wxrData, '/test.xml'); + + expect(report.categories).toHaveLength(2); + expect(report.categories[0].existsInProject).toBe(true); + expect(report.categories[1].existsInProject).toBe(false); + }); + + it('should check existing tags against project tags', async () => { + setupDbReturns([], [], [ + { name: 'javascript' }, + ]); + + const wxrData = createWxrData({ + tags: [ + { name: 'javascript', slug: 'javascript' }, + { name: 'python', slug: 'python' }, + ], + }); + + const report = await engine.analyzeWxr(wxrData, '/test.xml'); + + expect(report.tags).toHaveLength(2); + expect(report.tags[0].existsInProject).toBe(true); + expect(report.tags[1].existsInProject).toBe(false); + }); + }); + + describe('analyzeWxr - report metadata', () => { + it('should include source file and site info in report', async () => { + setupDbReturns([], [], []); + + const wxrData = createWxrData({ + site: { + title: 'My Blog', + link: 'https://myblog.com', + description: 'A great blog', + language: 'de-DE', + }, + }); + + const report = await engine.analyzeWxr(wxrData, '/exports/myblog.xml'); + + expect(report.sourceFile).toBe('/exports/myblog.xml'); + expect(report.site.title).toBe('My Blog'); + expect(report.site.link).toBe('https://myblog.com'); + expect(report.analyzedAt).toBeInstanceOf(Date); + }); + + it('should correctly count all post statuses', async () => { + const contentA = 'Content A'; + const hashA = md5(contentA); + + setupDbReturns([ + { id: 'p1', slug: 'update-me', title: 'Update Me', checksum: hashA }, + { id: 'p2', slug: 'conflict-me', title: 'Conflict Me', checksum: 'old-hash' }, + ], [], []); + + const wxrData = createWxrData({ + posts: [ + createWxrPost({ slug: 'update-me', content: '

Content A

' }), + createWxrPost({ slug: 'conflict-me', content: '

Different content

' }), + createWxrPost({ slug: 'new-one', content: '

Brand new

' }), + createWxrPost({ slug: 'another-new', content: '

Also new

' }), + ], + }); + + const report = await engine.analyzeWxr(wxrData, '/test.xml'); + + expect(report.posts.total).toBe(4); + expect(report.posts.updates).toBe(1); + expect(report.posts.conflicts).toBe(1); + expect(report.posts.new).toBe(2); + expect(report.posts.contentDuplicates).toBe(0); + }); + }); +}); + +/** + * Helper to set up mock DB return values. + * Uses a counter-based approach to return different data for different queries. + */ +let dbQueryCount = 0; +function setupDbReturns( + existingPosts: Array<{ id: string; slug: string; title: string; checksum: string }>, + existingMedia: Array<{ id: string; originalName: string; checksum: string }>, + existingTags: Array<{ name: string }>, +) { + dbQueryCount = 0; + mockLocalDb.select.mockImplementation(() => { + const currentQuery = dbQueryCount++; + return { + from: vi.fn().mockReturnValue({ + where: vi.fn().mockReturnValue({ + all: vi.fn().mockImplementation(() => { + if (currentQuery === 0) return Promise.resolve(existingPosts); + if (currentQuery === 1) return Promise.resolve(existingMedia); + if (currentQuery === 2) return Promise.resolve(existingTags); + return Promise.resolve([]); + }), + }), + }), + }; + }); +} diff --git a/tests/engine/WxrParser.test.ts b/tests/engine/WxrParser.test.ts new file mode 100644 index 0000000..005b656 --- /dev/null +++ b/tests/engine/WxrParser.test.ts @@ -0,0 +1,478 @@ +/** + * WxrParser Unit Tests + * + * Tests the REAL WxrParser class with mocked filesystem. + * Following TDD best practices: mock external dependencies, test real implementation. + */ + +import { describe, it, expect, beforeEach, vi } from 'vitest'; +import { WxrParser } from '../../src/main/engine/WxrParser'; +import type { WxrData } from '../../src/main/engine/WxrParser'; + +// Mock fs/promises +vi.mock('fs/promises', () => ({ + readFile: vi.fn(), +})); + +// Minimal valid WXR XML for testing +const MINIMAL_WXR = ` + + + My Test Blog + https://example.com + A test blog + en-US + +`; + +// WXR with categories and tags at channel level +const WXR_WITH_TAXONOMIES = ` + + + My Blog + https://example.com + Test + en + + 1 + technology + + + + + 2 + web-dev + technology + + + + 10 + javascript + + + + 11 + typescript + + + +`; + +// WXR with a single published post +const WXR_WITH_POST = ` + + + My Blog + https://example.com + Test + en + + Hello World + https://example.com/hello-world/ + Mon, 15 Jan 2024 10:30:00 +0000 + + + + + Welcome to my blog. This is my first post.

]]>
+ + 42 + 2024-01-15 10:30:00 + hello-world + publish + post + 0 +
+
+
`; + +// WXR with a page +const WXR_WITH_PAGE = ` + + + My Blog + https://example.com + Test + en + + About Me + About

This is the about page.

]]>
+ + 10 + about + publish + page + 0 + +
+
+
`; + +// WXR with a media attachment +const WXR_WITH_MEDIA = ` + + + My Blog + https://example.com + Test + en + + sunset-photo + + + 100 + sunset-photo + inherit + attachment + 42 + https://example.com/wp-content/uploads/2024/01/sunset.jpg + + _wp_attached_file + 2024/01/sunset.jpg + + + + +`; + +// WXR with mixed content: posts, pages, and media +const WXR_MIXED = ` + + + Full Blog + https://fullblog.com + A full blog export + de-DE + + news + + + + + featured + + + + First Post + Tue, 02 Jan 2024 08:00:00 +0000 + + + + First post content.

]]>
+ + 1 + first-post + publish + post + 0 +
+ + Second Post + Wed, 03 Jan 2024 09:00:00 +0000 + + Second post content.

]]>
+ + 2 + second-post + draft + post + 0 +
+ + Contact + + Contact us here.

]]>
+ + 3 + contact + publish + page + 0 +
+ + logo + + + + 4 + logo + inherit + attachment + 3 + https://fullblog.com/wp-content/uploads/2024/02/logo.png + +
+
`; + +// WXR with draft and trashed posts +const WXR_WITH_STATUSES = ` + + + Blog + https://example.com + + en + + Published Post + Published

]]>
+ + 1 + published-post + publish + post + 0 + +
+ + Draft Post + Draft

]]>
+ + 2 + draft-post + draft + post + 0 + +
+ + Trashed Post + Trash

]]>
+ + 3 + __trashed + trash + post + 0 + +
+
+
`; + +describe('WxrParser', () => { + let parser: WxrParser; + + beforeEach(() => { + parser = new WxrParser(); + }); + + describe('parseXml', () => { + it('should parse minimal WXR and extract site info', () => { + const result = parser.parseXml(MINIMAL_WXR); + + expect(result.site.title).toBe('My Test Blog'); + expect(result.site.link).toBe('https://example.com'); + expect(result.site.description).toBe('A test blog'); + expect(result.site.language).toBe('en-US'); + }); + + it('should return empty arrays when no items exist', () => { + const result = parser.parseXml(MINIMAL_WXR); + + expect(result.posts).toEqual([]); + expect(result.pages).toEqual([]); + expect(result.media).toEqual([]); + expect(result.categories).toEqual([]); + expect(result.tags).toEqual([]); + }); + + it('should extract channel-level categories with parent relationships', () => { + const result = parser.parseXml(WXR_WITH_TAXONOMIES); + + expect(result.categories).toHaveLength(2); + expect(result.categories[0]).toEqual({ + name: 'Technology', + slug: 'technology', + parent: '', + }); + expect(result.categories[1]).toEqual({ + name: 'Web Development', + slug: 'web-dev', + parent: 'technology', + }); + }); + + it('should extract channel-level tags', () => { + const result = parser.parseXml(WXR_WITH_TAXONOMIES); + + expect(result.tags).toHaveLength(2); + expect(result.tags[0]).toEqual({ + name: 'JavaScript', + slug: 'javascript', + }); + expect(result.tags[1]).toEqual({ + name: 'TypeScript', + slug: 'typescript', + }); + }); + + it('should parse a published post with all fields', () => { + const result = parser.parseXml(WXR_WITH_POST); + + expect(result.posts).toHaveLength(1); + const post = result.posts[0]; + expect(post.wpId).toBe(42); + expect(post.title).toBe('Hello World'); + expect(post.slug).toBe('hello-world'); + expect(post.content).toBe('

Welcome to my blog. This is my first post.

'); + expect(post.excerpt).toBe('Welcome to my blog.'); + expect(post.creator).toBe('admin'); + expect(post.status).toBe('publish'); + expect(post.postType).toBe('post'); + expect(post.categories).toEqual(['Uncategorized']); + expect(post.tags).toEqual(['Intro', 'Welcome']); + expect(post.pubDate).toBeInstanceOf(Date); + }); + + it('should parse a page and put it in pages array', () => { + const result = parser.parseXml(WXR_WITH_PAGE); + + expect(result.posts).toHaveLength(0); + expect(result.pages).toHaveLength(1); + + const page = result.pages[0]; + expect(page.wpId).toBe(10); + expect(page.title).toBe('About Me'); + expect(page.slug).toBe('about'); + expect(page.content).toContain('

About

'); + expect(page.postType).toBe('page'); + }); + + it('should parse a media attachment with URL and filename', () => { + const result = parser.parseXml(WXR_WITH_MEDIA); + + expect(result.posts).toHaveLength(0); + expect(result.media).toHaveLength(1); + + const media = result.media[0]; + expect(media.wpId).toBe(100); + expect(media.title).toBe('sunset-photo'); + expect(media.url).toBe('https://example.com/wp-content/uploads/2024/01/sunset.jpg'); + expect(media.filename).toBe('sunset.jpg'); + expect(media.relativePath).toBe('2024/01/sunset.jpg'); + expect(media.parentId).toBe(42); + expect(media.description).toBe('A beautiful sunset'); + }); + + it('should separate posts, pages, and media from mixed content', () => { + const result = parser.parseXml(WXR_MIXED); + + expect(result.posts).toHaveLength(2); + expect(result.pages).toHaveLength(1); + expect(result.media).toHaveLength(1); + expect(result.categories).toHaveLength(1); + expect(result.tags).toHaveLength(1); + + expect(result.posts[0].title).toBe('First Post'); + expect(result.posts[1].title).toBe('Second Post'); + expect(result.pages[0].title).toBe('Contact'); + expect(result.media[0].title).toBe('logo'); + }); + + it('should extract post categories and tags from item-level category elements', () => { + const result = parser.parseXml(WXR_MIXED); + + const firstPost = result.posts[0]; + expect(firstPost.categories).toEqual(['News']); + expect(firstPost.tags).toEqual(['Featured']); + + // Second post has no categories or tags + const secondPost = result.posts[1]; + expect(secondPost.categories).toEqual([]); + expect(secondPost.tags).toEqual([]); + }); + + it('should handle different post statuses', () => { + const result = parser.parseXml(WXR_WITH_STATUSES); + + expect(result.posts).toHaveLength(3); + expect(result.posts[0].status).toBe('publish'); + expect(result.posts[1].status).toBe('draft'); + expect(result.posts[2].status).toBe('trash'); + }); + + it('should extract relative path from media URL based on wp-content/uploads', () => { + const result = parser.parseXml(WXR_WITH_MEDIA); + const media = result.media[0]; + + // The path after wp-content/uploads/ + expect(media.relativePath).toBe('2024/01/sunset.jpg'); + }); + + it('should extract relative path from mixed content media', () => { + const result = parser.parseXml(WXR_MIXED); + const media = result.media[0]; + + expect(media.relativePath).toBe('2024/02/logo.png'); + expect(media.filename).toBe('logo.png'); + }); + + it('should handle empty content gracefully', () => { + const result = parser.parseXml(WXR_WITH_MEDIA); + // Media items in WXR often have empty excerpt + const media = result.media[0]; + expect(media).toBeDefined(); + }); + + it('should infer mime type from file extension', () => { + const result = parser.parseXml(WXR_WITH_MEDIA); + expect(result.media[0].mimeType).toBe('image/jpeg'); + + const mixedResult = parser.parseXml(WXR_MIXED); + expect(mixedResult.media[0].mimeType).toBe('image/png'); + }); + + it('should handle missing pubDate gracefully', () => { + const result = parser.parseXml(WXR_WITH_PAGE); + // Page has no pubDate element + expect(result.pages[0].pubDate).toBeNull(); + }); + }); + + describe('parseFile', () => { + it('should read a file and parse its contents', async () => { + const fs = await import('fs/promises'); + vi.mocked(fs.readFile).mockResolvedValueOnce(WXR_WITH_POST); + + const result = await parser.parseFile('/path/to/export.xml'); + + expect(fs.readFile).toHaveBeenCalledWith('/path/to/export.xml', 'utf-8'); + expect(result.posts).toHaveLength(1); + expect(result.posts[0].title).toBe('Hello World'); + }); + + it('should throw an error if the file cannot be read', async () => { + const fs = await import('fs/promises'); + vi.mocked(fs.readFile).mockRejectedValueOnce(new Error('ENOENT')); + + await expect(parser.parseFile('/nonexistent.xml')).rejects.toThrow('ENOENT'); + }); + }); +}); diff --git a/tests/setup.ts b/tests/setup.ts index 8ee4259..8db28f9 100644 --- a/tests/setup.ts +++ b/tests/setup.ts @@ -106,6 +106,11 @@ Object.defineProperty(globalThis, 'window', { cancel: vi.fn(), clearCompleted: vi.fn(), }, + import: { + selectAndAnalyze: vi.fn(), + analyzeFile: vi.fn(), + selectUploadsFolder: vi.fn(), + }, on: vi.fn(() => () => {}), }, },