From 24667a7553b4a93641d9ef0624fb8005ab1b7844 Mon Sep 17 00:00:00 2001 From: hugo Date: Sun, 15 Feb 2026 14:52:53 +0100 Subject: [PATCH] fix: fixes on media importing --- src/main/engine/ImportExecutionEngine.ts | 55 ++++- src/main/ipc/handlers.ts | 10 + src/main/preload.ts | 19 ++ src/renderer/App.tsx | 29 +++ .../ImportAnalysisView/ImportAnalysisView.tsx | 1 + src/renderer/types/electron.d.ts | 10 + .../engine/ImportExecutionEngine.e2e.test.ts | 228 ++++++++++++++++++ 7 files changed, 350 insertions(+), 2 deletions(-) diff --git a/src/main/engine/ImportExecutionEngine.ts b/src/main/engine/ImportExecutionEngine.ts index 1e65b04..2b72cfb 100644 --- a/src/main/engine/ImportExecutionEngine.ts +++ b/src/main/engine/ImportExecutionEngine.ts @@ -22,6 +22,7 @@ import { eq } from 'drizzle-orm'; import { getTagEngine } from './TagEngine'; import { getPostEngine, PostData } from './PostEngine'; import { getMediaEngine, MediaData } from './MediaEngine'; +import { getPostMediaEngine } from './PostMediaEngine'; import type { ImportAnalysisReport, AnalyzedPost, @@ -72,6 +73,7 @@ export class ImportExecutionEngine extends EventEmitter { private currentProjectId: string = 'default'; private dataDir: string | null = null; private turndown: TurndownService; + private siteBaseUrl: string | null = null; // Base URL for media URL conversion constructor() { super(); @@ -194,6 +196,9 @@ export class ImportExecutionEngine extends EventEmitter { const progress = options.onProgress || (() => {}); + // Store site URL for media URL conversion + this.siteBaseUrl = report.site.link || null; + try { // Build tag/category mappings const tagMapping = this.buildTaxonomyMapping(report.tags); @@ -428,7 +433,10 @@ export class ImportExecutionEngine extends EventEmitter { const contentWithShortcodes = this.transformShortcodes(wxrPost.content); // Convert HTML content to Markdown - const transformedContent = this.convertToMarkdown(contentWithShortcodes); + let transformedContent = 
this.convertToMarkdown(contentWithShortcodes); + + // Convert absolute media URLs from the site to relative paths + transformedContent = this.convertMediaUrlsToRelative(transformedContent); // Resolve tags const resolvedTags = this.resolveTaxonomy(wxrPost.tags, tagMapping); @@ -622,7 +630,7 @@ export class ImportExecutionEngine extends EventEmitter { // Import the media file const mediaEngine = getMediaEngine(); - await mediaEngine.importMedia(sourcePath, { + const importedMedia = await mediaEngine.importMedia(sourcePath, { title: wxrMedia.title || undefined, alt: wxrMedia.description || undefined, mimeType: wxrMedia.mimeType, @@ -632,6 +640,15 @@ export class ImportExecutionEngine extends EventEmitter { updatedAt: createdAt, }); + // Link media to posts in the postMedia table + if (linkedPostIds.length > 0) { + const postMediaEngine = getPostMediaEngine(); + postMediaEngine.setProjectContext(this.currentProjectId); + for (const postId of linkedPostIds) { + await postMediaEngine.linkMediaToPost(postId, importedMedia.id); + } + } + return true; } @@ -725,6 +742,40 @@ export class ImportExecutionEngine extends EventEmitter { }); } + /** + * Convert absolute media URLs from the WordPress site to relative paths. + * + * Converts URLs like: + * https://site.com/wp-content/uploads/2022/11/image.jpg + * To: + * media/2022/11/image.jpg + * + * Only converts URLs from the site being imported (based on site.link). 
+ * Does NOT convert: + * - URLs from external sites + * - URLs from wp-content/themes/ or wp-content/plugins/ (not imported media) + */ + private convertMediaUrlsToRelative(markdown: string): string { + if (!this.siteBaseUrl || !markdown) return markdown; + + // Normalize the site URL (remove trailing slash) + const siteUrl = this.siteBaseUrl.replace(/\/$/, ''); + + // Escape special regex characters in URL + const escapedSiteUrl = siteUrl.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); + + // Match URLs pointing to wp-content/uploads/ on the site + // NOTE: the scheme from site.link is matched literally (case-insensitive); http and https are NOT treated as interchangeable + // Pattern: {siteUrl}/wp-content/uploads/{path} + const uploadsUrlPattern = new RegExp( + `${escapedSiteUrl}/wp-content/uploads/([^\\s)"']+)`, + 'gi' + ); + + // Replace with relative media path + return markdown.replace(uploadsUrlPattern, 'media/$1'); + } + /** * Transform WordPress shortcodes [shortcode] to [[shortcode]] */ diff --git a/src/main/ipc/handlers.ts b/src/main/ipc/handlers.ts index 96de629..a94ee4f 100644 --- a/src/main/ipc/handlers.ts +++ b/src/main/ipc/handlers.ts @@ -912,6 +912,16 @@ export function registerIpcHandlers(): void { wpIdToPostId: Object.fromEntries(result.wpIdToPostId), }; + // Emit import:complete event to notify UI to refresh + ipcMain.emit('forward-to-renderer', 'import:complete', { + taskId, + success: result.success, + posts: result.posts, + media: result.media, + pages: result.pages, + tags: result.tags, + }); + return serializedResult; }, }; diff --git a/src/main/preload.ts b/src/main/preload.ts index 2bc64a9..83482fc 100644 --- a/src/main/preload.ts +++ b/src/main/preload.ts @@ -159,6 +159,25 @@ contextBridge.exposeInMainWorld('electronAPI', { ipcRenderer.on('import:executionProgress', subscription); return () => ipcRenderer.removeListener('import:executionProgress', subscription); }, + onComplete: (callback: (data: { + taskId: string; + success: boolean; + posts: { imported: number; skipped: number; errors: number }; + media: { 
imported: number; skipped: number; errors: number }; + pages: { imported: number; skipped: number; errors: number }; + tags: { created: number; skipped: number }; + }) => void) => { + const subscription = (_event: Electron.IpcRendererEvent, data: { + taskId: string; + success: boolean; + posts: { imported: number; skipped: number; errors: number }; + media: { imported: number; skipped: number; errors: number }; + pages: { imported: number; skipped: number; errors: number }; + tags: { created: number; skipped: number }; + }) => callback(data); + ipcRenderer.on('import:complete', subscription); + return () => ipcRenderer.removeListener('import:complete', subscription); + }, }, // Import Definition CRUD diff --git a/src/renderer/App.tsx b/src/renderer/App.tsx index 984a2b6..5a9381b 100644 --- a/src/renderer/App.tsx +++ b/src/renderer/App.tsx @@ -276,6 +276,35 @@ const App: React.FC = () => { }) || (() => {}) ); + // Import completion event - refresh posts and media stores + unsubscribers.push( + window.electronAPI?.import.onComplete(async (data) => { + // Refresh posts store if any posts were imported + if (data.posts.imported > 0 || data.pages.imported > 0) { + const postsResult = await window.electronAPI?.posts.getAll({ limit: 500, offset: 0 }); + if (postsResult) { + const { items, hasMore, total } = postsResult as { items: PostData[]; hasMore: boolean; total: number }; + setPosts(items, hasMore, total); + } + } + + // Refresh media store if any media was imported + if (data.media.imported > 0) { + const mediaResult = await window.electronAPI?.media.getAll(); + if (mediaResult) { + setMedia(mediaResult as MediaData[]); + } + } + + // Show success toast + const importedCount = data.posts.imported + data.pages.imported; + const importedMedia = data.media.imported; + if (data.success) { + showToast.success(`Import complete: ${importedCount} posts, ${importedMedia} media files`); + } + }) || (() => {}) + ); + return () => { unsubscribers.forEach(unsub => unsub()); }; 
diff --git a/src/renderer/components/ImportAnalysisView/ImportAnalysisView.tsx b/src/renderer/components/ImportAnalysisView/ImportAnalysisView.tsx index 3113e94..ab906db 100644 --- a/src/renderer/components/ImportAnalysisView/ImportAnalysisView.tsx +++ b/src/renderer/components/ImportAnalysisView/ImportAnalysisView.tsx @@ -359,6 +359,7 @@ export const ImportAnalysisView: React.FC = ({ definiti await window.electronAPI?.importDefinitions.update(definitionId, { lastAnalysisResult: JSON.stringify(result), wxrFilePath: result.sourceFile, + name: result.site.link || result.site.title || undefined, }); } } catch (error) { diff --git a/src/renderer/types/electron.d.ts b/src/renderer/types/electron.d.ts index 77b93de..3f82ea4 100644 --- a/src/renderer/types/electron.d.ts +++ b/src/renderer/types/electron.d.ts @@ -14,6 +14,15 @@ export interface ImportExecutionProgress { eta?: number; } +export interface ImportCompleteResult { + taskId: string; + success: boolean; + posts: { imported: number; skipped: number; errors: number }; + media: { imported: number; skipped: number; errors: number }; + pages: { imported: number; skipped: number; errors: number }; + tags: { created: number; skipped: number }; +} + export interface ImportDefinitionData { id: string; projectId: string; @@ -388,6 +397,7 @@ export interface ElectronAPI { execute: (reportJson: string, uploadsFolder?: string) => Promise; onProgress: (callback: (data: { step: string; detail?: string }) => void) => () => void; onExecutionProgress: (callback: (data: ImportExecutionProgress) => void) => () => void; + onComplete: (callback: (data: ImportCompleteResult) => void) => () => void; }; importDefinitions: { create: (name?: string) => Promise; diff --git a/tests/engine/ImportExecutionEngine.e2e.test.ts b/tests/engine/ImportExecutionEngine.e2e.test.ts index 43901f2..9fc4ee4 100644 --- a/tests/engine/ImportExecutionEngine.e2e.test.ts +++ b/tests/engine/ImportExecutionEngine.e2e.test.ts @@ -179,6 +179,25 @@ 
vi.mock('../../src/main/engine/MediaEngine', () => ({ getMediaEngine: vi.fn(() => mockMediaEngine), })); +// Mock PostMediaEngine +const mockPostMediaEngine = { + setProjectContext: vi.fn(), + linkMediaToPost: vi.fn().mockImplementation(async (postId: string, mediaId: string) => { + return { + id: `link-${postId}-${mediaId}`, + projectId: 'test-project', + postId, + mediaId, + sortOrder: 0, + createdAt: new Date(), + }; + }), +}; + +vi.mock('../../src/main/engine/PostMediaEngine', () => ({ + getPostMediaEngine: vi.fn(() => mockPostMediaEngine), +})); + // Import after mocks are set up import { ImportExecutionEngine } from '../../src/main/engine/ImportExecutionEngine'; @@ -1071,6 +1090,12 @@ describe('ImportExecutionEngine E2E Tests', () => { expect(insertedMedia.length).toBe(1); expect(insertedMedia[0].linkedPostIds.length).toBe(1); expect(insertedMedia[0].linkedPostIds[0]).toBe(result.wpIdToPostId.get(201)); + + // CRITICAL: Verify PostMediaEngine.linkMediaToPost was called to create the DB link + expect(mockPostMediaEngine.linkMediaToPost).toHaveBeenCalledWith( + result.wpIdToPostId.get(201), // postId + insertedMedia[0].id // mediaId + ); }); it('should import standalone media without parent link', async () => { @@ -1578,4 +1603,207 @@ describe('ImportExecutionEngine E2E Tests', () => { expect(insertedPosts.length).toBe(0); }); }); + + // ========================================================================== + // SECTION 8: MEDIA URL CONVERSION TO RELATIVE PATHS + // ========================================================================== + + describe('Media URL Conversion to Relative Paths', () => { + /** + * Creates a custom post with specific content for URL conversion testing + */ + function createPostWithContent(content: string, siteUrl: string = 'https://testblog.example.com'): ImportAnalysisReport { + const customPost: WxrPost = { + wpId: 9001, + title: 'URL Conversion Test Post', + slug: 'url-conversion-test', + content: content, + excerpt: '', + 
pubDate: new Date('2024-01-15T10:00:00Z'), + postDate: new Date('2024-01-15T10:00:00Z'), + postModified: new Date('2024-01-15T10:00:00Z'), + creator: 'testauthor', + status: 'publish', + postType: 'post', + categories: [], + tags: [], + }; + + return { + wxrData: wxrData, + posts: { + total: 1, + new: 1, + update: 0, + conflict: 0, + items: [{ + wxrPost: customPost, + status: 'new' as PostAnalysisStatus, + contentHash: 'test-hash', + markdownPreview: '', + }], + }, + pages: { total: 0, new: 0, update: 0, conflict: 0, items: [] }, + media: { total: 0, new: 0, update: 0, conflict: 0, missing: 0, items: [] }, + tags: [], + categories: [], + site: { ...wxrData.site, link: siteUrl }, + macros: { totalUniqueMacros: 0, totalMacroUsages: 0, allMapped: true, macros: [] }, + }; + } + + it('should convert absolute media URLs from site domain to relative paths', async () => { + // Post with image URL pointing to the site's own media + const content = `

Check out this image:

+My Photo +

Nice, right?

`; + + const report = createPostWithContent(content); + await engine.executeImport(report, {}); + + const writtenFile = writtenFiles.find(f => f.path.includes('url-conversion-test')); + expect(writtenFile).toBeDefined(); + + const fileContent = writtenFile!.content; + + // Should convert to relative media URL + expect(fileContent).toContain('![My Photo](media/2022/11/P1010853_01.jpg)'); + // Should NOT contain the absolute URL + expect(fileContent).not.toContain('https://testblog.example.com/wp-content/uploads'); + }); + + it('should convert linked images with absolute media URLs to relative paths', async () => { + // Linked image pattern common in WordPress - thumbnail links to full-size + const content = ` +Gallery Image +`; + + const report = createPostWithContent(content); + await engine.executeImport(report, {}); + + const writtenFile = writtenFiles.find(f => f.path.includes('url-conversion-test')); + expect(writtenFile).toBeDefined(); + + const fileContent = writtenFile!.content; + + // The linked image rule uses the href (full-size) as the image URL + expect(fileContent).toContain('media/2022/11/full-size.jpg'); + // Should NOT contain absolute URLs + expect(fileContent).not.toContain('https://testblog.example.com/wp-content/uploads'); + }); + + it('should preserve external image URLs that are not from the site', async () => { + // Mix of site-owned and external images + const content = `

Own image:

+Local +

External image:

+External`; + + const report = createPostWithContent(content); + await engine.executeImport(report, {}); + + const writtenFile = writtenFiles.find(f => f.path.includes('url-conversion-test')); + expect(writtenFile).toBeDefined(); + + const fileContent = writtenFile!.content; + + // Local image should become relative + expect(fileContent).toContain('![Local](media/2024/01/local.jpg)'); + // External image should remain absolute + expect(fileContent).toContain('![External](https://external-site.com/images/photo.jpg)'); + }); + + it('should handle site URLs with trailing slash', async () => { + const content = `Test`; + + const report = createPostWithContent(content, 'https://hugo.rfc1437.de/'); + await engine.executeImport(report, {}); + + const writtenFile = writtenFiles.find(f => f.path.includes('url-conversion-test')); + expect(writtenFile).toBeDefined(); + + const fileContent = writtenFile!.content; + expect(fileContent).toContain('![Test](media/2022/11/image.jpg)'); + }); + + it('should handle site URLs without trailing slash', async () => { + const content = `Test`; + + const report = createPostWithContent(content, 'https://hugo.rfc1437.de'); + await engine.executeImport(report, {}); + + const writtenFile = writtenFiles.find(f => f.path.includes('url-conversion-test')); + expect(writtenFile).toBeDefined(); + + const fileContent = writtenFile!.content; + expect(fileContent).toContain('![Test](media/2022/11/image.jpg)'); + }); + + it('should convert media URLs in markdown image syntax after HTML conversion', async () => { + // Sometimes WordPress content already has markdown-like syntax in HTML + const content = `

Image with title:

+Sunset`; + + const report = createPostWithContent(content); + await engine.executeImport(report, {}); + + const writtenFile = writtenFiles.find(f => f.path.includes('url-conversion-test')); + expect(writtenFile).toBeDefined(); + + const fileContent = writtenFile!.content; + // Image with title should still get relative URL + expect(fileContent).toContain('media/2024/02/sunset.png'); + expect(fileContent).toContain('Beautiful Sunset'); + }); + + it('should handle multiple images in same post', async () => { + const content = `

Gallery:

+Image 1 +Image 2 +Image 3`; + + const report = createPostWithContent(content); + await engine.executeImport(report, {}); + + const writtenFile = writtenFiles.find(f => f.path.includes('url-conversion-test')); + expect(writtenFile).toBeDefined(); + + const fileContent = writtenFile!.content; + expect(fileContent).toContain('![Image 1](media/2024/01/img1.jpg)'); + expect(fileContent).toContain('![Image 2](media/2024/01/img2.jpg)'); + expect(fileContent).toContain('![Image 3](media/2024/02/img3.jpg)'); + }); + + it('should handle deep nested upload paths', async () => { + const content = `Deep`; + + const report = createPostWithContent(content); + await engine.executeImport(report, {}); + + const writtenFile = writtenFiles.find(f => f.path.includes('url-conversion-test')); + expect(writtenFile).toBeDefined(); + + const fileContent = writtenFile!.content; + // Even complex paths should work, preserving path after wp-content/uploads/ + expect(fileContent).toContain('media/sites/2/2024/03/nested/deep/image.jpg'); + }); + + it('should NOT convert wp-content/themes or wp-content/plugins URLs', async () => { + // Assets from themes/plugins should stay absolute (they're not imported media) + const content = `Theme Logo +Plugin Icon`; + + const report = createPostWithContent(content); + await engine.executeImport(report, {}); + + const writtenFile = writtenFiles.find(f => f.path.includes('url-conversion-test')); + expect(writtenFile).toBeDefined(); + + const fileContent = writtenFile!.content; + // Theme assets should remain absolute + expect(fileContent).toContain('https://testblog.example.com/wp-content/themes/'); + // Plugin assets should remain absolute + expect(fileContent).toContain('https://testblog.example.com/wp-content/plugins/'); + }); + }); });