fix: fixes on media importing

This commit is contained in:
2026-02-15 14:52:53 +01:00
parent 3b0cb97ed2
commit 24667a7553
7 changed files with 350 additions and 2 deletions

View File

@@ -22,6 +22,7 @@ import { eq } from 'drizzle-orm';
import { getTagEngine } from './TagEngine';
import { getPostEngine, PostData } from './PostEngine';
import { getMediaEngine, MediaData } from './MediaEngine';
import { getPostMediaEngine } from './PostMediaEngine';
import type {
ImportAnalysisReport,
AnalyzedPost,
@@ -72,6 +73,7 @@ export class ImportExecutionEngine extends EventEmitter {
private currentProjectId: string = 'default';
private dataDir: string | null = null;
private turndown: TurndownService;
private siteBaseUrl: string | null = null; // Base URL for media URL conversion
constructor() {
super();
@@ -194,6 +196,9 @@ export class ImportExecutionEngine extends EventEmitter {
const progress = options.onProgress || (() => {});
// Store site URL for media URL conversion
this.siteBaseUrl = report.site.link || null;
try {
// Build tag/category mappings
const tagMapping = this.buildTaxonomyMapping(report.tags);
@@ -428,7 +433,10 @@ export class ImportExecutionEngine extends EventEmitter {
const contentWithShortcodes = this.transformShortcodes(wxrPost.content);
// Convert HTML content to Markdown
const transformedContent = this.convertToMarkdown(contentWithShortcodes);
let transformedContent = this.convertToMarkdown(contentWithShortcodes);
// Convert absolute media URLs from the site to relative paths
transformedContent = this.convertMediaUrlsToRelative(transformedContent);
// Resolve tags
const resolvedTags = this.resolveTaxonomy(wxrPost.tags, tagMapping);
@@ -622,7 +630,7 @@ export class ImportExecutionEngine extends EventEmitter {
// Import the media file
const mediaEngine = getMediaEngine();
await mediaEngine.importMedia(sourcePath, {
const importedMedia = await mediaEngine.importMedia(sourcePath, {
title: wxrMedia.title || undefined,
alt: wxrMedia.description || undefined,
mimeType: wxrMedia.mimeType,
@@ -632,6 +640,15 @@ export class ImportExecutionEngine extends EventEmitter {
updatedAt: createdAt,
});
// Link media to posts in the postMedia table
if (linkedPostIds.length > 0) {
const postMediaEngine = getPostMediaEngine();
postMediaEngine.setProjectContext(this.currentProjectId);
for (const postId of linkedPostIds) {
await postMediaEngine.linkMediaToPost(postId, importedMedia.id);
}
}
return true;
}
@@ -725,6 +742,40 @@ export class ImportExecutionEngine extends EventEmitter {
});
}
/**
 * Convert absolute media URLs from the WordPress site to relative paths.
 *
 * Converts URLs like:
 *   https://site.com/wp-content/uploads/2022/11/image.jpg
 * To:
 *   media/2022/11/image.jpg
 *
 * Only converts URLs from the site being imported (based on site.link).
 * The scheme is ignored when comparing, so both the http:// and https://
 * forms of the site URL are converted regardless of which one site.link uses.
 *
 * Does NOT convert:
 * - URLs from external sites
 * - URLs from wp-content/themes/ or wp-content/plugins/ (not imported media)
 *
 * @param markdown Markdown text to rewrite.
 * @returns The markdown with matching upload URLs replaced by `media/...` paths;
 *          returned unchanged when no site base URL is set or the input is empty.
 */
private convertMediaUrlsToRelative(markdown: string): string {
  if (!this.siteBaseUrl || !markdown) return markdown;

  // Reduce the site URL to "host[/path]": drop the scheme so http/https are
  // treated alike, and drop trailing slashes so ".../" and "..." behave the same.
  const siteHostAndPath = this.siteBaseUrl
    .trim()
    .replace(/^https?:\/\//i, '')
    .replace(/\/+$/, '');
  if (!siteHostAndPath) return markdown;

  // Escape special regex characters so the host is matched literally
  const escapedSite = siteHostAndPath.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  // Pattern: http(s)://{site}/wp-content/uploads/{path}
  // The path stops at whitespace, ')' or a quote so markdown link/image
  // delimiters and HTML attribute quotes are never swallowed.
  const uploadsUrlPattern = new RegExp(
    `https?://${escapedSite}/wp-content/uploads/([^\\s)"']+)`,
    'gi'
  );

  // Replace with relative media path
  return markdown.replace(uploadsUrlPattern, 'media/$1');
}
/**
* Transform WordPress shortcodes [shortcode] to [[shortcode]]
*/

View File

@@ -912,6 +912,16 @@ export function registerIpcHandlers(): void {
wpIdToPostId: Object.fromEntries(result.wpIdToPostId),
};
// Emit import:complete event to notify UI to refresh
ipcMain.emit('forward-to-renderer', 'import:complete', {
taskId,
success: result.success,
posts: result.posts,
media: result.media,
pages: result.pages,
tags: result.tags,
});
return serializedResult;
},
};

View File

@@ -159,6 +159,25 @@ contextBridge.exposeInMainWorld('electronAPI', {
ipcRenderer.on('import:executionProgress', subscription);
return () => ipcRenderer.removeListener('import:executionProgress', subscription);
},
// Subscribe to the 'import:complete' channel; the returned function removes the listener again.
onComplete: (callback: (data: {
  taskId: string;
  success: boolean;
  posts: { imported: number; skipped: number; errors: number };
  media: { imported: number; skipped: number; errors: number };
  pages: { imported: number; skipped: number; errors: number };
  tags: { created: number; skipped: number };
}) => void) => {
  // Reuse the callback's own argument type instead of spelling the payload out twice
  type CompletePayload = Parameters<typeof callback>[0];
  const channel = 'import:complete';

  // Drop the IPC event object; consumers only receive the payload
  const listener = (_event: Electron.IpcRendererEvent, payload: CompletePayload) => callback(payload);

  ipcRenderer.on(channel, listener);
  return () => ipcRenderer.removeListener(channel, listener);
},
},
// Import Definition CRUD

View File

@@ -276,6 +276,35 @@ const App: React.FC = () => {
}) || (() => {})
);
// Import completion event - refresh posts and media stores, then confirm with a toast
unsubscribers.push(
  window.electronAPI?.import.onComplete(async (data) => {
    // This async listener's promise is not awaited here, so without a
    // try/catch a failed store refresh would only surface as an unhandled rejection.
    try {
      // Refresh posts store if any posts were imported
      if (data.posts.imported > 0 || data.pages.imported > 0) {
        const postsResult = await window.electronAPI?.posts.getAll({ limit: 500, offset: 0 });
        if (postsResult) {
          const { items, hasMore, total } = postsResult as { items: PostData[]; hasMore: boolean; total: number };
          setPosts(items, hasMore, total);
        }
      }

      // Refresh media store if any media was imported
      if (data.media.imported > 0) {
        const mediaResult = await window.electronAPI?.media.getAll();
        if (mediaResult) {
          setMedia(mediaResult as MediaData[]);
        }
      }

      // Show success toast (pages are counted together with posts)
      const importedCount = data.posts.imported + data.pages.imported;
      const importedMedia = data.media.imported;
      if (data.success) {
        showToast.success(`Import complete: ${importedCount} posts, ${importedMedia} media files`);
      }
    } catch (error) {
      console.error('Failed to refresh stores after import:', error);
    }
  }) || (() => {})
);
return () => {
unsubscribers.forEach(unsub => unsub());
};

View File

@@ -359,6 +359,7 @@ export const ImportAnalysisView: React.FC<ImportAnalysisViewProps> = ({ definiti
await window.electronAPI?.importDefinitions.update(definitionId, {
lastAnalysisResult: JSON.stringify(result),
wxrFilePath: result.sourceFile,
name: result.site.link || result.site.title || undefined,
});
}
} catch (error) {

View File

@@ -14,6 +14,15 @@ export interface ImportExecutionProgress {
eta?: number;
}
/**
 * Summary of a finished import run: an overall success flag plus
 * per-category counters for posts, media, pages and tags.
 */
export interface ImportCompleteResult {
  taskId: string;
  success: boolean;
  /** Post counters. */
  posts: Record<'imported' | 'skipped' | 'errors', number>;
  /** Media counters. */
  media: Record<'imported' | 'skipped' | 'errors', number>;
  /** Page counters. */
  pages: Record<'imported' | 'skipped' | 'errors', number>;
  /** Tag counters (tags are created rather than imported). */
  tags: Record<'created' | 'skipped', number>;
}
export interface ImportDefinitionData {
id: string;
projectId: string;
@@ -388,6 +397,7 @@ export interface ElectronAPI {
execute: (reportJson: string, uploadsFolder?: string) => Promise<ImportExecuteResult>;
onProgress: (callback: (data: { step: string; detail?: string }) => void) => () => void;
onExecutionProgress: (callback: (data: ImportExecutionProgress) => void) => () => void;
onComplete: (callback: (data: ImportCompleteResult) => void) => () => void;
};
importDefinitions: {
create: (name?: string) => Promise<ImportDefinitionData>;

View File

@@ -179,6 +179,25 @@ vi.mock('../../src/main/engine/MediaEngine', () => ({
getMediaEngine: vi.fn(() => mockMediaEngine),
}));
// Stand-in for PostMediaEngine: records calls and returns a synthetic link row
const mockPostMediaEngine = {
  setProjectContext: vi.fn(),
  linkMediaToPost: vi.fn().mockImplementation(
    async (linkedPostId: string, linkedMediaId: string) => ({
      id: `link-${linkedPostId}-${linkedMediaId}`,
      projectId: 'test-project',
      postId: linkedPostId,
      mediaId: linkedMediaId,
      sortOrder: 0,
      createdAt: new Date(),
    })
  ),
};

// Route getPostMediaEngine() to the stand-in above
vi.mock('../../src/main/engine/PostMediaEngine', () => {
  return {
    getPostMediaEngine: vi.fn(() => mockPostMediaEngine),
  };
});
// Import after mocks are set up
import { ImportExecutionEngine } from '../../src/main/engine/ImportExecutionEngine';
@@ -1071,6 +1090,12 @@ describe('ImportExecutionEngine E2E Tests', () => {
expect(insertedMedia.length).toBe(1);
expect(insertedMedia[0].linkedPostIds.length).toBe(1);
expect(insertedMedia[0].linkedPostIds[0]).toBe(result.wpIdToPostId.get(201));
// CRITICAL: Verify PostMediaEngine.linkMediaToPost was called to create the DB link
expect(mockPostMediaEngine.linkMediaToPost).toHaveBeenCalledWith(
result.wpIdToPostId.get(201), // postId
insertedMedia[0].id // mediaId
);
});
it('should import standalone media without parent link', async () => {
@@ -1578,4 +1603,207 @@ describe('ImportExecutionEngine E2E Tests', () => {
expect(insertedPosts.length).toBe(0);
});
});
// ==========================================================================
// SECTION 8: MEDIA URL CONVERSION TO RELATIVE PATHS
// ==========================================================================
describe('Media URL Conversion to Relative Paths', () => {
/**
 * Builds an analysis report containing exactly one new, published post
 * (slug `url-conversion-test`) whose body is `content`, with no pages,
 * media, tags or categories. `siteUrl` is placed in `report.site.link`.
 */
function createPostWithContent(content: string, siteUrl: string = 'https://testblog.example.com'): ImportAnalysisReport {
  // A fresh Date per field so no two fields share one mutable instance
  const fixtureDate = (): Date => new Date('2024-01-15T10:00:00Z');

  const testPost: WxrPost = {
    wpId: 9001,
    title: 'URL Conversion Test Post',
    slug: 'url-conversion-test',
    content,
    excerpt: '',
    pubDate: fixtureDate(),
    postDate: fixtureDate(),
    postModified: fixtureDate(),
    creator: 'testauthor',
    status: 'publish',
    postType: 'post',
    categories: [],
    tags: [],
  };

  const report: ImportAnalysisReport = {
    wxrData,
    posts: {
      total: 1,
      new: 1,
      update: 0,
      conflict: 0,
      items: [
        {
          wxrPost: testPost,
          status: 'new' as PostAnalysisStatus,
          contentHash: 'test-hash',
          markdownPreview: '',
        },
      ],
    },
    pages: { total: 0, new: 0, update: 0, conflict: 0, items: [] },
    media: { total: 0, new: 0, update: 0, conflict: 0, missing: 0, items: [] },
    tags: [],
    categories: [],
    // Keep the shared site fixture but override its link
    site: { ...wxrData.site, link: siteUrl },
    macros: { totalUniqueMacros: 0, totalMacroUsages: 0, allMapped: true, macros: [] },
  };

  return report;
}
/**
 * Imports a single post whose body is `html` and returns the content of the
 * written file for that post. Asserts that the file was written.
 * When `siteUrl` is omitted, createPostWithContent's default site URL applies.
 */
async function importAndReadPost(html: string, siteUrl?: string) {
  const report = createPostWithContent(html, siteUrl);
  await engine.executeImport(report, {});

  const writtenFile = writtenFiles.find(f => f.path.includes('url-conversion-test'));
  expect(writtenFile).toBeDefined();
  return writtenFile!.content;
}

it('should convert absolute media URLs from site domain to relative paths', async () => {
  // The image lives under the site's own uploads folder
  const html = `<p>Check out this image:</p>
<img src="https://testblog.example.com/wp-content/uploads/2022/11/P1010853_01.jpg" alt="My Photo" />
<p>Nice, right?</p>`;

  const fileContent = await importAndReadPost(html);

  // Relative URL is present, absolute URL is gone
  expect(fileContent).toContain('![My Photo](media/2022/11/P1010853_01.jpg)');
  expect(fileContent).not.toContain('https://testblog.example.com/wp-content/uploads');
});

it('should convert linked images with absolute media URLs to relative paths', async () => {
  // Typical WordPress gallery markup: a thumbnail wrapped in a link to the full-size file
  const html = `<a href="https://testblog.example.com/wp-content/uploads/2022/11/full-size.jpg">
<img src="https://testblog.example.com/wp-content/uploads/2022/11/thumb.jpg" alt="Gallery Image" />
</a>`;

  const fileContent = await importAndReadPost(html);

  // The linked image rule uses the href (full-size) as the image URL
  expect(fileContent).toContain('media/2022/11/full-size.jpg');
  // No absolute upload URL may survive
  expect(fileContent).not.toContain('https://testblog.example.com/wp-content/uploads');
});

it('should preserve external image URLs that are not from the site', async () => {
  // One image hosted on the site, one hosted elsewhere
  const html = `<p>Own image:</p>
<img src="https://testblog.example.com/wp-content/uploads/2024/01/local.jpg" alt="Local" />
<p>External image:</p>
<img src="https://external-site.com/images/photo.jpg" alt="External" />`;

  const fileContent = await importAndReadPost(html);

  // Site-hosted image becomes relative, the external one is left untouched
  expect(fileContent).toContain('![Local](media/2024/01/local.jpg)');
  expect(fileContent).toContain('![External](https://external-site.com/images/photo.jpg)');
});

it('should handle site URLs with trailing slash', async () => {
  const html = `<img src="https://hugo.rfc1437.de/wp-content/uploads/2022/11/image.jpg" alt="Test" />`;

  const fileContent = await importAndReadPost(html, 'https://hugo.rfc1437.de/');

  expect(fileContent).toContain('![Test](media/2022/11/image.jpg)');
});

it('should handle site URLs without trailing slash', async () => {
  const html = `<img src="https://hugo.rfc1437.de/wp-content/uploads/2022/11/image.jpg" alt="Test" />`;

  const fileContent = await importAndReadPost(html, 'https://hugo.rfc1437.de');

  expect(fileContent).toContain('![Test](media/2022/11/image.jpg)');
});

it('should convert media URLs in markdown image syntax after HTML conversion', async () => {
  // An <img> carrying a title attribute in addition to src and alt
  const html = `<p>Image with title:</p>
<img src="https://testblog.example.com/wp-content/uploads/2024/02/sunset.png" alt="Sunset" title="Beautiful Sunset" />`;

  const fileContent = await importAndReadPost(html);

  // The URL is made relative and the title text is kept
  expect(fileContent).toContain('media/2024/02/sunset.png');
  expect(fileContent).toContain('Beautiful Sunset');
});

it('should handle multiple images in same post', async () => {
  const html = `<p>Gallery:</p>
<img src="https://testblog.example.com/wp-content/uploads/2024/01/img1.jpg" alt="Image 1" />
<img src="https://testblog.example.com/wp-content/uploads/2024/01/img2.jpg" alt="Image 2" />
<img src="https://testblog.example.com/wp-content/uploads/2024/02/img3.jpg" alt="Image 3" />`;

  const fileContent = await importAndReadPost(html);

  expect(fileContent).toContain('![Image 1](media/2024/01/img1.jpg)');
  expect(fileContent).toContain('![Image 2](media/2024/01/img2.jpg)');
  expect(fileContent).toContain('![Image 3](media/2024/02/img3.jpg)');
});

it('should handle deep nested upload paths', async () => {
  const html = `<img src="https://testblog.example.com/wp-content/uploads/sites/2/2024/03/nested/deep/image.jpg" alt="Deep" />`;

  const fileContent = await importAndReadPost(html);

  // Everything after wp-content/uploads/ is carried over unchanged
  expect(fileContent).toContain('media/sites/2/2024/03/nested/deep/image.jpg');
});

it('should NOT convert wp-content/themes or wp-content/plugins URLs', async () => {
  // Theme and plugin assets are not imported media, so they must stay absolute
  const html = `<img src="https://testblog.example.com/wp-content/themes/mytheme/images/logo.png" alt="Theme Logo" />
<img src="https://testblog.example.com/wp-content/plugins/myplugin/assets/icon.png" alt="Plugin Icon" />`;

  const fileContent = await importAndReadPost(html);

  expect(fileContent).toContain('https://testblog.example.com/wp-content/themes/');
  expect(fileContent).toContain('https://testblog.example.com/wp-content/plugins/');
});
});
});