/**
 * MetadataDiffEngine
 *
 * Compares metadata between database records and filesystem files for posts and media.
 * Used to detect and resolve differences that may have accumulated due to bugs or
 * manual edits.
 */

import { EventEmitter } from 'events';
import { eq, and } from 'drizzle-orm';
import { getDatabase } from '../database';
import { posts, media } from '../database/schema';
import { readPostFile, PostFileData } from './postFileUtils';
import { getPostEngine } from './PostEngine';
import { taskManager } from './TaskManager';

/**
 * A difference in a specific metadata field: the value stored in the database
 * next to the value found in the post file.
 */
export interface FieldDifference<T = unknown> {
  dbValue: T;
  fileValue: T;
}

/**
 * The fields that can have differences
 */
export type DiffField = 'tags' | 'categories' | 'title' | 'excerpt' | 'author';

/**
 * Metadata differences for a single post.
 *
 * `hasDifferences` may be true while `differences` is empty: that combination
 * is produced when the post file could not be read at all.
 */
export interface PostMetadataDiff {
  postId: string;
  title: string;
  slug: string;
  filePath?: string;
  hasDifferences: boolean;
  differences: Partial<Record<DiffField, FieldDifference>>;
}

/**
 * A group of posts with the same type of difference
 */
export interface DiffGroup {
  field: DiffField;
  label: string;
  posts: Array<{
    postId: string;
    title: string;
    slug: string;
    dbValue: unknown;
    fileValue: unknown;
  }>;
}

/**
 * Result of scanning all published posts
 */
export interface ScanResult {
  totalScanned: number;
  postsWithDifferences: number;
  differences: PostMetadataDiff[];
  groups: DiffGroup[];
}

/**
 * Statistics about posts/media tables
 */
export interface TableStats {
  totalPosts: number;
  publishedPosts: number;
  draftPosts: number;
  totalMedia: number;
}

/**
 * Detects and resolves metadata drift between post rows in the database and
 * the post files on disk, scoped to the currently selected project.
 */
export class MetadataDiffEngine extends EventEmitter {
  private currentProjectId = 'default';

  /** Select the project whose posts/media subsequent calls operate on. */
  setProjectContext(projectId: string): void {
    this.currentProjectId = projectId;
  }

  /** Return the project id currently used to scope queries. */
  getProjectContext(): string {
    return this.currentProjectId;
  }

  private getDb() {
    return getDatabase().getLocal();
  }

  private getClient() {
    return getDatabase().getLocalClient();
  }

  /**
   * Get statistics about the posts and media tables for the current project.
   *
   * @returns Row counts: all posts, published posts that have a non-empty
   *   file path, draft posts, and all media.
   * @throws Error if the local database client is not available.
   */
  async getTableStats(): Promise<TableStats> {
    const client = this.getClient();
    if (!client) throw new Error('Database not initialized');

    // Every statistic is a single COUNT(*) bound to the current project id.
    const countRows = async (sql: string): Promise<number> => {
      const result = await client.execute({ sql, args: [this.currentProjectId] });
      return Number(result.rows[0]?.count ?? 0);
    };

    const totalPosts = await countRows(
      `SELECT COUNT(*) as count FROM posts WHERE project_id = ?`
    );
    const publishedPosts = await countRows(
      `SELECT COUNT(*) as count FROM posts WHERE project_id = ? AND status = 'published' AND file_path IS NOT NULL AND file_path != ''`
    );
    const draftPosts = await countRows(
      `SELECT COUNT(*) as count FROM posts WHERE project_id = ? AND status = 'draft'`
    );
    const totalMedia = await countRows(
      `SELECT COUNT(*) as count FROM media WHERE project_id = ?`
    );

    return { totalPosts, publishedPosts, draftPosts, totalMedia };
  }

  /**
   * Compare metadata for a single post between database and file.
   *
   * @param postId Id of the post to compare (looked up within the current project).
   * @returns `null` when the post is not found, has no file path, or is a draft.
   *   Otherwise a diff record; if the file cannot be read the record has
   *   `hasDifferences: true` and an empty `differences` map.
   */
  async comparePostMetadata(postId: string): Promise<PostMetadataDiff | null> {
    const db = this.getDb();

    // Get post from database
    const dbPost = await db
      .select()
      .from(posts)
      .where(and(eq(posts.id, postId), eq(posts.projectId, this.currentProjectId)))
      .get();

    if (!dbPost) {
      return null;
    }

    // Skip drafts - they don't have files
    if (!dbPost.filePath || dbPost.status === 'draft') {
      return null;
    }

    // Read file metadata
    const fileData = await readPostFile(dbPost.filePath);
    if (!fileData) {
      // File doesn't exist or can't be read
      return {
        postId: dbPost.id,
        title: dbPost.title,
        slug: dbPost.slug,
        filePath: dbPost.filePath,
        hasDifferences: true,
        differences: {}, // File missing entirely
      };
    }

    const differences: Partial<Record<DiffField, FieldDifference>> = {};

    // Tags and categories are stored as JSON-encoded arrays in the database.
    const dbTags = this.parseStoredList(dbPost.tags, dbPost.id, 'tags');
    const dbCategories = this.parseStoredList(dbPost.categories, dbPost.id, 'categories');
    const fileTags = fileData.tags || [];
    const fileCategories = fileData.categories || [];

    // Compare tags (order-independent)
    if (!this.arraysEqual(dbTags, fileTags)) {
      differences.tags = { dbValue: dbTags, fileValue: fileTags };
    }

    // Compare categories (order-independent)
    if (!this.arraysEqual(dbCategories, fileCategories)) {
      differences.categories = { dbValue: dbCategories, fileValue: fileCategories };
    }

    // Compare title
    if (dbPost.title !== fileData.title) {
      differences.title = { dbValue: dbPost.title, fileValue: fileData.title };
    }

    // Compare excerpt (missing and empty are treated as the same value)
    if ((dbPost.excerpt || '') !== (fileData.excerpt || '')) {
      differences.excerpt = { dbValue: dbPost.excerpt || '', fileValue: fileData.excerpt || '' };
    }

    // Compare author (missing and empty are treated as the same value)
    if ((dbPost.author || '') !== (fileData.author || '')) {
      differences.author = { dbValue: dbPost.author || '', fileValue: fileData.author || '' };
    }

    return {
      postId: dbPost.id,
      title: dbPost.title,
      slug: dbPost.slug,
      filePath: dbPost.filePath,
      hasDifferences: Object.keys(differences).length > 0,
      differences,
    };
  }

  /**
   * Decode a JSON-encoded list column from the database.
   *
   * Malformed JSON or a non-array value is logged and treated as an empty
   * list so that one corrupted row cannot abort a whole scan.
   */
  private parseStoredList(raw: string | null | undefined, postId: string, column: string): string[] {
    if (!raw) return [];
    try {
      const parsed: unknown = JSON.parse(raw);
      if (Array.isArray(parsed)) {
        // Element types are not validated here; entries are compared as stored.
        return parsed as string[];
      }
      console.warn(`[MetadataDiffEngine] Post ${postId} has a non-array ${column} value in DB; treating as empty`);
    } catch (error) {
      console.warn(`[MetadataDiffEngine] Post ${postId} has malformed ${column} JSON in DB; treating as empty:`, error);
    }
    return [];
  }

  /**
   * Compare arrays for equality (order-independent).
   * Inputs are copied before sorting, so neither argument is mutated.
   */
  private arraysEqual(a: string[], b: string[]): boolean {
    if (a.length !== b.length) return false;
    const sortedA = [...a].sort();
    const sortedB = [...b].sort();
    return sortedA.every((val, idx) => val === sortedB[idx]);
  }

  /**
   * Scan all published posts and find metadata differences.
   *
   * @param onProgress Called once before scanning starts, then after every
   *   10th post and after the last post.
   * @returns Scan totals, the per-post diffs, and the diffs grouped by field.
   * @throws Error if the local database client is not available.
   */
  async scanAllPublishedPosts(
    onProgress: (current: number, total: number, message: string) => void
  ): Promise<ScanResult> {
    const client = this.getClient();
    if (!client) throw new Error('Database not initialized');

    // Get all published posts with file paths
    const result = await client.execute({
      sql: `SELECT id, title, slug, file_path, tags, categories, excerpt, author FROM posts WHERE project_id = ? AND status = 'published' AND file_path IS NOT NULL AND file_path != ''`,
      args: [this.currentProjectId],
    });

    const publishedPosts = result.rows;
    const total = publishedPosts.length;
    const differences: PostMetadataDiff[] = [];

    onProgress(0, total, `Scanning ${total} published posts...`);

    for (let i = 0; i < publishedPosts.length; i++) {
      const row = publishedPosts[i];
      const postId = row.id as string;

      // Only the id is taken from the row; the comparison re-reads the post.
      const diff = await this.comparePostMetadata(postId);
      if (diff && diff.hasDifferences) {
        differences.push(diff);
      }

      if ((i + 1) % 10 === 0 || i === total - 1) {
        onProgress(i + 1, total, `Scanned ${i + 1}/${total} posts, found ${differences.length} with differences`);
      }
    }

    // Group the differences
    const groups = this.groupDifferencesByField(differences);

    return {
      totalScanned: total,
      postsWithDifferences: differences.length,
      differences,
      groups,
    };
  }

  /**
   * Group differences by field type for easier display and bulk actions.
   *
   * @param diffs Per-post diffs to regroup.
   * @returns One group per field that differs in at least one post, ordered
   *   by number of affected posts, largest first.
   */
  groupDifferencesByField(diffs: PostMetadataDiff[]): DiffGroup[] {
    const groupMap = new Map<DiffField, DiffGroup>();

    const fieldLabels: Record<DiffField, string> = {
      tags: 'Tags',
      categories: 'Categories',
      title: 'Title',
      excerpt: 'Excerpt',
      author: 'Author',
    };

    for (const diff of diffs) {
      for (const [field, fieldDiff] of Object.entries(diff.differences)) {
        const fieldKey = field as DiffField;
        if (!fieldDiff) continue;

        let group = groupMap.get(fieldKey);
        if (!group) {
          group = { field: fieldKey, label: fieldLabels[fieldKey], posts: [] };
          groupMap.set(fieldKey, group);
        }

        group.posts.push({
          postId: diff.postId,
          title: diff.title,
          slug: diff.slug,
          dbValue: fieldDiff.dbValue,
          fileValue: fieldDiff.fileValue,
        });
      }
    }

    return Array.from(groupMap.values()).sort((a, b) => b.posts.length - a.posts.length);
  }

  /**
   * Sync database metadata to files for the given posts
   * (DB -> File: writes current DB metadata to markdown files).
   *
   * @param postIds Posts to sync; each is handled independently.
   * @returns Counts of posts synced and posts that failed (a falsy result
   *   from the post engine or a thrown error both count as failed).
   */
  async syncDbToFile(postIds: string[]): Promise<{ success: number; failed: number }> {
    const postEngine = getPostEngine();
    let success = 0;
    let failed = 0;

    for (const postId of postIds) {
      try {
        const synced = await postEngine.syncPublishedPostFile(postId);
        if (synced) {
          success++;
        } else {
          failed++;
        }
      } catch (error) {
        console.error(`[MetadataDiffEngine] Failed to sync post ${postId} to file:`, error);
        failed++;
      }
    }

    return { success, failed };
  }

  /**
   * Sync file metadata to database for the given posts
   * (File -> DB: reads file metadata and updates DB).
   *
   * @param postIds Posts to sync; each is handled independently.
   * @param field When given, only that field is copied; otherwise all
   *   diffable fields are copied. `updatedAt` is always refreshed.
   * @returns Counts of posts updated and posts that failed (post not found
   *   in the current project, no file path, unreadable file, or a thrown error).
   */
  async syncFileToDb(postIds: string[], field?: DiffField): Promise<{ success: number; failed: number }> {
    const db = this.getDb();
    let success = 0;
    let failed = 0;

    for (const postId of postIds) {
      try {
        // Get the post from DB to get file path
        const dbPost = await db
          .select()
          .from(posts)
          .where(and(eq(posts.id, postId), eq(posts.projectId, this.currentProjectId)))
          .get();

        if (!dbPost || !dbPost.filePath) {
          failed++;
          continue;
        }

        // Read file metadata
        const fileData = await readPostFile(dbPost.filePath);
        if (!fileData) {
          failed++;
          continue;
        }

        // Build update object based on field or all fields
        const updateData: Record<string, unknown> = {
          updatedAt: new Date(),
        };

        if (!field || field === 'tags') {
          updateData.tags = JSON.stringify(fileData.tags || []);
        }
        if (!field || field === 'categories') {
          updateData.categories = JSON.stringify(fileData.categories || []);
        }
        if (!field || field === 'title') {
          updateData.title = fileData.title;
        }
        if (!field || field === 'excerpt') {
          updateData.excerpt = fileData.excerpt || null;
        }
        if (!field || field === 'author') {
          updateData.author = fileData.author || null;
        }

        // Update database, scoped to the same project as the lookup above.
        await db
          .update(posts)
          .set(updateData)
          .where(and(eq(posts.id, postId), eq(posts.projectId, this.currentProjectId)));

        success++;
      } catch (error) {
        console.error(`[MetadataDiffEngine] Failed to sync post ${postId} to DB:`, error);
        failed++;
      }
    }

    return { success, failed };
  }

  /**
   * Run a full scan as a background task.
   * Scan progress is forwarded to the task as a 0-100 percentage.
   */
  async runScanTask(): Promise<ScanResult> {
    return taskManager.runTask({
      id: `metadata-diff-scan-${Date.now()}`,
      name: 'Scanning for metadata differences',
      execute: async (onProgress) => {
        return this.scanAllPublishedPosts((current, total, message) => {
          // Guard against division by zero when there is nothing to scan.
          const percent = total > 0 ? (current / total) * 100 : 0;
          onProgress(percent, message);
        });
      },
    });
  }

  /**
   * Run sync DB to File as a background task.
   *
   * @param postIds Posts whose files should be rewritten from DB metadata.
   * @param groupLabel Human-readable label used in the task name.
   */
  async runSyncDbToFileTask(postIds: string[], groupLabel: string): Promise<{ success: number; failed: number }> {
    return taskManager.runTask({
      id: `metadata-sync-db-to-file-${Date.now()}`,
      name: `Syncing ${groupLabel} from DB to files`,
      execute: async (onProgress) => {
        onProgress(0, `Syncing ${postIds.length} posts...`);
        const result = await this.syncDbToFile(postIds);
        onProgress(100, `Completed: ${result.success} synced, ${result.failed} failed`);
        return result;
      },
    });
  }

  /**
   * Run sync File to DB as a background task.
   *
   * @param postIds Posts whose DB rows should be updated from file metadata.
   * @param field The single field to copy from file to DB.
   * @param groupLabel Human-readable label used in the task name.
   */
  async runSyncFileToDbTask(postIds: string[], field: DiffField, groupLabel: string): Promise<{ success: number; failed: number }> {
    return taskManager.runTask({
      id: `metadata-sync-file-to-db-${Date.now()}`,
      name: `Syncing ${groupLabel} from files to DB`,
      execute: async (onProgress) => {
        onProgress(0, `Syncing ${postIds.length} posts...`);
        const result = await this.syncFileToDb(postIds, field);
        onProgress(100, `Completed: ${result.success} synced, ${result.failed} failed`);
        return result;
      },
    });
  }
}
// Shared engine instance; remains null until the first accessor call.
let metadataDiffEngineInstance: MetadataDiffEngine | null = null;

/**
 * Return the shared MetadataDiffEngine, constructing it on first use.
 * Every call after the first returns the same object.
 */
export function getMetadataDiffEngine(): MetadataDiffEngine {
  const existing = metadataDiffEngineInstance;
  if (existing) {
    return existing;
  }

  const created = new MetadataDiffEngine();
  metadataDiffEngineInstance = created;
  return created;
}