From 73b5fa68fa6919809ee7c7aefbee85627c75d9a3 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sun, 15 Feb 2026 10:46:04 +0000
Subject: [PATCH] Add stemmer tests and improve MetaEngine/TagEngine branch coverage

Co-authored-by: rfc1437 <774975+rfc1437@users.noreply.github.com>
---
Review notes (this area is not part of the commit message):
 * Line structure was restored from a whitespace-collapsed copy; every hunk
   was re-counted against its header. Indentation of context lines is
   reconstructed (2-space style assumed) - if a hunk is rejected, retry
   with --ignore-whitespace.
 * MetaEngine "should sync dataPath ..." now asserts mockLocalDb.update
   instead of mockLocalDb.select, matching the test's own comment.
 * stemmer "multiple spaces" tests use inputs/needles that really contain
   runs of spaces.
 * Post-image blob ids on the "index" lines predate the two changes above.

 tests/engine/MetaEngine.test.ts | 106 ++++++++++
 tests/engine/TagEngine.test.ts  | 138 +++++++++++++
 tests/engine/stemmer.test.ts    | 354 ++++++++++++++++++++++++++++++++
 3 files changed, 598 insertions(+)
 create mode 100644 tests/engine/stemmer.test.ts

diff --git a/tests/engine/MetaEngine.test.ts b/tests/engine/MetaEngine.test.ts
index a5eef25..8abe9bd 100644
--- a/tests/engine/MetaEngine.test.ts
+++ b/tests/engine/MetaEngine.test.ts
@@ -82,6 +82,11 @@ function createSelectChain() {
 
 const mockLocalDb = {
   select: vi.fn(() => createSelectChain()),
+  update: vi.fn(() => ({
+    set: vi.fn(() => ({
+      where: vi.fn().mockResolvedValue(undefined),
+    })),
+  })),
 };
 
 // Mock the database module
@@ -347,6 +352,26 @@ describe('MetaEngine', () => {
       const categories = await metaEngine.collectCategoriesFromPosts();
       expect(categories).toEqual(['valid']);
     });
+
+    it('should handle posts with invalid JSON tags (gracefully skip)', async () => {
+      mockPosts = [
+        { tags: 'not-valid-json{[' },
+        { tags: JSON.stringify(['valid-tag']) },
+      ];
+
+      const tags = await metaEngine.collectTagsFromPosts();
+      expect(tags).toEqual(['valid-tag']);
+    });
+
+    it('should handle posts with invalid JSON categories (gracefully skip)', async () => {
+      mockPosts = [
+        { categories: 'invalid json here}' },
+        { categories: JSON.stringify(['valid-cat']) },
+      ];
+
+      const categories = await metaEngine.collectCategoriesFromPosts();
+      expect(categories).toEqual(['valid-cat']);
+    });
   });
 
   describe('Event Emission', () => {
@@ -446,6 +471,38 @@ describe('MetaEngine', () => {
       expect(metadata?.description).toBe('Loaded description');
     });
 
+    it('should handle ENOENT error when loading project metadata (no file)', async () => {
+      // No file exists, should set metadata to null
+      await metaEngine.loadProjectMetadata();
+
+      const metadata = await metaEngine.getProjectMetadata();
+      expect(metadata).toBeNull();
+    });
+
+    it('should throw non-ENOENT errors when loading project metadata', async () => {
+      // Mock readFile to throw a non-ENOENT error
+      const originalReadFile = vi.mocked(fs.readFile);
+      originalReadFile.mockRejectedValueOnce(Object.assign(new Error('Permission denied'), { code: 'EACCES' }));
+
+      await expect(metaEngine.loadProjectMetadata()).rejects.toThrow('Permission denied');
+    });
+
+    it('should handle ENOENT error when loading categories (no file)', async () => {
+      // No file exists, should not throw
+      await metaEngine.loadCategories();
+
+      const categories = await metaEngine.getCategories();
+      expect(categories).toEqual([]);
+    });
+
+    it('should throw non-ENOENT errors when loading categories', async () => {
+      // Mock readFile to throw a non-ENOENT error
+      const originalReadFile = vi.mocked(fs.readFile);
+      originalReadFile.mockRejectedValueOnce(Object.assign(new Error('Disk full'), { code: 'ENOSPC' }));
+
+      await expect(metaEngine.loadCategories()).rejects.toThrow('Disk full');
+    });
+
     it('should emit projectMetadataChanged event when metadata is modified', async () => {
       const handler = vi.fn();
       metaEngine.on('projectMetadataChanged', handler);
@@ -560,5 +617,54 @@ describe('MetaEngine', () => {
       expect(categories).toContain('aside');
       expect(categories).toContain('page');
     });
+
+    it('should report isInitialized as false before syncOnStartup', () => {
+      expect(metaEngine.isInitialized()).toBe(false);
+    });
+
+    it('should report isInitialized as true after syncOnStartup', async () => {
+      await metaEngine.syncOnStartup();
+      expect(metaEngine.isInitialized()).toBe(true);
+    });
+
+    it('should reset initialized flag when project context changes', async () => {
+      await metaEngine.syncOnStartup();
+      expect(metaEngine.isInitialized()).toBe(true);
+
+      metaEngine.setProjectContext('different-project');
+      expect(metaEngine.isInitialized()).toBe(false);
+    });
+
+    it('should use custom dataDir when provided in setProjectContext', () => {
+      metaEngine.setProjectContext('project-with-custom-dir', '/custom/data/path');
+
+      const metaDir = metaEngine.getMetaDir();
+      expect(metaDir).toContain('/custom/data/path');
+    });
+
+    it('should sync dataPath from project.json to database if different', async () => {
+      const metaDir = metaEngine.getMetaDir();
+      mockFiles.set(normalizePath(`${metaDir}/project.json`), JSON.stringify({
+        name: 'Project',
+        dataPath: '/custom/path/from/file',
+      }));
+
+      // Database has different or missing dataPath
+      mockProject = {
+        id: 'test-project',
+        name: 'Project',
+        description: null,
+        dataPath: null,
+        slug: 'project',
+        createdAt: new Date(),
+        updatedAt: new Date(),
+        isActive: true,
+      };
+
+      await metaEngine.syncOnStartup();
+
+      // Should have synced (database update called)
+      expect(mockLocalDb.update).toHaveBeenCalled();
+    });
   });
 });
diff --git a/tests/engine/TagEngine.test.ts b/tests/engine/TagEngine.test.ts
index f15a292..75dd865 100644
--- a/tests/engine/TagEngine.test.ts
+++ b/tests/engine/TagEngine.test.ts
@@ -8,6 +8,7 @@
 import { describe, it, expect, beforeEach, vi, afterEach } from 'vitest';
 import { TagEngine, TagData, TagWithCount, MergeTagsResult, DeleteTagResult } from '../../src/main/engine/TagEngine';
 import { resetMockCounters } from '../utils/factories';
+import { getDatabase } from '../../src/main/database';
 
 // Create mock data stores
 const mockTags = new Map();
@@ -487,4 +488,141 @@ describe('TagEngine', () => {
       expect(result.discovered).toBeGreaterThanOrEqual(0);
     });
   });
+
+  describe('loadTagsFromFile', () => {
+    it('should load tags from filesystem in portable format', async () => {
+      // Mock fs.readFile to return valid JSON
+      const fs = await import('fs/promises');
+      vi.mocked(fs.readFile).mockResolvedValueOnce(
+        JSON.stringify([
+          { name: 'tag1', color: '#ff0000' },
+          { name: 'tag2' },
+        ])
+      );
+
+      // Mock select to return empty (no existing tags)
+      mockSelectDataQueue = [[], []];
+
+      await tagEngine.loadTagsFromFile();
+
+      // Verify insert was called for the new tags
+      expect(mockLocalDb.insert).toHaveBeenCalled();
+    });
+
+    it('should handle ENOENT error gracefully (file not found)', async () => {
+      const fs = await import('fs/promises');
+      const error = new Error('ENOENT');
+      (error as NodeJS.ErrnoException).code = 'ENOENT';
+      vi.mocked(fs.readFile).mockRejectedValueOnce(error);
+
+      // Should not throw
+      await expect(tagEngine.loadTagsFromFile()).resolves.toBeUndefined();
+    });
+
+    it('should log non-ENOENT errors when loading', async () => {
+      const fs = await import('fs/promises');
+      const error = new Error('Permission denied');
+      (error as NodeJS.ErrnoException).code = 'EACCES';
+      vi.mocked(fs.readFile).mockRejectedValueOnce(error);
+
+      // Should not throw, but should log error
+      await expect(tagEngine.loadTagsFromFile()).resolves.toBeUndefined();
+    });
+
+    it('should skip tags with empty names', async () => {
+      const fs = await import('fs/promises');
+      vi.mocked(fs.readFile).mockResolvedValueOnce(
+        JSON.stringify([
+          { name: '' },
+          { name: ' ' },
+          { name: 'valid' },
+        ])
+      );
+
+      mockSelectDataQueue = [[]]; // no existing tags
+
+      await tagEngine.loadTagsFromFile();
+
+      // Only 'valid' should be processed
+      const insertedTags = mockLocalDb.insert.mock.calls;
+      expect(insertedTags.length).toBeGreaterThan(0);
+    });
+
+    it('should update color for existing tag when loading', async () => {
+      const fs = await import('fs/promises');
+      vi.mocked(fs.readFile).mockResolvedValueOnce(
+        JSON.stringify([
+          { name: 'existing-tag', color: '#ff0000' },
+        ])
+      );
+
+      // Existing tag found
+      mockSelectDataQueue = [[{ id: 'tag-1' }]];
+
+      await tagEngine.loadTagsFromFile();
+
+      // Should update existing tag with color
+      expect(mockLocalDb.update).toHaveBeenCalled();
+    });
+  });
+
+  describe('renameTag error cases', () => {
+    it('should throw error when new name is empty', async () => {
+      await expect(tagEngine.renameTag('tag-1', '')).rejects.toThrow('New name is required');
+    });
+
+    it('should throw error when new name is whitespace only', async () => {
+      await expect(tagEngine.renameTag('tag-1', ' ')).rejects.toThrow('New name is required');
+    });
+
+    it('should throw error when tag not found', async () => {
+      mockSelectDataDefault = [];
+      await expect(tagEngine.renameTag('non-existent', 'new-name')).rejects.toThrow('Tag not found');
+    });
+
+    it('should return success with 0 posts updated when renaming to same name', async () => {
+      mockSelectDataQueue = [
+        [{ id: 'tag-1', name: 'same-name', projectId: 'default', createdAt: new Date(), updatedAt: new Date() }],
+      ];
+
+      const result = await tagEngine.renameTag('tag-1', 'same-name');
+
+      expect(result.success).toBe(true);
+      expect(result.postsUpdated).toBe(0);
+      expect(result.oldName).toBe('same-name');
+      expect(result.newName).toBe('same-name');
+    });
+
+    it('should throw error when target name already exists', async () => {
+      mockSelectDataQueue = [
+        [{ id: 'tag-1', name: 'old-name', projectId: 'default', createdAt: new Date(), updatedAt: new Date() }],
+        [{ id: 'tag-2' }], // duplicate found
+      ];
+
+      await expect(tagEngine.renameTag('tag-1', 'existing-name')).rejects.toThrow('already exists');
+    });
+  });
+
+  describe('getPostsWithTag edge cases', () => {
+    it('should return empty array when tag not found', async () => {
+      mockSelectDataQueue = [[]]; // tag not found
+
+      const result = await tagEngine.getPostsWithTag('non-existent-tag');
+
+      expect(result).toEqual([]);
+    });
+
+    it('should return empty array when client is not available', async () => {
+      // Mock getClient to return null
+      const mockGetClient = vi.fn().mockReturnValue(null);
+      vi.mocked(getDatabase).mockReturnValue({
+        getLocal: vi.fn(() => mockLocalDb),
+        getLocalClient: mockGetClient,
+      } as any);
+
+      const result = await tagEngine.getPostsWithTag('tag-1');
+
+      expect(result).toEqual([]);
+    });
+  });
 });
diff --git a/tests/engine/stemmer.test.ts b/tests/engine/stemmer.test.ts
new file mode 100644
index 0000000..1212259
--- /dev/null
+++ b/tests/engine/stemmer.test.ts
@@ -0,0 +1,354 @@
+/**
+ * Stemmer Unit Tests
+ *
+ * Tests the REAL stemmer functions without any mocks.
+ * The stemmer provides multilingual text stemming for FTS indexing.
+ *
+ * Tests all branches including:
+ * - Various languages
+ * - ISO language code conversion
+ * - Empty/null inputs
+ * - FTS5 query operators (AND, OR, NOT)
+ * - Quoted phrases
+ * - Prefix searches
+ * - Edge cases
+ */
+
+import { describe, it, expect, beforeEach } from 'vitest';
+import {
+  stemText,
+  stemQuery,
+  stemWord,
+  getSupportedLanguages,
+  isoToStemmerLanguage,
+  prepareForFTS,
+  SupportedLanguage,
+} from '../../src/main/engine/stemmer';
+
+describe('stemmer', () => {
+  describe('getSupportedLanguages', () => {
+    it('should return an array of supported languages', () => {
+      const languages = getSupportedLanguages();
+
+      expect(Array.isArray(languages)).toBe(true);
+      expect(languages.length).toBeGreaterThan(0);
+      expect(languages).toContain('english');
+    });
+
+    it('should include common languages', () => {
+      const languages = getSupportedLanguages();
+
+      expect(languages).toContain('german');
+      expect(languages).toContain('french');
+      expect(languages).toContain('spanish');
+    });
+  });
+
+  describe('isoToStemmerLanguage', () => {
+    it('should convert ISO 639-1 codes to stemmer language names', () => {
+      expect(isoToStemmerLanguage('en')).toBe('english');
+      expect(isoToStemmerLanguage('de')).toBe('german');
+      expect(isoToStemmerLanguage('fr')).toBe('french');
+      expect(isoToStemmerLanguage('es')).toBe('spanish');
+    });
+
+    it('should handle locale codes with region (e.g., en-US)', () => {
+      expect(isoToStemmerLanguage('en-US')).toBe('english');
+      expect(isoToStemmerLanguage('de-DE')).toBe('german');
+      expect(isoToStemmerLanguage('fr-FR')).toBe('french');
+      expect(isoToStemmerLanguage('es-MX')).toBe('spanish');
+    });
+
+    it('should be case insensitive', () => {
+      expect(isoToStemmerLanguage('EN')).toBe('english');
+      expect(isoToStemmerLanguage('De')).toBe('german');
+      expect(isoToStemmerLanguage('FR')).toBe('french');
+    });
+
+    it('should return english for unknown language codes', () => {
+      expect(isoToStemmerLanguage('xx')).toBe('english');
+      expect(isoToStemmerLanguage('unknown')).toBe('english');
+      expect(isoToStemmerLanguage('')).toBe('english');
+    });
+
+    it('should handle all mapped ISO codes', () => {
+      expect(isoToStemmerLanguage('ar')).toBe('arabic');
+      expect(isoToStemmerLanguage('hy')).toBe('armenian');
+      expect(isoToStemmerLanguage('eu')).toBe('basque');
+      expect(isoToStemmerLanguage('ca')).toBe('catalan');
+      expect(isoToStemmerLanguage('cs')).toBe('czech');
+      expect(isoToStemmerLanguage('da')).toBe('danish');
+      expect(isoToStemmerLanguage('nl')).toBe('dutch');
+      expect(isoToStemmerLanguage('fi')).toBe('finnish');
+      expect(isoToStemmerLanguage('hu')).toBe('hungarian');
+      expect(isoToStemmerLanguage('it')).toBe('italian');
+      expect(isoToStemmerLanguage('ga')).toBe('irish');
+      expect(isoToStemmerLanguage('no')).toBe('norwegian');
+      expect(isoToStemmerLanguage('nb')).toBe('norwegian');
+      expect(isoToStemmerLanguage('nn')).toBe('norwegian');
+      expect(isoToStemmerLanguage('pt')).toBe('portuguese');
+      expect(isoToStemmerLanguage('ro')).toBe('romanian');
+      expect(isoToStemmerLanguage('ru')).toBe('russian');
+      expect(isoToStemmerLanguage('sl')).toBe('slovene');
+      expect(isoToStemmerLanguage('sv')).toBe('swedish');
+      expect(isoToStemmerLanguage('ta')).toBe('tamil');
+      expect(isoToStemmerLanguage('tr')).toBe('turkish');
+    });
+  });
+
+  describe('stemWord', () => {
+    it('should stem English words correctly', () => {
+      expect(stemWord('running', 'english')).toBe('run');
+      expect(stemWord('dogs', 'english')).toBe('dog');
+      expect(stemWord('played', 'english')).toBe('play');
+      expect(stemWord('playing', 'english')).toBe('play');
+    });
+
+    it('should stem German words correctly', () => {
+      expect(stemWord('häuser', 'german')).toBe('haus');
+      expect(stemWord('Häuser', 'german')).toBe('haus');
+    });
+
+    it('should stem French words correctly', () => {
+      expect(stemWord('chanter', 'french')).toBe('chant');
+      expect(stemWord('chanteuse', 'french')).toBe('chanteux');
+    });
+
+    it('should default to English when no language specified', () => {
+      expect(stemWord('running')).toBe('run');
+      expect(stemWord('dogs')).toBe('dog');
+    });
+
+    it('should handle uppercase words by converting to lowercase', () => {
+      expect(stemWord('RUNNING', 'english')).toBe('run');
+      expect(stemWord('DOGS', 'english')).toBe('dog');
+    });
+  });
+
+  describe('stemText', () => {
+    it('should stem all words in a sentence', () => {
+      const result = stemText('Running dogs are playing', 'english');
+      expect(result).toContain('run');
+      expect(result).toContain('dog');
+    });
+
+    it('should return empty string for empty input', () => {
+      expect(stemText('', 'english')).toBe('');
+      expect(stemText(' ', 'english')).toBe('');
+    });
+
+    it('should return empty string for null/undefined-like empty text', () => {
+      expect(stemText('', 'english')).toBe('');
+    });
+
+    it('should handle multiple spaces between words', () => {
+      const result = stemText('Running    dogs   are     playing', 'english');
+      const words = result.split(' ');
+      expect(words).not.toContain('');
+    });
+
+    it('should handle German text correctly', () => {
+      const result = stemText('Häuser Haus', 'german');
+      expect(result).toContain('haus');
+    });
+
+    it('should handle text with numbers', () => {
+      const result = stemText('Running 123 dogs', 'english');
+      expect(result).toContain('run');
+      expect(result).toContain('123');
+      expect(result).toContain('dog');
+    });
+
+    it('should handle punctuation by extracting words', () => {
+      const result = stemText('Hello, world! How are you?', 'english');
+      expect(result).toContain('hello');
+      expect(result).toContain('world');
+    });
+
+    it('should use default English language when not specified', () => {
+      const result = stemText('Running dogs');
+      expect(result).toContain('run');
+      expect(result).toContain('dog');
+    });
+
+    it('should handle Unicode characters for non-ASCII languages', () => {
+      // Russian text
+      const russianResult = stemText('привет мир', 'russian');
+      expect(russianResult.length).toBeGreaterThan(0);
+
+      // Arabic text
+      const arabicResult = stemText('مرحبا', 'arabic');
+      expect(arabicResult.length).toBeGreaterThan(0);
+    });
+  });
+
+  describe('stemQuery', () => {
+    it('should stem simple queries', () => {
+      const result = stemQuery('running dogs', 'english');
+      expect(result).toContain('run');
+      expect(result).toContain('dog');
+    });
+
+    it('should return empty string for empty query', () => {
+      expect(stemQuery('', 'english')).toBe('');
+      expect(stemQuery(' ', 'english')).toBe('');
+    });
+
+    it('should preserve AND operator in uppercase', () => {
+      const result = stemQuery('running AND dogs', 'english');
+      expect(result).toContain('AND');
+      expect(result).toContain('run');
+      expect(result).toContain('dog');
+    });
+
+    it('should preserve OR operator in uppercase', () => {
+      const result = stemQuery('cats OR dogs', 'english');
+      expect(result).toContain('OR');
+      expect(result).toContain('cat');
+      expect(result).toContain('dog');
+    });
+
+    it('should preserve NOT operator in uppercase', () => {
+      const result = stemQuery('NOT dogs', 'english');
+      expect(result).toContain('NOT');
+      expect(result).toContain('dog');
+    });
+
+    it('should handle lowercase operators by stemming them', () => {
+      // lowercase 'and', 'or', 'not' should be stemmed as regular words
+      const andResult = stemQuery('and', 'english');
+      // 'and' stemmed might be 'and' itself
+      expect(andResult.length).toBeGreaterThan(0);
+    });
+
+    it('should stem words inside quoted phrases', () => {
+      const result = stemQuery('"running fast"', 'english');
+      expect(result).toContain('"');
+      expect(result).toContain('run');
+      expect(result).toContain('fast');
+    });
+
+    it('should keep quotes around stemmed phrase', () => {
+      const result = stemQuery('"running dogs"', 'english');
+      expect(result.startsWith('"')).toBe(true);
+      expect(result.endsWith('"')).toBe(true);
+    });
+
+    it('should handle prefix searches with asterisk', () => {
+      const result = stemQuery('runn*', 'english');
+      expect(result).toContain('*');
+      // The word part before * should be stemmed
+      expect(result.includes('run')).toBe(true);
+    });
+
+    it('should handle prefix search when word results in empty after tokenization', () => {
+      // Test with just asterisk (edge case)
+      const result = stemQuery('*', 'english');
+      // Should return the original match since no word part
+      expect(result).toBe('*');
+    });
+
+    it('should handle complex queries with multiple operators', () => {
+      const result = stemQuery('"running fast" AND dogs NOT cats', 'english');
+      expect(result).toContain('AND');
+      expect(result).toContain('NOT');
+      expect(result).toContain('dog');
+      expect(result).toContain('cat');
+    });
+
+    it('should clean up multiple spaces', () => {
+      const result = stemQuery('running    dogs', 'english');
+      expect(result).not.toContain('  ');
+    });
+
+    it('should use default English language when not specified', () => {
+      const result = stemQuery('running dogs');
+      expect(result).toContain('run');
+      expect(result).toContain('dog');
+    });
+
+    it('should handle unquoted words that tokenize to empty', () => {
+      // Special characters only
+      const result = stemQuery('!!!', 'english');
+      // Should result in empty string or just spaces
+      expect(result.trim()).toBe('');
+    });
+
+    it('should handle mixed quoted and unquoted terms', () => {
+      const result = stemQuery('dogs "running fast" cats', 'english');
+      expect(result).toContain('dog');
+      expect(result).toContain('cat');
+      expect(result).toContain('"');
+    });
+  });
+
+  describe('prepareForFTS', () => {
+    it('should prepare text for FTS indexing by stemming', () => {
+      const result = prepareForFTS('Running dogs are playing', 'english');
+      expect(result).toContain('run');
+      expect(result).toContain('dog');
+    });
+
+    it('should use default English when no language specified', () => {
+      const result = prepareForFTS('Running dogs');
+      expect(result).toContain('run');
+      expect(result).toContain('dog');
+    });
+
+    it('should be identical to stemText', () => {
+      const text = 'Running dogs are playing';
+      expect(prepareForFTS(text, 'english')).toBe(stemText(text, 'english'));
+    });
+  });
+
+  describe('stemmer caching', () => {
+    it('should reuse cached stemmers for same language', () => {
+      // Call multiple times with same language
+      const result1 = stemWord('running', 'english');
+      const result2 = stemWord('playing', 'english');
+
+      // Results should be consistent
+      expect(result1).toBe('run');
+      expect(result2).toBe('play');
+    });
+
+    it('should support different languages in sequence', () => {
+      const englishResult = stemWord('running', 'english');
+      const germanResult = stemWord('häuser', 'german');
+      const frenchResult = stemWord('chanter', 'french');
+
+      expect(englishResult).toBe('run');
+      expect(germanResult).toBe('haus');
+      expect(frenchResult).toBe('chant');
+    });
+  });
+
+  describe('edge cases', () => {
+    it('should handle very long text', () => {
+      const longText = 'running '.repeat(1000);
+      const result = stemText(longText, 'english');
+      expect(result.length).toBeGreaterThan(0);
+      expect(result.split(' ').every(word => word === 'run' || word === '')).toBe(true);
+    });
+
+    it('should handle special Unicode characters', () => {
+      const result = stemText('café résumé naïve', 'english');
+      expect(result.length).toBeGreaterThan(0);
+    });
+
+    it('should handle emoji by extracting adjacent words', () => {
+      const result = stemText('running 🏃 dogs', 'english');
+      expect(result).toContain('run');
+      expect(result).toContain('dog');
+    });
+
+    it('should handle mixed case consistently', () => {
+      const lower = stemText('running', 'english');
+      const upper = stemText('RUNNING', 'english');
+      const mixed = stemText('RuNnInG', 'english');
+
+      expect(lower).toBe(upper);
+      expect(lower).toBe(mixed);
+    });
+  });
+});