Add stemmer tests and improve MetaEngine/TagEngine branch coverage

Co-authored-by: rfc1437 <774975+rfc1437@users.noreply.github.com>
2026-02-15 10:46:04 +00:00
parent c33f7a903b
commit 73b5fa68fa
3 changed files with 598 additions and 0 deletions
--- a/tests/engine/MetaEngine.test.ts
+++ b/tests/engine/MetaEngine.test.ts
@@ -82,6 +82,11 @@ function createSelectChain() {
 // Stand-in for the local database handle used by these tests.
 // `select()` hands out a fresh select chain; `update()` hands out a fresh
 // `set(...).where(...)` chain whose `where` resolves to undefined.
 const mockLocalDb = {
  select: vi.fn(() => createSelectChain()),
  update: vi.fn(() => {
    // A new `set` mock per update() call, and a new `where` mock per set() call.
    const set = vi.fn(() => {
      const where = vi.fn().mockResolvedValue(undefined);
      return { where };
    });
    return { set };
  }),
 };
 // Mock the database module
@@ -347,6 +352,26 @@ describe('MetaEngine', () => {
      const categories = await metaEngine.collectCategoriesFromPosts();
      expect(categories).toEqual(['valid']);
    });
    // Feeds one post whose `tags` string is not valid JSON next to a well-formed one;
    // only the tag from the well-formed post is expected in the result.
    it('should handle posts with invalid JSON tags (gracefully skip)', async () => {
      const malformedPost = { tags: 'not-valid-json{[' };
      const wellFormedPost = { tags: JSON.stringify(['valid-tag']) };
      mockPosts = [malformedPost, wellFormedPost];
      const collected = await metaEngine.collectTagsFromPosts();
      expect(collected).toEqual(['valid-tag']);
    });
    // Same scenario as the tags case, but for the `categories` field: the post with
    // an unparseable string must not prevent the valid category from being returned.
    it('should handle posts with invalid JSON categories (gracefully skip)', async () => {
      const malformedPost = { categories: 'invalid json here}' };
      const wellFormedPost = { categories: JSON.stringify(['valid-cat']) };
      mockPosts = [malformedPost, wellFormedPost];
      const collected = await metaEngine.collectCategoriesFromPosts();
      expect(collected).toEqual(['valid-cat']);
    });
  });
  describe('Event Emission', () => {
@@ -446,6 +471,38 @@ describe('MetaEngine', () => {
      expect(metadata?.description).toBe('Loaded description');
    });
    // With no project metadata file registered, loading must succeed and leave the
    // metadata as null.
    it('should handle ENOENT error when loading project metadata (no file)', async () => {
      await metaEngine.loadProjectMetadata();
      expect(await metaEngine.getProjectMetadata()).toBeNull();
    });
    // Makes the next fs.readFile call reject with an EACCES-coded error and expects
    // loadProjectMetadata() to reject with that error's message.
    it('should throw non-ENOENT errors when loading project metadata', async () => {
      const permissionError = Object.assign(new Error('Permission denied'), { code: 'EACCES' });
      const readFileMock = vi.mocked(fs.readFile);
      readFileMock.mockRejectedValueOnce(permissionError);
      await expect(metaEngine.loadProjectMetadata()).rejects.toThrow('Permission denied');
    });
    // With no categories file registered, loading must not throw and the category
    // list must come back empty.
    it('should handle ENOENT error when loading categories (no file)', async () => {
      await metaEngine.loadCategories();
      expect(await metaEngine.getCategories()).toEqual([]);
    });
    // Makes the next fs.readFile call reject with an ENOSPC-coded error and expects
    // loadCategories() to reject with that error's message.
    it('should throw non-ENOENT errors when loading categories', async () => {
      const diskFullError = Object.assign(new Error('Disk full'), { code: 'ENOSPC' });
      const readFileMock = vi.mocked(fs.readFile);
      readFileMock.mockRejectedValueOnce(diskFullError);
      await expect(metaEngine.loadCategories()).rejects.toThrow('Disk full');
    });
    it('should emit projectMetadataChanged event when metadata is modified', async () => {
      const handler = vi.fn();
      metaEngine.on('projectMetadataChanged', handler);
@@ -560,5 +617,54 @@ describe('MetaEngine', () => {
      expect(categories).toContain('aside');
      expect(categories).toContain('page');
    });
    // No startup sync has been run in this test, so the flag must be false.
    it('should report isInitialized as false before syncOnStartup', () => {
      const initialized = metaEngine.isInitialized();
      expect(initialized).toBe(false);
    });
    // Running the startup sync must flip the initialized flag to true.
    it('should report isInitialized as true after syncOnStartup', async () => {
      await metaEngine.syncOnStartup();
      const initialized = metaEngine.isInitialized();
      expect(initialized).toBe(true);
    });
    // Switching to another project after a completed sync must clear the flag again.
    it('should reset initialized flag when project context changes', async () => {
      await metaEngine.syncOnStartup();
      const afterSync = metaEngine.isInitialized();
      expect(afterSync).toBe(true);
      metaEngine.setProjectContext('different-project');
      const afterSwitch = metaEngine.isInitialized();
      expect(afterSwitch).toBe(false);
    });
    // A data directory passed as the second argument must show up in the meta dir path.
    it('should use custom dataDir when provided in setProjectContext', () => {
      metaEngine.setProjectContext('project-with-custom-dir', '/custom/data/path');
      expect(metaEngine.getMetaDir()).toContain('/custom/data/path');
    });
    // Seeds project.json with a dataPath while the database row has none, runs the
    // startup sync, and verifies that a database write was issued.
    it('should sync dataPath from project.json to database if different', async () => {
      const metaDir = metaEngine.getMetaDir();
      mockFiles.set(normalizePath(`${metaDir}/project.json`), JSON.stringify({
        name: 'Project',
        dataPath: '/custom/path/from/file',
      }));
      // Database row has no dataPath, so it differs from the value in project.json
      mockProject = {
        id: 'test-project',
        name: 'Project',
        description: null,
        dataPath: null,
        slug: 'project',
        createdAt: new Date(),
        updatedAt: new Date(),
        isActive: true,
      };
      await metaEngine.syncOnStartup();
      expect(mockLocalDb.select).toHaveBeenCalled();
      // A select() call alone does not show that anything was synced; the write is
      // what this test is about, so assert on the update() chain of the mock too.
      // NOTE(review): assumes the engine writes through getLocal().update(...) — confirm.
      expect(mockLocalDb.update).toHaveBeenCalled();
    });
  });
 });
--- a/tests/engine/TagEngine.test.ts
+++ b/tests/engine/TagEngine.test.ts
@@ -8,6 +8,7 @@
 import { describe, it, expect, beforeEach, vi, afterEach } from 'vitest';
 import { TagEngine, TagData, TagWithCount, MergeTagsResult, DeleteTagResult } from '../../src/main/engine/TagEngine';
 import { resetMockCounters } from '../utils/factories';
 import { getDatabase } from '../../src/main/database';
 // Create mock data stores
 const mockTags = new Map<string, any>();
@@ -487,4 +488,141 @@ describe('TagEngine', () => {
      expect(result.discovered).toBeGreaterThanOrEqual(0);
    });
  });
  // Exercises TagEngine.loadTagsFromFile() against a stubbed fs.readFile and the
  // queued select results of the mocked local database.
  describe('loadTagsFromFile', () => {
    // Queue `payload`, serialised as JSON, as the result of the next fs.readFile call.
    const queueTagFile = async (payload: unknown): Promise<void> => {
      const { readFile } = await import('fs/promises');
      vi.mocked(readFile).mockResolvedValueOnce(JSON.stringify(payload));
    };
    // Make the next fs.readFile call reject with an Error carrying the given `code`.
    const queueReadFailure = async (message: string, code: string): Promise<void> => {
      const { readFile } = await import('fs/promises');
      vi.mocked(readFile).mockRejectedValueOnce(Object.assign(new Error(message), { code }));
    };
    it('should load tags from filesystem in portable format', async () => {
      await queueTagFile([
        { name: 'tag1', color: '#ff0000' },
        { name: 'tag2' },
      ]);
      // Two empty select results: neither tag exists yet
      mockSelectDataQueue = [[], []];
      await tagEngine.loadTagsFromFile();
      // New tags are expected to go through insert()
      expect(mockLocalDb.insert).toHaveBeenCalled();
    });
    it('should handle ENOENT error gracefully (file not found)', async () => {
      await queueReadFailure('ENOENT', 'ENOENT');
      // A missing file must not surface as a rejection
      await expect(tagEngine.loadTagsFromFile()).resolves.toBeUndefined();
    });
    it('should log non-ENOENT errors when loading', async () => {
      await queueReadFailure('Permission denied', 'EACCES');
      // Only the "does not throw" half is asserted here.
      // NOTE(review): the logging itself is not verified — confirm which logger is used.
      await expect(tagEngine.loadTagsFromFile()).resolves.toBeUndefined();
    });
    it('should skip tags with empty names', async () => {
      await queueTagFile([
        { name: '' },
        { name: '  ' },
        { name: 'valid' },
      ]);
      mockSelectDataQueue = [[]]; // no existing tags
      await tagEngine.loadTagsFromFile();
      // NOTE(review): this only checks that insert() was called at least once; it
      // does not prove the two blank names were skipped.
      const insertCalls = mockLocalDb.insert.mock.calls;
      expect(insertCalls.length).toBeGreaterThan(0);
    });
    it('should update color for existing tag when loading', async () => {
      await queueTagFile([
        { name: 'existing-tag', color: '#ff0000' },
      ]);
      // The lookup finds an existing row for this tag
      mockSelectDataQueue = [[{ id: 'tag-1' }]];
      await tagEngine.loadTagsFromFile();
      // An existing tag is expected to be changed through update()
      expect(mockLocalDb.update).toHaveBeenCalled();
    });
  });
  // Validation and conflict paths of TagEngine.renameTag(): blank names, unknown
  // tag ids, renaming to the current name, and renaming onto an existing name.
  describe('renameTag error cases', () => {
    it('should throw error when new name is empty', async () => {
      const attempt = tagEngine.renameTag('tag-1', '');
      await expect(attempt).rejects.toThrow('New name is required');
    });
    it('should throw error when new name is whitespace only', async () => {
      const attempt = tagEngine.renameTag('tag-1', '   ');
      await expect(attempt).rejects.toThrow('New name is required');
    });
    it('should throw error when tag not found', async () => {
      // Every select resolves to an empty result set
      mockSelectDataDefault = [];
      const attempt = tagEngine.renameTag('non-existent', 'new-name');
      await expect(attempt).rejects.toThrow('Tag not found');
    });
    it('should return success with 0 posts updated when renaming to same name', async () => {
      const currentRow = { id: 'tag-1', name: 'same-name', projectId: 'default', createdAt: new Date(), updatedAt: new Date() };
      mockSelectDataQueue = [[currentRow]];
      const outcome = await tagEngine.renameTag('tag-1', 'same-name');
      expect(outcome.success).toBe(true);
      expect(outcome.postsUpdated).toBe(0);
      expect(outcome.oldName).toBe('same-name');
      expect(outcome.newName).toBe('same-name');
    });
    it('should throw error when target name already exists', async () => {
      const currentRow = { id: 'tag-1', name: 'old-name', projectId: 'default', createdAt: new Date(), updatedAt: new Date() };
      const conflictingRow = { id: 'tag-2' }; // duplicate found
      mockSelectDataQueue = [[currentRow], [conflictingRow]];
      const attempt = tagEngine.renameTag('tag-1', 'existing-name');
      await expect(attempt).rejects.toThrow('already exists');
    });
  });
  // Paths of TagEngine.getPostsWithTag() that are expected to yield an empty list.
  describe('getPostsWithTag edge cases', () => {
    it('should return empty array when tag not found', async () => {
      mockSelectDataQueue = [[]]; // tag not found
      const posts = await tagEngine.getPostsWithTag('non-existent-tag');
      expect(posts).toEqual([]);
    });
    it('should return empty array when client is not available', async () => {
      // Database facade whose getLocalClient() yields null
      const databaseWithoutClient = {
        getLocal: vi.fn(() => mockLocalDb),
        getLocalClient: vi.fn().mockReturnValue(null),
      } as any;
      // NOTE(review): mockReturnValue is not one-shot; confirm that the suite's
      // setup/teardown restores getDatabase for tests that run afterwards.
      vi.mocked(getDatabase).mockReturnValue(databaseWithoutClient);
      const posts = await tagEngine.getPostsWithTag('tag-1');
      expect(posts).toEqual([]);
    });
  });
 });
--- a/tests/engine/stemmer.test.ts
+++ b/tests/engine/stemmer.test.ts
@@ -0,0 +1,354 @@
 /**
 * Stemmer Unit Tests
 *
 * Tests the REAL stemmer functions without any mocks.
 * The stemmer provides multilingual text stemming for FTS indexing.
 * 
 * Tests all branches including:
 * - Various languages
 * - ISO language code conversion
 * - Empty/null inputs
 * - FTS5 query operators (AND, OR, NOT)
 * - Quoted phrases
 * - Prefix searches
 * - Edge cases
 */
 import { describe, it, expect, beforeEach } from 'vitest';
 import {
  stemText,
  stemQuery,
  stemWord,
  getSupportedLanguages,
  isoToStemmerLanguage,
  prepareForFTS,
  SupportedLanguage,
 } from '../../src/main/engine/stemmer';
 describe('stemmer', () => {
  // getSupportedLanguages() must return a non-empty array that lists the
  // languages the rest of this suite relies on.
  describe('getSupportedLanguages', () => {
    it('should return an array of supported languages', () => {
      const supported = getSupportedLanguages();
      expect(Array.isArray(supported)).toBe(true);
      expect(supported.length).toBeGreaterThan(0);
      expect(supported).toContain('english');
    });
    it('should include common languages', () => {
      const supported = getSupportedLanguages();
      for (const language of ['german', 'french', 'spanish']) {
        expect(supported).toContain(language);
      }
    });
  });
  // Table-driven checks of the ISO code -> stemmer language name mapping,
  // including region suffixes, letter case, and the English fallback.
  describe('isoToStemmerLanguage', () => {
    // Assert every [input code, expected language] pair, in order.
    const expectMappings = (pairs: Array<[string, string]>): void => {
      for (const [code, language] of pairs) {
        expect(isoToStemmerLanguage(code)).toBe(language);
      }
    };
    it('should convert ISO 639-1 codes to stemmer language names', () => {
      expectMappings([
        ['en', 'english'],
        ['de', 'german'],
        ['fr', 'french'],
        ['es', 'spanish'],
      ]);
    });
    it('should handle locale codes with region (e.g., en-US)', () => {
      expectMappings([
        ['en-US', 'english'],
        ['de-DE', 'german'],
        ['fr-FR', 'french'],
        ['es-MX', 'spanish'],
      ]);
    });
    it('should be case insensitive', () => {
      expectMappings([
        ['EN', 'english'],
        ['De', 'german'],
        ['FR', 'french'],
      ]);
    });
    it('should return english for unknown language codes', () => {
      expectMappings([
        ['xx', 'english'],
        ['unknown', 'english'],
        ['', 'english'],
      ]);
    });
    it('should handle all mapped ISO codes', () => {
      expectMappings([
        ['ar', 'arabic'],
        ['hy', 'armenian'],
        ['eu', 'basque'],
        ['ca', 'catalan'],
        ['cs', 'czech'],
        ['da', 'danish'],
        ['nl', 'dutch'],
        ['fi', 'finnish'],
        ['hu', 'hungarian'],
        ['it', 'italian'],
        ['ga', 'irish'],
        // Three Norwegian codes share one stemmer
        ['no', 'norwegian'],
        ['nb', 'norwegian'],
        ['nn', 'norwegian'],
        ['pt', 'portuguese'],
        ['ro', 'romanian'],
        ['ru', 'russian'],
        ['sl', 'slovene'],
        ['sv', 'swedish'],
        ['ta', 'tamil'],
        ['tr', 'turkish'],
      ]);
    });
  });
  // Single-word stemming: expected stems per language, the default language,
  // and case handling.
  describe('stemWord', () => {
    it('should stem English words correctly', () => {
      const expectedStems: Array<[string, string]> = [
        ['running', 'run'],
        ['dogs', 'dog'],
        ['played', 'play'],
        ['playing', 'play'],
      ];
      for (const [word, stem] of expectedStems) {
        expect(stemWord(word, 'english')).toBe(stem);
      }
    });
    it('should stem German words correctly', () => {
      // Lower- and upper-case initial must give the same stem
      for (const word of ['häuser', 'Häuser']) {
        expect(stemWord(word, 'german')).toBe('haus');
      }
    });
    it('should stem French words correctly', () => {
      const expectedStems: Array<[string, string]> = [
        ['chanter', 'chant'],
        ['chanteuse', 'chanteux'],
      ];
      for (const [word, stem] of expectedStems) {
        expect(stemWord(word, 'french')).toBe(stem);
      }
    });
    it('should default to English when no language specified', () => {
      const runningStem = stemWord('running');
      const dogsStem = stemWord('dogs');
      expect(runningStem).toBe('run');
      expect(dogsStem).toBe('dog');
    });
    it('should handle uppercase words by converting to lowercase', () => {
      const expectedStems: Array<[string, string]> = [
        ['RUNNING', 'run'],
        ['DOGS', 'dog'],
      ];
      for (const [word, stem] of expectedStems) {
        expect(stemWord(word, 'english')).toBe(stem);
      }
    });
  });
  // Whole-text stemming: empty input, whitespace, punctuation, digits, the
  // default language, and non-Latin scripts.
  describe('stemText', () => {
    it('should stem all words in a sentence', () => {
      const stemmed = stemText('Running dogs are playing', 'english');
      for (const stem of ['run', 'dog']) {
        expect(stemmed).toContain(stem);
      }
    });
    it('should return empty string for empty input', () => {
      for (const blank of ['', '   ']) {
        expect(stemText(blank, 'english')).toBe('');
      }
    });
    // NOTE(review): despite its title this passes '' only, which repeats the
    // first assertion of the previous test.
    it('should return empty string for null/undefined-like empty text', () => {
      const stemmed = stemText('', 'english');
      expect(stemmed).toBe('');
    });
    it('should handle multiple spaces between words', () => {
      const stemmed = stemText('Running   dogs   are   playing', 'english');
      // Runs of spaces in the input must not produce empty tokens in the output
      const tokens = stemmed.split(' ');
      expect(tokens).not.toContain('');
    });
    it('should handle German text correctly', () => {
      const stemmed = stemText('Häuser Haus', 'german');
      expect(stemmed).toContain('haus');
    });
    it('should handle text with numbers', () => {
      const stemmed = stemText('Running 123 dogs', 'english');
      for (const fragment of ['run', '123', 'dog']) {
        expect(stemmed).toContain(fragment);
      }
    });
    it('should handle punctuation by extracting words', () => {
      const stemmed = stemText('Hello, world! How are you?', 'english');
      for (const fragment of ['hello', 'world']) {
        expect(stemmed).toContain(fragment);
      }
    });
    it('should use default English language when not specified', () => {
      const stemmed = stemText('Running dogs');
      for (const stem of ['run', 'dog']) {
        expect(stemmed).toContain(stem);
      }
    });
    it('should handle Unicode characters for non-ASCII languages', () => {
      // Only non-emptiness is asserted for these scripts, not the exact stems
      const russianStemmed = stemText('привет мир', 'russian');
      expect(russianStemmed.length).toBeGreaterThan(0);
      const arabicStemmed = stemText('مرحبا', 'arabic');
      expect(arabicStemmed.length).toBeGreaterThan(0);
    });
  });
  // Query stemming: plain terms, uppercase boolean operators, quoted phrases,
  // prefix searches with '*', and inputs that contain no word characters.
  describe('stemQuery', () => {
    // Assert that `text` contains every fragment, in the given order.
    const expectAllContained = (text: string, fragments: string[]): void => {
      for (const fragment of fragments) {
        expect(text).toContain(fragment);
      }
    };
    it('should stem simple queries', () => {
      const stemmed = stemQuery('running dogs', 'english');
      expectAllContained(stemmed, ['run', 'dog']);
    });
    it('should return empty string for empty query', () => {
      for (const blank of ['', '   ']) {
        expect(stemQuery(blank, 'english')).toBe('');
      }
    });
    it('should preserve AND operator in uppercase', () => {
      const stemmed = stemQuery('running AND dogs', 'english');
      expectAllContained(stemmed, ['AND', 'run', 'dog']);
    });
    it('should preserve OR operator in uppercase', () => {
      const stemmed = stemQuery('cats OR dogs', 'english');
      expectAllContained(stemmed, ['OR', 'cat', 'dog']);
    });
    it('should preserve NOT operator in uppercase', () => {
      const stemmed = stemQuery('NOT dogs', 'english');
      expectAllContained(stemmed, ['NOT', 'dog']);
    });
    it('should handle lowercase operators by stemming them', () => {
      // Only lowercase 'and' is exercised, and only non-emptiness is asserted
      const stemmedAnd = stemQuery('and', 'english');
      expect(stemmedAnd.length).toBeGreaterThan(0);
    });
    it('should stem words inside quoted phrases', () => {
      const stemmed = stemQuery('"running fast"', 'english');
      expectAllContained(stemmed, ['"', 'run', 'fast']);
    });
    it('should keep quotes around stemmed phrase', () => {
      const stemmed = stemQuery('"running dogs"', 'english');
      const opensWithQuote = stemmed.startsWith('"');
      const closesWithQuote = stemmed.endsWith('"');
      expect(opensWithQuote).toBe(true);
      expect(closesWithQuote).toBe(true);
    });
    it('should handle prefix searches with asterisk', () => {
      const stemmed = stemQuery('runn*', 'english');
      expect(stemmed).toContain('*');
      // The part in front of '*' is expected to come back stemmed
      const hasStem = stemmed.includes('run');
      expect(hasStem).toBe(true);
    });
    it('should handle prefix search when word results in empty after tokenization', () => {
      // A lone asterisk has no word part, so it is expected back unchanged
      const stemmed = stemQuery('*', 'english');
      expect(stemmed).toBe('*');
    });
    it('should handle complex queries with multiple operators', () => {
      const stemmed = stemQuery('"running fast" AND dogs NOT cats', 'english');
      expectAllContained(stemmed, ['AND', 'NOT', 'dog', 'cat']);
    });
    it('should clean up multiple spaces', () => {
      const stemmed = stemQuery('running    dogs', 'english');
      expect(stemmed).not.toContain('  ');
    });
    it('should use default English language when not specified', () => {
      const stemmed = stemQuery('running dogs');
      expectAllContained(stemmed, ['run', 'dog']);
    });
    it('should handle unquoted words that tokenize to empty', () => {
      // Input made of punctuation only: nothing but whitespace may remain
      const stemmed = stemQuery('!!!', 'english');
      expect(stemmed.trim()).toBe('');
    });
    it('should handle mixed quoted and unquoted terms', () => {
      const stemmed = stemQuery('dogs "running fast" cats', 'english');
      expectAllContained(stemmed, ['dog', 'cat', '"']);
    });
  });
  // prepareForFTS() is expected to stem like stemText(), including the default
  // language when none is passed.
  describe('prepareForFTS', () => {
    it('should prepare text for FTS indexing by stemming', () => {
      const prepared = prepareForFTS('Running dogs are playing', 'english');
      for (const stem of ['run', 'dog']) {
        expect(prepared).toContain(stem);
      }
    });
    it('should use default English when no language specified', () => {
      const prepared = prepareForFTS('Running dogs');
      for (const stem of ['run', 'dog']) {
        expect(prepared).toContain(stem);
      }
    });
    it('should be identical to stemText', () => {
      const sample = 'Running dogs are playing';
      const prepared = prepareForFTS(sample, 'english');
      const stemmed = stemText(sample, 'english');
      expect(prepared).toBe(stemmed);
    });
  });
  // Repeated and interleaved stemWord() calls must keep producing the expected
  // stems. NOTE(review): cache reuse itself is not observable from these
  // assertions; only result consistency is checked.
  describe('stemmer caching', () => {
    it('should reuse cached stemmers for same language', () => {
      // Two calls in a row for the same language
      const firstStem = stemWord('running', 'english');
      const secondStem = stemWord('playing', 'english');
      expect(firstStem).toBe('run');
      expect(secondStem).toBe('play');
    });
    it('should support different languages in sequence', () => {
      // All three calls are made before any assertion runs
      const stems = {
        english: stemWord('running', 'english'),
        german: stemWord('häuser', 'german'),
        french: stemWord('chanter', 'french'),
      };
      expect(stems.english).toBe('run');
      expect(stems.german).toBe('haus');
      expect(stems.french).toBe('chant');
    });
  });
  // stemText() on unusual input: long repetitive text, accented characters,
  // emoji between words, and mixed letter case.
  describe('edge cases', () => {
    it('should handle very long text', () => {
      const repeated = 'running '.repeat(1000);
      const stemmed = stemText(repeated, 'english');
      expect(stemmed.length).toBeGreaterThan(0);
      // Every space-separated token must be the stem 'run' or an empty string
      const allowedTokens = new Set(['run', '']);
      const onlyAllowed = stemmed.split(' ').every(token => allowedTokens.has(token));
      expect(onlyAllowed).toBe(true);
    });
    it('should handle special Unicode characters', () => {
      const stemmed = stemText('café résumé naïve', 'english');
      expect(stemmed.length).toBeGreaterThan(0);
    });
    it('should handle emoji by extracting adjacent words', () => {
      const stemmed = stemText('running 🏃 dogs', 'english');
      for (const stem of ['run', 'dog']) {
        expect(stemmed).toContain(stem);
      }
    });
    it('should handle mixed case consistently', () => {
      const fromLower = stemText('running', 'english');
      const fromUpper = stemText('RUNNING', 'english');
      const fromMixed = stemText('RuNnInG', 'english');
      expect(fromLower).toBe(fromUpper);
      expect(fromLower).toBe(fromMixed);
    });
  });
 });