Merge pull request #1 from rfc1437/copilot/add-test-coverage-and-validation
This commit is contained in:
@@ -762,6 +762,97 @@ describe('ImportAnalysisEngine', () => {
|
||||
expect(youtubeMacro?.usages[0].params.id).toBe('wordpress');
|
||||
});
|
||||
});
|
||||
|
||||
/**
 * Linked-image conversion: an `<img>` wrapped in an `<a>` is run through
 * `engine.analyzeWxr` and the `markdownPreview` of the single resulting post
 * is inspected.
 *
 * The image assertions previously used `toContain('')`, which is true for
 * every string and therefore verified nothing. They now pin the markdown
 * described by each test's intent.
 */
describe('HTML to Markdown Conversion - Linked Images', () => {
  /**
   * Analyzes a one-post WXR export whose body is `content` and returns the
   * markdown preview reported for that post.
   */
  const previewFor = async (content: string) => {
    setupDbReturns([], [], []);

    const wxrData = createWxrData({
      posts: [createWxrPost({ content })],
    });

    const report = await engine.analyzeWxr(wxrData, '/test.xml');
    return report.posts.items[0].markdownPreview;
  };

  it('should convert linked images with image href (WordPress full-size pattern)', async () => {
    const preview = await previewFor(
      '<a href="http://example.com/full-image.jpg"><img src="http://example.com/thumb.jpg" alt="My Image" /></a>',
    );

    // The href points at an image, so the full-size URL (not the thumbnail) is expected.
    expect(preview).toContain('![My Image](http://example.com/full-image.jpg)');
  });

  it('should convert linked images with non-image href (use img src)', async () => {
    const preview = await previewFor(
      '<a href="http://example.com/article"><img src="http://example.com/image.jpg" alt="Article Image" /></a>',
    );

    // The href is not an image, so the <img> src is expected as the image URL.
    expect(preview).toContain('![Article Image](http://example.com/image.jpg)');
  });

  it('should use img title as alt text when alt is empty', async () => {
    const preview = await previewFor(
      '<a href="http://example.com/full.jpg"><img src="http://example.com/thumb.jpg" alt="" title="My Title" /></a>',
    );

    // NOTE(review): only the alt text and URL are pinned here; the exact
    // markdown title syntax emitted by the engine is not visible from this
    // test file - tighten this once confirmed.
    expect(preview).toContain('![My Title](http://example.com/full.jpg');
  });

  it('should extract filename as alt text when both alt and title are empty', async () => {
    const preview = await previewFor(
      '<a href="http://example.com/beautiful-sunset.jpg"><img src="http://example.com/thumb.jpg" alt="" title="" /></a>',
    );

    // Should extract filename from URL as alt text
    expect(preview).toContain('beautiful-sunset.jpg');
  });

  it('should handle empty/whitespace content gracefully', async () => {
    const preview = await previewFor(' ');

    expect(preview).toBe('');
  });

  it('should preserve line breaks in text content', async () => {
    const preview = await previewFor('<p>Line one\nLine two\nLine three</p>');

    // Every line of the paragraph must survive conversion, not just the first.
    expect(preview).toContain('Line one');
    expect(preview).toContain('Line two');
    expect(preview).toContain('Line three');
  });
});
|
||||
});
|
||||
|
||||
/**
|
||||
|
||||
@@ -82,6 +82,11 @@ function createSelectChain() {
|
||||
|
||||
// Stand-in for the local database handle used by these tests.
// - `select` hands back a fresh chain from createSelectChain() on every call.
// - `update` exposes an update().set().where() chain; the final `where`
//   resolves to undefined.
const mockLocalDb = {
  select: vi.fn(() => createSelectChain()),
  update: vi.fn(() => {
    const set = vi.fn(() => {
      const where = vi.fn().mockResolvedValue(undefined);
      return { where };
    });
    return { set };
  }),
};
|
||||
|
||||
// Mock the database module
|
||||
@@ -347,6 +352,26 @@ describe('MetaEngine', () => {
|
||||
const categories = await metaEngine.collectCategoriesFromPosts();
|
||||
expect(categories).toEqual(['valid']);
|
||||
});
|
||||
|
||||
it('should handle posts with invalid JSON tags (gracefully skip)', async () => {
  // One row whose `tags` value is not parseable JSON, followed by a well-formed row.
  const unparseableRow = { tags: 'not-valid-json{[' };
  const wellFormedRow = { tags: JSON.stringify(['valid-tag']) };
  mockPosts = [unparseableRow, wellFormedRow];

  const collected = await metaEngine.collectTagsFromPosts();

  // Only the tag from the parseable row is expected to come back.
  expect(collected).toEqual(['valid-tag']);
});
|
||||
|
||||
it('should handle posts with invalid JSON categories (gracefully skip)', async () => {
  // One row whose `categories` value is not parseable JSON, followed by a well-formed row.
  const unparseableRow = { categories: 'invalid json here}' };
  const wellFormedRow = { categories: JSON.stringify(['valid-cat']) };
  mockPosts = [unparseableRow, wellFormedRow];

  const collected = await metaEngine.collectCategoriesFromPosts();

  // Only the category from the parseable row is expected to come back.
  expect(collected).toEqual(['valid-cat']);
});
|
||||
});
|
||||
|
||||
describe('Event Emission', () => {
|
||||
@@ -446,6 +471,38 @@ describe('MetaEngine', () => {
|
||||
expect(metadata?.description).toBe('Loaded description');
|
||||
});
|
||||
|
||||
it('should handle ENOENT error when loading project metadata (no file)', async () => {
  // No metadata file is set up for this test; loading is expected to leave
  // the project metadata as null rather than throw.
  await metaEngine.loadProjectMetadata();

  expect(await metaEngine.getProjectMetadata()).toBeNull();
});
|
||||
|
||||
it('should throw non-ENOENT errors when loading project metadata', async () => {
  // Make the next readFile call reject with an error whose code is not ENOENT.
  const accessError = Object.assign(new Error('Permission denied'), { code: 'EACCES' });
  const readFileMock = vi.mocked(fs.readFile);
  readFileMock.mockRejectedValueOnce(accessError);

  const pendingLoad = metaEngine.loadProjectMetadata();

  await expect(pendingLoad).rejects.toThrow('Permission denied');
});
|
||||
|
||||
it('should handle ENOENT error when loading categories (no file)', async () => {
  // No categories file is set up for this test; loading must not throw.
  await metaEngine.loadCategories();

  expect(await metaEngine.getCategories()).toEqual([]);
});
|
||||
|
||||
it('should throw non-ENOENT errors when loading categories', async () => {
  // Make the next readFile call reject with an error whose code is not ENOENT.
  const diskError = Object.assign(new Error('Disk full'), { code: 'ENOSPC' });
  const readFileMock = vi.mocked(fs.readFile);
  readFileMock.mockRejectedValueOnce(diskError);

  const pendingLoad = metaEngine.loadCategories();

  await expect(pendingLoad).rejects.toThrow('Disk full');
});
|
||||
|
||||
it('should emit projectMetadataChanged event when metadata is modified', async () => {
|
||||
const handler = vi.fn();
|
||||
metaEngine.on('projectMetadataChanged', handler);
|
||||
@@ -560,5 +617,54 @@ describe('MetaEngine', () => {
|
||||
expect(categories).toContain('aside');
|
||||
expect(categories).toContain('page');
|
||||
});
|
||||
|
||||
it('should report isInitialized as false before syncOnStartup', () => {
  // syncOnStartup() has not been called in this test.
  const initialized = metaEngine.isInitialized();

  expect(initialized).toBe(false);
});
|
||||
|
||||
it('should report isInitialized as true after syncOnStartup', async () => {
  await metaEngine.syncOnStartup();

  const initialized = metaEngine.isInitialized();

  expect(initialized).toBe(true);
});
|
||||
|
||||
it('should reset initialized flag when project context changes', async () => {
  // Precondition: the engine reports initialized after a startup sync.
  await metaEngine.syncOnStartup();
  const initializedAfterSync = metaEngine.isInitialized();
  expect(initializedAfterSync).toBe(true);

  // Switching to another project is expected to clear the flag.
  metaEngine.setProjectContext('different-project');
  const initializedAfterSwitch = metaEngine.isInitialized();
  expect(initializedAfterSwitch).toBe(false);
});
|
||||
|
||||
it('should use custom dataDir when provided in setProjectContext', () => {
  metaEngine.setProjectContext('project-with-custom-dir', '/custom/data/path');

  // The meta directory is expected to be located under the supplied data dir.
  expect(metaEngine.getMetaDir()).toContain('/custom/data/path');
});
|
||||
|
||||
it('should sync dataPath from project.json to database if different', async () => {
  // project.json on the mock filesystem carries a dataPath...
  const metaDir = metaEngine.getMetaDir();
  mockFiles.set(normalizePath(`${metaDir}/project.json`), JSON.stringify({
    name: 'Project',
    dataPath: '/custom/path/from/file',
  }));

  // ...while the database row has none, so the two differ.
  mockProject = {
    id: 'test-project',
    name: 'Project',
    description: null,
    dataPath: null,
    slug: 'project',
    createdAt: new Date(),
    updatedAt: new Date(),
    isActive: true,
  };

  await metaEngine.syncOnStartup();

  // The project row must have been read...
  expect(mockLocalDb.select).toHaveBeenCalled();
  // ...and, because the dataPath differs, written back. Asserting only
  // `select` (as before) would pass even if the sync never touched the
  // database.
  // NOTE(review): assumes the engine persists the change through
  // mockLocalDb.update() - confirm against MetaEngine.syncOnStartup.
  expect(mockLocalDb.update).toHaveBeenCalled();
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -538,4 +538,126 @@ describe('PostMediaEngine', () => {
|
||||
expect(result).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
// importMediaForPost: the media engine import is mocked; the returned link
// must reference both the post and the freshly imported media.
describe('importMediaForPost', () => {
  it('should import media and link it to the post', async () => {
    const targetPostId = 'post-1';
    const fileOnDisk = '/path/to/image.jpg';
    const newMediaId = 'imported-media-123';

    // The import yields the new id; the follow-up lookup returns media with no links yet.
    mockImportMedia.mockResolvedValue({ id: newMediaId });
    const freshMedia = createMockMedia({ id: newMediaId, linkedPostIds: [] });
    mockGetMedia.mockResolvedValue(freshMedia);

    const link = await engine.importMediaForPost(targetPostId, fileOnDisk);

    expect(mockImportMedia).toHaveBeenCalledWith(fileOnDisk);
    expect(link.postId).toBe(targetPostId);
    expect(link.mediaId).toBe(newMediaId);
  });
});
|
||||
|
||||
// getLinkedMediaDataForPost: link rows come from `selectMockData`, media
// records from `mockGetMedia`.
describe('getLinkedMediaDataForPost', () => {
  // Builds one post-media link row in the shape the select mock returns.
  const linkRow = (id: string, postId: string, mediaId: string, sortOrder: number) => ({
    id,
    projectId: 'test-project',
    postId,
    mediaId,
    sortOrder,
    createdAt: new Date(),
  });

  it('should return linked media with full media data', async () => {
    const postId = 'post-1';
    const firstImage = createMockMedia({ id: 'media-1', title: 'Image 1' });
    const secondImage = createMockMedia({ id: 'media-2', title: 'Image 2' });

    selectMockData = [
      linkRow('link-1', postId, 'media-1', 0),
      linkRow('link-2', postId, 'media-2', 1),
    ];

    // Known ids resolve to their media; anything else resolves to null.
    const mediaById = new Map([
      ['media-1', firstImage],
      ['media-2', secondImage],
    ]);
    mockGetMedia.mockImplementation((id: string) => Promise.resolve(mediaById.get(id) ?? null));

    const linked = await engine.getLinkedMediaDataForPost(postId);

    expect(linked).toHaveLength(2);
    expect(linked[0].media.title).toBe('Image 1');
    expect(linked[1].media.title).toBe('Image 2');
  });

  it('should skip links where media is not found', async () => {
    const postId = 'post-1';
    const survivingImage = createMockMedia({ id: 'media-1', title: 'Image 1' });

    selectMockData = [
      linkRow('link-1', postId, 'media-1', 0),
      linkRow('link-2', postId, 'media-deleted', 1),
    ];

    // 'media-deleted' has no backing record, so its lookup resolves to null.
    mockGetMedia.mockImplementation((id: string) =>
      Promise.resolve(id === 'media-1' ? survivingImage : null),
    );

    const linked = await engine.getLinkedMediaDataForPost(postId);

    expect(linked).toHaveLength(1);
    expect(linked[0].media.title).toBe('Image 1');
  });

  it('should return empty array when no links exist', async () => {
    selectMockData = [];

    const linked = await engine.getLinkedMediaDataForPost('post-no-links');

    expect(linked).toEqual([]);
  });
});
|
||||
|
||||
// linkMediaToPost edge cases: already-linked media, sort-order calculation
// with existing links, and linking when the media lookup yields null.
describe('edge cases for linkMediaToPost', () => {
  it('should not add duplicate postId to linkedPostIds', async () => {
    const postId = 'post-1';
    const mediaId = 'media-1';

    // The media record already lists this post among its linked posts.
    const alreadyLinked = createMockMedia({
      id: mediaId,
      linkedPostIds: [postId],
    });
    mockGetMedia.mockResolvedValue(alreadyLinked);

    await engine.linkMediaToPost(postId, mediaId);

    // Nothing to add, so the media record must not be rewritten.
    expect(mockUpdateMedia).not.toHaveBeenCalled();
  });

  it('should calculate correct sortOrder when existing links present', async () => {
    const postId = 'post-1';
    const mediaId = 'media-new';

    // Two pre-existing links with sort orders 5 and 10.
    const existingLinks = [
      { id: 'link-1', mediaId: 'media-1', sortOrder: 5 },
      { id: 'link-2', mediaId: 'media-2', sortOrder: 10 },
    ];
    selectMockData = existingLinks.map((link) => ({
      id: link.id,
      projectId: 'test-project',
      postId,
      mediaId: link.mediaId,
      sortOrder: link.sortOrder,
      createdAt: new Date(),
    }));

    mockGetMedia.mockResolvedValue(createMockMedia({ id: mediaId, linkedPostIds: [] }));

    const link = await engine.linkMediaToPost(postId, mediaId);

    // Highest existing sort order is 10, so the new link gets 11.
    expect(link.sortOrder).toBe(11);
  });

  it('should handle null media when linking', async () => {
    const postId = 'post-1';
    const mediaId = 'media-nonexistent';

    // The media lookup comes back empty.
    mockGetMedia.mockResolvedValue(null);

    const link = await engine.linkMediaToPost(postId, mediaId);

    // The link itself is still created...
    expect(link.postId).toBe(postId);
    expect(link.mediaId).toBe(mediaId);
    // ...but there is no media record to update.
    expect(mockUpdateMedia).not.toHaveBeenCalled();
  });
});
|
||||
});
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
import { describe, it, expect, beforeEach, vi, afterEach } from 'vitest';
|
||||
import { TagEngine, TagData, TagWithCount, MergeTagsResult, DeleteTagResult } from '../../src/main/engine/TagEngine';
|
||||
import { resetMockCounters } from '../utils/factories';
|
||||
import { getDatabase } from '../../src/main/database';
|
||||
|
||||
// Create mock data stores
|
||||
const mockTags = new Map<string, any>();
|
||||
@@ -487,4 +488,163 @@ describe('TagEngine', () => {
|
||||
expect(result.discovered).toBeGreaterThanOrEqual(0);
|
||||
});
|
||||
});
|
||||
|
||||
// loadTagsFromFile: fs.readFile (from 'fs/promises') is mocked per test, and
// `mockSelectDataQueue` supplies the rows returned by successive selects.
describe('loadTagsFromFile', () => {
  // Makes the next readFile call resolve with `entries` serialized as JSON.
  const stubTagsFile = async (entries: unknown[]) => {
    const fs = await import('fs/promises');
    vi.mocked(fs.readFile).mockResolvedValueOnce(JSON.stringify(entries));
  };

  // Makes the next readFile call reject with an Error carrying `code`.
  const stubReadFailure = async (message: string, code: string) => {
    const fs = await import('fs/promises');
    const failure = new Error(message);
    (failure as NodeJS.ErrnoException).code = code;
    vi.mocked(fs.readFile).mockRejectedValueOnce(failure);
  };

  it('should load tags from filesystem in portable format', async () => {
    await stubTagsFile([
      { name: 'tag1', color: '#ff0000' },
      { name: 'tag2' },
    ]);

    // Both lookups come back empty, i.e. neither tag exists yet.
    mockSelectDataQueue = [[], []];

    await tagEngine.loadTagsFromFile();

    // New tags are expected to be inserted.
    expect(mockLocalDb.insert).toHaveBeenCalled();
  });

  it('should handle ENOENT error gracefully (file not found)', async () => {
    await stubReadFailure('ENOENT', 'ENOENT');

    // A missing file must not surface as a rejection.
    await expect(tagEngine.loadTagsFromFile()).resolves.toBeUndefined();
  });

  it('should log non-ENOENT errors when loading', async () => {
    await stubReadFailure('Permission denied', 'EACCES');

    // Other read errors must not surface as a rejection either.
    await expect(tagEngine.loadTagsFromFile()).resolves.toBeUndefined();
  });

  it('should skip tags with empty names', async () => {
    await stubTagsFile([
      { name: '' },
      { name: ' ' },
      { name: 'valid' },
    ]);

    // Lookup for the one usable name finds no existing tag.
    mockSelectDataQueue = [[]];

    await tagEngine.loadTagsFromFile();

    // At least one insert must have happened (for 'valid').
    const insertCalls = mockLocalDb.insert.mock.calls;
    expect(insertCalls.length).toBeGreaterThan(0);
  });

  it('should update color for existing tag when loading', async () => {
    await stubTagsFile([
      { name: 'existing-tag', color: '#ff0000' },
    ]);

    // The lookup finds a tag that is already stored.
    mockSelectDataQueue = [[{ id: 'tag-1' }]];

    await tagEngine.loadTagsFromFile();

    // The stored tag is expected to be updated rather than re-inserted.
    expect(mockLocalDb.update).toHaveBeenCalled();
  });
});
|
||||
|
||||
// renameTag: validation failures, missing tag, no-op rename, and name collision.
describe('renameTag error cases', () => {
  // Row returned by the select mock for the tag being renamed.
  const storedTag = (name: string) => ({
    id: 'tag-1',
    name,
    projectId: 'default',
    createdAt: new Date(),
    updatedAt: new Date(),
  });

  it('should throw error when new name is empty', async () => {
    const attempt = tagEngine.renameTag('tag-1', '');

    await expect(attempt).rejects.toThrow('New name is required');
  });

  it('should throw error when new name is whitespace only', async () => {
    const attempt = tagEngine.renameTag('tag-1', ' ');

    await expect(attempt).rejects.toThrow('New name is required');
  });

  it('should throw error when tag not found', async () => {
    // Every select comes back empty, so the tag id cannot be resolved.
    mockSelectDataDefault = [];

    const attempt = tagEngine.renameTag('non-existent', 'new-name');

    await expect(attempt).rejects.toThrow('Tag not found');
  });

  it('should return success with 0 posts updated when renaming to same name', async () => {
    mockSelectDataQueue = [[storedTag('same-name')]];

    const outcome = await tagEngine.renameTag('tag-1', 'same-name');

    expect(outcome.success).toBe(true);
    expect(outcome.postsUpdated).toBe(0);
    expect(outcome.oldName).toBe('same-name');
    expect(outcome.newName).toBe('same-name');
  });

  it('should throw error when target name already exists', async () => {
    // First select resolves the tag itself; the second finds another tag
    // that already uses the requested name.
    const tagLookup = [storedTag('old-name')];
    const duplicateLookup = [{ id: 'tag-2' }];
    mockSelectDataQueue = [tagLookup, duplicateLookup];

    const attempt = tagEngine.renameTag('tag-1', 'existing-name');

    await expect(attempt).rejects.toThrow('already exists');
  });
});
|
||||
|
||||
// getPostsWithTag edge cases: unknown tag and unavailable local client.
describe('getPostsWithTag edge cases', () => {
  it('should return empty array when tag not found', async () => {
    mockSelectDataQueue = [[]]; // tag not found

    const result = await tagEngine.getPostsWithTag('non-existent-tag');

    expect(result).toEqual([]);
  });

  it('should return empty array when client is not available', async () => {
    // Remember whatever implementation getDatabase currently has so it can be put back.
    const originalMock = vi.mocked(getDatabase).getMockImplementation();

    // Override getDatabase so that getLocalClient() yields null.
    vi.mocked(getDatabase).mockReturnValue({
      getLocal: vi.fn(() => mockLocalDb),
      getLocalClient: vi.fn().mockReturnValue(null),
    } as any);

    try {
      const result = await tagEngine.getPostsWithTag('tag-1');

      expect(result).toEqual([]);
    } finally {
      // Restore in `finally`: if the call or the assertion above throws, the
      // null-client override must not leak into the tests that run afterwards.
      if (originalMock) {
        vi.mocked(getDatabase).mockImplementation(originalMock);
      } else {
        // No previous implementation was recorded; fall back to the standard mock.
        vi.mocked(getDatabase).mockReturnValue({
          getLocal: vi.fn(() => mockLocalDb),
          getLocalClient: vi.fn(() => mockLocalClient),
          getRemote: vi.fn(() => null),
          getDataPaths: vi.fn(() => ({
            database: '/mock/userData/bds.db',
            posts: '/mock/userData/posts',
            media: '/mock/userData/media',
          })),
          initializeLocal: vi.fn(),
          initializeRemote: vi.fn(),
          close: vi.fn(),
        } as any);
      }
    }
  });
});
|
||||
});
|
||||
|
||||
350
tests/engine/stemmer.test.ts
Normal file
350
tests/engine/stemmer.test.ts
Normal file
@@ -0,0 +1,350 @@
|
||||
/**
|
||||
* Stemmer Unit Tests
|
||||
*
|
||||
* Tests the REAL stemmer functions without any mocks.
|
||||
* The stemmer provides multilingual text stemming for FTS indexing.
|
||||
*
|
||||
* Tests all branches including:
|
||||
* - Various languages
|
||||
* - ISO language code conversion
|
||||
* - Empty/null inputs
|
||||
* - FTS5 query operators (AND, OR, NOT)
|
||||
* - Quoted phrases
|
||||
* - Prefix searches
|
||||
* - Edge cases
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeEach } from 'vitest';
|
||||
import {
|
||||
stemText,
|
||||
stemQuery,
|
||||
stemWord,
|
||||
getSupportedLanguages,
|
||||
isoToStemmerLanguage,
|
||||
prepareForFTS,
|
||||
SupportedLanguage,
|
||||
} from '../../src/main/engine/stemmer';
|
||||
|
||||
describe('stemmer', () => {
|
||||
// getSupportedLanguages: shape of the returned list and presence of a few
// well-known languages.
describe('getSupportedLanguages', () => {
  it('should return an array of supported languages', () => {
    const supported = getSupportedLanguages();

    const isArray = Array.isArray(supported);
    expect(isArray).toBe(true);
    expect(supported.length).toBeGreaterThan(0);
    expect(supported).toContain('english');
  });

  it('should include common languages', () => {
    const supported = getSupportedLanguages();

    for (const language of ['german', 'french', 'spanish']) {
      expect(supported).toContain(language);
    }
  });
});
|
||||
|
||||
// isoToStemmerLanguage: maps ISO / locale codes to stemmer language names.
describe('isoToStemmerLanguage', () => {
  // Checks each [input code, expected language] pair, in order.
  const expectMappings = (pairs: Array<[string, string]>) => {
    for (const [code, language] of pairs) {
      expect(isoToStemmerLanguage(code)).toBe(language);
    }
  };

  it('should convert ISO 639-1 codes to stemmer language names', () => {
    expectMappings([
      ['en', 'english'],
      ['de', 'german'],
      ['fr', 'french'],
      ['es', 'spanish'],
    ]);
  });

  it('should handle locale codes with region (e.g., en-US)', () => {
    expectMappings([
      ['en-US', 'english'],
      ['de-DE', 'german'],
      ['fr-FR', 'french'],
      ['es-MX', 'spanish'],
    ]);
  });

  it('should be case insensitive', () => {
    expectMappings([
      ['EN', 'english'],
      ['De', 'german'],
      ['FR', 'french'],
    ]);
  });

  it('should return english for unknown language codes', () => {
    // Unrecognized and empty codes are all expected to map to 'english'.
    expectMappings([
      ['xx', 'english'],
      ['unknown', 'english'],
      ['', 'english'],
    ]);
  });

  it('should handle all mapped ISO codes', () => {
    expectMappings([
      ['ar', 'arabic'],
      ['hy', 'armenian'],
      ['eu', 'basque'],
      ['ca', 'catalan'],
      ['cs', 'czech'],
      ['da', 'danish'],
      ['nl', 'dutch'],
      ['fi', 'finnish'],
      ['hu', 'hungarian'],
      ['it', 'italian'],
      ['ga', 'irish'],
      ['no', 'norwegian'],
      ['nb', 'norwegian'],
      ['nn', 'norwegian'],
      ['pt', 'portuguese'],
      ['ro', 'romanian'],
      ['ru', 'russian'],
      ['sl', 'slovene'],
      ['sv', 'swedish'],
      ['ta', 'tamil'],
      ['tr', 'turkish'],
    ]);
  });
});
|
||||
|
||||
// stemWord: single-word stemming per language, default language, and casing.
describe('stemWord', () => {
  it('should stem English words correctly', () => {
    const expectedStems = Object.entries({
      running: 'run',
      dogs: 'dog',
      played: 'play',
      playing: 'play',
    });

    for (const [word, stem] of expectedStems) {
      expect(stemWord(word, 'english')).toBe(stem);
    }
  });

  it('should stem German words correctly', () => {
    // Same word in lower and capitalized form; both must give the same stem.
    for (const word of ['häuser', 'Häuser']) {
      expect(stemWord(word, 'german')).toBe('haus');
    }
  });

  it('should stem French words correctly', () => {
    const expectedStems = Object.entries({
      chanter: 'chant',
      chanteuse: 'chanteux',
    });

    for (const [word, stem] of expectedStems) {
      expect(stemWord(word, 'french')).toBe(stem);
    }
  });

  it('should default to English when no language specified', () => {
    // Language argument deliberately omitted.
    const runningStem = stemWord('running');
    const dogsStem = stemWord('dogs');

    expect(runningStem).toBe('run');
    expect(dogsStem).toBe('dog');
  });

  it('should handle uppercase words by converting to lowercase', () => {
    const expectedStems = Object.entries({
      RUNNING: 'run',
      DOGS: 'dog',
    });

    for (const [word, stem] of expectedStems) {
      expect(stemWord(word, 'english')).toBe(stem);
    }
  });
});
|
||||
|
||||
// stemText: stems every word of a free-text string for a given language.
describe('stemText', () => {
  it('should stem all words in a sentence', () => {
    const result = stemText('Running dogs are playing', 'english');

    expect(result).toContain('run');
    expect(result).toContain('dog');
  });

  it('should return empty string for empty input', () => {
    expect(stemText('', 'english')).toBe('');
    expect(stemText(' ', 'english')).toBe('');
  });

  it('should handle multiple spaces between words', () => {
    // The input must actually contain runs of spaces; with single spaces this
    // test would pass without exercising whitespace collapsing at all.
    const result = stemText('Running   dogs    are  playing', 'english');

    // Splitting on a single space yields no empty tokens only if consecutive
    // spaces were collapsed in the output.
    const words = result.split(' ');
    expect(words).not.toContain('');
  });

  it('should handle German text correctly', () => {
    const result = stemText('Häuser Haus', 'german');

    expect(result).toContain('haus');
  });

  it('should handle text with numbers', () => {
    const result = stemText('Running 123 dogs', 'english');

    // Numeric tokens are expected to be kept alongside the stemmed words.
    expect(result).toContain('run');
    expect(result).toContain('123');
    expect(result).toContain('dog');
  });

  it('should handle punctuation by extracting words', () => {
    const result = stemText('Hello, world! How are you?', 'english');

    expect(result).toContain('hello');
    expect(result).toContain('world');
  });

  it('should use default English language when not specified', () => {
    // Language argument deliberately omitted.
    const result = stemText('Running dogs');

    expect(result).toContain('run');
    expect(result).toContain('dog');
  });

  it('should handle Unicode characters for non-ASCII languages', () => {
    // Russian text
    const russianResult = stemText('привет мир', 'russian');
    expect(russianResult.length).toBeGreaterThan(0);

    // Arabic text
    const arabicResult = stemText('مرحبا', 'arabic');
    expect(arabicResult.length).toBeGreaterThan(0);
  });
});
|
||||
|
||||
// stemQuery: stems the terms of a search query while keeping the query
// syntax (uppercase AND / OR / NOT, quoted phrases, trailing `*`) in place.
describe('stemQuery', () => {
  it('should stem simple queries', () => {
    const result = stemQuery('running dogs', 'english');

    expect(result).toContain('run');
    expect(result).toContain('dog');
  });

  it('should return empty string for empty query', () => {
    expect(stemQuery('', 'english')).toBe('');
    expect(stemQuery(' ', 'english')).toBe('');
  });

  it('should preserve AND operator in uppercase', () => {
    const result = stemQuery('running AND dogs', 'english');

    expect(result).toContain('AND');
    expect(result).toContain('run');
    expect(result).toContain('dog');
  });

  it('should preserve OR operator in uppercase', () => {
    const result = stemQuery('cats OR dogs', 'english');

    expect(result).toContain('OR');
    expect(result).toContain('cat');
    expect(result).toContain('dog');
  });

  it('should preserve NOT operator in uppercase', () => {
    const result = stemQuery('NOT dogs', 'english');

    expect(result).toContain('NOT');
    expect(result).toContain('dog');
  });

  it('should handle lowercase operators by stemming them', () => {
    // lowercase 'and', 'or', 'not' should be stemmed as regular words
    const andResult = stemQuery('and', 'english');

    // 'and' stemmed might be 'and' itself
    expect(andResult.length).toBeGreaterThan(0);
  });

  it('should stem words inside quoted phrases', () => {
    const result = stemQuery('"running fast"', 'english');

    expect(result).toContain('"');
    expect(result).toContain('run');
    expect(result).toContain('fast');
  });

  it('should keep quotes around stemmed phrase', () => {
    const result = stemQuery('"running dogs"', 'english');

    expect(result.startsWith('"')).toBe(true);
    expect(result.endsWith('"')).toBe(true);
  });

  it('should handle prefix searches with asterisk', () => {
    const result = stemQuery('runn*', 'english');

    expect(result).toContain('*');
    // The word part before * should be stemmed
    expect(result.includes('run')).toBe(true);
  });

  it('should handle prefix search when word results in empty after tokenization', () => {
    // Test with just asterisk (edge case)
    const result = stemQuery('*', 'english');

    // Should return the original match since no word part
    expect(result).toBe('*');
  });

  it('should handle complex queries with multiple operators', () => {
    const result = stemQuery('"running fast" AND dogs NOT cats', 'english');

    expect(result).toContain('AND');
    expect(result).toContain('NOT');
    expect(result).toContain('dog');
    expect(result).toContain('cat');
  });

  it('should clean up multiple spaces', () => {
    // The query must contain a run of spaces for this test to mean anything,
    // and the check is for a surviving *double* space: a two-term result
    // legitimately contains one single space between its terms.
    const result = stemQuery('running    dogs', 'english');

    expect(result).not.toContain('  ');
  });

  it('should use default English language when not specified', () => {
    // Language argument deliberately omitted.
    const result = stemQuery('running dogs');

    expect(result).toContain('run');
    expect(result).toContain('dog');
  });

  it('should handle unquoted words that tokenize to empty', () => {
    // Special characters only
    const result = stemQuery('!!!', 'english');

    // Should result in empty string or just spaces
    expect(result.trim()).toBe('');
  });

  it('should handle mixed quoted and unquoted terms', () => {
    const result = stemQuery('dogs "running fast" cats', 'english');

    expect(result).toContain('dog');
    expect(result).toContain('cat');
    expect(result).toContain('"');
  });
});
|
||||
|
||||
// prepareForFTS: expected to produce the same output as stemText.
describe('prepareForFTS', () => {
  it('should prepare text for FTS indexing by stemming', () => {
    const indexed = prepareForFTS('Running dogs are playing', 'english');

    for (const stem of ['run', 'dog']) {
      expect(indexed).toContain(stem);
    }
  });

  it('should use default English when no language specified', () => {
    // Language argument deliberately omitted.
    const indexed = prepareForFTS('Running dogs');

    for (const stem of ['run', 'dog']) {
      expect(indexed).toContain(stem);
    }
  });

  it('should be identical to stemText', () => {
    const text = 'Running dogs are playing';

    const viaPrepare = prepareForFTS(text, 'english');
    const viaStemText = stemText(text, 'english');

    expect(viaPrepare).toBe(viaStemText);
  });
});
|
||||
|
||||
// Repeated and interleaved stemWord calls must keep giving correct results.
describe('stemmer caching', () => {
  it('should reuse cached stemmers for same language', () => {
    // Two consecutive calls for the same language.
    const firstStem = stemWord('running', 'english');
    const secondStem = stemWord('playing', 'english');

    // Both calls must yield the correct stem.
    expect(firstStem).toBe('run');
    expect(secondStem).toBe('play');
  });

  it('should support different languages in sequence', () => {
    // All three stems are computed before any assertion runs.
    const stems = {
      english: stemWord('running', 'english'),
      german: stemWord('häuser', 'german'),
      french: stemWord('chanter', 'french'),
    };

    expect(stems.english).toBe('run');
    expect(stems.german).toBe('haus');
    expect(stems.french).toBe('chant');
  });
});
|
||||
|
||||
// Stress and oddball inputs for stemText.
describe('edge cases', () => {
  it('should handle very long text', () => {
    const repeated = 'running '.repeat(1000);

    const stemmed = stemText(repeated, 'english');

    expect(stemmed.length).toBeGreaterThan(0);
    // No token other than 'run' (or an empty split artifact) may appear.
    const hasUnexpectedToken = stemmed
      .split(' ')
      .some((token) => token !== 'run' && token !== '');
    expect(!hasUnexpectedToken).toBe(true);
  });

  it('should handle special Unicode characters', () => {
    const stemmed = stemText('café résumé naïve', 'english');

    expect(stemmed.length).toBeGreaterThan(0);
  });

  it('should handle emoji by extracting adjacent words', () => {
    const stemmed = stemText('running 🏃 dogs', 'english');

    for (const stem of ['run', 'dog']) {
      expect(stemmed).toContain(stem);
    }
  });

  it('should handle mixed case consistently', () => {
    // Lowercase output is the baseline the other casings are compared against.
    const baseline = stemText('running', 'english');
    const fromUpper = stemText('RUNNING', 'english');
    const fromMixed = stemText('RuNnInG', 'english');

    expect(baseline).toBe(fromUpper);
    expect(baseline).toBe(fromMixed);
  });
});
|
||||
});
|
||||
Reference in New Issue
Block a user