diff --git a/package-lock.json b/package-lock.json
index 824a2db..c37f74e 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -50,6 +50,7 @@
         "simple-git": "^3.31.1",
         "smol-toml": "^1.6.0",
         "snowball-stemmers": "^0.6.0",
+        "transliteration": "^2.6.1",
         "turndown": "^7.2.2",
         "uuid": "^13.0.0",
         "vanilla-calendar-pro": "^3.1.0",
@@ -15228,6 +15229,19 @@
         "node": ">=20"
       }
     },
+    "node_modules/transliteration": {
+      "version": "2.6.1",
+      "resolved": "https://registry.npmjs.org/transliteration/-/transliteration-2.6.1.tgz",
+      "integrity": "sha512-hJ9BhrQAOnNTbpOr1MxsNjZISkn7ppvF5TKUeFmTE1mG4ZPD/XVxF0L0LUoIUCWmQyxH0gJpVtfYLAWf298U9w==",
+      "license": "MIT",
+      "bin": {
+        "slugify": "dist/bin/slugify",
+        "transliterate": "dist/bin/transliterate"
+      },
+      "engines": {
+        "node": ">=20.0.0"
+      }
+    },
     "node_modules/tree-dump": {
       "version": "1.1.0",
       "resolved": "https://registry.npmjs.org/tree-dump/-/tree-dump-1.1.0.tgz",
diff --git a/package.json b/package.json
index 520f47b..d75ad24 100644
--- a/package.json
+++ b/package.json
@@ -111,6 +111,7 @@
     "simple-git": "^3.31.1",
     "smol-toml": "^1.6.0",
     "snowball-stemmers": "^0.6.0",
+    "transliteration": "^2.6.1",
     "turndown": "^7.2.2",
     "uuid": "^13.0.0",
     "vanilla-calendar-pro": "^3.1.0",
diff --git a/src/main/engine/PostEngine.ts b/src/main/engine/PostEngine.ts
index 65659e7..2aad68b 100644
--- a/src/main/engine/PostEngine.ts
+++ b/src/main/engine/PostEngine.ts
@@ -13,6 +13,7 @@ import { stemText, stemQuery, SupportedLanguage } from './stemmer';
 import { readPostFile as readPostFileShared, type PostFileData } from './postFileUtils';
 import { CliNotifier, NoopNotifier } from './CliNotifier';
 import type { MediaEngine } from './MediaEngine';
+import { slugify } from './slugify';
 
 export interface PostData {
   id: string;
@@ -216,10 +217,7 @@ export class PostEngine extends EventEmitter {
   }
 
   private generateSlug(title: string): string {
-    return title
-      .toLowerCase()
-      .replace(/[^a-z0-9]+/g, '-')
-      .replace(/^-|-$/g, '');
+    return slugify(title);
   }
 
   /**
diff --git a/src/main/engine/ProjectEngine.ts b/src/main/engine/ProjectEngine.ts
index f331c25..18fe407 100644
--- a/src/main/engine/ProjectEngine.ts
+++ b/src/main/engine/ProjectEngine.ts
@@ -6,6 +6,7 @@ import { eq } from 'drizzle-orm';
 import { app } from 'electron';
 import { getDatabase } from '../database';
 import { projects, posts, media, Project, NewProject } from '../database/schema';
+import { slugify } from './slugify';
 
 export interface ProjectData {
   id: string;
@@ -43,10 +44,7 @@ export class ProjectEngine extends EventEmitter {
   }
 
   private generateSlug(name: string): string {
-    return name
-      .toLowerCase()
-      .replace(/[^a-z0-9]+/g, '-')
-      .replace(/^-|-$/g, '');
+    return slugify(name);
   }
 
   /**
diff --git a/src/main/engine/slugify.ts b/src/main/engine/slugify.ts
new file mode 100644
index 0000000..748b73b
--- /dev/null
+++ b/src/main/engine/slugify.ts
@@ -0,0 +1,18 @@
+import { transliterate } from 'transliteration';
+
+/**
+ * Generate a URL-safe slug from a string.
+ *
+ * - Transliterates umlauts and accented characters to ASCII equivalents
+ *   using the `transliteration` package for broad Unicode coverage
+ * - Removes non-alphanumeric characters (except hyphens used as separators)
+ * - Separates words with normal hyphens (U+002D)
+ * - Collapses consecutive separators into a single hyphen
+ * - Strips leading/trailing hyphens
+ */
+export function slugify(input: string): string {
+  return transliterate(input)
+    .toLowerCase()
+    .replace(/[^a-z0-9]+/g, '-')
+    .replace(/^-+|-+$/g, '');
+}
diff --git a/tests/engine/PostEngine.test.ts b/tests/engine/PostEngine.test.ts
index 36201c7..71dc27b 100644
--- a/tests/engine/PostEngine.test.ts
+++ b/tests/engine/PostEngine.test.ts
@@ -227,9 +227,9 @@ describe('PostEngine', () => {
       expect(post.slug).toBe('multiple-spaces-here');
     });
 
-    it('should handle unicode characters by removing them', async () => {
+    it('should handle unicode characters by transliterating them', async () => {
       const post = await postEngine.createPost({ title: 'Café Test' });
-      expect(post.slug).toBe('caf-test');
+      expect(post.slug).toBe('cafe-test');
     });
   });
 
diff --git a/tests/engine/slugify.test.ts b/tests/engine/slugify.test.ts
new file mode 100644
index 0000000..c402636
--- /dev/null
+++ b/tests/engine/slugify.test.ts
@@ -0,0 +1,110 @@
+import { describe, expect, it } from 'vitest';
+import { slugify } from '../../src/main/engine/slugify';
+
+describe('slugify', () => {
+  describe('basic transformations', () => {
+    it('lowercases the input', () => {
+      expect(slugify('Hello World')).toBe('hello-world');
+    });
+
+    it('replaces spaces with hyphens', () => {
+      expect(slugify('hello world')).toBe('hello-world');
+    });
+
+    it('collapses multiple spaces into a single hyphen', () => {
+      expect(slugify('Multiple Spaces Here')).toBe('multiple-spaces-here');
+    });
+
+    it('removes leading and trailing hyphens', () => {
+      expect(slugify('---Test---')).toBe('test');
+    });
+
+    it('handles numbers in input', () => {
+      expect(slugify('10 Tips for Testing')).toBe('10-tips-for-testing');
+    });
+
+    it('returns empty string for empty input', () => {
+      expect(slugify('')).toBe('');
+    });
+
+    it('returns empty string for whitespace-only input', () => {
+      expect(slugify(' ')).toBe('');
+    });
+  });
+
+  describe('umlaut and special character transliteration', () => {
+    it('transliterates German umlauts', () => {
+      expect(slugify('Über die Brücke')).toBe('uber-die-brucke');
+      expect(slugify('Ärger')).toBe('arger');
+      expect(slugify('schön')).toBe('schon');
+    });
+
+    it('transliterates ß to ss', () => {
+      expect(slugify('Straße')).toBe('strasse');
+      expect(slugify('Großmutter')).toBe('grossmutter');
+    });
+
+    it('transliterates French accented characters', () => {
+      expect(slugify('Café Test')).toBe('cafe-test');
+      expect(slugify('crème brûlée')).toBe('creme-brulee');
+      expect(slugify('naïve')).toBe('naive');
+    });
+
+    it('transliterates Nordic characters', () => {
+      expect(slugify('Ångström')).toBe('angstrom');
+      expect(slugify('Ærø')).toBe('aero');
+      expect(slugify('Ødegaard')).toBe('odegaard');
+    });
+
+    it('transliterates Spanish characters', () => {
+      expect(slugify('España')).toBe('espana');
+      expect(slugify('niño')).toBe('nino');
+    });
+
+    it('transliterates Polish characters', () => {
+      expect(slugify('Łódź')).toBe('lodz');
+    });
+
+    it('transliterates Czech characters', () => {
+      expect(slugify('Dvořák')).toBe('dvorak');
+      expect(slugify('Háček')).toBe('hacek');
+    });
+  });
+
+  describe('special characters removal', () => {
+    it('removes punctuation', () => {
+      expect(slugify('Hello, World! How are you?')).toBe('hello-world-how-are-you');
+    });
+
+    it('removes brackets and parentheses', () => {
+      expect(slugify('Hello (World) [Test]')).toBe('hello-world-test');
+    });
+
+    it('removes symbols', () => {
+      expect(slugify('Hello @World #Test $100')).toBe('hello-world-test-100');
+    });
+
+    it('removes emoji and non-Latin characters', () => {
+      expect(slugify('Hello 🌍 World')).toBe('hello-world');
+    });
+  });
+
+  describe('word separation', () => {
+    it('separates words with normal hyphens', () => {
+      const result = slugify('Hello World');
+      expect(result).toBe('hello-world');
+      // Verify it's a normal hyphen (U+002D), not en-dash or em-dash
+      expect(result.charCodeAt(5)).toBe(0x2d);
+    });
+
+    it('converts en-dashes and em-dashes to hyphens', () => {
+      expect(slugify('hello–world')).toBe('hello-world'); // en-dash
+      expect(slugify('hello—world')).toBe('hello-world'); // em-dash
+    });
+
+    it('collapses consecutive special chars into single hyphen', () => {
+      expect(slugify('hello!!!world')).toBe('hello-world');
+      expect(slugify('hello...world')).toBe('hello-world');
+    });
+  });
+});