fix: slugify now transliterates much better

This commit is contained in:
2026-03-01 07:12:38 +01:00
parent 583c37473a
commit 289535021a
7 changed files with 149 additions and 10 deletions

14
package-lock.json generated
View File

@@ -50,6 +50,7 @@
"simple-git": "^3.31.1", "simple-git": "^3.31.1",
"smol-toml": "^1.6.0", "smol-toml": "^1.6.0",
"snowball-stemmers": "^0.6.0", "snowball-stemmers": "^0.6.0",
"transliteration": "^2.6.1",
"turndown": "^7.2.2", "turndown": "^7.2.2",
"uuid": "^13.0.0", "uuid": "^13.0.0",
"vanilla-calendar-pro": "^3.1.0", "vanilla-calendar-pro": "^3.1.0",
@@ -15228,6 +15229,19 @@
"node": ">=20" "node": ">=20"
} }
}, },
"node_modules/transliteration": {
"version": "2.6.1",
"resolved": "https://registry.npmjs.org/transliteration/-/transliteration-2.6.1.tgz",
"integrity": "sha512-hJ9BhrQAOnNTbpOr1MxsNjZISkn7ppvF5TKUeFmTE1mG4ZPD/XVxF0L0LUoIUCWmQyxH0gJpVtfYLAWf298U9w==",
"license": "MIT",
"bin": {
"slugify": "dist/bin/slugify",
"transliterate": "dist/bin/transliterate"
},
"engines": {
"node": ">=20.0.0"
}
},
"node_modules/tree-dump": { "node_modules/tree-dump": {
"version": "1.1.0", "version": "1.1.0",
"resolved": "https://registry.npmjs.org/tree-dump/-/tree-dump-1.1.0.tgz", "resolved": "https://registry.npmjs.org/tree-dump/-/tree-dump-1.1.0.tgz",

View File

@@ -111,6 +111,7 @@
"simple-git": "^3.31.1", "simple-git": "^3.31.1",
"smol-toml": "^1.6.0", "smol-toml": "^1.6.0",
"snowball-stemmers": "^0.6.0", "snowball-stemmers": "^0.6.0",
"transliteration": "^2.6.1",
"turndown": "^7.2.2", "turndown": "^7.2.2",
"uuid": "^13.0.0", "uuid": "^13.0.0",
"vanilla-calendar-pro": "^3.1.0", "vanilla-calendar-pro": "^3.1.0",

View File

@@ -13,6 +13,7 @@ import { stemText, stemQuery, SupportedLanguage } from './stemmer';
import { readPostFile as readPostFileShared, type PostFileData } from './postFileUtils'; import { readPostFile as readPostFileShared, type PostFileData } from './postFileUtils';
import { CliNotifier, NoopNotifier } from './CliNotifier'; import { CliNotifier, NoopNotifier } from './CliNotifier';
import type { MediaEngine } from './MediaEngine'; import type { MediaEngine } from './MediaEngine';
import { slugify } from './slugify';
export interface PostData { export interface PostData {
id: string; id: string;
@@ -216,10 +217,7 @@ export class PostEngine extends EventEmitter {
} }
private generateSlug(title: string): string { private generateSlug(title: string): string {
return title return slugify(title);
.toLowerCase()
.replace(/[^a-z0-9]+/g, '-')
.replace(/^-|-$/g, '');
} }
/** /**

View File

@@ -6,6 +6,7 @@ import { eq } from 'drizzle-orm';
import { app } from 'electron'; import { app } from 'electron';
import { getDatabase } from '../database'; import { getDatabase } from '../database';
import { projects, posts, media, Project, NewProject } from '../database/schema'; import { projects, posts, media, Project, NewProject } from '../database/schema';
import { slugify } from './slugify';
export interface ProjectData { export interface ProjectData {
id: string; id: string;
@@ -43,10 +44,7 @@ export class ProjectEngine extends EventEmitter {
} }
private generateSlug(name: string): string { private generateSlug(name: string): string {
return name return slugify(name);
.toLowerCase()
.replace(/[^a-z0-9]+/g, '-')
.replace(/^-|-$/g, '');
} }
/** /**

View File

@@ -0,0 +1,18 @@
import { transliterate } from 'transliteration';
/**
 * Build a URL-safe slug from arbitrary text.
 *
 * Processing steps, in order:
 * 1. Pass the text through `transliterate` (from the `transliteration`
 *    package) so umlauts and accented characters become ASCII.
 * 2. Lowercase the result.
 * 3. Replace every run of characters outside `a-z` / `0-9` with a single
 *    normal hyphen (U+002D); this both separates words and collapses
 *    consecutive separators.
 * 4. Drop any hyphens left at the start or end.
 *
 * @param input - Text to convert, e.g. a post title or project name.
 * @returns The slug; an empty string when no letters or digits remain.
 */
export function slugify(input: string): string {
  const asciiLower = transliterate(input).toLowerCase();
  const hyphenated = asciiLower.replace(/[^a-z0-9]+/g, '-');
  return hyphenated.replace(/^-+|-+$/g, '');
}

View File

@@ -227,9 +227,9 @@ describe('PostEngine', () => {
expect(post.slug).toBe('multiple-spaces-here'); expect(post.slug).toBe('multiple-spaces-here');
}); });
it('should handle unicode characters by removing them', async () => { it('should handle unicode characters by transliterating them', async () => {
const post = await postEngine.createPost({ title: 'Café Test' }); const post = await postEngine.createPost({ title: 'Café Test' });
expect(post.slug).toBe('caf-test'); expect(post.slug).toBe('cafe-test');
}); });
}); });

View File

@@ -0,0 +1,110 @@
import { describe, expect, it } from 'vitest';
import { slugify } from '../../src/main/engine/slugify';
describe('slugify', () => {
describe('basic transformations', () => {
  // Built with repeat() so the run of spaces cannot be silently collapsed
  // by editors, formatters or HTML rendering.
  const spaceRun = ' '.repeat(3);

  it('lowercases the input', () => {
    expect(slugify('Hello World')).toBe('hello-world');
  });

  it('replaces spaces with hyphens', () => {
    expect(slugify('hello world')).toBe('hello-world');
  });

  it('collapses multiple spaces into a single hyphen', () => {
    // The input must contain runs of spaces, otherwise this case only
    // repeats the single-space test above.
    expect(slugify(`Multiple${spaceRun}Spaces${spaceRun}Here`)).toBe('multiple-spaces-here');
  });

  it('removes leading and trailing hyphens', () => {
    expect(slugify('---Test---')).toBe('test');
  });

  it('handles numbers in input', () => {
    expect(slugify('10 Tips for Testing')).toBe('10-tips-for-testing');
  });

  it('returns empty string for empty input', () => {
    expect(slugify('')).toBe('');
  });

  it('returns empty string for whitespace-only input', () => {
    expect(slugify(spaceRun)).toBe('');
  });
});
describe('umlaut and special character transliteration', () => {
  // One entry per test: the test title, then the [input, expected slug]
  // pairs that are asserted in order inside that test.
  const groups: ReadonlyArray<readonly [string, ReadonlyArray<readonly [string, string]>]> = [
    [
      'transliterates German umlauts',
      [
        ['Über die Brücke', 'uber-die-brucke'],
        ['Ärger', 'arger'],
        ['schön', 'schon'],
      ],
    ],
    [
      'transliterates ß to ss',
      [
        ['Straße', 'strasse'],
        ['Großmutter', 'grossmutter'],
      ],
    ],
    [
      'transliterates French accented characters',
      [
        ['Café Test', 'cafe-test'],
        ['crème brûlée', 'creme-brulee'],
        ['naïve', 'naive'],
      ],
    ],
    [
      'transliterates Nordic characters',
      [
        ['Ångström', 'angstrom'],
        ['Ærø', 'aero'],
        ['Ødegaard', 'odegaard'],
      ],
    ],
    [
      'transliterates Spanish characters',
      [
        ['España', 'espana'],
        ['niño', 'nino'],
      ],
    ],
    ['transliterates Polish characters', [['Łódź', 'lodz']]],
    [
      'transliterates Czech characters',
      [
        ['Dvořák', 'dvorak'],
        ['Háček', 'hacek'],
      ],
    ],
  ];

  for (const [title, pairs] of groups) {
    it(title, () => {
      for (const [raw, expected] of pairs) {
        expect(slugify(raw)).toBe(expected);
      }
    });
  }
});
describe('special characters removal', () => {
  // Each row: test title, raw input, expected slug.
  const rows: ReadonlyArray<readonly [string, string, string]> = [
    ['removes punctuation', 'Hello, World! How are you?', 'hello-world-how-are-you'],
    ['removes brackets and parentheses', 'Hello (World) [Test]', 'hello-world-test'],
    ['removes symbols', 'Hello @World #Test $100', 'hello-world-test-100'],
    ['removes emoji and non-Latin characters', 'Hello 🌍 World', 'hello-world'],
  ];

  for (const [title, raw, expected] of rows) {
    it(title, () => {
      expect(slugify(raw)).toBe(expected);
    });
  }
});
describe('word separation', () => {
  it('separates words with normal hyphens', () => {
    const result = slugify('Hello World');
    expect(result).toBe('hello-world');
    // Index 5 is the separator between "hello" and "world"; it must be the
    // ASCII hyphen-minus (U+002D), not an en-dash or em-dash.
    expect(result.charCodeAt(5)).toBe(0x2d);
  });

  it('converts en-dashes and em-dashes to hyphens', () => {
    // The dashes are written as Unicode escapes so they cannot be dropped or
    // mistaken for a plain hyphen when the file is copied or rendered.
    expect(slugify('hello\u2013world')).toBe('hello-world'); // en-dash (U+2013)
    expect(slugify('hello\u2014world')).toBe('hello-world'); // em-dash (U+2014)
  });

  it('collapses consecutive special chars into single hyphen', () => {
    expect(slugify('hello!!!world')).toBe('hello-world');
    expect(slugify('hello...world')).toBe('hello-world');
  });
});
});