fix: slugify now transliterates much better
This commit is contained in:
14
package-lock.json
generated
14
package-lock.json
generated
@@ -50,6 +50,7 @@
|
||||
"simple-git": "^3.31.1",
|
||||
"smol-toml": "^1.6.0",
|
||||
"snowball-stemmers": "^0.6.0",
|
||||
"transliteration": "^2.6.1",
|
||||
"turndown": "^7.2.2",
|
||||
"uuid": "^13.0.0",
|
||||
"vanilla-calendar-pro": "^3.1.0",
|
||||
@@ -15228,6 +15229,19 @@
|
||||
"node": ">=20"
|
||||
}
|
||||
},
|
||||
"node_modules/transliteration": {
|
||||
"version": "2.6.1",
|
||||
"resolved": "https://registry.npmjs.org/transliteration/-/transliteration-2.6.1.tgz",
|
||||
"integrity": "sha512-hJ9BhrQAOnNTbpOr1MxsNjZISkn7ppvF5TKUeFmTE1mG4ZPD/XVxF0L0LUoIUCWmQyxH0gJpVtfYLAWf298U9w==",
|
||||
"license": "MIT",
|
||||
"bin": {
|
||||
"slugify": "dist/bin/slugify",
|
||||
"transliterate": "dist/bin/transliterate"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=20.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/tree-dump": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/tree-dump/-/tree-dump-1.1.0.tgz",
|
||||
|
||||
@@ -111,6 +111,7 @@
|
||||
"simple-git": "^3.31.1",
|
||||
"smol-toml": "^1.6.0",
|
||||
"snowball-stemmers": "^0.6.0",
|
||||
"transliteration": "^2.6.1",
|
||||
"turndown": "^7.2.2",
|
||||
"uuid": "^13.0.0",
|
||||
"vanilla-calendar-pro": "^3.1.0",
|
||||
|
||||
@@ -13,6 +13,7 @@ import { stemText, stemQuery, SupportedLanguage } from './stemmer';
|
||||
import { readPostFile as readPostFileShared, type PostFileData } from './postFileUtils';
|
||||
import { CliNotifier, NoopNotifier } from './CliNotifier';
|
||||
import type { MediaEngine } from './MediaEngine';
|
||||
import { slugify } from './slugify';
|
||||
|
||||
export interface PostData {
|
||||
id: string;
|
||||
@@ -216,10 +217,7 @@ export class PostEngine extends EventEmitter {
|
||||
}
|
||||
|
||||
private generateSlug(title: string): string {
|
||||
return title
|
||||
.toLowerCase()
|
||||
.replace(/[^a-z0-9]+/g, '-')
|
||||
.replace(/^-|-$/g, '');
|
||||
return slugify(title);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -6,6 +6,7 @@ import { eq } from 'drizzle-orm';
|
||||
import { app } from 'electron';
|
||||
import { getDatabase } from '../database';
|
||||
import { projects, posts, media, Project, NewProject } from '../database/schema';
|
||||
import { slugify } from './slugify';
|
||||
|
||||
export interface ProjectData {
|
||||
id: string;
|
||||
@@ -43,10 +44,7 @@ export class ProjectEngine extends EventEmitter {
|
||||
}
|
||||
|
||||
private generateSlug(name: string): string {
|
||||
return name
|
||||
.toLowerCase()
|
||||
.replace(/[^a-z0-9]+/g, '-')
|
||||
.replace(/^-|-$/g, '');
|
||||
return slugify(name);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
18
src/main/engine/slugify.ts
Normal file
18
src/main/engine/slugify.ts
Normal file
@@ -0,0 +1,18 @@
|
||||
import { transliterate } from 'transliteration';
|
||||
|
||||
/**
|
||||
* Generate a URL-safe slug from a string.
|
||||
*
|
||||
* - Transliterates umlauts and accented characters to ASCII equivalents
|
||||
* using the `transliteration` package for broad Unicode coverage
|
||||
* - Replaces non-alphanumeric characters with hyphens, which act as word separators
|
||||
* - Separates words with normal hyphens (U+002D)
|
||||
* - Collapses consecutive separators into a single hyphen
|
||||
* - Strips leading/trailing hyphens
|
||||
*/
|
||||
export function slugify(input: string): string {
|
||||
return transliterate(input)
|
||||
.toLowerCase()
|
||||
.replace(/[^a-z0-9]+/g, '-')
|
||||
.replace(/^-+|-+$/g, '');
|
||||
}
|
||||
@@ -227,9 +227,9 @@ describe('PostEngine', () => {
|
||||
expect(post.slug).toBe('multiple-spaces-here');
|
||||
});
|
||||
|
||||
it('should handle unicode characters by removing them', async () => {
|
||||
it('should handle unicode characters by transliterating them', async () => {
|
||||
const post = await postEngine.createPost({ title: 'Café Test' });
|
||||
expect(post.slug).toBe('caf-test');
|
||||
expect(post.slug).toBe('cafe-test');
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
110
tests/engine/slugify.test.ts
Normal file
110
tests/engine/slugify.test.ts
Normal file
@@ -0,0 +1,110 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { slugify } from '../../src/main/engine/slugify';
|
||||
|
||||
describe('slugify', () => {
|
||||
describe('basic transformations', () => {
|
||||
it('lowercases the input', () => {
|
||||
expect(slugify('Hello World')).toBe('hello-world');
|
||||
});
|
||||
|
||||
it('replaces spaces with hyphens', () => {
|
||||
expect(slugify('hello world')).toBe('hello-world');
|
||||
});
|
||||
|
||||
it('collapses multiple spaces into a single hyphen', () => {
|
||||
expect(slugify('Multiple Spaces Here')).toBe('multiple-spaces-here');
|
||||
});
|
||||
|
||||
it('removes leading and trailing hyphens', () => {
|
||||
expect(slugify('---Test---')).toBe('test');
|
||||
});
|
||||
|
||||
it('handles numbers in input', () => {
|
||||
expect(slugify('10 Tips for Testing')).toBe('10-tips-for-testing');
|
||||
});
|
||||
|
||||
it('returns empty string for empty input', () => {
|
||||
expect(slugify('')).toBe('');
|
||||
});
|
||||
|
||||
it('returns empty string for whitespace-only input', () => {
|
||||
expect(slugify(' ')).toBe('');
|
||||
});
|
||||
});
|
||||
|
||||
describe('umlaut and special character transliteration', () => {
|
||||
it('transliterates German umlauts', () => {
|
||||
expect(slugify('Über die Brücke')).toBe('uber-die-brucke');
|
||||
expect(slugify('Ärger')).toBe('arger');
|
||||
expect(slugify('schön')).toBe('schon');
|
||||
});
|
||||
|
||||
it('transliterates ß to ss', () => {
|
||||
expect(slugify('Straße')).toBe('strasse');
|
||||
expect(slugify('Großmutter')).toBe('grossmutter');
|
||||
});
|
||||
|
||||
it('transliterates French accented characters', () => {
|
||||
expect(slugify('Café Test')).toBe('cafe-test');
|
||||
expect(slugify('crème brûlée')).toBe('creme-brulee');
|
||||
expect(slugify('naïve')).toBe('naive');
|
||||
});
|
||||
|
||||
it('transliterates Nordic characters', () => {
|
||||
expect(slugify('Ångström')).toBe('angstrom');
|
||||
expect(slugify('Ærø')).toBe('aero');
|
||||
expect(slugify('Ødegaard')).toBe('odegaard');
|
||||
});
|
||||
|
||||
it('transliterates Spanish characters', () => {
|
||||
expect(slugify('España')).toBe('espana');
|
||||
expect(slugify('niño')).toBe('nino');
|
||||
});
|
||||
|
||||
it('transliterates Polish characters', () => {
|
||||
expect(slugify('Łódź')).toBe('lodz');
|
||||
});
|
||||
|
||||
it('transliterates Czech characters', () => {
|
||||
expect(slugify('Dvořák')).toBe('dvorak');
|
||||
expect(slugify('Háček')).toBe('hacek');
|
||||
});
|
||||
});
|
||||
|
||||
describe('special characters removal', () => {
|
||||
it('removes punctuation', () => {
|
||||
expect(slugify('Hello, World! How are you?')).toBe('hello-world-how-are-you');
|
||||
});
|
||||
|
||||
it('removes brackets and parentheses', () => {
|
||||
expect(slugify('Hello (World) [Test]')).toBe('hello-world-test');
|
||||
});
|
||||
|
||||
it('removes symbols', () => {
|
||||
expect(slugify('Hello @World #Test $100')).toBe('hello-world-test-100');
|
||||
});
|
||||
|
||||
it('removes emoji and non-Latin characters', () => {
|
||||
expect(slugify('Hello 🌍 World')).toBe('hello-world');
|
||||
});
|
||||
});
|
||||
|
||||
describe('word separation', () => {
|
||||
it('separates words with normal hyphens', () => {
|
||||
const result = slugify('Hello World');
|
||||
expect(result).toBe('hello-world');
|
||||
// Verify it's a normal hyphen (U+002D), not en-dash or em-dash
|
||||
expect(result.charCodeAt(5)).toBe(0x2d);
|
||||
});
|
||||
|
||||
it('converts en-dashes and em-dashes to hyphens', () => {
|
||||
expect(slugify('hello–world')).toBe('hello-world'); // en-dash
|
||||
expect(slugify('hello—world')).toBe('hello-world'); // em-dash
|
||||
});
|
||||
|
||||
it('collapses consecutive special chars into single hyphen', () => {
|
||||
expect(slugify('hello!!!world')).toBe('hello-world');
|
||||
expect(slugify('hello...world')).toBe('hello-world');
|
||||
});
|
||||
});
|
||||
});
|
||||
Reference in New Issue
Block a user