fix: no more duplicates for already fully supported languages (#58)

Co-authored-by: hugo <hugoms@me.com>
This commit is contained in:
Georg Bauer
2026-03-22 16:52:53 +01:00
committed by GitHub
parent 6564ea5b63
commit 72ff998537
3 changed files with 84 additions and 8 deletions

View File

@@ -19,7 +19,13 @@
"WebFetch(domain:ricmac.org)", "WebFetch(domain:ricmac.org)",
"WebFetch(domain:docs.mistral.ai)", "WebFetch(domain:docs.mistral.ai)",
"Bash(npm uninstall dropbox date-fns @testing-library/user-event @types/dagre electron-store memfs)", "Bash(npm uninstall dropbox date-fns @testing-library/user-event @types/dagre electron-store memfs)",
"WebSearch" "WebSearch",
"WebFetch(domain:ai.google.dev)",
"Bash(gh api:*)",
"Bash(python3 -c \"import json,sys; [print\\(f[''''name'''']\\) for f in json.load\\(sys.stdin\\)]\")",
"Bash(grep -rn choices.*auto.*mistral /Users/gb/mlx-env/lib/python3.14/site-packages/vllm_mlx/ --include=*.py)",
"Bash(source ~/mlx-env/bin/activate)",
"Bash(python3 -c \":*)"
] ]
} }
} }

View File

@@ -628,6 +628,16 @@ export class BlogGenerationEngine {
.map((lang) => lang.trim().toLowerCase()) .map((lang) => lang.trim().toLowerCase())
.filter((lang) => lang.length > 0 && lang !== mainLanguage); .filter((lang) => lang.length > 0 && lang !== mainLanguage);
// When a language has a dedicated subtree (e.g. /fr/), suppress its .lang
// translation variant pages (e.g. /2025/01/15/post.fr/) to avoid duplicate
// content. The subtree already contains the translated version at a clean URL.
if (additionalLanguages.length > 0) {
const subtreeLanguages = new Set(additionalLanguages);
publishedRoutePosts = publishedRoutePosts.filter(
(p) => !(p as any).translationSourceSlug || !subtreeLanguages.has((p.language ?? '').trim().toLowerCase()),
);
}
// Determine whether to use worker threads for page generation // Determine whether to use worker threads for page generation
const useWorkers = !!options.dbPath; const useWorkers = !!options.dbPath;
@@ -1409,9 +1419,23 @@ export class BlogGenerationEngine {
.map(([category]) => category); .map(([category]) => category);
const { publishedPosts, publishedListPosts } = await loadPublishedGenerationSets(this.postEngine, listExcludedCategories); const { publishedPosts, publishedListPosts } = await loadPublishedGenerationSets(this.postEngine, listExcludedCategories);
const { routePosts: publishedRoutePosts } = await this.buildPublishedRoutePosts(publishedPosts); let { routePosts: publishedRoutePosts } = await this.buildPublishedRoutePosts(publishedPosts);
const generationPostIndex = buildGenerationPostIndex(publishedListPosts); const generationPostIndex = buildGenerationPostIndex(publishedListPosts);
// --- Build per-language expected paths ---
const mainLanguage = (options.language ?? 'en').trim().toLowerCase();
const additionalLanguages = (options.blogLanguages ?? [])
.map((lang) => lang.trim().toLowerCase())
.filter((lang) => lang.length > 0 && lang !== mainLanguage);
// Suppress .lang variant pages for languages that have a dedicated subtree
if (additionalLanguages.length > 0) {
const subtreeLanguages = new Set(additionalLanguages);
publishedRoutePosts = publishedRoutePosts.filter(
(p) => !(p as any).translationSourceSlug || !subtreeLanguages.has((p.language ?? '').trim().toLowerCase()),
);
}
const { sitemapXml } = buildSitemapAndFeeds({ const { sitemapXml } = buildSitemapAndFeeds({
baseUrl: options.baseUrl, baseUrl: options.baseUrl,
projectName: options.projectName, projectName: options.projectName,
@@ -1427,12 +1451,6 @@ export class BlogGenerationEngine {
await fs.mkdir(htmlDir, { recursive: true }); await fs.mkdir(htmlDir, { recursive: true });
const sitemapPath = path.join(htmlDir, 'sitemap.xml'); const sitemapPath = path.join(htmlDir, 'sitemap.xml');
// --- Build per-language expected paths ---
const mainLanguage = (options.language ?? 'en').trim().toLowerCase();
const additionalLanguages = (options.blogLanguages ?? [])
.map((lang) => lang.trim().toLowerCase())
.filter((lang) => lang.length > 0 && lang !== mainLanguage);
let sitemapToWrite = sitemapXml; let sitemapToWrite = sitemapXml;
const additionalExpectedPaths: string[] = []; const additionalExpectedPaths: string[] = [];
const additionalPostTimestampChecks: Array<{ const additionalPostTimestampChecks: Array<{

View File

@@ -2325,4 +2325,56 @@ describe('BlogGenerationEngine', () => {
const page2 = await readFile(path.join(tempDir, 'html', 'tag', 'paginated', 'page', '2', 'index.html'), 'utf-8'); const page2 = await readFile(path.join(tempDir, 'html', 'tag', 'paginated', 'page', '2', 'index.html'), 'utf-8');
expect(page2).toContain('/tag/paginated/'); expect(page2).toContain('/tag/paginated/');
}); });
// Regression guard for duplicate translated pages: with 'fr' listed in
// blogLanguages, the French version is expected under html/fr/..., and the
// dotted per-post variant (hello-world.fr) is expected to be absent.
it('does not generate .lang variant pages when that language has a subtree', async () => {
  // English source post that advertises a French translation.
  const englishPost = makePost({
    id: '1',
    slug: 'hello-world',
    title: 'Hello World',
    content: '# Hello World\n\nEnglish body',
    language: 'en',
    availableLanguages: ['en', 'fr'],
    createdAt: new Date('2025-01-15T10:00:00Z'),
    updatedAt: new Date('2025-01-15T10:00:00Z'),
  });

  // Published French translation attached to post '1'.
  const frenchTranslation: PostTranslationData = {
    id: 'translation-1-fr',
    projectId: 'default',
    translationFor: '1',
    language: 'fr',
    title: 'Bonjour le monde',
    excerpt: 'Resume FR',
    content: '# Bonjour le monde\n\nCorps FR',
    status: 'published',
    createdAt: new Date('2025-01-15T10:05:00Z'),
    updatedAt: new Date('2025-01-15T10:05:00Z'),
    publishedAt: new Date('2025-01-15T10:06:00Z'),
    filePath: path.join(tempDir, 'posts', 'hello-world.fr.md'),
  };
  const translationLookup = new Map<string, PostTranslationData[]>();
  translationLookup.set('1', [frenchTranslation]);

  setupPosts([englishPost]);
  // Posts without an entry in the lookup resolve to an empty translation list.
  mockPostEngine.getPostTranslations.mockImplementation(async (postId: string) => {
    const found = translationLookup.get(postId);
    return found ?? [];
  });

  const { BlogGenerationEngine } = await import('../../src/main/engine/BlogGenerationEngine');
  const engine = new BlogGenerationEngine(mockPostEngine, mockMediaEngine, mockPostMediaEngine);

  await engine.generate(
    {
      projectId: 'test',
      projectName: 'Test Blog',
      dataDir: tempDir,
      baseUrl: 'https://example.com',
      language: 'en',
      blogLanguages: ['en', 'fr'],
    },
    vi.fn(),
  );

  // Date-based directory segments shared by both output locations.
  const datedSegments = ['2025', '01', '15'];

  // The fr subtree should exist
  const subtreePage = path.join(tempDir, 'html', 'fr', ...datedSegments, 'hello-world', 'index.html');
  expect(await fileExists(subtreePage)).toBe(true);

  // The .fr variant page should NOT exist since fr has a language subtree
  const variantPage = path.join(tempDir, 'html', ...datedSegments, 'hello-world.fr', 'index.html');
  expect(await fileExists(variantPage)).toBe(false);
});
}); });