diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 8f0405e..9241cf6 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -19,7 +19,13 @@ "WebFetch(domain:ricmac.org)", "WebFetch(domain:docs.mistral.ai)", "Bash(npm uninstall dropbox date-fns @testing-library/user-event @types/dagre electron-store memfs)", - "WebSearch" + "WebSearch", + "WebFetch(domain:ai.google.dev)", + "Bash(gh api:*)", + "Bash(python3 -c \"import json,sys; [print\\(f[''''name'''']\\) for f in json.load\\(sys.stdin\\)]\")", + "Bash(grep -rn choices.*auto.*mistral /Users/gb/mlx-env/lib/python3.14/site-packages/vllm_mlx/ --include=*.py)", + "Bash(source ~/mlx-env/bin/activate)", + "Bash(python3 -c \":*)" ] } } diff --git a/src/main/engine/BlogGenerationEngine.ts b/src/main/engine/BlogGenerationEngine.ts index 04d6e3e..b72dd61 100644 --- a/src/main/engine/BlogGenerationEngine.ts +++ b/src/main/engine/BlogGenerationEngine.ts @@ -628,6 +628,16 @@ export class BlogGenerationEngine { .map((lang) => lang.trim().toLowerCase()) .filter((lang) => lang.length > 0 && lang !== mainLanguage); + // When a language has a dedicated subtree (e.g. /fr/), suppress its .lang + // translation variant pages (e.g. /2025/01/15/post.fr/) to avoid duplicate + // content. The subtree already contains the translated version at a clean URL. + if (additionalLanguages.length > 0) { + const subtreeLanguages = new Set(additionalLanguages); + publishedRoutePosts = publishedRoutePosts.filter( + (p) => !(p as any).translationSourceSlug || !subtreeLanguages.has((p.language ?? '').trim().toLowerCase()), + ); + } + // Determine whether to use worker threads for page generation const useWorkers = !!options.dbPath; @@ -1409,9 +1419,23 @@ export class BlogGenerationEngine { .map(([category]) => category); const { publishedPosts, publishedListPosts } = await loadPublishedGenerationSets(this.postEngine, listExcludedCategories); - const { routePosts: publishedRoutePosts } = await this.buildPublishedRoutePosts(publishedPosts); + let { routePosts: publishedRoutePosts } = await this.buildPublishedRoutePosts(publishedPosts); const generationPostIndex = buildGenerationPostIndex(publishedListPosts); + // --- Build per-language expected paths --- + const mainLanguage = (options.language ?? 'en').trim().toLowerCase(); + const additionalLanguages = (options.blogLanguages ?? []) + .map((lang) => lang.trim().toLowerCase()) + .filter((lang) => lang.length > 0 && lang !== mainLanguage); + + // Suppress .lang variant pages for languages that have a dedicated subtree + if (additionalLanguages.length > 0) { + const subtreeLanguages = new Set(additionalLanguages); + publishedRoutePosts = publishedRoutePosts.filter( + (p) => !(p as any).translationSourceSlug || !subtreeLanguages.has((p.language ?? '').trim().toLowerCase()), + ); + } + const { sitemapXml } = buildSitemapAndFeeds({ baseUrl: options.baseUrl, projectName: options.projectName, @@ -1427,12 +1451,6 @@ export class BlogGenerationEngine { await fs.mkdir(htmlDir, { recursive: true }); const sitemapPath = path.join(htmlDir, 'sitemap.xml'); - // --- Build per-language expected paths --- - const mainLanguage = (options.language ?? 'en').trim().toLowerCase(); - const additionalLanguages = (options.blogLanguages ?? []) - .map((lang) => lang.trim().toLowerCase()) - .filter((lang) => lang.length > 0 && lang !== mainLanguage); - let sitemapToWrite = sitemapXml; const additionalExpectedPaths: string[] = []; const additionalPostTimestampChecks: Array<{ diff --git a/tests/engine/BlogGenerationEngine.test.ts b/tests/engine/BlogGenerationEngine.test.ts index 6746cf5..078c93c 100644 --- a/tests/engine/BlogGenerationEngine.test.ts +++ b/tests/engine/BlogGenerationEngine.test.ts @@ -2325,4 +2325,56 @@ describe('BlogGenerationEngine', () => { const page2 = await readFile(path.join(tempDir, 'html', 'tag', 'paginated', 'page', '2', 'index.html'), 'utf-8'); expect(page2).toContain('/tag/paginated/'); }); + + it('does not generate .lang variant pages when that language has a subtree', async () => { + const sourcePost = makePost({ + id: '1', + slug: 'hello-world', + title: 'Hello World', + content: '# Hello World\n\nEnglish body', + language: 'en', + availableLanguages: ['en', 'fr'], + createdAt: new Date('2025-01-15T10:00:00Z'), + updatedAt: new Date('2025-01-15T10:00:00Z'), + }); + const translationsByPostId = new Map([ + ['1', [{ + id: 'translation-1-fr', + projectId: 'default', + translationFor: '1', + language: 'fr', + title: 'Bonjour le monde', + excerpt: 'Resume FR', + content: '# Bonjour le monde\n\nCorps FR', + status: 'published', + createdAt: new Date('2025-01-15T10:05:00Z'), + updatedAt: new Date('2025-01-15T10:05:00Z'), + publishedAt: new Date('2025-01-15T10:06:00Z'), + filePath: path.join(tempDir, 'posts', 'hello-world.fr.md'), + }]], + ]); + + setupPosts([sourcePost]); + mockPostEngine.getPostTranslations.mockImplementation(async (postId: string) => translationsByPostId.get(postId) ?? []); + + const { BlogGenerationEngine } = await import('../../src/main/engine/BlogGenerationEngine'); + const engine = new BlogGenerationEngine(mockPostEngine, mockMediaEngine, mockPostMediaEngine); + + await engine.generate({ + projectId: 'test', + projectName: 'Test Blog', + dataDir: tempDir, + baseUrl: 'https://example.com', + language: 'en', + blogLanguages: ['en', 'fr'], + }, vi.fn()); + + // The fr subtree should exist + const frSubtreeExists = await fileExists(path.join(tempDir, 'html', 'fr', '2025', '01', '15', 'hello-world', 'index.html')); + expect(frSubtreeExists).toBe(true); + + // The .fr variant page should NOT exist since fr has a language subtree + const variantExists = await fileExists(path.join(tempDir, 'html', '2025', '01', '15', 'hello-world.fr', 'index.html')); + expect(variantExists).toBe(false); + }); });