fix: no more duplicates for already fully supported languages (#58)
Co-authored-by: hugo <hugoms@me.com>
This commit is contained in:
@@ -19,7 +19,13 @@
|
|||||||
"WebFetch(domain:ricmac.org)",
|
"WebFetch(domain:ricmac.org)",
|
||||||
"WebFetch(domain:docs.mistral.ai)",
|
"WebFetch(domain:docs.mistral.ai)",
|
||||||
"Bash(npm uninstall dropbox date-fns @testing-library/user-event @types/dagre electron-store memfs)",
|
"Bash(npm uninstall dropbox date-fns @testing-library/user-event @types/dagre electron-store memfs)",
|
||||||
"WebSearch"
|
"WebSearch",
|
||||||
|
"WebFetch(domain:ai.google.dev)",
|
||||||
|
"Bash(gh api:*)",
|
||||||
|
"Bash(python3 -c \"import json,sys; [print\\(f[''''name'''']\\) for f in json.load\\(sys.stdin\\)]\")",
|
||||||
|
"Bash(grep -rn choices.*auto.*mistral /Users/gb/mlx-env/lib/python3.14/site-packages/vllm_mlx/ --include=*.py)",
|
||||||
|
"Bash(source ~/mlx-env/bin/activate)",
|
||||||
|
"Bash(python3 -c \":*)"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -628,6 +628,16 @@ export class BlogGenerationEngine {
|
|||||||
.map((lang) => lang.trim().toLowerCase())
|
.map((lang) => lang.trim().toLowerCase())
|
||||||
.filter((lang) => lang.length > 0 && lang !== mainLanguage);
|
.filter((lang) => lang.length > 0 && lang !== mainLanguage);
|
||||||
|
|
||||||
|
// When a language has a dedicated subtree (e.g. /fr/), suppress its .lang
|
||||||
|
// translation variant pages (e.g. /2025/01/15/post.fr/) to avoid duplicate
|
||||||
|
// content. The subtree already contains the translated version at a clean URL.
|
||||||
|
if (additionalLanguages.length > 0) {
|
||||||
|
const subtreeLanguages = new Set(additionalLanguages);
|
||||||
|
publishedRoutePosts = publishedRoutePosts.filter(
|
||||||
|
(p) => !(p as any).translationSourceSlug || !subtreeLanguages.has((p.language ?? '').trim().toLowerCase()),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
// Determine whether to use worker threads for page generation
|
// Determine whether to use worker threads for page generation
|
||||||
const useWorkers = !!options.dbPath;
|
const useWorkers = !!options.dbPath;
|
||||||
|
|
||||||
@@ -1409,9 +1419,23 @@ export class BlogGenerationEngine {
|
|||||||
.map(([category]) => category);
|
.map(([category]) => category);
|
||||||
|
|
||||||
const { publishedPosts, publishedListPosts } = await loadPublishedGenerationSets(this.postEngine, listExcludedCategories);
|
const { publishedPosts, publishedListPosts } = await loadPublishedGenerationSets(this.postEngine, listExcludedCategories);
|
||||||
const { routePosts: publishedRoutePosts } = await this.buildPublishedRoutePosts(publishedPosts);
|
let { routePosts: publishedRoutePosts } = await this.buildPublishedRoutePosts(publishedPosts);
|
||||||
const generationPostIndex = buildGenerationPostIndex(publishedListPosts);
|
const generationPostIndex = buildGenerationPostIndex(publishedListPosts);
|
||||||
|
|
||||||
|
// --- Build per-language expected paths ---
|
||||||
|
const mainLanguage = (options.language ?? 'en').trim().toLowerCase();
|
||||||
|
const additionalLanguages = (options.blogLanguages ?? [])
|
||||||
|
.map((lang) => lang.trim().toLowerCase())
|
||||||
|
.filter((lang) => lang.length > 0 && lang !== mainLanguage);
|
||||||
|
|
||||||
|
// Suppress .lang variant pages for languages that have a dedicated subtree
|
||||||
|
if (additionalLanguages.length > 0) {
|
||||||
|
const subtreeLanguages = new Set(additionalLanguages);
|
||||||
|
publishedRoutePosts = publishedRoutePosts.filter(
|
||||||
|
(p) => !(p as any).translationSourceSlug || !subtreeLanguages.has((p.language ?? '').trim().toLowerCase()),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
const { sitemapXml } = buildSitemapAndFeeds({
|
const { sitemapXml } = buildSitemapAndFeeds({
|
||||||
baseUrl: options.baseUrl,
|
baseUrl: options.baseUrl,
|
||||||
projectName: options.projectName,
|
projectName: options.projectName,
|
||||||
@@ -1427,12 +1451,6 @@ export class BlogGenerationEngine {
|
|||||||
await fs.mkdir(htmlDir, { recursive: true });
|
await fs.mkdir(htmlDir, { recursive: true });
|
||||||
const sitemapPath = path.join(htmlDir, 'sitemap.xml');
|
const sitemapPath = path.join(htmlDir, 'sitemap.xml');
|
||||||
|
|
||||||
// --- Build per-language expected paths ---
|
|
||||||
const mainLanguage = (options.language ?? 'en').trim().toLowerCase();
|
|
||||||
const additionalLanguages = (options.blogLanguages ?? [])
|
|
||||||
.map((lang) => lang.trim().toLowerCase())
|
|
||||||
.filter((lang) => lang.length > 0 && lang !== mainLanguage);
|
|
||||||
|
|
||||||
let sitemapToWrite = sitemapXml;
|
let sitemapToWrite = sitemapXml;
|
||||||
const additionalExpectedPaths: string[] = [];
|
const additionalExpectedPaths: string[] = [];
|
||||||
const additionalPostTimestampChecks: Array<{
|
const additionalPostTimestampChecks: Array<{
|
||||||
|
|||||||
@@ -2325,4 +2325,56 @@ describe('BlogGenerationEngine', () => {
|
|||||||
const page2 = await readFile(path.join(tempDir, 'html', 'tag', 'paginated', 'page', '2', 'index.html'), 'utf-8');
|
const page2 = await readFile(path.join(tempDir, 'html', 'tag', 'paginated', 'page', '2', 'index.html'), 'utf-8');
|
||||||
expect(page2).toContain('/tag/paginated/');
|
expect(page2).toContain('/tag/paginated/');
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('does not generate .lang variant pages when that language has a subtree', async () => {
|
||||||
|
const sourcePost = makePost({
|
||||||
|
id: '1',
|
||||||
|
slug: 'hello-world',
|
||||||
|
title: 'Hello World',
|
||||||
|
content: '# Hello World\n\nEnglish body',
|
||||||
|
language: 'en',
|
||||||
|
availableLanguages: ['en', 'fr'],
|
||||||
|
createdAt: new Date('2025-01-15T10:00:00Z'),
|
||||||
|
updatedAt: new Date('2025-01-15T10:00:00Z'),
|
||||||
|
});
|
||||||
|
const translationsByPostId = new Map<string, PostTranslationData[]>([
|
||||||
|
['1', [{
|
||||||
|
id: 'translation-1-fr',
|
||||||
|
projectId: 'default',
|
||||||
|
translationFor: '1',
|
||||||
|
language: 'fr',
|
||||||
|
title: 'Bonjour le monde',
|
||||||
|
excerpt: 'Resume FR',
|
||||||
|
content: '# Bonjour le monde\n\nCorps FR',
|
||||||
|
status: 'published',
|
||||||
|
createdAt: new Date('2025-01-15T10:05:00Z'),
|
||||||
|
updatedAt: new Date('2025-01-15T10:05:00Z'),
|
||||||
|
publishedAt: new Date('2025-01-15T10:06:00Z'),
|
||||||
|
filePath: path.join(tempDir, 'posts', 'hello-world.fr.md'),
|
||||||
|
}]],
|
||||||
|
]);
|
||||||
|
|
||||||
|
setupPosts([sourcePost]);
|
||||||
|
mockPostEngine.getPostTranslations.mockImplementation(async (postId: string) => translationsByPostId.get(postId) ?? []);
|
||||||
|
|
||||||
|
const { BlogGenerationEngine } = await import('../../src/main/engine/BlogGenerationEngine');
|
||||||
|
const engine = new BlogGenerationEngine(mockPostEngine, mockMediaEngine, mockPostMediaEngine);
|
||||||
|
|
||||||
|
await engine.generate({
|
||||||
|
projectId: 'test',
|
||||||
|
projectName: 'Test Blog',
|
||||||
|
dataDir: tempDir,
|
||||||
|
baseUrl: 'https://example.com',
|
||||||
|
language: 'en',
|
||||||
|
blogLanguages: ['en', 'fr'],
|
||||||
|
}, vi.fn());
|
||||||
|
|
||||||
|
// The fr subtree should exist
|
||||||
|
const frSubtreeExists = await fileExists(path.join(tempDir, 'html', 'fr', '2025', '01', '15', 'hello-world', 'index.html'));
|
||||||
|
expect(frSubtreeExists).toBe(true);
|
||||||
|
|
||||||
|
// The .fr variant page should NOT exist since fr has a language subtree
|
||||||
|
const variantExists = await fileExists(path.join(tempDir, 'html', '2025', '01', '15', 'hello-world.fr', 'index.html'));
|
||||||
|
expect(variantExists).toBe(false);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
Reference in New Issue
Block a user