fix: phase 7 refactoring

This commit is contained in:
2026-02-16 06:54:01 +01:00
parent 341aaead61
commit 6ec25d2705
3 changed files with 158 additions and 32 deletions

View File

@@ -261,7 +261,7 @@ Move color contrast logic into a shared renderer utility.
1. Phase 1 (finish API/store type convergence)
2. Phase 2 (finish TagEngine workflow dedup)
3. Phase 3 (finish PostMedia single/batch dedup)
4. Phase 7 (WxrParser repeated parse blocks)
4. ~~Phase 7 (WxrParser repeated parse blocks)~~ ✅ Completed
5. Phase 8 (MetaEngine ↔ TagEngine overlap)
6. Phase 9 (renderer tag event subscription helper)
7. Phase 10 (local UI repeated blocks in component files)
@@ -274,6 +274,8 @@ Rationale: complete in-flight high-impact phases first, then address newly detec
## Phase 7 — Consolidate WXR Item Parse Blocks
Status: ✅ Completed
### Problem
`WxrParser` contains repeated `pubDate` parsing + return-shape scaffolding in nearby item parse paths.
@@ -289,6 +291,10 @@ Extract shared `parsePubDate` and/or shared item base builder helper to avoid dr
- No behavior change in parsed output.
- Duplicated `pubDate`/return scaffolding materially reduced.
### Progress Check
- Completed: extracted shared `pubDate` parser helper and shared base item builder for post/media parse paths.
- Completed: added branch-focused tests for valid/invalid/missing `pubDate` and post/page parse-branch parity.
### Coverage & Test Quality (fresh run: `npm run test:coverage`)
- `src/main/engine/WxrParser.ts`: 93.55% statements, 100.00% functions, 67.14% branches.
- `tests/engine/WxrParser.test.ts`: 19 tests covering parse variants, status handling, metadata extraction, and file-read paths.

View File

@@ -87,6 +87,40 @@ const EXT_TO_MIME: Record<string, string> = {
export class WxrParser {
/**
 * Reads the `pubDate` direct child of an item and converts it to a `Date`.
 *
 * @param item - Element whose `pubDate` child text is looked up.
 * @returns The parsed `Date`, or `null` when the lookup yields a falsy value
 *   or when `Date` cannot interpret the text (its timestamp is `NaN`).
 */
private parsePubDate(item: Element): Date | null {
  const rawText = this.getDirectChildText(item, 'pubDate');
  if (rawText) {
    const candidate = new Date(rawText);
    // An unparseable string produces an "Invalid Date" whose getTime() is NaN.
    if (!isNaN(candidate.getTime())) {
      return candidate;
    }
  }
  return null;
}
/**
 * Collects the fields that the post and media parse paths both read from an
 * item element, so each path does not repeat the same lookups.
 *
 * @param item - Element to read the shared fields from.
 * @returns An object with the numeric id, text fields and parsed `pubDate`.
 *   `wpId` is parsed in base 10 and uses `'0'` as input when the `post_id`
 *   lookup yields a falsy value.
 */
private parseItemBase(item: Element): {
  wpId: number;
  title: string;
  slug: string;
  content: string;
  excerpt: string;
  pubDate: Date | null;
  creator: string;
  status: string;
  postType: string;
} {
  const wpIdText = this.getElementText(item, 'post_id', NS.wp) || '0';
  const wpId = parseInt(wpIdText, 10);
  const title = this.getDirectChildText(item, 'title');
  const slug = this.getElementText(item, 'post_name', NS.wp);
  // `content` and `excerpt` share the local name `encoded`; the namespace tells them apart.
  const content = this.getElementText(item, 'encoded', NS.content);
  const excerpt = this.getElementText(item, 'encoded', NS.excerpt);
  const pubDate = this.parsePubDate(item);
  const creator = this.getElementText(item, 'creator', NS.dc);
  const status = this.getElementText(item, 'status', NS.wp);
  const postType = this.getElementText(item, 'post_type', NS.wp);

  return { wpId, title, slug, content, excerpt, pubDate, creator, status, postType };
}
async parseFile(filePath: string): Promise<WxrData> {
const content = await fs.readFile(filePath, 'utf-8');
return this.parseXml(content);
@@ -172,6 +206,7 @@ export class WxrParser {
}
private parsePostItem(item: Element): WxrPost {
const base = this.parseItemBase(item);
const categories: string[] = [];
const tags: string[] = [];
@@ -190,15 +225,6 @@ export class WxrParser {
}
}
const pubDateStr = this.getDirectChildText(item, 'pubDate');
let pubDate: Date | null = null;
if (pubDateStr) {
const parsed = new Date(pubDateStr);
if (!isNaN(parsed.getTime())) {
pubDate = parsed;
}
}
// Parse WordPress local post date (wp:post_date)
const postDateStr = this.getElementText(item, 'post_date', NS.wp);
let postDate: Date | null = null;
@@ -220,46 +246,38 @@ export class WxrParser {
}
return {
wpId: parseInt(this.getElementText(item, 'post_id', NS.wp) || '0', 10),
title: this.getDirectChildText(item, 'title'),
slug: this.getElementText(item, 'post_name', NS.wp),
content: this.getElementText(item, 'encoded', NS.content),
excerpt: this.getElementText(item, 'encoded', NS.excerpt),
pubDate,
wpId: base.wpId,
title: base.title,
slug: base.slug,
content: base.content,
excerpt: base.excerpt,
pubDate: base.pubDate,
postDate,
postModified,
creator: this.getElementText(item, 'creator', NS.dc),
status: this.getElementText(item, 'status', NS.wp),
postType: this.getElementText(item, 'post_type', NS.wp),
creator: base.creator,
status: base.status,
postType: base.postType,
categories,
tags,
};
}
/**
 * Builds a `WxrMedia` record from an item element.
 *
 * The id, title, `pubDate` and description come from `parseItemBase`, so this
 * method no longer parses `pubDate` itself or lists the same object keys twice.
 *
 * @param item - Element describing the media entry.
 * @returns The media record. `parentId` is parsed in base 10 and uses `'0'`
 *   as input when the `post_parent` lookup yields a falsy value.
 */
private parseMediaItem(item: Element): WxrMedia {
  const base = this.parseItemBase(item);
  const url = this.getElementText(item, 'attachment_url', NS.wp);
  // Filename and relative path are both derived from the attachment URL.
  const filename = this.extractFilename(url);
  const relativePath = this.extractRelativePath(url);

  return {
    wpId: base.wpId,
    title: base.title,
    url,
    filename,
    relativePath,
    pubDate: base.pubDate,
    parentId: parseInt(this.getElementText(item, 'post_parent', NS.wp) || '0', 10),
    mimeType: this.inferMimeType(filename),
    // The media description is the item's content-namespace `encoded` text.
    description: base.content,
  };
}

View File

@@ -158,6 +158,72 @@ const WXR_WITH_MEDIA = `<?xml version="1.0" encoding="UTF-8"?>
</channel>
</rss>`;
// WXR with a single attachment item that carries a well-formed RFC 822 pubDate
const WXR_WITH_MEDIA_PUBDATE = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:wp="http://wordpress.org/export/1.2/">
<channel>
<title>My Blog</title>
<link>https://example.com</link>
<description>Test</description>
<language>en</language>
<item>
<title>header-image</title>
<pubDate>Fri, 05 Jan 2024 12:34:56 +0000</pubDate>
<content:encoded><![CDATA[]]></content:encoded>
<excerpt:encoded><![CDATA[]]></excerpt:encoded>
<wp:post_id>101</wp:post_id>
<wp:post_name>header-image</wp:post_name>
<wp:status>inherit</wp:status>
<wp:post_type>attachment</wp:post_type>
<wp:post_parent>0</wp:post_parent>
<wp:attachment_url>https://example.com/wp-content/uploads/2024/01/header.jpg</wp:attachment_url>
<dc:creator><![CDATA[admin]]></dc:creator>
</item>
</channel>
</rss>`;
// WXR with one post item and one attachment item whose pubDate text is not a date
const WXR_WITH_INVALID_PUBDATE = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:wp="http://wordpress.org/export/1.2/">
<channel>
<title>Dates Blog</title>
<link>https://example.com</link>
<description>Test</description>
<language>en</language>
<item>
<title>Bad Date Post</title>
<pubDate>not-a-date</pubDate>
<content:encoded><![CDATA[<p>bad date</p>]]></content:encoded>
<excerpt:encoded><![CDATA[]]></excerpt:encoded>
<wp:post_id>201</wp:post_id>
<wp:post_name>bad-date-post</wp:post_name>
<wp:status>publish</wp:status>
<wp:post_type>post</wp:post_type>
<wp:post_parent>0</wp:post_parent>
<dc:creator><![CDATA[admin]]></dc:creator>
</item>
<item>
<title>Bad Date Media</title>
<pubDate>also-not-a-date</pubDate>
<content:encoded><![CDATA[]]></content:encoded>
<excerpt:encoded><![CDATA[]]></excerpt:encoded>
<wp:post_id>202</wp:post_id>
<wp:post_name>bad-date-media</wp:post_name>
<wp:status>inherit</wp:status>
<wp:post_type>attachment</wp:post_type>
<wp:post_parent>0</wp:post_parent>
<wp:attachment_url>https://example.com/wp-content/uploads/2024/01/bad-date.jpg</wp:attachment_url>
<dc:creator><![CDATA[admin]]></dc:creator>
</item>
</channel>
</rss>`;
// WXR with mixed content: posts, pages, and media
const WXR_MIXED = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
@@ -479,6 +545,42 @@ describe('WxrParser', () => {
// Page has no pubDate element
expect(result.pages[0].pubDate).toBeNull();
});
it('should parse valid RFC822 pubDate for media items', () => {
  // The fixture holds exactly one attachment with a well-formed pubDate.
  const { media } = parser.parseXml(WXR_WITH_MEDIA_PUBDATE);
  expect(media).toHaveLength(1);

  const publishedAt = media[0].pubDate;
  expect(publishedAt).toBeInstanceOf(Date);
  // The +0000 offset in the fixture means the ISO string keeps the same clock time.
  expect(publishedAt?.toISOString()).toBe('2024-01-05T12:34:56.000Z');
});
it('should fallback to null for invalid pubDate nodes in post and media items', () => {
  // Both items in the fixture carry pubDate text that is not a date.
  const { posts, media } = parser.parseXml(WXR_WITH_INVALID_PUBDATE);

  expect(posts).toHaveLength(1);
  expect(media).toHaveLength(1);

  const postPubDate = posts[0].pubDate;
  expect(postPubDate).toBeNull();
  const mediaPubDate = media[0].pubDate;
  expect(mediaPubDate).toBeNull();
});
it('should keep base fields parity between post and page parse branches', () => {
  const { posts, pages } = parser.parseXml(WXR_MIXED);
  const post = posts[0];
  const page = pages[0];

  // The two branches must differ only in their reported type.
  expect(post.postType).toBe('post');
  expect(page.postType).toBe('page');

  // Every shared field is checked on the post first, then on the page.
  const entries = [post, page];
  for (const entry of entries) expect(entry.wpId).toBeGreaterThan(0);
  for (const entry of entries) expect(entry.title).toBeTruthy();
  for (const entry of entries) expect(entry.slug).toBeTruthy();
  for (const entry of entries) expect(typeof entry.content).toBe('string');
  for (const entry of entries) expect(typeof entry.excerpt).toBe('string');
});
});
describe('parseFile', () => {