fix: phase 7 refactoring
This commit is contained in:
@@ -261,7 +261,7 @@ Move color contrast logic into a shared renderer utility.
|
||||
1. Phase 1 (finish API/store type convergence)
|
||||
2. Phase 2 (finish TagEngine workflow dedup)
|
||||
3. Phase 3 (finish PostMedia single/batch dedup)
|
||||
4. Phase 7 (WxrParser repeated parse blocks)
|
||||
4. ~~Phase 7 (WxrParser repeated parse blocks)~~ ✅ Completed
|
||||
5. Phase 8 (MetaEngine ↔ TagEngine overlap)
|
||||
6. Phase 9 (renderer tag event subscription helper)
|
||||
7. Phase 10 (local UI repeated blocks in component files)
|
||||
@@ -274,6 +274,8 @@ Rationale: complete in-flight high-impact phases first, then address newly detec
|
||||
|
||||
## Phase 7 — Consolidate WXR Item Parse Blocks
|
||||
|
||||
Status: ✅ Completed
|
||||
|
||||
### Problem
|
||||
`WxrParser` contains repeated `pubDate` parsing + return-shape scaffolding in nearby item parse paths.
|
||||
|
||||
@@ -289,6 +291,10 @@ Extract shared `parsePubDate` and/or shared item base builder helper to avoid dr
|
||||
- No behavior change in parsed output.
|
||||
- Duplicated `pubDate`/return scaffolding materially reduced.
|
||||
|
||||
### Progress Check
|
||||
- Completed: extracted shared `parsePubDate` helper and shared `parseItemBase` builder for post/media parse paths.
|
||||
- Completed: added branch-focused tests for valid/invalid/missing `pubDate` and post/page parse-branch parity.
|
||||
|
||||
### Coverage & Test Quality (fresh run: `npm run test:coverage`)
|
||||
- `src/main/engine/WxrParser.ts`: 93.55% statements, 100.00% functions, 67.14% branches.
|
||||
- `tests/engine/WxrParser.test.ts`: 19 tests covering parse variants, status handling, metadata extraction, and file-read paths.
|
||||
|
||||
@@ -87,6 +87,40 @@ const EXT_TO_MIME: Record<string, string> = {
|
||||
|
||||
export class WxrParser {
|
||||
|
||||
/**
 * Reads the item's direct-child `pubDate` text and converts it to a `Date`.
 *
 * @param item - Element whose `pubDate` child is read.
 * @returns The parsed date, or `null` when the text is empty or
 *   `new Date(...)` cannot interpret it.
 */
private parsePubDate(item: Element): Date | null {
|
||||
const pubDateStr = this.getDirectChildText(item, 'pubDate');
|
||||
// No usable pubDate text: report "no date" instead of attempting a parse.
if (!pubDateStr) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const parsed = new Date(pubDateStr);
|
||||
// An unparseable string yields an Invalid Date whose timestamp is NaN; map that to null.
return isNaN(parsed.getTime()) ? null : parsed;
|
||||
}
|
||||
|
||||
/**
 * Collects the fields that item parsing reads the same way for every item
 * kind, so callers such as `parsePostItem` and `parseMediaItem` do not
 * repeat the lookups.
 *
 * @param item - Element whose child values are read.
 * @returns A plain object of base fields; `pubDate` is `null` when the
 *   item has no usable `pubDate` text.
 */
private parseItemBase(item: Element): {
|
||||
wpId: number;
|
||||
title: string;
|
||||
slug: string;
|
||||
content: string;
|
||||
excerpt: string;
|
||||
pubDate: Date | null;
|
||||
creator: string;
|
||||
status: string;
|
||||
postType: string;
|
||||
} {
|
||||
return {
|
||||
// Empty post_id text falls back to '0', so wpId becomes 0.
// NOTE(review): non-numeric text would still produce NaN — confirm whether that can occur.
wpId: parseInt(this.getElementText(item, 'post_id', NS.wp) || '0', 10),
|
||||
title: this.getDirectChildText(item, 'title'),
|
||||
slug: this.getElementText(item, 'post_name', NS.wp),
|
||||
// content and excerpt share the local name 'encoded' and differ only by namespace.
content: this.getElementText(item, 'encoded', NS.content),
|
||||
excerpt: this.getElementText(item, 'encoded', NS.excerpt),
|
||||
pubDate: this.parsePubDate(item),
|
||||
creator: this.getElementText(item, 'creator', NS.dc),
|
||||
status: this.getElementText(item, 'status', NS.wp),
|
||||
postType: this.getElementText(item, 'post_type', NS.wp),
|
||||
};
|
||||
}
|
||||
|
||||
async parseFile(filePath: string): Promise<WxrData> {
|
||||
const content = await fs.readFile(filePath, 'utf-8');
|
||||
return this.parseXml(content);
|
||||
@@ -172,6 +206,7 @@ export class WxrParser {
|
||||
}
|
||||
|
||||
private parsePostItem(item: Element): WxrPost {
|
||||
const base = this.parseItemBase(item);
|
||||
const categories: string[] = [];
|
||||
const tags: string[] = [];
|
||||
|
||||
@@ -190,15 +225,6 @@ export class WxrParser {
|
||||
}
|
||||
}
|
||||
|
||||
const pubDateStr = this.getDirectChildText(item, 'pubDate');
|
||||
let pubDate: Date | null = null;
|
||||
if (pubDateStr) {
|
||||
const parsed = new Date(pubDateStr);
|
||||
if (!isNaN(parsed.getTime())) {
|
||||
pubDate = parsed;
|
||||
}
|
||||
}
|
||||
|
||||
// Parse WordPress local post date (wp:post_date)
|
||||
const postDateStr = this.getElementText(item, 'post_date', NS.wp);
|
||||
let postDate: Date | null = null;
|
||||
@@ -220,46 +246,38 @@ export class WxrParser {
|
||||
}
|
||||
|
||||
return {
|
||||
wpId: parseInt(this.getElementText(item, 'post_id', NS.wp) || '0', 10),
|
||||
title: this.getDirectChildText(item, 'title'),
|
||||
slug: this.getElementText(item, 'post_name', NS.wp),
|
||||
content: this.getElementText(item, 'encoded', NS.content),
|
||||
excerpt: this.getElementText(item, 'encoded', NS.excerpt),
|
||||
pubDate,
|
||||
wpId: base.wpId,
|
||||
title: base.title,
|
||||
slug: base.slug,
|
||||
content: base.content,
|
||||
excerpt: base.excerpt,
|
||||
pubDate: base.pubDate,
|
||||
postDate,
|
||||
postModified,
|
||||
creator: this.getElementText(item, 'creator', NS.dc),
|
||||
status: this.getElementText(item, 'status', NS.wp),
|
||||
postType: this.getElementText(item, 'post_type', NS.wp),
|
||||
creator: base.creator,
|
||||
status: base.status,
|
||||
postType: base.postType,
|
||||
categories,
|
||||
tags,
|
||||
};
|
||||
}
|
||||
|
||||
private parseMediaItem(item: Element): WxrMedia {
|
||||
const base = this.parseItemBase(item);
|
||||
const url = this.getElementText(item, 'attachment_url', NS.wp);
|
||||
const filename = this.extractFilename(url);
|
||||
const relativePath = this.extractRelativePath(url);
|
||||
|
||||
const pubDateStr = this.getDirectChildText(item, 'pubDate');
|
||||
let pubDate: Date | null = null;
|
||||
if (pubDateStr) {
|
||||
const parsed = new Date(pubDateStr);
|
||||
if (!isNaN(parsed.getTime())) {
|
||||
pubDate = parsed;
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
wpId: parseInt(this.getElementText(item, 'post_id', NS.wp) || '0', 10),
|
||||
title: this.getDirectChildText(item, 'title'),
|
||||
wpId: base.wpId,
|
||||
title: base.title,
|
||||
url,
|
||||
filename,
|
||||
relativePath,
|
||||
pubDate,
|
||||
pubDate: base.pubDate,
|
||||
parentId: parseInt(this.getElementText(item, 'post_parent', NS.wp) || '0', 10),
|
||||
mimeType: this.inferMimeType(filename),
|
||||
description: this.getElementText(item, 'encoded', NS.content),
|
||||
description: base.content,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -158,6 +158,72 @@ const WXR_WITH_MEDIA = `<?xml version="1.0" encoding="UTF-8"?>
|
||||
</channel>
|
||||
</rss>`;
|
||||
|
||||
// WXR with a single attachment item that carries a valid RFC 822 pubDate
const WXR_WITH_MEDIA_PUBDATE = `<?xml version="1.0" encoding="UTF-8"?>
|
||||
<rss version="2.0"
|
||||
xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
|
||||
xmlns:content="http://purl.org/rss/1.0/modules/content/"
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:wp="http://wordpress.org/export/1.2/">
|
||||
<channel>
|
||||
<title>My Blog</title>
|
||||
<link>https://example.com</link>
|
||||
<description>Test</description>
|
||||
<language>en</language>
|
||||
<item>
|
||||
<title>header-image</title>
|
||||
<pubDate>Fri, 05 Jan 2024 12:34:56 +0000</pubDate>
|
||||
<content:encoded><![CDATA[]]></content:encoded>
|
||||
<excerpt:encoded><![CDATA[]]></excerpt:encoded>
|
||||
<wp:post_id>101</wp:post_id>
|
||||
<wp:post_name>header-image</wp:post_name>
|
||||
<wp:status>inherit</wp:status>
|
||||
<wp:post_type>attachment</wp:post_type>
|
||||
<wp:post_parent>0</wp:post_parent>
|
||||
<wp:attachment_url>https://example.com/wp-content/uploads/2024/01/header.jpg</wp:attachment_url>
|
||||
<dc:creator><![CDATA[admin]]></dc:creator>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`;
|
||||
|
||||
// WXR with one post and one attachment whose pubDate text is not a parseable date
const WXR_WITH_INVALID_PUBDATE = `<?xml version="1.0" encoding="UTF-8"?>
|
||||
<rss version="2.0"
|
||||
xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
|
||||
xmlns:content="http://purl.org/rss/1.0/modules/content/"
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:wp="http://wordpress.org/export/1.2/">
|
||||
<channel>
|
||||
<title>Dates Blog</title>
|
||||
<link>https://example.com</link>
|
||||
<description>Test</description>
|
||||
<language>en</language>
|
||||
<item>
|
||||
<title>Bad Date Post</title>
|
||||
<pubDate>not-a-date</pubDate>
|
||||
<content:encoded><![CDATA[<p>bad date</p>]]></content:encoded>
|
||||
<excerpt:encoded><![CDATA[]]></excerpt:encoded>
|
||||
<wp:post_id>201</wp:post_id>
|
||||
<wp:post_name>bad-date-post</wp:post_name>
|
||||
<wp:status>publish</wp:status>
|
||||
<wp:post_type>post</wp:post_type>
|
||||
<wp:post_parent>0</wp:post_parent>
|
||||
<dc:creator><![CDATA[admin]]></dc:creator>
|
||||
</item>
|
||||
<item>
|
||||
<title>Bad Date Media</title>
|
||||
<pubDate>also-not-a-date</pubDate>
|
||||
<content:encoded><![CDATA[]]></content:encoded>
|
||||
<excerpt:encoded><![CDATA[]]></excerpt:encoded>
|
||||
<wp:post_id>202</wp:post_id>
|
||||
<wp:post_name>bad-date-media</wp:post_name>
|
||||
<wp:status>inherit</wp:status>
|
||||
<wp:post_type>attachment</wp:post_type>
|
||||
<wp:post_parent>0</wp:post_parent>
|
||||
<wp:attachment_url>https://example.com/wp-content/uploads/2024/01/bad-date.jpg</wp:attachment_url>
|
||||
<dc:creator><![CDATA[admin]]></dc:creator>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`;
|
||||
|
||||
// WXR with mixed content: posts, pages, and media
|
||||
const WXR_MIXED = `<?xml version="1.0" encoding="UTF-8"?>
|
||||
<rss version="2.0"
|
||||
@@ -479,6 +545,42 @@ describe('WxrParser', () => {
|
||||
// Page has no pubDate element
|
||||
expect(result.pages[0].pubDate).toBeNull();
|
||||
});
|
||||
|
||||
// Attachment branch: the fixture's `Fri, 05 Jan 2024 12:34:56 +0000` must come back as the same instant in UTC.
it('should parse valid RFC822 pubDate for media items', () => {
|
||||
const result = parser.parseXml(WXR_WITH_MEDIA_PUBDATE);
|
||||
|
||||
expect(result.media).toHaveLength(1);
|
||||
expect(result.media[0].pubDate).toBeInstanceOf(Date);
|
||||
expect(result.media[0].pubDate?.toISOString()).toBe('2024-01-05T12:34:56.000Z');
|
||||
});
|
||||
|
||||
// Unparseable pubDate text must surface as null (not an Invalid Date) for both posts and media.
it('should fallback to null for invalid pubDate nodes in post and media items', () => {
|
||||
const result = parser.parseXml(WXR_WITH_INVALID_PUBDATE);
|
||||
|
||||
// Both items must still be parsed; only their pubDate degrades.
expect(result.posts).toHaveLength(1);
|
||||
expect(result.media).toHaveLength(1);
|
||||
expect(result.posts[0].pubDate).toBeNull();
|
||||
expect(result.media[0].pubDate).toBeNull();
|
||||
});
|
||||
|
||||
// Checks that the post and page branches each populate the shared base fields.
// It asserts presence and type only; it does not compare values between the two items.
it('should keep base fields parity between post and page parse branches', () => {
|
||||
const result = parser.parseXml(WXR_MIXED);
|
||||
const post = result.posts[0];
|
||||
const page = result.pages[0];
|
||||
|
||||
expect(post.postType).toBe('post');
|
||||
expect(page.postType).toBe('page');
|
||||
expect(post.wpId).toBeGreaterThan(0);
|
||||
expect(page.wpId).toBeGreaterThan(0);
|
||||
expect(post.title).toBeTruthy();
|
||||
expect(page.title).toBeTruthy();
|
||||
expect(post.slug).toBeTruthy();
|
||||
expect(page.slug).toBeTruthy();
|
||||
// content and excerpt may legitimately be empty, so only their type is checked.
expect(typeof post.content).toBe('string');
|
||||
expect(typeof page.content).toBe('string');
|
||||
expect(typeof post.excerpt).toBe('string');
|
||||
expect(typeof page.excerpt).toBe('string');
|
||||
});
|
||||
});
|
||||
|
||||
describe('parseFile', () => {
|
||||
|
||||
Reference in New Issue
Block a user