Add postDate and postModified fields to the WXR parser
This commit is contained in:
@@ -15,6 +15,8 @@ export interface WxrPost {
|
|||||||
content: string;
|
content: string;
|
||||||
excerpt: string;
|
excerpt: string;
|
||||||
pubDate: Date | null;
|
pubDate: Date | null;
|
||||||
|
postDate: Date | null;
|
||||||
|
postModified: Date | null;
|
||||||
creator: string;
|
creator: string;
|
||||||
status: string;
|
status: string;
|
||||||
postType: string;
|
postType: string;
|
||||||
@@ -197,6 +199,26 @@ export class WxrParser {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Parse the WordPress site-local post date (wp:post_date). NOTE: the value has no timezone; appending 'Z' below makes it be interpreted as UTC.
|
||||||
|
const postDateStr = this.getElementText(item, 'post_date', NS.wp);
|
||||||
|
let postDate: Date | null = null;
|
||||||
|
if (postDateStr) {
|
||||||
|
const parsed = new Date(postDateStr.replace(' ', 'T') + 'Z');
|
||||||
|
if (!isNaN(parsed.getTime())) {
|
||||||
|
postDate = parsed;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse the WordPress site-local modification date (wp:post_modified). NOTE: the value has no timezone; appending 'Z' below makes it be interpreted as UTC.
|
||||||
|
const postModifiedStr = this.getElementText(item, 'post_modified', NS.wp);
|
||||||
|
let postModified: Date | null = null;
|
||||||
|
if (postModifiedStr) {
|
||||||
|
const parsed = new Date(postModifiedStr.replace(' ', 'T') + 'Z');
|
||||||
|
if (!isNaN(parsed.getTime())) {
|
||||||
|
postModified = parsed;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
wpId: parseInt(this.getElementText(item, 'post_id', NS.wp) || '0', 10),
|
wpId: parseInt(this.getElementText(item, 'post_id', NS.wp) || '0', 10),
|
||||||
title: this.getDirectChildText(item, 'title'),
|
title: this.getDirectChildText(item, 'title'),
|
||||||
@@ -204,6 +226,8 @@ export class WxrParser {
|
|||||||
content: this.getElementText(item, 'encoded', NS.content),
|
content: this.getElementText(item, 'encoded', NS.content),
|
||||||
excerpt: this.getElementText(item, 'encoded', NS.excerpt),
|
excerpt: this.getElementText(item, 'encoded', NS.excerpt),
|
||||||
pubDate,
|
pubDate,
|
||||||
|
postDate,
|
||||||
|
postModified,
|
||||||
creator: this.getElementText(item, 'creator', NS.dc),
|
creator: this.getElementText(item, 'creator', NS.dc),
|
||||||
status: this.getElementText(item, 'status', NS.wp),
|
status: this.getElementText(item, 'status', NS.wp),
|
||||||
postType: this.getElementText(item, 'post_type', NS.wp),
|
postType: this.getElementText(item, 'post_type', NS.wp),
|
||||||
|
|||||||
@@ -44,6 +44,8 @@ interface AnalyzedPostItem {
|
|||||||
status: string;
|
status: string;
|
||||||
excerpt: string;
|
excerpt: string;
|
||||||
pubDate: string | null;
|
pubDate: string | null;
|
||||||
|
postDate: string | null;
|
||||||
|
postModified: string | null;
|
||||||
creator: string;
|
creator: string;
|
||||||
postType: string;
|
postType: string;
|
||||||
categories: string[];
|
categories: string[];
|
||||||
|
|||||||
@@ -90,6 +90,9 @@ const WXR_WITH_POST = `<?xml version="1.0" encoding="UTF-8"?>
|
|||||||
<excerpt:encoded><![CDATA[Welcome to my blog.]]></excerpt:encoded>
|
<excerpt:encoded><![CDATA[Welcome to my blog.]]></excerpt:encoded>
|
||||||
<wp:post_id>42</wp:post_id>
|
<wp:post_id>42</wp:post_id>
|
||||||
<wp:post_date>2024-01-15 10:30:00</wp:post_date>
|
<wp:post_date>2024-01-15 10:30:00</wp:post_date>
|
||||||
|
<wp:post_date_gmt>2024-01-15 10:30:00</wp:post_date_gmt>
|
||||||
|
<wp:post_modified>2024-01-20 15:45:30</wp:post_modified>
|
||||||
|
<wp:post_modified_gmt>2024-01-20 15:45:30</wp:post_modified_gmt>
|
||||||
<wp:post_name>hello-world</wp:post_name>
|
<wp:post_name>hello-world</wp:post_name>
|
||||||
<wp:status>publish</wp:status>
|
<wp:status>publish</wp:status>
|
||||||
<wp:post_type>post</wp:post_type>
|
<wp:post_type>post</wp:post_type>
|
||||||
@@ -351,6 +354,28 @@ describe('WxrParser', () => {
|
|||||||
expect(post.pubDate).toBeInstanceOf(Date);
|
expect(post.pubDate).toBeInstanceOf(Date);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('should extract postDate and postModified from WXR', () => {
|
||||||
|
const result = parser.parseXml(WXR_WITH_POST);
|
||||||
|
const post = result.posts[0];
|
||||||
|
|
||||||
|
// postDate comes from wp:post_date (site-local creation time); the parser interprets it as UTC, hence the 'Z' in the expected ISO string
|
||||||
|
expect(post.postDate).toBeInstanceOf(Date);
|
||||||
|
expect(post.postDate?.toISOString()).toBe('2024-01-15T10:30:00.000Z');
|
||||||
|
|
||||||
|
// postModified comes from wp:post_modified (site-local modification time); the parser interprets it as UTC, hence the 'Z' in the expected ISO string
|
||||||
|
expect(post.postModified).toBeInstanceOf(Date);
|
||||||
|
expect(post.postModified?.toISOString()).toBe('2024-01-20T15:45:30.000Z');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should handle missing postDate and postModified gracefully', () => {
|
||||||
|
const result = parser.parseXml(WXR_WITH_PAGE);
|
||||||
|
const page = result.pages[0];
|
||||||
|
|
||||||
|
// Page test data doesn't have post_date/post_modified
|
||||||
|
expect(page.postDate).toBeNull();
|
||||||
|
expect(page.postModified).toBeNull();
|
||||||
|
});
|
||||||
|
|
||||||
it('should parse a page and put it in pages array', () => {
|
it('should parse a page and put it in pages array', () => {
|
||||||
const result = parser.parseXml(WXR_WITH_PAGE);
|
const result = parser.parseXml(WXR_WITH_PAGE);
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user