Added postDate and postModified fields to the WXR parser
This commit is contained in:
@@ -15,6 +15,8 @@ export interface WxrPost {
|
||||
content: string;
|
||||
excerpt: string;
|
||||
pubDate: Date | null;
|
||||
postDate: Date | null;
|
||||
postModified: Date | null;
|
||||
creator: string;
|
||||
status: string;
|
||||
postType: string;
|
||||
@@ -197,6 +199,26 @@ export class WxrParser {
|
||||
}
|
||||
}
|
||||
|
||||
// Parse WordPress local post date (wp:post_date). NOTE: the site-local wall-clock value is interpreted as UTC (a 'Z' suffix is appended below).
|
||||
const postDateStr = this.getElementText(item, 'post_date', NS.wp);
|
||||
let postDate: Date | null = null;
|
||||
if (postDateStr) {
|
||||
const parsed = new Date(postDateStr.replace(' ', 'T') + 'Z');
|
||||
if (!isNaN(parsed.getTime())) {
|
||||
postDate = parsed;
|
||||
}
|
||||
}
|
||||
|
||||
// Parse WordPress local modification date (wp:post_modified). NOTE: the site-local wall-clock value is interpreted as UTC (a 'Z' suffix is appended below).
|
||||
const postModifiedStr = this.getElementText(item, 'post_modified', NS.wp);
|
||||
let postModified: Date | null = null;
|
||||
if (postModifiedStr) {
|
||||
const parsed = new Date(postModifiedStr.replace(' ', 'T') + 'Z');
|
||||
if (!isNaN(parsed.getTime())) {
|
||||
postModified = parsed;
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
wpId: parseInt(this.getElementText(item, 'post_id', NS.wp) || '0', 10),
|
||||
title: this.getDirectChildText(item, 'title'),
|
||||
@@ -204,6 +226,8 @@ export class WxrParser {
|
||||
content: this.getElementText(item, 'encoded', NS.content),
|
||||
excerpt: this.getElementText(item, 'encoded', NS.excerpt),
|
||||
pubDate,
|
||||
postDate,
|
||||
postModified,
|
||||
creator: this.getElementText(item, 'creator', NS.dc),
|
||||
status: this.getElementText(item, 'status', NS.wp),
|
||||
postType: this.getElementText(item, 'post_type', NS.wp),
|
||||
|
||||
@@ -44,6 +44,8 @@ interface AnalyzedPostItem {
|
||||
status: string;
|
||||
excerpt: string;
|
||||
pubDate: string | null;
|
||||
postDate: string | null;
|
||||
postModified: string | null;
|
||||
creator: string;
|
||||
postType: string;
|
||||
categories: string[];
|
||||
|
||||
@@ -90,6 +90,9 @@ const WXR_WITH_POST = `<?xml version="1.0" encoding="UTF-8"?>
|
||||
<excerpt:encoded><![CDATA[Welcome to my blog.]]></excerpt:encoded>
|
||||
<wp:post_id>42</wp:post_id>
|
||||
<wp:post_date>2024-01-15 10:30:00</wp:post_date>
|
||||
<wp:post_date_gmt>2024-01-15 10:30:00</wp:post_date_gmt>
|
||||
<wp:post_modified>2024-01-20 15:45:30</wp:post_modified>
|
||||
<wp:post_modified_gmt>2024-01-20 15:45:30</wp:post_modified_gmt>
|
||||
<wp:post_name>hello-world</wp:post_name>
|
||||
<wp:status>publish</wp:status>
|
||||
<wp:post_type>post</wp:post_type>
|
||||
@@ -351,6 +354,28 @@ describe('WxrParser', () => {
|
||||
expect(post.pubDate).toBeInstanceOf(Date);
|
||||
});
|
||||
|
||||
it('should extract postDate and postModified from WXR', () => {
|
||||
const result = parser.parseXml(WXR_WITH_POST);
|
||||
const post = result.posts[0];
|
||||
|
||||
// postDate comes from wp:post_date (WordPress local creation date); the parser reads its wall-clock value as UTC
|
||||
expect(post.postDate).toBeInstanceOf(Date);
|
||||
expect(post.postDate?.toISOString()).toBe('2024-01-15T10:30:00.000Z');
|
||||
|
||||
// postModified comes from wp:post_modified (WordPress local modification date); the parser reads its wall-clock value as UTC
|
||||
expect(post.postModified).toBeInstanceOf(Date);
|
||||
expect(post.postModified?.toISOString()).toBe('2024-01-20T15:45:30.000Z');
|
||||
});
|
||||
|
||||
it('should handle missing postDate and postModified gracefully', () => {
|
||||
const result = parser.parseXml(WXR_WITH_PAGE);
|
||||
const page = result.pages[0];
|
||||
|
||||
// Page test data doesn't have post_date/post_modified
|
||||
expect(page.postDate).toBeNull();
|
||||
expect(page.postModified).toBeNull();
|
||||
});
|
||||
|
||||
it('should parse a page and put it in pages array', () => {
|
||||
const result = parser.parseXml(WXR_WITH_PAGE);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user