Add postDate and postModified fields to the WXR parser

This commit is contained in:
2026-02-14 19:11:16 +01:00
parent 49e3ed7a95
commit 973d6af231
3 changed files with 51 additions and 0 deletions

View File

@@ -15,6 +15,8 @@ export interface WxrPost {
content: string;
excerpt: string;
pubDate: Date | null;
postDate: Date | null;
postModified: Date | null;
creator: string;
status: string;
postType: string;
@@ -197,6 +199,26 @@ export class WxrParser {
}
}
// Convert the text of a WordPress timestamp element into a Date.
// The first space is swapped for 'T' and a 'Z' suffix is appended, so the
// wall-clock value in the export is interpreted as if it were UTC.
// Empty text or a value that Date cannot parse yields null.
const parseWpTimestamp = (rawText: string): Date | null => {
  if (!rawText) {
    return null;
  }
  const candidate = new Date(rawText.replace(' ', 'T') + 'Z');
  return isNaN(candidate.getTime()) ? null : candidate;
};
// WordPress local post date (wp:post_date)
const postDate = parseWpTimestamp(this.getElementText(item, 'post_date', NS.wp));
// WordPress local modification date (wp:post_modified)
const postModified = parseWpTimestamp(this.getElementText(item, 'post_modified', NS.wp));
return {
wpId: parseInt(this.getElementText(item, 'post_id', NS.wp) || '0', 10),
title: this.getDirectChildText(item, 'title'),
@@ -204,6 +226,8 @@ export class WxrParser {
content: this.getElementText(item, 'encoded', NS.content),
excerpt: this.getElementText(item, 'encoded', NS.excerpt),
pubDate,
postDate,
postModified,
creator: this.getElementText(item, 'creator', NS.dc),
status: this.getElementText(item, 'status', NS.wp),
postType: this.getElementText(item, 'post_type', NS.wp),

View File

@@ -44,6 +44,8 @@ interface AnalyzedPostItem {
status: string;
excerpt: string;
pubDate: string | null;
postDate: string | null;
postModified: string | null;
creator: string;
postType: string;
categories: string[];

View File

@@ -90,6 +90,9 @@ const WXR_WITH_POST = `<?xml version="1.0" encoding="UTF-8"?>
<excerpt:encoded><![CDATA[Welcome to my blog.]]></excerpt:encoded>
<wp:post_id>42</wp:post_id>
<wp:post_date>2024-01-15 10:30:00</wp:post_date>
<wp:post_date_gmt>2024-01-15 10:30:00</wp:post_date_gmt>
<wp:post_modified>2024-01-20 15:45:30</wp:post_modified>
<wp:post_modified_gmt>2024-01-20 15:45:30</wp:post_modified_gmt>
<wp:post_name>hello-world</wp:post_name>
<wp:status>publish</wp:status>
<wp:post_type>post</wp:post_type>
@@ -351,6 +354,28 @@ describe('WxrParser', () => {
expect(post.pubDate).toBeInstanceOf(Date);
});
it('should extract postDate and postModified from WXR', () => {
  const { posts } = parser.parseXml(WXR_WITH_POST);
  const firstPost = posts[0];
  // Creation date: wp:post_date wall-clock value, surfaced as a UTC instant
  expect(firstPost.postDate).toBeInstanceOf(Date);
  expect(firstPost.postDate?.toISOString()).toBe('2024-01-15T10:30:00.000Z');
  // Modification date: wp:post_modified wall-clock value, surfaced as a UTC instant
  expect(firstPost.postModified).toBeInstanceOf(Date);
  expect(firstPost.postModified?.toISOString()).toBe('2024-01-20T15:45:30.000Z');
});
it('should handle missing postDate and postModified gracefully', () => {
  // The page fixture has no post_date/post_modified, so both fields must be null
  const { pages } = parser.parseXml(WXR_WITH_PAGE);
  const firstPage = pages[0];
  expect(firstPage.postDate).toBeNull();
  expect(firstPage.postModified).toBeNull();
});
it('should parse a page and put it in pages array', () => {
const result = parser.parseXml(WXR_WITH_PAGE);