From e158b2bcd80615d0ee5d7803c880d177db368a0f Mon Sep 17 00:00:00 2001
From: hugo
tags
- * Only converts newlines that appear within meaningful text content,
- * not newlines that are just whitespace between tags
+ * Preserve line breaks and paragraph structure in content.
+ *
+ * WordPress exports often have:
+ * - Plain text mixed with HTML
+ * - Double newlines representing paragraph breaks
+ * - Single newlines that should become <br>
+ *
+ * This function converts:
+ * - Double newlines (\n\n) to paragraph breaks (</p><p>)
+ * - Single newlines within text to <br>
+ * - Wraps content in
tags if it starts with plain text */ private preserveLineBreaks(html: string): string { - // Convert newlines that appear within text content (between > and <) - // But only if the text content has actual content before or after the newline + if (!html || !html.trim()) return html; + + // Check if content starts with a tag or plain text + const startsWithTag = /^\s*\n
');
+
+ // Convert remaining single newlines within text to <br>
+ // (but not newlines that are just between tags)
+ processed = processed.replace(/>([^<]+) {
+ if (!textContent.trim()) {
+ return '>' + textContent + '<';
+ }
+ const preserved = textContent.replace(/\n/g, '
');
+ return '>' + preserved + '<';
+ });
+
+ // Also handle newlines at the start (before any tags)
+ processed = processed.replace(/^([^<]+)/g, (match, textContent: string) => {
+ if (!textContent.trim()) return match;
+ return textContent.replace(/\n/g, '
');
+ });
+
+ // Wrap in
if we added paragraph markers + if (processed.includes('
')) { + processed = '' + processed + '
'; + } + + return processed; + } + + // For content that starts with HTML, handle newlines within text content return html.replace(/>([^<]+) { - // Skip if the text content is only whitespace (just formatting between tags) if (!textContent.trim()) { return '>' + textContent + '<'; } - // Replace all newlines with');
+ // Then convert remaining single newlines to <br>
+ preserved = preserved.replace(/\n/g, '
');
return '>' + preserved + '<';
});
}
diff --git a/src/main/engine/ImportExecutionEngine.ts b/src/main/engine/ImportExecutionEngine.ts
index 32cfdac..645e017 100644
--- a/src/main/engine/ImportExecutionEngine.ts
+++ b/src/main/engine/ImportExecutionEngine.ts
@@ -754,20 +754,62 @@ export class ImportExecutionEngine extends EventEmitter {
}
/**
- * Preserve line breaks in HTML content by converting \n to <br> tags
- * Only converts newlines that appear within meaningful text content,
- * not newlines that are just whitespace between tags
+ * Preserve line breaks and paragraph structure in content.
+ *
+ * WordPress exports often have:
+ * - Plain text mixed with HTML
+ * - Double newlines representing paragraph breaks
+ * - Single newlines that should become <br>
+ *
+ * This function converts:
+ * - Double newlines (\n\n) to paragraph breaks (</p><p>)
+ * - Single newlines within text to <br>
+ * - Wraps content in
tags if it starts with plain text */ private preserveLineBreaks(html: string): string { - // Convert newlines that appear within text content (between > and <) - // But only if the text content has actual content before or after the newline + if (!html || !html.trim()) return html; + + // Check if content starts with a tag or plain text + const startsWithTag = /^\s*\n
');
+
+ // Convert remaining single newlines within text to <br>
+ // (but not newlines that are just between tags)
+ processed = processed.replace(/>([^<]+) {
+ if (!textContent.trim()) {
+ return '>' + textContent + '<';
+ }
+ const preserved = textContent.replace(/\n/g, '
');
+ return '>' + preserved + '<';
+ });
+
+ // Also handle newlines at the start (before any tags)
+ processed = processed.replace(/^([^<]+)/g, (match, textContent: string) => {
+ if (!textContent.trim()) return match;
+ return textContent.replace(/\n/g, '
');
+ });
+
+ // Wrap in
if we added paragraph markers + if (processed.includes('
')) { + processed = '' + processed + '
'; + } + + return processed; + } + + // For content that starts with HTML, handle newlines within text content return html.replace(/>([^<]+) { - // Skip if the text content is only whitespace (just formatting between tags) if (!textContent.trim()) { return '>' + textContent + '<'; } - // Replace all newlines with');
+ // Then convert remaining single newlines to <br>
+ preserved = preserved.replace(/\n/g, '
');
return '>' + preserved + '<';
});
}
diff --git a/tests/assets/wxr-ref/what-a-superb-owl.md b/tests/assets/wxr-ref/what-a-superb-owl.md
index 4e7766f..7bcc6da 100644
--- a/tests/assets/wxr-ref/what-a-superb-owl.md
+++ b/tests/assets/wxr-ref/what-a-superb-owl.md
@@ -13,6 +13,4 @@ categories:
author: hugo
publishedAt: '2011-02-06T22:02:46.000Z'
---
-
-
-[Warscheinlich von hier](http://jephjacques.tumblr.com/post/3148377589/superb-owl-joeks) (ich habs nur indirekt über Twitter mitbekommen).
+ [Warscheinlich von hier](http://jephjacques.tumblr.com/post/3148377589/superb-owl-joeks) (ich habs nur indirekt über Twitter mitbekommen).
diff --git a/tests/assets/wxr-ref/wir-haben-geheiratet.md b/tests/assets/wxr-ref/wir-haben-geheiratet.md
index 5ae1e98..e9f8045 100644
--- a/tests/assets/wxr-ref/wir-haben-geheiratet.md
+++ b/tests/assets/wxr-ref/wir-haben-geheiratet.md
@@ -14,6 +14,4 @@ categories:
author: hugo
publishedAt: '2011-09-04T14:50:06.000Z'
---
-Anstelle die Bilder schon hier hinzupacken gibt es nur einen Verweis auf ein Album von Bildern die meine Schwiegermutter gemacht hat - die offiziellen Fotos warten noch etwas (und ich selber hab ja keine gemacht).
-
-[[Embedded content: https://picasaweb.google.com/s/c/bin/slideshow.swf]]
+Anstelle die Bilder schon hier hinzupacken gibt es nur einen Verweis auf ein Album von Bildern die meine Schwiegermutter gemacht hat - die offiziellen Fotos warten noch etwas (und ich selber hab ja keine gemacht). FLASH PLAYER NOT SUPPORTED