From e158b2bcd80615d0ee5d7803c880d177db368a0f Mon Sep 17 00:00:00 2001
From: hugo <hugoms@me.com>
Date: Sun, 15 Feb 2026 18:14:52 +0100
Subject: [PATCH] chore: updated reference files for details

---
 src/main/engine/ImportAnalysisEngine.ts      | 58 +++++++++++++++++---
 src/main/engine/ImportExecutionEngine.ts     | 58 +++++++++++++++++---
 tests/assets/wxr-ref/what-a-superb-owl.md    |  4 +-
 tests/assets/wxr-ref/wir-haben-geheiratet.md |  4 +-
 4 files changed, 102 insertions(+), 22 deletions(-)
diff --git a/src/main/engine/ImportAnalysisEngine.ts b/src/main/engine/ImportAnalysisEngine.ts
index cbdd443..e50078d 100644
--- a/src/main/engine/ImportAnalysisEngine.ts
+++ b/src/main/engine/ImportAnalysisEngine.ts
@@ -562,20 +562,62 @@ export class ImportAnalysisEngine {
   }
 
   /**
-   * Preserve line breaks in HTML content by converting \n to <br> tags
-   * Only converts newlines that appear within meaningful text content,
-   * not newlines that are just whitespace between tags
+   * Preserve line breaks and paragraph structure in content.
+   * 
+   * WordPress exports often have:
+   * - Plain text mixed with HTML
+   * - Double newlines representing paragraph breaks
+   * - Single newlines that should become <br>
+   * 
+   * This function:
+   * - Converts runs of two or more newlines to paragraph breaks (</p><p>)
+   * - Converts single newlines within text to <br>
+   * - Wraps the result in <p> tags when it starts with plain text and contains </p>
    */
   private preserveLineBreaks(html: string): string {
-    // Convert newlines that appear within text content (between > and <)
-    // But only if the text content has actual content before or after the newline
+    if (!html || !html.trim()) return html;
+
+    // Check if content starts with a tag or plain text
+    const startsWithTag = /^\s*</.test(html);
+    
+    // If it starts with plain text, we need to handle the whole content differently
+    if (!startsWithTag) {
+      // First, convert double newlines to paragraph markers
+      let processed = html.replace(/\n\n+/g, '</p>\n<p>');
+      
+      // Convert remaining single newlines within text to <br>
+      // (but not newlines that are just between tags)
+      processed = processed.replace(/>([^<]+)</g, (_match, textContent: string) => {
+        if (!textContent.trim()) {
+          return '>' + textContent + '<';
+        }
+        const preserved = textContent.replace(/\n/g, '<br>');
+        return '>' + preserved + '<';
+      });
+      
+      // Also handle newlines at the start (before any tags)
+      processed = processed.replace(/^([^<]+)/g, (match, textContent: string) => {
+        if (!textContent.trim()) return match;
+        return textContent.replace(/\n/g, '<br>');
+      });
+      
+      // Wrap in <p> if the content contains a </p> marker (added above or already present)
+      if (processed.includes('</p>')) {
+        processed = '<p>' + processed + '</p>';
+      }
+      
+      return processed;
+    }
+
+    // For content that starts with HTML, handle newlines within text content
     return html.replace(/>([^<]+)</g, (_match, textContent: string) => {
-      // Skip if the text content is only whitespace (just formatting between tags)
       if (!textContent.trim()) {
         return '>' + textContent + '<';
       }
-      // Replace all newlines with <br> (the text has actual content)
-      const preserved = textContent.replace(/\n/g, '<br>');
+      // First convert double newlines to paragraph breaks
+      let preserved = textContent.replace(/\n\n+/g, '</p><p>');
+      // Then convert remaining single newlines to <br>
+      preserved = preserved.replace(/\n/g, '<br>');
       return '>' + preserved + '<';
     });
   }
diff --git a/src/main/engine/ImportExecutionEngine.ts b/src/main/engine/ImportExecutionEngine.ts
index 32cfdac..645e017 100644
--- a/src/main/engine/ImportExecutionEngine.ts
+++ b/src/main/engine/ImportExecutionEngine.ts
@@ -754,20 +754,62 @@ export class ImportExecutionEngine extends EventEmitter {
   }
 
   /**
-   * Preserve line breaks in HTML content by converting \n to <br> tags
-   * Only converts newlines that appear within meaningful text content,
-   * not newlines that are just whitespace between tags
+   * Preserve line breaks and paragraph structure in content.
+   * 
+   * WordPress exports often have:
+   * - Plain text mixed with HTML
+   * - Double newlines representing paragraph breaks
+   * - Single newlines that should become <br>
+   * 
+   * This function:
+   * - Converts runs of two or more newlines to paragraph breaks (</p><p>)
+   * - Converts single newlines within text to <br>
+   * - Wraps the result in <p> tags when it starts with plain text and contains </p>
    */
   private preserveLineBreaks(html: string): string {
-    // Convert newlines that appear within text content (between > and <)
-    // But only if the text content has actual content before or after the newline
+    if (!html || !html.trim()) return html;
+
+    // Check if content starts with a tag or plain text
+    const startsWithTag = /^\s*</.test(html);
+    
+    // If it starts with plain text, we need to handle the whole content differently
+    if (!startsWithTag) {
+      // First, convert double newlines to paragraph markers
+      let processed = html.replace(/\n\n+/g, '</p>\n<p>');
+      
+      // Convert remaining single newlines within text to <br>
+      // (but not newlines that are just between tags)
+      processed = processed.replace(/>([^<]+)</g, (_match, textContent: string) => {
+        if (!textContent.trim()) {
+          return '>' + textContent + '<';
+        }
+        const preserved = textContent.replace(/\n/g, '<br>');
+        return '>' + preserved + '<';
+      });
+      
+      // Also handle newlines at the start (before any tags)
+      processed = processed.replace(/^([^<]+)/g, (match, textContent: string) => {
+        if (!textContent.trim()) return match;
+        return textContent.replace(/\n/g, '<br>');
+      });
+      
+      // Wrap in <p> if the content contains a </p> marker (added above or already present)
+      if (processed.includes('</p>')) {
+        processed = '<p>' + processed + '</p>';
+      }
+      
+      return processed;
+    }
+
+    // For content that starts with HTML, handle newlines within text content
     return html.replace(/>([^<]+)</g, (_match, textContent: string) => {
-      // Skip if the text content is only whitespace (just formatting between tags)
       if (!textContent.trim()) {
         return '>' + textContent + '<';
       }
-      // Replace all newlines with <br> (the text has actual content)
-      const preserved = textContent.replace(/\n/g, '<br>');
+      // First convert double newlines to paragraph breaks
+      let preserved = textContent.replace(/\n\n+/g, '</p><p>');
+      // Then convert remaining single newlines to <br>
+      preserved = preserved.replace(/\n/g, '<br>');
       return '>' + preserved + '<';
     });
   }
diff --git a/tests/assets/wxr-ref/what-a-superb-owl.md b/tests/assets/wxr-ref/what-a-superb-owl.md
index 4e7766f..7bcc6da 100644
--- a/tests/assets/wxr-ref/what-a-superb-owl.md
+++ b/tests/assets/wxr-ref/what-a-superb-owl.md
@@ -13,6 +13,4 @@ categories:
 author: hugo
 publishedAt: '2011-02-06T22:02:46.000Z'
 ---
-![superb owl](http://28.media.tumblr.com/tumblr_lg7mqyuVsE1qzlnwmo1_500.png)
-
-[Warscheinlich von hier](http://jephjacques.tumblr.com/post/3148377589/superb-owl-joeks) (ich habs nur indirekt über Twitter mitbekommen).
+![superb owl](http://28.media.tumblr.com/tumblr_lg7mqyuVsE1qzlnwmo1_500.png) [Warscheinlich von hier](http://jephjacques.tumblr.com/post/3148377589/superb-owl-joeks) (ich habs nur indirekt über Twitter mitbekommen).
diff --git a/tests/assets/wxr-ref/wir-haben-geheiratet.md b/tests/assets/wxr-ref/wir-haben-geheiratet.md
index 5ae1e98..e9f8045 100644
--- a/tests/assets/wxr-ref/wir-haben-geheiratet.md
+++ b/tests/assets/wxr-ref/wir-haben-geheiratet.md
@@ -14,6 +14,4 @@ categories:
 author: hugo
 publishedAt: '2011-09-04T14:50:06.000Z'
 ---
-Anstelle die Bilder schon hier hinzupacken gibt es nur einen Verweis auf ein Album von Bildern die meine Schwiegermutter gemacht hat - die offiziellen Fotos warten noch etwas (und ich selber hab ja keine gemacht).
-
-[[Embedded content: https://picasaweb.google.com/s/c/bin/slideshow.swf]]
+Anstelle die Bilder schon hier hinzupacken gibt es nur einen Verweis auf ein Album von Bildern die meine Schwiegermutter gemacht hat - die offiziellen Fotos warten noch etwas (und ich selber hab ja keine gemacht). FLASH PLAYER NOT SUPPORTED