diff --git a/src/main/engine/ImportAnalysisEngine.ts b/src/main/engine/ImportAnalysisEngine.ts
index 1f4d227..ddf9750 100644
--- a/src/main/engine/ImportAnalysisEngine.ts
+++ b/src/main/engine/ImportAnalysisEngine.ts
@@ -580,7 +580,21 @@ export class ImportAnalysisEngine {
const withCodeBlocks = this.wrapMultilineCode(html);
// Preprocess: Convert newlines within text to <br> tags to preserve line breaks
const preprocessed = this.preserveLineBreaks(withCodeBlocks);
- return this.turndown.turndown(preprocessed);
+ let markdown = this.turndown.turndown(preprocessed);
+ // Normalize non-breaking spaces to regular spaces
+ markdown = markdown.replace(/\u00A0/g, ' ');
+ // Clean up trailing whitespace from each line, but preserve "> " for blockquote continuation
+ markdown = markdown.split('\n').map(line => {
+ const trimmed = line.trimEnd();
+ // Preserve space after ">" for blockquote continuation lines
+ if (trimmed === '>' && line.startsWith('> ')) {
+ return '> ';
+ }
+ return trimmed;
+ }).join('\n');
+ // Normalize multiple blank lines (3+ consecutive newlines → 2 newlines)
+ markdown = markdown.replace(/\n{3,}/g, '\n\n');
+ return markdown;
}
/**
@@ -656,6 +670,19 @@ export class ImportAnalysisEngine {
return '>' + preserved + '<';
});
+ // Also handle text at the END of content (after the last tag)
+ // This catches text after closing tags like --> or /> that goes to the end
+ result = result.replace(/>([^<]+)$/g, (match, textContent: string) => {
+ if (!textContent.trim()) {
+ return match;
+ }
+ // First convert double newlines to paragraph breaks
+ let preserved = textContent.replace(/\n\n+/g, '
');
+ // Then convert remaining single newlines to <br>
+ preserved = preserved.replace(/\n/g, '
');
+ return '>' + preserved;
+ });
+
// Restore protected <pre> blocks
preBlocks.forEach((block, i) => {
result = result.replace(`__PRE_BLOCK_${i}__`, block);
diff --git a/src/main/engine/ImportExecutionEngine.ts b/src/main/engine/ImportExecutionEngine.ts
index 7d61d5e..0f4d00f 100644
--- a/src/main/engine/ImportExecutionEngine.ts
+++ b/src/main/engine/ImportExecutionEngine.ts
@@ -775,6 +775,19 @@ export class ImportExecutionEngine extends EventEmitter {
markdown = markdown.replace(/\[\[([^\]]*?)\]\]/g, (_match, inner: string) => {
return '[[' + inner.replace(/\\(.)/g, '$1') + ']]';
});
+ // Normalize non-breaking spaces to regular spaces
+ markdown = markdown.replace(/\u00A0/g, ' ');
+ // Clean up trailing whitespace from each line, but preserve "> " for blockquote continuation
+ markdown = markdown.split('\n').map(line => {
+ const trimmed = line.trimEnd();
+ // Preserve space after ">" for blockquote continuation lines
+ if (trimmed === '>' && line.startsWith('> ')) {
+ return '> ';
+ }
+ return trimmed;
+ }).join('\n');
+ // Normalize multiple blank lines (3+ consecutive newlines → 2 newlines)
+ markdown = markdown.replace(/\n{3,}/g, '\n\n');
return markdown;
}
@@ -851,6 +864,19 @@ export class ImportExecutionEngine extends EventEmitter {
return '>' + preserved + '<';
});
+ // Also handle text at the END of content (after the last tag)
+ // This catches text after closing tags like --> or /> that goes to the end
+ result = result.replace(/>([^<]+)$/g, (match, textContent: string) => {
+ if (!textContent.trim()) {
+ return match;
+ }
+ // First convert double newlines to paragraph breaks
+ let preserved = textContent.replace(/\n\n+/g, '');
+ // Then convert remaining single newlines to <br>
+ preserved = preserved.replace(/\n/g, '
');
+ return '>' + preserved;
+ });
+
// Restore protected <pre> blocks
preBlocks.forEach((block, i) => {
result = result.replace(`__PRE_BLOCK_${i}__`, block);