chore: updated reference files for details

This commit is contained in:
2026-02-15 18:14:52 +01:00
parent 2a44ea454b
commit e158b2bcd8
4 changed files with 102 additions and 22 deletions

View File

@@ -562,22 +562,64 @@ export class ImportAnalysisEngine {
} }
/** /**
* Preserve line breaks in HTML content by converting \n to <br> tags * Preserve line breaks and paragraph structure in content.
* Only converts newlines that appear within meaningful text content, *
* not newlines that are just whitespace between tags * WordPress exports often have:
* - Plain text mixed with HTML
* - Double newlines representing paragraph breaks
* - Single newlines that should become <br>
*
* This function converts:
* - Double newlines (\n\n) to paragraph breaks (</p><p>)
* - Single newlines within text to <br>
* - Wraps content in <p> tags if it starts with plain text
*/ */
private preserveLineBreaks(html: string): string { private preserveLineBreaks(html: string): string {
// Convert newlines that appear within text content (between > and <) if (!html || !html.trim()) return html;
// But only if the text content has actual content before or after the newline
return html.replace(/>([^<]+)</g, (_match, textContent: string) => { // Check if content starts with a tag or plain text
// Skip if the text content is only whitespace (just formatting between tags) const startsWithTag = /^\s*</.test(html);
// If it starts with plain text, we need to handle the whole content differently
if (!startsWithTag) {
// First, convert double newlines to paragraph markers
let processed = html.replace(/\n\n+/g, '</p>\n<p>');
// Convert remaining single newlines within text to <br>
// (but not newlines that are just between tags)
processed = processed.replace(/>([^<]+)</g, (_match, textContent: string) => {
if (!textContent.trim()) { if (!textContent.trim()) {
return '>' + textContent + '<'; return '>' + textContent + '<';
} }
// Replace all newlines with <br> (the text has actual content)
const preserved = textContent.replace(/\n/g, '<br>'); const preserved = textContent.replace(/\n/g, '<br>');
return '>' + preserved + '<'; return '>' + preserved + '<';
}); });
// Also handle newlines at the start (before any tags)
processed = processed.replace(/^([^<]+)/g, (match, textContent: string) => {
if (!textContent.trim()) return match;
return textContent.replace(/\n/g, '<br>');
});
// Wrap in <p> if we added paragraph markers
if (processed.includes('</p>')) {
processed = '<p>' + processed + '</p>';
}
return processed;
}
// For content that starts with HTML, handle newlines within text content
return html.replace(/>([^<]+)</g, (_match, textContent: string) => {
if (!textContent.trim()) {
return '>' + textContent + '<';
}
// First convert double newlines to paragraph breaks
let preserved = textContent.replace(/\n\n+/g, '</p><p>');
// Then convert remaining single newlines to <br>
preserved = preserved.replace(/\n/g, '<br>');
return '>' + preserved + '<';
});
} }
private calculateChecksum(content: string): string { private calculateChecksum(content: string): string {

View File

@@ -754,22 +754,64 @@ export class ImportExecutionEngine extends EventEmitter {
} }
/** /**
* Preserve line breaks in HTML content by converting \n to <br> tags * Preserve line breaks and paragraph structure in content.
* Only converts newlines that appear within meaningful text content, *
* not newlines that are just whitespace between tags * WordPress exports often have:
* - Plain text mixed with HTML
* - Double newlines representing paragraph breaks
* - Single newlines that should become <br>
*
* This function converts:
* - Double newlines (\n\n) to paragraph breaks (</p><p>)
* - Single newlines within text to <br>
* - Wraps content in <p> tags if it starts with plain text
*/ */
private preserveLineBreaks(html: string): string { private preserveLineBreaks(html: string): string {
// Convert newlines that appear within text content (between > and <) if (!html || !html.trim()) return html;
// But only if the text content has actual content before or after the newline
return html.replace(/>([^<]+)</g, (_match, textContent: string) => { // Check if content starts with a tag or plain text
// Skip if the text content is only whitespace (just formatting between tags) const startsWithTag = /^\s*</.test(html);
// If it starts with plain text, we need to handle the whole content differently
if (!startsWithTag) {
// First, convert double newlines to paragraph markers
let processed = html.replace(/\n\n+/g, '</p>\n<p>');
// Convert remaining single newlines within text to <br>
// (but not newlines that are just between tags)
processed = processed.replace(/>([^<]+)</g, (_match, textContent: string) => {
if (!textContent.trim()) { if (!textContent.trim()) {
return '>' + textContent + '<'; return '>' + textContent + '<';
} }
// Replace all newlines with <br> (the text has actual content)
const preserved = textContent.replace(/\n/g, '<br>'); const preserved = textContent.replace(/\n/g, '<br>');
return '>' + preserved + '<'; return '>' + preserved + '<';
}); });
// Also handle newlines at the start (before any tags)
processed = processed.replace(/^([^<]+)/g, (match, textContent: string) => {
if (!textContent.trim()) return match;
return textContent.replace(/\n/g, '<br>');
});
// Wrap in <p> if we added paragraph markers
if (processed.includes('</p>')) {
processed = '<p>' + processed + '</p>';
}
return processed;
}
// For content that starts with HTML, handle newlines within text content
return html.replace(/>([^<]+)</g, (_match, textContent: string) => {
if (!textContent.trim()) {
return '>' + textContent + '<';
}
// First convert double newlines to paragraph breaks
let preserved = textContent.replace(/\n\n+/g, '</p><p>');
// Then convert remaining single newlines to <br>
preserved = preserved.replace(/\n/g, '<br>');
return '>' + preserved + '<';
});
} }
/** /**

View File

@@ -13,6 +13,4 @@ categories:
author: hugo author: hugo
publishedAt: '2011-02-06T22:02:46.000Z' publishedAt: '2011-02-06T22:02:46.000Z'
--- ---
![superb owl](http://28.media.tumblr.com/tumblr_lg7mqyuVsE1qzlnwmo1_500.png) ![superb owl](http://28.media.tumblr.com/tumblr_lg7mqyuVsE1qzlnwmo1_500.png) [Warscheinlich von hier](http://jephjacques.tumblr.com/post/3148377589/superb-owl-joeks) (ich habs nur indirekt über Twitter mitbekommen).
[Warscheinlich von hier](http://jephjacques.tumblr.com/post/3148377589/superb-owl-joeks) (ich habs nur indirekt über Twitter mitbekommen).

View File

@@ -14,6 +14,4 @@ categories:
author: hugo author: hugo
publishedAt: '2011-09-04T14:50:06.000Z' publishedAt: '2011-09-04T14:50:06.000Z'
--- ---
Anstelle die Bilder schon hier hinzupacken gibt es nur einen Verweis auf ein Album von Bildern die meine Schwiegermutter gemacht hat - die offiziellen Fotos warten noch etwas (und ich selber hab ja keine gemacht). Anstelle die Bilder schon hier hinzupacken gibt es nur einen Verweis auf ein Album von Bildern die meine Schwiegermutter gemacht hat - die offiziellen Fotos warten noch etwas (und ich selber hab ja keine gemacht). FLASH PLAYER NOT SUPPORTED
[[Embedded content: https://picasaweb.google.com/s/c/bin/slideshow.swf]]