diff --git a/src/main/engine/GitEngine.ts b/src/main/engine/GitEngine.ts index 3087fbc..fc87c9d 100644 --- a/src/main/engine/GitEngine.ts +++ b/src/main/engine/GitEngine.ts @@ -239,18 +239,79 @@ export class GitEngine { ]; } + /** + * Extracts the host portion from common Git URL formats. + * + * Supports: + * - HTTPS/HTTP: https://host/owner/repo.git + * - SSH: git@host:owner/repo.git + * - SSH URL: ssh://git@host/owner/repo.git + */ + private getHostFromGitUrl(value: string): string | null { + const trimmed = value.trim(); + + // Try standard URL parsing for HTTP(S) and ssh:// URLs + try { + const url = new URL(trimmed); + if (url.hostname) { + return url.hostname.toLowerCase(); + } + } catch { + // Fall through to manual parsing for scp-like SSH syntax + } + + // Match scp-like SSH syntax: [user@]host:owner/repo.git + // Examples: + // git@github.com:owner/repo.git + // git@gitlab.example.com:owner/repo.git + const sshLikeMatch = trimmed.match(/^[^@]+@([^:]+):.+$/); + if (sshLikeMatch && sshLikeMatch[1]) { + return sshLikeMatch[1].toLowerCase(); + } + + return null; + } + private isGitHubUrl(value: string): boolean { - const normalized = value.toLowerCase(); - return normalized.includes('github.com') || normalized.includes('git@github.com:'); + const host = this.getHostFromGitUrl(value); + if (host) { + // Accept github.com and common www-prefixed variant. + return host === 'github.com' || host === 'www.github.com'; + } + + // Fallback for non-standard patterns like "github.com:owner/repo" + const normalized = value.trim().toLowerCase(); + return normalized.startsWith('github.com:') || normalized.startsWith('ssh://github.com/'); } private isGitLabUrl(value: string): boolean { - const normalized = value.toLowerCase(); - return normalized.includes('gitlab.com') || normalized.includes('git@gitlab.com:') || normalized.includes('gitlab'); + const host = this.getHostFromGitUrl(value); + if (host) { + // Hosted GitLab + if (host === 'gitlab.com' || host === 'www.gitlab.com') { + return true; + } + // Self-hosted GitLab: many instances include "gitlab" in the hostname. + if (host.includes('gitlab')) { + return true; + } + } + + // Fallback for non-standard patterns like "gitlab.com:owner/repo" + const normalized = value.trim().toLowerCase(); + return normalized.startsWith('gitlab.com:') || normalized.startsWith('ssh://gitlab.com/'); } private isGiteaForgejoUrl(value: string): boolean { - const normalized = value.toLowerCase(); + const host = this.getHostFromGitUrl(value); + if (host) { + if (host.includes('gitea') || host.includes('forgejo')) { + return true; + } + } + + // Fallback: if we cannot parse a host, fall back to substring detection. + const normalized = value.trim().toLowerCase(); return normalized.includes('gitea') || normalized.includes('forgejo'); } diff --git a/tests/engine/ImportExecutionEngine.e2e.test.ts b/tests/engine/ImportExecutionEngine.e2e.test.ts index a3b1e68..9f7c985 100644 --- a/tests/engine/ImportExecutionEngine.e2e.test.ts +++ b/tests/engine/ImportExecutionEngine.e2e.test.ts @@ -1630,7 +1630,7 @@ describe('ImportExecutionEngine E2E Tests', () => { /** * Creates a custom post with specific content for URL conversion testing */ - function createPostWithContent(content: string, siteUrl: string = 'https://testblog.example.com'): ImportAnalysisReport { + function createPostWithContent(content: string, siteUrl: string = 'https://testblog'): ImportAnalysisReport { const customPost: WxrPost = { wpId: 9001, title: 'URL Conversion Test Post', @@ -1673,7 +1673,7 @@ describe('ImportExecutionEngine E2E Tests', () => { it('should convert absolute media URLs from site domain to relative paths', async () => { // Post with image URL pointing to the site's own media const content = `
Check out this image:
-
+
Nice, right?
`; const report = createPostWithContent(content); @@ -1687,13 +1687,13 @@ describe('ImportExecutionEngine E2E Tests', () => { // Should convert to relative media URL expect(fileContent).toContain(''); // Should NOT contain the absolute URL - expect(fileContent).not.toContain('https://testblog.example.com/wp-content/uploads'); + expect(fileContent).not.toContain('https://testblog/wp-content/uploads'); }); it('should convert linked images with absolute media URLs to relative paths', async () => { // Linked image pattern common in WordPress - thumbnail links to full-size - const content = ` -Own image:
-
+
External image:
`;
@@ -1760,7 +1760,7 @@ describe('ImportExecutionEngine E2E Tests', () => {
it('should convert media URLs in markdown image syntax after HTML conversion', async () => {
// Sometimes WordPress content already has markdown-like syntax in HTML
const content = `Image with title:
-
`;
+
`;
const report = createPostWithContent(content);
await engine.executeImport(report, {});
@@ -1776,9 +1776,9 @@ describe('ImportExecutionEngine E2E Tests', () => {
it('should handle multiple images in same post', async () => {
const content = `Gallery:
-
-
-
`;
+
+
+
`;
const report = createPostWithContent(content);
await engine.executeImport(report, {});
@@ -1793,7 +1793,7 @@ describe('ImportExecutionEngine E2E Tests', () => {
});
it('should handle deep nested upload paths', async () => {
- const content = `
`;
+ const content = `
`;
const report = createPostWithContent(content);
await engine.executeImport(report, {});
@@ -1808,8 +1808,8 @@ describe('ImportExecutionEngine E2E Tests', () => {
it('should NOT convert wp-content/themes or wp-content/plugins URLs', async () => {
// Assets from themes/plugins should stay absolute (they're not imported media)
- const content = `
-
+