From 2c7195490e7deda8f45a7484c481f9ef9bb9a726 Mon Sep 17 00:00:00 2001 From: Chili Palmer Date: Fri, 20 Mar 2026 17:01:23 +0100 Subject: [PATCH] chore: moved some tests to qwen3.5 0.8B for speed --- .../Server/APIServerRewriteTests.swift | 63 ++++++++++--------- .../Server/ChatViewModelTests.swift | 4 +- .../Server/PromptBuilderTests.swift | 2 +- README.md | 2 + 4 files changed, 38 insertions(+), 33 deletions(-) diff --git a/MLXServerTests/Server/APIServerRewriteTests.swift b/MLXServerTests/Server/APIServerRewriteTests.swift index 0ec2202..77ab05e 100644 --- a/MLXServerTests/Server/APIServerRewriteTests.swift +++ b/MLXServerTests/Server/APIServerRewriteTests.swift @@ -3,8 +3,11 @@ import XCTest @testable import MLX_Server final class APIServerRewriteTests: XCTestCase { + private let genericModelId = "qwen3.5-0.8b" + private let genericModelRepoId = "mlx-community/Qwen3.5-0.8B-4bit" + func testQwenNonStreamingChatCompletionCachesAndReusesPrompt() async throws { - let harness = try await makeHarness(initialModelId: "qwen") + let harness = try await makeHarness(initialModelId: self.genericModelId) defer { harness.stop() } let lookups = LookupEventCollector() @@ -18,7 +21,7 @@ final class APIServerRewriteTests: XCTestCase { } let request = APIChatCompletionRequest( - model: "qwen", + model: self.genericModelId, messages: [ APIChatMessage(role: "user", content: .text("Reply with exactly one short word."), name: nil, tool_calls: nil, tool_call_id: nil) ], @@ -39,7 +42,7 @@ final class APIServerRewriteTests: XCTestCase { try await waitUntil(timeoutSeconds: 5) { let snapshot = TokenPrefixCache.shared.snapshot() - return snapshot.totalEntries > 0 && snapshot.entries.allSatisfy { $0.modelId == "qwen" } + return snapshot.totalEntries > 0 && snapshot.entries.allSatisfy { $0.modelId == self.genericModelId } } let firstSnapshot = TokenPrefixCache.shared.snapshot() @@ -77,7 +80,7 @@ final class APIServerRewriteTests: XCTestCase { defer { harness.stop() } let request = 
APIChatCompletionRequest( - model: "gemma", + model: self.genericModelId, messages: [ APIChatMessage(role: "user", content: .text("Reply with exactly one short word."), name: nil, tool_calls: nil, tool_call_id: nil) ], @@ -179,7 +182,7 @@ final class APIServerRewriteTests: XCTestCase { } func testTextOnlyFollowUpReusesEarlierImagePrefix() async throws { - let harness = try await makeHarness() + let harness = try await makeHarness(initialModelId: "gemma") defer { harness.stop() } let lookups = LookupEventCollector() @@ -192,7 +195,7 @@ final class APIServerRewriteTests: XCTestCase { APIServer.debugLookupEventHandler = nil } - let firstRequest = visionRequest(dataURI: TestImageFixtures.primaryDataURI, prompt: "Describe this image in one short word.") + let firstRequest = visionRequest(modelId: "gemma", dataURI: TestImageFixtures.primaryDataURI, prompt: "Describe this image in one short word.") let firstResponse = try await sendChatCompletion(firstRequest, port: harness.port) let assistantContent = try XCTUnwrap(firstResponse.choices.first?.message.content) @@ -253,7 +256,7 @@ final class APIServerRewriteTests: XCTestCase { } let request = APIChatCompletionRequest( - model: "gemma", + model: self.genericModelId, messages: [ APIChatMessage(role: "user", content: .text("Answer with one word: ocean."), name: nil, tool_calls: nil, tool_call_id: nil) ], @@ -284,7 +287,7 @@ final class APIServerRewriteTests: XCTestCase { } func testSingleTurnContinuationProducesPartialCacheHit() async throws { - let harness = try await makeHarness() + let harness = try await makeHarness(initialModelId: "gemma") defer { harness.stop() } let firstRequest = APIChatCompletionRequest( @@ -334,7 +337,7 @@ final class APIServerRewriteTests: XCTestCase { } func testSameSystemPromptDifferentUserMessageReusesSystemPrefix() async throws { - let harness = try await makeHarness() + let harness = try await makeHarness(initialModelId: "gemma") defer { harness.stop() } let lookups = LookupEventCollector() 
@@ -401,7 +404,7 @@ final class APIServerRewriteTests: XCTestCase { } func testServerStoredCacheIsDirectlyReusableForSameSystemDifferentUserPrompt() async throws { - let harness = try await makeHarness() + let harness = try await makeHarness(initialModelId: "gemma") defer { harness.stop() } let firstRequest = APIChatCompletionRequest( @@ -447,7 +450,7 @@ final class APIServerRewriteTests: XCTestCase { let engine = InferenceEngine(container: container) let preparedPrompt = PromptBuilder.build( from: secondRequest, - modelId: ModelConfig.default.repoId, + modelId: "mlx-community/gemma-3-4b-it-4bit", thinkingEnabled: Preferences.enableThinking ) let preparedInference = try await engine.prepare(preparedPrompt.userInput) @@ -463,7 +466,7 @@ final class APIServerRewriteTests: XCTestCase { defer { harness.stop() } let firstRequest = APIChatCompletionRequest( - model: "gemma", + model: self.genericModelId, messages: [ APIChatMessage(role: "system", content: .text("System Alpha Unique Tokens"), name: nil, tool_calls: nil, tool_call_id: nil), APIChatMessage(role: "user", content: .text("Answer in one word: tree."), name: nil, tool_calls: nil, tool_call_id: nil) @@ -481,7 +484,7 @@ final class APIServerRewriteTests: XCTestCase { ) let secondRequest = APIChatCompletionRequest( - model: "gemma", + model: self.genericModelId, messages: [ APIChatMessage(role: "system", content: .text("Completely Different Beta Markers"), name: nil, tool_calls: nil, tool_call_id: nil), APIChatMessage(role: "user", content: .text("Answer in one word: tree."), name: nil, tool_calls: nil, tool_call_id: nil) @@ -512,7 +515,7 @@ final class APIServerRewriteTests: XCTestCase { let harness = try await makeHarness() defer { harness.stop() } - Preferences.lastModelId = "gemma" + Preferences.lastModelId = self.genericModelId let request = APIChatCompletionRequest( model: nil, messages: [ @@ -590,7 +593,7 @@ final class APIServerRewriteTests: XCTestCase { } let qwenRequest = APIChatCompletionRequest( - 
model: "qwen", + model: "qwen3.5-0.8b", messages: [ APIChatMessage(role: "user", content: .text("Answer with one word: river."), name: nil, tool_calls: nil, tool_call_id: nil) ], @@ -611,15 +614,15 @@ final class APIServerRewriteTests: XCTestCase { try await waitUntil(timeoutSeconds: 5) { let snapshot = TokenPrefixCache.shared.snapshot() let modelId = await MainActor.run { harness.modelManager.currentModel?.id } - return modelId == "qwen" + return modelId == "qwen3.5-0.8b" && !snapshot.entries.isEmpty - && snapshot.entries.allSatisfy { $0.modelId == "qwen" } + && snapshot.entries.allSatisfy { $0.modelId == "qwen3.5-0.8b" } } let afterSwapSnapshot = TokenPrefixCache.shared.snapshot() let afterSwapEvents = await lookups.events() let firstQwenLookup = try XCTUnwrap(afterSwapEvents.last) - XCTAssertTrue(afterSwapSnapshot.entries.allSatisfy { $0.modelId == "qwen" }) + XCTAssertTrue(afterSwapSnapshot.entries.allSatisfy { $0.modelId == "qwen3.5-0.8b" }) XCTAssertFalse(firstQwenLookup.isHit) XCTAssertEqual(firstQwenLookup.matchedTokenCount, 0) @@ -639,7 +642,7 @@ final class APIServerRewriteTests: XCTestCase { } func testStreamingChatCompletionReusesCacheAcrossThreeProgressivelyLongerTurns() async throws { - let harness = try await makeHarness() + let harness = try await makeHarness(initialModelId: "gemma") defer { harness.stop() } let firstRequest = APIChatCompletionRequest( @@ -742,7 +745,7 @@ final class APIServerRewriteTests: XCTestCase { } func testStreamingChatCompletionReusesCacheAcrossToolBoundary() async throws { - let harness = try await makeHarness() + let harness = try await makeHarness(initialModelId: "gemma") defer { harness.stop() } let tools = [mockWeatherTool] @@ -847,7 +850,7 @@ final class APIServerRewriteTests: XCTestCase { } func testStreamingChatCompletionReusesCacheAcrossMultipleToolTurns() async throws { - let harness = try await makeHarness() + let harness = try await makeHarness(initialModelId: "gemma") defer { harness.stop() } let tools = 
[mockWeatherTool] @@ -994,7 +997,7 @@ final class APIServerRewriteTests: XCTestCase { defer { harness.stop() } let request = APIChatCompletionRequest( - model: "gemma", + model: self.genericModelId, messages: [ APIChatMessage(role: "user", content: .text("Count from one to twenty with commas, using many tokens."), name: nil, tool_calls: nil, tool_call_id: nil) ], @@ -1053,7 +1056,7 @@ final class APIServerRewriteTests: XCTestCase { defer { harness.stop() } let request = APIChatCompletionRequest( - model: "gemma", + model: self.genericModelId, messages: [ APIChatMessage(role: "user", content: .text("Count from one to fifty with commas, using many tokens."), name: nil, tool_calls: nil, tool_call_id: nil) ], @@ -1120,7 +1123,7 @@ final class APIServerRewriteTests: XCTestCase { let harness = try await makeHarness() let request = APIChatCompletionRequest( - model: "gemma", + model: self.genericModelId, messages: [ APIChatMessage(role: "user", content: .text("Count from one to fifty with commas, using many tokens."), name: nil, tool_calls: nil, tool_call_id: nil) ], @@ -1189,7 +1192,7 @@ final class APIServerRewriteTests: XCTestCase { defer { harness.stop() } let request = APIChatCompletionRequest( - model: "gemma", + model: self.genericModelId, messages: [ APIChatMessage(role: "user", content: .text("Count from one to forty with commas, using many tokens."), name: nil, tool_calls: nil, tool_call_id: nil) ], @@ -1218,7 +1221,7 @@ final class APIServerRewriteTests: XCTestCase { } let recoveryRequest = APIChatCompletionRequest( - model: "gemma", + model: self.genericModelId, messages: [ APIChatMessage(role: "user", content: .text("Reply with exactly one short word."), name: nil, tool_calls: nil, tool_call_id: nil) ], @@ -1241,7 +1244,7 @@ final class APIServerRewriteTests: XCTestCase { } func testStreamingToolCallChunksArriveInOpenAICompatibleOrder() async throws { - let harness = try await makeHarness() + let harness = try await makeHarness(initialModelId: "gemma") defer 
{ harness.stop() } let detailed = try await sendStreamingChatCompletionDetailed( @@ -1311,7 +1314,7 @@ final class APIServerRewriteTests: XCTestCase { ) } - private func makeHarness(initialModelId: String = "gemma") async throws -> TestHarness { + private func makeHarness(initialModelId: String = "qwen3.5-0.8b") async throws -> TestHarness { let modelManager = await MainActor.run { ModelManager() } let config = try XCTUnwrap(ModelConfig.resolve(initialModelId)) @@ -1334,9 +1337,9 @@ final class APIServerRewriteTests: XCTestCase { return TestHarness(server: server, modelManager: modelManager, port: port) } - private func visionRequest(dataURI: String, prompt: String) -> APIChatCompletionRequest { + private func visionRequest(modelId: String = "qwen3.5-0.8b", dataURI: String, prompt: String) -> APIChatCompletionRequest { APIChatCompletionRequest( - model: "gemma", + model: modelId, messages: [ APIChatMessage( role: "user", diff --git a/MLXServerTests/Server/ChatViewModelTests.swift b/MLXServerTests/Server/ChatViewModelTests.swift index f9a7ed1..5f360b0 100644 --- a/MLXServerTests/Server/ChatViewModelTests.swift +++ b/MLXServerTests/Server/ChatViewModelTests.swift @@ -3,9 +3,9 @@ import XCTest @MainActor final class ChatViewModelTests: XCTestCase { - func testGemmaChatViewModelSendProducesAssistantReply() async throws { + func testQwenChatViewModelSendProducesAssistantReply() async throws { let modelManager = ModelManager() - let config = try XCTUnwrap(ModelConfig.resolve("gemma")) + let config = try XCTUnwrap(ModelConfig.resolve("qwen3.5-0.8b")) await modelManager.loadModel(config) defer { modelManager.unloadModel() } diff --git a/MLXServerTests/Server/PromptBuilderTests.swift b/MLXServerTests/Server/PromptBuilderTests.swift index 79e5a76..7669cba 100644 --- a/MLXServerTests/Server/PromptBuilderTests.swift +++ b/MLXServerTests/Server/PromptBuilderTests.swift @@ -114,7 +114,7 @@ final class PromptBuilderTests: XCTestCase { n: nil ) - let prepared = 
PromptBuilder.build(from: request, modelId: "mlx-community/Qwen3.5-4B-MLX-4bit", thinkingEnabled: true) + let prepared = PromptBuilder.build(from: request, modelId: "mlx-community/Qwen3.5-0.8B-4bit", thinkingEnabled: true) XCTAssertEqual(prepared.chatMessages.count, 1) XCTAssertTrue(prepared.chatMessages[0].content.contains("Let me check.")) diff --git a/README.md b/README.md index 421c2c8..3bf3228 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,8 @@ Native macOS app for running local LLMs on Apple Silicon via [MLX](https://githu Any model in MLX format on HuggingFace can be added — there is no restriction on uploader or architecture. +Developer note: the test suite uses `qwen3.5-0.8b` as its main live-model target because it is substantially faster and lighter than the larger Qwen variants. Some tests still run on Gemma 3 because they validate Gemma-specific prompt shaping, cache-reuse behavior, and tool-call behavior that Qwen3.5 0.8B does not reproduce closely enough. + ## Quick Start Requires macOS 15+, Xcode 16.4+, and `xcodegen` (`brew install xcodegen`).