chore: move some tests to Qwen3.5-0.8B for speed

This commit is contained in:
2026-03-20 17:01:23 +01:00
parent 7d25955042
commit 2c7195490e
4 changed files with 38 additions and 33 deletions

View File

@@ -3,8 +3,11 @@ import XCTest
@testable import MLX_Server
final class APIServerRewriteTests: XCTestCase {
private let genericModelId = "qwen3.5-0.8b"
private let genericModelRepoId = "mlx-community/Qwen3.5-0.8B-4bit"
func testQwenNonStreamingChatCompletionCachesAndReusesPrompt() async throws {
let harness = try await makeHarness(initialModelId: "qwen")
let harness = try await makeHarness(initialModelId: self.genericModelId)
defer { harness.stop() }
let lookups = LookupEventCollector()
@@ -18,7 +21,7 @@ final class APIServerRewriteTests: XCTestCase {
}
let request = APIChatCompletionRequest(
model: "qwen",
model: self.genericModelId,
messages: [
APIChatMessage(role: "user", content: .text("Reply with exactly one short word."), name: nil, tool_calls: nil, tool_call_id: nil)
],
@@ -39,7 +42,7 @@ final class APIServerRewriteTests: XCTestCase {
try await waitUntil(timeoutSeconds: 5) {
let snapshot = TokenPrefixCache.shared.snapshot()
return snapshot.totalEntries > 0 && snapshot.entries.allSatisfy { $0.modelId == "qwen" }
return snapshot.totalEntries > 0 && snapshot.entries.allSatisfy { $0.modelId == self.genericModelId }
}
let firstSnapshot = TokenPrefixCache.shared.snapshot()
@@ -77,7 +80,7 @@ final class APIServerRewriteTests: XCTestCase {
defer { harness.stop() }
let request = APIChatCompletionRequest(
model: "gemma",
model: self.genericModelId,
messages: [
APIChatMessage(role: "user", content: .text("Reply with exactly one short word."), name: nil, tool_calls: nil, tool_call_id: nil)
],
@@ -179,7 +182,7 @@ final class APIServerRewriteTests: XCTestCase {
}
func testTextOnlyFollowUpReusesEarlierImagePrefix() async throws {
let harness = try await makeHarness()
let harness = try await makeHarness(initialModelId: "gemma")
defer { harness.stop() }
let lookups = LookupEventCollector()
@@ -192,7 +195,7 @@ final class APIServerRewriteTests: XCTestCase {
APIServer.debugLookupEventHandler = nil
}
let firstRequest = visionRequest(dataURI: TestImageFixtures.primaryDataURI, prompt: "Describe this image in one short word.")
let firstRequest = visionRequest(modelId: "gemma", dataURI: TestImageFixtures.primaryDataURI, prompt: "Describe this image in one short word.")
let firstResponse = try await sendChatCompletion(firstRequest, port: harness.port)
let assistantContent = try XCTUnwrap(firstResponse.choices.first?.message.content)
@@ -253,7 +256,7 @@ final class APIServerRewriteTests: XCTestCase {
}
let request = APIChatCompletionRequest(
model: "gemma",
model: self.genericModelId,
messages: [
APIChatMessage(role: "user", content: .text("Answer with one word: ocean."), name: nil, tool_calls: nil, tool_call_id: nil)
],
@@ -284,7 +287,7 @@ final class APIServerRewriteTests: XCTestCase {
}
func testSingleTurnContinuationProducesPartialCacheHit() async throws {
let harness = try await makeHarness()
let harness = try await makeHarness(initialModelId: "gemma")
defer { harness.stop() }
let firstRequest = APIChatCompletionRequest(
@@ -334,7 +337,7 @@ final class APIServerRewriteTests: XCTestCase {
}
func testSameSystemPromptDifferentUserMessageReusesSystemPrefix() async throws {
let harness = try await makeHarness()
let harness = try await makeHarness(initialModelId: "gemma")
defer { harness.stop() }
let lookups = LookupEventCollector()
@@ -401,7 +404,7 @@ final class APIServerRewriteTests: XCTestCase {
}
func testServerStoredCacheIsDirectlyReusableForSameSystemDifferentUserPrompt() async throws {
let harness = try await makeHarness()
let harness = try await makeHarness(initialModelId: "gemma")
defer { harness.stop() }
let firstRequest = APIChatCompletionRequest(
@@ -447,7 +450,7 @@ final class APIServerRewriteTests: XCTestCase {
let engine = InferenceEngine(container: container)
let preparedPrompt = PromptBuilder.build(
from: secondRequest,
modelId: ModelConfig.default.repoId,
modelId: "mlx-community/gemma-3-4b-it-4bit",
thinkingEnabled: Preferences.enableThinking
)
let preparedInference = try await engine.prepare(preparedPrompt.userInput)
@@ -463,7 +466,7 @@ final class APIServerRewriteTests: XCTestCase {
defer { harness.stop() }
let firstRequest = APIChatCompletionRequest(
model: "gemma",
model: self.genericModelId,
messages: [
APIChatMessage(role: "system", content: .text("System Alpha Unique Tokens"), name: nil, tool_calls: nil, tool_call_id: nil),
APIChatMessage(role: "user", content: .text("Answer in one word: tree."), name: nil, tool_calls: nil, tool_call_id: nil)
@@ -481,7 +484,7 @@ final class APIServerRewriteTests: XCTestCase {
)
let secondRequest = APIChatCompletionRequest(
model: "gemma",
model: self.genericModelId,
messages: [
APIChatMessage(role: "system", content: .text("Completely Different Beta Markers"), name: nil, tool_calls: nil, tool_call_id: nil),
APIChatMessage(role: "user", content: .text("Answer in one word: tree."), name: nil, tool_calls: nil, tool_call_id: nil)
@@ -512,7 +515,7 @@ final class APIServerRewriteTests: XCTestCase {
let harness = try await makeHarness()
defer { harness.stop() }
Preferences.lastModelId = "gemma"
Preferences.lastModelId = self.genericModelId
let request = APIChatCompletionRequest(
model: nil,
messages: [
@@ -590,7 +593,7 @@ final class APIServerRewriteTests: XCTestCase {
}
let qwenRequest = APIChatCompletionRequest(
model: "qwen",
model: "qwen3.5-0.8b",
messages: [
APIChatMessage(role: "user", content: .text("Answer with one word: river."), name: nil, tool_calls: nil, tool_call_id: nil)
],
@@ -611,15 +614,15 @@ final class APIServerRewriteTests: XCTestCase {
try await waitUntil(timeoutSeconds: 5) {
let snapshot = TokenPrefixCache.shared.snapshot()
let modelId = await MainActor.run { harness.modelManager.currentModel?.id }
return modelId == "qwen"
return modelId == "qwen3.5-0.8b"
&& !snapshot.entries.isEmpty
&& snapshot.entries.allSatisfy { $0.modelId == "qwen" }
&& snapshot.entries.allSatisfy { $0.modelId == "qwen3.5-0.8b" }
}
let afterSwapSnapshot = TokenPrefixCache.shared.snapshot()
let afterSwapEvents = await lookups.events()
let firstQwenLookup = try XCTUnwrap(afterSwapEvents.last)
XCTAssertTrue(afterSwapSnapshot.entries.allSatisfy { $0.modelId == "qwen" })
XCTAssertTrue(afterSwapSnapshot.entries.allSatisfy { $0.modelId == "qwen3.5-0.8b" })
XCTAssertFalse(firstQwenLookup.isHit)
XCTAssertEqual(firstQwenLookup.matchedTokenCount, 0)
@@ -639,7 +642,7 @@ final class APIServerRewriteTests: XCTestCase {
}
func testStreamingChatCompletionReusesCacheAcrossThreeProgressivelyLongerTurns() async throws {
let harness = try await makeHarness()
let harness = try await makeHarness(initialModelId: "gemma")
defer { harness.stop() }
let firstRequest = APIChatCompletionRequest(
@@ -742,7 +745,7 @@ final class APIServerRewriteTests: XCTestCase {
}
func testStreamingChatCompletionReusesCacheAcrossToolBoundary() async throws {
let harness = try await makeHarness()
let harness = try await makeHarness(initialModelId: "gemma")
defer { harness.stop() }
let tools = [mockWeatherTool]
@@ -847,7 +850,7 @@ final class APIServerRewriteTests: XCTestCase {
}
func testStreamingChatCompletionReusesCacheAcrossMultipleToolTurns() async throws {
let harness = try await makeHarness()
let harness = try await makeHarness(initialModelId: "gemma")
defer { harness.stop() }
let tools = [mockWeatherTool]
@@ -994,7 +997,7 @@ final class APIServerRewriteTests: XCTestCase {
defer { harness.stop() }
let request = APIChatCompletionRequest(
model: "gemma",
model: self.genericModelId,
messages: [
APIChatMessage(role: "user", content: .text("Count from one to twenty with commas, using many tokens."), name: nil, tool_calls: nil, tool_call_id: nil)
],
@@ -1053,7 +1056,7 @@ final class APIServerRewriteTests: XCTestCase {
defer { harness.stop() }
let request = APIChatCompletionRequest(
model: "gemma",
model: self.genericModelId,
messages: [
APIChatMessage(role: "user", content: .text("Count from one to fifty with commas, using many tokens."), name: nil, tool_calls: nil, tool_call_id: nil)
],
@@ -1120,7 +1123,7 @@ final class APIServerRewriteTests: XCTestCase {
let harness = try await makeHarness()
let request = APIChatCompletionRequest(
model: "gemma",
model: self.genericModelId,
messages: [
APIChatMessage(role: "user", content: .text("Count from one to fifty with commas, using many tokens."), name: nil, tool_calls: nil, tool_call_id: nil)
],
@@ -1189,7 +1192,7 @@ final class APIServerRewriteTests: XCTestCase {
defer { harness.stop() }
let request = APIChatCompletionRequest(
model: "gemma",
model: self.genericModelId,
messages: [
APIChatMessage(role: "user", content: .text("Count from one to forty with commas, using many tokens."), name: nil, tool_calls: nil, tool_call_id: nil)
],
@@ -1218,7 +1221,7 @@ final class APIServerRewriteTests: XCTestCase {
}
let recoveryRequest = APIChatCompletionRequest(
model: "gemma",
model: self.genericModelId,
messages: [
APIChatMessage(role: "user", content: .text("Reply with exactly one short word."), name: nil, tool_calls: nil, tool_call_id: nil)
],
@@ -1241,7 +1244,7 @@ final class APIServerRewriteTests: XCTestCase {
}
func testStreamingToolCallChunksArriveInOpenAICompatibleOrder() async throws {
let harness = try await makeHarness()
let harness = try await makeHarness(initialModelId: "gemma")
defer { harness.stop() }
let detailed = try await sendStreamingChatCompletionDetailed(
@@ -1311,7 +1314,7 @@ final class APIServerRewriteTests: XCTestCase {
)
}
private func makeHarness(initialModelId: String = "gemma") async throws -> TestHarness {
private func makeHarness(initialModelId: String = "qwen3.5-0.8b") async throws -> TestHarness {
let modelManager = await MainActor.run { ModelManager() }
let config = try XCTUnwrap(ModelConfig.resolve(initialModelId))
@@ -1334,9 +1337,9 @@ final class APIServerRewriteTests: XCTestCase {
return TestHarness(server: server, modelManager: modelManager, port: port)
}
private func visionRequest(dataURI: String, prompt: String) -> APIChatCompletionRequest {
private func visionRequest(modelId: String = "qwen3.5-0.8b", dataURI: String, prompt: String) -> APIChatCompletionRequest {
APIChatCompletionRequest(
model: "gemma",
model: modelId,
messages: [
APIChatMessage(
role: "user",

View File

@@ -3,9 +3,9 @@ import XCTest
@MainActor
final class ChatViewModelTests: XCTestCase {
func testGemmaChatViewModelSendProducesAssistantReply() async throws {
func testQwenChatViewModelSendProducesAssistantReply() async throws {
let modelManager = ModelManager()
let config = try XCTUnwrap(ModelConfig.resolve("gemma"))
let config = try XCTUnwrap(ModelConfig.resolve("qwen3.5-0.8b"))
await modelManager.loadModel(config)
defer { modelManager.unloadModel() }

View File

@@ -114,7 +114,7 @@ final class PromptBuilderTests: XCTestCase {
n: nil
)
let prepared = PromptBuilder.build(from: request, modelId: "mlx-community/Qwen3.5-4B-MLX-4bit", thinkingEnabled: true)
let prepared = PromptBuilder.build(from: request, modelId: "mlx-community/Qwen3.5-0.8B-4bit", thinkingEnabled: true)
XCTAssertEqual(prepared.chatMessages.count, 1)
XCTAssertTrue(prepared.chatMessages[0].content.contains("Let me check."))