chore: move some tests to the Qwen3.5 0.8B model for speed
This commit is contained in:
@@ -3,8 +3,11 @@ import XCTest
|
||||
@testable import MLX_Server
|
||||
|
||||
final class APIServerRewriteTests: XCTestCase {
|
||||
private let genericModelId = "qwen3.5-0.8b"
|
||||
private let genericModelRepoId = "mlx-community/Qwen3.5-0.8B-4bit"
|
||||
|
||||
func testQwenNonStreamingChatCompletionCachesAndReusesPrompt() async throws {
|
||||
let harness = try await makeHarness(initialModelId: "qwen")
|
||||
let harness = try await makeHarness(initialModelId: self.genericModelId)
|
||||
defer { harness.stop() }
|
||||
|
||||
let lookups = LookupEventCollector()
|
||||
@@ -18,7 +21,7 @@ final class APIServerRewriteTests: XCTestCase {
|
||||
}
|
||||
|
||||
let request = APIChatCompletionRequest(
|
||||
model: "qwen",
|
||||
model: self.genericModelId,
|
||||
messages: [
|
||||
APIChatMessage(role: "user", content: .text("Reply with exactly one short word."), name: nil, tool_calls: nil, tool_call_id: nil)
|
||||
],
|
||||
@@ -39,7 +42,7 @@ final class APIServerRewriteTests: XCTestCase {
|
||||
|
||||
try await waitUntil(timeoutSeconds: 5) {
|
||||
let snapshot = TokenPrefixCache.shared.snapshot()
|
||||
return snapshot.totalEntries > 0 && snapshot.entries.allSatisfy { $0.modelId == "qwen" }
|
||||
return snapshot.totalEntries > 0 && snapshot.entries.allSatisfy { $0.modelId == self.genericModelId }
|
||||
}
|
||||
|
||||
let firstSnapshot = TokenPrefixCache.shared.snapshot()
|
||||
@@ -77,7 +80,7 @@ final class APIServerRewriteTests: XCTestCase {
|
||||
defer { harness.stop() }
|
||||
|
||||
let request = APIChatCompletionRequest(
|
||||
model: "gemma",
|
||||
model: self.genericModelId,
|
||||
messages: [
|
||||
APIChatMessage(role: "user", content: .text("Reply with exactly one short word."), name: nil, tool_calls: nil, tool_call_id: nil)
|
||||
],
|
||||
@@ -179,7 +182,7 @@ final class APIServerRewriteTests: XCTestCase {
|
||||
}
|
||||
|
||||
func testTextOnlyFollowUpReusesEarlierImagePrefix() async throws {
|
||||
let harness = try await makeHarness()
|
||||
let harness = try await makeHarness(initialModelId: "gemma")
|
||||
defer { harness.stop() }
|
||||
|
||||
let lookups = LookupEventCollector()
|
||||
@@ -192,7 +195,7 @@ final class APIServerRewriteTests: XCTestCase {
|
||||
APIServer.debugLookupEventHandler = nil
|
||||
}
|
||||
|
||||
let firstRequest = visionRequest(dataURI: TestImageFixtures.primaryDataURI, prompt: "Describe this image in one short word.")
|
||||
let firstRequest = visionRequest(modelId: "gemma", dataURI: TestImageFixtures.primaryDataURI, prompt: "Describe this image in one short word.")
|
||||
let firstResponse = try await sendChatCompletion(firstRequest, port: harness.port)
|
||||
let assistantContent = try XCTUnwrap(firstResponse.choices.first?.message.content)
|
||||
|
||||
@@ -253,7 +256,7 @@ final class APIServerRewriteTests: XCTestCase {
|
||||
}
|
||||
|
||||
let request = APIChatCompletionRequest(
|
||||
model: "gemma",
|
||||
model: self.genericModelId,
|
||||
messages: [
|
||||
APIChatMessage(role: "user", content: .text("Answer with one word: ocean."), name: nil, tool_calls: nil, tool_call_id: nil)
|
||||
],
|
||||
@@ -284,7 +287,7 @@ final class APIServerRewriteTests: XCTestCase {
|
||||
}
|
||||
|
||||
func testSingleTurnContinuationProducesPartialCacheHit() async throws {
|
||||
let harness = try await makeHarness()
|
||||
let harness = try await makeHarness(initialModelId: "gemma")
|
||||
defer { harness.stop() }
|
||||
|
||||
let firstRequest = APIChatCompletionRequest(
|
||||
@@ -334,7 +337,7 @@ final class APIServerRewriteTests: XCTestCase {
|
||||
}
|
||||
|
||||
func testSameSystemPromptDifferentUserMessageReusesSystemPrefix() async throws {
|
||||
let harness = try await makeHarness()
|
||||
let harness = try await makeHarness(initialModelId: "gemma")
|
||||
defer { harness.stop() }
|
||||
|
||||
let lookups = LookupEventCollector()
|
||||
@@ -401,7 +404,7 @@ final class APIServerRewriteTests: XCTestCase {
|
||||
}
|
||||
|
||||
func testServerStoredCacheIsDirectlyReusableForSameSystemDifferentUserPrompt() async throws {
|
||||
let harness = try await makeHarness()
|
||||
let harness = try await makeHarness(initialModelId: "gemma")
|
||||
defer { harness.stop() }
|
||||
|
||||
let firstRequest = APIChatCompletionRequest(
|
||||
@@ -447,7 +450,7 @@ final class APIServerRewriteTests: XCTestCase {
|
||||
let engine = InferenceEngine(container: container)
|
||||
let preparedPrompt = PromptBuilder.build(
|
||||
from: secondRequest,
|
||||
modelId: ModelConfig.default.repoId,
|
||||
modelId: "mlx-community/gemma-3-4b-it-4bit",
|
||||
thinkingEnabled: Preferences.enableThinking
|
||||
)
|
||||
let preparedInference = try await engine.prepare(preparedPrompt.userInput)
|
||||
@@ -463,7 +466,7 @@ final class APIServerRewriteTests: XCTestCase {
|
||||
defer { harness.stop() }
|
||||
|
||||
let firstRequest = APIChatCompletionRequest(
|
||||
model: "gemma",
|
||||
model: self.genericModelId,
|
||||
messages: [
|
||||
APIChatMessage(role: "system", content: .text("System Alpha Unique Tokens"), name: nil, tool_calls: nil, tool_call_id: nil),
|
||||
APIChatMessage(role: "user", content: .text("Answer in one word: tree."), name: nil, tool_calls: nil, tool_call_id: nil)
|
||||
@@ -481,7 +484,7 @@ final class APIServerRewriteTests: XCTestCase {
|
||||
)
|
||||
|
||||
let secondRequest = APIChatCompletionRequest(
|
||||
model: "gemma",
|
||||
model: self.genericModelId,
|
||||
messages: [
|
||||
APIChatMessage(role: "system", content: .text("Completely Different Beta Markers"), name: nil, tool_calls: nil, tool_call_id: nil),
|
||||
APIChatMessage(role: "user", content: .text("Answer in one word: tree."), name: nil, tool_calls: nil, tool_call_id: nil)
|
||||
@@ -512,7 +515,7 @@ final class APIServerRewriteTests: XCTestCase {
|
||||
let harness = try await makeHarness()
|
||||
defer { harness.stop() }
|
||||
|
||||
Preferences.lastModelId = "gemma"
|
||||
Preferences.lastModelId = self.genericModelId
|
||||
let request = APIChatCompletionRequest(
|
||||
model: nil,
|
||||
messages: [
|
||||
@@ -590,7 +593,7 @@ final class APIServerRewriteTests: XCTestCase {
|
||||
}
|
||||
|
||||
let qwenRequest = APIChatCompletionRequest(
|
||||
model: "qwen",
|
||||
model: "qwen3.5-0.8b",
|
||||
messages: [
|
||||
APIChatMessage(role: "user", content: .text("Answer with one word: river."), name: nil, tool_calls: nil, tool_call_id: nil)
|
||||
],
|
||||
@@ -611,15 +614,15 @@ final class APIServerRewriteTests: XCTestCase {
|
||||
try await waitUntil(timeoutSeconds: 5) {
|
||||
let snapshot = TokenPrefixCache.shared.snapshot()
|
||||
let modelId = await MainActor.run { harness.modelManager.currentModel?.id }
|
||||
return modelId == "qwen"
|
||||
return modelId == "qwen3.5-0.8b"
|
||||
&& !snapshot.entries.isEmpty
|
||||
&& snapshot.entries.allSatisfy { $0.modelId == "qwen" }
|
||||
&& snapshot.entries.allSatisfy { $0.modelId == "qwen3.5-0.8b" }
|
||||
}
|
||||
|
||||
let afterSwapSnapshot = TokenPrefixCache.shared.snapshot()
|
||||
let afterSwapEvents = await lookups.events()
|
||||
let firstQwenLookup = try XCTUnwrap(afterSwapEvents.last)
|
||||
XCTAssertTrue(afterSwapSnapshot.entries.allSatisfy { $0.modelId == "qwen" })
|
||||
XCTAssertTrue(afterSwapSnapshot.entries.allSatisfy { $0.modelId == "qwen3.5-0.8b" })
|
||||
XCTAssertFalse(firstQwenLookup.isHit)
|
||||
XCTAssertEqual(firstQwenLookup.matchedTokenCount, 0)
|
||||
|
||||
@@ -639,7 +642,7 @@ final class APIServerRewriteTests: XCTestCase {
|
||||
}
|
||||
|
||||
func testStreamingChatCompletionReusesCacheAcrossThreeProgressivelyLongerTurns() async throws {
|
||||
let harness = try await makeHarness()
|
||||
let harness = try await makeHarness(initialModelId: "gemma")
|
||||
defer { harness.stop() }
|
||||
|
||||
let firstRequest = APIChatCompletionRequest(
|
||||
@@ -742,7 +745,7 @@ final class APIServerRewriteTests: XCTestCase {
|
||||
}
|
||||
|
||||
func testStreamingChatCompletionReusesCacheAcrossToolBoundary() async throws {
|
||||
let harness = try await makeHarness()
|
||||
let harness = try await makeHarness(initialModelId: "gemma")
|
||||
defer { harness.stop() }
|
||||
|
||||
let tools = [mockWeatherTool]
|
||||
@@ -847,7 +850,7 @@ final class APIServerRewriteTests: XCTestCase {
|
||||
}
|
||||
|
||||
func testStreamingChatCompletionReusesCacheAcrossMultipleToolTurns() async throws {
|
||||
let harness = try await makeHarness()
|
||||
let harness = try await makeHarness(initialModelId: "gemma")
|
||||
defer { harness.stop() }
|
||||
|
||||
let tools = [mockWeatherTool]
|
||||
@@ -994,7 +997,7 @@ final class APIServerRewriteTests: XCTestCase {
|
||||
defer { harness.stop() }
|
||||
|
||||
let request = APIChatCompletionRequest(
|
||||
model: "gemma",
|
||||
model: self.genericModelId,
|
||||
messages: [
|
||||
APIChatMessage(role: "user", content: .text("Count from one to twenty with commas, using many tokens."), name: nil, tool_calls: nil, tool_call_id: nil)
|
||||
],
|
||||
@@ -1053,7 +1056,7 @@ final class APIServerRewriteTests: XCTestCase {
|
||||
defer { harness.stop() }
|
||||
|
||||
let request = APIChatCompletionRequest(
|
||||
model: "gemma",
|
||||
model: self.genericModelId,
|
||||
messages: [
|
||||
APIChatMessage(role: "user", content: .text("Count from one to fifty with commas, using many tokens."), name: nil, tool_calls: nil, tool_call_id: nil)
|
||||
],
|
||||
@@ -1120,7 +1123,7 @@ final class APIServerRewriteTests: XCTestCase {
|
||||
let harness = try await makeHarness()
|
||||
|
||||
let request = APIChatCompletionRequest(
|
||||
model: "gemma",
|
||||
model: self.genericModelId,
|
||||
messages: [
|
||||
APIChatMessage(role: "user", content: .text("Count from one to fifty with commas, using many tokens."), name: nil, tool_calls: nil, tool_call_id: nil)
|
||||
],
|
||||
@@ -1189,7 +1192,7 @@ final class APIServerRewriteTests: XCTestCase {
|
||||
defer { harness.stop() }
|
||||
|
||||
let request = APIChatCompletionRequest(
|
||||
model: "gemma",
|
||||
model: self.genericModelId,
|
||||
messages: [
|
||||
APIChatMessage(role: "user", content: .text("Count from one to forty with commas, using many tokens."), name: nil, tool_calls: nil, tool_call_id: nil)
|
||||
],
|
||||
@@ -1218,7 +1221,7 @@ final class APIServerRewriteTests: XCTestCase {
|
||||
}
|
||||
|
||||
let recoveryRequest = APIChatCompletionRequest(
|
||||
model: "gemma",
|
||||
model: self.genericModelId,
|
||||
messages: [
|
||||
APIChatMessage(role: "user", content: .text("Reply with exactly one short word."), name: nil, tool_calls: nil, tool_call_id: nil)
|
||||
],
|
||||
@@ -1241,7 +1244,7 @@ final class APIServerRewriteTests: XCTestCase {
|
||||
}
|
||||
|
||||
func testStreamingToolCallChunksArriveInOpenAICompatibleOrder() async throws {
|
||||
let harness = try await makeHarness()
|
||||
let harness = try await makeHarness(initialModelId: "gemma")
|
||||
defer { harness.stop() }
|
||||
|
||||
let detailed = try await sendStreamingChatCompletionDetailed(
|
||||
@@ -1311,7 +1314,7 @@ final class APIServerRewriteTests: XCTestCase {
|
||||
)
|
||||
}
|
||||
|
||||
private func makeHarness(initialModelId: String = "gemma") async throws -> TestHarness {
|
||||
private func makeHarness(initialModelId: String = "qwen3.5-0.8b") async throws -> TestHarness {
|
||||
let modelManager = await MainActor.run { ModelManager() }
|
||||
let config = try XCTUnwrap(ModelConfig.resolve(initialModelId))
|
||||
|
||||
@@ -1334,9 +1337,9 @@ final class APIServerRewriteTests: XCTestCase {
|
||||
return TestHarness(server: server, modelManager: modelManager, port: port)
|
||||
}
|
||||
|
||||
private func visionRequest(dataURI: String, prompt: String) -> APIChatCompletionRequest {
|
||||
private func visionRequest(modelId: String = "qwen3.5-0.8b", dataURI: String, prompt: String) -> APIChatCompletionRequest {
|
||||
APIChatCompletionRequest(
|
||||
model: "gemma",
|
||||
model: modelId,
|
||||
messages: [
|
||||
APIChatMessage(
|
||||
role: "user",
|
||||
|
||||
@@ -3,9 +3,9 @@ import XCTest
|
||||
|
||||
@MainActor
|
||||
final class ChatViewModelTests: XCTestCase {
|
||||
func testGemmaChatViewModelSendProducesAssistantReply() async throws {
|
||||
func testQwenChatViewModelSendProducesAssistantReply() async throws {
|
||||
let modelManager = ModelManager()
|
||||
let config = try XCTUnwrap(ModelConfig.resolve("gemma"))
|
||||
let config = try XCTUnwrap(ModelConfig.resolve("qwen3.5-0.8b"))
|
||||
await modelManager.loadModel(config)
|
||||
defer { modelManager.unloadModel() }
|
||||
|
||||
|
||||
@@ -114,7 +114,7 @@ final class PromptBuilderTests: XCTestCase {
|
||||
n: nil
|
||||
)
|
||||
|
||||
let prepared = PromptBuilder.build(from: request, modelId: "mlx-community/Qwen3.5-4B-MLX-4bit", thinkingEnabled: true)
|
||||
let prepared = PromptBuilder.build(from: request, modelId: "mlx-community/Qwen3.5-0.8B-4bit", thinkingEnabled: true)
|
||||
|
||||
XCTAssertEqual(prepared.chatMessages.count, 1)
|
||||
XCTAssertTrue(prepared.chatMessages[0].content.contains("Let me check."))
|
||||
|
||||
Reference in New Issue
Block a user