feat: first take at scene-local generation settings

This commit is contained in:
2026-03-21 09:24:14 +01:00
parent 24b940d526
commit 941ec92dbf
17 changed files with 811 additions and 45 deletions

View File

@@ -1174,6 +1174,102 @@ final class APIServerRewriteTests: XCTestCase {
XCTAssertGreaterThan(finalLiveSnapshot.totalCacheReusePromptTokens, afterDisconnectLiveSnapshot.totalCacheReusePromptTokens)
}
/// Sends two non-streaming chat completions and inspects the settings reported
/// through `APIServer.debugGenerationSettingsEventHandler`.
///
/// The first request carries no sampling parameters and is expected to report
/// the per-model defaults stored in `Preferences`; the second supplies its own
/// sampling parameters and is expected to report those instead.
func testAPIServerUsesModelDefaultsAndRequestOverridesTakePrecedence() async throws {
    let targetModel = genericModelId
    let savedSettings = Preferences.generationSettings(forModelId: targetModel)
    let recorder = GenerationSettingsEventCollector()

    // Per-model defaults that the first request is expected to pick up.
    let storedDefaults = GenerationSettings(
        temperature: 0.11,
        topP: 0.77,
        topK: 9,
        minP: 0.04,
        maxTokens: 3,
        repetitionPenalty: 1.18,
        presencePenalty: 0.25,
        frequencyPenalty: 0.4,
        thinkingEnabled: false
    )
    Preferences.setGenerationSettings(storedDefaults, forModelId: targetModel)

    // The handler is synchronous, so hop into a task to reach the actor.
    APIServer.debugGenerationSettingsEventHandler = { settingsEvent in
        Task {
            await recorder.record(settingsEvent)
        }
    }
    defer {
        // Put the shared state back the way it was found.
        Preferences.setGenerationSettings(savedSettings, forModelId: targetModel)
        APIServer.debugGenerationSettingsEventHandler = nil
    }

    let server = try await makeHarness(initialModelId: targetModel)
    defer { server.stop() }

    // Both requests send the same single user message.
    let prompt = APIChatMessage(
        role: "user",
        content: .text("Reply with one short word."),
        name: nil,
        tool_calls: nil,
        tool_call_id: nil
    )

    // MARK: Request without sampling parameters

    let plainRequest = APIChatCompletionRequest(
        model: targetModel,
        messages: [prompt],
        stream: false
    )
    _ = try await sendChatCompletion(plainRequest, port: server.port)

    try await waitUntil(timeoutSeconds: 5) {
        await recorder.events().count == 1
    }

    let eventsAfterPlainRequest = await recorder.events()
    let defaultsEvent = try XCTUnwrap(eventsAfterPlainRequest.first)
    let reportedDefaults = defaultsEvent.settings
    XCTAssertEqual(reportedDefaults.temperature, 0.11)
    XCTAssertEqual(reportedDefaults.topP, 0.77)
    XCTAssertEqual(reportedDefaults.topK, 9)
    XCTAssertEqual(reportedDefaults.minP, 0.04)
    XCTAssertEqual(reportedDefaults.maxTokens, 3)
    XCTAssertEqual(reportedDefaults.repetitionPenalty, 1.18)
    XCTAssertEqual(reportedDefaults.presencePenalty, 0.25)
    XCTAssertEqual(reportedDefaults.frequencyPenalty, 0.4)
    XCTAssertFalse(reportedDefaults.thinkingEnabled)

    // MARK: Request with explicit sampling parameters

    let overridingRequest = APIChatCompletionRequest(
        model: targetModel,
        messages: [prompt],
        temperature: 0.62,
        top_p: 0.55,
        max_tokens: 5,
        stream: false,
        frequency_penalty: 0.1,
        presence_penalty: 0.2,
        top_k: 4,
        min_p: 0.02,
        repetition_penalty: 1.05
    )
    _ = try await sendChatCompletion(overridingRequest, port: server.port)

    try await waitUntil(timeoutSeconds: 5) {
        await recorder.events().count == 2
    }

    let eventsAfterOverridingRequest = await recorder.events()
    let overridesEvent = try XCTUnwrap(eventsAfterOverridingRequest.last)
    let reportedOverrides = overridesEvent.settings
    XCTAssertEqual(reportedOverrides.temperature, 0.62)
    XCTAssertEqual(reportedOverrides.topP, 0.55)
    XCTAssertEqual(reportedOverrides.topK, 4)
    XCTAssertEqual(reportedOverrides.minP, 0.02)
    XCTAssertEqual(reportedOverrides.maxTokens, 5)
    XCTAssertEqual(reportedOverrides.repetitionPenalty, 1.05)
    XCTAssertEqual(reportedOverrides.presencePenalty, 0.2)
    XCTAssertEqual(reportedOverrides.frequencyPenalty, 0.1)
    // The request carries no thinking flag, so the stored default (false) is expected.
    XCTAssertFalse(reportedOverrides.thinkingEnabled)
}
func testStreamingDisconnectStopsServerWorkWithinTwoHundredMilliseconds() async throws {
let harness = try await makeHarness()
defer { harness.stop() }
@@ -1683,6 +1779,18 @@ private actor LookupEventCollector {
}
}
/// Actor that accumulates `APIServer.DebugGenerationSettingsEvent` values so a
/// test can read them back after the fact.
private actor GenerationSettingsEventCollector {
    /// Events in the order they were recorded.
    private var collected: [APIServer.DebugGenerationSettingsEvent] = []

    /// Appends `event` to the end of the collected list.
    func record(_ event: APIServer.DebugGenerationSettingsEvent) {
        collected.append(event)
    }

    /// Returns a copy of everything recorded so far.
    func events() -> [APIServer.DebugGenerationSettingsEvent] {
        return collected
    }
}
private struct DetailedStreamingResult {
let events: [StreamingEvent]
let sawDone: Bool

View File

@@ -0,0 +1,80 @@
import XCTest
@testable import MLX_Server
/// Unit tests for `GenerationSettings`: applying overrides, per-model storage in
/// `Preferences`, and the fallback defaults returned by `modelDefault(for:)`.
final class GenerationSettingsTests: XCTestCase {
    /// Applying a partial override must replace only the fields the override
    /// specifies and leave every other base value untouched.
    func testSceneOverridesApplyWithoutDiscardingModelDefaults() {
        let base = GenerationSettings(
            temperature: 0.2,
            topP: 0.9,
            topK: 12,
            minP: 0.05,
            maxTokens: 2048,
            repetitionPenalty: 1.08,
            presencePenalty: 0.3,
            frequencyPenalty: 0.1,
            thinkingEnabled: true
        )
        let overrides = GenerationSettingsOverride(
            temperature: 0.8,
            repetitionPenalty: 1.2,
            thinkingEnabled: false
        )

        let resolved = base.applying(overrides)

        // Overridden fields take the override's value.
        XCTAssertEqual(resolved.temperature, 0.8)
        XCTAssertEqual(resolved.repetitionPenalty, 1.2)
        XCTAssertFalse(resolved.thinkingEnabled)

        // Every field absent from the override keeps its base value. minP and
        // frequencyPenalty are checked too so that dropping either one is caught.
        XCTAssertEqual(resolved.topP, 0.9)
        XCTAssertEqual(resolved.topK, 12)
        XCTAssertEqual(resolved.minP, 0.05)
        XCTAssertEqual(resolved.maxTokens, 2048)
        XCTAssertEqual(resolved.presencePenalty, 0.3)
        XCTAssertEqual(resolved.frequencyPenalty, 0.1)
    }

    /// Settings written for one model ID must be read back for that same ID and
    /// must not leak into another model's settings.
    func testPreferencesStoreGenerationDefaultsPerModel() {
        let gemmaId = "gemma"
        let qwenId = "qwen3.5-0.8b"
        let originalGemma = Preferences.generationSettings(forModelId: gemmaId)
        let originalQwen = Preferences.generationSettings(forModelId: qwenId)
        defer {
            // Write back whatever was read before the test changed anything.
            Preferences.setGenerationSettings(originalGemma, forModelId: gemmaId)
            Preferences.setGenerationSettings(originalQwen, forModelId: qwenId)
        }

        Preferences.setGenerationSettings(
            GenerationSettings(temperature: 0.15, topP: 0.85, maxTokens: 1024, repetitionPenalty: 1.1, thinkingEnabled: false),
            forModelId: gemmaId
        )
        Preferences.setGenerationSettings(
            GenerationSettings(temperature: 0.95, topP: 1.0, maxTokens: 8192, repetitionPenalty: nil, thinkingEnabled: true),
            forModelId: qwenId
        )

        let gemma = Preferences.generationSettings(forModelId: gemmaId)
        let qwen = Preferences.generationSettings(forModelId: qwenId)

        XCTAssertEqual(gemma.temperature, 0.15)
        XCTAssertEqual(gemma.topP, 0.85)
        XCTAssertEqual(gemma.maxTokens, 1024)
        XCTAssertEqual(gemma.repetitionPenalty, 1.1)
        XCTAssertFalse(gemma.thinkingEnabled)

        XCTAssertEqual(qwen.temperature, 0.95)
        // topP is verified for qwen as well, mirroring the gemma checks.
        XCTAssertEqual(qwen.topP, 1.0)
        XCTAssertEqual(qwen.maxTokens, 8192)
        XCTAssertNil(qwen.repetitionPenalty)
        XCTAssertTrue(qwen.thinkingEnabled)
    }

    /// `modelDefault(for:)` is expected to return `.technicalDefault` for
    /// "gemma" and "qwen" and `.roleplayDefault` for "stheno".
    func testModelFallbackDefaultsComeFromModelDefinitions() {
        let gemma = GenerationSettings.modelDefault(for: "gemma")
        let qwen = GenerationSettings.modelDefault(for: "qwen")
        let stheno = GenerationSettings.modelDefault(for: "stheno")

        XCTAssertEqual(gemma, .technicalDefault)
        XCTAssertEqual(qwen, .technicalDefault)
        XCTAssertEqual(stheno, .roleplayDefault)
        // Guards against the two presets being defined with identical values.
        XCTAssertNotEqual(gemma, stheno)
    }
}