feat: first take at scene-local generation settings
This commit is contained in:
@@ -9,12 +9,14 @@
|
|||||||
/* Begin PBXBuildFile section */
|
/* Begin PBXBuildFile section */
|
||||||
0168AEE16009097901363E16 /* ModelManager.swift in Sources */ = {isa = PBXBuildFile; fileRef = 922CBDC9206737BD04AF2874 /* ModelManager.swift */; };
|
0168AEE16009097901363E16 /* ModelManager.swift in Sources */ = {isa = PBXBuildFile; fileRef = 922CBDC9206737BD04AF2874 /* ModelManager.swift */; };
|
||||||
07119250A7F9D6ECE7F6B8FD /* SceneCommands.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0F03A123A8908714A89315FE /* SceneCommands.swift */; };
|
07119250A7F9D6ECE7F6B8FD /* SceneCommands.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0F03A123A8908714A89315FE /* SceneCommands.swift */; };
|
||||||
|
0BC7203552A161BC852975EA /* GenerationSettingsEditor.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7AE2A32FBB744696DEA77435 /* GenerationSettingsEditor.swift */; };
|
||||||
165E8AB6ADAE1D59B1A86420 /* Preferences.swift in Sources */ = {isa = PBXBuildFile; fileRef = 145B888FBDD4F931512C5473 /* Preferences.swift */; };
|
165E8AB6ADAE1D59B1A86420 /* Preferences.swift in Sources */ = {isa = PBXBuildFile; fileRef = 145B888FBDD4F931512C5473 /* Preferences.swift */; };
|
||||||
189362AAE2CDE5D4B3428334 /* ToolCallParser.swift in Sources */ = {isa = PBXBuildFile; fileRef = E73B165A1822729C907791AE /* ToolCallParser.swift */; };
|
189362AAE2CDE5D4B3428334 /* ToolCallParser.swift in Sources */ = {isa = PBXBuildFile; fileRef = E73B165A1822729C907791AE /* ToolCallParser.swift */; };
|
||||||
1A8833E3CCD3289C95E282A2 /* ChatDocumentManifest.swift in Sources */ = {isa = PBXBuildFile; fileRef = 1607BDDE53C575627DCC6896 /* ChatDocumentManifest.swift */; };
|
1A8833E3CCD3289C95E282A2 /* ChatDocumentManifest.swift in Sources */ = {isa = PBXBuildFile; fileRef = 1607BDDE53C575627DCC6896 /* ChatDocumentManifest.swift */; };
|
||||||
1FE8C624898960ECCE39C0D4 /* PromptBuilderTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5F9426FA5A4AC55F8D9C080E /* PromptBuilderTests.swift */; };
|
1FE8C624898960ECCE39C0D4 /* PromptBuilderTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5F9426FA5A4AC55F8D9C080E /* PromptBuilderTests.swift */; };
|
||||||
20FFB5DBF75AA6C359AAE31C /* SceneManagementView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 37FEB592E5E717F817B03151 /* SceneManagementView.swift */; };
|
20FFB5DBF75AA6C359AAE31C /* SceneManagementView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 37FEB592E5E717F817B03151 /* SceneManagementView.swift */; };
|
||||||
221DEC86374902FCFD661A01 /* TokenPrefixCacheTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 64B2EDD5D1881AC9E1E60913 /* TokenPrefixCacheTests.swift */; };
|
221DEC86374902FCFD661A01 /* TokenPrefixCacheTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 64B2EDD5D1881AC9E1E60913 /* TokenPrefixCacheTests.swift */; };
|
||||||
|
2640EDCA9033D85C0B785557 /* GenerationSettings.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6FAF7455BD387CD2061E0CBF /* GenerationSettings.swift */; };
|
||||||
29879D696584B96CC56560DF /* ChatExporter.swift in Sources */ = {isa = PBXBuildFile; fileRef = D7C9BAD674E29688ACE53B0B /* ChatExporter.swift */; };
|
29879D696584B96CC56560DF /* ChatExporter.swift in Sources */ = {isa = PBXBuildFile; fileRef = D7C9BAD674E29688ACE53B0B /* ChatExporter.swift */; };
|
||||||
2CAAF7129F7CC45200FA9F6B /* ModelPickerView.swift in Sources */ = {isa = PBXBuildFile; fileRef = C3C3A76C02AF70A9D8F868FC /* ModelPickerView.swift */; };
|
2CAAF7129F7CC45200FA9F6B /* ModelPickerView.swift in Sources */ = {isa = PBXBuildFile; fileRef = C3C3A76C02AF70A9D8F868FC /* ModelPickerView.swift */; };
|
||||||
2D08769282BD71C170DB0943 /* InferenceStats.swift in Sources */ = {isa = PBXBuildFile; fileRef = E35452B166893B25E765FF70 /* InferenceStats.swift */; };
|
2D08769282BD71C170DB0943 /* InferenceStats.swift in Sources */ = {isa = PBXBuildFile; fileRef = E35452B166893B25E765FF70 /* InferenceStats.swift */; };
|
||||||
@@ -37,6 +39,7 @@
|
|||||||
7CD765C1E2F9F4D7504C8D09 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = B629DA084A9A40E54F8EA5FA /* Assets.xcassets */; };
|
7CD765C1E2F9F4D7504C8D09 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = B629DA084A9A40E54F8EA5FA /* Assets.xcassets */; };
|
||||||
80646C5066BF79BC76E1D9D7 /* ModelConfig.swift in Sources */ = {isa = PBXBuildFile; fileRef = 38DFC212AF4359A45FBE22BA /* ModelConfig.swift */; };
|
80646C5066BF79BC76E1D9D7 /* ModelConfig.swift in Sources */ = {isa = PBXBuildFile; fileRef = 38DFC212AF4359A45FBE22BA /* ModelConfig.swift */; };
|
||||||
834B49AA3E30A1FED549D057 /* ToolCallParserTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = B89226C9ED585A5296C54441 /* ToolCallParserTests.swift */; };
|
834B49AA3E30A1FED549D057 /* ToolCallParserTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = B89226C9ED585A5296C54441 /* ToolCallParserTests.swift */; };
|
||||||
|
847B445654860396AF5A8280 /* GenerationSettingsTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 57AC0815F72BDD32FC54C88A /* GenerationSettingsTests.swift */; };
|
||||||
84D32315B418B5243E017350 /* ToolPromptBuilder.swift in Sources */ = {isa = PBXBuildFile; fileRef = 16AE82A64D1D07AE3CD8D33A /* ToolPromptBuilder.swift */; };
|
84D32315B418B5243E017350 /* ToolPromptBuilder.swift in Sources */ = {isa = PBXBuildFile; fileRef = 16AE82A64D1D07AE3CD8D33A /* ToolPromptBuilder.swift */; };
|
||||||
85FB1EB49D76A9F21E181346 /* ChatScene.swift in Sources */ = {isa = PBXBuildFile; fileRef = C04EE8E6418EC6E9B66999B0 /* ChatScene.swift */; };
|
85FB1EB49D76A9F21E181346 /* ChatScene.swift in Sources */ = {isa = PBXBuildFile; fileRef = C04EE8E6418EC6E9B66999B0 /* ChatScene.swift */; };
|
||||||
8E665E21CCCD87A907CEA78D /* ModelBackedInferenceValidationTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = D388BE00B42C06ED9D9905BF /* ModelBackedInferenceValidationTests.swift */; };
|
8E665E21CCCD87A907CEA78D /* ModelBackedInferenceValidationTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = D388BE00B42C06ED9D9905BF /* ModelBackedInferenceValidationTests.swift */; };
|
||||||
@@ -98,11 +101,14 @@
|
|||||||
4147321383E94E9F17A0154E /* SettingsView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SettingsView.swift; sourceTree = "<group>"; };
|
4147321383E94E9F17A0154E /* SettingsView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SettingsView.swift; sourceTree = "<group>"; };
|
||||||
4239CFF94B819C35A8D4D617 /* MonitorView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MonitorView.swift; sourceTree = "<group>"; };
|
4239CFF94B819C35A8D4D617 /* MonitorView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MonitorView.swift; sourceTree = "<group>"; };
|
||||||
49C383DD5224F3420EB98DB2 /* StreamingSSEEncoderTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = StreamingSSEEncoderTests.swift; sourceTree = "<group>"; };
|
49C383DD5224F3420EB98DB2 /* StreamingSSEEncoderTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = StreamingSSEEncoderTests.swift; sourceTree = "<group>"; };
|
||||||
|
57AC0815F72BDD32FC54C88A /* GenerationSettingsTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = GenerationSettingsTests.swift; sourceTree = "<group>"; };
|
||||||
5F9426FA5A4AC55F8D9C080E /* PromptBuilderTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PromptBuilderTests.swift; sourceTree = "<group>"; };
|
5F9426FA5A4AC55F8D9C080E /* PromptBuilderTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PromptBuilderTests.swift; sourceTree = "<group>"; };
|
||||||
615F8A7C9ABCADEB215D31BD /* StreamingSSEEncoder.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = StreamingSSEEncoder.swift; sourceTree = "<group>"; };
|
615F8A7C9ABCADEB215D31BD /* StreamingSSEEncoder.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = StreamingSSEEncoder.swift; sourceTree = "<group>"; };
|
||||||
64B2EDD5D1881AC9E1E60913 /* TokenPrefixCacheTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TokenPrefixCacheTests.swift; sourceTree = "<group>"; };
|
64B2EDD5D1881AC9E1E60913 /* TokenPrefixCacheTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TokenPrefixCacheTests.swift; sourceTree = "<group>"; };
|
||||||
6B3AA91D2C7842D7366F9A41 /* ChatDocumentPackage.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ChatDocumentPackage.swift; sourceTree = "<group>"; };
|
6B3AA91D2C7842D7366F9A41 /* ChatDocumentPackage.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ChatDocumentPackage.swift; sourceTree = "<group>"; };
|
||||||
6EE59189918D06B8D2F588FC /* MLXServer.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = MLXServer.app; sourceTree = BUILT_PRODUCTS_DIR; };
|
6EE59189918D06B8D2F588FC /* MLXServer.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = MLXServer.app; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||||
|
6FAF7455BD387CD2061E0CBF /* GenerationSettings.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = GenerationSettings.swift; sourceTree = "<group>"; };
|
||||||
|
7AE2A32FBB744696DEA77435 /* GenerationSettingsEditor.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = GenerationSettingsEditor.swift; sourceTree = "<group>"; };
|
||||||
7C1A89C076E717F87A60397D /* ImageDecoder.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ImageDecoder.swift; sourceTree = "<group>"; };
|
7C1A89C076E717F87A60397D /* ImageDecoder.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ImageDecoder.swift; sourceTree = "<group>"; };
|
||||||
7E7DF9F68C10C718844B7B01 /* LiveCountersTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LiveCountersTests.swift; sourceTree = "<group>"; };
|
7E7DF9F68C10C718844B7B01 /* LiveCountersTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LiveCountersTests.swift; sourceTree = "<group>"; };
|
||||||
922CBDC9206737BD04AF2874 /* ModelManager.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ModelManager.swift; sourceTree = "<group>"; };
|
922CBDC9206737BD04AF2874 /* ModelManager.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ModelManager.swift; sourceTree = "<group>"; };
|
||||||
@@ -190,6 +196,7 @@
|
|||||||
E43535D68448F1752D91C3A9 /* APIServerRewriteTests.swift */,
|
E43535D68448F1752D91C3A9 /* APIServerRewriteTests.swift */,
|
||||||
FEFF6168B2283FEC87B4BB8C /* CancellationTokenTests.swift */,
|
FEFF6168B2283FEC87B4BB8C /* CancellationTokenTests.swift */,
|
||||||
B758F596F4F3E68793B045BB /* ChatViewModelTests.swift */,
|
B758F596F4F3E68793B045BB /* ChatViewModelTests.swift */,
|
||||||
|
57AC0815F72BDD32FC54C88A /* GenerationSettingsTests.swift */,
|
||||||
E4573DC9314915F4C7963B4E /* ImageDecoderTests.swift */,
|
E4573DC9314915F4C7963B4E /* ImageDecoderTests.swift */,
|
||||||
7E7DF9F68C10C718844B7B01 /* LiveCountersTests.swift */,
|
7E7DF9F68C10C718844B7B01 /* LiveCountersTests.swift */,
|
||||||
D388BE00B42C06ED9D9905BF /* ModelBackedInferenceValidationTests.swift */,
|
D388BE00B42C06ED9D9905BF /* ModelBackedInferenceValidationTests.swift */,
|
||||||
@@ -238,6 +245,7 @@
|
|||||||
E5E6AD02CDF23BDAB64700A7 /* ChatInputView.swift */,
|
E5E6AD02CDF23BDAB64700A7 /* ChatInputView.swift */,
|
||||||
DB1A5E8B1C9F2BC4D262C53A /* ChatMessagesView.swift */,
|
DB1A5E8B1C9F2BC4D262C53A /* ChatMessagesView.swift */,
|
||||||
2DC8C86D397B1FCA08E07CBD /* DownloadModalView.swift */,
|
2DC8C86D397B1FCA08E07CBD /* DownloadModalView.swift */,
|
||||||
|
7AE2A32FBB744696DEA77435 /* GenerationSettingsEditor.swift */,
|
||||||
C3C3A76C02AF70A9D8F868FC /* ModelPickerView.swift */,
|
C3C3A76C02AF70A9D8F868FC /* ModelPickerView.swift */,
|
||||||
4239CFF94B819C35A8D4D617 /* MonitorView.swift */,
|
4239CFF94B819C35A8D4D617 /* MonitorView.swift */,
|
||||||
37FEB592E5E717F817B03151 /* SceneManagementView.swift */,
|
37FEB592E5E717F817B03151 /* SceneManagementView.swift */,
|
||||||
@@ -263,6 +271,7 @@
|
|||||||
children = (
|
children = (
|
||||||
A4B359324B5FD8D106C74338 /* ChatMessage.swift */,
|
A4B359324B5FD8D106C74338 /* ChatMessage.swift */,
|
||||||
C04EE8E6418EC6E9B66999B0 /* ChatScene.swift */,
|
C04EE8E6418EC6E9B66999B0 /* ChatScene.swift */,
|
||||||
|
6FAF7455BD387CD2061E0CBF /* GenerationSettings.swift */,
|
||||||
E35452B166893B25E765FF70 /* InferenceStats.swift */,
|
E35452B166893B25E765FF70 /* InferenceStats.swift */,
|
||||||
38DFC212AF4359A45FBE22BA /* ModelConfig.swift */,
|
38DFC212AF4359A45FBE22BA /* ModelConfig.swift */,
|
||||||
);
|
);
|
||||||
@@ -404,6 +413,7 @@
|
|||||||
CBC9DB0799C4ADF2DC9319DA /* APIServerRewriteTests.swift in Sources */,
|
CBC9DB0799C4ADF2DC9319DA /* APIServerRewriteTests.swift in Sources */,
|
||||||
962083CCCC4AC848E0BBBC99 /* CancellationTokenTests.swift in Sources */,
|
962083CCCC4AC848E0BBBC99 /* CancellationTokenTests.swift in Sources */,
|
||||||
95A612524552AF5CC3B1AE62 /* ChatViewModelTests.swift in Sources */,
|
95A612524552AF5CC3B1AE62 /* ChatViewModelTests.swift in Sources */,
|
||||||
|
847B445654860396AF5A8280 /* GenerationSettingsTests.swift in Sources */,
|
||||||
E92B6656C251EDA246B8F582 /* ImageDecoderTests.swift in Sources */,
|
E92B6656C251EDA246B8F582 /* ImageDecoderTests.swift in Sources */,
|
||||||
67B815DC3304BF4B2E9974A8 /* LiveCountersTests.swift in Sources */,
|
67B815DC3304BF4B2E9974A8 /* LiveCountersTests.swift in Sources */,
|
||||||
8E665E21CCCD87A907CEA78D /* ModelBackedInferenceValidationTests.swift in Sources */,
|
8E665E21CCCD87A907CEA78D /* ModelBackedInferenceValidationTests.swift in Sources */,
|
||||||
@@ -437,6 +447,8 @@
|
|||||||
5946258F1DE88CE904584E0B /* ContentView.swift in Sources */,
|
5946258F1DE88CE904584E0B /* ContentView.swift in Sources */,
|
||||||
C07A377244DCD67F4FE709FE /* DownloadModalView.swift in Sources */,
|
C07A377244DCD67F4FE709FE /* DownloadModalView.swift in Sources */,
|
||||||
4DC033E45880B2948B47DEB1 /* FocusedValues.swift in Sources */,
|
4DC033E45880B2948B47DEB1 /* FocusedValues.swift in Sources */,
|
||||||
|
2640EDCA9033D85C0B785557 /* GenerationSettings.swift in Sources */,
|
||||||
|
0BC7203552A161BC852975EA /* GenerationSettingsEditor.swift in Sources */,
|
||||||
A146BBA70CFBEC505BDCDF0D /* ImageDecoder.swift in Sources */,
|
A146BBA70CFBEC505BDCDF0D /* ImageDecoder.swift in Sources */,
|
||||||
EC4FC68608DDFA6A3DF133CC /* InferenceEngine.swift in Sources */,
|
EC4FC68608DDFA6A3DF133CC /* InferenceEngine.swift in Sources */,
|
||||||
2D08769282BD71C170DB0943 /* InferenceStats.swift in Sources */,
|
2D08769282BD71C170DB0943 /* InferenceStats.swift in Sources */,
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ struct ChatDocumentManifest: Codable {
|
|||||||
var messages: [StoredChatMessage]
|
var messages: [StoredChatMessage]
|
||||||
var uiState: StoredChatUIState
|
var uiState: StoredChatUIState
|
||||||
|
|
||||||
static let currentSchemaVersion = 1
|
static let currentSchemaVersion = 2
|
||||||
|
|
||||||
struct StoredModelInfo: Codable, Hashable {
|
struct StoredModelInfo: Codable, Hashable {
|
||||||
var id: String
|
var id: String
|
||||||
@@ -23,6 +23,69 @@ struct ChatDocumentManifest: Codable {
|
|||||||
var systemPrompt: String
|
var systemPrompt: String
|
||||||
var thinkingEnabled: Bool
|
var thinkingEnabled: Bool
|
||||||
var temperature: Double
|
var temperature: Double
|
||||||
|
var topP: Double
|
||||||
|
var topK: Int
|
||||||
|
var minP: Double
|
||||||
|
var maxTokens: Int
|
||||||
|
var repetitionPenalty: Double?
|
||||||
|
var presencePenalty: Double?
|
||||||
|
var frequencyPenalty: Double?
|
||||||
|
|
||||||
|
/// Builds the stored settings from a system prompt and a `GenerationSettings` value.
///
/// Every generation field is copied one-to-one into the stored property of the same name;
/// no normalization is applied here.
///
/// - Parameters:
///   - systemPrompt: The system prompt text to store.
///   - generationSettings: The settings whose fields are copied into this value.
init(systemPrompt: String, generationSettings: GenerationSettings) {
    let source = generationSettings

    self.systemPrompt = systemPrompt
    thinkingEnabled = source.thinkingEnabled

    // Sampling parameters.
    temperature = source.temperature
    topP = source.topP
    topK = source.topK
    minP = source.minP
    maxTokens = source.maxTokens

    // Optional penalties (nil is stored as nil).
    repetitionPenalty = source.repetitionPenalty
    presencePenalty = source.presencePenalty
    frequencyPenalty = source.frequencyPenalty
}
|
||||||
|
|
||||||
|
/// The stored generation fields reassembled into a `GenerationSettings` value.
///
/// The result is passed through `normalized()` before being returned.
var generationSettings: GenerationSettings {
    let stored = GenerationSettings(
        temperature: temperature,
        topP: topP,
        topK: topK,
        minP: minP,
        maxTokens: maxTokens,
        repetitionPenalty: repetitionPenalty,
        presencePenalty: presencePenalty,
        frequencyPenalty: frequencyPenalty,
        thinkingEnabled: thinkingEnabled
    )
    return stored.normalized()
}
|
||||||
|
|
||||||
|
/// Keys used when decoding the stored settings; each raw value equals its case name.
private enum CodingKeys: String, CodingKey {
    case systemPrompt, thinkingEnabled
    case temperature, topP, topK, minP, maxTokens
    case repetitionPenalty, presencePenalty, frequencyPenalty
}
|
||||||
|
|
||||||
|
/// Decodes the stored settings, tolerating missing keys.
///
/// A missing `systemPrompt` becomes the empty string. Missing non-optional generation
/// fields take their value from a default-constructed `GenerationSettings()`. The three
/// penalty fields stay `nil` when absent.
///
/// - Throws: Any error raised by the keyed container (for example a type mismatch).
init(from decoder: Decoder) throws {
    let values = try decoder.container(keyedBy: CodingKeys.self)
    let defaults = GenerationSettings()

    self.systemPrompt = try values.decodeIfPresent(String.self, forKey: .systemPrompt) ?? ""
    self.thinkingEnabled = try values.decodeIfPresent(Bool.self, forKey: .thinkingEnabled) ?? defaults.thinkingEnabled

    // Fields with a default-backed fallback.
    self.temperature = try values.decodeIfPresent(Double.self, forKey: .temperature) ?? defaults.temperature
    self.topP = try values.decodeIfPresent(Double.self, forKey: .topP) ?? defaults.topP
    self.topK = try values.decodeIfPresent(Int.self, forKey: .topK) ?? defaults.topK
    self.minP = try values.decodeIfPresent(Double.self, forKey: .minP) ?? defaults.minP
    self.maxTokens = try values.decodeIfPresent(Int.self, forKey: .maxTokens) ?? defaults.maxTokens

    // Optional penalties: absence is preserved as nil rather than replaced.
    self.repetitionPenalty = try values.decodeIfPresent(Double.self, forKey: .repetitionPenalty)
    self.presencePenalty = try values.decodeIfPresent(Double.self, forKey: .presencePenalty)
    self.frequencyPenalty = try values.decodeIfPresent(Double.self, forKey: .frequencyPenalty)
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct StoredChatUIState: Codable, Hashable {
|
struct StoredChatUIState: Codable, Hashable {
|
||||||
|
|||||||
@@ -12,6 +12,8 @@ enum ChatDocumentMigration {
|
|||||||
switch envelope.schemaVersion {
|
switch envelope.schemaVersion {
|
||||||
case 1:
|
case 1:
|
||||||
return try decoder.decode(ChatDocumentManifest.self, from: data)
|
return try decoder.decode(ChatDocumentManifest.self, from: data)
|
||||||
|
case 2:
|
||||||
|
return try decoder.decode(ChatDocumentManifest.self, from: data)
|
||||||
default:
|
default:
|
||||||
throw ChatDocumentError.unsupportedSchemaVersion(envelope.schemaVersion)
|
throw ChatDocumentError.unsupportedSchemaVersion(envelope.schemaVersion)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,19 +6,41 @@ struct ChatScene: Codable, Identifiable, Hashable {
|
|||||||
var modelId: String?
|
var modelId: String?
|
||||||
var systemPrompt: String
|
var systemPrompt: String
|
||||||
var starterPrompt: String
|
var starterPrompt: String
|
||||||
|
var generationOverrides: GenerationSettingsOverride
|
||||||
|
|
||||||
/// Memberwise initializer for a scene.
///
/// - Parameters:
///   - id: Scene identifier; a fresh `UUID` when omitted.
///   - name: Scene name.
///   - modelId: Optional model identifier; `nil` when omitted.
///   - systemPrompt: System prompt text; empty when omitted.
///   - starterPrompt: Starter prompt text; empty when omitted.
///   - generationOverrides: Per-scene generation overrides; `.none` when omitted.
init(
    id: UUID = UUID(),
    name: String,
    modelId: String? = nil,
    systemPrompt: String = "",
    starterPrompt: String = "",
    generationOverrides: GenerationSettingsOverride = .none
) {
    // Identity.
    self.id = id
    self.name = name

    // Model selection and generation overrides.
    self.modelId = modelId
    self.generationOverrides = generationOverrides

    // Prompts.
    self.systemPrompt = systemPrompt
    self.starterPrompt = starterPrompt
}
|
||||||
|
|
||||||
|
/// Keys used when decoding a scene; each raw value equals its case name.
private enum CodingKeys: String, CodingKey {
    case id, name, modelId
    case systemPrompt, starterPrompt
    case generationOverrides
}
|
||||||
|
|
||||||
|
/// Decodes a scene, tolerating absent optional content.
///
/// `id` and `name` are required. `modelId` may be absent. Missing prompts decode to the
/// empty string, and missing `generationOverrides` decode to `.none`.
///
/// - Throws: Any error raised by the keyed container (missing required key, type mismatch).
init(from decoder: Decoder) throws {
    let values = try decoder.container(keyedBy: CodingKeys.self)

    // Required fields.
    self.id = try values.decode(UUID.self, forKey: .id)
    self.name = try values.decode(String.self, forKey: .name)

    // Optional fields with fallbacks.
    self.modelId = try values.decodeIfPresent(String.self, forKey: .modelId)
    self.systemPrompt = try values.decodeIfPresent(String.self, forKey: .systemPrompt) ?? ""
    self.starterPrompt = try values.decodeIfPresent(String.self, forKey: .starterPrompt) ?? ""
    self.generationOverrides = try values.decodeIfPresent(
        GenerationSettingsOverride.self,
        forKey: .generationOverrides
    ) ?? .none
}
|
||||||
|
|
||||||
var trimmedName: String {
|
var trimmedName: String {
|
||||||
|
|||||||
157
MLXServer/Models/GenerationSettings.swift
Normal file
157
MLXServer/Models/GenerationSettings.swift
Normal file
@@ -0,0 +1,157 @@
|
|||||||
|
import Foundation
|
||||||
|
|
||||||
|
/// A complete set of generation parameters, with presets, clamping, and override merging.
struct GenerationSettings: Codable, Hashable, Sendable {
    var temperature: Double
    var topP: Double
    var topK: Int
    var minP: Double
    var maxTokens: Int
    var repetitionPenalty: Double?
    var presencePenalty: Double?
    var frequencyPenalty: Double?
    var thinkingEnabled: Bool

    // MARK: - Presets

    /// Settings produced by the initializer's own default arguments.
    static let generalDefault = GenerationSettings()

    /// Preset with a lower temperature, top-k of 40 and thinking enabled.
    static let technicalDefault = GenerationSettings(
        temperature: 0.35,
        topP: 0.9,
        topK: 40,
        minP: 0.0,
        maxTokens: 4096,
        repetitionPenalty: 1.05,
        presencePenalty: nil,
        frequencyPenalty: nil,
        thinkingEnabled: true
    )

    /// Preset with a higher temperature, top-k of 60 and thinking disabled.
    static let roleplayDefault = GenerationSettings(
        temperature: 0.85,
        topP: 0.95,
        topK: 60,
        minP: 0.0,
        maxTokens: 4096,
        repetitionPenalty: 1.02,
        presencePenalty: nil,
        frequencyPenalty: nil,
        thinkingEnabled: false
    )

    // MARK: - Construction

    /// Memberwise initializer; every argument has a default. Values are stored as given.
    init(
        temperature: Double = 0.7,
        topP: Double = 1.0,
        topK: Int = 0,
        minP: Double = 0.0,
        maxTokens: Int = 4096,
        repetitionPenalty: Double? = nil,
        presencePenalty: Double? = nil,
        frequencyPenalty: Double? = nil,
        thinkingEnabled: Bool = true
    ) {
        self.temperature = temperature
        self.topP = topP
        self.topK = topK
        self.minP = minP
        self.maxTokens = maxTokens
        self.repetitionPenalty = repetitionPenalty
        self.presencePenalty = presencePenalty
        self.frequencyPenalty = frequencyPenalty
        self.thinkingEnabled = thinkingEnabled
    }

    /// Resolves the default settings for `modelId`.
    ///
    /// Uses the `defaultGenerationSettings` of the configuration returned by
    /// `ModelConfig.resolve(_:)`, or `generalDefault` when that lookup yields nothing.
    /// Passing `legacyThinkingEnabled: false` forces thinking off; `true` leaves the
    /// model's own value untouched. The result is normalized.
    static func modelDefault(for modelId: String, legacyThinkingEnabled: Bool = true) -> GenerationSettings {
        var settings = ModelConfig.resolve(modelId)?.defaultGenerationSettings ?? Self.generalDefault
        if !legacyThinkingEnabled {
            settings.thinkingEnabled = false
        }
        return settings.normalized()
    }

    // MARK: - Derivation

    /// Returns a copy with every numeric field forced into its accepted range.
    ///
    /// - `temperature` and `topK` are floored at 0, `maxTokens` at 1.
    /// - `topP` and `minP` are clamped to `0...1`.
    /// - `repetitionPenalty` becomes `nil` unless it is strictly positive.
    /// - `presencePenalty` and `frequencyPenalty` are clamped to `-2...2` when present.
    func normalized() -> GenerationSettings {
        var copy = self
        copy.temperature = max(0, temperature)
        copy.topP = Self.clampToUnit(topP)
        copy.topK = max(0, topK)
        copy.minP = Self.clampToUnit(minP)
        copy.maxTokens = max(1, maxTokens)
        copy.repetitionPenalty = Self.normalizePositive(repetitionPenalty)
        copy.presencePenalty = Self.normalizeSignedPenalty(presencePenalty)
        copy.frequencyPenalty = Self.normalizeSignedPenalty(frequencyPenalty)
        return copy
    }

    /// Returns these settings with every non-nil field of `overrides` substituted in,
    /// then normalized. A `nil` override leaves the corresponding base value in place.
    func applying(_ overrides: GenerationSettingsOverride) -> GenerationSettings {
        var merged = self
        if let value = overrides.temperature { merged.temperature = value }
        if let value = overrides.topP { merged.topP = value }
        if let value = overrides.topK { merged.topK = value }
        if let value = overrides.minP { merged.minP = value }
        if let value = overrides.maxTokens { merged.maxTokens = value }
        if let value = overrides.repetitionPenalty { merged.repetitionPenalty = value }
        if let value = overrides.presencePenalty { merged.presencePenalty = value }
        if let value = overrides.frequencyPenalty { merged.frequencyPenalty = value }
        if let value = overrides.thinkingEnabled { merged.thinkingEnabled = value }
        return merged.normalized()
    }

    // MARK: - Helpers

    /// Clamps `value` to the closed interval `0...1`.
    private static func clampToUnit(_ value: Double) -> Double {
        min(max(value, 0), 1)
    }

    /// Passes through strictly positive values; anything else (including `nil`) yields `nil`.
    private static func normalizePositive(_ value: Double?) -> Double? {
        value.flatMap { $0 > 0 ? $0 : nil }
    }

    /// Clamps a present value to `-2...2`; `nil` stays `nil`.
    private static func normalizeSignedPenalty(_ value: Double?) -> Double? {
        value.map { min(max($0, -2), 2) }
    }
}
|
||||||
|
|
||||||
|
/// A sparse set of generation parameters: each field is either an override value or `nil`.
struct GenerationSettingsOverride: Codable, Hashable, Sendable {
    var temperature: Double?
    var topP: Double?
    var topK: Int?
    var minP: Double?
    var maxTokens: Int?
    var repetitionPenalty: Double?
    var presencePenalty: Double?
    var frequencyPenalty: Double?
    var thinkingEnabled: Bool?

    /// The override with every field left as `nil`.
    static let none = GenerationSettingsOverride()

    /// Memberwise initializer; every field defaults to `nil`.
    init(
        temperature: Double? = nil,
        topP: Double? = nil,
        topK: Int? = nil,
        minP: Double? = nil,
        maxTokens: Int? = nil,
        repetitionPenalty: Double? = nil,
        presencePenalty: Double? = nil,
        frequencyPenalty: Double? = nil,
        thinkingEnabled: Bool? = nil
    ) {
        self.temperature = temperature
        self.topP = topP
        self.topK = topK
        self.minP = minP
        self.maxTokens = maxTokens
        self.repetitionPenalty = repetitionPenalty
        self.presencePenalty = presencePenalty
        self.frequencyPenalty = frequencyPenalty
        self.thinkingEnabled = thinkingEnabled
    }

    /// `true` when at least one field carries a value.
    var hasOverrides: Bool {
        // `none` has every field nil, so differing from it means some field is non-nil.
        self != Self.none
    }
}
|
||||||
@@ -15,6 +15,7 @@ struct ModelConfig: Identifiable, Hashable {
|
|||||||
let loaderKind: LoaderKind
|
let loaderKind: LoaderKind
|
||||||
let supportsImages: Bool
|
let supportsImages: Bool
|
||||||
let supportsTools: Bool
|
let supportsTools: Bool
|
||||||
|
let defaultGenerationSettings: GenerationSettings
|
||||||
|
|
||||||
/// All models supported by the app.
|
/// All models supported by the app.
|
||||||
static let availableModels: [ModelConfig] = [
|
static let availableModels: [ModelConfig] = [
|
||||||
@@ -25,7 +26,8 @@ struct ModelConfig: Identifiable, Hashable {
|
|||||||
contextLength: 128_000,
|
contextLength: 128_000,
|
||||||
loaderKind: .vlm,
|
loaderKind: .vlm,
|
||||||
supportsImages: true,
|
supportsImages: true,
|
||||||
supportsTools: true
|
supportsTools: true,
|
||||||
|
defaultGenerationSettings: .technicalDefault
|
||||||
),
|
),
|
||||||
ModelConfig(
|
ModelConfig(
|
||||||
id: "qwen",
|
id: "qwen",
|
||||||
@@ -34,7 +36,8 @@ struct ModelConfig: Identifiable, Hashable {
|
|||||||
contextLength: 256_000,
|
contextLength: 256_000,
|
||||||
loaderKind: .vlm,
|
loaderKind: .vlm,
|
||||||
supportsImages: true,
|
supportsImages: true,
|
||||||
supportsTools: true
|
supportsTools: true,
|
||||||
|
defaultGenerationSettings: .technicalDefault
|
||||||
),
|
),
|
||||||
ModelConfig(
|
ModelConfig(
|
||||||
id: "qwen3.5-0.8b",
|
id: "qwen3.5-0.8b",
|
||||||
@@ -43,7 +46,8 @@ struct ModelConfig: Identifiable, Hashable {
|
|||||||
contextLength: 256_000,
|
contextLength: 256_000,
|
||||||
loaderKind: .vlm,
|
loaderKind: .vlm,
|
||||||
supportsImages: true,
|
supportsImages: true,
|
||||||
supportsTools: true
|
supportsTools: true,
|
||||||
|
defaultGenerationSettings: .technicalDefault
|
||||||
),
|
),
|
||||||
ModelConfig(
|
ModelConfig(
|
||||||
id: "qwen3.5-9b",
|
id: "qwen3.5-9b",
|
||||||
@@ -52,7 +56,8 @@ struct ModelConfig: Identifiable, Hashable {
|
|||||||
contextLength: 256_000,
|
contextLength: 256_000,
|
||||||
loaderKind: .vlm,
|
loaderKind: .vlm,
|
||||||
supportsImages: true,
|
supportsImages: true,
|
||||||
supportsTools: true
|
supportsTools: true,
|
||||||
|
defaultGenerationSettings: .technicalDefault
|
||||||
),
|
),
|
||||||
ModelConfig(
|
ModelConfig(
|
||||||
id: "stheno",
|
id: "stheno",
|
||||||
@@ -61,7 +66,8 @@ struct ModelConfig: Identifiable, Hashable {
|
|||||||
contextLength: 8_192,
|
contextLength: 8_192,
|
||||||
loaderKind: .llm,
|
loaderKind: .llm,
|
||||||
supportsImages: false,
|
supportsImages: false,
|
||||||
supportsTools: false
|
supportsTools: false,
|
||||||
|
defaultGenerationSettings: .roleplayDefault
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
@@ -152,15 +152,52 @@ struct APIChatCompletionRequest: Codable {
|
|||||||
let messages: [APIChatMessage]
|
let messages: [APIChatMessage]
|
||||||
let temperature: Double?
|
let temperature: Double?
|
||||||
let top_p: Double?
|
let top_p: Double?
|
||||||
|
let top_k: Int?
|
||||||
|
let min_p: Double?
|
||||||
let max_tokens: Int?
|
let max_tokens: Int?
|
||||||
let stream: Bool?
|
let stream: Bool?
|
||||||
let stop: StopSequence?
|
let stop: StopSequence?
|
||||||
let tools: [APIToolDefinition]?
|
let tools: [APIToolDefinition]?
|
||||||
let tool_choice: AnyCodable?
|
let tool_choice: AnyCodable?
|
||||||
|
let repetition_penalty: Double?
|
||||||
let frequency_penalty: Double?
|
let frequency_penalty: Double?
|
||||||
let presence_penalty: Double?
|
let presence_penalty: Double?
|
||||||
let n: Int?
|
let n: Int?
|
||||||
|
|
||||||
|
/// Memberwise initializer; every parameter except `model` and `messages` defaults to `nil`.
///
/// `top_k`, `min_p` and `repetition_penalty` are declared after `n` in the parameter
/// list, so callers must supply them in that position.
init(
    model: String?,
    messages: [APIChatMessage],
    temperature: Double? = nil,
    top_p: Double? = nil,
    max_tokens: Int? = nil,
    stream: Bool? = nil,
    stop: StopSequence? = nil,
    tools: [APIToolDefinition]? = nil,
    tool_choice: AnyCodable? = nil,
    frequency_penalty: Double? = nil,
    presence_penalty: Double? = nil,
    n: Int? = nil,
    top_k: Int? = nil,
    min_p: Double? = nil,
    repetition_penalty: Double? = nil
) {
    // Request identity and conversation.
    self.model = model
    self.messages = messages

    // Sampling controls.
    self.temperature = temperature
    self.top_p = top_p
    self.top_k = top_k
    self.min_p = min_p

    // Penalties.
    self.repetition_penalty = repetition_penalty
    self.frequency_penalty = frequency_penalty
    self.presence_penalty = presence_penalty

    // Output shaping.
    self.max_tokens = max_tokens
    self.stream = stream
    self.stop = stop
    self.n = n

    // Tool calling.
    self.tools = tools
    self.tool_choice = tool_choice
}
|
||||||
|
|
||||||
enum StopSequence: Codable {
|
enum StopSequence: Codable {
|
||||||
case single(String)
|
case single(String)
|
||||||
case multiple([String])
|
case multiple([String])
|
||||||
|
|||||||
@@ -15,12 +15,19 @@ final class APIServer {
|
|||||||
let matchedTokenCount: Int
|
let matchedTokenCount: Int
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Debug event describing the generation settings that were resolved for one
/// chat-completion request. Delivered through `debugGenerationSettingsEventHandler`.
struct DebugGenerationSettingsEvent: Sendable {
    // Request identifier and the model identifier the settings were resolved for.
    let requestId, modelId: String
    // Settings after request-level overrides were applied on top of the stored defaults.
    let settings: GenerationSettings
}
|
||||||
|
|
||||||
private struct ActiveRequest {
|
private struct ActiveRequest {
|
||||||
let connection: NWConnection
|
let connection: NWConnection
|
||||||
let cancellation: CancellationToken
|
let cancellation: CancellationToken
|
||||||
}
|
}
|
||||||
|
|
||||||
nonisolated(unsafe) static var debugLookupEventHandler: (@Sendable (DebugLookupEvent) -> Void)?
|
nonisolated(unsafe) static var debugLookupEventHandler: (@Sendable (DebugLookupEvent) -> Void)?
|
||||||
|
nonisolated(unsafe) static var debugGenerationSettingsEventHandler: (@Sendable (DebugGenerationSettingsEvent) -> Void)?
|
||||||
|
|
||||||
var isRunning = false
|
var isRunning = false
|
||||||
var port: Int = 1234
|
var port: Int = 1234
|
||||||
@@ -256,15 +263,26 @@ final class APIServer {
|
|||||||
|
|
||||||
modelManager.touchActivity()
|
modelManager.touchActivity()
|
||||||
|
|
||||||
let isStream = request.stream ?? false
|
|
||||||
let temperature = request.temperature ?? 0.7
|
|
||||||
let topP = request.top_p ?? 1.0
|
|
||||||
let maxTokens = request.max_tokens ?? 4096
|
|
||||||
let requestId = "chatcmpl-\(UUID().uuidString.prefix(12).lowercased())"
|
let requestId = "chatcmpl-\(UUID().uuidString.prefix(12).lowercased())"
|
||||||
let created = Int(Date().timeIntervalSince1970)
|
let created = Int(Date().timeIntervalSince1970)
|
||||||
let modelName = request.model ?? modelManager.currentModel?.repoId ?? "unknown"
|
let modelName = request.model ?? modelManager.currentModel?.repoId ?? "unknown"
|
||||||
let currentModel = modelManager.currentModel
|
let currentModel = modelManager.currentModel
|
||||||
let contextLength = modelManager.currentModel?.contextLength ?? 0
|
let contextLength = modelManager.currentModel?.contextLength ?? 0
|
||||||
|
let baseSettings = Preferences.generationSettings(forModelId: currentModel?.id ?? ModelConfig.default.id)
|
||||||
|
let generationSettings = baseSettings.applying(
|
||||||
|
GenerationSettingsOverride(
|
||||||
|
temperature: request.temperature,
|
||||||
|
topP: request.top_p,
|
||||||
|
topK: request.top_k,
|
||||||
|
minP: request.min_p,
|
||||||
|
maxTokens: request.max_tokens,
|
||||||
|
repetitionPenalty: request.repetition_penalty,
|
||||||
|
presencePenalty: request.presence_penalty,
|
||||||
|
frequencyPenalty: request.frequency_penalty
|
||||||
|
)
|
||||||
|
)
|
||||||
|
let isStream = request.stream ?? false
|
||||||
|
let maxTokens = generationSettings.maxTokens
|
||||||
|
|
||||||
if let tools = request.tools, !tools.isEmpty, currentModel?.supportsTools != true {
|
if let tools = request.tools, !tools.isEmpty, currentModel?.supportsTools != true {
|
||||||
sendResponse(
|
sendResponse(
|
||||||
@@ -281,10 +299,14 @@ final class APIServer {
|
|||||||
let preparedPrompt = PromptBuilder.build(
|
let preparedPrompt = PromptBuilder.build(
|
||||||
from: request,
|
from: request,
|
||||||
modelId: currentModelRepoId,
|
modelId: currentModelRepoId,
|
||||||
thinkingEnabled: Preferences.enableThinking
|
thinkingEnabled: generationSettings.thinkingEnabled
|
||||||
)
|
)
|
||||||
let isQwen = currentModelRepoId.lowercased().contains("qwen")
|
let isQwen = currentModelRepoId.lowercased().contains("qwen")
|
||||||
|
|
||||||
|
Self.debugGenerationSettingsEventHandler?(
|
||||||
|
DebugGenerationSettingsEvent(requestId: requestId, modelId: currentModelRepoId, settings: generationSettings)
|
||||||
|
)
|
||||||
|
|
||||||
if preparedPrompt.containsImages, currentModel?.supportsImages != true {
|
if preparedPrompt.containsImages, currentModel?.supportsImages != true {
|
||||||
LiveCounters.shared.requestCompleted(requestId: requestId, generationTokens: 0)
|
LiveCounters.shared.requestCompleted(requestId: requestId, generationTokens: 0)
|
||||||
sendResponse(
|
sendResponse(
|
||||||
@@ -315,8 +337,16 @@ final class APIServer {
|
|||||||
|
|
||||||
let generateParams = GenerateParameters(
|
let generateParams = GenerateParameters(
|
||||||
maxTokens: maxTokens,
|
maxTokens: maxTokens,
|
||||||
temperature: Float(temperature),
|
temperature: Float(generationSettings.temperature),
|
||||||
topP: Float(topP)
|
topP: Float(generationSettings.topP),
|
||||||
|
topK: generationSettings.topK,
|
||||||
|
minP: Float(generationSettings.minP),
|
||||||
|
repetitionPenalty: generationSettings.repetitionPenalty.map(Float.init),
|
||||||
|
repetitionContextSize: 128,
|
||||||
|
presencePenalty: generationSettings.presencePenalty.map(Float.init),
|
||||||
|
presenceContextSize: 128,
|
||||||
|
frequencyPenalty: generationSettings.frequencyPenalty.map(Float.init),
|
||||||
|
frequencyContextSize: 128
|
||||||
)
|
)
|
||||||
let currentModelId = modelManager.currentModel?.id ?? modelName
|
let currentModelId = modelManager.currentModel?.id ?? modelName
|
||||||
let engine = InferenceEngine(container: container)
|
let engine = InferenceEngine(container: container)
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ enum Preferences {
|
|||||||
|
|
||||||
private static let jsonEncoder = JSONEncoder()
|
private static let jsonEncoder = JSONEncoder()
|
||||||
private static let jsonDecoder = JSONDecoder()
|
private static let jsonDecoder = JSONDecoder()
|
||||||
|
private static let legacyThinkingDefault = true
|
||||||
|
|
||||||
// MARK: - Last used model
|
// MARK: - Last used model
|
||||||
|
|
||||||
@@ -79,12 +80,53 @@ enum Preferences {
|
|||||||
// MARK: - Thinking mode
|
// MARK: - Thinking mode
|
||||||
|
|
||||||
private static let enableThinkingKey = "enableThinking"
|
private static let enableThinkingKey = "enableThinking"
|
||||||
|
private static let modelGenerationSettingsKey = "modelGenerationSettings"
|
||||||
|
|
||||||
/// Whether to enable thinking/reasoning mode for models that support it (e.g. Qwen3.5).
|
/// Whether to enable thinking/reasoning mode for models that support it (e.g. Qwen3.5).
|
||||||
/// When disabled, the model skips internal reasoning and responds directly.
|
/// When disabled, the model skips internal reasoning and responds directly.
|
||||||
static var enableThinking: Bool {
|
static var enableThinking: Bool {
|
||||||
get { defaults.object(forKey: enableThinkingKey) == nil ? true : defaults.bool(forKey: enableThinkingKey) }
|
get {
|
||||||
set { defaults.set(newValue, forKey: enableThinkingKey) }
|
let modelId = defaultModelId ?? lastModelId ?? ModelConfig.default.id
|
||||||
|
if modelGenerationSettingsMap[modelId] != nil {
|
||||||
|
return generationSettings(forModelId: modelId).thinkingEnabled
|
||||||
|
}
|
||||||
|
return defaults.object(forKey: enableThinkingKey) == nil ? Self.legacyThinkingDefault : defaults.bool(forKey: enableThinkingKey)
|
||||||
|
}
|
||||||
|
set {
|
||||||
|
let modelId = defaultModelId ?? lastModelId ?? ModelConfig.default.id
|
||||||
|
var settings = generationSettings(forModelId: modelId)
|
||||||
|
settings.thinkingEnabled = newValue
|
||||||
|
setGenerationSettings(settings, forModelId: modelId)
|
||||||
|
defaults.set(newValue, forKey: enableThinkingKey)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the normalized generation settings for `modelId`.
///
/// A stored per-model entry wins. When none exists, the result comes from
/// `GenerationSettings.modelDefault(for:legacyThinkingEnabled:)`, seeded with the
/// legacy `enableThinking` preference (or `legacyThinkingDefault` when that key
/// has never been written).
static func generationSettings(forModelId modelId: String) -> GenerationSettings {
    let legacyThinking: Bool
    if defaults.object(forKey: enableThinkingKey) == nil {
        legacyThinking = Self.legacyThinkingDefault
    } else {
        legacyThinking = defaults.bool(forKey: enableThinkingKey)
    }

    if let stored = modelGenerationSettingsMap[modelId] {
        return stored.normalized()
    }
    return GenerationSettings
        .modelDefault(for: modelId, legacyThinkingEnabled: legacyThinking)
        .normalized()
}
|
||||||
|
|
||||||
|
/// Stores `settings` (after normalization) as the defaults for `modelId`.
///
/// The normalized `thinkingEnabled` flag is also mirrored into the legacy
/// `enableThinking` key.
// NOTE(review): the legacy key is global, and `generationSettings(forModelId:)` passes it to
// `modelDefault` for models without a stored entry — confirm that saving one model's settings
// is meant to influence the fallback used for other models.
static func setGenerationSettings(_ settings: GenerationSettings, forModelId modelId: String) {
    let sanitized = settings.normalized()

    var storedSettings = modelGenerationSettingsMap
    storedSettings.updateValue(sanitized, forKey: modelId)
    modelGenerationSettingsMap = storedSettings

    defaults.set(sanitized.thinkingEnabled, forKey: enableThinkingKey)
}
|
||||||
|
|
||||||
|
/// Reports whether an explicit settings entry has been stored for `modelId`.
static func hasGenerationSettings(forModelId modelId: String) -> Bool {
    return modelGenerationSettingsMap.keys.contains(modelId)
}
|
||||||
|
|
||||||
|
private static var modelGenerationSettingsMap: [String: GenerationSettings] {
|
||||||
|
get {
|
||||||
|
guard let data = defaults.data(forKey: modelGenerationSettingsKey) else { return [:] }
|
||||||
|
return (try? jsonDecoder.decode([String: GenerationSettings].self, from: data)) ?? [:]
|
||||||
|
}
|
||||||
|
set {
|
||||||
|
guard let data = try? jsonEncoder.encode(newValue) else { return }
|
||||||
|
defaults.set(data, forKey: modelGenerationSettingsKey)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// MARK: - Idle unload
|
// MARK: - Idle unload
|
||||||
|
|||||||
@@ -28,8 +28,7 @@ final class ChatViewModel {
|
|||||||
private var documentId = UUID()
|
private var documentId = UUID()
|
||||||
private var documentCreatedAt = Date()
|
private var documentCreatedAt = Date()
|
||||||
private var documentSystemPromptOverride: String?
|
private var documentSystemPromptOverride: String?
|
||||||
private var documentThinkingOverride: Bool?
|
private var documentGenerationSettingsOverride: GenerationSettings?
|
||||||
private var documentTemperature = 0.7
|
|
||||||
|
|
||||||
let modelManager: ModelManager
|
let modelManager: ModelManager
|
||||||
let apiServer = APIServer()
|
let apiServer = APIServer()
|
||||||
@@ -55,12 +54,25 @@ final class ChatViewModel {
|
|||||||
guard let container = modelManager.modelContainer else { return }
|
guard let container = modelManager.modelContainer else { return }
|
||||||
if chatSession == nil {
|
if chatSession == nil {
|
||||||
let systemPrompt = effectiveSystemPrompt
|
let systemPrompt = effectiveSystemPrompt
|
||||||
|
let generationSettings = effectiveGenerationSettings
|
||||||
// Pass enable_thinking to the Jinja chat template context.
|
// Pass enable_thinking to the Jinja chat template context.
|
||||||
// Qwen3.5 and similar models use this to control reasoning mode.
|
// Qwen3.5 and similar models use this to control reasoning mode.
|
||||||
let thinkingContext: [String: any Sendable]? = effectiveThinkingEnabled
|
let thinkingContext: [String: any Sendable]? = generationSettings.thinkingEnabled
|
||||||
? nil
|
? nil
|
||||||
: ["enable_thinking": false]
|
: ["enable_thinking": false]
|
||||||
let generateParameters = GenerateParameters(temperature: Float(documentTemperature))
|
let generateParameters = GenerateParameters(
|
||||||
|
maxTokens: generationSettings.maxTokens,
|
||||||
|
temperature: Float(generationSettings.temperature),
|
||||||
|
topP: Float(generationSettings.topP),
|
||||||
|
topK: generationSettings.topK,
|
||||||
|
minP: Float(generationSettings.minP),
|
||||||
|
repetitionPenalty: generationSettings.repetitionPenalty.map(Float.init),
|
||||||
|
repetitionContextSize: 128,
|
||||||
|
presencePenalty: generationSettings.presencePenalty.map(Float.init),
|
||||||
|
presenceContextSize: 128,
|
||||||
|
frequencyPenalty: generationSettings.frequencyPenalty.map(Float.init),
|
||||||
|
frequencyContextSize: 128
|
||||||
|
)
|
||||||
let history = conversation.messages.compactMap(historyMessage(from:))
|
let history = conversation.messages.compactMap(historyMessage(from:))
|
||||||
if history.isEmpty {
|
if history.isEmpty {
|
||||||
chatSession = ChatSession(
|
chatSession = ChatSession(
|
||||||
@@ -96,8 +108,17 @@ final class ChatViewModel {
|
|||||||
return parts.joined(separator: "\n\n")
|
return parts.joined(separator: "\n\n")
|
||||||
}
|
}
|
||||||
|
|
||||||
private var effectiveThinkingEnabled: Bool {
|
private var effectiveGenerationSettings: GenerationSettings {
|
||||||
documentThinkingOverride ?? Preferences.enableThinking
|
if let documentGenerationSettingsOverride {
|
||||||
|
return documentGenerationSettingsOverride
|
||||||
|
}
|
||||||
|
|
||||||
|
let modelId = activeScene?.resolvedModel?.id
|
||||||
|
?? modelManager.currentModel?.id
|
||||||
|
?? Preferences.defaultModelId
|
||||||
|
?? ModelConfig.default.id
|
||||||
|
return Preferences.generationSettings(forModelId: modelId)
|
||||||
|
.applying(activeScene?.generationOverrides ?? .none)
|
||||||
}
|
}
|
||||||
|
|
||||||
func send() {
|
func send() {
|
||||||
@@ -269,8 +290,7 @@ final class ChatViewModel {
|
|||||||
documentId = package.manifest.documentId
|
documentId = package.manifest.documentId
|
||||||
documentCreatedAt = package.manifest.createdAt
|
documentCreatedAt = package.manifest.createdAt
|
||||||
documentSystemPromptOverride = package.manifest.settings.systemPrompt
|
documentSystemPromptOverride = package.manifest.settings.systemPrompt
|
||||||
documentThinkingOverride = package.manifest.settings.thinkingEnabled
|
documentGenerationSettingsOverride = package.manifest.settings.generationSettings
|
||||||
documentTemperature = package.manifest.settings.temperature
|
|
||||||
resetSession()
|
resetSession()
|
||||||
lastSavedSnapshotHash = try snapshotHash()
|
lastSavedSnapshotHash = try snapshotHash()
|
||||||
hasUnsavedChanges = false
|
hasUnsavedChanges = false
|
||||||
@@ -316,8 +336,7 @@ final class ChatViewModel {
|
|||||||
documentId = UUID()
|
documentId = UUID()
|
||||||
documentCreatedAt = Date()
|
documentCreatedAt = Date()
|
||||||
documentSystemPromptOverride = nil
|
documentSystemPromptOverride = nil
|
||||||
documentThinkingOverride = nil
|
documentGenerationSettingsOverride = nil
|
||||||
documentTemperature = 0.7
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private func restoreMessage(
|
private func restoreMessage(
|
||||||
@@ -398,11 +417,7 @@ final class ChatViewModel {
|
|||||||
updatedAt: updatedAt,
|
updatedAt: updatedAt,
|
||||||
appVersion: Bundle.main.object(forInfoDictionaryKey: "CFBundleShortVersionString") as? String ?? "1.0.0",
|
appVersion: Bundle.main.object(forInfoDictionaryKey: "CFBundleShortVersionString") as? String ?? "1.0.0",
|
||||||
model: currentStoredModelInfo,
|
model: currentStoredModelInfo,
|
||||||
settings: .init(
|
settings: .init(systemPrompt: effectiveSystemPrompt, generationSettings: effectiveGenerationSettings),
|
||||||
systemPrompt: effectiveSystemPrompt,
|
|
||||||
thinkingEnabled: effectiveThinkingEnabled,
|
|
||||||
temperature: documentTemperature
|
|
||||||
),
|
|
||||||
messages: messages,
|
messages: messages,
|
||||||
uiState: .init(
|
uiState: .init(
|
||||||
draftInput: inputText,
|
draftInput: inputText,
|
||||||
@@ -443,11 +458,7 @@ final class ChatViewModel {
|
|||||||
documentId: documentId,
|
documentId: documentId,
|
||||||
createdAt: documentCreatedAt,
|
createdAt: documentCreatedAt,
|
||||||
model: currentStoredModelInfo,
|
model: currentStoredModelInfo,
|
||||||
settings: .init(
|
settings: .init(systemPrompt: effectiveSystemPrompt, generationSettings: effectiveGenerationSettings),
|
||||||
systemPrompt: effectiveSystemPrompt,
|
|
||||||
thinkingEnabled: effectiveThinkingEnabled,
|
|
||||||
temperature: documentTemperature
|
|
||||||
),
|
|
||||||
messages: makeManifest(updatedAt: documentCreatedAt).messages,
|
messages: makeManifest(updatedAt: documentCreatedAt).messages,
|
||||||
uiState: .init(draftInput: inputText, scrollAnchorMessageId: conversation.messages.last?.id)
|
uiState: .init(draftInput: inputText, scrollAnchorMessageId: conversation.messages.last?.id)
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -16,7 +16,8 @@ final class SceneStore {
|
|||||||
name: scene.displayName,
|
name: scene.displayName,
|
||||||
modelId: scene.modelId,
|
modelId: scene.modelId,
|
||||||
systemPrompt: scene.systemPrompt,
|
systemPrompt: scene.systemPrompt,
|
||||||
starterPrompt: scene.starterPrompt
|
starterPrompt: scene.starterPrompt,
|
||||||
|
generationOverrides: scene.generationOverrides
|
||||||
)
|
)
|
||||||
} else {
|
} else {
|
||||||
nextScene = .empty
|
nextScene = .empty
|
||||||
|
|||||||
144
MLXServer/Views/GenerationSettingsEditor.swift
Normal file
144
MLXServer/Views/GenerationSettingsEditor.swift
Normal file
@@ -0,0 +1,144 @@
|
|||||||
|
import SwiftUI
|
||||||
|
|
||||||
|
/// Number format for floating-point generation fields: at most two fraction digits.
private let generationDoubleFormat: FloatingPointFormatStyle<Double> = .number.precision(.fractionLength(0...2))

/// Number format for integer generation fields: no grouping separators.
private let generationIntegerFormat: IntegerFormatStyle<Int> = .number.grouping(.never)
|
||||||
|
|
||||||
|
/// Form rows for editing a complete set of generation defaults.
///
/// Every field of `settings` is edited in place through the binding. The three
/// penalty fields are optional: "Set" gives an unset penalty a starting value and
/// "Clear" returns it to `nil`.
struct GenerationDefaultsEditor: View {
    @Binding var settings: GenerationSettings

    var body: some View {
        Toggle("Enable thinking mode", isOn: $settings.thinkingEnabled)
        doubleRow("Temperature", value: $settings.temperature)
        doubleRow("Top P", value: $settings.topP)
        intRow("Top K", value: $settings.topK)
        doubleRow("Min P", value: $settings.minP)
        intRow("Max tokens", value: $settings.maxTokens)
        // Each penalty starts from its conventional "no effect" value: 1.0 for the
        // repetition penalty, 0 for the OpenAI-style presence and frequency penalties.
        // NOTE(review): confirm these match the no-op values of the sampler in use.
        optionalDoubleRow("Repetition penalty", value: $settings.repetitionPenalty, neutralValue: 1.0)
        optionalDoubleRow("Presence penalty", value: $settings.presencePenalty, neutralValue: 0.0)
        optionalDoubleRow("Frequency penalty", value: $settings.frequencyPenalty, neutralValue: 0.0)
    }

    /// A labelled, trailing-aligned text field bound to a required `Double`.
    private func doubleRow(_ title: String, value: Binding<Double>) -> some View {
        HStack {
            Text(title)
            Spacer()
            TextField(title, value: value, format: generationDoubleFormat)
                .multilineTextAlignment(.trailing)
                .frame(width: 90)
        }
    }

    /// A labelled, trailing-aligned text field bound to a required `Int`.
    private func intRow(_ title: String, value: Binding<Int>) -> some View {
        HStack {
            Text(title)
            Spacer()
            TextField(title, value: value, format: generationIntegerFormat)
                .multilineTextAlignment(.trailing)
                .frame(width: 90)
        }
    }

    /// A labelled text field for an optional `Double`, plus a "Set"/"Clear" toggle button.
    ///
    /// - Parameters:
    ///   - title: Row label, also used as the text field's title.
    ///   - value: The optional setting being edited.
    ///   - neutralValue: Value shown while the setting is `nil` and written when "Set" is pressed.
    private func optionalDoubleRow(_ title: String, value: Binding<Double?>, neutralValue: Double) -> some View {
        HStack {
            Text(title)
            Spacer()
            TextField(title, value: binding(for: value, neutralValue: neutralValue), format: generationDoubleFormat)
                .multilineTextAlignment(.trailing)
                .frame(width: 90)
            Button(value.wrappedValue == nil ? "Set" : "Clear") {
                if value.wrappedValue == nil {
                    value.wrappedValue = neutralValue
                } else {
                    value.wrappedValue = nil
                }
            }
            .buttonStyle(.link)
        }
    }

    /// Adapts an optional binding to a non-optional one for `TextField`.
    /// Reads substitute `neutralValue` for `nil`; writes always store a concrete value.
    private func binding(for value: Binding<Double?>, neutralValue: Double) -> Binding<Double> {
        Binding(
            get: { value.wrappedValue ?? neutralValue },
            set: { value.wrappedValue = $0 }
        )
    }
}
|
||||||
|
|
||||||
|
/// Form rows for editing per-scene generation overrides.
///
/// Each field of `overrides` is optional. While a field is `nil` the row shows the
/// corresponding value from `inheritedSettings` with an "Inherited" marker; "Override"
/// copies that value into the override and "Clear" resets the override to `nil`.
struct GenerationOverridesEditor: View {
    @Binding var overrides: GenerationSettingsOverride
    /// Settings the scene falls back to for every field that is not overridden.
    let inheritedSettings: GenerationSettings
    /// Human-readable description of where `inheritedSettings` came from (used in the footer text).
    let inheritedSource: String

    var body: some View {
        Picker("Thinking mode", selection: $overrides.thinkingEnabled) {
            Text("Inherited (\(inheritedSettings.thinkingEnabled ? "Enabled" : "Disabled"))").tag(Optional<Bool>.none)
            Text("Enabled").tag(Optional(true))
            Text("Disabled").tag(Optional(false))
        }

        optionalDoubleRow("Temperature", value: $overrides.temperature, inheritedValue: inheritedSettings.temperature)
        optionalDoubleRow("Top P", value: $overrides.topP, inheritedValue: inheritedSettings.topP)
        optionalIntRow("Top K", value: $overrides.topK, inheritedValue: inheritedSettings.topK)
        optionalDoubleRow("Min P", value: $overrides.minP, inheritedValue: inheritedSettings.minP)
        optionalIntRow("Max tokens", value: $overrides.maxTokens, inheritedValue: inheritedSettings.maxTokens)
        // When the inherited penalty is unset, fall back to its conventional "no effect" value.
        // For the repetition penalty that is 1.0, not 0; presence and frequency penalties use 0.
        // NOTE(review): confirm these match the no-op values of the sampler in use.
        optionalDoubleRow("Repetition penalty", value: $overrides.repetitionPenalty, inheritedValue: inheritedSettings.repetitionPenalty ?? 1.0)
        optionalDoubleRow("Presence penalty", value: $overrides.presencePenalty, inheritedValue: inheritedSettings.presencePenalty ?? 0)
        optionalDoubleRow("Frequency penalty", value: $overrides.frequencyPenalty, inheritedValue: inheritedSettings.frequencyPenalty ?? 0)

        Text("Unset fields inherit from \(inheritedSource). The values shown are the effective starting values for this scene.")
            .font(.caption)
            .foregroundStyle(.secondary)
    }

    /// Row for an optional `Double` override.
    ///
    /// - Parameters:
    ///   - title: Row label, also used as the text field's title.
    ///   - value: The override being edited; `nil` means "inherit".
    ///   - inheritedValue: Value displayed while inheriting and copied in when "Override" is pressed.
    private func optionalDoubleRow(_ title: String, value: Binding<Double?>, inheritedValue: Double) -> some View {
        HStack {
            Text(title)
            Spacer()
            // Typing into the field writes a concrete value, which turns the field into an override.
            TextField(title, value: Binding(
                get: { value.wrappedValue ?? inheritedValue },
                set: { value.wrappedValue = $0 }
            ), format: generationDoubleFormat)
                .multilineTextAlignment(.trailing)
                .frame(width: 90)
            if value.wrappedValue == nil {
                Text("Inherited")
                    .font(.caption)
                    .foregroundStyle(.secondary)
            }
            Button(value.wrappedValue == nil ? "Override" : "Clear") {
                if value.wrappedValue == nil {
                    value.wrappedValue = inheritedValue
                } else {
                    value.wrappedValue = nil
                }
            }
            .buttonStyle(.link)
        }
    }

    /// Row for an optional `Int` override; same behaviour as `optionalDoubleRow` with integer formatting.
    private func optionalIntRow(_ title: String, value: Binding<Int?>, inheritedValue: Int) -> some View {
        HStack {
            Text(title)
            Spacer()
            // Typing into the field writes a concrete value, which turns the field into an override.
            TextField(title, value: Binding(
                get: { value.wrappedValue ?? inheritedValue },
                set: { value.wrappedValue = $0 }
            ), format: generationIntegerFormat)
                .multilineTextAlignment(.trailing)
                .frame(width: 90)
            if value.wrappedValue == nil {
                Text("Inherited")
                    .font(.caption)
                    .foregroundStyle(.secondary)
            }
            Button(value.wrappedValue == nil ? "Override" : "Clear") {
                if value.wrappedValue == nil {
                    value.wrappedValue = inheritedValue
                } else {
                    value.wrappedValue = nil
                }
            }
            .buttonStyle(.link)
        }
    }
}
|
||||||
@@ -246,6 +246,14 @@ private struct SceneEditorView: View {
|
|||||||
.font(.caption)
|
.font(.caption)
|
||||||
.foregroundStyle(.secondary)
|
.foregroundStyle(.secondary)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Section("Generation Overrides") {
|
||||||
|
GenerationOverridesEditor(
|
||||||
|
overrides: generationOverridesBinding,
|
||||||
|
inheritedSettings: inheritedGenerationSettings,
|
||||||
|
inheritedSource: inheritedGenerationSource
|
||||||
|
)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
.formStyle(.grouped)
|
.formStyle(.grouped)
|
||||||
.navigationTitle(scene.displayName)
|
.navigationTitle(scene.displayName)
|
||||||
@@ -272,4 +280,35 @@ private struct SceneEditorView: View {
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Binding onto this scene's generation overrides.
///
/// Reads prefer the copy held by `sceneStore` and fall back to the local `scene`
/// value; writes go through `sceneStore.updateScene(id:)`.
private var generationOverridesBinding: Binding<GenerationSettingsOverride> {
    Binding<GenerationSettingsOverride>(
        get: {
            if let storedScene = sceneStore.scene(id: scene.id) {
                return storedScene.generationOverrides
            }
            return scene.generationOverrides
        },
        set: { updatedOverrides in
            sceneStore.updateScene(id: scene.id) { storedScene in
                storedScene.generationOverrides = updatedOverrides
            }
        }
    )
}
|
||||||
|
|
||||||
|
/// Model identifier whose defaults this scene inherits.
///
/// Resolution order: the store's copy of the scene, the local `scene`, the default-model
/// preference, the last-used-model preference, and finally `ModelConfig.default`.
private var effectiveModelId: String {
    let sceneModelId = sceneStore.scene(id: scene.id)?.modelId ?? scene.modelId
    return sceneModelId
        ?? Preferences.defaultModelId
        ?? Preferences.lastModelId
        ?? ModelConfig.default.id
}
|
||||||
|
|
||||||
|
/// Generation settings the scene inherits, looked up in `Preferences` for `effectiveModelId`.
private var inheritedGenerationSettings: GenerationSettings {
    let modelId = effectiveModelId
    return Preferences.generationSettings(forModelId: modelId)
}
|
||||||
|
|
||||||
|
/// Short phrase naming where the inherited settings come from: the model's saved
/// defaults when `Preferences` holds an entry for it, otherwise its built-in defaults.
/// The model's display name is used when `ModelConfig.resolve` finds it, else the raw id.
private var inheritedGenerationSource: String {
    let modelId = effectiveModelId
    let modelName = ModelConfig.resolve(modelId)?.displayName ?? modelId
    let hasSavedDefaults = Preferences.hasGenerationSettings(forModelId: modelId)
    return hasSavedDefaults
        ? "saved \(modelName) defaults"
        : "built-in \(modelName) defaults"
}
|
||||||
}
|
}
|
||||||
@@ -8,7 +8,7 @@ struct SettingsView: View {
|
|||||||
@State private var apiAutoStart: Bool = Preferences.apiAutoStart
|
@State private var apiAutoStart: Bool = Preferences.apiAutoStart
|
||||||
@State private var idleUnloadMinutes: String = String(Preferences.idleUnloadMinutes)
|
@State private var idleUnloadMinutes: String = String(Preferences.idleUnloadMinutes)
|
||||||
@State private var defaultModelId: String = Preferences.defaultModelId ?? ModelConfig.default.id
|
@State private var defaultModelId: String = Preferences.defaultModelId ?? ModelConfig.default.id
|
||||||
@State private var enableThinking: Bool = Preferences.enableThinking
|
@State private var generationDefaultsModelId: String = Preferences.defaultModelId ?? ModelConfig.default.id
|
||||||
@State private var kvQuantizationEnabled: Bool = Preferences.kvQuantizationEnabled
|
@State private var kvQuantizationEnabled: Bool = Preferences.kvQuantizationEnabled
|
||||||
@State private var kvQuantizationBits: Int = Preferences.kvQuantizationBits
|
@State private var kvQuantizationBits: Int = Preferences.kvQuantizationBits
|
||||||
|
|
||||||
@@ -42,13 +42,16 @@ struct SettingsView: View {
|
|||||||
.foregroundStyle(.secondary)
|
.foregroundStyle(.secondary)
|
||||||
}
|
}
|
||||||
|
|
||||||
Section("Generation") {
|
Section("Generation Defaults") {
|
||||||
Toggle("Enable thinking mode", isOn: $enableThinking)
|
Picker("Defaults for model", selection: $generationDefaultsModelId) {
|
||||||
.onChange(of: enableThinking) {
|
ForEach(ModelConfig.availableModels) { model in
|
||||||
Preferences.enableThinking = enableThinking
|
Text(model.displayName).tag(model.id)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Text("When enabled, models like Qwen3.5 reason internally before responding. Produces better answers but slower. Takes effect on the next conversation.")
|
GenerationDefaultsEditor(settings: generationDefaultsBinding)
|
||||||
|
|
||||||
|
Text("These are the per-model defaults used by chat sessions and by the API server whenever a request omits a generation parameter. Lower temperature and stronger repetition penalties are usually better for technical work; higher temperature is usually better for improvisation and roleplay.")
|
||||||
.font(.caption)
|
.font(.caption)
|
||||||
.foregroundStyle(.secondary)
|
.foregroundStyle(.secondary)
|
||||||
}
|
}
|
||||||
@@ -162,4 +165,11 @@ struct SettingsView: View {
|
|||||||
.formStyle(.grouped)
|
.formStyle(.grouped)
|
||||||
.frame(width: 450, height: 650)
|
.frame(width: 450, height: 650)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Binding onto the stored defaults of the model currently chosen in the
/// "Defaults for model" picker. Reads and writes go straight to `Preferences`.
// NOTE(review): the backing storage is not SwiftUI state — verify that dependent rows
// refresh after a write.
private var generationDefaultsBinding: Binding<GenerationSettings> {
    .init(
        get: {
            Preferences.generationSettings(forModelId: generationDefaultsModelId)
        },
        set: { updatedSettings in
            Preferences.setGenerationSettings(updatedSettings, forModelId: generationDefaultsModelId)
        }
    )
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1174,6 +1174,102 @@ final class APIServerRewriteTests: XCTestCase {
|
|||||||
XCTAssertGreaterThan(finalLiveSnapshot.totalCacheReusePromptTokens, afterDisconnectLiveSnapshot.totalCacheReusePromptTokens)
|
XCTAssertGreaterThan(finalLiveSnapshot.totalCacheReusePromptTokens, afterDisconnectLiveSnapshot.totalCacheReusePromptTokens)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Sends two chat completions against a model with stored generation defaults.
/// The first request omits every sampling parameter and must report the stored
/// defaults; the second supplies its own values and must report those instead.
/// `thinkingEnabled` is expected to stay at the stored value in both cases.
func testAPIServerUsesModelDefaultsAndRequestOverridesTakePrecedence() async throws {
    let modelId = self.genericModelId
    let settingsBeforeTest = Preferences.generationSettings(forModelId: modelId)
    let collector = GenerationSettingsEventCollector()
    let prompt = APIChatMessage(role: "user", content: .text("Reply with one short word."), name: nil, tool_calls: nil, tool_call_id: nil)

    let storedDefaults = GenerationSettings(
        temperature: 0.11,
        topP: 0.77,
        topK: 9,
        minP: 0.04,
        maxTokens: 3,
        repetitionPenalty: 1.18,
        presencePenalty: 0.25,
        frequencyPenalty: 0.4,
        thinkingEnabled: false
    )
    Preferences.setGenerationSettings(storedDefaults, forModelId: modelId)
    APIServer.debugGenerationSettingsEventHandler = { event in
        Task {
            await collector.record(event)
        }
    }

    // Put the preferences and the global debug hook back however the test ends.
    defer {
        Preferences.setGenerationSettings(settingsBeforeTest, forModelId: modelId)
        APIServer.debugGenerationSettingsEventHandler = nil
    }

    let harness = try await makeHarness(initialModelId: modelId)
    defer { harness.stop() }

    // Request 1: no sampling parameters, so the stored defaults should be used.
    let defaultsOnlyRequest = APIChatCompletionRequest(
        model: modelId,
        messages: [prompt],
        stream: false
    )
    _ = try await sendChatCompletion(defaultsOnlyRequest, port: harness.port)

    try await waitUntil(timeoutSeconds: 5) {
        await collector.events().count == 1
    }

    let eventsAfterFirstRequest = await collector.events()
    let defaultsEvent = try XCTUnwrap(eventsAfterFirstRequest.first)
    XCTAssertEqual(defaultsEvent.settings.temperature, 0.11)
    XCTAssertEqual(defaultsEvent.settings.topP, 0.77)
    XCTAssertEqual(defaultsEvent.settings.topK, 9)
    XCTAssertEqual(defaultsEvent.settings.minP, 0.04)
    XCTAssertEqual(defaultsEvent.settings.maxTokens, 3)
    XCTAssertEqual(defaultsEvent.settings.repetitionPenalty, 1.18)
    XCTAssertEqual(defaultsEvent.settings.presencePenalty, 0.25)
    XCTAssertEqual(defaultsEvent.settings.frequencyPenalty, 0.4)
    XCTAssertFalse(defaultsEvent.settings.thinkingEnabled)

    // Request 2: every sampling parameter is supplied and should win over the defaults.
    let overridingRequest = APIChatCompletionRequest(
        model: modelId,
        messages: [prompt],
        temperature: 0.62,
        top_p: 0.55,
        max_tokens: 5,
        stream: false,
        frequency_penalty: 0.1,
        presence_penalty: 0.2,
        top_k: 4,
        min_p: 0.02,
        repetition_penalty: 1.05
    )
    _ = try await sendChatCompletion(overridingRequest, port: harness.port)

    try await waitUntil(timeoutSeconds: 5) {
        await collector.events().count == 2
    }

    let eventsAfterSecondRequest = await collector.events()
    let overrideEvent = try XCTUnwrap(eventsAfterSecondRequest.last)
    XCTAssertEqual(overrideEvent.settings.temperature, 0.62)
    XCTAssertEqual(overrideEvent.settings.topP, 0.55)
    XCTAssertEqual(overrideEvent.settings.topK, 4)
    XCTAssertEqual(overrideEvent.settings.minP, 0.02)
    XCTAssertEqual(overrideEvent.settings.maxTokens, 5)
    XCTAssertEqual(overrideEvent.settings.repetitionPenalty, 1.05)
    XCTAssertEqual(overrideEvent.settings.presencePenalty, 0.2)
    XCTAssertEqual(overrideEvent.settings.frequencyPenalty, 0.1)
    // The request carries no thinking flag, so the stored value is expected here.
    XCTAssertFalse(overrideEvent.settings.thinkingEnabled)
}
|
||||||
|
|
||||||
func testStreamingDisconnectStopsServerWorkWithinTwoHundredMilliseconds() async throws {
|
func testStreamingDisconnectStopsServerWorkWithinTwoHundredMilliseconds() async throws {
|
||||||
let harness = try await makeHarness()
|
let harness = try await makeHarness()
|
||||||
defer { harness.stop() }
|
defer { harness.stop() }
|
||||||
@@ -1683,6 +1779,18 @@ private actor LookupEventCollector {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Actor-isolated store for `APIServer.DebugGenerationSettingsEvent` values.
///
/// Events are appended through `record(_:)` and read back, in insertion
/// order, through `events()`.
private actor GenerationSettingsEventCollector {
    /// Every event handed to `record(_:)` so far, oldest first.
    private var captured = [APIServer.DebugGenerationSettingsEvent]()

    /// Appends `event` to the end of the stored list.
    func record(_ event: APIServer.DebugGenerationSettingsEvent) {
        captured += [event]
    }

    /// Returns a snapshot of all events recorded so far.
    func events() -> [APIServer.DebugGenerationSettingsEvent] {
        return captured
    }
}
|
||||||
|
|
||||||
private struct DetailedStreamingResult {
|
private struct DetailedStreamingResult {
|
||||||
let events: [StreamingEvent]
|
let events: [StreamingEvent]
|
||||||
let sawDone: Bool
|
let sawDone: Bool
|
||||||
|
|||||||
80
MLXServerTests/Server/GenerationSettingsTests.swift
Normal file
80
MLXServerTests/Server/GenerationSettingsTests.swift
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
import XCTest
|
||||||
|
@testable import MLX_Server
|
||||||
|
|
||||||
|
/// Unit tests for `GenerationSettings`: applying scene overrides on top of a
/// base configuration, per-model persistence through `Preferences`, and the
/// per-model fallback presets.
final class GenerationSettingsTests: XCTestCase {
    /// Fields set on the override replace the base value; every field the
    /// override leaves unset must come through `applying(_:)` unchanged.
    func testSceneOverridesApplyWithoutDiscardingModelDefaults() {
        let base = GenerationSettings(
            temperature: 0.2,
            topP: 0.9,
            topK: 12,
            minP: 0.05,
            maxTokens: 2048,
            repetitionPenalty: 1.08,
            presencePenalty: 0.3,
            frequencyPenalty: 0.1,
            thinkingEnabled: true
        )

        let overrides = GenerationSettingsOverride(
            temperature: 0.8,
            repetitionPenalty: 1.2,
            thinkingEnabled: false
        )

        let resolved = base.applying(overrides)

        // Overridden fields take the override's value.
        XCTAssertEqual(resolved.temperature, 0.8)
        XCTAssertEqual(resolved.repetitionPenalty, 1.2)
        XCTAssertFalse(resolved.thinkingEnabled)

        // Fields absent from the override keep the base value. minP and
        // frequencyPenalty are checked too so that a regression dropping
        // either of them cannot slip through.
        XCTAssertEqual(resolved.topP, 0.9)
        XCTAssertEqual(resolved.topK, 12)
        XCTAssertEqual(resolved.minP, 0.05)
        XCTAssertEqual(resolved.maxTokens, 2048)
        XCTAssertEqual(resolved.presencePenalty, 0.3)
        XCTAssertEqual(resolved.frequencyPenalty, 0.1)
    }

    /// Settings saved for one model id must be read back for that id only,
    /// without leaking into the settings of another model id.
    func testPreferencesStoreGenerationDefaultsPerModel() {
        let gemmaId = "gemma"
        let qwenId = "qwen3.5-0.8b"
        let originalGemma = Preferences.generationSettings(forModelId: gemmaId)
        let originalQwen = Preferences.generationSettings(forModelId: qwenId)

        // Put back whatever was readable before the test ran.
        // NOTE(review): if nothing was stored for an id, this writes back the
        // value the getter returned (presumably a fallback) — confirm that
        // persisting it is acceptable.
        defer {
            Preferences.setGenerationSettings(originalGemma, forModelId: gemmaId)
            Preferences.setGenerationSettings(originalQwen, forModelId: qwenId)
        }

        Preferences.setGenerationSettings(
            GenerationSettings(temperature: 0.15, topP: 0.85, maxTokens: 1024, repetitionPenalty: 1.1, thinkingEnabled: false),
            forModelId: gemmaId
        )
        Preferences.setGenerationSettings(
            GenerationSettings(temperature: 0.95, topP: 1.0, maxTokens: 8192, repetitionPenalty: nil, thinkingEnabled: true),
            forModelId: qwenId
        )

        let gemma = Preferences.generationSettings(forModelId: gemmaId)
        let qwen = Preferences.generationSettings(forModelId: qwenId)

        XCTAssertEqual(gemma.temperature, 0.15)
        XCTAssertEqual(gemma.topP, 0.85)
        XCTAssertEqual(gemma.maxTokens, 1024)
        XCTAssertEqual(gemma.repetitionPenalty, 1.1)
        XCTAssertFalse(gemma.thinkingEnabled)

        XCTAssertEqual(qwen.temperature, 0.95)
        // topP is stored for qwen as well, so verify it round-trips.
        XCTAssertEqual(qwen.topP, 1.0)
        XCTAssertEqual(qwen.maxTokens, 8192)
        XCTAssertNil(qwen.repetitionPenalty)
        XCTAssertTrue(qwen.thinkingEnabled)
    }

    /// `modelDefault(for:)` must return the technical preset for "gemma" and
    /// "qwen" and the roleplay preset for "stheno", and the two presets must
    /// differ.
    func testModelFallbackDefaultsComeFromModelDefinitions() {
        let gemma = GenerationSettings.modelDefault(for: "gemma")
        let qwen = GenerationSettings.modelDefault(for: "qwen")
        let stheno = GenerationSettings.modelDefault(for: "stheno")

        XCTAssertEqual(gemma, .technicalDefault)
        XCTAssertEqual(qwen, .technicalDefault)
        XCTAssertEqual(stheno, .roleplayDefault)
        XCTAssertNotEqual(gemma, stheno)
    }
}
|
||||||
@@ -42,7 +42,7 @@ This is intended for targeted validation while keeping the normal default as the
|
|||||||
## App Features
|
## App Features
|
||||||
|
|
||||||
- **Chat interface** with markdown rendering and model-aware image attachments (file picker, drag & drop, clipboard paste, Finder copy-paste on vision-capable models)
|
- **Chat interface** with markdown rendering and model-aware image attachments (file picker, drag & drop, clipboard paste, Finder copy-paste on vision-capable models)
|
||||||
- **Scene-based chat starts** — New Chat opens a scene picker with Neutral plus saved scenes, each with an optional model override, a scene prompt layered onto the base system prompt, and an auto-sent starter prompt
|
- **Scene-based chat starts** — New Chat opens a scene picker with Neutral plus saved scenes, each with an optional model override, a scene prompt layered onto the base system prompt, an auto-sent starter prompt, and optional generation-setting overrides for chat-specific behavior
|
||||||
- **Model picker** in toolbar with local/download status indicators and re-download button
|
- **Model picker** in toolbar with local/download status indicators and re-download button
|
||||||
- **Download progress modal** — shows file progress, percentage, and speed when downloading a new model
|
- **Download progress modal** — shows file progress, percentage, and speed when downloading a new model
|
||||||
- **Thinking mode** — models like Qwen3.5 can reason internally before responding; thinking content appears in a collapsible box. Toggle on/off in Settings.
|
- **Thinking mode** — models like Qwen3.5 can reason internally before responding; thinking content appears in a collapsible box. Toggle on/off in Settings.
|
||||||
@@ -52,7 +52,7 @@ This is intended for targeted validation while keeping the normal default as the
|
|||||||
- **Status bar** showing model name, context window, tokens/sec, token counts, GPU memory, API server status
|
- **Status bar** showing model name, context window, tokens/sec, token counts, GPU memory, API server status
|
||||||
- **Keyboard shortcuts**: `Cmd+N` (new chat), `Cmd+O` (open chat document), `Cmd+S` (save chat document), `Cmd+Shift+S` (save chat document as), `Cmd+Shift+E` (export), `Cmd+Return` (send), `Escape` (stop), `Cmd+1/2/3/4/5` (switch models)
|
- **Keyboard shortcuts**: `Cmd+N` (new chat), `Cmd+O` (open chat document), `Cmd+S` (save chat document), `Cmd+Shift+S` (save chat document as), `Cmd+Shift+E` (export), `Cmd+Return` (send), `Escape` (stop), `Cmd+1/2/3/4/5` (switch models)
|
||||||
- **Scene management** — create and edit reusable roleplay/task presets from the New Chat flow or Settings
|
- **Scene management** — create and edit reusable roleplay/task presets from the New Chat flow or Settings
|
||||||
- **Settings** (`Cmd+,`): default model, thinking mode toggle, base system prompt, scene management, API port, API auto-start, idle unload timeout
|
- **Settings** (`Cmd+,`): default model, per-model generation defaults (temperature, top-p/top-k, min-p, repetition/presence/frequency penalties, max tokens, thinking mode), base system prompt, scene management, API port, API auto-start, idle unload timeout
|
||||||
- **Idle auto-unload** — model is unloaded after configurable idle time (resets on both user input and model output), reloaded on next request
|
- **Idle auto-unload** — model is unloaded after configurable idle time (resets on both user input and model output), reloaded on next request
|
||||||
|
|
||||||
## API Server
|
## API Server
|
||||||
@@ -65,6 +65,8 @@ The embedded API server (toggle in toolbar) runs on port 1234 by default. Standa
|
|||||||
|
|
||||||
Capability checks are enforced server-side. If a request sends images to a text-only model or tools to a model without tool support, the server returns a `400 invalid_request_error`.
|
Capability checks are enforced server-side. If a request sends images to a text-only model or tools to a model without tool support, the server returns a `400 invalid_request_error`.
|
||||||
|
|
||||||
|
When a chat-completions request omits generation parameters, the API server falls back to the saved per-model defaults from Settings. Request-supplied values still take precedence on a per-call basis.
|
||||||
|
|
||||||
### Model Swapping
|
### Model Swapping
|
||||||
|
|
||||||
Send any model ID or alias in the `model` field. If it differs from the currently loaded model, the server swaps automatically:
|
Send any model ID or alias in the `model` field. If it differs from the currently loaded model, the server swaps automatically:
|
||||||
|
|||||||
Reference in New Issue
Block a user