Compare commits

..

12 Commits

35 changed files with 1929 additions and 290 deletions

View File

@@ -1,6 +1,7 @@
{ {
"chat.tools.terminal.autoApprove": { "chat.tools.terminal.autoApprove": {
"./test.sh": true, "./test.sh": true,
"setopt": true "setopt": true,
"./build.sh": true
} }
} }

View File

@@ -55,6 +55,7 @@ open "build/Debug/MLX Server.app"
| `qwen` | `mlx-community/Qwen3.5-4B-MLX-4bit` | Vision + thinking mode + tool use via `<tool_call>` tags (256k context) | | `qwen` | `mlx-community/Qwen3.5-4B-MLX-4bit` | Vision + thinking mode + tool use via `<tool_call>` tags (256k context) |
| `qwen3.5-0.8b` | `mlx-community/Qwen3.5-0.8B-4bit` | Vision + thinking mode + tool use via `<tool_call>` tags (256k context) | | `qwen3.5-0.8b` | `mlx-community/Qwen3.5-0.8B-4bit` | Vision + thinking mode + tool use via `<tool_call>` tags (256k context) |
| `qwen3.5-9b` | `mlx-community/Qwen3.5-9B-4bit` | Vision + thinking mode + tool use via `<tool_call>` tags (256k context) | | `qwen3.5-9b` | `mlx-community/Qwen3.5-9B-4bit` | Vision + thinking mode + tool use via `<tool_call>` tags (256k context) |
| `violet-lotus` | `hobaratio/MN-Violet-Lotus-12B-mlx-4Bit` | Text-only Mistral-based model (32k context) |
Any model in MLX format on HuggingFace can be added — no restriction on uploader or architecture. Any model in MLX format on HuggingFace can be added — no restriction on uploader or architecture.

View File

@@ -17,24 +17,29 @@
20FFB5DBF75AA6C359AAE31C /* SceneManagementView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 37FEB592E5E717F817B03151 /* SceneManagementView.swift */; }; 20FFB5DBF75AA6C359AAE31C /* SceneManagementView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 37FEB592E5E717F817B03151 /* SceneManagementView.swift */; };
221DEC86374902FCFD661A01 /* TokenPrefixCacheTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 64B2EDD5D1881AC9E1E60913 /* TokenPrefixCacheTests.swift */; }; 221DEC86374902FCFD661A01 /* TokenPrefixCacheTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 64B2EDD5D1881AC9E1E60913 /* TokenPrefixCacheTests.swift */; };
2640EDCA9033D85C0B785557 /* GenerationSettings.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6FAF7455BD387CD2061E0CBF /* GenerationSettings.swift */; }; 2640EDCA9033D85C0B785557 /* GenerationSettings.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6FAF7455BD387CD2061E0CBF /* GenerationSettings.swift */; };
28A780EEB6DC74B5B0BBF03D /* HuggingFace in Frameworks */ = {isa = PBXBuildFile; productRef = FDBFD829EE956976552514CC /* HuggingFace */; };
29879D696584B96CC56560DF /* ChatExporter.swift in Sources */ = {isa = PBXBuildFile; fileRef = D7C9BAD674E29688ACE53B0B /* ChatExporter.swift */; }; 29879D696584B96CC56560DF /* ChatExporter.swift in Sources */ = {isa = PBXBuildFile; fileRef = D7C9BAD674E29688ACE53B0B /* ChatExporter.swift */; };
2CAAF7129F7CC45200FA9F6B /* ModelPickerView.swift in Sources */ = {isa = PBXBuildFile; fileRef = C3C3A76C02AF70A9D8F868FC /* ModelPickerView.swift */; }; 2CAAF7129F7CC45200FA9F6B /* ModelPickerView.swift in Sources */ = {isa = PBXBuildFile; fileRef = C3C3A76C02AF70A9D8F868FC /* ModelPickerView.swift */; };
2D08769282BD71C170DB0943 /* InferenceStats.swift in Sources */ = {isa = PBXBuildFile; fileRef = E35452B166893B25E765FF70 /* InferenceStats.swift */; }; 2D08769282BD71C170DB0943 /* InferenceStats.swift in Sources */ = {isa = PBXBuildFile; fileRef = E35452B166893B25E765FF70 /* InferenceStats.swift */; };
2E3A02DF9C6A5109E532D5E2 /* ChatDocumentController.swift in Sources */ = {isa = PBXBuildFile; fileRef = D5C1FCEFEA72B9ABB87FB20E /* ChatDocumentController.swift */; }; 2E3A02DF9C6A5109E532D5E2 /* ChatDocumentController.swift in Sources */ = {isa = PBXBuildFile; fileRef = D5C1FCEFEA72B9ABB87FB20E /* ChatDocumentController.swift */; };
3A9DB84947BBBBED06CF9E1E /* TestImageFixtures.swift in Sources */ = {isa = PBXBuildFile; fileRef = 31BD930DEC051408444C30D4 /* TestImageFixtures.swift */; }; 3A9DB84947BBBBED06CF9E1E /* TestImageFixtures.swift in Sources */ = {isa = PBXBuildFile; fileRef = 31BD930DEC051408444C30D4 /* TestImageFixtures.swift */; };
4158FA884D981D73288FB74C /* SaveChatCommands.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2E2FCA55CEBEBCED78D9479A /* SaveChatCommands.swift */; }; 4158FA884D981D73288FB74C /* SaveChatCommands.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2E2FCA55CEBEBCED78D9479A /* SaveChatCommands.swift */; };
4B7449F57226CB48C4F5EEBD /* LocalModelResolverTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 43315501A5AFC0EA014F44F5 /* LocalModelResolverTests.swift */; };
4CB13DC1AC7A500DDBB443EC /* ChatInputView.swift in Sources */ = {isa = PBXBuildFile; fileRef = E5E6AD02CDF23BDAB64700A7 /* ChatInputView.swift */; }; 4CB13DC1AC7A500DDBB443EC /* ChatInputView.swift in Sources */ = {isa = PBXBuildFile; fileRef = E5E6AD02CDF23BDAB64700A7 /* ChatInputView.swift */; };
4DC033E45880B2948B47DEB1 /* FocusedValues.swift in Sources */ = {isa = PBXBuildFile; fileRef = EF518FEBF3A38E830E3CE1A5 /* FocusedValues.swift */; }; 4DC033E45880B2948B47DEB1 /* FocusedValues.swift in Sources */ = {isa = PBXBuildFile; fileRef = EF518FEBF3A38E830E3CE1A5 /* FocusedValues.swift */; };
50B6861FF8610B3ED4FFAD9D /* MLXServerApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = C67742651DB486871CEF1612 /* MLXServerApp.swift */; }; 50B6861FF8610B3ED4FFAD9D /* MLXServerApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = C67742651DB486871CEF1612 /* MLXServerApp.swift */; };
50DD129CCF2843482DEC3B96 /* APIServer.swift in Sources */ = {isa = PBXBuildFile; fileRef = 3D08828E16B17EF02C14243E /* APIServer.swift */; }; 50DD129CCF2843482DEC3B96 /* APIServer.swift in Sources */ = {isa = PBXBuildFile; fileRef = 3D08828E16B17EF02C14243E /* APIServer.swift */; };
5946258F1DE88CE904584E0B /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 944C699FBB76C734C9DF2F2E /* ContentView.swift */; }; 5946258F1DE88CE904584E0B /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 944C699FBB76C734C9DF2F2E /* ContentView.swift */; };
5C1E8FE1C521914CEF98D3AA /* ChatMessagesView.swift in Sources */ = {isa = PBXBuildFile; fileRef = DB1A5E8B1C9F2BC4D262C53A /* ChatMessagesView.swift */; }; 5C1E8FE1C521914CEF98D3AA /* ChatMessagesView.swift in Sources */ = {isa = PBXBuildFile; fileRef = DB1A5E8B1C9F2BC4D262C53A /* ChatMessagesView.swift */; };
5D41C2B260265A32FF42264B /* ModelManagementView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A1B8F9568F95E07D212A2B7 /* ModelManagementView.swift */; };
621B7E4382199AC1378F5F9C /* StatusBarView.swift in Sources */ = {isa = PBXBuildFile; fileRef = B0EAB35D7130D56B9E7484BA /* StatusBarView.swift */; }; 621B7E4382199AC1378F5F9C /* StatusBarView.swift in Sources */ = {isa = PBXBuildFile; fileRef = B0EAB35D7130D56B9E7484BA /* StatusBarView.swift */; };
67262C5E24739F1FE0011439 /* StreamingSSEEncoder.swift in Sources */ = {isa = PBXBuildFile; fileRef = 615F8A7C9ABCADEB215D31BD /* StreamingSSEEncoder.swift */; }; 67262C5E24739F1FE0011439 /* StreamingSSEEncoder.swift in Sources */ = {isa = PBXBuildFile; fileRef = 615F8A7C9ABCADEB215D31BD /* StreamingSSEEncoder.swift */; };
67B815DC3304BF4B2E9974A8 /* LiveCountersTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7E7DF9F68C10C718844B7B01 /* LiveCountersTests.swift */; }; 67B815DC3304BF4B2E9974A8 /* LiveCountersTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7E7DF9F68C10C718844B7B01 /* LiveCountersTests.swift */; };
67D0628F148FE3C2200E0AEF /* APIServerResponseResolutionTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 051FEC14CC76A677F79ACD21 /* APIServerResponseResolutionTests.swift */; }; 67D0628F148FE3C2200E0AEF /* APIServerResponseResolutionTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 051FEC14CC76A677F79ACD21 /* APIServerResponseResolutionTests.swift */; };
6828CCA8B78AB40906F87CAB /* LocalModelResolver.swift in Sources */ = {isa = PBXBuildFile; fileRef = D733A0D1D4AC25DDDA6C8684 /* LocalModelResolver.swift */; }; 6828CCA8B78AB40906F87CAB /* LocalModelResolver.swift in Sources */ = {isa = PBXBuildFile; fileRef = D733A0D1D4AC25DDDA6C8684 /* LocalModelResolver.swift */; };
741692862DB1F13EA0B2D14D /* TokenPrefixCache.swift in Sources */ = {isa = PBXBuildFile; fileRef = 1962D530BEABCC7F1E8E0ED1 /* TokenPrefixCache.swift */; }; 741692862DB1F13EA0B2D14D /* TokenPrefixCache.swift in Sources */ = {isa = PBXBuildFile; fileRef = 1962D530BEABCC7F1E8E0ED1 /* TokenPrefixCache.swift */; };
75E046B4ABB1E6FEF17C1A60 /* ModelManagementWindow.swift in Sources */ = {isa = PBXBuildFile; fileRef = 721D6F203A10434FE0223042 /* ModelManagementWindow.swift */; };
777AEBB3471D8838F0F51D08 /* MarkdownUI in Frameworks */ = {isa = PBXBuildFile; productRef = A98257123539E9E738213BFA /* MarkdownUI */; };
7936325B425DFA2931F6E421 /* ModelBackedQuantizationTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = F7E6F18C80D9859E89D2B4E3 /* ModelBackedQuantizationTests.swift */; }; 7936325B425DFA2931F6E421 /* ModelBackedQuantizationTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = F7E6F18C80D9859E89D2B4E3 /* ModelBackedQuantizationTests.swift */; };
7CD765C1E2F9F4D7504C8D09 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = B629DA084A9A40E54F8EA5FA /* Assets.xcassets */; }; 7CD765C1E2F9F4D7504C8D09 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = B629DA084A9A40E54F8EA5FA /* Assets.xcassets */; };
80646C5066BF79BC76E1D9D7 /* ModelConfig.swift in Sources */ = {isa = PBXBuildFile; fileRef = 38DFC212AF4359A45FBE22BA /* ModelConfig.swift */; }; 80646C5066BF79BC76E1D9D7 /* ModelConfig.swift in Sources */ = {isa = PBXBuildFile; fileRef = 38DFC212AF4359A45FBE22BA /* ModelConfig.swift */; };
@@ -64,7 +69,8 @@
E92B6656C251EDA246B8F582 /* ImageDecoderTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = E4573DC9314915F4C7963B4E /* ImageDecoderTests.swift */; }; E92B6656C251EDA246B8F582 /* ImageDecoderTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = E4573DC9314915F4C7963B4E /* ImageDecoderTests.swift */; };
EC4FC68608DDFA6A3DF133CC /* InferenceEngine.swift in Sources */ = {isa = PBXBuildFile; fileRef = 02EBDE0C72D1C5CE220E5B93 /* InferenceEngine.swift */; }; EC4FC68608DDFA6A3DF133CC /* InferenceEngine.swift in Sources */ = {isa = PBXBuildFile; fileRef = 02EBDE0C72D1C5CE220E5B93 /* InferenceEngine.swift */; };
EDE59C241940E7B9B53D520D /* TokenPrefixCacheQuantizationTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = D50504058693CDE533D755B5 /* TokenPrefixCacheQuantizationTests.swift */; }; EDE59C241940E7B9B53D520D /* TokenPrefixCacheQuantizationTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = D50504058693CDE533D755B5 /* TokenPrefixCacheQuantizationTests.swift */; };
F546CE5955ED253D8A793D5E /* MarkdownUI in Frameworks */ = {isa = PBXBuildFile; productRef = A98257123539E9E738213BFA /* MarkdownUI */; }; F2A137B60D5DFCC591A01420 /* Tokenizers in Frameworks */ = {isa = PBXBuildFile; productRef = BD266A137966DB9451C2C352 /* Tokenizers */; };
F546CE5955ED253D8A793D5E /* MLXHuggingFace in Frameworks */ = {isa = PBXBuildFile; productRef = 269A55730E9BDC735F9C2B78 /* MLXHuggingFace */; };
FAF7D4714AC6D02674920208 /* ChatMessage.swift in Sources */ = {isa = PBXBuildFile; fileRef = A4B359324B5FD8D106C74338 /* ChatMessage.swift */; }; FAF7D4714AC6D02674920208 /* ChatMessage.swift in Sources */ = {isa = PBXBuildFile; fileRef = A4B359324B5FD8D106C74338 /* ChatMessage.swift */; };
FCD48F8C132A2B830A15EEB4 /* MLXLLM in Frameworks */ = {isa = PBXBuildFile; productRef = 3F5A4AC6DBAF7CA686ECA74E /* MLXLLM */; }; FCD48F8C132A2B830A15EEB4 /* MLXLLM in Frameworks */ = {isa = PBXBuildFile; productRef = 3F5A4AC6DBAF7CA686ECA74E /* MLXLLM */; };
FE4405F66873C75CD6FA19A5 /* StreamingSSEEncoderTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 49C383DD5224F3420EB98DB2 /* StreamingSSEEncoderTests.swift */; }; FE4405F66873C75CD6FA19A5 /* StreamingSSEEncoderTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 49C383DD5224F3420EB98DB2 /* StreamingSSEEncoderTests.swift */; };
@@ -100,6 +106,7 @@
3D08828E16B17EF02C14243E /* APIServer.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = APIServer.swift; sourceTree = "<group>"; }; 3D08828E16B17EF02C14243E /* APIServer.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = APIServer.swift; sourceTree = "<group>"; };
4147321383E94E9F17A0154E /* SettingsView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SettingsView.swift; sourceTree = "<group>"; }; 4147321383E94E9F17A0154E /* SettingsView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SettingsView.swift; sourceTree = "<group>"; };
4239CFF94B819C35A8D4D617 /* MonitorView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MonitorView.swift; sourceTree = "<group>"; }; 4239CFF94B819C35A8D4D617 /* MonitorView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MonitorView.swift; sourceTree = "<group>"; };
43315501A5AFC0EA014F44F5 /* LocalModelResolverTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LocalModelResolverTests.swift; sourceTree = "<group>"; };
49C383DD5224F3420EB98DB2 /* StreamingSSEEncoderTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = StreamingSSEEncoderTests.swift; sourceTree = "<group>"; }; 49C383DD5224F3420EB98DB2 /* StreamingSSEEncoderTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = StreamingSSEEncoderTests.swift; sourceTree = "<group>"; };
57AC0815F72BDD32FC54C88A /* GenerationSettingsTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = GenerationSettingsTests.swift; sourceTree = "<group>"; }; 57AC0815F72BDD32FC54C88A /* GenerationSettingsTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = GenerationSettingsTests.swift; sourceTree = "<group>"; };
5F9426FA5A4AC55F8D9C080E /* PromptBuilderTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PromptBuilderTests.swift; sourceTree = "<group>"; }; 5F9426FA5A4AC55F8D9C080E /* PromptBuilderTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PromptBuilderTests.swift; sourceTree = "<group>"; };
@@ -108,9 +115,11 @@
6B3AA91D2C7842D7366F9A41 /* ChatDocumentPackage.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ChatDocumentPackage.swift; sourceTree = "<group>"; }; 6B3AA91D2C7842D7366F9A41 /* ChatDocumentPackage.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ChatDocumentPackage.swift; sourceTree = "<group>"; };
6EE59189918D06B8D2F588FC /* MLXServer.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = MLXServer.app; sourceTree = BUILT_PRODUCTS_DIR; }; 6EE59189918D06B8D2F588FC /* MLXServer.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = MLXServer.app; sourceTree = BUILT_PRODUCTS_DIR; };
6FAF7455BD387CD2061E0CBF /* GenerationSettings.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = GenerationSettings.swift; sourceTree = "<group>"; }; 6FAF7455BD387CD2061E0CBF /* GenerationSettings.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = GenerationSettings.swift; sourceTree = "<group>"; };
721D6F203A10434FE0223042 /* ModelManagementWindow.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ModelManagementWindow.swift; sourceTree = "<group>"; };
7AE2A32FBB744696DEA77435 /* GenerationSettingsEditor.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = GenerationSettingsEditor.swift; sourceTree = "<group>"; }; 7AE2A32FBB744696DEA77435 /* GenerationSettingsEditor.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = GenerationSettingsEditor.swift; sourceTree = "<group>"; };
7C1A89C076E717F87A60397D /* ImageDecoder.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ImageDecoder.swift; sourceTree = "<group>"; }; 7C1A89C076E717F87A60397D /* ImageDecoder.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ImageDecoder.swift; sourceTree = "<group>"; };
7E7DF9F68C10C718844B7B01 /* LiveCountersTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LiveCountersTests.swift; sourceTree = "<group>"; }; 7E7DF9F68C10C718844B7B01 /* LiveCountersTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LiveCountersTests.swift; sourceTree = "<group>"; };
8A1B8F9568F95E07D212A2B7 /* ModelManagementView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ModelManagementView.swift; sourceTree = "<group>"; };
922CBDC9206737BD04AF2874 /* ModelManager.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ModelManager.swift; sourceTree = "<group>"; }; 922CBDC9206737BD04AF2874 /* ModelManager.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ModelManager.swift; sourceTree = "<group>"; };
944C699FBB76C734C9DF2F2E /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; }; 944C699FBB76C734C9DF2F2E /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; };
A4B359324B5FD8D106C74338 /* ChatMessage.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ChatMessage.swift; sourceTree = "<group>"; }; A4B359324B5FD8D106C74338 /* ChatMessage.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ChatMessage.swift; sourceTree = "<group>"; };
@@ -152,7 +161,10 @@
FCD48F8C132A2B830A15EEB4 /* MLXLLM in Frameworks */, FCD48F8C132A2B830A15EEB4 /* MLXLLM in Frameworks */,
945474365D0B3E961811909A /* MLXVLM in Frameworks */, 945474365D0B3E961811909A /* MLXVLM in Frameworks */,
B6D3662995B885C102876B4A /* MLXLMCommon in Frameworks */, B6D3662995B885C102876B4A /* MLXLMCommon in Frameworks */,
F546CE5955ED253D8A793D5E /* MarkdownUI in Frameworks */, F546CE5955ED253D8A793D5E /* MLXHuggingFace in Frameworks */,
28A780EEB6DC74B5B0BBF03D /* HuggingFace in Frameworks */,
F2A137B60D5DFCC591A01420 /* Tokenizers in Frameworks */,
777AEBB3471D8838F0F51D08 /* MarkdownUI in Frameworks */,
); );
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
}; };
@@ -199,6 +211,7 @@
57AC0815F72BDD32FC54C88A /* GenerationSettingsTests.swift */, 57AC0815F72BDD32FC54C88A /* GenerationSettingsTests.swift */,
E4573DC9314915F4C7963B4E /* ImageDecoderTests.swift */, E4573DC9314915F4C7963B4E /* ImageDecoderTests.swift */,
7E7DF9F68C10C718844B7B01 /* LiveCountersTests.swift */, 7E7DF9F68C10C718844B7B01 /* LiveCountersTests.swift */,
43315501A5AFC0EA014F44F5 /* LocalModelResolverTests.swift */,
D388BE00B42C06ED9D9905BF /* ModelBackedInferenceValidationTests.swift */, D388BE00B42C06ED9D9905BF /* ModelBackedInferenceValidationTests.swift */,
F7E6F18C80D9859E89D2B4E3 /* ModelBackedQuantizationTests.swift */, F7E6F18C80D9859E89D2B4E3 /* ModelBackedQuantizationTests.swift */,
5F9426FA5A4AC55F8D9C080E /* PromptBuilderTests.swift */, 5F9426FA5A4AC55F8D9C080E /* PromptBuilderTests.swift */,
@@ -246,6 +259,8 @@
DB1A5E8B1C9F2BC4D262C53A /* ChatMessagesView.swift */, DB1A5E8B1C9F2BC4D262C53A /* ChatMessagesView.swift */,
2DC8C86D397B1FCA08E07CBD /* DownloadModalView.swift */, 2DC8C86D397B1FCA08E07CBD /* DownloadModalView.swift */,
7AE2A32FBB744696DEA77435 /* GenerationSettingsEditor.swift */, 7AE2A32FBB744696DEA77435 /* GenerationSettingsEditor.swift */,
8A1B8F9568F95E07D212A2B7 /* ModelManagementView.swift */,
721D6F203A10434FE0223042 /* ModelManagementWindow.swift */,
C3C3A76C02AF70A9D8F868FC /* ModelPickerView.swift */, C3C3A76C02AF70A9D8F868FC /* ModelPickerView.swift */,
4239CFF94B819C35A8D4D617 /* MonitorView.swift */, 4239CFF94B819C35A8D4D617 /* MonitorView.swift */,
37FEB592E5E717F817B03151 /* SceneManagementView.swift */, 37FEB592E5E717F817B03151 /* SceneManagementView.swift */,
@@ -334,6 +349,9 @@
3F5A4AC6DBAF7CA686ECA74E /* MLXLLM */, 3F5A4AC6DBAF7CA686ECA74E /* MLXLLM */,
D5E8E1C2DD8D8AABB4306193 /* MLXVLM */, D5E8E1C2DD8D8AABB4306193 /* MLXVLM */,
9090667D4134056AE66DC2F1 /* MLXLMCommon */, 9090667D4134056AE66DC2F1 /* MLXLMCommon */,
269A55730E9BDC735F9C2B78 /* MLXHuggingFace */,
FDBFD829EE956976552514CC /* HuggingFace */,
BD266A137966DB9451C2C352 /* Tokenizers */,
A98257123539E9E738213BFA /* MarkdownUI */, A98257123539E9E738213BFA /* MarkdownUI */,
); );
productName = MLXServer; productName = MLXServer;
@@ -381,6 +399,8 @@
packageReferences = ( packageReferences = (
D402301668D113A49B6DD32D /* XCRemoteSwiftPackageReference "swift-markdown-ui" */, D402301668D113A49B6DD32D /* XCRemoteSwiftPackageReference "swift-markdown-ui" */,
1AA4C71F15847A241E418C0C /* XCRemoteSwiftPackageReference "mlx-swift-lm" */, 1AA4C71F15847A241E418C0C /* XCRemoteSwiftPackageReference "mlx-swift-lm" */,
A6D001FF3D9EA5BA3112F5BF /* XCRemoteSwiftPackageReference "swift-huggingface" */,
5479E9F7A876DC346598E560 /* XCRemoteSwiftPackageReference "swift-transformers" */,
); );
preferredProjectObjectVersion = 77; preferredProjectObjectVersion = 77;
productRefGroup = 652987C2A419DBFC79E32CDE /* Products */; productRefGroup = 652987C2A419DBFC79E32CDE /* Products */;
@@ -416,6 +436,7 @@
847B445654860396AF5A8280 /* GenerationSettingsTests.swift in Sources */, 847B445654860396AF5A8280 /* GenerationSettingsTests.swift in Sources */,
E92B6656C251EDA246B8F582 /* ImageDecoderTests.swift in Sources */, E92B6656C251EDA246B8F582 /* ImageDecoderTests.swift in Sources */,
67B815DC3304BF4B2E9974A8 /* LiveCountersTests.swift in Sources */, 67B815DC3304BF4B2E9974A8 /* LiveCountersTests.swift in Sources */,
4B7449F57226CB48C4F5EEBD /* LocalModelResolverTests.swift in Sources */,
8E665E21CCCD87A907CEA78D /* ModelBackedInferenceValidationTests.swift in Sources */, 8E665E21CCCD87A907CEA78D /* ModelBackedInferenceValidationTests.swift in Sources */,
7936325B425DFA2931F6E421 /* ModelBackedQuantizationTests.swift in Sources */, 7936325B425DFA2931F6E421 /* ModelBackedQuantizationTests.swift in Sources */,
1FE8C624898960ECCE39C0D4 /* PromptBuilderTests.swift in Sources */, 1FE8C624898960ECCE39C0D4 /* PromptBuilderTests.swift in Sources */,
@@ -455,6 +476,8 @@
6828CCA8B78AB40906F87CAB /* LocalModelResolver.swift in Sources */, 6828CCA8B78AB40906F87CAB /* LocalModelResolver.swift in Sources */,
50B6861FF8610B3ED4FFAD9D /* MLXServerApp.swift in Sources */, 50B6861FF8610B3ED4FFAD9D /* MLXServerApp.swift in Sources */,
80646C5066BF79BC76E1D9D7 /* ModelConfig.swift in Sources */, 80646C5066BF79BC76E1D9D7 /* ModelConfig.swift in Sources */,
5D41C2B260265A32FF42264B /* ModelManagementView.swift in Sources */,
75E046B4ABB1E6FEF17C1A60 /* ModelManagementWindow.swift in Sources */,
0168AEE16009097901363E16 /* ModelManager.swift in Sources */, 0168AEE16009097901363E16 /* ModelManager.swift in Sources */,
2CAAF7129F7CC45200FA9F6B /* ModelPickerView.swift in Sources */, 2CAAF7129F7CC45200FA9F6B /* ModelPickerView.swift in Sources */,
B1D9BC407DB7DB1489230C20 /* MonitorView.swift in Sources */, B1D9BC407DB7DB1489230C20 /* MonitorView.swift in Sources */,
@@ -724,8 +747,24 @@
isa = XCRemoteSwiftPackageReference; isa = XCRemoteSwiftPackageReference;
repositoryURL = "https://github.com/ml-explore/mlx-swift-lm"; repositoryURL = "https://github.com/ml-explore/mlx-swift-lm";
requirement = { requirement = {
branch = main; kind = upToNextMajorVersion;
kind = branch; minimumVersion = 3.31.3;
};
};
5479E9F7A876DC346598E560 /* XCRemoteSwiftPackageReference "swift-transformers" */ = {
isa = XCRemoteSwiftPackageReference;
repositoryURL = "https://github.com/huggingface/swift-transformers";
requirement = {
kind = upToNextMajorVersion;
minimumVersion = 1.2.0;
};
};
A6D001FF3D9EA5BA3112F5BF /* XCRemoteSwiftPackageReference "swift-huggingface" */ = {
isa = XCRemoteSwiftPackageReference;
repositoryURL = "https://github.com/huggingface/swift-huggingface";
requirement = {
kind = upToNextMajorVersion;
minimumVersion = 0.9.0;
}; };
}; };
D402301668D113A49B6DD32D /* XCRemoteSwiftPackageReference "swift-markdown-ui" */ = { D402301668D113A49B6DD32D /* XCRemoteSwiftPackageReference "swift-markdown-ui" */ = {
@@ -739,6 +778,11 @@
/* End XCRemoteSwiftPackageReference section */ /* End XCRemoteSwiftPackageReference section */
/* Begin XCSwiftPackageProductDependency section */ /* Begin XCSwiftPackageProductDependency section */
269A55730E9BDC735F9C2B78 /* MLXHuggingFace */ = {
isa = XCSwiftPackageProductDependency;
package = 1AA4C71F15847A241E418C0C /* XCRemoteSwiftPackageReference "mlx-swift-lm" */;
productName = MLXHuggingFace;
};
3F5A4AC6DBAF7CA686ECA74E /* MLXLLM */ = { 3F5A4AC6DBAF7CA686ECA74E /* MLXLLM */ = {
isa = XCSwiftPackageProductDependency; isa = XCSwiftPackageProductDependency;
package = 1AA4C71F15847A241E418C0C /* XCRemoteSwiftPackageReference "mlx-swift-lm" */; package = 1AA4C71F15847A241E418C0C /* XCRemoteSwiftPackageReference "mlx-swift-lm" */;
@@ -754,11 +798,21 @@
package = D402301668D113A49B6DD32D /* XCRemoteSwiftPackageReference "swift-markdown-ui" */; package = D402301668D113A49B6DD32D /* XCRemoteSwiftPackageReference "swift-markdown-ui" */;
productName = MarkdownUI; productName = MarkdownUI;
}; };
BD266A137966DB9451C2C352 /* Tokenizers */ = {
isa = XCSwiftPackageProductDependency;
package = 5479E9F7A876DC346598E560 /* XCRemoteSwiftPackageReference "swift-transformers" */;
productName = Tokenizers;
};
D5E8E1C2DD8D8AABB4306193 /* MLXVLM */ = { D5E8E1C2DD8D8AABB4306193 /* MLXVLM */ = {
isa = XCSwiftPackageProductDependency; isa = XCSwiftPackageProductDependency;
package = 1AA4C71F15847A241E418C0C /* XCRemoteSwiftPackageReference "mlx-swift-lm" */; package = 1AA4C71F15847A241E418C0C /* XCRemoteSwiftPackageReference "mlx-swift-lm" */;
productName = MLXVLM; productName = MLXVLM;
}; };
FDBFD829EE956976552514CC /* HuggingFace */ = {
isa = XCSwiftPackageProductDependency;
package = A6D001FF3D9EA5BA3112F5BF /* XCRemoteSwiftPackageReference "swift-huggingface" */;
productName = HuggingFace;
};
/* End XCSwiftPackageProductDependency section */ /* End XCSwiftPackageProductDependency section */
}; };
rootObject = 938BC479816FCA8527B731F9 /* Project object */; rootObject = 938BC479816FCA8527B731F9 /* Project object */;

View File

@@ -1,5 +1,5 @@
{ {
"originHash" : "418f7299ccb303e0e8992dfc960a3df5df98d527f18667aa162699027b29b6cd", "originHash" : "af28e5c426709ddbdb4b91bab23f3971aba7ff96fb35d16285d757a8f482e340",
"pins" : [ "pins" : [
{ {
"identity" : "eventsource", "identity" : "eventsource",
@@ -15,8 +15,8 @@
"kind" : "remoteSourceControl", "kind" : "remoteSourceControl",
"location" : "https://github.com/ml-explore/mlx-swift", "location" : "https://github.com/ml-explore/mlx-swift",
"state" : { "state" : {
"revision" : "6ba4827fb82c97d012eec9ab4b2de21f85c3b33d", "revision" : "61b9e011e09a62b489f6bd647958f1555bdf2896",
"version" : "0.30.6" "version" : "0.31.3"
} }
}, },
{ {
@@ -24,8 +24,8 @@
"kind" : "remoteSourceControl", "kind" : "remoteSourceControl",
"location" : "https://github.com/ml-explore/mlx-swift-lm", "location" : "https://github.com/ml-explore/mlx-swift-lm",
"state" : { "state" : {
"branch" : "main", "revision" : "1c05248bb0899e2a7a4962b84d319cf12f4e12aa",
"revision" : "bc3c20ef4644c86f2b347debcfe1efe4308712a6" "version" : "3.31.3"
} }
}, },
{ {
@@ -127,6 +127,15 @@
"version" : "1.1.1" "version" : "1.1.1"
} }
}, },
{
"identity" : "swift-syntax",
"kind" : "remoteSourceControl",
"location" : "https://github.com/swiftlang/swift-syntax.git",
"state" : {
"revision" : "0687f71944021d616d34d922343dcef086855920",
"version" : "600.0.1"
}
},
{ {
"identity" : "swift-system", "identity" : "swift-system",
"kind" : "remoteSourceControl", "kind" : "remoteSourceControl",

View File

@@ -12,3 +12,15 @@ struct SceneCommands: Commands {
} }
} }
} }
/// Menu-bar commands for model management.
///
/// Contributes a "Models" command menu containing a single
/// "Manage Models…" item. Choosing the item asks SwiftUI to open the
/// window whose identifier is `ModelManagementWindow.windowID`.
struct ModelCommands: Commands {
    /// Environment-provided action used to open a window by identifier.
    @Environment(\.openWindow) private var openWindow

    var body: some Commands {
        CommandMenu("Models") {
            Button(
                // Kept inline so the call to `openWindow` is made from within `body`.
                action: { openWindow(id: ModelManagementWindow.windowID) },
                label: { Text("Manage Models…") }
            )
        }
    }
}

View File

@@ -11,9 +11,12 @@ struct ContentView: View {
@State private var showLoadError = false @State private var showLoadError = false
@State private var showMonitor = false @State private var showMonitor = false
@State private var showScenePicker = false @State private var showScenePicker = false
@State private var confirmRedownload: ModelConfig?
@State private var exportDocument: ChatExportDocument? @State private var exportDocument: ChatExportDocument?
@State private var documentErrorMessage: String? @State private var documentErrorMessage: String?
@State private var exportErrorMessage: String? @State private var exportErrorMessage: String?
@State private var startupTask: Task<Void, Never>?
@State private var isOpeningDocument = false
private var isRunningTests: Bool { private var isRunningTests: Bool {
ProcessInfo.processInfo.environment["XCTestConfigurationFilePath"] != nil ProcessInfo.processInfo.environment["XCTestConfigurationFilePath"] != nil
@@ -27,6 +30,7 @@ struct ContentView: View {
AnyView(mainContent) AnyView(mainContent)
.navigationTitle(navigationTitleText) .navigationTitle(navigationTitleText)
.onAppear { .onAppear {
modelManager.refreshAvailableModels()
if chatVM == nil { if chatVM == nil {
let vm = ChatViewModel(modelManager: modelManager) let vm = ChatViewModel(modelManager: modelManager)
chatVM = vm chatVM = vm
@@ -37,14 +41,9 @@ struct ContentView: View {
if Preferences.apiAutoStart && !isRunningTests { if Preferences.apiAutoStart && !isRunningTests {
vm.startAPIServer() vm.startAPIServer()
} }
// Restore autosaved session if no document is being opened
if !documentController.hasPendingOpenRequests && !isRunningTests {
Task {
await vm.restoreFromAutosave()
}
}
} }
scheduleStartupWork()
processPendingOpenRequests() processPendingOpenRequests()
} }
.onChange(of: modelManager.currentModel) { .onChange(of: modelManager.currentModel) {
@@ -62,15 +61,34 @@ struct ContentView: View {
showLoadError = modelManager.errorMessage != nil showLoadError = modelManager.errorMessage != nil
} }
.onChange(of: documentController.openRequestNonce) { .onChange(of: documentController.openRequestNonce) {
startupTask?.cancel()
processPendingOpenRequests() processPendingOpenRequests()
} }
} }
private var alertContent: some View { private var alertContent: some View {
AnyView(lifecycleContent) AnyView(lifecycleContent)
.alert("Re-download Model?", isPresented: .init(
get: { confirmRedownload != nil },
set: { if !$0 { confirmRedownload = nil } }
)) {
Button("Re-download", role: .destructive) {
if let config = confirmRedownload {
confirmRedownload = nil
Task { await modelManager.redownloadModel(config) }
}
}
Button("Cancel", role: .cancel) {
confirmRedownload = nil
}
} message: {
if let config = confirmRedownload {
Text("This will delete the local cache for \(config.displayName) and download it again from HuggingFace.")
}
}
.alert("Model Error", isPresented: $showLoadError) { .alert("Model Error", isPresented: $showLoadError) {
Button("Retry") { Button("Retry") {
if let config = modelManager.currentModel ?? ModelConfig.availableModels.first { if let config = modelManager.currentModel ?? modelManager.availableModels.first {
Task { await modelManager.loadModel(config) } Task { await modelManager.loadModel(config) }
} }
} }
@@ -168,6 +186,16 @@ struct ContentView: View {
@ViewBuilder @ViewBuilder
private var toolbarButtons: some View { private var toolbarButtons: some View {
// Re-download current model
if let current = modelManager.currentModel, !modelManager.isLoading {
Button {
confirmRedownload = current
} label: {
Label("Re-download Model", systemImage: "arrow.clockwise")
}
.help("Re-download \(current.displayName)")
}
// API server toggle // API server toggle
let isRunning = chatVM?.apiServer.isRunning == true let isRunning = chatVM?.apiServer.isRunning == true
Button { Button {
@@ -230,7 +258,7 @@ struct ContentView: View {
@ViewBuilder @ViewBuilder
private var modelSwitchShortcuts: some View { private var modelSwitchShortcuts: some View {
ForEach(Array(ModelConfig.availableModels.enumerated()), id: \.element.id) { index, config in ForEach(Array(ModelConfig.curatedModels.enumerated()), id: \.element.id) { index, config in
if index < 9 { if index < 9 {
Button("") { Button("") {
Task { await modelManager.loadModel(config) } Task { await modelManager.loadModel(config) }
@@ -376,11 +404,58 @@ struct ContentView: View {
Task { Task {
while let url = documentController.consumeNextOpenRequest() { while let url = documentController.consumeNextOpenRequest() {
startupTask?.cancel()
await openDocument(at: url) await openDocument(at: url)
} }
} }
} }
/// Schedules the deferred launch work: after a short delay it either hands off to
/// pending document-open requests, restores the autosaved session, or loads a model.
///
/// Does nothing when `chatVM` is nil. Any previously scheduled `startupTask` is
/// cancelled and replaced. Every suspension point is followed by fresh checks of
/// cancellation, `isOpeningDocument` and the pending-open queue, because that state
/// can change while the task is suspended.
private func scheduleStartupWork() {
guard let chatVM else { return }
// Only one startup task may be in flight; drop the previous one.
startupTask?.cancel()
startupTask = Task {
// 250 ms delay before any startup work. A cancellation error thrown by the sleep is
// discarded here; the explicit isCancelled check on the next line handles it.
try? await Task.sleep(nanoseconds: 250_000_000)
guard !Task.isCancelled else { return }
// Open requests that arrived during the delay take priority over restore / model load.
if documentController.hasPendingOpenRequests {
await MainActor.run {
processPendingOpenRequests()
}
return
}
guard !isOpeningDocument else { return }
// Autosave restore is skipped when `isRunningTests` is set
// (NOTE(review): presumably a test-harness flag — defined outside this hunk).
if !isRunningTests, ChatViewModel.hasAutosavedSession {
let restored = await chatVM.restoreFromAutosave()
// Re-validate after the await: the task may have been cancelled or a document open started.
guard !Task.isCancelled else { return }
guard !isOpeningDocument else { return }
// After a successful restore, or if open requests are now queued, process the queue
// and stop here — the default-model load below is skipped on this path.
if restored || documentController.hasPendingOpenRequests {
await MainActor.run {
processPendingOpenRequests()
}
return
}
}
guard !Task.isCancelled else { return }
guard !isOpeningDocument else { return }
guard !documentController.hasPendingOpenRequests else {
await MainActor.run {
processPendingOpenRequests()
}
return
}
// Load a model only when none is current.
guard modelManager.currentModel == nil else { return }
// Preference order: configured default, then last used, then the built-in default.
let modelId = Preferences.defaultModelId ?? Preferences.lastModelId ?? ModelConfig.default.id
if let config = ModelConfig.resolve(modelId) {
await modelManager.loadModel(config)
}
}
}
private func openDocument(at url: URL, skipUnsavedCheck: Bool = false) async { private func openDocument(at url: URL, skipUnsavedCheck: Bool = false) async {
if !skipUnsavedCheck { if !skipUnsavedCheck {
let shouldContinue = confirmDiscardUnsavedChanges( let shouldContinue = confirmDiscardUnsavedChanges(
@@ -390,6 +465,10 @@ struct ContentView: View {
guard shouldContinue else { return } guard shouldContinue else { return }
} }
startupTask?.cancel()
isOpeningDocument = true
defer { isOpeningDocument = false }
do { do {
try await chatVM?.loadDocument(from: url) try await chatVM?.loadDocument(from: url)
} catch { } catch {

View File

@@ -3,12 +3,10 @@
<plist version="1.0"> <plist version="1.0">
<dict> <dict>
<key>com.apple.security.app-sandbox</key> <key>com.apple.security.app-sandbox</key>
<true/> <false/>
<key>com.apple.security.network.client</key> <key>com.apple.security.network.client</key>
<true/> <true/>
<key>com.apple.security.network.server</key> <key>com.apple.security.network.server</key>
<true/> <true/>
<key>com.apple.security.files.user-selected.read-write</key>
<true/>
</dict> </dict>
</plist> </plist>

View File

@@ -46,32 +46,32 @@ struct MLXServerApp: App {
.environment(documentController) .environment(documentController)
.environment(modelManager) .environment(modelManager)
.environment(sceneStore) .environment(sceneStore)
.task {
guard !documentController.hasPendingOpenRequests else { return }
guard !ChatViewModel.hasAutosavedSession else { return }
// Auto-load: configured default → last used → built-in default
let modelId = Preferences.defaultModelId ?? Preferences.lastModelId ?? ModelConfig.default.id
if let config = ModelConfig.availableModels.first(where: { $0.id == modelId }) {
await modelManager.loadModel(config)
}
}
} }
.windowStyle(.titleBar) .windowStyle(.titleBar)
.defaultSize(width: 800, height: 700) .defaultSize(width: 800, height: 700)
.commands { .commands {
SaveChatCommands() SaveChatCommands()
SceneCommands() SceneCommands()
ModelCommands()
} }
Window("Scenes", id: SceneManagementWindow.windowID) { Window("Scenes", id: SceneManagementWindow.windowID) {
SceneManagementView() SceneManagementView()
.environment(modelManager)
.environment(sceneStore) .environment(sceneStore)
} }
.defaultSize(width: 900, height: 560) .defaultSize(width: 900, height: 560)
Window("Models", id: ModelManagementWindow.windowID) {
ModelManagementView()
.environment(modelManager)
}
.defaultSize(width: 900, height: 620)
#if os(macOS) #if os(macOS)
Settings { Settings {
SettingsView() SettingsView()
.environment(modelManager)
.environment(sceneStore) .environment(sceneStore)
} }
#endif #endif

View File

@@ -53,7 +53,7 @@ struct ChatScene: Codable, Identifiable, Hashable {
var resolvedModel: ModelConfig? { var resolvedModel: ModelConfig? {
guard let modelId else { return nil } guard let modelId else { return nil }
return ModelConfig.availableModels.first(where: { $0.id == modelId }) return ModelConfig.resolve(modelId)
} }
static let empty = ChatScene(name: "New Scene") static let empty = ChatScene(name: "New Scene")

View File

@@ -1,30 +1,91 @@
import Foundation import Foundation
import MLXLMCommon import MLXLMCommon
/// User-supplied corrections to a model's detected metadata, keyed elsewhere by repo id.
struct ModelMetadataOverride: Codable, Hashable, Sendable {
    var contextLength: Int
    var primaryLoaderKind: ModelConfig.LoaderKind
    var supportsImages: Bool
    var supportsTools: Bool

    /// Returns a copy whose `contextLength` is clamped to be non-negative;
    /// all other fields are carried over unchanged.
    func normalized() -> ModelMetadataOverride {
        var sanitized = self
        sanitized.contextLength = max(0, contextLength)
        return sanitized
    }
}
/// Defines a supported model with its metadata. /// Defines a supported model with its metadata.
struct ModelConfig: Identifiable, Hashable { struct ModelConfig: Identifiable, Hashable {
enum LoaderKind: Hashable { enum LoaderKind: String, CaseIterable, Codable, Hashable, Sendable {
case llm case llm
case vlm case vlm
/// Human-readable label for the loader kind.
var displayName: String {
    if case .llm = self {
        return "Text"
    }
    return "Vision"
}
} }
let id: String // alias: "gemma", "gemma3n", "qwen" let id: String // alias: "gemma", "gemma3n", "qwen"
let repoId: String // HuggingFace ID let repoId: String // HuggingFace ID
let displayName: String let displayName: String
let contextLength: Int let contextLength: Int
let loaderKind: LoaderKind let loaderKinds: [LoaderKind]
let supportsImages: Bool let supportsImages: Bool
let supportsTools: Bool let supportsTools: Bool
let defaultGenerationSettings: GenerationSettings let defaultGenerationSettings: GenerationSettings
let isCurated: Bool
let localSizeBytes: Int64?
/// All models supported by the app. init(
static let availableModels: [ModelConfig] = [ id: String,
repoId: String,
displayName: String,
contextLength: Int,
loaderKinds: [LoaderKind],
supportsImages: Bool,
supportsTools: Bool,
defaultGenerationSettings: GenerationSettings,
isCurated: Bool = true,
localSizeBytes: Int64? = nil
) {
self.id = id
self.repoId = repoId
self.displayName = displayName
self.contextLength = contextLength
self.loaderKinds = loaderKinds
self.supportsImages = supportsImages
self.supportsTools = supportsTools
self.defaultGenerationSettings = defaultGenerationSettings
self.isCurated = isCurated
self.localSizeBytes = localSizeBytes
}
/// Curated models supported and tuned by the app.
static let curatedModels: [ModelConfig] = [
ModelConfig( ModelConfig(
id: "gemma", id: "gemma",
repoId: "mlx-community/gemma-3-4b-it-4bit", repoId: "mlx-community/gemma-3-4b-it-4bit",
displayName: "Gemma 3 4B", displayName: "Gemma 3 4B",
contextLength: 128_000, contextLength: 128_000,
loaderKind: .vlm, loaderKinds: [.vlm],
supportsImages: true,
supportsTools: true,
defaultGenerationSettings: .technicalDefault
),
ModelConfig(
id: "gemma-4",
repoId: "mlx-community/gemma-4-e4b-it-4bit",
displayName: "Gemma 4 E4B",
contextLength: 128_000,
loaderKinds: [.vlm],
supportsImages: true, supportsImages: true,
supportsTools: true, supportsTools: true,
defaultGenerationSettings: .technicalDefault defaultGenerationSettings: .technicalDefault
@@ -34,7 +95,7 @@ struct ModelConfig: Identifiable, Hashable {
repoId: "mlx-community/Qwen3.5-4B-MLX-4bit", repoId: "mlx-community/Qwen3.5-4B-MLX-4bit",
displayName: "Qwen3.5 4B", displayName: "Qwen3.5 4B",
contextLength: 256_000, contextLength: 256_000,
loaderKind: .vlm, loaderKinds: [.vlm],
supportsImages: true, supportsImages: true,
supportsTools: true, supportsTools: true,
defaultGenerationSettings: .technicalDefault defaultGenerationSettings: .technicalDefault
@@ -44,7 +105,7 @@ struct ModelConfig: Identifiable, Hashable {
repoId: "mlx-community/Qwen3.5-0.8B-4bit", repoId: "mlx-community/Qwen3.5-0.8B-4bit",
displayName: "Qwen3.5 0.8B", displayName: "Qwen3.5 0.8B",
contextLength: 256_000, contextLength: 256_000,
loaderKind: .vlm, loaderKinds: [.vlm],
supportsImages: true, supportsImages: true,
supportsTools: true, supportsTools: true,
defaultGenerationSettings: .technicalDefault defaultGenerationSettings: .technicalDefault
@@ -54,7 +115,7 @@ struct ModelConfig: Identifiable, Hashable {
repoId: "mlx-community/Qwen3.5-9B-4bit", repoId: "mlx-community/Qwen3.5-9B-4bit",
displayName: "Qwen3.5 9B", displayName: "Qwen3.5 9B",
contextLength: 256_000, contextLength: 256_000,
loaderKind: .vlm, loaderKinds: [.vlm],
supportsImages: true, supportsImages: true,
supportsTools: true, supportsTools: true,
defaultGenerationSettings: .technicalDefault defaultGenerationSettings: .technicalDefault
@@ -64,18 +125,45 @@ struct ModelConfig: Identifiable, Hashable {
repoId: "synk/L3-8B-Stheno-v3.2-MLX", repoId: "synk/L3-8B-Stheno-v3.2-MLX",
displayName: "Stheno L3 8B", displayName: "Stheno L3 8B",
contextLength: 8_192, contextLength: 8_192,
loaderKind: .llm, loaderKinds: [.llm],
supportsImages: false,
supportsTools: false,
defaultGenerationSettings: .roleplayDefault
),
ModelConfig(
id: "violet-lotus",
repoId: "hobaratio/MN-Violet-Lotus-12B-mlx-4Bit",
displayName: "Violet Lotus 12B",
contextLength: 32_768,
loaderKinds: [.llm],
supportsImages: false, supportsImages: false,
supportsTools: false, supportsTools: false,
defaultGenerationSettings: .roleplayDefault defaultGenerationSettings: .roleplayDefault
), ),
] ]
static let `default` = availableModels[0] static var availableModels: [ModelConfig] {
mergedModels(localModels: LocalModelResolver.discoveredLocalModels())
}
static let `default` = curatedModels[0]
/// Whether this model is cached locally (no download needed). /// Whether this model is cached locally (no download needed).
var isLocal: Bool { var isLocal: Bool {
LocalModelResolver.isAvailable(repoId: repoId) localSizeBytes != nil || LocalModelResolver.isAvailable(repoId: repoId)
}
var primaryLoaderKind: LoaderKind {
loaderKinds.first ?? .llm
}
var metadataOverrideValue: ModelMetadataOverride {
ModelMetadataOverride(
contextLength: contextLength,
primaryLoaderKind: primaryLoaderKind,
supportsImages: supportsImages,
supportsTools: supportsTools
)
} }
/// Build a ModelConfiguration for mlx-swift-lm from this config. /// Build a ModelConfiguration for mlx-swift-lm from this config.
@@ -86,6 +174,9 @@ struct ModelConfig: Identifiable, Hashable {
/// Resolve a model string (alias, full repo ID, or partial match) to a ModelConfig. /// Resolve a model string (alias, full repo ID, or partial match) to a ModelConfig.
/// Mirrors the Python server's `ModelManager.resolve_model()`. /// Mirrors the Python server's `ModelManager.resolve_model()`.
static func resolve(_ requested: String) -> ModelConfig? { static func resolve(_ requested: String) -> ModelConfig? {
let requested = requested.trimmingCharacters(in: .whitespacesAndNewlines)
guard !requested.isEmpty else { return nil }
// Exact alias match // Exact alias match
if let config = availableModels.first(where: { $0.id == requested }) { if let config = availableModels.first(where: { $0.id == requested }) {
return config return config
@@ -98,6 +189,129 @@ struct ModelConfig: Identifiable, Hashable {
if let config = availableModels.first(where: { requested.contains($0.id) || $0.repoId.contains(requested) || requested.contains($0.repoId) }) { if let config = availableModels.first(where: { requested.contains($0.id) || $0.repoId.contains(requested) || requested.contains($0.repoId) }) {
return config return config
} }
if requested.contains("/") {
return remoteCustom(repoId: requested)
}
return nil return nil
} }
/// Combines the curated catalogue with locally discovered models.
///
/// Curated entries come first (in catalogue order, annotated with their on-disk size
/// when a local copy exists), followed by non-curated local models sorted by display
/// name (case-insensitive).
///
/// - Parameters:
///   - localModels: Models found on disk. If the same `repoId` appears more than once,
///     the first occurrence wins.
///   - applyingOverrides: When `true`, stored metadata overrides are applied to every
///     returned config; when `false`, every config (curated and custom) is returned with
///     its baseline metadata.
static func mergedModels(
    localModels: [LocalModelResolver.LocalModelInfo],
    applyingOverrides: Bool = true
) -> [ModelConfig] {
    // `Dictionary(uniqueKeysWithValues:)` traps on duplicate keys, so de-duplicate first.
    var seenRepoIds = Set<String>()
    let uniqueLocals = localModels.filter { seenRepoIds.insert($0.repoId).inserted }
    let localByRepo = Dictionary(uniqueKeysWithValues: uniqueLocals.map { ($0.repoId, $0) })
    let curatedRepoIds = Set(curatedModels.map(\.repoId))

    let curated = curatedModels.map { config -> ModelConfig in
        let base = localByRepo[config.repoId].map { config.withLocalSize($0.sizeBytes) } ?? config
        return applyingOverrides ? applyMetadataOverrideIfNeeded(to: base) : base
    }

    let discoveredCustom = uniqueLocals
        .filter { !curatedRepoIds.contains($0.repoId) }
        // Forward the flag so baseline requests are not silently overridden.
        .map { customLocal($0, applyingOverrides: applyingOverrides) }
        .sorted { lhs, rhs in
            lhs.displayName.localizedCaseInsensitiveCompare(rhs.displayName) == .orderedAscending
        }

    return curated + discoveredCustom
}

/// Returns the model for `repoId` (matched against repo id or alias) with *no*
/// metadata overrides applied, or `nil` when it is unknown and does not look like
/// an `org/name` repo id.
static func baselineModel(
    forRepoId repoId: String,
    localModels: [LocalModelResolver.LocalModelInfo]
) -> ModelConfig? {
    let baseline = mergedModels(localModels: localModels, applyingOverrides: false)
    if let known = baseline.first(where: { $0.repoId == repoId || $0.id == repoId }) {
        return known
    }
    guard repoId.contains("/") else { return nil }
    return remoteCustom(repoId: repoId, applyingOverrides: false)
}

/// Builds a config for a non-curated repo that is not (known to be) on disk.
///
/// Capabilities are guessed from the repo name and the context length is left at 0.
/// - Parameter applyingOverrides: Pass `false` to get the guessed baseline without the
///   stored metadata override.
static func remoteCustom(repoId: String, applyingOverrides: Bool = true) -> ModelConfig {
    let supportsImages = inferredVisionSupport(repoId: repoId)
    let config = ModelConfig(
        id: repoId,
        repoId: repoId,
        displayName: displayName(for: repoId),
        contextLength: 0,
        // Try the more likely loader first, keep the other as fallback.
        loaderKinds: supportsImages ? [.vlm, .llm] : [.llm, .vlm],
        supportsImages: supportsImages,
        supportsTools: inferredToolSupport(repoId: repoId),
        defaultGenerationSettings: .generalDefault,
        isCurated: false
    )
    return applyingOverrides ? applyMetadataOverrideIfNeeded(to: config) : config
}

/// Derives a display name from the last path component of a repo id,
/// replacing `-` and `_` with spaces.
static func displayName(for repoId: String) -> String {
    let raw = repoId.split(separator: "/").last.map(String.init) ?? repoId
    return raw
        .replacingOccurrences(of: "-", with: " ")
        .replacingOccurrences(of: "_", with: " ")
}

/// Builds a config for a non-curated model found on disk.
/// - Parameter applyingOverrides: Pass `false` to get the detected baseline without the
///   stored metadata override.
private static func customLocal(
    _ local: LocalModelResolver.LocalModelInfo,
    applyingOverrides: Bool = true
) -> ModelConfig {
    let config = ModelConfig(
        id: local.repoId,
        repoId: local.repoId,
        displayName: displayName(for: local.repoId),
        contextLength: local.contextLength,
        loaderKinds: local.loaderKinds,
        supportsImages: local.supportsImages,
        supportsTools: inferredToolSupport(repoId: local.repoId),
        defaultGenerationSettings: .generalDefault,
        isCurated: false,
        localSizeBytes: local.sizeBytes
    )
    return applyingOverrides ? applyMetadataOverrideIfNeeded(to: config) : config
}
/// Guesses tool-calling support from the repo name: true when the lowercased
/// repo id mentions "qwen" or "gemma".
private static func inferredToolSupport(repoId: String) -> Bool {
    let haystack = repoId.lowercased()
    let toolCapableFamilies = ["qwen", "gemma"]
    return toolCapableFamilies.contains { haystack.contains($0) }
}
/// Guesses image support from the repo name: true when the lowercased repo id
/// contains any of "vision", "vl", "gemma-3" or "qwen".
private static func inferredVisionSupport(repoId: String) -> Bool {
    let haystack = repoId.lowercased()
    for marker in ["vision", "vl", "gemma-3", "qwen"] where haystack.contains(marker) {
        return true
    }
    return false
}
/// Returns a copy of this config with `localSizeBytes` set to `sizeBytes`;
/// every other field is carried over.
private func withLocalSize(_ sizeBytes: Int64) -> ModelConfig {
    let onDiskSize: Int64? = sizeBytes
    return .init(
        id: self.id,
        repoId: self.repoId,
        displayName: self.displayName,
        contextLength: self.contextLength,
        loaderKinds: self.loaderKinds,
        supportsImages: self.supportsImages,
        supportsTools: self.supportsTools,
        defaultGenerationSettings: self.defaultGenerationSettings,
        isCurated: self.isCurated,
        localSizeBytes: onDiskSize
    )
}
/// Returns a copy of this config with context length, image/tool support and loader
/// order taken from the (normalized) override. The override's primary loader kind is
/// placed first, followed by the remaining loader kinds.
private func applyingMetadataOverride(_ override: ModelMetadataOverride) -> ModelConfig {
    let sanitized = override.normalized()
    let preferred = sanitized.primaryLoaderKind

    var orderedKinds = LoaderKind.allCases.filter { $0 != preferred }
    orderedKinds.insert(preferred, at: 0)

    return ModelConfig(
        id: id,
        repoId: repoId,
        displayName: displayName,
        contextLength: sanitized.contextLength,
        loaderKinds: orderedKinds,
        supportsImages: sanitized.supportsImages,
        supportsTools: sanitized.supportsTools,
        defaultGenerationSettings: defaultGenerationSettings,
        isCurated: isCurated,
        localSizeBytes: localSizeBytes
    )
}
/// Applies the stored metadata override for `config.repoId`, if there is one;
/// otherwise returns `config` unchanged.
private static func applyMetadataOverrideIfNeeded(to config: ModelConfig) -> ModelConfig {
    if let stored = Preferences.modelMetadataOverride(forRepoId: config.repoId) {
        return config.applyingMetadataOverride(stored)
    }
    return config
}
} }

View File

@@ -335,19 +335,23 @@ final class APIServer {
} }
} }
// NOTE: repetition / presence / frequency penalties are intentionally
// not forwarded to GenerateParameters. mlx-swift-lm 3.31.3's
// PenaltyProcessor uses TokenRing.loadPrompt, which assumes a 1-D
// prompt MLXArray. VLM models (Gemma3, Qwen-VL, …) hand it a 2-D
// [1, N] tokens array, so the ring buffer ends up the wrong size and
// every later MLX.where in TokenRing.append crashes via fatalError.
// Re-enable once upstream fixes TokenRing to flatten the prompt.
let generateParams = GenerateParameters( let generateParams = GenerateParameters(
maxTokens: maxTokens, maxTokens: maxTokens,
temperature: Float(generationSettings.temperature), temperature: Float(generationSettings.temperature),
topP: Float(generationSettings.topP), topP: Float(generationSettings.topP),
topK: generationSettings.topK, topK: generationSettings.topK,
minP: Float(generationSettings.minP), minP: Float(generationSettings.minP)
repetitionPenalty: generationSettings.repetitionPenalty.map(Float.init),
repetitionContextSize: 128,
presencePenalty: generationSettings.presencePenalty.map(Float.init),
presenceContextSize: 128,
frequencyPenalty: generationSettings.frequencyPenalty.map(Float.init),
frequencyContextSize: 128
) )
_ = generationSettings.repetitionPenalty
_ = generationSettings.presencePenalty
_ = generationSettings.frequencyPenalty
let currentModelId = modelManager.currentModel?.id ?? modelName let currentModelId = modelManager.currentModel?.id ?? modelName
let engine = InferenceEngine(container: container) let engine = InferenceEngine(container: container)
let preparedInference: InferenceEngine.PreparedInference let preparedInference: InferenceEngine.PreparedInference

View File

@@ -96,7 +96,7 @@ enum PromptBuilder {
additionalContext: additionalContext additionalContext: additionalContext
) )
let estimatedPromptTokens = (instructions.count + chatMessages.reduce(0) { $0 + $1.content.count }) * 10 / 35 let estimatedPromptTokens = estimatePromptTokens(instructions: instructions, chatMessages: chatMessages)
return PreparedPrompt( return PreparedPrompt(
instructions: instructions, instructions: instructions,
@@ -111,6 +111,13 @@ enum PromptBuilder {
) )
} }
/// Rough token estimate for a prompt: total character count of the instructions and
/// all message contents, scaled by 10/35 (integer arithmetic) and floored at zero.
static func estimatePromptTokens(instructions: String, chatMessages: [Chat.Message]) -> Int {
    var totalCharacters = instructions.count
    for message in chatMessages {
        totalCharacters += message.content.count
    }
    let estimate = totalCharacters * 10 / 35
    return max(0, estimate)
}
private static func imageFingerprint(_ source: String) -> UInt64 { private static func imageFingerprint(_ source: String) -> UInt64 {
var hash: UInt64 = 14_695_981_039_346_656_037 var hash: UInt64 = 14_695_981_039_346_656_037
for byte in source.utf8 { for byte in source.utf8 {

View File

@@ -1,15 +1,25 @@
import Foundation import Foundation
/// Resolves HuggingFace model repos to local directories. /// Resolves HuggingFace model repos to local directories in ~/.cache/huggingface/hub/.
///
/// HubApi(downloadBase: .cachesDirectory, cache: nil) downloads models to:
/// ~/Library/Containers/de.rfc1437.mlxserver/Data/Library/Caches/models/{org}/{name}/
enum LocalModelResolver { enum LocalModelResolver {
/// Base directory where HubApi stores downloaded models. struct LocalModelInfo: Identifiable, Hashable {
private static let modelsBase: URL? = { let repoId: String
FileManager.default.urls(for: .cachesDirectory, in: .userDomainMask).first? let directory: URL
.appendingPathComponent("models", isDirectory: true) let sizeBytes: Int64
let contextLength: Int
let loaderKinds: [ModelConfig.LoaderKind]
let supportsImages: Bool
var id: String { repoId }
}
/// HuggingFace cache directory (~/.cache/huggingface/hub/).
private static let hfCacheBase: URL? = {
return FileManager.default.homeDirectoryForCurrentUser
.appendingPathComponent(".cache", isDirectory: true)
.appendingPathComponent("huggingface", isDirectory: true)
.appendingPathComponent("hub", isDirectory: true)
}() }()
/// Resolve a HuggingFace repo ID (e.g. "mlx-community/gemma-3-4b-it-4bit") /// Resolve a HuggingFace repo ID (e.g. "mlx-community/gemma-3-4b-it-4bit")
@@ -17,12 +27,41 @@ enum LocalModelResolver {
/// ///
/// Returns `nil` if the model hasn't been downloaded yet. /// Returns `nil` if the model hasn't been downloaded yet.
static func resolve(repoId: String) -> URL? { static func resolve(repoId: String) -> URL? {
guard let base = modelsBase else { return nil } print("[LocalModelResolver] Resolving: \(repoId)")
let modelDir = base.appendingPathComponent(repoId, isDirectory: true)
var isDir: ObjCBool = false // Structure: ~/.cache/huggingface/hub/models--{org}--{name}/snapshots/{commit-hash}/
if FileManager.default.fileExists(atPath: modelDir.path, isDirectory: &isDir), isDir.boolValue { guard let hfBase = hfCacheBase else {
return modelDir print("[LocalModelResolver] No cache base")
return nil
} }
let repoSlug = repoId.replacingOccurrences(of: "/", with: "--")
let modelBase = hfBase.appendingPathComponent("models--\(repoSlug)", isDirectory: true)
print("[LocalModelResolver] Checking HF cache: \(modelBase.path)")
let snapshotsDir = modelBase.appendingPathComponent("snapshots", isDirectory: true)
var isDir: ObjCBool = false
guard FileManager.default.fileExists(atPath: snapshotsDir.path, isDirectory: &isDir), isDir.boolValue else {
print("[LocalModelResolver] No snapshots directory found")
return nil
}
if let snapshotDirs = try? FileManager.default.contentsOfDirectory(at: snapshotsDir, includingPropertiesForKeys: nil) {
print("[LocalModelResolver] Found \(snapshotDirs.count) snapshots")
for snapshotDir in snapshotDirs where isDirectory(snapshotDir) {
let configPath = snapshotDir.appendingPathComponent("config.json")
guard FileManager.default.fileExists(atPath: configPath.path) else { continue }
guard hasCompleteWeights(at: snapshotDir) else {
print("[LocalModelResolver] Snapshot missing weight files (incomplete download): \(snapshotDir.path)")
continue
}
print("[LocalModelResolver] Found valid snapshot: \(snapshotDir.path)")
return snapshotDir
}
}
print("[LocalModelResolver] Model not found locally")
return nil return nil
} }
@@ -31,19 +70,222 @@ enum LocalModelResolver {
resolve(repoId: repoId) != nil resolve(repoId: repoId) != nil
} }
/// Scans the HuggingFace cache directory and returns every usable local model,
/// logging the scan root and each model found.
static func discoveredLocalModels() -> [LocalModelInfo] {
    print("[LocalModelResolver] Scanning HF cache: \(hfCacheBase?.path ?? "N/A")")
    guard let cacheRoot = hfCacheBase else {
        return []
    }

    let found = discoverSystemHFModels(in: cacheRoot)
    print("[LocalModelResolver] Found \(found.count) models:")
    found.forEach { model in
        print("[LocalModelResolver] - \(model.repoId) (\(model.sizeBytes / (1024*1024)) MB)")
    }
    return found
}
/// Enumerates `models--{org}--{name}` folders under `base` and returns one
/// `LocalModelInfo` per model, taken from the first snapshot that yields usable info.
///
/// Returns an empty array when `base` cannot be listed.
static func discoverSystemHFModels(in base: URL) -> [LocalModelInfo] {
    let fm = FileManager.default
    let folderPrefix = "models--"

    guard let entries = try? fm.contentsOfDirectory(
        at: base,
        includingPropertiesForKeys: [.isDirectoryKey],
        options: [.skipsHiddenFiles]
    ) else {
        return []
    }

    var results: [LocalModelInfo] = []
    for entry in entries where isDirectory(entry) {
        let folderName = entry.lastPathComponent
        guard folderName.hasPrefix(folderPrefix) else { continue }

        // "models--org--name" -> "org/name"
        let repoId = String(folderName.dropFirst(folderPrefix.count))
            .replacingOccurrences(of: "--", with: "/")

        let snapshotsDir = entry.appendingPathComponent("snapshots", isDirectory: true)
        guard isDirectory(snapshotsDir) else { continue }

        guard let snapshots = try? fm.contentsOfDirectory(
            at: snapshotsDir,
            includingPropertiesForKeys: nil
        ) else {
            continue
        }

        // Lazily probe snapshots so inspection stops at the first usable one.
        let firstUsable = snapshots.lazy
            .filter { isDirectory($0) }
            .compactMap { localModelInfoFromSystemCache(repoId: repoId, directory: $0) }
            .first
        if let firstUsable {
            results.append(firstUsable)
        }
    }
    return results
}
/// Builds a `LocalModelInfo` for one snapshot directory, or returns `nil` when the
/// directory does not contain model artifacts. Image support and context length are
/// inferred from `config.json` / `tokenizer_config.json`.
private static func localModelInfoFromSystemCache(repoId: String, directory: URL) -> LocalModelInfo? {
    if !containsModelArtifacts(at: directory) {
        return nil
    }

    let modelConfig = readJSONObject(at: directory.appendingPathComponent("config.json"))
    let tokenizerSettings = readJSONObject(at: directory.appendingPathComponent("tokenizer_config.json"))

    let hasVision = inferredSupportsImages(
        repoDirectory: directory,
        config: modelConfig,
        tokenizerConfig: tokenizerSettings
    )
    let bytesOnDisk = directorySize(at: directory)
    let maxContext = inferredContextLength(config: modelConfig, tokenizerConfig: tokenizerSettings)

    // Preferred loader first, the other kept as fallback.
    let kinds: [ModelConfig.LoaderKind]
    if hasVision {
        kinds = [.vlm, .llm]
    } else {
        kinds = [.llm, .vlm]
    }

    return LocalModelInfo(
        repoId: repoId,
        directory: directory,
        sizeBytes: bytesOnDisk,
        contextLength: maxContext,
        loaderKinds: kinds,
        supportsImages: hasVision
    )
}
/// True when `url` exists on disk and is a directory.
private static func isDirectory(_ url: URL) -> Bool {
    var directoryFlag: ObjCBool = false
    let exists = FileManager.default.fileExists(atPath: url.path, isDirectory: &directoryFlag)
    return exists && directoryFlag.boolValue
}
/// True when `directory` has a `config.json` and a complete set of weight files.
private static func containsModelArtifacts(at directory: URL) -> Bool {
    let configPath = directory.appendingPathComponent("config.json").path
    guard FileManager.default.fileExists(atPath: configPath) else {
        return false
    }
    return hasCompleteWeights(at: directory)
}
/// Reports whether a snapshot directory holds its weight files.
///
/// A snapshot qualifies when it has a single `model.safetensors`, or when
/// `model.safetensors.index.json` exists, parses, lists at least one shard in its
/// `weight_map`, and every listed shard file is present. Anything else (including a
/// partial or interrupted download) yields `false`.
static func hasCompleteWeights(at directory: URL) -> Bool {
    let files = FileManager.default
    func present(_ name: String) -> Bool {
        files.fileExists(atPath: directory.appendingPathComponent(name).path)
    }

    if present("model.safetensors") {
        return true
    }

    let indexName = "model.safetensors.index.json"
    guard present(indexName) else { return false }
    guard let raw = try? Data(contentsOf: directory.appendingPathComponent(indexName)) else {
        return false
    }
    guard let parsed = (try? JSONSerialization.jsonObject(with: raw)) as? [String: Any],
          let weightMap = parsed["weight_map"] as? [String: Any]
    else {
        return false
    }

    // Many tensors map to the same shard; collect the distinct shard file names.
    var shards = Set<String>()
    for case let shard as String in weightMap.values {
        shards.insert(shard)
    }
    if shards.isEmpty {
        return false
    }
    for shard in shards where !present(shard) {
        return false
    }
    return true
}
/// Delete the local cache for a model so it will be re-downloaded next time. /// Delete the local cache for a model so it will be re-downloaded next time.
@discardableResult @discardableResult
static func deleteLocal(repoId: String) -> Bool { static func deleteLocal(repoId: String) -> Bool {
guard let base = modelsBase else { return false } guard let hfBase = hfCacheBase else { return false }
let modelDir = base.appendingPathComponent(repoId, isDirectory: true)
guard FileManager.default.fileExists(atPath: modelDir.path) else { return false } let repoSlug = repoId.replacingOccurrences(of: "/", with: "--")
let modelBase = hfBase.appendingPathComponent("models--\(repoSlug)", isDirectory: true)
guard FileManager.default.fileExists(atPath: modelBase.path) else { return false }
do { do {
try FileManager.default.removeItem(at: modelDir) try FileManager.default.removeItem(at: modelBase)
print("[LocalModelResolver] Deleted \(modelDir.path)") print("[LocalModelResolver] Deleted cache: \(modelBase.path)")
return true return true
} catch { } catch {
print("[LocalModelResolver] Failed to delete \(modelDir.path): \(error)") print("[LocalModelResolver] Failed to delete \(modelBase.path): \(error)")
return false return false
} }
} }
/// Reads `url` and returns its contents as a JSON dictionary, or `nil` when the file
/// cannot be read, is not valid JSON, or its top level is not an object.
private static func readJSONObject(at url: URL) -> [String: Any]? {
    guard let raw = try? Data(contentsOf: url),
          let parsed = try? JSONSerialization.jsonObject(with: raw)
    else {
        return nil
    }
    return parsed as? [String: Any]
}
/// Decides whether a snapshot looks like a vision model: `config.json` has a
/// `vision_config`, the tokenizer config has an `image_token`, or one of the known
/// processor metadata files exists in `repoDirectory`.
private static func inferredSupportsImages(
    repoDirectory: URL,
    config: [String: Any]?,
    tokenizerConfig: [String: Any]?
) -> Bool {
    let declaresVision = config?["vision_config"] != nil
    if declaresVision { return true }

    let declaresImageToken = tokenizerConfig?["image_token"] != nil
    if declaresImageToken { return true }

    for fileName in [
        "processor_config.json",
        "preprocessor_config.json",
        "video_preprocessor_config.json",
    ] {
        let candidate = repoDirectory.appendingPathComponent(fileName)
        if FileManager.default.fileExists(atPath: candidate.path) {
            return true
        }
    }
    return false
}
/// Looks up the context length, trying in order `text_config.max_position_embeddings`
/// and `max_position_embeddings` in the model config, then `model_max_length` in the
/// tokenizer config. Returns 0 when none of them is present.
private static func inferredContextLength(
    config: [String: Any]?,
    tokenizerConfig: [String: Any]?
) -> Int {
    let lookups: [(path: [String], source: [String: Any]?)] = [
        (["text_config", "max_position_embeddings"], config),
        (["max_position_embeddings"], config),
        (["model_max_length"], tokenizerConfig),
    ]
    for lookup in lookups {
        if let found = integerValue(at: lookup.path, in: lookup.source) {
            return found
        }
    }
    return 0
}
/// Walks `path` through nested JSON dictionaries and returns the number found there
/// as an `Int`.
///
/// Returns `nil` when `json` is nil, any path component is missing or not a
/// dictionary, the leaf is not numeric, or the number is non-finite or does not fit
/// in `Int`. Fractional values are truncated toward zero.
private static func integerValue(at path: [String], in json: [String: Any]?) -> Int? {
    guard let json else { return nil }

    var current: Any = json
    for component in path {
        guard let dictionary = current as? [String: Any], let next = dictionary[component] else {
            return nil
        }
        current = next
    }

    if let number = current as? NSNumber {
        // Tokenizer configs commonly store a ~1e30 sentinel in `model_max_length`.
        // `NSNumber.intValue` on such a value yields a meaningless (platform-dependent)
        // integer, so reject anything that cannot be represented as an Int.
        let value = number.doubleValue
        guard value.isFinite else { return nil }
        return Int(exactly: value.rounded(.towardZero))
    }
    return current as? Int
}
/// Sums the sizes of the regular files reachable from `directory`.
///
/// Symbolic links are resolved before being measured: HuggingFace cache snapshots
/// consist of symlinks into a sibling `blobs/` folder, and resource values read from
/// the link itself describe the link (not a regular file), which would make every
/// snapshot appear empty. Each resolved file is counted once. Hidden files are skipped.
/// Returns 0 when the directory cannot be enumerated.
private static func directorySize(at directory: URL) -> Int64 {
    let keys: Set<URLResourceKey> = [.isRegularFileKey, .fileSizeKey]
    guard let enumerator = FileManager.default.enumerator(
        at: directory,
        includingPropertiesForKeys: Array(keys),
        options: [.skipsHiddenFiles]
    ) else {
        return 0
    }

    var total: Int64 = 0
    var countedPaths = Set<String>()
    for case let entryURL as URL in enumerator {
        // Follow links so the payload (not the link) is measured.
        let targetURL = entryURL.resolvingSymlinksInPath()
        guard let values = try? targetURL.resourceValues(forKeys: keys),
              values.isRegularFile == true
        else {
            continue
        }
        // Two links may point at the same blob; count it only once.
        guard countedPaths.insert(targetURL.path).inserted else { continue }
        total += Int64(values.fileSize ?? 0)
    }
    return total
}
} }

View File

@@ -7,6 +7,7 @@ enum Preferences {
private static let jsonEncoder = JSONEncoder() private static let jsonEncoder = JSONEncoder()
private static let jsonDecoder = JSONDecoder() private static let jsonDecoder = JSONDecoder()
private static let legacyThinkingDefault = true private static let legacyThinkingDefault = true
private static let modelMetadataOverridesKey = "modelMetadataOverrides"
// MARK: - Last used model // MARK: - Last used model
@@ -118,6 +119,26 @@ enum Preferences {
modelGenerationSettingsMap[modelId] != nil modelGenerationSettingsMap[modelId] != nil
} }
/// Returns the stored override for `repoId` in normalized form, or `nil` if none is stored.
static func modelMetadataOverride(forRepoId repoId: String) -> ModelMetadataOverride? {
    guard let stored = modelMetadataOverridesMap[repoId] else {
        return nil
    }
    return stored.normalized()
}
/// Stores the normalized `override` for `repoId`, replacing any existing entry.
static func setModelMetadataOverride(_ override: ModelMetadataOverride, forRepoId repoId: String) {
    var updated = modelMetadataOverridesMap
    updated.updateValue(override.normalized(), forKey: repoId)
    modelMetadataOverridesMap = updated
}
/// Deletes the stored override for `repoId`, if any, and writes the map back.
static func removeModelMetadataOverride(forRepoId repoId: String) {
    var updated = modelMetadataOverridesMap
    updated[repoId] = nil
    modelMetadataOverridesMap = updated
}
/// True when an override is stored for `repoId`.
static func hasModelMetadataOverride(forRepoId repoId: String) -> Bool {
    let overrides = modelMetadataOverridesMap
    return overrides.keys.contains(repoId)
}
private static var modelGenerationSettingsMap: [String: GenerationSettings] { private static var modelGenerationSettingsMap: [String: GenerationSettings] {
get { get {
guard let data = defaults.data(forKey: modelGenerationSettingsKey) else { return [:] } guard let data = defaults.data(forKey: modelGenerationSettingsKey) else { return [:] }
@@ -129,6 +150,17 @@ enum Preferences {
} }
} }
/// Per-repo metadata overrides, persisted as JSON under `modelMetadataOverridesKey`.
/// Reading yields an empty map when nothing is stored or decoding fails; a write whose
/// encoding fails is dropped.
private static var modelMetadataOverridesMap: [String: ModelMetadataOverride] {
    get {
        guard let stored = defaults.data(forKey: modelMetadataOverridesKey),
              let decoded = try? jsonDecoder.decode([String: ModelMetadataOverride].self, from: stored)
        else {
            return [:]
        }
        return decoded
    }
    set {
        if let encoded = try? jsonEncoder.encode(newValue) {
            defaults.set(encoded, forKey: modelMetadataOverridesKey)
        }
    }
}
// MARK: - Idle unload // MARK: - Idle unload
private static let idleUnloadMinutesKey = "idleUnloadMinutes" private static let idleUnloadMinutesKey = "idleUnloadMinutes"

View File

@@ -49,6 +49,34 @@ final class ChatViewModel {
hasUnsavedChanges ? "\(documentDisplayName) *" : documentDisplayName hasUnsavedChanges ? "\(documentDisplayName) *" : documentDisplayName
} }
/// Context length of the currently loaded model, or 0 when
/// `modelManager.currentModel` is nil.
var currentContextLength: Int {
    guard let loadedModel = modelManager.currentModel else { return 0 }
    return loadedModel.contextLength
}
/// Token estimate for the prompt that would be sent right now: the effective
/// system prompt, the conversation history, and the unsent draft in
/// `inputText` when it is not blank.
var estimatedPromptTokens: Int {
    var pendingMessages = conversation.messages.compactMap(historyMessage(from:))

    // The draft only counts once it contains something besides whitespace.
    let trimmedDraft = inputText.trimmingCharacters(in: .whitespacesAndNewlines)
    if trimmedDraft.isEmpty == false {
        let draftMessage = Chat.Message(role: .user, content: trimmedDraft)
        pendingMessages.append(draftMessage)
    }

    return PromptBuilder.estimatePromptTokens(
        instructions: effectiveSystemPrompt,
        chatMessages: pendingMessages
    )
}
/// Tokens currently occupying the context window.
///
/// While generating, and once either counter is non-zero, this is
/// `promptTokens + generationTokens`; otherwise it falls back to
/// `estimatedPromptTokens`.
var contextUsedTokens: Int {
    guard isGenerating, promptTokens > 0 || generationTokens > 0 else {
        return estimatedPromptTokens
    }
    return promptTokens + generationTokens
}
/// Fraction of the context window in use, clamped to `0...1`.
/// Returns 0 when the context length is not positive.
var contextFillRatio: Double {
    if currentContextLength <= 0 {
        return 0
    }
    let rawRatio = Double(contextUsedTokens) / Double(currentContextLength)
    let nonNegative = max(rawRatio, 0)
    return min(nonNegative, 1)
}
/// Ensure a ChatSession exists for the current model. /// Ensure a ChatSession exists for the current model.
private func ensureSession() { private func ensureSession() {
guard let container = modelManager.modelContainer else { return } guard let container = modelManager.modelContainer else { return }
@@ -60,18 +88,19 @@ final class ChatViewModel {
let thinkingContext: [String: any Sendable]? = generationSettings.thinkingEnabled let thinkingContext: [String: any Sendable]? = generationSettings.thinkingEnabled
? nil ? nil
: ["enable_thinking": false] : ["enable_thinking": false]
// NOTE: repetition / presence / frequency penalties are intentionally
// not forwarded to GenerateParameters. mlx-swift-lm 3.31.3's
// PenaltyProcessor uses TokenRing.loadPrompt, which assumes a 1-D
// prompt MLXArray. VLM models (Gemma3, Qwen-VL, etc.) hand it a 2-D
// [1, N] tokens array, so the ring buffer ends up the wrong size and
// every later MLX.where in TokenRing.append crashes via fatalError.
// Re-enable once upstream fixes TokenRing to flatten the prompt.
let generateParameters = GenerateParameters( let generateParameters = GenerateParameters(
maxTokens: generationSettings.maxTokens, maxTokens: generationSettings.maxTokens,
temperature: Float(generationSettings.temperature), temperature: Float(generationSettings.temperature),
topP: Float(generationSettings.topP), topP: Float(generationSettings.topP),
topK: generationSettings.topK, topK: generationSettings.topK,
minP: Float(generationSettings.minP), minP: Float(generationSettings.minP)
repetitionPenalty: generationSettings.repetitionPenalty.map(Float.init),
repetitionContextSize: 128,
presencePenalty: generationSettings.presencePenalty.map(Float.init),
presenceContextSize: 128,
frequencyPenalty: generationSettings.frequencyPenalty.map(Float.init),
frequencyContextSize: 128
) )
let history = conversation.messages.compactMap(historyMessage(from:)) let history = conversation.messages.compactMap(historyMessage(from:))
if history.isEmpty { if history.isEmpty {
@@ -531,7 +560,7 @@ final class ChatViewModel {
if modelManager.currentModel == nil { if modelManager.currentModel == nil {
let modelId = Preferences.defaultModelId ?? Preferences.lastModelId ?? ModelConfig.default.id let modelId = Preferences.defaultModelId ?? Preferences.lastModelId ?? ModelConfig.default.id
if let config = ModelConfig.availableModels.first(where: { $0.id == modelId }) { if let config = ModelConfig.resolve(modelId) {
await modelManager.loadModel(config) await modelManager.loadModel(config)
} }
} }

View File

@@ -1,25 +1,22 @@
import Foundation import Foundation
import Hub import HuggingFace
import MLX import MLX
import MLXHuggingFace
import MLXLLM import MLXLLM
import MLXLMCommon import MLXLMCommon
import MLXVLM import MLXVLM
import Tokenizers
/// Manages model loading, switching, and generation. /// Manages model loading, switching, and generation.
@Observable @Observable
@MainActor @MainActor
final class ModelManager { final class ModelManager {
/// HubApi with blob cache disabled to avoid storing every model twice. private static let hubClient = HubClient.default
/// swift-huggingface defaults to caching in both huggingface/hub/ (snapshots)
/// AND models/ (content-addressed blobs). We only need the snapshots.
/// Must use the same downloadBase as defaultHubApi (.cachesDirectory) so
/// LocalModelResolver can find downloaded models.
private static let hub: HubApi = {
let cachesDir = FileManager.default.urls(for: .cachesDirectory, in: .userDomainMask).first
return HubApi(downloadBase: cachesDir, cache: nil)
}()
var currentModel: ModelConfig? var currentModel: ModelConfig?
var availableModels: [ModelConfig]
private(set) var discoveredLocalModels: [LocalModelResolver.LocalModelInfo] = []
var modelContainer: ModelContainer? var modelContainer: ModelContainer?
var isLoading = false var isLoading = false
var downloadProgress: Double = 0 var downloadProgress: Double = 0
@@ -28,32 +25,99 @@ final class ModelManager {
// Download-specific state for the modal // Download-specific state for the modal
var isDownloading = false var isDownloading = false
var downloadFilesTotal: Int64 = 0 var downloadBytesTotal: Int64 = 0
var downloadFilesCompleted: Int64 = 0 var downloadBytesCompleted: Int64 = 0
var downloadSpeed: Double = 0 // bytes/sec var downloadSpeed: Double = 0 // bytes/sec
private var idleTimer: Timer? private var idleTimer: Timer?
private(set) var lastUsed: Date? private(set) var lastUsed: Date?
private var latestLoadRequestID = UUID()
/// Creates the manager with an empty model list, then fills it via
/// `refreshAvailableModels()`.
init() {
    // Assign the stored property first; the refresh below is an instance
    // method call and overwrites this placeholder.
    availableModels = []
    refreshAvailableModels()
}
/// The subset of `availableModels` flagged `isCurated`, in list order.
var curatedModels: [ModelConfig] {
    return availableModels.filter { $0.isCurated }
}
/// The subset of `availableModels` flagged `isLocal`, sorted by display name
/// using a localized, case-insensitive comparison.
var localModelsOnDisk: [ModelConfig] {
    let onDisk = availableModels.filter { $0.isLocal }
    return onDisk.sorted { lhs, rhs in
        lhs.displayName.localizedCaseInsensitiveCompare(rhs.displayName) == .orderedAscending
    }
}
/// Re-runs local model discovery and rebuilds `availableModels` from it.
///
/// When a model is loaded, `currentModel` is re-pointed at the rebuilt entry
/// with the same `repoId`; if no such entry exists the current value is kept.
func refreshAvailableModels() {
    let locals = LocalModelResolver.discoveredLocalModels()
    discoveredLocalModels = locals
    availableModels = ModelConfig.mergedModels(localModels: locals)

    guard let loaded = currentModel else { return }
    let refreshed = availableModels.first { $0.repoId == loaded.repoId }
    currentModel = refreshed ?? loaded
}
/// Returns the first discovered local model whose `repoId` matches, if any.
func discoveredLocalModelInfo(repoId: String) -> LocalModelResolver.LocalModelInfo? {
    return discoveredLocalModels.first { candidate in
        candidate.repoId == repoId
    }
}
/// Forwards to `ModelConfig.baselineModel(forRepoId:localModels:)` using the
/// most recently discovered local models.
func baselineModel(repoId: String) -> ModelConfig? {
    let locals = discoveredLocalModels
    return ModelConfig.baselineModel(forRepoId: repoId, localModels: locals)
}
/// Persists `override` for the config's repo, then rebuilds the model list so
/// it is read again.
func saveMetadataOverride(_ override: ModelMetadataOverride, for config: ModelConfig) {
    let targetRepoId = config.repoId
    Preferences.setModelMetadataOverride(override, forRepoId: targetRepoId)
    refreshAvailableModels()
}
/// Removes any persisted override for the config's repo, then rebuilds the
/// model list.
func clearMetadataOverride(for config: ModelConfig) {
    let targetRepoId = config.repoId
    Preferences.removeModelMetadataOverride(forRepoId: targetRepoId)
    refreshAvailableModels()
}
/// Resets everything that describes a loaded or loading model: the idle
/// timer, the container and current model, and all loading/download progress
/// fields. Does not touch `errorMessage` or the GPU cache.
private func clearLoadedState() {
    // Idle tracking
    if let timer = idleTimer {
        timer.invalidate()
    }
    idleTimer = nil
    lastUsed = nil

    // Loaded model
    modelContainer = nil
    currentModel = nil

    // Loading / download progress
    isLoading = false
    isDownloading = false
    loadingModelName = ""
    downloadProgress = 0
    downloadBytesTotal = 0
    downloadBytesCompleted = 0
    downloadSpeed = 0
}
/// Load a model, unloading the current one first. /// Load a model, unloading the current one first.
/// Prefers the local snapshot from ~/.cache/huggingface/hub/ (shared with the Python server). /// Prefers the local snapshot from ~/.cache/huggingface/hub/ (shared with the Python server).
/// Only downloads if the model isn't cached locally. /// Only downloads if the model isn't cached locally.
func loadModel(_ config: ModelConfig) async { func loadModel(_ config: ModelConfig) async {
if currentModel?.id == config.id && modelContainer != nil { refreshAvailableModels()
let effectiveConfig = availableModels.first(where: { $0.repoId == config.repoId }) ?? config
if currentModel?.repoId == effectiveConfig.repoId && modelContainer != nil {
currentModel = effectiveConfig
return // already loaded return // already loaded
} }
unloadModel() let requestID = UUID()
latestLoadRequestID = requestID
clearLoadedState()
MLX.GPU.clearCache()
isLoading = true isLoading = true
downloadProgress = 0 downloadProgress = 0
loadingModelName = config.displayName loadingModelName = effectiveConfig.displayName
errorMessage = nil errorMessage = nil
let needsDownload = !config.isLocal let needsDownload = !effectiveConfig.isLocal
if needsDownload { if needsDownload {
isDownloading = true isDownloading = true
downloadFilesTotal = 0 downloadBytesTotal = 0
downloadFilesCompleted = 0 downloadBytesCompleted = 0
downloadSpeed = 0 downloadSpeed = 0
} }
@@ -62,8 +126,8 @@ final class ModelManager {
Task { @MainActor in Task { @MainActor in
self.downloadProgress = progress.fractionCompleted self.downloadProgress = progress.fractionCompleted
if self.isDownloading { if self.isDownloading {
self.downloadFilesTotal = progress.totalUnitCount self.downloadBytesTotal = progress.totalUnitCount
self.downloadFilesCompleted = progress.completedUnitCount self.downloadBytesCompleted = progress.completedUnitCount
if let speed = progress.userInfo[.throughputKey] as? Double { if let speed = progress.userInfo[.throughputKey] as? Double {
self.downloadSpeed = speed self.downloadSpeed = speed
} }
@@ -72,37 +136,31 @@ final class ModelManager {
} }
let configuration: ModelConfiguration let configuration: ModelConfiguration
if let localDir = LocalModelResolver.resolve(repoId: config.repoId) { if let localDir = LocalModelResolver.resolve(repoId: effectiveConfig.repoId) {
configuration = ModelConfiguration(directory: localDir) configuration = ModelConfiguration(directory: localDir)
} else { } else {
configuration = config.modelConfiguration configuration = effectiveConfig.modelConfiguration
} }
let container: ModelContainer let container = try await Self.loadContainer(
switch config.loaderKind { for: effectiveConfig,
case .llm:
container = try await LLMModelFactory.shared.loadContainer(
hub: Self.hub,
configuration: configuration, configuration: configuration,
progressHandler: progressHandler progressHandler: progressHandler
) )
case .vlm:
container = try await VLMModelFactory.shared.loadContainer(
hub: Self.hub,
configuration: configuration,
progressHandler: progressHandler
)
}
guard latestLoadRequestID == requestID else { return }
refreshAvailableModels()
self.isDownloading = false self.isDownloading = false
self.modelContainer = container self.modelContainer = container
self.currentModel = config self.currentModel = self.availableModels.first(where: { $0.repoId == effectiveConfig.repoId }) ?? effectiveConfig
touchActivity() touchActivity()
} catch { } catch {
guard latestLoadRequestID == requestID else { return }
self.isDownloading = false self.isDownloading = false
self.errorMessage = "Failed to load model: \(error.localizedDescription)" self.errorMessage = "Failed to load model: \(error.localizedDescription)"
} }
guard latestLoadRequestID == requestID else { return }
isLoading = false isLoading = false
} }
@@ -113,13 +171,29 @@ final class ModelManager {
await loadModel(config) await loadModel(config)
} }
/// Loads the model identified by a user-entered ID.
///
/// The ID is trimmed of surrounding whitespace. A blank ID sets
/// `errorMessage` and returns. Otherwise the ID is resolved through
/// `ModelConfig.resolve`, falling back to `ModelConfig.remoteCustom`, and
/// the result is handed to `loadModel`.
func addModel(repoId: String) async {
    let trimmedId = repoId.trimmingCharacters(in: .whitespacesAndNewlines)
    if trimmedId.isEmpty {
        errorMessage = "Enter a HuggingFace model ID."
        return
    }

    let target: ModelConfig
    if let known = ModelConfig.resolve(trimmedId) {
        target = known
    } else {
        target = ModelConfig.remoteCustom(repoId: trimmedId)
    }
    await loadModel(target)
}
/// Deletes the local files for `config`, unloading it first when it is the
/// current model, then rebuilds the model list. The result of
/// `LocalModelResolver.deleteLocal` is intentionally discarded.
func deleteModel(_ config: ModelConfig) {
    let targetRepoId = config.repoId
    let isCurrentlyLoaded = currentModel?.repoId == targetRepoId
    if isCurrentlyLoaded {
        unloadModel()
    }
    _ = LocalModelResolver.deleteLocal(repoId: targetRepoId)
    refreshAvailableModels()
}
/// Unload the current model and free GPU memory. /// Unload the current model and free GPU memory.
func unloadModel() { func unloadModel() {
idleTimer?.invalidate() latestLoadRequestID = UUID()
idleTimer = nil clearLoadedState()
lastUsed = nil
modelContainer = nil
currentModel = nil
MLX.GPU.clearCache() MLX.GPU.clearCache()
} }
@@ -142,4 +216,37 @@ final class ModelManager {
var isReady: Bool { var isReady: Bool {
modelContainer != nil && !isLoading modelContainer != nil && !isLoading
} }
/// Loads a model container by trying each of `config.loaderKinds` in order
/// and returning the first one that succeeds.
///
/// - Parameters:
///   - config: Supplies the ordered list of loader kinds to attempt.
///   - configuration: The model configuration handed to each factory.
///   - progressHandler: Forwarded to the factory for progress reporting.
/// - Returns: The container produced by the first loader that succeeds.
/// - Throws: `CancellationError` as soon as cancellation is observed;
///   otherwise the error from the last loader attempted, or a generic
///   "Unsupported model configuration" error when there are no loader kinds.
private static func loadContainer(
    for config: ModelConfig,
    configuration: ModelConfiguration,
    progressHandler: @escaping @Sendable (Progress) -> Void
) async throws -> ModelContainer {
    var lastError: Error?
    for loaderKind in config.loaderKinds {
        // Do not start another attempt on behalf of a task that was
        // already cancelled.
        try Task.checkCancellation()
        do {
            switch loaderKind {
            case .llm:
                return try await LLMModelFactory.shared.loadContainer(
                    from: #hubDownloader(Self.hubClient),
                    using: #huggingFaceTokenizerLoader(),
                    configuration: configuration,
                    progressHandler: progressHandler
                )
            case .vlm:
                return try await VLMModelFactory.shared.loadContainer(
                    from: #hubDownloader(Self.hubClient),
                    using: #huggingFaceTokenizerLoader(),
                    configuration: configuration,
                    progressHandler: progressHandler
                )
            }
        } catch let cancellation as CancellationError {
            // Cancellation is not a loader failure: falling through to the
            // next loader kind would retry work nobody is waiting for.
            throw cancellation
        } catch {
            // Remember the failure and fall back to the next loader kind.
            lastError = error
        }
    }
    throw lastError ?? NSError(domain: "ModelManager", code: -1, userInfo: [NSLocalizedDescriptionKey: "Unsupported model configuration"])
}
} }

View File

@@ -20,9 +20,9 @@ struct DownloadModalView: View {
.progressViewStyle(.linear) .progressViewStyle(.linear)
HStack { HStack {
// Files progress // Bytes progress
if modelManager.downloadFilesTotal > 0 { if modelManager.downloadBytesTotal > 0 {
Text("File \(modelManager.downloadFilesCompleted)/\(modelManager.downloadFilesTotal)") Text("\(formatBytes(modelManager.downloadBytesCompleted)) / \(formatBytes(modelManager.downloadBytesTotal))")
.font(.caption.monospacedDigit()) .font(.caption.monospacedDigit())
.foregroundStyle(.secondary) .foregroundStyle(.secondary)
} }
@@ -65,4 +65,17 @@ struct DownloadModalView: View {
return String(format: "%.0f B/s", bytesPerSec) return String(format: "%.0f B/s", bytesPerSec)
} }
} }
/// Formats a byte count for display using binary (1024-based) units.
///
/// GB values show two decimals; MB and KB values are rounded to whole
/// numbers; anything below 1024 is shown as an exact byte count.
///
/// The unit is chosen after rounding: a value that would otherwise print as
/// "1024 KB" or "1024 MB" is promoted to the next unit ("1 MB", "1.00 GB").
///
/// - Parameter bytes: The byte count to format.
/// - Returns: A string such as `"512 B"`, `"12 KB"`, `"340 MB"` or `"1.50 GB"`.
private func formatBytes(_ bytes: Int64) -> String {
    let kib = 1024.0
    let mib = 1_048_576.0
    let gib = 1_073_741_824.0
    let value = Double(bytes)

    if value >= gib || (value / mib).rounded() >= 1024 {
        return String(format: "%.2f GB", value / gib)
    } else if value >= mib || (value / kib).rounded() >= 1024 {
        return String(format: "%.0f MB", value / mib)
    } else if value >= kib {
        return String(format: "%.0f KB", value / kib)
    } else {
        return "\(bytes) B"
    }
}
} }

View File

@@ -1,66 +1,18 @@
import SwiftUI import SwiftUI
private let generationDoubleFormat = FloatingPointFormatStyle<Double>.number.precision(.fractionLength(0...2))
private let generationIntegerFormat = IntegerFormatStyle<Int>.number.grouping(.never)
struct GenerationDefaultsEditor: View { struct GenerationDefaultsEditor: View {
@Binding var settings: GenerationSettings @Binding var settings: GenerationSettings
var body: some View { var body: some View {
Toggle("Enable thinking mode", isOn: $settings.thinkingEnabled) Toggle("Enable thinking mode", isOn: $settings.thinkingEnabled)
doubleRow("Temperature", value: $settings.temperature) DecimalSettingRow(title: "Temperature", value: $settings.temperature)
doubleRow("Top P", value: $settings.topP) DecimalSettingRow(title: "Top P", value: $settings.topP)
intRow("Top K", value: $settings.topK) IntegerSettingRow(title: "Top K", value: $settings.topK)
doubleRow("Min P", value: $settings.minP) DecimalSettingRow(title: "Min P", value: $settings.minP)
intRow("Max tokens", value: $settings.maxTokens) IntegerSettingRow(title: "Max tokens", value: $settings.maxTokens)
optionalDoubleRow("Repetition penalty", value: $settings.repetitionPenalty) OptionalDecimalSettingRow(title: "Repetition penalty", value: $settings.repetitionPenalty, fallbackValue: 1.0)
optionalDoubleRow("Presence penalty", value: $settings.presencePenalty) OptionalDecimalSettingRow(title: "Presence penalty", value: $settings.presencePenalty, fallbackValue: 0.0)
optionalDoubleRow("Frequency penalty", value: $settings.frequencyPenalty) OptionalDecimalSettingRow(title: "Frequency penalty", value: $settings.frequencyPenalty, fallbackValue: 0.0)
}
private func doubleRow(_ title: String, value: Binding<Double>) -> some View {
HStack {
Text(title)
Spacer()
TextField(title, value: value, format: generationDoubleFormat)
.multilineTextAlignment(.trailing)
.frame(width: 90)
}
}
private func intRow(_ title: String, value: Binding<Int>) -> some View {
HStack {
Text(title)
Spacer()
TextField(title, value: value, format: generationIntegerFormat)
.multilineTextAlignment(.trailing)
.frame(width: 90)
}
}
private func optionalDoubleRow(_ title: String, value: Binding<Double?>) -> some View {
HStack {
Text(title)
Spacer()
TextField(title, value: binding(for: value), format: generationDoubleFormat)
.multilineTextAlignment(.trailing)
.frame(width: 90)
Button(value.wrappedValue == nil ? "Set" : "Clear") {
if value.wrappedValue == nil {
value.wrappedValue = 1.0
} else {
value.wrappedValue = nil
}
}
.buttonStyle(.link)
}
}
private func binding(for value: Binding<Double?>) -> Binding<Double> {
Binding(
get: { value.wrappedValue ?? 1.0 },
set: { value.wrappedValue = $0 }
)
} }
} }
@@ -76,69 +28,228 @@ struct GenerationOverridesEditor: View {
Text("Disabled").tag(Optional(false)) Text("Disabled").tag(Optional(false))
} }
optionalDoubleRow("Temperature", value: $overrides.temperature, inheritedValue: inheritedSettings.temperature) OptionalDecimalSettingRow(title: "Temperature", value: $overrides.temperature, fallbackValue: inheritedSettings.temperature, inherited: true)
optionalDoubleRow("Top P", value: $overrides.topP, inheritedValue: inheritedSettings.topP) OptionalDecimalSettingRow(title: "Top P", value: $overrides.topP, fallbackValue: inheritedSettings.topP, inherited: true)
optionalIntRow("Top K", value: $overrides.topK, inheritedValue: inheritedSettings.topK) OptionalIntegerSettingRow(title: "Top K", value: $overrides.topK, fallbackValue: inheritedSettings.topK, inherited: true)
optionalDoubleRow("Min P", value: $overrides.minP, inheritedValue: inheritedSettings.minP) OptionalDecimalSettingRow(title: "Min P", value: $overrides.minP, fallbackValue: inheritedSettings.minP, inherited: true)
optionalIntRow("Max tokens", value: $overrides.maxTokens, inheritedValue: inheritedSettings.maxTokens) OptionalIntegerSettingRow(title: "Max tokens", value: $overrides.maxTokens, fallbackValue: inheritedSettings.maxTokens, inherited: true)
optionalDoubleRow("Repetition penalty", value: $overrides.repetitionPenalty, inheritedValue: inheritedSettings.repetitionPenalty ?? 0) OptionalDecimalSettingRow(title: "Repetition penalty", value: $overrides.repetitionPenalty, fallbackValue: inheritedSettings.repetitionPenalty ?? 0, inherited: true)
optionalDoubleRow("Presence penalty", value: $overrides.presencePenalty, inheritedValue: inheritedSettings.presencePenalty ?? 0) OptionalDecimalSettingRow(title: "Presence penalty", value: $overrides.presencePenalty, fallbackValue: inheritedSettings.presencePenalty ?? 0, inherited: true)
optionalDoubleRow("Frequency penalty", value: $overrides.frequencyPenalty, inheritedValue: inheritedSettings.frequencyPenalty ?? 0) OptionalDecimalSettingRow(title: "Frequency penalty", value: $overrides.frequencyPenalty, fallbackValue: inheritedSettings.frequencyPenalty ?? 0, inherited: true)
Text("Unset fields inherit from \(inheritedSource). The values shown are the effective starting values for this scene.") Text("Unset fields inherit from \(inheritedSource). The values shown are the effective starting values for this scene.")
.font(.caption) .font(.caption)
.foregroundStyle(.secondary) .foregroundStyle(.secondary)
} }
}
private func optionalDoubleRow(_ title: String, value: Binding<Double?>, inheritedValue: Double) -> some View { private struct DecimalSettingRow: View {
let title: String
@Binding var value: Double
@State private var text: String
init(title: String, value: Binding<Double>) {
self.title = title
self._value = value
self._text = State(initialValue: NumericFieldFormatting.doubleString(value.wrappedValue))
}
var body: some View {
HStack { HStack {
Text(title) Text(title)
Spacer() Spacer()
TextField(title, value: Binding( TextField("", text: $text)
get: { value.wrappedValue ?? inheritedValue },
set: { value.wrappedValue = $0 }
), format: generationDoubleFormat)
.multilineTextAlignment(.trailing) .multilineTextAlignment(.trailing)
.frame(width: 90) .frame(width: 90)
if value.wrappedValue == nil { .onChange(of: text) {
if let parsed = NumericFieldFormatting.parseDouble(text) {
value = parsed
}
}
.onChange(of: value) {
let formatted = NumericFieldFormatting.doubleString(value)
if text != formatted {
text = formatted
}
}
}
}
}
/// A labelled row that edits an `Int` setting through a plain text field.
///
/// The field keeps its own text so partially typed input is not rejected.
/// The bound value is updated whenever the text parses as an integer, and
/// the text is rewritten when the value changes to something whose string
/// form differs from what is shown.
private struct IntegerSettingRow: View {
    let title: String
    @Binding var value: Int
    @State private var text: String

    init(title: String, value: Binding<Int>) {
        self.title = title
        _value = value
        _text = State(initialValue: NumericFieldFormatting.intString(value.wrappedValue))
    }

    var body: some View {
        HStack {
            Text(title)
            Spacer()
            TextField("", text: $text)
                .multilineTextAlignment(.trailing)
                .frame(width: 90)
                .onChange(of: text) {
                    commitTextIfValid()
                }
                .onChange(of: value) {
                    refreshTextFromValue()
                }
        }
    }

    /// Pushes the field's text into the binding when it parses as an integer.
    private func commitTextIfValid() {
        guard let parsed = NumericFieldFormatting.parseInt(text) else { return }
        value = parsed
    }

    /// Rewrites the field's text when it no longer matches the bound value.
    private func refreshTextFromValue() {
        let canonical = NumericFieldFormatting.intString(value)
        guard text != canonical else { return }
        text = canonical
    }
}
private struct OptionalDecimalSettingRow: View {
let title: String
@Binding var value: Double?
let fallbackValue: Double
var inherited = false
@State private var text: String
init(title: String, value: Binding<Double?>, fallbackValue: Double, inherited: Bool = false) {
self.title = title
self._value = value
self.fallbackValue = fallbackValue
self.inherited = inherited
self._text = State(initialValue: NumericFieldFormatting.doubleString(value.wrappedValue ?? fallbackValue))
}
var body: some View {
HStack {
Text(title)
Spacer()
TextField("", text: $text)
.multilineTextAlignment(.trailing)
.frame(width: 90)
.onChange(of: text) {
if let parsed = NumericFieldFormatting.parseDouble(text) {
value = parsed
}
}
.onChange(of: value) {
syncText()
}
.onChange(of: fallbackValue) {
if value == nil {
syncText()
}
}
if inherited && value == nil {
Text("Inherited") Text("Inherited")
.font(.caption) .font(.caption)
.foregroundStyle(.secondary) .foregroundStyle(.secondary)
} }
Button(value.wrappedValue == nil ? "Override" : "Clear") { Button(value == nil ? "Override" : "Clear") {
if value.wrappedValue == nil { if value == nil {
value.wrappedValue = inheritedValue value = fallbackValue
} else { } else {
value.wrappedValue = nil value = nil
} }
syncText()
} }
.buttonStyle(.link) .buttonStyle(.link)
} }
} }
private func optionalIntRow(_ title: String, value: Binding<Int?>, inheritedValue: Int) -> some View { private func syncText() {
let formatted = NumericFieldFormatting.doubleString(value ?? fallbackValue)
if text != formatted {
text = formatted
}
}
}
private struct OptionalIntegerSettingRow: View {
let title: String
@Binding var value: Int?
let fallbackValue: Int
var inherited = false
@State private var text: String
init(title: String, value: Binding<Int?>, fallbackValue: Int, inherited: Bool = false) {
self.title = title
self._value = value
self.fallbackValue = fallbackValue
self.inherited = inherited
self._text = State(initialValue: NumericFieldFormatting.intString(value.wrappedValue ?? fallbackValue))
}
var body: some View {
HStack { HStack {
Text(title) Text(title)
Spacer() Spacer()
TextField(title, value: Binding( TextField("", text: $text)
get: { value.wrappedValue ?? inheritedValue },
set: { value.wrappedValue = $0 }
), format: generationIntegerFormat)
.multilineTextAlignment(.trailing) .multilineTextAlignment(.trailing)
.frame(width: 90) .frame(width: 90)
if value.wrappedValue == nil { .onChange(of: text) {
if let parsed = NumericFieldFormatting.parseInt(text) {
value = parsed
}
}
.onChange(of: value) {
syncText()
}
.onChange(of: fallbackValue) {
if value == nil {
syncText()
}
}
if inherited && value == nil {
Text("Inherited") Text("Inherited")
.font(.caption) .font(.caption)
.foregroundStyle(.secondary) .foregroundStyle(.secondary)
} }
Button(value.wrappedValue == nil ? "Override" : "Clear") { Button(value == nil ? "Override" : "Clear") {
if value.wrappedValue == nil { if value == nil {
value.wrappedValue = inheritedValue value = fallbackValue
} else { } else {
value.wrappedValue = nil value = nil
} }
syncText()
} }
.buttonStyle(.link) .buttonStyle(.link)
} }
} }
private func syncText() {
let formatted = NumericFieldFormatting.intString(value ?? fallbackValue)
if text != formatted {
text = formatted
}
}
}
/// Parsing and formatting helpers shared by the numeric setting rows.
private enum NumericFieldFormatting {
    /// Parses user-entered decimal text.
    ///
    /// Surrounding whitespace is ignored and `,` is accepted as a decimal
    /// separator. Returns `nil` for blank or unparseable text, and for text
    /// that parses to a non-finite value ("inf", "nan", or a magnitude too
    /// large for `Double`), so such values never reach a setting.
    static func parseDouble(_ text: String) -> Double? {
        let trimmed = text.trimmingCharacters(in: .whitespacesAndNewlines)
        guard !trimmed.isEmpty else { return nil }
        guard let parsed = Double(trimmed.replacingOccurrences(of: ",", with: ".")),
              parsed.isFinite
        else { return nil }
        return parsed
    }

    /// Parses user-entered integer text, ignoring surrounding whitespace.
    /// Returns `nil` for blank or unparseable text.
    static func parseInt(_ text: String) -> Int? {
        let trimmed = text.trimmingCharacters(in: .whitespacesAndNewlines)
        guard !trimmed.isEmpty else { return nil }
        return Int(trimmed)
    }

    /// Formats a double for display, dropping the fractional part for whole
    /// numbers ("1" rather than "1.0").
    ///
    /// `Int(exactly:)` is used instead of `Int(_:)` because the latter traps
    /// on infinity, NaN, and whole values outside `Int`'s range; those fall
    /// back to `String(value)`.
    static func doubleString(_ value: Double) -> String {
        if let whole = Int(exactly: value) {
            return String(whole)
        }
        return String(value)
    }

    /// Formats an integer for display with no grouping separators.
    static func intString(_ value: Int) -> String {
        String(value)
    }
}

View File

@@ -0,0 +1,464 @@
import SwiftUI
/// Model management screen: add a model by HuggingFace repo ID, browse the
/// curated list, and load, edit metadata for, or delete models on disk.
struct ModelManagementView: View {
    @Environment(ModelManager.self) private var modelManager

    /// Text of the "owner/repo" field in the Add Model box.
    @State private var newRepoId = ""
    /// Model awaiting delete confirmation; non-nil presents the alert.
    @State private var pendingDelete: ModelConfig?
    /// Model whose metadata sheet is presented; non-nil presents the sheet.
    @State private var editingMetadataModel: ModelConfig?
    @FocusState private var isRepoIdFieldFocused: Bool

    /// Formats on-disk sizes in KB/MB/GB using the file count style.
    private let sizeFormatter: ByteCountFormatter = {
        let formatter = ByteCountFormatter()
        formatter.allowedUnits = [.useGB, .useMB, .useKB]
        formatter.countStyle = .file
        formatter.includesUnit = true
        formatter.isAdaptive = true
        return formatter
    }()

    var body: some View {
        // Snapshot both lists once per render. They are computed properties
        // that filter (and, for local models, sort) on every access, and the
        // divider check below would otherwise recompute them for every row.
        let curatedModels = modelManager.curatedModels
        let localModels = modelManager.localModelsOnDisk

        ScrollView {
            VStack(alignment: .leading, spacing: 18) {
                GroupBox("Add Model") {
                    VStack(alignment: .leading, spacing: 10) {
                        Text("Enter a HuggingFace model ID. The app will download it, load it once, and then keep it available in the regular model picker.")
                            .font(.caption)
                            .foregroundStyle(.secondary)
                        HStack {
                            TextField("owner/repo", text: $newRepoId)
                                .textFieldStyle(.roundedBorder)
                                .focused($isRepoIdFieldFocused)
                                .onSubmit {
                                    downloadEnteredModel()
                                }
                            Button("Download & Select") {
                                downloadEnteredModel()
                            }
                            .disabled(modelManager.isLoading || newRepoId.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty)
                        }
                    }
                }
                GroupBox("Recommended Defaults") {
                    VStack(spacing: 0) {
                        ForEach(curatedModels) { model in
                            curatedRow(model)
                            // Separator between rows, but not after the last one.
                            if model.id != curatedModels.last?.id {
                                Divider()
                            }
                        }
                    }
                }
                GroupBox("Models On Disk") {
                    if localModels.isEmpty {
                        ContentUnavailableView(
                            "No Local Models",
                            systemImage: "externaldrive",
                            description: Text("Downloaded models will appear here with their summed file sizes.")
                        )
                        .frame(maxWidth: .infinity)
                        .padding(.vertical, 20)
                    } else {
                        VStack(spacing: 0) {
                            ForEach(localModels) { model in
                                localRow(model)
                                // Separator between rows, but not after the last one.
                                if model.id != localModels.last?.id {
                                    Divider()
                                }
                            }
                        }
                    }
                }
            }
            .padding(20)
        }
        .navigationTitle("Models")
        .frame(minWidth: 760, minHeight: 520)
        .sheet(item: $editingMetadataModel) { model in
            ModelMetadataEditorView(
                model: model,
                baselineModel: modelManager.baselineModel(repoId: model.repoId) ?? model,
                detectedLocalModel: modelManager.discoveredLocalModelInfo(repoId: model.repoId),
                hasSavedOverride: Preferences.hasModelMetadataOverride(forRepoId: model.repoId),
                hasSavedGenerationDefaults: Preferences.hasGenerationSettings(forModelId: model.id),
                onSave: { override in
                    modelManager.saveMetadataOverride(override, for: model)
                },
                onReset: {
                    modelManager.clearMetadataOverride(for: model)
                },
                onSaveGenerationSettings: { settings in
                    Preferences.setGenerationSettings(settings, forModelId: model.id)
                }
            )
        }
        .alert(
            "Delete Local Model?",
            isPresented: Binding(
                get: { pendingDelete != nil },
                set: { if !$0 { pendingDelete = nil } }
            )
        ) {
            Button("Delete", role: .destructive) {
                if let pendingDelete {
                    modelManager.deleteModel(pendingDelete)
                }
                self.pendingDelete = nil
            }
            Button("Cancel", role: .cancel) {
                pendingDelete = nil
            }
        } message: {
            if let pendingDelete {
                Text("This removes the local files for \(pendingDelete.repoId).")
            }
        }
        .onAppear {
            modelManager.refreshAvailableModels()
            if newRepoId.isEmpty {
                isRepoIdFieldFocused = true
            }
        }
    }

    /// Row for a curated model: name, repo ID, on-disk status, and
    /// Load/Download and Metadata actions.
    @ViewBuilder
    private func curatedRow(_ model: ModelConfig) -> some View {
        HStack(alignment: .top, spacing: 14) {
            VStack(alignment: .leading, spacing: 4) {
                HStack(spacing: 8) {
                    Text(model.displayName)
                        .font(.headline)
                    if modelManager.currentModel?.repoId == model.repoId {
                        Text("Loaded")
                            .font(.caption.weight(.semibold))
                            .padding(.horizontal, 8)
                            .padding(.vertical, 3)
                            .background(.green.opacity(0.15), in: Capsule())
                    }
                }
                Text(model.repoId)
                    .font(.caption)
                    .foregroundStyle(.secondary)
            }
            Spacer()
            Label(
                model.isLocal ? "On Disk" : "Not Downloaded",
                systemImage: model.isLocal ? "checkmark.circle.fill" : "arrow.down.circle"
            )
            .font(.caption)
            .foregroundStyle(model.isLocal ? .green : .secondary)
            Button(model.isLocal ? "Load" : "Download") {
                Task {
                    await modelManager.loadModel(model)
                }
            }
            .disabled(modelManager.isLoading)
            Button("Metadata…") {
                editingMetadataModel = model
            }
        }
        .padding(.vertical, 10)
    }

    /// Row for a model on disk: name, badges, repo ID, size when known, and
    /// Load, Metadata and Delete actions.
    @ViewBuilder
    private func localRow(_ model: ModelConfig) -> some View {
        HStack(alignment: .top, spacing: 14) {
            VStack(alignment: .leading, spacing: 4) {
                HStack(spacing: 8) {
                    Text(model.displayName)
                        .font(.headline)
                    if !model.isCurated {
                        Text("Custom")
                            .font(.caption.weight(.semibold))
                            .padding(.horizontal, 8)
                            .padding(.vertical, 3)
                            .background(.secondary.opacity(0.14), in: Capsule())
                    }
                    if modelManager.currentModel?.repoId == model.repoId {
                        Text("Loaded")
                            .font(.caption.weight(.semibold))
                            .padding(.horizontal, 8)
                            .padding(.vertical, 3)
                            .background(.green.opacity(0.15), in: Capsule())
                    }
                }
                Text(model.repoId)
                    .font(.caption)
                    .foregroundStyle(.secondary)
            }
            Spacer()
            if let localSizeBytes = model.localSizeBytes {
                Text(sizeFormatter.string(fromByteCount: localSizeBytes))
                    .font(.caption.monospacedDigit())
                    .foregroundStyle(.secondary)
                    .frame(width: 90, alignment: .trailing)
            }
            Button("Load") {
                Task {
                    await modelManager.loadModel(model)
                }
            }
            .disabled(modelManager.isLoading)
            Button("Metadata…") {
                editingMetadataModel = model
            }
            Button("Delete", role: .destructive) {
                // Deletion happens only after the alert is confirmed.
                pendingDelete = model
            }
            .disabled(modelManager.isLoading)
        }
        .padding(.vertical, 10)
    }

    /// Starts loading the model typed into the repo ID field and clears the
    /// field afterwards when the manager reports no error.
    private func downloadEnteredModel() {
        let repoId = newRepoId.trimmingCharacters(in: .whitespacesAndNewlines)
        guard !repoId.isEmpty else { return }
        Task {
            await modelManager.addModel(repoId: repoId)
            if modelManager.errorMessage == nil {
                newRepoId = ""
            }
        }
    }
}
/// Editor for one model's metadata override and its per-model generation defaults.
///
/// The editable fields are seeded from `model` (the effective configuration) and shown
/// side by side with `baselineModel`. The view persists nothing itself: saving and
/// resetting are delegated to the `onSave`, `onSaveGenerationSettings` and `onReset`
/// callbacks, after which the view dismisses itself.
private struct ModelMetadataEditorView: View {
    @Environment(\.dismiss) private var dismiss

    /// Effective configuration; seeds the editable fields and provides the title.
    let model: ModelConfig
    /// Configuration displayed in the baseline column of the comparison grid.
    let baselineModel: ModelConfig
    /// Metadata discovered from local model files, when available.
    let detectedLocalModel: LocalModelResolver.LocalModelInfo?
    /// Whether a metadata override is already stored; controls the reset button and summary text.
    let hasSavedOverride: Bool
    /// Whether custom generation defaults are already stored; controls the summary text.
    let hasSavedGenerationDefaults: Bool
    /// Called with the edited metadata when the user saves.
    let onSave: (ModelMetadataOverride) -> Void
    /// Called when the user chooses to reset the stored override.
    let onReset: () -> Void
    /// Called with the normalized generation settings when the user saves.
    let onSaveGenerationSettings: (GenerationSettings) -> Void

    /// Raw text of the context-length field; validated in `currentOverride`.
    @State private var contextLengthText: String
    @State private var primaryLoaderKind: ModelConfig.LoaderKind
    @State private var supportsImages: Bool
    @State private var supportsTools: Bool
    @State private var generationSettings: GenerationSettings

    init(
        model: ModelConfig,
        baselineModel: ModelConfig,
        detectedLocalModel: LocalModelResolver.LocalModelInfo?,
        hasSavedOverride: Bool,
        hasSavedGenerationDefaults: Bool,
        onSave: @escaping (ModelMetadataOverride) -> Void,
        onReset: @escaping () -> Void,
        onSaveGenerationSettings: @escaping (GenerationSettings) -> Void
    ) {
        self.model = model
        self.baselineModel = baselineModel
        self.detectedLocalModel = detectedLocalModel
        self.hasSavedOverride = hasSavedOverride
        self.hasSavedGenerationDefaults = hasSavedGenerationDefaults
        self.onSave = onSave
        self.onReset = onReset
        self.onSaveGenerationSettings = onSaveGenerationSettings
        // Editable state starts from the effective model, not from the baseline.
        _contextLengthText = State(initialValue: String(model.contextLength))
        _primaryLoaderKind = State(initialValue: model.primaryLoaderKind)
        _supportsImages = State(initialValue: model.supportsImages)
        _supportsTools = State(initialValue: model.supportsTools)
        _generationSettings = State(initialValue: Preferences.generationSettings(forModelId: model.id))
    }

    var body: some View {
        NavigationStack {
            Form {
                Section("Metadata") {
                    TextField("Context length", text: $contextLengthText)
                        .textFieldStyle(.roundedBorder)
                    Picker("Primary loader", selection: $primaryLoaderKind) {
                        ForEach(ModelConfig.LoaderKind.allCases, id: \.self) { loaderKind in
                            Text(loaderKind.displayName).tag(loaderKind)
                        }
                    }
                    Toggle("Supports images", isOn: $supportsImages)
                    Toggle("Supports tools", isOn: $supportsTools)
                }
                Section("Comparison") {
                    Text(defaultsSummary)
                        .foregroundStyle(.secondary)
                    Grid(alignment: .leading, horizontalSpacing: 16, verticalSpacing: 8) {
                        // Header row: empty label cell, then the two column headings.
                        GridRow {
                            Text("")
                            Text("Effective")
                                .font(.caption.weight(.semibold))
                                .foregroundStyle(.secondary)
                            Text(baselineHeading)
                                .font(.caption.weight(.semibold))
                                .foregroundStyle(.secondary)
                        }
                        comparisonRow(
                            label: "Context",
                            // `currentOverride` is nil while the context text is not a valid value.
                            effective: currentOverride?.contextLength.description ?? "Invalid",
                            baseline: baselineModel.contextLength > 0 ? "\(baselineModel.contextLength)" : "Unknown"
                        )
                        comparisonRow(
                            label: "Loader",
                            effective: primaryLoaderKind.displayName,
                            baseline: baselineModel.primaryLoaderKind.displayName
                        )
                        comparisonRow(
                            label: "Images",
                            effective: yesNo(supportsImages),
                            baseline: yesNo(baselineModel.supportsImages)
                        )
                        comparisonRow(
                            label: "Tools",
                            effective: yesNo(supportsTools),
                            baseline: yesNo(baselineModel.supportsTools)
                        )
                    }
                }
                if let detectedLocalModel {
                    Section("Discovered Source") {
                        LabeledContent("Detected context") {
                            Text(detectedLocalModel.contextLength > 0 ? "\(detectedLocalModel.contextLength)" : "Unknown")
                        }
                        LabeledContent("Detected loader order") {
                            Text(detectedLocalModel.loaderKinds.map(\.displayName).joined(separator: ", "))
                        }
                        LabeledContent("Detected vision") {
                            Text(yesNo(detectedLocalModel.supportsImages))
                        }
                    }
                }
                Section("Generation Defaults") {
                    GenerationDefaultsEditor(settings: $generationSettings)
                    Text(generationDefaultsSummary)
                        .font(.caption)
                        .foregroundStyle(.secondary)
                }
            }
            .formStyle(.grouped)
            .navigationTitle(model.displayName)
            .frame(minWidth: 560, minHeight: 620)
            .toolbar {
                ToolbarItem(placement: .cancellationAction) {
                    Button("Cancel") {
                        dismiss()
                    }
                }
                ToolbarItem(placement: .primaryAction) {
                    Button("Save") {
                        guard let currentOverride else { return }
                        onSave(currentOverride)
                        onSaveGenerationSettings(generationSettings.normalized())
                        dismiss()
                    }
                    .disabled(currentOverride == nil)
                }
                if hasSavedOverride {
                    ToolbarItem(placement: .automatic) {
                        // The label follows the baseline actually shown in the comparison
                        // column instead of always claiming a "Detected" baseline.
                        Button(resetButtonTitle) {
                            onReset()
                            dismiss()
                        }
                    }
                }
            }
        }
    }

    /// The override described by the current field values, or `nil` when the context
    /// length text does not parse as a non-negative integer.
    private var currentOverride: ModelMetadataOverride? {
        guard let contextLength = Int(contextLengthText.trimmingCharacters(in: .whitespacesAndNewlines)), contextLength >= 0 else {
            return nil
        }
        return ModelMetadataOverride(
            contextLength: contextLength,
            primaryLoaderKind: primaryLoaderKind,
            supportsImages: supportsImages,
            supportsTools: supportsTools
        )
    }

    /// Explanatory text for the comparison section. The wording depends on where the
    /// baseline comes from (local files, curated list, or inference) and on whether an
    /// override is already saved.
    private var defaultsSummary: String {
        if detectedLocalModel != nil {
            if hasSavedOverride {
                return "The editable fields show the effective overridden metadata. The comparison column shows the discovered baseline from the local model files."
            }
            return "The editable fields currently match the discovered baseline from the local model files. Save to store an override for this repo ID."
        }
        if model.isCurated {
            return hasSavedOverride
                ? "The editable fields show the effective overridden metadata. The comparison column shows the curated built-in baseline."
                : "The editable fields currently match the curated built-in baseline. Save to store an override for this repo ID."
        }
        if hasSavedOverride {
            return "The editable fields show the effective overridden metadata. The comparison column shows the inferred baseline for this repo ID."
        }
        return "The editable fields currently match the inferred baseline for this repo ID. Save to store an override."
    }

    /// Column heading naming the baseline source: "Detected", "Built-in" or "Inferred".
    private var baselineHeading: String {
        if detectedLocalModel != nil {
            return "Detected"
        }
        if model.isCurated {
            return "Built-in"
        }
        return "Inferred"
    }

    /// Title of the reset button, named after the same baseline as the comparison column.
    private var resetButtonTitle: String {
        "Reset to \(baselineHeading)"
    }

    /// One grid row: a label, the effective value, and the baseline value.
    @ViewBuilder
    private func comparisonRow(label: String, effective: String, baseline: String) -> some View {
        GridRow {
            Text(label)
            Text(effective)
                .monospaced()
            Text(baseline)
                .foregroundStyle(.secondary)
                .monospaced()
        }
    }

    /// Renders a flag as "Yes" or "No".
    private func yesNo(_ value: Bool) -> String {
        value ? "Yes" : "No"
    }

    /// Caption under the generation defaults editor. The wording depends on whether
    /// custom defaults are saved and on whether the model is curated.
    private var generationDefaultsSummary: String {
        if hasSavedGenerationDefaults {
            return "These saved generation defaults apply to new chats and to API requests that omit generation parameters for this model."
        }
        if model.isCurated {
            return "These defaults currently match the model's built-in defaults. Save to store a custom per-model default for chats and API requests."
        }
        return "These defaults currently match the general fallback defaults for this model. Save to store a custom per-model default for chats and API requests."
    }
}

View File

@@ -0,0 +1,5 @@
import Foundation
/// Namespace for constants related to the model management window.
enum ModelManagementWindow {
    /// Identifier string for the window.
    static let windowID: String = "model-manager"
}

View File

@@ -2,61 +2,31 @@ import SwiftUI
struct ModelPickerView: View { struct ModelPickerView: View {
@Environment(ModelManager.self) private var modelManager @Environment(ModelManager.self) private var modelManager
@State private var confirmRedownload: ModelConfig?
var body: some View { var body: some View {
HStack(spacing: 8) { Menu {
Picker("Model", selection: selectedModelBinding) { ForEach(modelManager.availableModels) { config in
ForEach(ModelConfig.availableModels) { config in
Label(
config.displayName,
systemImage: config.isLocal ? "checkmark.circle.fill" : "arrow.down.circle"
).tag(config.id)
}
}
.frame(width: 160)
.disabled(modelManager.isLoading)
// Re-download button (visible when a model is loaded)
if let current = modelManager.currentModel, !modelManager.isLoading {
Button { Button {
confirmRedownload = current guard config.id != modelManager.currentModel?.id else { return }
Task { await modelManager.loadModel(config) }
} label: { } label: {
Image(systemName: "arrow.clockwise") Label(
.font(.caption) config.isCurated ? config.displayName : config.repoId,
} systemImage: config.isLocal ? "checkmark.circle.fill" : "arrow.down.circle"
.buttonStyle(.borderless)
.help("Re-download \(current.displayName)")
}
}
.alert("Re-download Model?", isPresented: .init(
get: { confirmRedownload != nil },
set: { if !$0 { confirmRedownload = nil } }
)) {
Button("Re-download", role: .destructive) {
if let config = confirmRedownload {
Task { await modelManager.redownloadModel(config) }
}
}
Button("Cancel", role: .cancel) {
confirmRedownload = nil
}
} message: {
if let config = confirmRedownload {
Text("This will delete the local cache for \(config.displayName) and download it again from HuggingFace.")
}
}
}
private var selectedModelBinding: Binding<String> {
Binding(
get: { modelManager.currentModel?.id ?? ModelConfig.default.id },
set: { newId in
guard let config = ModelConfig.availableModels.first(where: { $0.id == newId }) else { return }
Task {
await modelManager.loadModel(config)
}
}
) )
} }
} }
} label: {
Text(currentModelLabel)
.lineLimit(1)
.frame(minWidth: 200)
}
.menuStyle(.button)
.disabled(modelManager.isLoading)
}
private var currentModelLabel: String {
guard let model = modelManager.currentModel else { return "Select Model" }
return model.isCurated ? model.displayName : model.repoId
}
}

View File

@@ -210,6 +210,7 @@ struct SceneManagementView: View {
} }
private struct SceneEditorView: View { private struct SceneEditorView: View {
@Environment(ModelManager.self) private var modelManager
@Environment(SceneStore.self) private var sceneStore @Environment(SceneStore.self) private var sceneStore
let scene: ChatScene let scene: ChatScene
@@ -221,7 +222,7 @@ private struct SceneEditorView: View {
Picker("Model", selection: modelBinding) { Picker("Model", selection: modelBinding) {
Text("Current model").tag(Optional<String>.none) Text("Current model").tag(Optional<String>.none)
ForEach(ModelConfig.availableModels) { model in ForEach(modelManager.availableModels) { model in
Text(model.displayName).tag(Optional(model.id)) Text(model.displayName).tag(Optional(model.id))
} }
} }
@@ -257,6 +258,9 @@ private struct SceneEditorView: View {
} }
.formStyle(.grouped) .formStyle(.grouped)
.navigationTitle(scene.displayName) .navigationTitle(scene.displayName)
.onAppear {
modelManager.refreshAvailableModels()
}
} }
private var modelBinding: Binding<String?> { private var modelBinding: Binding<String?> {

View File

@@ -2,13 +2,13 @@ import SwiftUI
struct SettingsView: View { struct SettingsView: View {
@Environment(\.openWindow) private var openWindow @Environment(\.openWindow) private var openWindow
@Environment(ModelManager.self) private var modelManager
@Environment(SceneStore.self) private var sceneStore @Environment(SceneStore.self) private var sceneStore
@State private var systemPrompt: String = Preferences.systemPrompt @State private var systemPrompt: String = Preferences.systemPrompt
@State private var apiPort: String = String(Preferences.apiPort) @State private var apiPort: String = String(Preferences.apiPort)
@State private var apiAutoStart: Bool = Preferences.apiAutoStart @State private var apiAutoStart: Bool = Preferences.apiAutoStart
@State private var idleUnloadMinutes: String = String(Preferences.idleUnloadMinutes) @State private var idleUnloadMinutes: String = String(Preferences.idleUnloadMinutes)
@State private var defaultModelId: String = Preferences.defaultModelId ?? ModelConfig.default.id @State private var defaultModelId: String = Preferences.defaultModelId ?? ModelConfig.default.id
@State private var generationDefaultsModelId: String = Preferences.defaultModelId ?? ModelConfig.default.id
@State private var kvQuantizationEnabled: Bool = Preferences.kvQuantizationEnabled @State private var kvQuantizationEnabled: Bool = Preferences.kvQuantizationEnabled
@State private var kvQuantizationBits: Int = Preferences.kvQuantizationBits @State private var kvQuantizationBits: Int = Preferences.kvQuantizationBits
@@ -29,7 +29,7 @@ struct SettingsView: View {
Form { Form {
Section("Startup") { Section("Startup") {
Picker("Default model", selection: $defaultModelId) { Picker("Default model", selection: $defaultModelId) {
ForEach(ModelConfig.availableModels) { model in ForEach(modelManager.availableModels) { model in
Text(model.displayName).tag(model.id) Text(model.displayName).tag(model.id)
} }
} }
@@ -42,20 +42,6 @@ struct SettingsView: View {
.foregroundStyle(.secondary) .foregroundStyle(.secondary)
} }
Section("Generation Defaults") {
Picker("Defaults for model", selection: $generationDefaultsModelId) {
ForEach(ModelConfig.availableModels) { model in
Text(model.displayName).tag(model.id)
}
}
GenerationDefaultsEditor(settings: generationDefaultsBinding)
Text("These are the per-model defaults used by chat sessions and by the API server whenever a request omits a generation parameter. Lower temperature and stronger repetition penalties are usually better for technical work; higher temperature is usually better for improvisation and roleplay.")
.font(.caption)
.foregroundStyle(.secondary)
}
Section("System Prompt") { Section("System Prompt") {
TextEditor(text: $systemPrompt) TextEditor(text: $systemPrompt)
.font(.body.monospaced()) .font(.body.monospaced())
@@ -164,12 +150,11 @@ struct SettingsView: View {
} }
.formStyle(.grouped) .formStyle(.grouped)
.frame(width: 450, height: 650) .frame(width: 450, height: 650)
} .onAppear {
modelManager.refreshAvailableModels()
private var generationDefaultsBinding: Binding<GenerationSettings> { if !modelManager.availableModels.contains(where: { $0.id == defaultModelId }) {
Binding( defaultModelId = modelManager.availableModels.first?.id ?? ModelConfig.default.id
get: { Preferences.generationSettings(forModelId: generationDefaultsModelId) }, }
set: { Preferences.setGenerationSettings($0, forModelId: generationDefaultsModelId) } }
)
} }
} }

View File

@@ -31,6 +31,10 @@ struct StatusBarView: View {
.font(.caption) .font(.caption)
.foregroundStyle(.secondary) .foregroundStyle(.secondary)
if let model = modelManager.currentModel, model.contextLength > 0 {
contextFillView(totalContext: model.contextLength)
}
Spacer() Spacer()
// GPU memory // GPU memory
@@ -78,4 +82,43 @@ struct StatusBarView: View {
.padding(.vertical, 4) .padding(.vertical, 4)
.background(.bar) .background(.bar)
} }
/// Compact context-usage indicator: a 48-pt capsule track with a colored fill, followed
/// by a "Ctx N%" caption. The tooltip shows used and total token counts.
///
/// - Parameter totalContext: Total context size in tokens, shown in the tooltip.
@ViewBuilder
private func contextFillView(totalContext: Int) -> some View {
    let usedTokens = viewModel.contextUsedTokens
    let reportedRatio = viewModel.contextFillRatio
    // A non-finite ratio would trap in the `Int(...)` conversion below, so it is
    // treated as an empty context. NOTE(review): confirm whether the view model can
    // actually produce one.
    let ratio = reportedRatio.isFinite ? reportedRatio : 0
    let percent = Int((ratio * 100).rounded())
    HStack(spacing: 6) {
        Capsule()
            .fill(.quaternary)
            .frame(width: 48, height: 6)
            .overlay(alignment: .leading) {
                Capsule()
                    .fill(contextFillColor(for: ratio))
                    // At least 4 pt so the fill stays visible, and at most the track
                    // width so a ratio above 1 cannot draw past the track.
                    .frame(width: min(48, max(4, 48 * ratio)), height: 6)
            }
        Text("Ctx \(percent)%")
            .font(.caption.monospacedDigit())
            .foregroundStyle(.secondary)
    }
    .help("Approximate context usage: \(formatTokenCount(usedTokens)) of \(formatTokenCount(totalContext)) tokens")
}
/// Chooses the fill color for the context bar: red from 90 %, yellow from 70 %,
/// blue below that.
private func contextFillColor(for ratio: Double) -> Color {
    switch ratio {
    case 0.9...:
        return .red
    case 0.7...:
        return .yellow
    default:
        return .blue
    }
}
/// Formats a token count compactly: the plain number below 1 000, one decimal with a
/// "k" suffix for thousands, and one decimal with an "M" suffix for millions.
///
/// - Parameter count: Number of tokens.
/// - Returns: For example "512", "262.1k" or "1.5M".
private func formatTokenCount(_ count: Int) -> String {
    // Switch to "M" from 999_950 rather than 1_000_000: with one decimal place those
    // counts would otherwise round up and render as "1000.0k".
    if count >= 999_950 {
        return String(format: "%.1fM", Double(count) / 1_000_000)
    }
    if count >= 1_000 {
        return String(format: "%.1fk", Double(count) / 1_000)
    }
    return "\(count)"
}
} }

View File

@@ -0,0 +1,183 @@
import Foundation
import XCTest
@testable import MLX_Server
/// Tests for discovering models in a HuggingFace-style cache directory and for how
/// discovered models and saved metadata overrides show up in `ModelConfig`.
final class LocalModelResolverTests: XCTestCase {

    // MARK: - Cache discovery

    /// A snapshot with a causal-LM `config.json` is reported without image support,
    /// with the LLM loader first, and with a size equal to the files on disk.
    func testDiscoverSystemHFModelsInfersTextOnlyMetadata() throws {
        let cacheRoot = try makeTempHFCache()
        let snapshot = try makeHFSnapshot(base: cacheRoot, repoId: "example/text-only")
        let configFile = snapshot.appendingPathComponent("config.json")
        let weightsFile = snapshot.appendingPathComponent("model.safetensors")
        let tokenizerFile = snapshot.appendingPathComponent("tokenizer.json")

        try writeJSON(
            [
                "architectures": ["LlamaForCausalLM"],
                "max_position_embeddings": 32768,
            ],
            to: configFile
        )
        try Data(repeating: 0x11, count: 64).write(to: weightsFile)
        try Data(repeating: 0x22, count: 19).write(to: tokenizerFile)

        // Measure what was actually written instead of hard-coding the JSON size.
        let bytesOnDisk = try [configFile, weightsFile, tokenizerFile].reduce(0) { total, file in
            try total + Data(contentsOf: file).count
        }
        let expectedSize = Int64(bytesOnDisk)

        let models = LocalModelResolver.discoverSystemHFModels(in: cacheRoot)
        let found = try XCTUnwrap(models.first)

        XCTAssertEqual(found.repoId, "example/text-only")
        XCTAssertEqual(found.contextLength, 32768)
        XCTAssertFalse(found.supportsImages)
        XCTAssertEqual(found.loaderKinds, [.llm, .vlm])
        XCTAssertEqual(found.sizeBytes, expectedSize)
    }

    /// A snapshot with a `vision_config` and processor files is reported with image
    /// support, the VLM loader first, and the context length from `text_config`.
    func testDiscoverSystemHFModelsInfersVisionMetadata() throws {
        let cacheRoot = try makeTempHFCache()
        let snapshot = try makeHFSnapshot(base: cacheRoot, repoId: "example/vision-model")

        try writeJSON(
            [
                "text_config": ["max_position_embeddings": 262144],
                "vision_config": ["hidden_size": 768],
            ],
            to: snapshot.appendingPathComponent("config.json")
        )
        try writeJSON(
            ["processor_class": "Qwen3VLProcessor"],
            to: snapshot.appendingPathComponent("tokenizer_config.json")
        )

        // Placeholder files filled with a repeated byte.
        let placeholders: [(name: String, fill: UInt8, count: Int)] = [
            (name: "processor_config.json", fill: 0x33, count: 12),
            (name: "model.safetensors.index.json", fill: 0x44, count: 8),
        ]
        for placeholder in placeholders {
            try Data(repeating: placeholder.fill, count: placeholder.count)
                .write(to: snapshot.appendingPathComponent(placeholder.name))
        }

        let models = LocalModelResolver.discoverSystemHFModels(in: cacheRoot)
        let found = try XCTUnwrap(models.first)

        XCTAssertEqual(found.repoId, "example/vision-model")
        XCTAssertEqual(found.contextLength, 262144)
        XCTAssertTrue(found.supportsImages)
        XCTAssertEqual(found.loaderKinds, [.vlm, .llm])
    }

    // MARK: - Catalog merging and resolution

    /// Merging keeps the curated `gemma` entry (picking up its local size) and adds an
    /// unknown local repo as a non-curated entry keyed by its repo ID.
    func testMergedCatalogKeepsCuratedModelsAndAddsCustomLocalModels() {
        let curatedOnDisk = LocalModelResolver.LocalModelInfo(
            repoId: "mlx-community/gemma-3-4b-it-4bit",
            directory: URL(fileURLWithPath: "/tmp/gemma"),
            sizeBytes: 1024,
            contextLength: 128000,
            loaderKinds: [.vlm, .llm],
            supportsImages: true
        )
        let customOnDisk = LocalModelResolver.LocalModelInfo(
            repoId: "custom-org/custom-model",
            directory: URL(fileURLWithPath: "/tmp/custom"),
            sizeBytes: 2048,
            contextLength: 65536,
            loaderKinds: [.llm, .vlm],
            supportsImages: false
        )

        let catalog = ModelConfig.mergedModels(localModels: [curatedOnDisk, customOnDisk])
        let gemmaEntry = catalog.first { $0.id == "gemma" }
        let customEntry = catalog.first { $0.repoId == "custom-org/custom-model" }

        XCTAssertEqual(gemmaEntry?.localSizeBytes, 1024)
        XCTAssertEqual(customEntry?.id, "custom-org/custom-model")
        XCTAssertEqual(customEntry?.contextLength, 65536)
        XCTAssertFalse(customEntry?.isCurated ?? true)
    }

    /// Resolving an unknown repo ID yields a non-curated config whose `id` and `repoId`
    /// are both that repo ID.
    func testResolveUnknownRepoIdCreatesRemoteCustomConfig() throws {
        let resolved = try XCTUnwrap(ModelConfig.resolve("custom-owner/custom-repo"))

        XCTAssertEqual(resolved.id, "custom-owner/custom-repo")
        XCTAssertEqual(resolved.repoId, "custom-owner/custom-repo")
        XCTAssertFalse(resolved.isCurated)
    }

    /// A saved metadata override replaces the metadata of a local model in the merged catalog.
    func testMergedCatalogAppliesSavedMetadataOverride() {
        let repoId = "custom-org/override-model"
        let savedOverride = ModelMetadataOverride(
            contextLength: 123456,
            primaryLoaderKind: .vlm,
            supportsImages: true,
            supportsTools: true
        )
        Preferences.setModelMetadataOverride(savedOverride, forRepoId: repoId)
        // Always remove the override again so it does not leak into other tests.
        defer {
            Preferences.removeModelMetadataOverride(forRepoId: repoId)
        }

        let localModel = LocalModelResolver.LocalModelInfo(
            repoId: repoId,
            directory: URL(fileURLWithPath: "/tmp/custom-override"),
            sizeBytes: 2048,
            contextLength: 65536,
            loaderKinds: [.llm, .vlm],
            supportsImages: false
        )

        let catalog = ModelConfig.mergedModels(localModels: [localModel])
        let entry = catalog.first { $0.repoId == repoId }

        XCTAssertEqual(entry?.contextLength, 123456)
        XCTAssertEqual(entry?.primaryLoaderKind, .vlm)
        XCTAssertTrue(entry?.supportsImages ?? false)
        XCTAssertTrue(entry?.supportsTools ?? false)
    }

    /// A saved metadata override is also applied when resolving an unknown repo ID.
    func testResolveUnknownRepoIdUsesSavedMetadataOverride() throws {
        let repoId = "custom-owner/custom-repo-with-override"
        let savedOverride = ModelMetadataOverride(
            contextLength: 8192,
            primaryLoaderKind: .llm,
            supportsImages: false,
            supportsTools: true
        )
        Preferences.setModelMetadataOverride(savedOverride, forRepoId: repoId)
        // Always remove the override again so it does not leak into other tests.
        defer {
            Preferences.removeModelMetadataOverride(forRepoId: repoId)
        }

        let resolved = try XCTUnwrap(ModelConfig.resolve(repoId))

        XCTAssertEqual(resolved.contextLength, 8192)
        XCTAssertEqual(resolved.primaryLoaderKind, .llm)
        XCTAssertFalse(resolved.supportsImages)
        XCTAssertTrue(resolved.supportsTools)
    }

    // MARK: - Fixtures

    /// Creates a uniquely named directory under the temporary directory and registers
    /// a teardown block that removes it.
    private func makeTempHFCache() throws -> URL {
        let fileManager = FileManager.default
        let cacheRoot = fileManager.temporaryDirectory
            .appendingPathComponent(UUID().uuidString, isDirectory: true)
        try fileManager.createDirectory(at: cacheRoot, withIntermediateDirectories: true)
        addTeardownBlock {
            try? FileManager.default.removeItem(at: cacheRoot)
        }
        return cacheRoot
    }

    /// Creates `<base>/models--<owner>--<name>/snapshots/<hash>/` and returns its URL.
    private func makeHFSnapshot(base: URL, repoId: String, hash: String = "abc123") throws -> URL {
        let slug = repoId.replacingOccurrences(of: "/", with: "--")
        let snapshotDir = ["models--\(slug)", "snapshots", hash].reduce(base) { parent, component in
            parent.appendingPathComponent(component, isDirectory: true)
        }
        try FileManager.default.createDirectory(at: snapshotDir, withIntermediateDirectories: true)
        return snapshotDir
    }

    /// Serializes `object` as pretty-printed JSON with sorted keys and writes it to `url`.
    private func writeJSON(_ object: Any, to url: URL) throws {
        let encoded = try JSONSerialization.data(
            withJSONObject: object,
            options: [.prettyPrinted, .sortedKeys]
        )
        try encoded.write(to: url)
    }
}

View File

@@ -1,7 +1,9 @@
import Foundation import Foundation
import Hub import HuggingFace
import MLXHuggingFace
import MLXLMCommon import MLXLMCommon
import MLXVLM import MLXVLM
import Tokenizers
import XCTest import XCTest
@testable import MLX_Server @testable import MLX_Server
@@ -671,10 +673,9 @@ private actor LocalGemmaFixture {
} }
let loadTask = Task<ModelContainer, Error> { let loadTask = Task<ModelContainer, Error> {
let cachesDir = FileManager.default.urls(for: .cachesDirectory, in: .userDomainMask).first
let hub = HubApi(downloadBase: cachesDir, cache: nil)
return try await VLMModelFactory.shared.loadContainer( return try await VLMModelFactory.shared.loadContainer(
hub: hub, from: #hubDownloader(HubClient.default),
using: #huggingFaceTokenizerLoader(),
configuration: ModelConfiguration(directory: localDir), configuration: ModelConfiguration(directory: localDir),
progressHandler: { _ in } progressHandler: { _ in }
) )

View File

@@ -1,8 +1,10 @@
import Foundation import Foundation
import Hub import HuggingFace
import MLX import MLX
import MLXHuggingFace
import MLXLMCommon import MLXLMCommon
import MLXVLM import MLXVLM
import Tokenizers
import XCTest import XCTest
@testable import MLX_Server @testable import MLX_Server
@@ -230,10 +232,9 @@ private actor LocalGemmaFixture {
} }
let loadTask = Task<ModelContainer, Error> { let loadTask = Task<ModelContainer, Error> {
let cachesDir = FileManager.default.urls(for: .cachesDirectory, in: .userDomainMask).first
let hub = HubApi(downloadBase: cachesDir, cache: nil)
return try await VLMModelFactory.shared.loadContainer( return try await VLMModelFactory.shared.loadContainer(
hub: hub, from: #hubDownloader(HubClient.default),
using: #huggingFaceTokenizerLoader(),
configuration: ModelConfiguration(directory: localDir), configuration: ModelConfiguration(directory: localDir),
progressHandler: { _ in } progressHandler: { _ in }
) )

View File

@@ -61,6 +61,20 @@ final class PromptBuilderTests: XCTestCase {
XCTAssertEqual(prepared.additionalContext?["enable_thinking"] as? Bool, legacy.additionalContext?["enable_thinking"] as? Bool) XCTAssertEqual(prepared.additionalContext?["enable_thinking"] as? Bool, legacy.additionalContext?["enable_thinking"] as? Bool)
} }
/// Pins the token estimate for a fixed prompt: 11 instruction characters plus two
/// 10-character messages are expected to estimate to 8 tokens.
/// NOTE(review): presumably a characters-per-token heuristic; confirm against
/// `PromptBuilder.estimatePromptTokens`.
func testEstimatePromptTokensMatchesSharedCharacterHeuristic() {
    let userTurn = Chat.Message(role: .user, content: "1234567890")
    let assistantTurn = Chat.Message(role: .assistant, content: "abcdefghij")

    let tokenEstimate = PromptBuilder.estimatePromptTokens(
        instructions: "system12345",
        chatMessages: [userTurn, assistantTurn]
    )

    XCTAssertEqual(tokenEstimate, 8)
}
func testBuildAggregatesInstructionsAndMessages() { func testBuildAggregatesInstructionsAndMessages() {
let request = APIChatCompletionRequest( let request = APIChatCompletionRequest(
model: "gemma", model: "gemma",

View File

@@ -249,4 +249,11 @@ private final class NonStandardCache: KVCache {
) -> MLXFast.ScaledDotProductAttentionMaskMode { ) -> MLXFast.ScaledDotProductAttentionMaskMode {
.none .none
} }
/// Returns a new `NonStandardCache` that carries over this cache's `state` and `offset`.
func copy() -> any KVCache {
    let duplicate = NonStandardCache(tokenCount: 0, headDim: 0)
    // Assign `state` before `offset`, in the same order as before, in case the
    // `state` setter touches `offset`.
    duplicate.state = state
    duplicate.offset = offset
    return duplicate
}
} }

View File

@@ -388,4 +388,10 @@ private final class TestTrimRecordingCache: KVCache {
) -> MLXFast.ScaledDotProductAttentionMaskMode { ) -> MLXFast.ScaledDotProductAttentionMaskMode {
.none .none
} }
/// Returns a new `TestTrimRecordingCache` created with the same `offset` and
/// `trimmable` values, with this cache's `state` assigned afterwards.
func copy() -> any KVCache {
    let duplicate = TestTrimRecordingCache(offset: offset, trimmable: trimmable)
    duplicate.state = state
    return duplicate
}
} }

View File

@@ -11,6 +11,7 @@ Native macOS app for running local LLMs on Apple Silicon via [MLX](https://githu
| `qwen3.5-0.8b` | `mlx-community/Qwen3.5-0.8B-4bit` | 256k | `VLMModelFactory` | Vision, thinking mode, tool use (`<tool_call>` tags) | | `qwen3.5-0.8b` | `mlx-community/Qwen3.5-0.8B-4bit` | 256k | `VLMModelFactory` | Vision, thinking mode, tool use (`<tool_call>` tags) |
| `qwen3.5-9b` | `mlx-community/Qwen3.5-9B-4bit` | 256k | `VLMModelFactory` | Vision, thinking mode, tool use (`<tool_call>` tags) | | `qwen3.5-9b` | `mlx-community/Qwen3.5-9B-4bit` | 256k | `VLMModelFactory` | Vision, thinking mode, tool use (`<tool_call>` tags) |
| `stheno` | `synk/L3-8B-Stheno-v3.2-MLX` | 8k | `LLMModelFactory` | Text-only, llama-based | | `stheno` | `synk/L3-8B-Stheno-v3.2-MLX` | 8k | `LLMModelFactory` | Text-only, llama-based |
| `violet-lotus` | `hobaratio/MN-Violet-Lotus-12B-mlx-4Bit` | 32k | `LLMModelFactory` | Text-only, Mistral-based |
Any model in MLX format on HuggingFace can be added — there is no restriction on uploader or architecture. Any model in MLX format on HuggingFace can be added — there is no restriction on uploader or architecture.
@@ -43,7 +44,8 @@ This is intended for targeted validation while keeping the normal default as the
- **Chat interface** with markdown rendering and model-aware image attachments (file picker, drag & drop, clipboard paste, Finder copy-paste on vision-capable models) - **Chat interface** with markdown rendering and model-aware image attachments (file picker, drag & drop, clipboard paste, Finder copy-paste on vision-capable models)
- **Scene-based chat starts** — New Chat opens a scene picker with Neutral plus saved scenes, each with an optional model override, a scene prompt layered onto the base system prompt, an auto-sent starter prompt, and optional generation-setting overrides for chat-specific behavior - **Scene-based chat starts** — New Chat opens a scene picker with Neutral plus saved scenes, each with an optional model override, a scene prompt layered onto the base system prompt, an auto-sent starter prompt, and optional generation-setting overrides for chat-specific behavior
- **Model picker** in toolbar with local/download status indicators and re-download button - **Model picker** in toolbar with curated defaults plus any locally discovered MLX models on disk
- **Models window** in the menu for downloading a model by HuggingFace ID, inspecting on-disk model sizes, and deleting local model folders
- **Download progress modal** — shows file progress, percentage, and speed when downloading a new model - **Download progress modal** — shows file progress, percentage, and speed when downloading a new model
- **Thinking mode** — models like Qwen3.5 can reason internally before responding; thinking content appears in a collapsible box. Toggle on/off in Settings. - **Thinking mode** — models like Qwen3.5 can reason internally before responding; thinking content appears in a collapsible box. Toggle on/off in Settings.
- **Streaming responses** with live token display - **Streaming responses** with live token display
@@ -139,7 +141,7 @@ MLXServer/
│ ├── ToolCallParser.swift — Parses tool calls from model output │ ├── ToolCallParser.swift — Parses tool calls from model output
│ └── ToolPromptBuilder.swift — Model-specific tool prompt formatting │ └── ToolPromptBuilder.swift — Model-specific tool prompt formatting
└── Utilities/ └── Utilities/
├── LocalModelResolver.swift — Offline-first HuggingFace cache resolution (sandbox + system) ├── LocalModelResolver.swift — Offline-first HuggingFace cache resolution
├── ChatExporter.swift — Export conversations to Markdown or RTF ├── ChatExporter.swift — Export conversations to Markdown or RTF
├── FocusedValues.swift — FocusedValue keys for menu bar integration ├── FocusedValues.swift — FocusedValue keys for menu bar integration
└── Preferences.swift — UserDefaults wrapper, including scene persistence └── Preferences.swift — UserDefaults wrapper, including scene persistence
@@ -151,7 +153,7 @@ build.sh — One-command build script (xcodegen + xcodebuild)
## Key Design Decisions ## Key Design Decisions
- Uses `mlx-swift-lm` for inference — `VLMModelFactory` for vision models and `LLMModelFactory` for text-only models - Uses `mlx-swift-lm` for inference — `VLMModelFactory` for vision models and `LLMModelFactory` for text-only models
- **Offline-first**: `LocalModelResolver` checks both the sandboxed app container and `~/.cache/huggingface/hub/` for locally-cached models before downloading - **Offline-first**: `LocalModelResolver` checks `~/.cache/huggingface/hub/` for locally-cached models before downloading
- **No duplicate storage**: custom `HubApi` with blob cache disabled — models are stored once in the snapshot cache - **No duplicate storage**: custom `HubApi` with blob cache disabled — models are stored once in the snapshot cache
- **KV cache reuse** across API requests — reuses `ChatSession` when conversation history prefix matches - **KV cache reuse** across API requests — reuses `ChatSession` when conversation history prefix matches
- **Thinking mode**: `enable_thinking` passed via Jinja template context; `<think>` tags parsed in real-time during streaming - **Thinking mode**: `enable_thinking` passed via Jinja template context; `<think>` tags parsed in real-time during streaming

View File

@@ -19,6 +19,7 @@ xcodebuild \
-scheme MLXServer \ -scheme MLXServer \
-destination 'platform=macOS' \ -destination 'platform=macOS' \
-configuration "$CONFIG" \ -configuration "$CONFIG" \
-skipMacroValidation \
SYMROOT="$BUILD_DIR" \ SYMROOT="$BUILD_DIR" \
build 2>&1 | \ build 2>&1 | \
grep -E "(CompileSwift .* 'MLXServer'|error:|warning:.*MLXServer/|BUILD )" | \ grep -E "(CompileSwift .* 'MLXServer'|error:|warning:.*MLXServer/|BUILD )" | \

View File

@@ -9,7 +9,13 @@ options:
packages: packages:
mlx-swift-lm: mlx-swift-lm:
url: https://github.com/ml-explore/mlx-swift-lm url: https://github.com/ml-explore/mlx-swift-lm
branch: main from: "3.31.3"
swift-huggingface:
url: https://github.com/huggingface/swift-huggingface
from: "0.9.0"
swift-transformers:
url: https://github.com/huggingface/swift-transformers
from: "1.2.0"
MarkdownUI: MarkdownUI:
url: https://github.com/gonzalezreal/swift-markdown-ui url: https://github.com/gonzalezreal/swift-markdown-ui
from: "2.4.0" from: "2.4.0"
@@ -40,6 +46,12 @@ targets:
product: MLXVLM product: MLXVLM
- package: mlx-swift-lm - package: mlx-swift-lm
product: MLXLMCommon product: MLXLMCommon
- package: mlx-swift-lm
product: MLXHuggingFace
- package: swift-huggingface
product: HuggingFace
- package: swift-transformers
product: Tokenizers
- package: MarkdownUI - package: MarkdownUI
product: MarkdownUI product: MarkdownUI
MLXServerTests: MLXServerTests:

27
resolve-packages.sh Executable file
View File

@@ -0,0 +1,27 @@
#!/bin/bash
# Re-resolves the Swift package dependencies of MLXServer.xcodeproj from a clean slate.
# All SwiftPM state used by this script lives under build/ next to the script.
set -euo pipefail

PROJECT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_PATH="$PROJECT_DIR/MLXServer.xcodeproj"
RESOLVED_PATH="$PROJECT_PATH/project.xcworkspace/xcshareddata/swiftpm/Package.resolved"

SPM_STATE_DIR="$PROJECT_DIR/build/swiftpm"
PACKAGE_CACHE_PATH="$SPM_STATE_DIR/cache"
CLONED_SOURCES_PATH="$SPM_STATE_DIR/clones"
DERIVED_DATA_PATH="$PROJECT_DIR/build/DerivedData"

echo "==> Resolving Swift packages from project.yml constraints"

# Remove the lockfile, the script-local package cache and clones, and DerivedData,
# so the resolve below starts from scratch instead of reusing earlier pins.
rm -f "$RESOLVED_PATH"
rm -rf "$PACKAGE_CACHE_PATH" "$CLONED_SOURCES_PATH"
rm -rf "$DERIVED_DATA_PATH"
mkdir -p "$PACKAGE_CACHE_PATH" "$CLONED_SOURCES_PATH"

# Resolve only (no build), using the freshly created cache and clone directories.
RESOLVE_ARGS=(
  -resolvePackageDependencies
  -project "$PROJECT_PATH"
  -scheme MLXServer
  -disablePackageRepositoryCache
  -packageCachePath "$PACKAGE_CACHE_PATH"
  -clonedSourcePackagesDirPath "$CLONED_SOURCES_PATH"
)
xcodebuild "${RESOLVE_ARGS[@]}"

View File

@@ -21,6 +21,7 @@ XCODEBUILD_ARGS=(
-scheme MLXServer -scheme MLXServer
-destination "$DESTINATION" -destination "$DESTINATION"
-configuration "$CONFIG" -configuration "$CONFIG"
-skipMacroValidation
SYMROOT="$BUILD_DIR" SYMROOT="$BUILD_DIR"
) )