feat: added stheno (Llama-based) text-only model, too
This commit is contained in:
@@ -22,7 +22,7 @@ struct ContentView: View {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
.onChange(of: modelManager.currentModel) {
|
.onChange(of: modelManager.currentModel) {
|
||||||
chatVM?.resetSession()
|
chatVM?.handleModelChange()
|
||||||
// Persist last used model
|
// Persist last used model
|
||||||
if let id = modelManager.currentModel?.id {
|
if let id = modelManager.currentModel?.id {
|
||||||
Preferences.lastModelId = id
|
Preferences.lastModelId = id
|
||||||
|
|||||||
@@ -3,10 +3,18 @@ import MLXLMCommon
|
|||||||
|
|
||||||
/// Defines a supported model with its metadata.
|
/// Defines a supported model with its metadata.
|
||||||
struct ModelConfig: Identifiable, Hashable {
|
struct ModelConfig: Identifiable, Hashable {
|
||||||
|
enum LoaderKind: Hashable {
|
||||||
|
case llm
|
||||||
|
case vlm
|
||||||
|
}
|
||||||
|
|
||||||
let id: String // alias: "gemma", "gemma3n", "qwen"
|
let id: String // alias: "gemma", "gemma3n", "qwen"
|
||||||
let repoId: String // HuggingFace ID
|
let repoId: String // HuggingFace ID
|
||||||
let displayName: String
|
let displayName: String
|
||||||
let contextLength: Int
|
let contextLength: Int
|
||||||
|
let loaderKind: LoaderKind
|
||||||
|
let supportsImages: Bool
|
||||||
|
let supportsTools: Bool
|
||||||
|
|
||||||
/// All models supported by the app.
|
/// All models supported by the app.
|
||||||
static let availableModels: [ModelConfig] = [
|
static let availableModels: [ModelConfig] = [
|
||||||
@@ -14,19 +22,37 @@ struct ModelConfig: Identifiable, Hashable {
|
|||||||
id: "gemma",
|
id: "gemma",
|
||||||
repoId: "mlx-community/gemma-3-4b-it-4bit",
|
repoId: "mlx-community/gemma-3-4b-it-4bit",
|
||||||
displayName: "Gemma 3 4B",
|
displayName: "Gemma 3 4B",
|
||||||
contextLength: 128_000
|
contextLength: 128_000,
|
||||||
|
loaderKind: .vlm,
|
||||||
|
supportsImages: true,
|
||||||
|
supportsTools: true
|
||||||
),
|
),
|
||||||
ModelConfig(
|
ModelConfig(
|
||||||
id: "qwen",
|
id: "qwen",
|
||||||
repoId: "mlx-community/Qwen3-VL-4B-Instruct-4bit",
|
repoId: "mlx-community/Qwen3-VL-4B-Instruct-4bit",
|
||||||
displayName: "Qwen3 VL 4B",
|
displayName: "Qwen3 VL 4B",
|
||||||
contextLength: 256_000
|
contextLength: 256_000,
|
||||||
|
loaderKind: .vlm,
|
||||||
|
supportsImages: true,
|
||||||
|
supportsTools: true
|
||||||
),
|
),
|
||||||
ModelConfig(
|
ModelConfig(
|
||||||
id: "qwen3.5-9b",
|
id: "qwen3.5-9b",
|
||||||
repoId: "mlx-community/Qwen3.5-9B-4bit",
|
repoId: "mlx-community/Qwen3.5-9B-4bit",
|
||||||
displayName: "Qwen3.5 9B",
|
displayName: "Qwen3.5 9B",
|
||||||
contextLength: 256_000
|
contextLength: 256_000,
|
||||||
|
loaderKind: .llm,
|
||||||
|
supportsImages: false,
|
||||||
|
supportsTools: true
|
||||||
|
),
|
||||||
|
ModelConfig(
|
||||||
|
id: "stheno",
|
||||||
|
repoId: "synk/L3-8B-Stheno-v3.2-MLX",
|
||||||
|
displayName: "Stheno L3 8B",
|
||||||
|
contextLength: 8_192,
|
||||||
|
loaderKind: .llm,
|
||||||
|
supportsImages: false,
|
||||||
|
supportsTools: false
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
@@ -221,12 +221,22 @@ final class APIServer {
|
|||||||
let requestId = "chatcmpl-\(UUID().uuidString.prefix(12).lowercased())"
|
let requestId = "chatcmpl-\(UUID().uuidString.prefix(12).lowercased())"
|
||||||
let created = Int(Date().timeIntervalSince1970)
|
let created = Int(Date().timeIntervalSince1970)
|
||||||
let modelName = request.model ?? modelManager.currentModel?.repoId ?? "unknown"
|
let modelName = request.model ?? modelManager.currentModel?.repoId ?? "unknown"
|
||||||
|
let currentModel = modelManager.currentModel
|
||||||
let contextLength = modelManager.currentModel?.contextLength ?? 0
|
let contextLength = modelManager.currentModel?.contextLength ?? 0
|
||||||
|
|
||||||
|
if let tools = request.tools, !tools.isEmpty, currentModel?.supportsTools != true {
|
||||||
|
sendResponse(
|
||||||
|
connection: connection,
|
||||||
|
status: 400,
|
||||||
|
body: #"{"error":{"message":"The currently selected model does not support tool calls.","type":"invalid_request_error","code":"tools_not_supported"}}"#
|
||||||
|
)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
// Convert API messages to Chat.Message, extracting images from content parts
|
// Convert API messages to Chat.Message, extracting images from content parts
|
||||||
var chatMessages: [Chat.Message] = []
|
var chatMessages: [Chat.Message] = []
|
||||||
var images: [UserInput.Image] = []
|
var images: [UserInput.Image] = []
|
||||||
let currentModelRepoId = modelManager.currentModel?.repoId ?? modelName
|
let currentModelRepoId = currentModel?.repoId ?? modelName
|
||||||
|
|
||||||
// Build the instructions string (system prompt + tool definitions).
|
// Build the instructions string (system prompt + tool definitions).
|
||||||
// This is passed to ChatSession via `instructions:` rather than injected
|
// This is passed to ChatSession via `instructions:` rather than injected
|
||||||
@@ -298,6 +308,15 @@ final class APIServer {
|
|||||||
images.append(contentsOf: messageImages)
|
images.append(contentsOf: messageImages)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if !images.isEmpty, currentModel?.supportsImages != true {
|
||||||
|
sendResponse(
|
||||||
|
connection: connection,
|
||||||
|
status: 400,
|
||||||
|
body: #"{"error":{"message":"The currently selected model does not support image inputs.","type":"invalid_request_error","code":"vision_not_supported"}}"#
|
||||||
|
)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
// Context window check: estimate token count and reject if over limit
|
// Context window check: estimate token count and reject if over limit
|
||||||
if contextLength > 0 {
|
if contextLength > 0 {
|
||||||
let totalChars = chatMessages.reduce(0) { $0 + $1.content.count }
|
let totalChars = chatMessages.reduce(0) { $0 + $1.content.count }
|
||||||
|
|||||||
@@ -53,7 +53,7 @@ final class ChatViewModel {
|
|||||||
ensureSession()
|
ensureSession()
|
||||||
guard let session = chatSession else { return }
|
guard let session = chatSession else { return }
|
||||||
|
|
||||||
let images = attachedImages
|
let images = modelManager.currentModel?.supportsImages == true ? attachedImages : []
|
||||||
inputText = ""
|
inputText = ""
|
||||||
attachedImages = []
|
attachedImages = []
|
||||||
|
|
||||||
@@ -135,6 +135,7 @@ final class ChatViewModel {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func attachImage(_ image: NSImage) {
|
func attachImage(_ image: NSImage) {
|
||||||
|
guard modelManager.currentModel?.supportsImages == true else { return }
|
||||||
attachedImages.append(image)
|
attachedImages.append(image)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -154,6 +155,13 @@ final class ChatViewModel {
|
|||||||
chatSession = nil
|
chatSession = nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func handleModelChange() {
|
||||||
|
resetSession()
|
||||||
|
if modelManager.currentModel?.supportsImages != true {
|
||||||
|
attachedImages = []
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// MARK: - API Server
|
// MARK: - API Server
|
||||||
|
|
||||||
func startAPIServer() {
|
func startAPIServer() {
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
import Foundation
|
import Foundation
|
||||||
import Hub
|
import Hub
|
||||||
import MLX
|
import MLX
|
||||||
|
import MLXLLM
|
||||||
import MLXLMCommon
|
import MLXLMCommon
|
||||||
import MLXVLM
|
import MLXVLM
|
||||||
|
|
||||||
@@ -77,11 +78,21 @@ final class ModelManager {
|
|||||||
configuration = config.modelConfiguration
|
configuration = config.modelConfiguration
|
||||||
}
|
}
|
||||||
|
|
||||||
let container = try await VLMModelFactory.shared.loadContainer(
|
let container: ModelContainer
|
||||||
hub: Self.hub,
|
switch config.loaderKind {
|
||||||
configuration: configuration,
|
case .llm:
|
||||||
progressHandler: progressHandler
|
container = try await LLMModelFactory.shared.loadContainer(
|
||||||
)
|
hub: Self.hub,
|
||||||
|
configuration: configuration,
|
||||||
|
progressHandler: progressHandler
|
||||||
|
)
|
||||||
|
case .vlm:
|
||||||
|
container = try await VLMModelFactory.shared.loadContainer(
|
||||||
|
hub: Self.hub,
|
||||||
|
configuration: configuration,
|
||||||
|
progressHandler: progressHandler
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
self.isDownloading = false
|
self.isDownloading = false
|
||||||
self.modelContainer = container
|
self.modelContainer = container
|
||||||
|
|||||||
@@ -5,10 +5,14 @@ struct ChatInputView: View {
|
|||||||
@Bindable var viewModel: ChatViewModel
|
@Bindable var viewModel: ChatViewModel
|
||||||
@State private var pasteMonitor: Any?
|
@State private var pasteMonitor: Any?
|
||||||
|
|
||||||
|
private var supportsImages: Bool {
|
||||||
|
viewModel.modelManager.currentModel?.supportsImages == true
|
||||||
|
}
|
||||||
|
|
||||||
var body: some View {
|
var body: some View {
|
||||||
VStack(spacing: 8) {
|
VStack(spacing: 8) {
|
||||||
// Image preview strip
|
// Image preview strip
|
||||||
if !viewModel.attachedImages.isEmpty {
|
if supportsImages && !viewModel.attachedImages.isEmpty {
|
||||||
ScrollView(.horizontal, showsIndicators: false) {
|
ScrollView(.horizontal, showsIndicators: false) {
|
||||||
HStack(spacing: 8) {
|
HStack(spacing: 8) {
|
||||||
ForEach(Array(viewModel.attachedImages.enumerated()), id: \.offset) { index, image in
|
ForEach(Array(viewModel.attachedImages.enumerated()), id: \.offset) { index, image in
|
||||||
@@ -46,7 +50,7 @@ struct ChatInputView: View {
|
|||||||
.font(.title3)
|
.font(.title3)
|
||||||
}
|
}
|
||||||
.buttonStyle(.plain)
|
.buttonStyle(.plain)
|
||||||
.disabled(!viewModel.modelManager.isReady)
|
.disabled(!viewModel.modelManager.isReady || !supportsImages)
|
||||||
|
|
||||||
// Text field
|
// Text field
|
||||||
TextField("Message…", text: $viewModel.inputText, axis: .vertical)
|
TextField("Message…", text: $viewModel.inputText, axis: .vertical)
|
||||||
@@ -87,6 +91,7 @@ struct ChatInputView: View {
|
|||||||
}
|
}
|
||||||
.padding(.top, 4)
|
.padding(.top, 4)
|
||||||
.onDrop(of: [.image, .fileURL], isTargeted: nil) { providers in
|
.onDrop(of: [.image, .fileURL], isTargeted: nil) { providers in
|
||||||
|
guard supportsImages else { return false }
|
||||||
for provider in providers {
|
for provider in providers {
|
||||||
if provider.hasItemConformingToTypeIdentifier(UTType.fileURL.identifier) {
|
if provider.hasItemConformingToTypeIdentifier(UTType.fileURL.identifier) {
|
||||||
provider.loadItem(forTypeIdentifier: UTType.fileURL.identifier, options: nil) { data, _ in
|
provider.loadItem(forTypeIdentifier: UTType.fileURL.identifier, options: nil) { data, _ in
|
||||||
@@ -121,6 +126,7 @@ struct ChatInputView: View {
|
|||||||
private func installPasteMonitor() {
|
private func installPasteMonitor() {
|
||||||
guard pasteMonitor == nil else { return }
|
guard pasteMonitor == nil else { return }
|
||||||
pasteMonitor = NSEvent.addLocalMonitorForEvents(matching: .keyDown) { event in
|
pasteMonitor = NSEvent.addLocalMonitorForEvents(matching: .keyDown) { event in
|
||||||
|
guard supportsImages else { return event }
|
||||||
// Check for Cmd+V
|
// Check for Cmd+V
|
||||||
guard event.modifierFlags.contains(.command),
|
guard event.modifierFlags.contains(.command),
|
||||||
event.charactersIgnoringModifiers == "v" else {
|
event.charactersIgnoringModifiers == "v" else {
|
||||||
@@ -178,6 +184,7 @@ struct ChatInputView: View {
|
|||||||
// MARK: - File picker
|
// MARK: - File picker
|
||||||
|
|
||||||
private func pickImage() {
|
private func pickImage() {
|
||||||
|
guard supportsImages else { return }
|
||||||
let panel = NSOpenPanel()
|
let panel = NSOpenPanel()
|
||||||
panel.allowedContentTypes = [.image]
|
panel.allowedContentTypes = [.image]
|
||||||
panel.allowsMultipleSelection = true
|
panel.allowsMultipleSelection = true
|
||||||
|
|||||||
25
README.md
25
README.md
@@ -1,14 +1,17 @@
|
|||||||
# MLX Server
|
# MLX Server
|
||||||
|
|
||||||
Native macOS app for running local LLMs on Apple Silicon via [MLX](https://github.com/ml-explore/mlx). Built with SwiftUI, it provides both a **chat UI** and an embedded **OpenAI-compatible API server**. Supports vision, tool use, and thinking mode.
|
Native macOS app for running local LLMs on Apple Silicon via [MLX](https://github.com/ml-explore/mlx). Built with SwiftUI, it provides both a **chat UI** and an embedded **OpenAI-compatible API server**. Supports both vision-capable and text-only MLX models, plus tool use and thinking mode where the selected model supports them.
|
||||||
|
|
||||||
## Supported Models
|
## Supported Models
|
||||||
|
|
||||||
| Alias | Model | Context | Capabilities |
|
| Alias | Model | Context | Loader | Capabilities |
|
||||||
|-------|-------|---------|-------------|
|
|-------|-------|---------|--------|-------------|
|
||||||
| `gemma` | `mlx-community/gemma-3-4b-it-4bit` | 128k | Vision, tool use (`tool_code` blocks) |
|
| `gemma` | `mlx-community/gemma-3-4b-it-4bit` | 128k | `VLMModelFactory` | Vision, tool use (`tool_code` blocks) |
|
||||||
| `qwen` | `mlx-community/Qwen3-VL-4B-Instruct-4bit` | 256k | Vision, tool use (`<tool_call>` tags) |
|
| `qwen` | `mlx-community/Qwen3-VL-4B-Instruct-4bit` | 256k | `VLMModelFactory` | Vision, tool use (`<tool_call>` tags) |
|
||||||
| `qwen3.5-9b` | `mlx-community/Qwen3.5-9B-4bit` | 256k | Thinking mode, tool use |
|
| `qwen3.5-9b` | `mlx-community/Qwen3.5-9B-4bit` | 256k | `LLMModelFactory` | Text-only, thinking mode, tool use |
|
||||||
|
| `stheno` | `synk/L3-8B-Stheno-v3.2-MLX` | 8k | `LLMModelFactory` | Text-only |
|
||||||
|
|
||||||
|
`stheno` is loaded as a standard MLX text model. The Hugging Face card provides an `mlx_lm.load(...)` sample rather than a VLM example, and its config reports `model_type: llama` with `max_position_embeddings: 8192`, so the app treats it as an 8k Llama-family text model.
|
||||||
|
|
||||||
Any model in MLX format on HuggingFace can be added — there is no restriction on uploader or architecture.
|
Any model in MLX format on HuggingFace can be added — there is no restriction on uploader or architecture.
|
||||||
|
|
||||||
@@ -23,7 +26,7 @@ open "build/Debug/MLX Server.app"
|
|||||||
|
|
||||||
## App Features
|
## App Features
|
||||||
|
|
||||||
- **Chat interface** with markdown rendering, image attachments (file picker, drag & drop, clipboard paste, Finder copy-paste)
|
- **Chat interface** with markdown rendering and model-aware image attachments (file picker, drag & drop, clipboard paste, Finder copy-paste on vision-capable models)
|
||||||
- **Model picker** in toolbar with local/download status indicators and re-download button
|
- **Model picker** in toolbar with local/download status indicators and re-download button
|
||||||
- **Download progress modal** — shows file progress, percentage, and speed when downloading a new model
|
- **Download progress modal** — shows file progress, percentage, and speed when downloading a new model
|
||||||
- **Thinking mode** — models like Qwen3.5 can reason internally before responding; thinking content appears in a collapsible box. Toggle on/off in Settings.
|
- **Thinking mode** — models like Qwen3.5 can reason internally before responding; thinking content appears in a collapsible box. Toggle on/off in Settings.
|
||||||
@@ -42,6 +45,8 @@ The embedded API server (toggle in toolbar) runs on port 1234 by default. Standa
|
|||||||
- `POST /v1/chat/completions` — chat completions (streaming and non-streaming)
|
- `POST /v1/chat/completions` — chat completions (streaming and non-streaming)
|
||||||
- `GET /health` — health check
|
- `GET /health` — health check
|
||||||
|
|
||||||
|
Capability checks are enforced server-side. If a request sends images to a text-only model or tools to a model without tool support, the server returns a `400 invalid_request_error`.
|
||||||
|
|
||||||
### Model Swapping
|
### Model Swapping
|
||||||
|
|
||||||
Send any model ID or alias in the `model` field. If it differs from the currently loaded model, the server swaps automatically:
|
Send any model ID or alias in the `model` field. If it differs from the currently loaded model, the server swaps automatically:
|
||||||
@@ -69,10 +74,14 @@ Pass images as base64 data URIs in the `image_url` content part:
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Text-only models such as `qwen3.5-9b` and `stheno` reject image inputs.
|
||||||
|
|
||||||
### Tool Use
|
### Tool Use
|
||||||
|
|
||||||
Pass tools in the `tools` field (OpenAI format). The server handles model-specific formatting (Gemma `tool_code` blocks, Qwen `<tool_call>` XML tags) and parses tool calls from output automatically. When tools are present during streaming, output is buffered to strip tool-call markup before sending to the client.
|
Pass tools in the `tools` field (OpenAI format). The server handles model-specific formatting (Gemma `tool_code` blocks, Qwen `<tool_call>` XML tags) and parses tool calls from output automatically. When tools are present during streaming, output is buffered to strip tool-call markup before sending to the client.
|
||||||
|
|
||||||
|
`stheno` is currently documented and configured as a plain text model, so tool requests to it are rejected.
|
||||||
|
|
||||||
## Project Structure
|
## Project Structure
|
||||||
|
|
||||||
```
|
```
|
||||||
@@ -112,7 +121,7 @@ build.sh — One-command build script (xcodegen + xcodebuild)
|
|||||||
|
|
||||||
## Key Design Decisions
|
## Key Design Decisions
|
||||||
|
|
||||||
- Uses `mlx-swift-lm` (`MLXVLM` / `VLMModelFactory`) for inference — loads any MLX-format model from HuggingFace
|
- Uses `mlx-swift-lm` for inference — `VLMModelFactory` for vision models and `LLMModelFactory` for text-only models
|
||||||
- **Offline-first**: `LocalModelResolver` checks both the sandboxed app container and `~/.cache/huggingface/hub/` for locally-cached models before downloading
|
- **Offline-first**: `LocalModelResolver` checks both the sandboxed app container and `~/.cache/huggingface/hub/` for locally-cached models before downloading
|
||||||
- **No duplicate storage**: custom `HubApi` with blob cache disabled — models are stored once in the snapshot cache
|
- **No duplicate storage**: custom `HubApi` with blob cache disabled — models are stored once in the snapshot cache
|
||||||
- **KV cache reuse** across API requests — reuses `ChatSession` when conversation history prefix matches
|
- **KV cache reuse** across API requests — reuses `ChatSession` when conversation history prefix matches
|
||||||
|
|||||||
Reference in New Issue
Block a user