From 27849ccbd7eed5e8aabd63f44326190edf6a6cb3 Mon Sep 17 00:00:00 2001
From: Chili Palmer <hugoms@me.com>
Date: Wed, 18 Mar 2026 13:08:21 +0100
Subject: [PATCH] feat: add stheno (Llama-based) text-only model

---
 MLXServer/ContentView.swift              |  2 +-
 MLXServer/Models/ModelConfig.swift       | 32 +++++++++++++++++++++---
 MLXServer/Server/APIServer.swift         | 21 +++++++++++++++-
 MLXServer/ViewModels/ChatViewModel.swift | 10 +++++++-
 MLXServer/ViewModels/ModelManager.swift  | 21 ++++++++++++----
 MLXServer/Views/ChatInputView.swift      | 11 ++++++--
 README.md                                | 25 ++++++++++++------
 7 files changed, 101 insertions(+), 21 deletions(-)

diff --git a/MLXServer/ContentView.swift b/MLXServer/ContentView.swift
index 9ab55db..6f52594 100644
--- a/MLXServer/ContentView.swift
+++ b/MLXServer/ContentView.swift
@@ -22,7 +22,7 @@ struct ContentView: View {
                 }
             }
             .onChange(of: modelManager.currentModel) {
-                chatVM?.resetSession()
+                chatVM?.handleModelChange()
                 // Persist last used model
                 if let id = modelManager.currentModel?.id {
                     Preferences.lastModelId = id
diff --git a/MLXServer/Models/ModelConfig.swift b/MLXServer/Models/ModelConfig.swift
index ac68653..d393cdd 100644
--- a/MLXServer/Models/ModelConfig.swift
+++ b/MLXServer/Models/ModelConfig.swift
@@ -3,10 +3,18 @@ import MLXLMCommon
 
 /// Defines a supported model with its metadata.
 struct ModelConfig: Identifiable, Hashable {
+    enum LoaderKind: Hashable { // Selects which model factory loads this model.
+        case llm // Text-only model, loaded through LLMModelFactory.
+        case vlm // Vision-language model, loaded through VLMModelFactory.
+    }
+
     let id: String          // alias: "gemma", "gemma3n", "qwen"
     let repoId: String      // HuggingFace ID
     let displayName: String
     let contextLength: Int
+    let loaderKind: LoaderKind
+    let supportsImages: Bool
+    let supportsTools: Bool
 
     /// All models supported by the app.
     static let availableModels: [ModelConfig] = [
@@ -14,19 +22,37 @@ struct ModelConfig: Identifiable, Hashable {
             id: "gemma",
             repoId: "mlx-community/gemma-3-4b-it-4bit",
             displayName: "Gemma 3 4B",
-            contextLength: 128_000
+            contextLength: 128_000,
+            loaderKind: .vlm,
+            supportsImages: true,
+            supportsTools: true
         ),
         ModelConfig(
             id: "qwen",
             repoId: "mlx-community/Qwen3-VL-4B-Instruct-4bit",
             displayName: "Qwen3 VL 4B",
-            contextLength: 256_000
+            contextLength: 256_000,
+            loaderKind: .vlm,
+            supportsImages: true,
+            supportsTools: true
         ),
         ModelConfig(
             id: "qwen3.5-9b",
             repoId: "mlx-community/Qwen3.5-9B-4bit",
             displayName: "Qwen3.5 9B",
-            contextLength: 256_000
+            contextLength: 256_000,
+            loaderKind: .llm,
+            supportsImages: false,
+            supportsTools: true
+        ),
+        ModelConfig(
+            id: "stheno",
+            repoId: "synk/L3-8B-Stheno-v3.2-MLX",
+            displayName: "Stheno L3 8B",
+            contextLength: 8_192,
+            loaderKind: .llm,
+            supportsImages: false,
+            supportsTools: false
         ),
     ]
 
diff --git a/MLXServer/Server/APIServer.swift b/MLXServer/Server/APIServer.swift
index 9da6a70..ab6c102 100644
--- a/MLXServer/Server/APIServer.swift
+++ b/MLXServer/Server/APIServer.swift
@@ -221,12 +221,22 @@ final class APIServer {
         let requestId = "chatcmpl-\(UUID().uuidString.prefix(12).lowercased())"
         let created = Int(Date().timeIntervalSince1970)
         let modelName = request.model ?? modelManager.currentModel?.repoId ?? "unknown"
+        let currentModel = modelManager.currentModel
         let contextLength = modelManager.currentModel?.contextLength ?? 0
 
+        if let tools = request.tools, !tools.isEmpty, currentModel?.supportsTools != true {
+            sendResponse(
+                connection: connection,
+                status: 400,
+                body: #"{"error":{"message":"The currently selected model does not support tool calls.","type":"invalid_request_error","code":"tools_not_supported"}}"#
+            )
+            return
+        }
+
         // Convert API messages to Chat.Message, extracting images from content parts
         var chatMessages: [Chat.Message] = []
         var images: [UserInput.Image] = []
-        let currentModelRepoId = modelManager.currentModel?.repoId ?? modelName
+        let currentModelRepoId = currentModel?.repoId ?? modelName
 
         // Build the instructions string (system prompt + tool definitions).
         // This is passed to ChatSession via `instructions:` rather than injected
@@ -298,6 +308,15 @@ final class APIServer {
             images.append(contentsOf: messageImages)
         }
 
+        if !images.isEmpty, currentModel?.supportsImages != true {
+            sendResponse(
+                connection: connection,
+                status: 400,
+                body: #"{"error":{"message":"The currently selected model does not support image inputs.","type":"invalid_request_error","code":"vision_not_supported"}}"#
+            )
+            return
+        }
+
         // Context window check: estimate token count and reject if over limit
         if contextLength > 0 {
             let totalChars = chatMessages.reduce(0) { $0 + $1.content.count }
diff --git a/MLXServer/ViewModels/ChatViewModel.swift b/MLXServer/ViewModels/ChatViewModel.swift
index fe4c8db..76c353a 100644
--- a/MLXServer/ViewModels/ChatViewModel.swift
+++ b/MLXServer/ViewModels/ChatViewModel.swift
@@ -53,7 +53,7 @@ final class ChatViewModel {
         ensureSession()
         guard let session = chatSession else { return }
 
-        let images = attachedImages
+        let images = modelManager.currentModel?.supportsImages == true ? attachedImages : []
         inputText = ""
         attachedImages = []
 
@@ -135,6 +135,7 @@ final class ChatViewModel {
     }
 
     func attachImage(_ image: NSImage) {
+        guard modelManager.currentModel?.supportsImages == true else { return }
         attachedImages.append(image)
     }
 
@@ -154,6 +155,13 @@ final class ChatViewModel {
         chatSession = nil
     }
 
+    func handleModelChange() { // Run when the selected model changes.
+        resetSession() // Reset the chat session for the new model.
+        if modelManager.currentModel?.supportsImages != true { // Also true when no model is selected.
+            attachedImages = [] // Discard pending attachments the new model cannot accept.
+        }
+    }
+
     // MARK: - API Server
 
     func startAPIServer() {
diff --git a/MLXServer/ViewModels/ModelManager.swift b/MLXServer/ViewModels/ModelManager.swift
index aa22335..a6e914c 100644
--- a/MLXServer/ViewModels/ModelManager.swift
+++ b/MLXServer/ViewModels/ModelManager.swift
@@ -1,6 +1,7 @@
 import Foundation
 import Hub
 import MLX
+import MLXLLM
 import MLXLMCommon
 import MLXVLM
 
@@ -77,11 +78,21 @@ final class ModelManager {
                 configuration = config.modelConfiguration
             }
 
-            let container = try await VLMModelFactory.shared.loadContainer(
-                hub: Self.hub,
-                configuration: configuration,
-                progressHandler: progressHandler
-            )
+            let container: ModelContainer
+            switch config.loaderKind {
+            case .llm:
+                container = try await LLMModelFactory.shared.loadContainer(
+                    hub: Self.hub,
+                    configuration: configuration,
+                    progressHandler: progressHandler
+                )
+            case .vlm:
+                container = try await VLMModelFactory.shared.loadContainer(
+                    hub: Self.hub,
+                    configuration: configuration,
+                    progressHandler: progressHandler
+                )
+            }
 
             self.isDownloading = false
             self.modelContainer = container
diff --git a/MLXServer/Views/ChatInputView.swift b/MLXServer/Views/ChatInputView.swift
index 1c544a7..6117a22 100644
--- a/MLXServer/Views/ChatInputView.swift
+++ b/MLXServer/Views/ChatInputView.swift
@@ -5,10 +5,14 @@ struct ChatInputView: View {
     @Bindable var viewModel: ChatViewModel
     @State private var pasteMonitor: Any?
 
+    private var supportsImages: Bool { // Whether the current model accepts image input.
+        viewModel.modelManager.currentModel?.supportsImages == true // false when no model is selected
+    }
+
     var body: some View {
         VStack(spacing: 8) {
             // Image preview strip
-            if !viewModel.attachedImages.isEmpty {
+            if supportsImages && !viewModel.attachedImages.isEmpty {
                 ScrollView(.horizontal, showsIndicators: false) {
                     HStack(spacing: 8) {
                         ForEach(Array(viewModel.attachedImages.enumerated()), id: \.offset) { index, image in
@@ -46,7 +50,7 @@ struct ChatInputView: View {
                         .font(.title3)
                 }
                 .buttonStyle(.plain)
-                .disabled(!viewModel.modelManager.isReady)
+                .disabled(!viewModel.modelManager.isReady || !supportsImages)
 
                 // Text field
                 TextField("Message…", text: $viewModel.inputText, axis: .vertical)
@@ -87,6 +91,7 @@ struct ChatInputView: View {
         }
         .padding(.top, 4)
         .onDrop(of: [.image, .fileURL], isTargeted: nil) { providers in
+            guard supportsImages else { return false }
             for provider in providers {
                 if provider.hasItemConformingToTypeIdentifier(UTType.fileURL.identifier) {
                     provider.loadItem(forTypeIdentifier: UTType.fileURL.identifier, options: nil) { data, _ in
@@ -121,6 +126,7 @@ struct ChatInputView: View {
     private func installPasteMonitor() {
         guard pasteMonitor == nil else { return }
         pasteMonitor = NSEvent.addLocalMonitorForEvents(matching: .keyDown) { event in
+            guard supportsImages else { return event }
             // Check for Cmd+V
             guard event.modifierFlags.contains(.command),
                   event.charactersIgnoringModifiers == "v" else {
@@ -178,6 +184,7 @@ struct ChatInputView: View {
     // MARK: - File picker
 
     private func pickImage() {
+        guard supportsImages else { return }
         let panel = NSOpenPanel()
         panel.allowedContentTypes = [.image]
         panel.allowsMultipleSelection = true
diff --git a/README.md b/README.md
index 1075e12..cbac6d8 100644
--- a/README.md
+++ b/README.md
@@ -1,14 +1,17 @@
 # MLX Server
 
-Native macOS app for running local LLMs on Apple Silicon via [MLX](https://github.com/ml-explore/mlx). Built with SwiftUI, it provides both a **chat UI** and an embedded **OpenAI-compatible API server**. Supports vision, tool use, and thinking mode.
+Native macOS app for running local LLMs on Apple Silicon via [MLX](https://github.com/ml-explore/mlx). Built with SwiftUI, it provides both a **chat UI** and an embedded **OpenAI-compatible API server**. Supports both vision-capable and text-only MLX models, plus tool use and thinking mode where the selected model supports them.
 
 ## Supported Models
 
-| Alias | Model | Context | Capabilities |
-|-------|-------|---------|-------------|
-| `gemma` | `mlx-community/gemma-3-4b-it-4bit` | 128k | Vision, tool use (`tool_code` blocks) |
-| `qwen` | `mlx-community/Qwen3-VL-4B-Instruct-4bit` | 256k | Vision, tool use (`<tool_call>` tags) |
-| `qwen3.5-9b` | `mlx-community/Qwen3.5-9B-4bit` | 256k | Thinking mode, tool use |
+| Alias | Model | Context | Loader | Capabilities |
+|-------|-------|---------|--------|-------------|
+| `gemma` | `mlx-community/gemma-3-4b-it-4bit` | 128k | `VLMModelFactory` | Vision, tool use (`tool_code` blocks) |
+| `qwen` | `mlx-community/Qwen3-VL-4B-Instruct-4bit` | 256k | `VLMModelFactory` | Vision, tool use (`<tool_call>` tags) |
+| `qwen3.5-9b` | `mlx-community/Qwen3.5-9B-4bit` | 256k | `LLMModelFactory` | Text-only, thinking mode, tool use |
+| `stheno` | `synk/L3-8B-Stheno-v3.2-MLX` | 8k | `LLMModelFactory` | Text-only |
+
+`stheno` is loaded as a standard MLX text model. The Hugging Face card provides an `mlx_lm.load(...)` sample rather than a VLM example, and its config reports `model_type: llama` with `max_position_embeddings: 8192`, so the app treats it as an 8k Llama-family text model.
 
 Any model in MLX format on HuggingFace can be added — there is no restriction on uploader or architecture.
 
@@ -23,7 +26,7 @@ open "build/Debug/MLX Server.app"
 
 ## App Features
 
-- **Chat interface** with markdown rendering, image attachments (file picker, drag & drop, clipboard paste, Finder copy-paste)
+- **Chat interface** with markdown rendering and model-aware image attachments (file picker, drag & drop, clipboard paste, Finder copy-paste on vision-capable models)
 - **Model picker** in toolbar with local/download status indicators and re-download button
 - **Download progress modal** — shows file progress, percentage, and speed when downloading a new model
 - **Thinking mode** — models like Qwen3.5 can reason internally before responding; thinking content appears in a collapsible box. Toggle on/off in Settings.
@@ -42,6 +45,8 @@ The embedded API server (toggle in toolbar) runs on port 1234 by default. Standa
 - `POST /v1/chat/completions` — chat completions (streaming and non-streaming)
 - `GET /health` — health check
 
+Capability checks are enforced server-side. If a request sends images to a text-only model or tools to a model without tool support, the server returns HTTP `400` with an `invalid_request_error` (code `vision_not_supported` or `tools_not_supported`, respectively).
+
 ### Model Swapping
 
 Send any model ID or alias in the `model` field. If it differs from the currently loaded model, the server swaps automatically:
@@ -69,10 +74,14 @@ Pass images as base64 data URIs in the `image_url` content part:
 }
 ```
 
+Text-only models such as `qwen3.5-9b` and `stheno` reject image inputs.
+
 ### Tool Use
 
 Pass tools in the `tools` field (OpenAI format). The server handles model-specific formatting (Gemma `tool_code` blocks, Qwen `<tool_call>` XML tags) and parses tool calls from output automatically. When tools are present during streaming, output is buffered to strip tool-call markup before sending to the client.
 
+`stheno` is configured as a plain text model without tool support, so requests that include `tools` are rejected.
+
 ## Project Structure
 
 ```
@@ -112,7 +121,7 @@ build.sh        — One-command build script (xcodegen + xcodebuild)
 
 ## Key Design Decisions
 
-- Uses `mlx-swift-lm` (`MLXVLM` / `VLMModelFactory`) for inference — loads any MLX-format model from HuggingFace
+- Uses `mlx-swift-lm` for inference — `VLMModelFactory` for vision models and `LLMModelFactory` for text-only models
 - **Offline-first**: `LocalModelResolver` checks both the sandboxed app container and `~/.cache/huggingface/hub/` for locally-cached models before downloading
 - **No duplicate storage**: custom `HubApi` with blob cache disabled — models are stored once in the snapshot cache
 - **KV cache reuse** across API requests — reuses `ChatSession` when conversation history prefix matches