From 7d259550427a1c51b6acd4c2ee6ca375ed915ad6 Mon Sep 17 00:00:00 2001
From: Chili Palmer <hugoms@me.com>
Date: Fri, 20 Mar 2026 16:47:10 +0100
Subject: [PATCH] chore: add Qwen3.5 0.8B model and switch qwen alias to Qwen3.5 4B

---
 AGENTS.md                                      |  5 +++--
 MLXServer/Models/ModelConfig.swift             | 13 +++++++++++--
 MLXServerTests/Server/PromptBuilderTests.swift |  2 +-
 README.md                                      |  9 +++++----
 4 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/AGENTS.md b/AGENTS.md
index 1d50458..d161350 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -52,8 +52,9 @@ open "build/Debug/MLX Server.app"
 | Alias | HuggingFace ID | Notes |
 |-------|---------------|-------|
 | `gemma` | `mlx-community/gemma-3-4b-it-4bit` | Vision + tool use via `tool_code` blocks (128k context) |
-| `qwen` | `mlx-community/Qwen3-VL-4B-Instruct-4bit` | Vision + tool use via `<tool_call>` tags (256k context) |
-| `qwen3.5-9b` | `mlx-community/Qwen3.5-9B-4bit` | Thinking mode, tool use (256k context) |
+| `qwen` | `mlx-community/Qwen3.5-4B-MLX-4bit` | Vision + thinking mode + tool use via `<tool_call>` tags (256k context) |
+| `qwen3.5-0.8b` | `mlx-community/Qwen3.5-0.8B-4bit` | Vision + thinking mode + tool use via `<tool_call>` tags (256k context) |
+| `qwen3.5-9b` | `mlx-community/Qwen3.5-9B-4bit` | Vision + thinking mode + tool use via `<tool_call>` tags (256k context) |
 
 Any model in MLX format on HuggingFace can be added — no restriction on uploader or architecture.
 
diff --git a/MLXServer/Models/ModelConfig.swift b/MLXServer/Models/ModelConfig.swift
index b64955a..dba35aa 100644
--- a/MLXServer/Models/ModelConfig.swift
+++ b/MLXServer/Models/ModelConfig.swift
@@ -29,8 +29,17 @@ struct ModelConfig: Identifiable, Hashable {
         ),
         ModelConfig(
             id: "qwen",
-            repoId: "mlx-community/Qwen3-VL-4B-Instruct-4bit",
-            displayName: "Qwen3 VL 4B",
+            repoId: "mlx-community/Qwen3.5-4B-MLX-4bit",
+            displayName: "Qwen3.5 4B",
+            contextLength: 256_000,
+            loaderKind: .vlm,
+            supportsImages: true,
+            supportsTools: true
+        ),
+        ModelConfig(
+            id: "qwen3.5-0.8b",
+            repoId: "mlx-community/Qwen3.5-0.8B-4bit",
+            displayName: "Qwen3.5 0.8B",
             contextLength: 256_000,
             loaderKind: .vlm,
             supportsImages: true,
diff --git a/MLXServerTests/Server/PromptBuilderTests.swift b/MLXServerTests/Server/PromptBuilderTests.swift
index 5154d46..79e5a76 100644
--- a/MLXServerTests/Server/PromptBuilderTests.swift
+++ b/MLXServerTests/Server/PromptBuilderTests.swift
@@ -114,7 +114,7 @@ final class PromptBuilderTests: XCTestCase {
             n: nil
         )
 
-        let prepared = PromptBuilder.build(from: request, modelId: "mlx-community/Qwen3-VL-4B-Instruct-4bit", thinkingEnabled: true)
+        let prepared = PromptBuilder.build(from: request, modelId: "mlx-community/Qwen3.5-4B-MLX-4bit", thinkingEnabled: true)
 
         XCTAssertEqual(prepared.chatMessages.count, 1)
         XCTAssertTrue(prepared.chatMessages[0].content.contains("Let me check."))
diff --git a/README.md b/README.md
index d06bf67..421c2c8 100644
--- a/README.md
+++ b/README.md
@@ -7,8 +7,9 @@ Native macOS app for running local LLMs on Apple Silicon via [MLX](https://githu
 | Alias | Model | Context | Loader | Capabilities |
 |-------|-------|---------|--------|-------------|
 | `gemma` | `mlx-community/gemma-3-4b-it-4bit` | 128k | `VLMModelFactory` | Vision, tool use (`tool_code` blocks) |
-| `qwen` | `mlx-community/Qwen3-VL-4B-Instruct-4bit` | 256k | `VLMModelFactory` | Vision, tool use (`<tool_call>` tags) |
-| `qwen3.5-9b` | `mlx-community/Qwen3.5-9B-4bit` | 256k | `LLMModelFactory` | Vision, thinking mode, tool use |
+| `qwen` | `mlx-community/Qwen3.5-4B-MLX-4bit` | 256k | `VLMModelFactory` | Vision, thinking mode, tool use (`<tool_call>` tags) |
+| `qwen3.5-0.8b` | `mlx-community/Qwen3.5-0.8B-4bit` | 256k | `VLMModelFactory` | Vision, thinking mode, tool use (`<tool_call>` tags) |
+| `qwen3.5-9b` | `mlx-community/Qwen3.5-9B-4bit` | 256k | `VLMModelFactory` | Vision, thinking mode, tool use (`<tool_call>` tags) |
 | `stheno` | `synk/L3-8B-Stheno-v3.2-MLX` | 8k | `LLMModelFactory` | Text-only, llama-based |
 
 Any model in MLX format on HuggingFace can be added — there is no restriction on uploader or architecture.
@@ -33,7 +34,7 @@ open "build/Debug/MLX Server.app"
 - **Native chat documents** — save chats as `.mlxchat` package documents, reopen them from File > Open Chat or by double-clicking them in Finder, and continue the conversation with restored model context, thinking blocks, and images
 - **Export chat** — File > Export Chat (Cmd+Shift+E) saves conversations as Markdown or RTF (Pages-compatible)
 - **Status bar** showing model name, context window, tokens/sec, token counts, GPU memory, API server status
-- **Keyboard shortcuts**: `Cmd+N` (new chat), `Cmd+O` (open chat document), `Cmd+S` (save chat document), `Cmd+Shift+S` (save chat document as), `Cmd+Shift+E` (export), `Cmd+Return` (send), `Escape` (stop), `Cmd+1/2/3/4` (switch models)
+- **Keyboard shortcuts**: `Cmd+N` (new chat), `Cmd+O` (open chat document), `Cmd+S` (save chat document), `Cmd+Shift+S` (save chat document as), `Cmd+Shift+E` (export), `Cmd+Return` (send), `Escape` (stop), `Cmd+1/2/3/4/5` (switch models)
 - **Scene management** — create and edit reusable roleplay/task presets from the New Chat flow or Settings
 - **Settings** (`Cmd+,`): default model, thinking mode toggle, base system prompt, scene management, API port, API auto-start, idle unload timeout
 - **Idle auto-unload** — model is unloaded after configurable idle time (resets on both user input and model output), reloaded on next request
@@ -75,7 +76,7 @@ Pass images as base64 data URIs in the `image_url` content part:
 }
 ```
 
-Text-only models such as `qwen3.5-9b` and `stheno` reject image inputs.
+Text-only models such as `stheno` reject image inputs.
 
 ### Tool Use