From 7d259550427a1c51b6acd4c2ee6ca375ed915ad6 Mon Sep 17 00:00:00 2001 From: Chili Palmer Date: Fri, 20 Mar 2026 16:47:10 +0100 Subject: [PATCH] chore: added more qwen 3.5 models --- AGENTS.md | 5 +++-- MLXServer/Models/ModelConfig.swift | 13 +++++++++++-- MLXServerTests/Server/PromptBuilderTests.swift | 2 +- README.md | 9 +++++---- 4 files changed, 20 insertions(+), 9 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 1d50458..d161350 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -52,8 +52,9 @@ open "build/Debug/MLX Server.app" | Alias | HuggingFace ID | Notes | |-------|---------------|-------| | `gemma` | `mlx-community/gemma-3-4b-it-4bit` | Vision + tool use via `tool_code` blocks (128k context) | -| `qwen` | `mlx-community/Qwen3-VL-4B-Instruct-4bit` | Vision + tool use via `` tags (256k context) | -| `qwen3.5-9b` | `mlx-community/Qwen3.5-9B-4bit` | Thinking mode, tool use (256k context) | +| `qwen` | `mlx-community/Qwen3.5-4B-MLX-4bit` | Vision + thinking mode + tool use via `` tags (256k context) | +| `qwen3.5-0.8b` | `mlx-community/Qwen3.5-0.8B-4bit` | Vision + thinking mode + tool use via `` tags (256k context) | +| `qwen3.5-9b` | `mlx-community/Qwen3.5-9B-4bit` | Vision + thinking mode + tool use via `` tags (256k context) | Any model in MLX format on HuggingFace can be added — no restriction on uploader or architecture. diff --git a/MLXServer/Models/ModelConfig.swift b/MLXServer/Models/ModelConfig.swift index b64955a..dba35aa 100644 --- a/MLXServer/Models/ModelConfig.swift +++ b/MLXServer/Models/ModelConfig.swift @@ -29,8 +29,17 @@ struct ModelConfig: Identifiable, Hashable { ), ModelConfig( id: "qwen", - repoId: "mlx-community/Qwen3-VL-4B-Instruct-4bit", - displayName: "Qwen3 VL 4B", + repoId: "mlx-community/Qwen3.5-4B-MLX-4bit", + displayName: "Qwen3.5 4B", + contextLength: 256_000, + loaderKind: .vlm, + supportsImages: true, + supportsTools: true + ), + ModelConfig( + id: "qwen3.5-0.8b", + repoId: "mlx-community/Qwen3.5-0.8B-4bit", + displayName: "Qwen3.5 0.8B", contextLength: 256_000, loaderKind: .vlm, supportsImages: true, diff --git a/MLXServerTests/Server/PromptBuilderTests.swift b/MLXServerTests/Server/PromptBuilderTests.swift index 5154d46..79e5a76 100644 --- a/MLXServerTests/Server/PromptBuilderTests.swift +++ b/MLXServerTests/Server/PromptBuilderTests.swift @@ -114,7 +114,7 @@ final class PromptBuilderTests: XCTestCase { n: nil ) - let prepared = PromptBuilder.build(from: request, modelId: "mlx-community/Qwen3-VL-4B-Instruct-4bit", thinkingEnabled: true) + let prepared = PromptBuilder.build(from: request, modelId: "mlx-community/Qwen3.5-4B-MLX-4bit", thinkingEnabled: true) XCTAssertEqual(prepared.chatMessages.count, 1) XCTAssertTrue(prepared.chatMessages[0].content.contains("Let me check.")) diff --git a/README.md b/README.md index d06bf67..421c2c8 100644 --- a/README.md +++ b/README.md @@ -7,8 +7,9 @@ Native macOS app for running local LLMs on Apple Silicon via [MLX](https://githu | Alias | Model | Context | Loader | Capabilities | |-------|-------|---------|--------|-------------| | `gemma` | `mlx-community/gemma-3-4b-it-4bit` | 128k | `VLMModelFactory` | Vision, tool use (`tool_code` blocks) | -| `qwen` | `mlx-community/Qwen3-VL-4B-Instruct-4bit` | 256k | `VLMModelFactory` | Vision, tool use (`` tags) | -| `qwen3.5-9b` | `mlx-community/Qwen3.5-9B-4bit` | 256k | `LLMModelFactory` | Vision, thinking mode, tool use | +| `qwen` | `mlx-community/Qwen3.5-4B-MLX-4bit` | 256k | `VLMModelFactory` | Vision, thinking mode, tool use (`` tags) | +| `qwen3.5-0.8b` | `mlx-community/Qwen3.5-0.8B-4bit` | 256k | `VLMModelFactory` | Vision, thinking mode, tool use (`` tags) | +| `qwen3.5-9b` | `mlx-community/Qwen3.5-9B-4bit` | 256k | `VLMModelFactory` | Vision, thinking mode, tool use (`` tags) | | `stheno` | `synk/L3-8B-Stheno-v3.2-MLX` | 8k | `LLMModelFactory` | Text-only, llama-based | Any model in MLX format on HuggingFace can be added — there is no restriction on uploader or architecture. @@ -33,7 +34,7 @@ open "build/Debug/MLX Server.app" - **Native chat documents** — save chats as `.mlxchat` package documents, reopen them from File > Open Chat or by double-clicking them in Finder, and continue the conversation with restored model context, thinking blocks, and images - **Export chat** — File > Export Chat (Cmd+Shift+E) saves conversations as Markdown or RTF (Pages-compatible) - **Status bar** showing model name, context window, tokens/sec, token counts, GPU memory, API server status -- **Keyboard shortcuts**: `Cmd+N` (new chat), `Cmd+O` (open chat document), `Cmd+S` (save chat document), `Cmd+Shift+S` (save chat document as), `Cmd+Shift+E` (export), `Cmd+Return` (send), `Escape` (stop), `Cmd+1/2/3/4` (switch models) +- **Keyboard shortcuts**: `Cmd+N` (new chat), `Cmd+O` (open chat document), `Cmd+S` (save chat document), `Cmd+Shift+S` (save chat document as), `Cmd+Shift+E` (export), `Cmd+Return` (send), `Escape` (stop), `Cmd+1/2/3/4/5` (switch models) - **Scene management** — create and edit reusable roleplay/task presets from the New Chat flow or Settings - **Settings** (`Cmd+,`): default model, thinking mode toggle, base system prompt, scene management, API port, API auto-start, idle unload timeout - **Idle auto-unload** — model is unloaded after configurable idle time (resets on both user input and model output), reloaded on next request @@ -75,7 +76,7 @@ Pass images as base64 data URIs in the `image_url` content part: } ``` -Text-only models such as `qwen3.5-9b` and `stheno` reject image inputs. +Text-only models such as `stheno` reject image inputs. ### Tool Use