From 6a87fe6f0845a7396ced6bfef0f7c2767089e7aa Mon Sep 17 00:00:00 2001 From: Chili Palmer Date: Wed, 18 Mar 2026 11:59:51 +0100 Subject: [PATCH] fix: export finally works --- AGENTS.md | 81 +++++++++++++++++++++++ CLAUDE.md | 80 +--------------------- MLXServer/Commands/SaveChatCommands.swift | 8 +-- MLXServer/ContentView.swift | 54 ++++++++++++--- MLXServer/MLXServer.entitlements | 2 +- MLXServer/Utilities/ChatExporter.swift | 27 +++++--- MLXServer/Utilities/FocusedValues.swift | 18 +++-- 7 files changed, 162 insertions(+), 108 deletions(-) create mode 100644 AGENTS.md diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..cd8d70b --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,81 @@ +# MLX Server + +Native macOS SwiftUI app for local LLMs on Apple Silicon via MLX. Provides a chat UI and an embedded OpenAI-compatible API server. Supports vision, tool use, and thinking mode. + +## Quick Start + +**Always use `./build.sh` to build the project** — never call `xcodebuild` directly. The script runs xcodegen first (to pick up new/removed files) and uses the correct scheme, destination, and build directory. 
+ +```bash +# Build (requires xcodegen: brew install xcodegen) +./build.sh + +# Run +open "build/Debug/MLX Server.app" +``` + +## Project Structure + +- `MLXServer/MLXServerApp.swift` — App entry point, GPU cache config, menu commands +- `MLXServer/ContentView.swift` — Main layout, toolbar, keyboard shortcuts, focused values +- `MLXServer/Models/ModelConfig.swift` — Model definitions (alias, repoId, contextLength), resolution +- `MLXServer/Models/ChatMessage.swift` — Chat message data model, `<think>` tag parsing +- `MLXServer/ViewModels/ModelManager.swift` — Model loading/switching via VLMModelFactory, download tracking, idle unload +- `MLXServer/ViewModels/ChatViewModel.swift` — Chat state, ChatSession management, API server lifecycle +- `MLXServer/Server/APIServer.swift` — NWListener HTTP server, SSE streaming, KV cache reuse, vision, tool call handling +- `MLXServer/Server/APIModels.swift` — OpenAI-compatible Codable structs +- `MLXServer/Server/ToolCallParser.swift` — Parses tool calls from model output (Gemma tool_code, Qwen XML tags) +- `MLXServer/Server/ToolPromptBuilder.swift` — Model-specific tool prompt formatting +- `MLXServer/Views/DownloadModalView.swift` — Modal overlay for model download progress +- `MLXServer/Views/ChatMessagesView.swift` — Message bubbles with markdown rendering and collapsible thinking blocks +- `MLXServer/Views/ChatInputView.swift` — Text input, image attach (file picker, drag & drop, Finder copy-paste) +- `MLXServer/Commands/SaveChatCommands.swift` — File > Export Chat menu command +- `MLXServer/Utilities/LocalModelResolver.swift` — Resolves HF repo IDs to local snapshots (sandbox + system cache + flat layouts) +- `MLXServer/Utilities/ChatExporter.swift` — Export conversations to Markdown or RTF (Pages-compatible) +- `MLXServer/Utilities/FocusedValues.swift` — FocusedValue keys for menu bar integration +- `MLXServer/Utilities/Preferences.swift` — UserDefaults wrapper (model, thinking mode, API, idle timeout) +- `project.yml` — 
xcodegen project spec +- `build.sh` — Build script (xcodegen + xcodebuild) + +## Supported Models + +| Alias | HuggingFace ID | Notes | +|-------|---------------|-------| +| `gemma` | `mlx-community/gemma-3-4b-it-4bit` | Vision + tool use via `tool_code` blocks (128k context) | +| `qwen` | `mlx-community/Qwen3-VL-4B-Instruct-4bit` | Vision + tool use via `<tool_call>` tags (256k context) | +| `qwen3.5-9b` | `mlx-community/Qwen3.5-9B-4bit` | Thinking mode, tool use (256k context) | + +Any model in MLX format on HuggingFace can be added — no restriction on uploader or architecture. + +## Critical Performance Rule + +**Inference speed is the #1 priority.** The token generation loop must never be blocked or slowed by anything else — no MainActor hops, no SwiftUI observation, no synchronous I/O. Everything that isn't inference (stats collection, UI updates, logging) must run on separate threads via loose coupling: + +- **`LiveCounters`** (thread-safe singleton with `OSAllocatedUnfairLock`) is the bridge: generation code writes to it directly from any thread with zero actor overhead. +- **`InferenceStats`** (UI-side, `@Observable @MainActor`) polls `LiveCounters` at 1Hz via a timer — never the other way around. +- SSE streaming (`sendSSEEvent`/`sendData`) runs nonisolated off MainActor so token sends don't compete with SwiftUI rendering. +- Never gate token output on UI state, analytics, or any `@MainActor`-isolated code. 
+ +## Key Design Decisions + +- Uses `mlx-swift-lm` (`MLXVLM` / `VLMModelFactory`) as the inference backend — loads any MLX-format model from HuggingFace +- Model-specific prompt formatting: Gemma uses `tool_code` blocks; Qwen uses `<tool_call>` XML tags +- **Offline-first**: `LocalModelResolver` checks the sandboxed app container, system `~/.cache/huggingface/hub/`, and flat download layouts — no network requests if model is cached +- **No duplicate storage**: custom `HubApi(cache: nil)` with explicit `downloadBase` — models stored once in the snapshot cache, not duplicated across blob cache and snapshots +- **Thinking mode**: `enable_thinking` passed to Jinja template context via `additionalContext`; `<think>...</think>` tags parsed in real-time during streaming and shown in collapsible UI blocks. Toggleable in Settings. +- **Download progress**: separate `isDownloading` state from `isLoading`; modal overlay shows file count, percentage, speed +- **Idle unload**: timer resets on both user input and model generation completion (not just request start) +- **Chat export**: Markdown (user messages as blockquotes) and RTF (Pages-compatible with formatted markdown) +- **Finder paste**: local event monitor intercepts Cmd+V to check pasteboard for image file URLs before TextField handles it +- HTTP server built on `Network.framework` (`NWListener`) — no third-party server dependencies +- KV cache reuse across API requests — reuses `ChatSession` when conversation history prefix matches +- GPU cache limit set to 20 MB; cache cleared on model unload + +## Dependencies + +Managed via Swift Package Manager (declared in `project.yml` for xcodegen). + +| Package | Products | +|---------|----------| +| `mlx-swift-lm` | `MLXLLM`, `MLXVLM`, `MLXLMCommon` | +| `swift-markdown-ui` | `MarkdownUI` | diff --git a/CLAUDE.md b/CLAUDE.md index 145cd6e..43c994c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,79 +1 @@ -# MLX Server - -Native macOS SwiftUI app for local LLMs on Apple Silicon via MLX. 
Provides a chat UI and an embedded OpenAI-compatible API server. Supports vision, tool use, and thinking mode. - -## Quick Start - -```bash -# Build (requires xcodegen: brew install xcodegen) -./build.sh - -# Run -open "build/Debug/MLX Server.app" -``` - -## Project Structure - -- `MLXServer/MLXServerApp.swift` — App entry point, GPU cache config, menu commands -- `MLXServer/ContentView.swift` — Main layout, toolbar, keyboard shortcuts, focused values -- `MLXServer/Models/ModelConfig.swift` — Model definitions (alias, repoId, contextLength), resolution -- `MLXServer/Models/ChatMessage.swift` — Chat message data model, `<think>` tag parsing -- `MLXServer/ViewModels/ModelManager.swift` — Model loading/switching via VLMModelFactory, download tracking, idle unload -- `MLXServer/ViewModels/ChatViewModel.swift` — Chat state, ChatSession management, API server lifecycle -- `MLXServer/Server/APIServer.swift` — NWListener HTTP server, SSE streaming, KV cache reuse, vision, tool call handling -- `MLXServer/Server/APIModels.swift` — OpenAI-compatible Codable structs -- `MLXServer/Server/ToolCallParser.swift` — Parses tool calls from model output (Gemma tool_code, Qwen XML tags) -- `MLXServer/Server/ToolPromptBuilder.swift` — Model-specific tool prompt formatting -- `MLXServer/Views/DownloadModalView.swift` — Modal overlay for model download progress -- `MLXServer/Views/ChatMessagesView.swift` — Message bubbles with markdown rendering and collapsible thinking blocks -- `MLXServer/Views/ChatInputView.swift` — Text input, image attach (file picker, drag & drop, Finder copy-paste) -- `MLXServer/Commands/SaveChatCommands.swift` — File > Export Chat menu command -- `MLXServer/Utilities/LocalModelResolver.swift` — Resolves HF repo IDs to local snapshots (sandbox + system cache + flat layouts) -- `MLXServer/Utilities/ChatExporter.swift` — Export conversations to Markdown or RTF (Pages-compatible) -- `MLXServer/Utilities/FocusedValues.swift` — FocusedValue keys for menu bar integration -- 
`MLXServer/Utilities/Preferences.swift` — UserDefaults wrapper (model, thinking mode, API, idle timeout) -- `project.yml` — xcodegen project spec -- `build.sh` — Build script (xcodegen + xcodebuild) - -## Supported Models - -| Alias | HuggingFace ID | Notes | -|-------|---------------|-------| -| `gemma` | `mlx-community/gemma-3-4b-it-4bit` | Vision + tool use via `tool_code` blocks (128k context) | -| `qwen` | `mlx-community/Qwen3-VL-4B-Instruct-4bit` | Vision + tool use via `<tool_call>` tags (256k context) | -| `qwen3.5-9b` | `mlx-community/Qwen3.5-9B-4bit` | Thinking mode, tool use (256k context) | - -Any model in MLX format on HuggingFace can be added — no restriction on uploader or architecture. - -## Critical Performance Rule - -**Inference speed is the #1 priority.** The token generation loop must never be blocked or slowed by anything else — no MainActor hops, no SwiftUI observation, no synchronous I/O. Everything that isn't inference (stats collection, UI updates, logging) must run on separate threads via loose coupling: - -- **`LiveCounters`** (thread-safe singleton with `OSAllocatedUnfairLock`) is the bridge: generation code writes to it directly from any thread with zero actor overhead. -- **`InferenceStats`** (UI-side, `@Observable @MainActor`) polls `LiveCounters` at 1Hz via a timer — never the other way around. -- SSE streaming (`sendSSEEvent`/`sendData`) runs nonisolated off MainActor so token sends don't compete with SwiftUI rendering. -- Never gate token output on UI state, analytics, or any `@MainActor`-isolated code. 
- -## Key Design Decisions - -- Uses `mlx-swift-lm` (`MLXVLM` / `VLMModelFactory`) as the inference backend — loads any MLX-format model from HuggingFace -- Model-specific prompt formatting: Gemma uses `tool_code` blocks; Qwen uses `<tool_call>` XML tags -- **Offline-first**: `LocalModelResolver` checks the sandboxed app container, system `~/.cache/huggingface/hub/`, and flat download layouts — no network requests if model is cached -- **No duplicate storage**: custom `HubApi(cache: nil)` with explicit `downloadBase` — models stored once in the snapshot cache, not duplicated across blob cache and snapshots -- **Thinking mode**: `enable_thinking` passed to Jinja template context via `additionalContext`; `<think>...</think>` tags parsed in real-time during streaming and shown in collapsible UI blocks. Toggleable in Settings. -- **Download progress**: separate `isDownloading` state from `isLoading`; modal overlay shows file count, percentage, speed -- **Idle unload**: timer resets on both user input and model generation completion (not just request start) -- **Chat export**: Markdown (user messages as blockquotes) and RTF (Pages-compatible with formatted markdown) -- **Finder paste**: local event monitor intercepts Cmd+V to check pasteboard for image file URLs before TextField handles it -- HTTP server built on `Network.framework` (`NWListener`) — no third-party server dependencies -- KV cache reuse across API requests — reuses `ChatSession` when conversation history prefix matches -- GPU cache limit set to 20 MB; cache cleared on model unload - -## Dependencies - -Managed via Swift Package Manager (declared in `project.yml` for xcodegen). 
- -| Package | Products | -|---------|----------| -| `mlx-swift-lm` | `MLXLLM`, `MLXVLM`, `MLXLMCommon` | -| `swift-markdown-ui` | `MarkdownUI` | +@AGENTS.md diff --git a/MLXServer/Commands/SaveChatCommands.swift b/MLXServer/Commands/SaveChatCommands.swift index 532559b..e08006a 100644 --- a/MLXServer/Commands/SaveChatCommands.swift +++ b/MLXServer/Commands/SaveChatCommands.swift @@ -2,15 +2,15 @@ import SwiftUI /// Adds "Export Chat…" to the File menu. struct SaveChatCommands: Commands { - @FocusedBinding(\.exportTrigger) var isExporting + @FocusedValue(\.exportChatAction) private var exportChatAction var body: some Commands { CommandGroup(after: .saveItem) { Button("Export Chat…") { - isExporting = true + exportChatAction?() } - .keyboardShortcut("e", modifiers: [.command, .shift]) - .disabled(isExporting == nil) + .keyboardShortcut("s", modifiers: [.command, .shift]) + .disabled(exportChatAction == nil) } } } diff --git a/MLXServer/ContentView.swift b/MLXServer/ContentView.swift index 2537aba..9ab55db 100644 --- a/MLXServer/ContentView.swift +++ b/MLXServer/ContentView.swift @@ -6,7 +6,8 @@ struct ContentView: View { @State private var chatVM: ChatViewModel? @State private var showLoadError = false @State private var showMonitor = false - @State private var isExporting = false + @State private var exportDocument: ChatExportDocument? + @State private var exportErrorMessage: String? var body: some View { mainContent @@ -42,6 +43,13 @@ struct ContentView: View { } message: { Text(modelManager.errorMessage ?? "Unknown error loading model.") } + .alert("Export Failed", isPresented: exportErrorBinding) { + Button("OK", role: .cancel) { + exportErrorMessage = nil + } + } message: { + Text(exportErrorMessage ?? 
"Unknown export error.") + } .toolbar { ToolbarItem(placement: .principal) { ModelPickerView() @@ -54,19 +62,24 @@ struct ContentView: View { .background { modelSwitchShortcuts } - // Expose export trigger to menu bar command - .focusedSceneValue(\.exportTrigger, $isExporting) + .focusedSceneValue(\.exportChatAction, ExportChatAction(perform: beginExport)) .fileExporter( - isPresented: $isExporting, - document: ChatExportDocument( - messages: chatVM?.conversation.messages ?? [], - modelName: modelManager.currentModel?.displayName + isPresented: Binding( + get: { exportDocument != nil }, + set: { + if !$0 { + exportDocument = nil + } + } ), + document: exportDocument, contentTypes: ChatExportDocument.writableContentTypes, - defaultFilename: "chat" + defaultFilename: exportDefaultFilename ) { result in + exportDocument = nil if case .failure(let error) = result { print("[Export] Failed: \(error.localizedDescription)") + exportErrorMessage = error.localizedDescription } } } @@ -148,6 +161,31 @@ struct ContentView: View { } } } + + private var exportErrorBinding: Binding { + Binding( + get: { exportErrorMessage != nil }, + set: { + if !$0 { + exportErrorMessage = nil + } + } + ) + } + + private var exportDefaultFilename: String { + let formatter = DateFormatter() + formatter.dateFormat = "yyyy-MM-dd-HHmm" + return "chat-\(formatter.string(from: .now))" + } + + private func beginExport() { + guard exportDocument == nil else { return } + exportDocument = ChatExportDocument( + messages: chatVM?.conversation.messages ?? [], + modelName: modelManager.currentModel?.displayName + ) + } } /// The main chat layout: messages + input area + status bar. 
diff --git a/MLXServer/MLXServer.entitlements b/MLXServer/MLXServer.entitlements index 779c582..c1a22b8 100644 --- a/MLXServer/MLXServer.entitlements +++ b/MLXServer/MLXServer.entitlements @@ -8,7 +8,7 @@ com.apple.security.network.server - com.apple.security.files.user-selected.read-only + com.apple.security.files.user-selected.read-write diff --git a/MLXServer/Utilities/ChatExporter.swift b/MLXServer/Utilities/ChatExporter.swift index 46238b1..29f0f86 100644 --- a/MLXServer/Utilities/ChatExporter.swift +++ b/MLXServer/Utilities/ChatExporter.swift @@ -3,12 +3,16 @@ import Foundation import SwiftUI import UniformTypeIdentifiers -/// A FileDocument that exports a chat conversation as Markdown or RTF. -struct ChatExportDocument: FileDocument { - static var readableContentTypes: [UTType] { [.plainText] } - static var writableContentTypes: [UTType] { - [UTType(filenameExtension: "md") ?? .plainText, .rtf] +extension UTType { + static var markdownText: UTType { + UTType(filenameExtension: "md") ?? .plainText } +} + +/// FileDocument for exporting chat as Markdown (.md) or RTF. +struct ChatExportDocument: FileDocument { + static var readableContentTypes: [UTType] { [.rtf, .markdownText] } + static var writableContentTypes: [UTType] { [.rtf, .markdownText] } let messages: [ChatMessage] let modelName: String? 
@@ -24,14 +28,15 @@ struct ChatExportDocument: FileDocument { } func fileWrapper(configuration: WriteConfiguration) throws -> FileWrapper { - let contentType = configuration.contentType - - if contentType == .rtf, let data = ChatExporter.exportRTF(messages: messages, modelName: modelName) { + if configuration.contentType == .rtf { + guard let data = ChatExporter.exportRTF(messages: messages, modelName: modelName) else { + throw CocoaError(.fileWriteUnknown) + } return FileWrapper(regularFileWithContents: data) - } else { - let md = ChatExporter.exportMarkdown(messages: messages, modelName: modelName) - return FileWrapper(regularFileWithContents: Data(md.utf8)) } + + let md = ChatExporter.exportMarkdown(messages: messages, modelName: modelName) + return FileWrapper(regularFileWithContents: Data(md.utf8)) } } diff --git a/MLXServer/Utilities/FocusedValues.swift b/MLXServer/Utilities/FocusedValues.swift index 6f74ac4..36eb15b 100644 --- a/MLXServer/Utilities/FocusedValues.swift +++ b/MLXServer/Utilities/FocusedValues.swift @@ -1,13 +1,21 @@ import SwiftUI +struct ExportChatAction { + let perform: () -> Void + + func callAsFunction() { + perform() + } +} + /// Focused value key for triggering chat export from the menu bar. -struct FocusedExportTriggerKey: FocusedValueKey { - typealias Value = Binding +struct FocusedExportActionKey: FocusedValueKey { + typealias Value = ExportChatAction } extension FocusedValues { - var exportTrigger: Binding? { - get { self[FocusedExportTriggerKey.self] } - set { self[FocusedExportTriggerKey.self] = newValue } + var exportChatAction: ExportChatAction? { + get { self[FocusedExportActionKey.self] } + set { self[FocusedExportActionKey.self] = newValue } } }