From 6a87fe6f0845a7396ced6bfef0f7c2767089e7aa Mon Sep 17 00:00:00 2001
From: Chili Palmer <hugoms@me.com>
Date: Wed, 18 Mar 2026 11:59:51 +0100
Subject: [PATCH] fix: trigger chat export via a focused action and allow read-write file access

---
 AGENTS.md                                 | 81 +++++++++++++++++++++++
 CLAUDE.md                                 | 80 +---------------------
 MLXServer/Commands/SaveChatCommands.swift |  8 +--
 MLXServer/ContentView.swift               | 54 ++++++++++++---
 MLXServer/MLXServer.entitlements          |  2 +-
 MLXServer/Utilities/ChatExporter.swift    | 27 +++++---
 MLXServer/Utilities/FocusedValues.swift   | 18 +++--
 7 files changed, 162 insertions(+), 108 deletions(-)
 create mode 100644 AGENTS.md
diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 0000000..cd8d70b
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,81 @@
+# MLX Server
+
+Native macOS SwiftUI app for local LLMs on Apple Silicon via MLX. Provides a chat UI and an embedded OpenAI-compatible API server. Supports vision, tool use, and thinking mode.
+
+## Quick Start
+
+**Always use `./build.sh` to build the project** — never call `xcodebuild` directly. The script runs xcodegen first (to pick up new/removed files) and uses the correct scheme, destination, and build directory.
+
+```bash
+# Build (requires xcodegen: brew install xcodegen)
+./build.sh
+
+# Run
+open "build/Debug/MLX Server.app"
+```
+
+## Project Structure
+
+- `MLXServer/MLXServerApp.swift` — App entry point, GPU cache config, menu commands
+- `MLXServer/ContentView.swift` — Main layout, toolbar, keyboard shortcuts, focused values
+- `MLXServer/Models/ModelConfig.swift` — Model definitions (alias, repoId, contextLength), resolution
+- `MLXServer/Models/ChatMessage.swift` — Chat message data model, `<think>` tag parsing
+- `MLXServer/ViewModels/ModelManager.swift` — Model loading/switching via VLMModelFactory, download tracking, idle unload
+- `MLXServer/ViewModels/ChatViewModel.swift` — Chat state, ChatSession management, API server lifecycle
+- `MLXServer/Server/APIServer.swift` — NWListener HTTP server, SSE streaming, KV cache reuse, vision, tool call handling
+- `MLXServer/Server/APIModels.swift` — OpenAI-compatible Codable structs
+- `MLXServer/Server/ToolCallParser.swift` — Parses tool calls from model output (Gemma tool_code, Qwen XML tags)
+- `MLXServer/Server/ToolPromptBuilder.swift` — Model-specific tool prompt formatting
+- `MLXServer/Views/DownloadModalView.swift` — Modal overlay for model download progress
+- `MLXServer/Views/ChatMessagesView.swift` — Message bubbles with markdown rendering and collapsible thinking blocks
+- `MLXServer/Views/ChatInputView.swift` — Text input, image attach (file picker, drag & drop, Finder copy-paste)
+- `MLXServer/Commands/SaveChatCommands.swift` — File > Export Chat menu command
+- `MLXServer/Utilities/LocalModelResolver.swift` — Resolves HF repo IDs to local snapshots (sandbox + system cache + flat layouts)
+- `MLXServer/Utilities/ChatExporter.swift` — Export conversations to Markdown or RTF (Pages-compatible)
+- `MLXServer/Utilities/FocusedValues.swift` — FocusedValue keys for menu bar integration
+- `MLXServer/Utilities/Preferences.swift` — UserDefaults wrapper (model, thinking mode, API, idle timeout)
+- `project.yml` — xcodegen project spec
+- `build.sh` — Build script (xcodegen + xcodebuild)
+
+## Supported Models
+
+| Alias | HuggingFace ID | Notes |
+|-------|---------------|-------|
+| `gemma` | `mlx-community/gemma-3-4b-it-4bit` | Vision + tool use via `tool_code` blocks (128k context) |
+| `qwen` | `mlx-community/Qwen3-VL-4B-Instruct-4bit` | Vision + tool use via `<tool_call>` tags (256k context) |
+| `qwen3.5-9b` | `mlx-community/Qwen3.5-9B-4bit` | Thinking mode, tool use (256k context) |
+
+Any model in MLX format on HuggingFace can be added — no restriction on uploader or architecture.
+
+## Critical Performance Rule
+
+**Inference speed is the #1 priority.** The token generation loop must never be blocked or slowed by anything else — no MainActor hops, no SwiftUI observation, no synchronous I/O. Everything that isn't inference (stats collection, UI updates, logging) must run on separate threads via loose coupling:
+
+- **`LiveCounters`** (thread-safe singleton with `OSAllocatedUnfairLock`) is the bridge: generation code writes to it directly from any thread with zero actor overhead.
+- **`InferenceStats`** (UI-side, `@Observable @MainActor`) polls `LiveCounters` at 1 Hz via a timer — never the other way around.
+- SSE streaming (`sendSSEEvent`/`sendData`) runs nonisolated off MainActor so token sends don't compete with SwiftUI rendering.
+- Never gate token output on UI state, analytics, or any `@MainActor`-isolated code.
+
+## Key Design Decisions
+
+- Uses `mlx-swift-lm` (`MLXVLM` / `VLMModelFactory`) as the inference backend — loads any MLX-format model from HuggingFace
+- Model-specific prompt formatting: Gemma uses `tool_code` blocks; Qwen uses `<tool_call>` XML tags
+- **Offline-first**: `LocalModelResolver` checks the sandboxed app container, the system cache at `~/.cache/huggingface/hub/`, and flat download layouts — no network requests are made if the model is already cached
+- **No duplicate storage**: custom `HubApi(cache: nil)` with explicit `downloadBase` — models stored once in the snapshot cache, not duplicated across blob cache and snapshots
+- **Thinking mode**: `enable_thinking` passed to Jinja template context via `additionalContext`; `<think>...</think>` tags parsed in real-time during streaming and shown in collapsible UI blocks. Toggleable in Settings.
+- **Download progress**: separate `isDownloading` state from `isLoading`; modal overlay shows file count, percentage, speed
+- **Idle unload**: timer resets on both user input and model generation completion (not just request start)
+- **Chat export**: Markdown (user messages as blockquotes) and RTF (Pages-compatible with formatted markdown)
+- **Finder paste**: local event monitor intercepts Cmd+V to check pasteboard for image file URLs before TextField handles it
+- HTTP server built on `Network.framework` (`NWListener`) — no third-party server dependencies
+- KV cache reuse across API requests — reuses `ChatSession` when conversation history prefix matches
+- GPU cache limit set to 20 MB; cache cleared on model unload
+
+## Dependencies
+
+Managed via Swift Package Manager (declared in `project.yml` for xcodegen).
+
+| Package | Products |
+|---------|----------|
+| `mlx-swift-lm` | `MLXLLM`, `MLXVLM`, `MLXLMCommon` |
+| `swift-markdown-ui` | `MarkdownUI` |
diff --git a/CLAUDE.md b/CLAUDE.md
index 145cd6e..43c994c 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -1,79 +1 @@
-# MLX Server
-
-Native macOS SwiftUI app for local LLMs on Apple Silicon via MLX. Provides a chat UI and an embedded OpenAI-compatible API server. Supports vision, tool use, and thinking mode.
-
-## Quick Start
-
-```bash
-# Build (requires xcodegen: brew install xcodegen)
-./build.sh
-
-# Run
-open "build/Debug/MLX Server.app"
-```
-
-## Project Structure
-
-- `MLXServer/MLXServerApp.swift` — App entry point, GPU cache config, menu commands
-- `MLXServer/ContentView.swift` — Main layout, toolbar, keyboard shortcuts, focused values
-- `MLXServer/Models/ModelConfig.swift` — Model definitions (alias, repoId, contextLength), resolution
-- `MLXServer/Models/ChatMessage.swift` — Chat message data model, `<think>` tag parsing
-- `MLXServer/ViewModels/ModelManager.swift` — Model loading/switching via VLMModelFactory, download tracking, idle unload
-- `MLXServer/ViewModels/ChatViewModel.swift` — Chat state, ChatSession management, API server lifecycle
-- `MLXServer/Server/APIServer.swift` — NWListener HTTP server, SSE streaming, KV cache reuse, vision, tool call handling
-- `MLXServer/Server/APIModels.swift` — OpenAI-compatible Codable structs
-- `MLXServer/Server/ToolCallParser.swift` — Parses tool calls from model output (Gemma tool_code, Qwen XML tags)
-- `MLXServer/Server/ToolPromptBuilder.swift` — Model-specific tool prompt formatting
-- `MLXServer/Views/DownloadModalView.swift` — Modal overlay for model download progress
-- `MLXServer/Views/ChatMessagesView.swift` — Message bubbles with markdown rendering and collapsible thinking blocks
-- `MLXServer/Views/ChatInputView.swift` — Text input, image attach (file picker, drag & drop, Finder copy-paste)
-- `MLXServer/Commands/SaveChatCommands.swift` — File > Export Chat menu command
-- `MLXServer/Utilities/LocalModelResolver.swift` — Resolves HF repo IDs to local snapshots (sandbox + system cache + flat layouts)
-- `MLXServer/Utilities/ChatExporter.swift` — Export conversations to Markdown or RTF (Pages-compatible)
-- `MLXServer/Utilities/FocusedValues.swift` — FocusedValue keys for menu bar integration
-- `MLXServer/Utilities/Preferences.swift` — UserDefaults wrapper (model, thinking mode, API, idle timeout)
-- `project.yml` — xcodegen project spec
-- `build.sh` — Build script (xcodegen + xcodebuild)
-
-## Supported Models
-
-| Alias | HuggingFace ID | Notes |
-|-------|---------------|-------|
-| `gemma` | `mlx-community/gemma-3-4b-it-4bit` | Vision + tool use via `tool_code` blocks (128k context) |
-| `qwen` | `mlx-community/Qwen3-VL-4B-Instruct-4bit` | Vision + tool use via `<tool_call>` tags (256k context) |
-| `qwen3.5-9b` | `mlx-community/Qwen3.5-9B-4bit` | Thinking mode, tool use (256k context) |
-
-Any model in MLX format on HuggingFace can be added — no restriction on uploader or architecture.
-
-## Critical Performance Rule
-
-**Inference speed is the #1 priority.** The token generation loop must never be blocked or slowed by anything else — no MainActor hops, no SwiftUI observation, no synchronous I/O. Everything that isn't inference (stats collection, UI updates, logging) must run on separate threads via loose coupling:
-
-- **`LiveCounters`** (thread-safe singleton with `OSAllocatedUnfairLock`) is the bridge: generation code writes to it directly from any thread with zero actor overhead.
-- **`InferenceStats`** (UI-side, `@Observable @MainActor`) polls `LiveCounters` at 1Hz via a timer — never the other way around.
-- SSE streaming (`sendSSEEvent`/`sendData`) runs nonisolated off MainActor so token sends don't compete with SwiftUI rendering.
-- Never gate token output on UI state, analytics, or any `@MainActor`-isolated code.
-
-## Key Design Decisions
-
-- Uses `mlx-swift-lm` (`MLXVLM` / `VLMModelFactory`) as the inference backend — loads any MLX-format model from HuggingFace
-- Model-specific prompt formatting: Gemma uses `tool_code` blocks; Qwen uses `<tool_call>` XML tags
-- **Offline-first**: `LocalModelResolver` checks the sandboxed app container, system `~/.cache/huggingface/hub/`, and flat download layouts — no network requests if model is cached
-- **No duplicate storage**: custom `HubApi(cache: nil)` with explicit `downloadBase` — models stored once in the snapshot cache, not duplicated across blob cache and snapshots
-- **Thinking mode**: `enable_thinking` passed to Jinja template context via `additionalContext`; `<think>...</think>` tags parsed in real-time during streaming and shown in collapsible UI blocks. Toggleable in Settings.
-- **Download progress**: separate `isDownloading` state from `isLoading`; modal overlay shows file count, percentage, speed
-- **Idle unload**: timer resets on both user input and model generation completion (not just request start)
-- **Chat export**: Markdown (user messages as blockquotes) and RTF (Pages-compatible with formatted markdown)
-- **Finder paste**: local event monitor intercepts Cmd+V to check pasteboard for image file URLs before TextField handles it
-- HTTP server built on `Network.framework` (`NWListener`) — no third-party server dependencies
-- KV cache reuse across API requests — reuses `ChatSession` when conversation history prefix matches
-- GPU cache limit set to 20 MB; cache cleared on model unload
-
-## Dependencies
-
-Managed via Swift Package Manager (declared in `project.yml` for xcodegen).
-
-| Package | Products |
-|---------|----------|
-| `mlx-swift-lm` | `MLXLLM`, `MLXVLM`, `MLXLMCommon` |
-| `swift-markdown-ui` | `MarkdownUI` |
+@AGENTS.md
diff --git a/MLXServer/Commands/SaveChatCommands.swift b/MLXServer/Commands/SaveChatCommands.swift
index 532559b..e08006a 100644
--- a/MLXServer/Commands/SaveChatCommands.swift
+++ b/MLXServer/Commands/SaveChatCommands.swift
@@ -2,15 +2,15 @@ import SwiftUI
 
 /// Adds "Export Chat…" to the File menu.
 struct SaveChatCommands: Commands {
-    @FocusedBinding(\.exportTrigger) var isExporting
+    @FocusedValue(\.exportChatAction) private var exportChatAction
 
     var body: some Commands {
         CommandGroup(after: .saveItem) {
             Button("Export Chat…") {
-                isExporting = true
+                exportChatAction?()
             }
-            .keyboardShortcut("e", modifiers: [.command, .shift])
-            .disabled(isExporting == nil)
+            .keyboardShortcut("s", modifiers: [.command, .shift])
+            .disabled(exportChatAction == nil)
         }
     }
 }
diff --git a/MLXServer/ContentView.swift b/MLXServer/ContentView.swift
index 2537aba..9ab55db 100644
--- a/MLXServer/ContentView.swift
+++ b/MLXServer/ContentView.swift
@@ -6,7 +6,8 @@ struct ContentView: View {
     @State private var chatVM: ChatViewModel?
     @State private var showLoadError = false
     @State private var showMonitor = false
-    @State private var isExporting = false
+    @State private var exportDocument: ChatExportDocument?
+    @State private var exportErrorMessage: String?
 
     var body: some View {
         mainContent
@@ -42,6 +43,13 @@ struct ContentView: View {
             } message: {
                 Text(modelManager.errorMessage ?? "Unknown error loading model.")
             }
+            .alert("Export Failed", isPresented: exportErrorBinding) {
+                Button("OK", role: .cancel) {
+                    exportErrorMessage = nil
+                }
+            } message: {
+                Text(exportErrorMessage ?? "Unknown export error.")
+            }
             .toolbar {
                 ToolbarItem(placement: .principal) {
                     ModelPickerView()
@@ -54,19 +62,24 @@ struct ContentView: View {
             .background {
                 modelSwitchShortcuts
             }
-            // Expose export trigger to menu bar command
-            .focusedSceneValue(\.exportTrigger, $isExporting)
+            .focusedSceneValue(\.exportChatAction, ExportChatAction(perform: beginExport))
             .fileExporter(
-                isPresented: $isExporting,
-                document: ChatExportDocument(
-                    messages: chatVM?.conversation.messages ?? [],
-                    modelName: modelManager.currentModel?.displayName
+                isPresented: Binding(
+                    get: { exportDocument != nil },
+                    set: {
+                        if !$0 {
+                            exportDocument = nil
+                        }
+                    }
                 ),
+                document: exportDocument,
                 contentTypes: ChatExportDocument.writableContentTypes,
-                defaultFilename: "chat"
+                defaultFilename: exportDefaultFilename
             ) { result in
+                exportDocument = nil
                 if case .failure(let error) = result {
                     print("[Export] Failed: \(error.localizedDescription)")
+                    exportErrorMessage = error.localizedDescription
                 }
             }
     }
@@ -148,6 +161,31 @@ struct ContentView: View {
             }
         }
     }
+
+    private var exportErrorBinding: Binding<Bool> {
+        Binding(
+            get: { exportErrorMessage != nil },
+            set: {
+                if !$0 {
+                    exportErrorMessage = nil
+                }
+            }
+        )
+    }
+
+    private var exportDefaultFilename: String {
+        let formatter = DateFormatter()
+        formatter.dateFormat = "yyyy-MM-dd-HHmm"
+        return "chat-\(formatter.string(from: .now))"
+    }
+
+    private func beginExport() {
+        guard exportDocument == nil else { return }
+        exportDocument = ChatExportDocument(
+            messages: chatVM?.conversation.messages ?? [],
+            modelName: modelManager.currentModel?.displayName
+        )
+    }
 }
 
 /// The main chat layout: messages + input area + status bar.
diff --git a/MLXServer/MLXServer.entitlements b/MLXServer/MLXServer.entitlements
index 779c582..c1a22b8 100644
--- a/MLXServer/MLXServer.entitlements
+++ b/MLXServer/MLXServer.entitlements
@@ -8,7 +8,7 @@
 	<true/>
 	<key>com.apple.security.network.server</key>
 	<true/>
-	<key>com.apple.security.files.user-selected.read-only</key>
+	<key>com.apple.security.files.user-selected.read-write</key>
 	<true/>
 </dict>
 </plist>
diff --git a/MLXServer/Utilities/ChatExporter.swift b/MLXServer/Utilities/ChatExporter.swift
index 46238b1..29f0f86 100644
--- a/MLXServer/Utilities/ChatExporter.swift
+++ b/MLXServer/Utilities/ChatExporter.swift
@@ -3,12 +3,16 @@ import Foundation
 import SwiftUI
 import UniformTypeIdentifiers
 
-/// A FileDocument that exports a chat conversation as Markdown or RTF.
-struct ChatExportDocument: FileDocument {
-    static var readableContentTypes: [UTType] { [.plainText] }
-    static var writableContentTypes: [UTType] {
-        [UTType(filenameExtension: "md") ?? .plainText, .rtf]
+extension UTType {
+    static var markdownText: UTType {
+        UTType(filenameExtension: "md") ?? .plainText
     }
+}
+
+/// FileDocument for exporting chat as Markdown (.md) or RTF.
+struct ChatExportDocument: FileDocument {
+    static var readableContentTypes: [UTType] { [.rtf, .markdownText] }
+    static var writableContentTypes: [UTType] { [.rtf, .markdownText] }
 
     let messages: [ChatMessage]
     let modelName: String?
@@ -24,14 +28,15 @@ struct ChatExportDocument: FileDocument {
     }
 
     func fileWrapper(configuration: WriteConfiguration) throws -> FileWrapper {
-        let contentType = configuration.contentType
-
-        if contentType == .rtf, let data = ChatExporter.exportRTF(messages: messages, modelName: modelName) {
+        if configuration.contentType == .rtf {
+            guard let data = ChatExporter.exportRTF(messages: messages, modelName: modelName) else {
+                throw CocoaError(.fileWriteUnknown)
+            }
             return FileWrapper(regularFileWithContents: data)
-        } else {
-            let md = ChatExporter.exportMarkdown(messages: messages, modelName: modelName)
-            return FileWrapper(regularFileWithContents: Data(md.utf8))
         }
+
+        let md = ChatExporter.exportMarkdown(messages: messages, modelName: modelName)
+        return FileWrapper(regularFileWithContents: Data(md.utf8))
     }
 }
 
diff --git a/MLXServer/Utilities/FocusedValues.swift b/MLXServer/Utilities/FocusedValues.swift
index 6f74ac4..36eb15b 100644
--- a/MLXServer/Utilities/FocusedValues.swift
+++ b/MLXServer/Utilities/FocusedValues.swift
@@ -1,13 +1,21 @@
 import SwiftUI
 
+struct ExportChatAction {
+    let perform: () -> Void
+
+    func callAsFunction() {
+        perform()
+    }
+}
+
 /// Focused value key for triggering chat export from the menu bar.
-struct FocusedExportTriggerKey: FocusedValueKey {
-    typealias Value = Binding<Bool>
+struct FocusedExportActionKey: FocusedValueKey {
+    typealias Value = ExportChatAction
 }
 
 extension FocusedValues {
-    var exportTrigger: Binding<Bool>? {
-        get { self[FocusedExportTriggerKey.self] }
-        set { self[FocusedExportTriggerKey.self] = newValue }
+    var exportChatAction: ExportChatAction? {
+        get { self[FocusedExportActionKey.self] }
+        set { self[FocusedExportActionKey.self] = newValue }
     }
 }