feat: start of support for thinking mode, qwen 3.5 9b addition and better idle time handling

This commit is contained in:
2026-03-18 09:15:08 +01:00
parent ed6cc5f5d1
commit 07b71f90ec
13 changed files with 389 additions and 33 deletions

View File

@@ -26,6 +26,7 @@
B1D9BC407DB7DB1489230C20 /* MonitorView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4239CFF94B819C35A8D4D617 /* MonitorView.swift */; }; B1D9BC407DB7DB1489230C20 /* MonitorView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4239CFF94B819C35A8D4D617 /* MonitorView.swift */; };
B5AA6E3B4BE21676226B342B /* ChatViewModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = B8BD93859F0291F1A3E09DA5 /* ChatViewModel.swift */; }; B5AA6E3B4BE21676226B342B /* ChatViewModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = B8BD93859F0291F1A3E09DA5 /* ChatViewModel.swift */; };
B6D3662995B885C102876B4A /* MLXLMCommon in Frameworks */ = {isa = PBXBuildFile; productRef = 9090667D4134056AE66DC2F1 /* MLXLMCommon */; }; B6D3662995B885C102876B4A /* MLXLMCommon in Frameworks */ = {isa = PBXBuildFile; productRef = 9090667D4134056AE66DC2F1 /* MLXLMCommon */; };
C07A377244DCD67F4FE709FE /* DownloadModalView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2DC8C86D397B1FCA08E07CBD /* DownloadModalView.swift */; };
D666A311788375E8A061C832 /* SettingsView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4147321383E94E9F17A0154E /* SettingsView.swift */; }; D666A311788375E8A061C832 /* SettingsView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4147321383E94E9F17A0154E /* SettingsView.swift */; };
D96DDE66F76FDDA642629E17 /* APIModels.swift in Sources */ = {isa = PBXBuildFile; fileRef = F1A52E2C9964ADA9D841A89B /* APIModels.swift */; }; D96DDE66F76FDDA642629E17 /* APIModels.swift in Sources */ = {isa = PBXBuildFile; fileRef = F1A52E2C9964ADA9D841A89B /* APIModels.swift */; };
F546CE5955ED253D8A793D5E /* MarkdownUI in Frameworks */ = {isa = PBXBuildFile; productRef = A98257123539E9E738213BFA /* MarkdownUI */; }; F546CE5955ED253D8A793D5E /* MarkdownUI in Frameworks */ = {isa = PBXBuildFile; productRef = A98257123539E9E738213BFA /* MarkdownUI */; };
@@ -36,6 +37,7 @@
/* Begin PBXFileReference section */ /* Begin PBXFileReference section */
145B888FBDD4F931512C5473 /* Preferences.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Preferences.swift; sourceTree = "<group>"; }; 145B888FBDD4F931512C5473 /* Preferences.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Preferences.swift; sourceTree = "<group>"; };
16AE82A64D1D07AE3CD8D33A /* ToolPromptBuilder.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ToolPromptBuilder.swift; sourceTree = "<group>"; }; 16AE82A64D1D07AE3CD8D33A /* ToolPromptBuilder.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ToolPromptBuilder.swift; sourceTree = "<group>"; };
2DC8C86D397B1FCA08E07CBD /* DownloadModalView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DownloadModalView.swift; sourceTree = "<group>"; };
38DFC212AF4359A45FBE22BA /* ModelConfig.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ModelConfig.swift; sourceTree = "<group>"; }; 38DFC212AF4359A45FBE22BA /* ModelConfig.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ModelConfig.swift; sourceTree = "<group>"; };
3AF462805202797F61422AEE /* MLXServer.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = MLXServer.entitlements; sourceTree = "<group>"; }; 3AF462805202797F61422AEE /* MLXServer.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = MLXServer.entitlements; sourceTree = "<group>"; };
3D08828E16B17EF02C14243E /* APIServer.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = APIServer.swift; sourceTree = "<group>"; }; 3D08828E16B17EF02C14243E /* APIServer.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = APIServer.swift; sourceTree = "<group>"; };
@@ -111,6 +113,7 @@
children = ( children = (
E5E6AD02CDF23BDAB64700A7 /* ChatInputView.swift */, E5E6AD02CDF23BDAB64700A7 /* ChatInputView.swift */,
DB1A5E8B1C9F2BC4D262C53A /* ChatMessagesView.swift */, DB1A5E8B1C9F2BC4D262C53A /* ChatMessagesView.swift */,
2DC8C86D397B1FCA08E07CBD /* DownloadModalView.swift */,
C3C3A76C02AF70A9D8F868FC /* ModelPickerView.swift */, C3C3A76C02AF70A9D8F868FC /* ModelPickerView.swift */,
4239CFF94B819C35A8D4D617 /* MonitorView.swift */, 4239CFF94B819C35A8D4D617 /* MonitorView.swift */,
4147321383E94E9F17A0154E /* SettingsView.swift */, 4147321383E94E9F17A0154E /* SettingsView.swift */,
@@ -240,6 +243,7 @@
5C1E8FE1C521914CEF98D3AA /* ChatMessagesView.swift in Sources */, 5C1E8FE1C521914CEF98D3AA /* ChatMessagesView.swift in Sources */,
B5AA6E3B4BE21676226B342B /* ChatViewModel.swift in Sources */, B5AA6E3B4BE21676226B342B /* ChatViewModel.swift in Sources */,
5946258F1DE88CE904584E0B /* ContentView.swift in Sources */, 5946258F1DE88CE904584E0B /* ContentView.swift in Sources */,
C07A377244DCD67F4FE709FE /* DownloadModalView.swift in Sources */,
2D08769282BD71C170DB0943 /* InferenceStats.swift in Sources */, 2D08769282BD71C170DB0943 /* InferenceStats.swift in Sources */,
6828CCA8B78AB40906F87CAB /* LocalModelResolver.swift in Sources */, 6828CCA8B78AB40906F87CAB /* LocalModelResolver.swift in Sources */,
50B6861FF8610B3ED4FFAD9D /* MLXServerApp.swift in Sources */, 50B6861FF8610B3ED4FFAD9D /* MLXServerApp.swift in Sources */,

View File

@@ -56,6 +56,7 @@ struct ContentView: View {
@ViewBuilder @ViewBuilder
private var mainContent: some View { private var mainContent: some View {
ZStack {
if let chatVM { if let chatVM {
if showMonitor { if showMonitor {
MonitorView(stats: chatVM.apiServer.inferenceStats) MonitorView(stats: chatVM.apiServer.inferenceStats)
@@ -65,6 +66,14 @@ struct ContentView: View {
} else { } else {
ProgressView("Initializing…") ProgressView("Initializing…")
} }
// Download modal overlay
if modelManager.isDownloading {
Color.black.opacity(0.3)
.ignoresSafeArea()
DownloadModalView()
}
}
} }
@ViewBuilder @ViewBuilder

View File

@@ -10,6 +10,16 @@ struct ChatMessage: Identifiable {
var isStreaming: Bool var isStreaming: Bool
let timestamp: Date let timestamp: Date
/// Raw streamed text including <think> tags (only for assistant messages).
/// `content` and `thinkingContent` are derived from this.
var rawContent: String = ""
/// The thinking/reasoning content extracted from <think>...</think> tags.
var thinkingContent: String = ""
/// Whether the model is currently in a thinking block.
var isThinking: Bool = false
enum Role: String { enum Role: String {
case system case system
case user case user
@@ -19,6 +29,7 @@ struct ChatMessage: Identifiable {
init(role: Role, content: String, images: [NSImage] = [], isStreaming: Bool = false) { init(role: Role, content: String, images: [NSImage] = [], isStreaming: Bool = false) {
self.role = role self.role = role
self.content = content self.content = content
self.rawContent = content
self.images = images self.images = images
self.isStreaming = isStreaming self.isStreaming = isStreaming
self.timestamp = Date() self.timestamp = Date()
@@ -43,15 +54,53 @@ final class Conversation {
} }
/// Appends a text chunk to the assistant message at the given index. /// Appends a text chunk to the assistant message at the given index.
/// Handles `<think>...</think>` tags by routing content to `thinkingContent` vs `content`.
func appendToMessage(at index: Int, chunk: String) { func appendToMessage(at index: Int, chunk: String) {
guard index < messages.count else { return } guard index < messages.count else { return }
messages[index].content += chunk messages[index].rawContent += chunk
// Parse the full raw content to separate thinking from response.
// This is simpler and more robust than incremental parsing since
// tag boundaries can split across chunks.
let raw = messages[index].rawContent
var thinking = ""
var visible = ""
var isInThink = false
var scanner = raw[raw.startIndex...]
while !scanner.isEmpty {
if isInThink {
if let endRange = scanner.range(of: "</think>") {
thinking += String(scanner[scanner.startIndex..<endRange.lowerBound])
scanner = scanner[endRange.upperBound...]
isInThink = false
} else {
// Still inside thinking — all remaining text is thinking
thinking += String(scanner)
break
}
} else {
if let startRange = scanner.range(of: "<think>") {
visible += String(scanner[scanner.startIndex..<startRange.lowerBound])
scanner = scanner[startRange.upperBound...]
isInThink = true
} else {
visible += String(scanner)
break
}
}
}
messages[index].thinkingContent = thinking.trimmingCharacters(in: .whitespacesAndNewlines)
messages[index].content = visible.trimmingCharacters(in: .whitespacesAndNewlines)
messages[index].isThinking = isInThink
} }
/// Marks the assistant message at the given index as done streaming. /// Marks the assistant message at the given index as done streaming.
func finalizeMessage(at index: Int) { func finalizeMessage(at index: Int) {
guard index < messages.count else { return } guard index < messages.count else { return }
messages[index].isStreaming = false messages[index].isStreaming = false
messages[index].isThinking = false
} }
func clear() { func clear() {

View File

@@ -22,6 +22,12 @@ struct ModelConfig: Identifiable, Hashable {
displayName: "Qwen3 VL 4B", displayName: "Qwen3 VL 4B",
contextLength: 256_000 contextLength: 256_000
), ),
ModelConfig(
id: "qwen3.5-9b",
repoId: "mlx-community/Qwen3.5-9B-4bit",
displayName: "Qwen3.5 9B",
contextLength: 256_000
),
] ]
static let `default` = availableModels[0] static let `default` = availableModels[0]

View File

@@ -347,18 +347,23 @@ final class APIServer {
// Only conversation turns go in `history:` this avoids replaying the // Only conversation turns go in `history:` this avoids replaying the
// large tool prompt as history on every new session. // large tool prompt as history on every new session.
let instr = instructions.isEmpty ? nil : instructions let instr = instructions.isEmpty ? nil : instructions
let thinkingContext: [String: any Sendable]? = Preferences.enableThinking
? nil
: ["enable_thinking": false]
if !allButLast.isEmpty { if !allButLast.isEmpty {
session = ChatSession( session = ChatSession(
container, container,
instructions: instr, instructions: instr,
history: allButLast, history: allButLast,
generateParameters: generateParams generateParameters: generateParams,
additionalContext: thinkingContext
) )
} else { } else {
session = ChatSession( session = ChatSession(
container, container,
instructions: instr, instructions: instr,
generateParameters: generateParams generateParameters: generateParams,
additionalContext: thinkingContext
) )
} }
} }
@@ -464,6 +469,7 @@ final class APIServer {
} }
LiveCounters.shared.requestCompleted(generationTokens: completionTokens) LiveCounters.shared.requestCompleted(generationTokens: completionTokens)
modelManager?.touchActivity()
// Parse tool calls: first check framework-detected ones, then our own text parser // Parse tool calls: first check framework-detected ones, then our own text parser
var finishReason = "stop" var finishReason = "stop"
@@ -536,6 +542,7 @@ final class APIServer {
} }
} catch { } catch {
LiveCounters.shared.requestCompleted(generationTokens: 0) LiveCounters.shared.requestCompleted(generationTokens: 0)
modelManager?.touchActivity()
sendResponse(connection: connection, status: 500, body: #"{"error":"\#(error.localizedDescription)"}"#) sendResponse(connection: connection, status: 500, body: #"{"error":"\#(error.localizedDescription)"}"#)
} }
} }
@@ -671,6 +678,7 @@ final class APIServer {
)) ))
LiveCounters.shared.requestCompleted(generationTokens: completionTokens) LiveCounters.shared.requestCompleted(generationTokens: completionTokens)
modelManager?.touchActivity()
// Send [DONE] and close // Send [DONE] and close
await Self.sendData(connection: connection, data: "data: [DONE]\n\n".data(using: .utf8)!) await Self.sendData(connection: connection, data: "data: [DONE]\n\n".data(using: .utf8)!)

View File

@@ -3,14 +3,43 @@ import Foundation
/// Resolves HuggingFace model repos to local snapshot directories, /// Resolves HuggingFace model repos to local snapshot directories,
/// matching the cache layout used by Python's `huggingface_hub`. /// matching the cache layout used by Python's `huggingface_hub`.
/// ///
/// Checks two locations:
/// 1. App sandbox container: ~/Library/Containers/com.mlxserver.app/.../huggingface/hub/
/// 2. System-wide cache: ~/.cache/huggingface/hub/ (shared with Python tools)
///
/// Cache structure: /// Cache structure:
/// ~/.cache/huggingface/hub/models--{org}--{name}/snapshots/{hash}/ /// .../huggingface/hub/models--{org}--{name}/snapshots/{hash}/
enum LocalModelResolver { enum LocalModelResolver {
/// The standard HuggingFace cache directory used by Python's `huggingface_hub`. /// All HuggingFace cache directories to search, in priority order.
private static let cacheBase: URL = { /// The sandboxed container path is checked first (where the app downloads to),
FileManager.default.homeDirectoryForCurrentUser /// then the system-wide Python cache (for models downloaded via huggingface-cli).
private static let cacheBases: [URL] = {
var bases: [URL] = []
// 1. Sandboxed app container cache (where swift-transformers Hub downloads to)
let containerCache = FileManager.default.homeDirectoryForCurrentUser
.appendingPathComponent("Library/Caches/huggingface/hub", isDirectory: true)
bases.append(containerCache)
// 2. System-wide ~/.cache/huggingface/hub/ (Python huggingface_hub)
// When sandboxed, homeDirectory points to the container, so construct the real path.
let realHome = URL(fileURLWithPath: NSHomeDirectory())
let systemCache = realHome
.appendingPathComponent(".cache/huggingface/hub", isDirectory: true) .appendingPathComponent(".cache/huggingface/hub", isDirectory: true)
// Avoid duplicate if they resolve to the same path
if systemCache.path != containerCache.path {
bases.append(systemCache)
}
// 3. Also try the unsandboxed home directory path
let globalHome = FileManager.default.homeDirectoryForCurrentUser
.appendingPathComponent(".cache/huggingface/hub", isDirectory: true)
if globalHome.path != containerCache.path && globalHome.path != systemCache.path {
bases.append(globalHome)
}
return bases
}() }()
/// Resolve a HuggingFace repo ID (e.g. "mlx-community/gemma-3-4b-it-4bit") /// Resolve a HuggingFace repo ID (e.g. "mlx-community/gemma-3-4b-it-4bit")
@@ -18,30 +47,71 @@ enum LocalModelResolver {
/// ///
/// Returns `nil` if the model hasn't been downloaded yet. /// Returns `nil` if the model hasn't been downloaded yet.
static func resolve(repoId: String) -> URL? { static func resolve(repoId: String) -> URL? {
// Convert "mlx-community/gemma-3-4b-it-4bit" "models--mlx-community--gemma-3-4b-it-4bit"
let dirName = "models--" + repoId.replacingOccurrences(of: "/", with: "--") let dirName = "models--" + repoId.replacingOccurrences(of: "/", with: "--")
for cacheBase in cacheBases {
let snapshotsDir = cacheBase let snapshotsDir = cacheBase
.appendingPathComponent(dirName, isDirectory: true) .appendingPathComponent(dirName, isDirectory: true)
.appendingPathComponent("snapshots", isDirectory: true) .appendingPathComponent("snapshots", isDirectory: true)
// Find the first (usually only) snapshot hash directory
guard let contents = try? FileManager.default.contentsOfDirectory( guard let contents = try? FileManager.default.contentsOfDirectory(
at: snapshotsDir, at: snapshotsDir,
includingPropertiesForKeys: [.isDirectoryKey], includingPropertiesForKeys: [.isDirectoryKey],
options: [.skipsHiddenFiles] options: [.skipsHiddenFiles]
) else { ) else {
return nil continue
} }
// Return the most recent snapshot (last alphabetically = latest hash) if let snapshot = contents
return contents .filter({ (try? $0.resourceValues(forKeys: [.isDirectoryKey]).isDirectory) == true })
.filter { (try? $0.resourceValues(forKeys: [.isDirectoryKey]).isDirectory) == true }
.sorted(by: { $0.lastPathComponent < $1.lastPathComponent }) .sorted(by: { $0.lastPathComponent < $1.lastPathComponent })
.last .last {
return snapshot
}
}
return nil
} }
/// Check if a model is available locally. /// Check if a model is available locally.
static func isAvailable(repoId: String) -> Bool { static func isAvailable(repoId: String) -> Bool {
resolve(repoId: repoId) != nil resolve(repoId: repoId) != nil
} }
/// Removes every locally cached copy of a model so the next load has to download it again.
///
/// Two kinds of locations are visited, in this order:
/// 1. `models--{org}--{name}` under each entry of `cacheBases`.
/// 2. `Library/Caches/models/{repoId}` under the current user's home directory
///    (per the original note, the per-model cache used by swift-transformers).
///
/// A failure to remove one location is logged and does not stop the remaining removals.
///
/// - Parameter repoId: HuggingFace repo ID, e.g. "mlx-community/gemma-3-4b-it-4bit".
/// - Returns: `true` if at least one directory was removed.
@discardableResult
static func deleteLocal(repoId: String) -> Bool {
    /// Removes `directory` if it exists; reports whether the removal succeeded.
    func removeIfPresent(_ directory: URL) -> Bool {
        guard FileManager.default.fileExists(atPath: directory.path) else { return false }
        do {
            try FileManager.default.removeItem(at: directory)
            print("[LocalModelResolver] Deleted \(directory.path)")
            return true
        } catch {
            print("[LocalModelResolver] Failed to delete \(directory.path): \(error)")
            return false
        }
    }

    // Hub-style cache directories, one candidate per cache base.
    let hubDirName = "models--" + repoId.replacingOccurrences(of: "/", with: "--")
    var targets = cacheBases.map { base in
        base.appendingPathComponent(hubDirName, isDirectory: true)
    }

    // Per-model cache directory, visited last.
    targets.append(
        FileManager.default.homeDirectoryForCurrentUser
            .appendingPathComponent("Library/Caches/models", isDirectory: true)
            .appendingPathComponent(repoId, isDirectory: true)
    )

    // `map` visits every target in order, so no location is skipped after a success.
    let outcomes = targets.map { removeIfPresent($0) }
    return outcomes.contains(true)
}
} }

View File

@@ -49,6 +49,17 @@ enum Preferences {
set { defaults.set(newValue, forKey: apiAutoStartKey) } set { defaults.set(newValue, forKey: apiAutoStartKey) }
} }
// MARK: - Thinking mode
/// Defaults key under which the thinking-mode flag is stored.
private static let enableThinkingKey = "enableThinking"

/// Whether thinking/reasoning mode is enabled for models that support it (e.g. Qwen3.5).
/// When disabled, the model skips internal reasoning and responds directly.
///
/// Reads as `true` until a value has been stored under the key.
static var enableThinking: Bool {
    get {
        // No stored value yet: thinking is on by default.
        guard defaults.object(forKey: enableThinkingKey) != nil else { return true }
        return defaults.bool(forKey: enableThinkingKey)
    }
    set {
        defaults.set(newValue, forKey: enableThinkingKey)
    }
}
// MARK: - Idle unload // MARK: - Idle unload
private static let idleUnloadMinutesKey = "idleUnloadMinutes" private static let idleUnloadMinutesKey = "idleUnloadMinutes"

View File

@@ -31,10 +31,16 @@ final class ChatViewModel {
guard let container = modelManager.modelContainer else { return } guard let container = modelManager.modelContainer else { return }
if chatSession == nil { if chatSession == nil {
let systemPrompt = Preferences.systemPrompt let systemPrompt = Preferences.systemPrompt
// Pass enable_thinking to the Jinja chat template context.
// Qwen3.5 and similar models use this to control reasoning mode.
let thinkingContext: [String: any Sendable]? = Preferences.enableThinking
? nil
: ["enable_thinking": false]
chatSession = ChatSession( chatSession = ChatSession(
container, container,
instructions: systemPrompt.isEmpty ? nil : systemPrompt, instructions: systemPrompt.isEmpty ? nil : systemPrompt,
generateParameters: GenerateParameters(temperature: 0.7) generateParameters: GenerateParameters(temperature: 0.7),
additionalContext: thinkingContext
) )
} }
} }
@@ -113,6 +119,7 @@ final class ChatViewModel {
conversation.finalizeMessage(at: assistantIndex) conversation.finalizeMessage(at: assistantIndex)
isGenerating = false isGenerating = false
generationTask = nil generationTask = nil
modelManager.touchActivity()
} }
} }

View File

@@ -1,4 +1,5 @@
import Foundation import Foundation
import Hub
import MLX import MLX
import MLXLMCommon import MLXLMCommon
import MLXVLM import MLXVLM
@@ -7,6 +8,11 @@ import MLXVLM
@Observable @Observable
@MainActor @MainActor
final class ModelManager { final class ModelManager {
/// HubApi with blob cache disabled to avoid storing every model twice.
/// swift-huggingface defaults to caching in both huggingface/hub/ (snapshots)
/// AND models/ (content-addressed blobs). We only need the snapshots.
private static let hub = HubApi(cache: nil)
var currentModel: ModelConfig? var currentModel: ModelConfig?
var modelContainer: ModelContainer? var modelContainer: ModelContainer?
var isLoading = false var isLoading = false
@@ -14,6 +20,12 @@ final class ModelManager {
var loadingModelName: String = "" var loadingModelName: String = ""
var errorMessage: String? var errorMessage: String?
// Download-specific state for the modal
var isDownloading = false
var downloadFilesTotal: Int64 = 0
var downloadFilesCompleted: Int64 = 0
var downloadSpeed: Double = 0 // bytes/sec
private var idleTimer: Timer? private var idleTimer: Timer?
private(set) var lastUsed: Date? private(set) var lastUsed: Date?
@@ -31,11 +43,26 @@ final class ModelManager {
loadingModelName = config.displayName loadingModelName = config.displayName
errorMessage = nil errorMessage = nil
let needsDownload = !config.isLocal
if needsDownload {
isDownloading = true
downloadFilesTotal = 0
downloadFilesCompleted = 0
downloadSpeed = 0
}
do { do {
let container: ModelContainer let container: ModelContainer
let progressHandler: @Sendable (Progress) -> Void = { progress in let progressHandler: @Sendable (Progress) -> Void = { progress in
Task { @MainActor in Task { @MainActor in
self.downloadProgress = progress.fractionCompleted self.downloadProgress = progress.fractionCompleted
if self.isDownloading {
self.downloadFilesTotal = progress.totalUnitCount
self.downloadFilesCompleted = progress.completedUnitCount
if let speed = progress.userInfo[.throughputKey] as? Double {
self.downloadSpeed = speed
}
}
} }
} }
@@ -47,20 +74,30 @@ final class ModelManager {
} }
container = try await VLMModelFactory.shared.loadContainer( container = try await VLMModelFactory.shared.loadContainer(
hub: Self.hub,
configuration: configuration, configuration: configuration,
progressHandler: progressHandler progressHandler: progressHandler
) )
self.isDownloading = false
self.modelContainer = container self.modelContainer = container
self.currentModel = config self.currentModel = config
touchActivity() touchActivity()
} catch { } catch {
self.isDownloading = false
self.errorMessage = "Failed to load model: \(error.localizedDescription)" self.errorMessage = "Failed to load model: \(error.localizedDescription)"
} }
isLoading = false isLoading = false
} }
/// Deletes the local cache for `config` and loads the model again.
///
/// Runs three steps in order: unloads the current model, removes the cached
/// files for `config.repoId` via `LocalModelResolver.deleteLocal`, then awaits
/// `loadModel(config)`.
///
/// - Parameter config: The model whose cache is removed and which is loaded afterwards.
func redownloadModel(_ config: ModelConfig) async {
// Unload first, before the cache directories are removed.
unloadModel()
// The Bool result is ignored here, so a failed deletion does not stop the reload.
LocalModelResolver.deleteLocal(repoId: config.repoId)
await loadModel(config)
}
/// Unload the current model and free GPU memory. /// Unload the current model and free GPU memory.
func unloadModel() { func unloadModel() {
idleTimer?.invalidate() idleTimer?.invalidate()

View File

@@ -57,6 +57,7 @@ struct ChatMessagesView: View {
struct MessageBubbleView: View { struct MessageBubbleView: View {
let message: ChatMessage let message: ChatMessage
@State private var showThinking = false
var body: some View { var body: some View {
HStack { HStack {
@@ -76,11 +77,16 @@ struct MessageBubbleView: View {
} }
} }
// Thinking block (collapsible)
if !message.thinkingContent.isEmpty || message.isThinking {
thinkingView
}
// Message content // Message content
if !message.content.isEmpty || message.isStreaming { if !message.content.isEmpty || (message.isStreaming && !message.isThinking) {
Group { Group {
if message.role == .assistant { if message.role == .assistant {
Markdown(message.content + (message.isStreaming ? "" : "")) Markdown(message.content + (message.isStreaming && !message.isThinking ? "" : ""))
.textSelection(.enabled) .textSelection(.enabled)
} else { } else {
Text(message.content) Text(message.content)
@@ -101,4 +107,43 @@ struct MessageBubbleView: View {
if message.role == .assistant { Spacer(minLength: 60) } if message.role == .assistant { Spacer(minLength: 60) }
} }
} }
/// Collapsible block that shows the thinking text of this message.
///
/// The header is a plain-style button that toggles `showThinking` inside a short
/// ease-in-out animation. The thinking text itself is only built while
/// `showThinking` is true.
private var thinkingView: some View {
VStack(alignment: .leading, spacing: 0) {
// Header row: tapping it expands or collapses the thinking text.
Button {
withAnimation(.easeInOut(duration: 0.15)) {
showThinking.toggle()
}
} label: {
HStack(spacing: 4) {
// Chevron points down while expanded, right while collapsed.
Image(systemName: showThinking ? "chevron.down" : "chevron.right")
.font(.caption2)
// While the message is still in a thinking block, show a spinner and "Thinking…";
// otherwise show the static "Thought" label.
if message.isThinking {
ProgressView()
.controlSize(.mini)
Text("Thinking…")
} else {
Image(systemName: "brain")
Text("Thought")
}
}
.font(.caption)
.foregroundStyle(.secondary)
}
.buttonStyle(.plain)
if showThinking {
// NOTE(review): both branches of this ternary are empty strings as shown here, so the
// suffix is a no-op. Presumably one branch held a cursor glyph that was lost — confirm
// against the real file before simplifying.
Text(message.thinkingContent + (message.isThinking ? "" : ""))
.font(.caption)
.foregroundStyle(.tertiary)
.textSelection(.enabled)
.padding(.top, 4)
.padding(.leading, 14)
}
}
.padding(.horizontal, 12)
.padding(.vertical, 6)
.background(Color.purple.opacity(0.06))
.clipShape(RoundedRectangle(cornerRadius: 8))
}
} }

View File

@@ -0,0 +1,68 @@
import SwiftUI
/// Modal card displayed while a model is being downloaded from HuggingFace.
///
/// All displayed values (model name, overall progress, file counters, throughput)
/// are read from the `ModelManager` found in the environment.
struct DownloadModalView: View {
    @Environment(ModelManager.self) private var modelManager

    var body: some View {
        VStack(spacing: 20) {
            // Title and the name of the model being fetched.
            Label("Downloading Model", systemImage: "arrow.down.circle")
                .font(.headline)

            Text(modelManager.loadingModelName)
                .font(.title3.weight(.medium))
                .foregroundStyle(.primary)

            progressSection

            Text("The model will be cached locally for future use.")
                .font(.caption)
                .foregroundStyle(.tertiary)
                .multilineTextAlignment(.center)
        }
        .padding(32)
        .frame(width: 380)
        .background(.regularMaterial, in: RoundedRectangle(cornerRadius: 16))
        .shadow(radius: 20)
    }

    /// Linear progress bar, the counters row, and the speed readout when one is available.
    private var progressSection: some View {
        VStack(spacing: 8) {
            ProgressView(value: modelManager.downloadProgress)
                .progressViewStyle(.linear)

            countersRow

            // The speed line is hidden until a positive throughput has been reported.
            if modelManager.downloadSpeed > 0 {
                Text(formatSpeed(modelManager.downloadSpeed))
                    .font(.caption.monospacedDigit())
                    .foregroundStyle(.tertiary)
            }
        }
    }

    /// "File x/y" on the left (once a total is known) and the percentage on the right.
    private var countersRow: some View {
        HStack {
            if modelManager.downloadFilesTotal > 0 {
                Text("File \(modelManager.downloadFilesCompleted)/\(modelManager.downloadFilesTotal)")
                    .font(.caption.monospacedDigit())
                    .foregroundStyle(.secondary)
            }

            Spacer()

            Text("\(Int(modelManager.downloadProgress * 100))%")
                .font(.caption.monospacedDigit())
                .foregroundStyle(.secondary)
        }
    }

    /// Formats a throughput in bytes per second using binary (1024-based) units.
    ///
    /// - Parameter bytesPerSec: Throughput in bytes per second.
    /// - Returns: One decimal for GB/s and MB/s, no decimals for KB/s and B/s.
    private func formatSpeed(_ bytesPerSec: Double) -> String {
        let kilo = 1024.0
        let mega = 1_048_576.0
        let giga = 1_073_741_824.0

        switch bytesPerSec {
        case giga...:
            return String(format: "%.1f GB/s", bytesPerSec / giga)
        case mega...:
            return String(format: "%.1f MB/s", bytesPerSec / mega)
        case kilo...:
            return String(format: "%.0f KB/s", bytesPerSec / kilo)
        default:
            return String(format: "%.0f B/s", bytesPerSec)
        }
    }
}

View File

@@ -2,6 +2,7 @@ import SwiftUI
struct ModelPickerView: View { struct ModelPickerView: View {
@Environment(ModelManager.self) private var modelManager @Environment(ModelManager.self) private var modelManager
@State private var confirmRedownload: ModelConfig?
var body: some View { var body: some View {
HStack(spacing: 8) { HStack(spacing: 8) {
@@ -15,6 +16,35 @@ struct ModelPickerView: View {
} }
.frame(width: 160) .frame(width: 160)
.disabled(modelManager.isLoading) .disabled(modelManager.isLoading)
// Re-download button (visible when a model is loaded)
if let current = modelManager.currentModel, !modelManager.isLoading {
Button {
confirmRedownload = current
} label: {
Image(systemName: "arrow.clockwise")
.font(.caption)
}
.buttonStyle(.borderless)
.help("Re-download \(current.displayName)")
}
}
.alert("Re-download Model?", isPresented: .init(
get: { confirmRedownload != nil },
set: { if !$0 { confirmRedownload = nil } }
)) {
Button("Re-download", role: .destructive) {
if let config = confirmRedownload {
Task { await modelManager.redownloadModel(config) }
}
}
Button("Cancel", role: .cancel) {
confirmRedownload = nil
}
} message: {
if let config = confirmRedownload {
Text("This will delete the local cache for \(config.displayName) and download it again from HuggingFace.")
}
} }
} }

View File

@@ -6,6 +6,7 @@ struct SettingsView: View {
@State private var apiAutoStart: Bool = Preferences.apiAutoStart @State private var apiAutoStart: Bool = Preferences.apiAutoStart
@State private var idleUnloadMinutes: String = String(Preferences.idleUnloadMinutes) @State private var idleUnloadMinutes: String = String(Preferences.idleUnloadMinutes)
@State private var defaultModelId: String = Preferences.defaultModelId ?? ModelConfig.default.id @State private var defaultModelId: String = Preferences.defaultModelId ?? ModelConfig.default.id
@State private var enableThinking: Bool = Preferences.enableThinking
var body: some View { var body: some View {
Form { Form {
@@ -24,6 +25,17 @@ struct SettingsView: View {
.foregroundStyle(.secondary) .foregroundStyle(.secondary)
} }
Section("Generation") {
Toggle("Enable thinking mode", isOn: $enableThinking)
.onChange(of: enableThinking) {
Preferences.enableThinking = enableThinking
}
Text("When enabled, models like Qwen3.5 reason internally before responding. Produces better answers but slower. Takes effect on the next conversation.")
.font(.caption)
.foregroundStyle(.secondary)
}
Section("System Prompt") { Section("System Prompt") {
TextEditor(text: $systemPrompt) TextEditor(text: $systemPrompt)
.font(.body.monospaced()) .font(.body.monospaced())
@@ -75,6 +87,6 @@ struct SettingsView: View {
} }
} }
.formStyle(.grouped) .formStyle(.grouped)
.frame(width: 450, height: 460) .frame(width: 450, height: 550)
} }
} }