feat: start of support for thinking mode, qwen 3.5 9b addition and better idle time handling
This commit is contained in:
@@ -26,6 +26,7 @@
|
|||||||
B1D9BC407DB7DB1489230C20 /* MonitorView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4239CFF94B819C35A8D4D617 /* MonitorView.swift */; };
|
B1D9BC407DB7DB1489230C20 /* MonitorView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4239CFF94B819C35A8D4D617 /* MonitorView.swift */; };
|
||||||
B5AA6E3B4BE21676226B342B /* ChatViewModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = B8BD93859F0291F1A3E09DA5 /* ChatViewModel.swift */; };
|
B5AA6E3B4BE21676226B342B /* ChatViewModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = B8BD93859F0291F1A3E09DA5 /* ChatViewModel.swift */; };
|
||||||
B6D3662995B885C102876B4A /* MLXLMCommon in Frameworks */ = {isa = PBXBuildFile; productRef = 9090667D4134056AE66DC2F1 /* MLXLMCommon */; };
|
B6D3662995B885C102876B4A /* MLXLMCommon in Frameworks */ = {isa = PBXBuildFile; productRef = 9090667D4134056AE66DC2F1 /* MLXLMCommon */; };
|
||||||
|
C07A377244DCD67F4FE709FE /* DownloadModalView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2DC8C86D397B1FCA08E07CBD /* DownloadModalView.swift */; };
|
||||||
D666A311788375E8A061C832 /* SettingsView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4147321383E94E9F17A0154E /* SettingsView.swift */; };
|
D666A311788375E8A061C832 /* SettingsView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4147321383E94E9F17A0154E /* SettingsView.swift */; };
|
||||||
D96DDE66F76FDDA642629E17 /* APIModels.swift in Sources */ = {isa = PBXBuildFile; fileRef = F1A52E2C9964ADA9D841A89B /* APIModels.swift */; };
|
D96DDE66F76FDDA642629E17 /* APIModels.swift in Sources */ = {isa = PBXBuildFile; fileRef = F1A52E2C9964ADA9D841A89B /* APIModels.swift */; };
|
||||||
F546CE5955ED253D8A793D5E /* MarkdownUI in Frameworks */ = {isa = PBXBuildFile; productRef = A98257123539E9E738213BFA /* MarkdownUI */; };
|
F546CE5955ED253D8A793D5E /* MarkdownUI in Frameworks */ = {isa = PBXBuildFile; productRef = A98257123539E9E738213BFA /* MarkdownUI */; };
|
||||||
@@ -36,6 +37,7 @@
|
|||||||
/* Begin PBXFileReference section */
|
/* Begin PBXFileReference section */
|
||||||
145B888FBDD4F931512C5473 /* Preferences.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Preferences.swift; sourceTree = "<group>"; };
|
145B888FBDD4F931512C5473 /* Preferences.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Preferences.swift; sourceTree = "<group>"; };
|
||||||
16AE82A64D1D07AE3CD8D33A /* ToolPromptBuilder.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ToolPromptBuilder.swift; sourceTree = "<group>"; };
|
16AE82A64D1D07AE3CD8D33A /* ToolPromptBuilder.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ToolPromptBuilder.swift; sourceTree = "<group>"; };
|
||||||
|
2DC8C86D397B1FCA08E07CBD /* DownloadModalView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DownloadModalView.swift; sourceTree = "<group>"; };
|
||||||
38DFC212AF4359A45FBE22BA /* ModelConfig.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ModelConfig.swift; sourceTree = "<group>"; };
|
38DFC212AF4359A45FBE22BA /* ModelConfig.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ModelConfig.swift; sourceTree = "<group>"; };
|
||||||
3AF462805202797F61422AEE /* MLXServer.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = MLXServer.entitlements; sourceTree = "<group>"; };
|
3AF462805202797F61422AEE /* MLXServer.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = MLXServer.entitlements; sourceTree = "<group>"; };
|
||||||
3D08828E16B17EF02C14243E /* APIServer.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = APIServer.swift; sourceTree = "<group>"; };
|
3D08828E16B17EF02C14243E /* APIServer.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = APIServer.swift; sourceTree = "<group>"; };
|
||||||
@@ -111,6 +113,7 @@
|
|||||||
children = (
|
children = (
|
||||||
E5E6AD02CDF23BDAB64700A7 /* ChatInputView.swift */,
|
E5E6AD02CDF23BDAB64700A7 /* ChatInputView.swift */,
|
||||||
DB1A5E8B1C9F2BC4D262C53A /* ChatMessagesView.swift */,
|
DB1A5E8B1C9F2BC4D262C53A /* ChatMessagesView.swift */,
|
||||||
|
2DC8C86D397B1FCA08E07CBD /* DownloadModalView.swift */,
|
||||||
C3C3A76C02AF70A9D8F868FC /* ModelPickerView.swift */,
|
C3C3A76C02AF70A9D8F868FC /* ModelPickerView.swift */,
|
||||||
4239CFF94B819C35A8D4D617 /* MonitorView.swift */,
|
4239CFF94B819C35A8D4D617 /* MonitorView.swift */,
|
||||||
4147321383E94E9F17A0154E /* SettingsView.swift */,
|
4147321383E94E9F17A0154E /* SettingsView.swift */,
|
||||||
@@ -240,6 +243,7 @@
|
|||||||
5C1E8FE1C521914CEF98D3AA /* ChatMessagesView.swift in Sources */,
|
5C1E8FE1C521914CEF98D3AA /* ChatMessagesView.swift in Sources */,
|
||||||
B5AA6E3B4BE21676226B342B /* ChatViewModel.swift in Sources */,
|
B5AA6E3B4BE21676226B342B /* ChatViewModel.swift in Sources */,
|
||||||
5946258F1DE88CE904584E0B /* ContentView.swift in Sources */,
|
5946258F1DE88CE904584E0B /* ContentView.swift in Sources */,
|
||||||
|
C07A377244DCD67F4FE709FE /* DownloadModalView.swift in Sources */,
|
||||||
2D08769282BD71C170DB0943 /* InferenceStats.swift in Sources */,
|
2D08769282BD71C170DB0943 /* InferenceStats.swift in Sources */,
|
||||||
6828CCA8B78AB40906F87CAB /* LocalModelResolver.swift in Sources */,
|
6828CCA8B78AB40906F87CAB /* LocalModelResolver.swift in Sources */,
|
||||||
50B6861FF8610B3ED4FFAD9D /* MLXServerApp.swift in Sources */,
|
50B6861FF8610B3ED4FFAD9D /* MLXServerApp.swift in Sources */,
|
||||||
|
|||||||
@@ -56,6 +56,7 @@ struct ContentView: View {
|
|||||||
|
|
||||||
@ViewBuilder
|
@ViewBuilder
|
||||||
private var mainContent: some View {
|
private var mainContent: some View {
|
||||||
|
ZStack {
|
||||||
if let chatVM {
|
if let chatVM {
|
||||||
if showMonitor {
|
if showMonitor {
|
||||||
MonitorView(stats: chatVM.apiServer.inferenceStats)
|
MonitorView(stats: chatVM.apiServer.inferenceStats)
|
||||||
@@ -65,6 +66,14 @@ struct ContentView: View {
|
|||||||
} else {
|
} else {
|
||||||
ProgressView("Initializing…")
|
ProgressView("Initializing…")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Download modal overlay
|
||||||
|
if modelManager.isDownloading {
|
||||||
|
Color.black.opacity(0.3)
|
||||||
|
.ignoresSafeArea()
|
||||||
|
DownloadModalView()
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ViewBuilder
|
@ViewBuilder
|
||||||
|
|||||||
@@ -10,6 +10,16 @@ struct ChatMessage: Identifiable {
|
|||||||
var isStreaming: Bool
|
var isStreaming: Bool
|
||||||
let timestamp: Date
|
let timestamp: Date
|
||||||
|
|
||||||
|
/// Raw streamed text including <think> tags (only for assistant messages).
|
||||||
|
/// `content` and `thinkingContent` are derived from this.
|
||||||
|
var rawContent: String = ""
|
||||||
|
|
||||||
|
/// The thinking/reasoning content extracted from <think>...</think> tags.
|
||||||
|
var thinkingContent: String = ""
|
||||||
|
|
||||||
|
/// Whether the model is currently in a thinking block.
|
||||||
|
var isThinking: Bool = false
|
||||||
|
|
||||||
enum Role: String {
|
enum Role: String {
|
||||||
case system
|
case system
|
||||||
case user
|
case user
|
||||||
@@ -19,6 +29,7 @@ struct ChatMessage: Identifiable {
|
|||||||
init(role: Role, content: String, images: [NSImage] = [], isStreaming: Bool = false) {
|
init(role: Role, content: String, images: [NSImage] = [], isStreaming: Bool = false) {
|
||||||
self.role = role
|
self.role = role
|
||||||
self.content = content
|
self.content = content
|
||||||
|
self.rawContent = content
|
||||||
self.images = images
|
self.images = images
|
||||||
self.isStreaming = isStreaming
|
self.isStreaming = isStreaming
|
||||||
self.timestamp = Date()
|
self.timestamp = Date()
|
||||||
@@ -43,15 +54,53 @@ final class Conversation {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Appends a streamed text chunk to the assistant message at `index` and
/// re-derives its visible and thinking text.
///
/// The chunk is accumulated in `rawContent`. The whole accumulated string is then
/// re-scanned, so a `<think>` / `</think>` tag that was split across two chunks is
/// still recognised once both halves have arrived. Text inside think tags goes to
/// `thinkingContent`, everything else to `content`; both are stored trimmed of
/// surrounding whitespace and newlines. `isThinking` is `true` while the last
/// opening tag has not been closed yet.
///
/// - Parameters:
///   - index: Position of the message in `messages`. Out-of-range values
///     (including negative ones) are ignored.
///   - chunk: The newly streamed text.
func appendToMessage(at index: Int, chunk: String) {
    // `indices.contains` also rejects negative indices, which a plain
    // `index < messages.count` check would let through to a trapping subscript.
    guard messages.indices.contains(index) else { return }
    messages[index].rawContent += chunk

    let openTag = "<think>"
    let closeTag = "</think>"

    var thinking = ""
    var visible = ""
    var isInThink = false
    var remainder = Substring(messages[index].rawContent)

    // A stream may carry a closing tag with no opening tag before it (presumably
    // when the opening tag was emitted as part of the prompt rather than the
    // completion — verify against the model's chat template). Everything ahead of
    // such an orphaned closing tag is reasoning, and the tag itself must not leak
    // into the visible text.
    if let firstClose = remainder.range(of: closeTag) {
        let openComesFirst = remainder.range(of: openTag)
            .map { $0.lowerBound < firstClose.lowerBound } ?? false
        if !openComesFirst {
            thinking.append(contentsOf: remainder[..<firstClose.lowerBound])
            remainder = remainder[firstClose.upperBound...]
        }
    }

    while !remainder.isEmpty {
        if isInThink {
            guard let close = remainder.range(of: closeTag) else {
                // Block not closed yet: the rest of the stream so far is reasoning.
                thinking.append(contentsOf: remainder)
                break
            }
            thinking.append(contentsOf: remainder[..<close.lowerBound])
            remainder = remainder[close.upperBound...]
            isInThink = false
        } else {
            guard let open = remainder.range(of: openTag) else {
                visible.append(contentsOf: remainder)
                break
            }
            visible.append(contentsOf: remainder[..<open.lowerBound])
            remainder = remainder[open.upperBound...]
            isInThink = true
        }
    }

    messages[index].thinkingContent = thinking.trimmingCharacters(in: .whitespacesAndNewlines)
    messages[index].content = visible.trimmingCharacters(in: .whitespacesAndNewlines)
    messages[index].isThinking = isInThink
}
|
||||||
|
|
||||||
/// Marks the assistant message at `index` as finished: it is no longer
/// streaming and no thinking block is considered in progress.
///
/// - Parameter index: Position of the message in `messages`. Out-of-range
///   values (including negative ones) are ignored.
func finalizeMessage(at index: Int) {
    // `indices.contains` also rejects negative indices, which a plain
    // `index < messages.count` check would let through to a trapping subscript.
    guard messages.indices.contains(index) else { return }
    messages[index].isStreaming = false
    messages[index].isThinking = false
}
|
||||||
|
|
||||||
func clear() {
|
func clear() {
|
||||||
|
|||||||
@@ -22,6 +22,12 @@ struct ModelConfig: Identifiable, Hashable {
|
|||||||
displayName: "Qwen3 VL 4B",
|
displayName: "Qwen3 VL 4B",
|
||||||
contextLength: 256_000
|
contextLength: 256_000
|
||||||
),
|
),
|
||||||
|
ModelConfig(
|
||||||
|
id: "qwen3.5-9b",
|
||||||
|
repoId: "mlx-community/Qwen3.5-9B-4bit",
|
||||||
|
displayName: "Qwen3.5 9B",
|
||||||
|
contextLength: 256_000
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
static let `default` = availableModels[0]
|
static let `default` = availableModels[0]
|
||||||
|
|||||||
@@ -347,18 +347,23 @@ final class APIServer {
|
|||||||
// Only conversation turns go in `history:` — this avoids replaying the
|
// Only conversation turns go in `history:` — this avoids replaying the
|
||||||
// large tool prompt as history on every new session.
|
// large tool prompt as history on every new session.
|
||||||
let instr = instructions.isEmpty ? nil : instructions
|
let instr = instructions.isEmpty ? nil : instructions
|
||||||
|
let thinkingContext: [String: any Sendable]? = Preferences.enableThinking
|
||||||
|
? nil
|
||||||
|
: ["enable_thinking": false]
|
||||||
if !allButLast.isEmpty {
|
if !allButLast.isEmpty {
|
||||||
session = ChatSession(
|
session = ChatSession(
|
||||||
container,
|
container,
|
||||||
instructions: instr,
|
instructions: instr,
|
||||||
history: allButLast,
|
history: allButLast,
|
||||||
generateParameters: generateParams
|
generateParameters: generateParams,
|
||||||
|
additionalContext: thinkingContext
|
||||||
)
|
)
|
||||||
} else {
|
} else {
|
||||||
session = ChatSession(
|
session = ChatSession(
|
||||||
container,
|
container,
|
||||||
instructions: instr,
|
instructions: instr,
|
||||||
generateParameters: generateParams
|
generateParameters: generateParams,
|
||||||
|
additionalContext: thinkingContext
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -464,6 +469,7 @@ final class APIServer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
LiveCounters.shared.requestCompleted(generationTokens: completionTokens)
|
LiveCounters.shared.requestCompleted(generationTokens: completionTokens)
|
||||||
|
modelManager?.touchActivity()
|
||||||
|
|
||||||
// Parse tool calls: first check framework-detected ones, then our own text parser
|
// Parse tool calls: first check framework-detected ones, then our own text parser
|
||||||
var finishReason = "stop"
|
var finishReason = "stop"
|
||||||
@@ -536,6 +542,7 @@ final class APIServer {
|
|||||||
}
|
}
|
||||||
} catch {
|
} catch {
|
||||||
LiveCounters.shared.requestCompleted(generationTokens: 0)
|
LiveCounters.shared.requestCompleted(generationTokens: 0)
|
||||||
|
modelManager?.touchActivity()
|
||||||
sendResponse(connection: connection, status: 500, body: #"{"error":"\#(error.localizedDescription)"}"#)
|
sendResponse(connection: connection, status: 500, body: #"{"error":"\#(error.localizedDescription)"}"#)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -671,6 +678,7 @@ final class APIServer {
|
|||||||
))
|
))
|
||||||
|
|
||||||
LiveCounters.shared.requestCompleted(generationTokens: completionTokens)
|
LiveCounters.shared.requestCompleted(generationTokens: completionTokens)
|
||||||
|
modelManager?.touchActivity()
|
||||||
|
|
||||||
// Send [DONE] and close
|
// Send [DONE] and close
|
||||||
await Self.sendData(connection: connection, data: "data: [DONE]\n\n".data(using: .utf8)!)
|
await Self.sendData(connection: connection, data: "data: [DONE]\n\n".data(using: .utf8)!)
|
||||||
|
|||||||
@@ -3,14 +3,43 @@ import Foundation
|
|||||||
/// Resolves HuggingFace model repos to local snapshot directories,
|
/// Resolves HuggingFace model repos to local snapshot directories,
|
||||||
/// matching the cache layout used by Python's `huggingface_hub`.
|
/// matching the cache layout used by Python's `huggingface_hub`.
|
||||||
///
|
///
|
||||||
|
/// Checks two locations:
|
||||||
|
/// 1. App sandbox container: ~/Library/Containers/com.mlxserver.app/.../huggingface/hub/
|
||||||
|
/// 2. System-wide cache: ~/.cache/huggingface/hub/ (shared with Python tools)
|
||||||
|
///
|
||||||
/// Cache structure:
|
/// Cache structure:
|
||||||
/// ~/.cache/huggingface/hub/models--{org}--{name}/snapshots/{hash}/
|
/// .../huggingface/hub/models--{org}--{name}/snapshots/{hash}/
|
||||||
enum LocalModelResolver {
|
enum LocalModelResolver {
|
||||||
|
|
||||||
/// All HuggingFace cache directories to search, in priority order:
///
/// 1. `Library/Caches/huggingface/hub` under the process home directory.
/// 2. `.cache/huggingface/hub` under the user's real home directory, as recorded
///    in the password database (the location Python's `huggingface_hub` uses).
/// 3. `.cache/huggingface/hub` under the process home directory, when that is a
///    different place from (2).
///
/// In a sandboxed app both `NSHomeDirectory()` and
/// `FileManager.homeDirectoryForCurrentUser` report the app container, so neither
/// can be used to reach the real `~/.cache`; the password-database lookup is what
/// makes (2) distinct from (3). Whether the sandbox then permits reading that
/// directory depends on the app's entitlements — callers must tolerate failure.
private static let cacheBases: [URL] = {
    let pythonCacheSubpath = ".cache/huggingface/hub"
    let processHome = FileManager.default.homeDirectoryForCurrentUser

    var candidates: [URL] = [
        processHome.appendingPathComponent("Library/Caches/huggingface/hub", isDirectory: true)
    ]

    // Real (unsandboxed) home directory of the current user, if it can be looked up.
    if let entry = getpwuid(getuid()), let homePath = entry.pointee.pw_dir {
        let realHome = URL(fileURLWithPath: String(cString: homePath), isDirectory: true)
        candidates.append(realHome.appendingPathComponent(pythonCacheSubpath, isDirectory: true))
    }

    candidates.append(processHome.appendingPathComponent(pythonCacheSubpath, isDirectory: true))

    // Outside the sandbox (2) and (3) are the same directory; keep the first
    // occurrence of each path so the priority order is preserved.
    var seenPaths = Set<String>()
    return candidates.filter { seenPaths.insert($0.standardizedFileURL.path).inserted }
}()
|
||||||
|
|
||||||
/// Resolve a HuggingFace repo ID (e.g. "mlx-community/gemma-3-4b-it-4bit")
|
/// Resolve a HuggingFace repo ID (e.g. "mlx-community/gemma-3-4b-it-4bit")
|
||||||
@@ -18,30 +47,71 @@ enum LocalModelResolver {
|
|||||||
///
|
///
|
||||||
/// Returns `nil` if the model hasn't been downloaded yet.
|
/// Returns `nil` if the model hasn't been downloaded yet.
|
||||||
static func resolve(repoId: String) -> URL? {
    let fileManager = FileManager.default
    // "org/name" becomes "models--org--name", the per-repo folder name in the cache.
    let repoFolder = "models--" + repoId.replacingOccurrences(of: "/", with: "--")

    for base in cacheBases {
        let snapshotsURL = base
            .appendingPathComponent(repoFolder, isDirectory: true)
            .appendingPathComponent("snapshots", isDirectory: true)

        // A cache location that is missing or unreadable is skipped, not treated as fatal.
        let entries: [URL]
        do {
            entries = try fileManager.contentsOfDirectory(
                at: snapshotsURL,
                includingPropertiesForKeys: [.isDirectoryKey],
                options: [.skipsHiddenFiles]
            )
        } catch {
            continue
        }

        // Among the snapshot directories, take the one whose name sorts last.
        let directories = entries.filter { entry in
            (try? entry.resourceValues(forKeys: [.isDirectoryKey]).isDirectory) == true
        }
        if let chosen = directories.max(by: { $0.lastPathComponent < $1.lastPathComponent }) {
            return chosen
        }
    }

    return nil
}
|
||||||
|
|
||||||
/// Check if a model is available locally.
|
/// Check if a model is available locally.
|
||||||
static func isAvailable(repoId: String) -> Bool {
|
static func isAvailable(repoId: String) -> Bool {
|
||||||
resolve(repoId: repoId) != nil
|
resolve(repoId: repoId) != nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Deletes the locally cached copy of a model so it will be downloaded again
/// the next time it is needed.
///
/// Removes `models--{org}--{name}` from every directory in `cacheBases`, plus
/// `Library/Caches/models/{repoId}` under the process home directory. Each
/// removal is best-effort: a failure is logged and the remaining locations are
/// still processed.
///
/// - Parameter repoId: HuggingFace repo ID such as `"org/name"`. An empty ID is
///   rejected, because it would resolve the per-model path to the shared
///   `Library/Caches/models` root and delete every model stored there.
/// - Returns: `true` if at least one directory was deleted.
@discardableResult
static func deleteLocal(repoId: String) -> Bool {
    guard !repoId.isEmpty else { return false }

    let fileManager = FileManager.default

    // Removes `url` if it exists; reports whether something was actually deleted.
    func removeIfPresent(_ url: URL) -> Bool {
        guard fileManager.fileExists(atPath: url.path) else { return false }
        do {
            try fileManager.removeItem(at: url)
            print("[LocalModelResolver] Deleted \(url.path)")
            return true
        } catch {
            print("[LocalModelResolver] Failed to delete \(url.path): \(error)")
            return false
        }
    }

    let dirName = "models--" + repoId.replacingOccurrences(of: "/", with: "--")
    var targets = cacheBases.map { $0.appendingPathComponent(dirName, isDirectory: true) }

    // Per-model directory kept outside the hub layout.
    targets.append(
        fileManager.homeDirectoryForCurrentUser
            .appendingPathComponent("Library/Caches/models", isDirectory: true)
            .appendingPathComponent(repoId, isDirectory: true)
    )

    var deleted = false
    for target in targets {
        if removeIfPresent(target) {
            deleted = true
        }
    }
    return deleted
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -49,6 +49,17 @@ enum Preferences {
|
|||||||
set { defaults.set(newValue, forKey: apiAutoStartKey) }
|
set { defaults.set(newValue, forKey: apiAutoStartKey) }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// MARK: - Thinking mode
|
||||||
|
|
||||||
|
private static let enableThinkingKey = "enableThinking"
|
||||||
|
|
||||||
|
/// Whether thinking/reasoning mode is requested for models that support it.
///
/// Reads as `true` until a value has been stored under `enableThinkingKey`;
/// after that the stored boolean is returned.
static var enableThinking: Bool {
    get {
        // No stored value yet: thinking is on by default.
        guard defaults.object(forKey: enableThinkingKey) != nil else { return true }
        return defaults.bool(forKey: enableThinkingKey)
    }
    set {
        defaults.set(newValue, forKey: enableThinkingKey)
    }
}
|
||||||
|
|
||||||
// MARK: - Idle unload
|
// MARK: - Idle unload
|
||||||
|
|
||||||
private static let idleUnloadMinutesKey = "idleUnloadMinutes"
|
private static let idleUnloadMinutesKey = "idleUnloadMinutes"
|
||||||
|
|||||||
@@ -31,10 +31,16 @@ final class ChatViewModel {
|
|||||||
guard let container = modelManager.modelContainer else { return }
|
guard let container = modelManager.modelContainer else { return }
|
||||||
if chatSession == nil {
|
if chatSession == nil {
|
||||||
let systemPrompt = Preferences.systemPrompt
|
let systemPrompt = Preferences.systemPrompt
|
||||||
|
// Pass enable_thinking to the Jinja chat template context.
|
||||||
|
// Qwen3.5 and similar models use this to control reasoning mode.
|
||||||
|
let thinkingContext: [String: any Sendable]? = Preferences.enableThinking
|
||||||
|
? nil
|
||||||
|
: ["enable_thinking": false]
|
||||||
chatSession = ChatSession(
|
chatSession = ChatSession(
|
||||||
container,
|
container,
|
||||||
instructions: systemPrompt.isEmpty ? nil : systemPrompt,
|
instructions: systemPrompt.isEmpty ? nil : systemPrompt,
|
||||||
generateParameters: GenerateParameters(temperature: 0.7)
|
generateParameters: GenerateParameters(temperature: 0.7),
|
||||||
|
additionalContext: thinkingContext
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -113,6 +119,7 @@ final class ChatViewModel {
|
|||||||
conversation.finalizeMessage(at: assistantIndex)
|
conversation.finalizeMessage(at: assistantIndex)
|
||||||
isGenerating = false
|
isGenerating = false
|
||||||
generationTask = nil
|
generationTask = nil
|
||||||
|
modelManager.touchActivity()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
import Foundation
|
import Foundation
|
||||||
|
import Hub
|
||||||
import MLX
|
import MLX
|
||||||
import MLXLMCommon
|
import MLXLMCommon
|
||||||
import MLXVLM
|
import MLXVLM
|
||||||
@@ -7,6 +8,11 @@ import MLXVLM
|
|||||||
@Observable
|
@Observable
|
||||||
@MainActor
|
@MainActor
|
||||||
final class ModelManager {
|
final class ModelManager {
|
||||||
|
|
||||||
|
/// HubApi with blob cache disabled to avoid storing every model twice.
|
||||||
|
/// swift-huggingface defaults to caching in both huggingface/hub/ (snapshots)
|
||||||
|
/// AND models/ (content-addressed blobs). We only need the snapshots.
|
||||||
|
private static let hub = HubApi(cache: nil)
|
||||||
var currentModel: ModelConfig?
|
var currentModel: ModelConfig?
|
||||||
var modelContainer: ModelContainer?
|
var modelContainer: ModelContainer?
|
||||||
var isLoading = false
|
var isLoading = false
|
||||||
@@ -14,6 +20,12 @@ final class ModelManager {
|
|||||||
var loadingModelName: String = ""
|
var loadingModelName: String = ""
|
||||||
var errorMessage: String?
|
var errorMessage: String?
|
||||||
|
|
||||||
|
// Download-specific state for the modal
|
||||||
|
var isDownloading = false
|
||||||
|
var downloadFilesTotal: Int64 = 0
|
||||||
|
var downloadFilesCompleted: Int64 = 0
|
||||||
|
var downloadSpeed: Double = 0 // bytes/sec
|
||||||
|
|
||||||
private var idleTimer: Timer?
|
private var idleTimer: Timer?
|
||||||
private(set) var lastUsed: Date?
|
private(set) var lastUsed: Date?
|
||||||
|
|
||||||
@@ -31,11 +43,26 @@ final class ModelManager {
|
|||||||
loadingModelName = config.displayName
|
loadingModelName = config.displayName
|
||||||
errorMessage = nil
|
errorMessage = nil
|
||||||
|
|
||||||
|
let needsDownload = !config.isLocal
|
||||||
|
if needsDownload {
|
||||||
|
isDownloading = true
|
||||||
|
downloadFilesTotal = 0
|
||||||
|
downloadFilesCompleted = 0
|
||||||
|
downloadSpeed = 0
|
||||||
|
}
|
||||||
|
|
||||||
do {
|
do {
|
||||||
let container: ModelContainer
|
let container: ModelContainer
|
||||||
let progressHandler: @Sendable (Progress) -> Void = { progress in
|
let progressHandler: @Sendable (Progress) -> Void = { progress in
|
||||||
Task { @MainActor in
|
Task { @MainActor in
|
||||||
self.downloadProgress = progress.fractionCompleted
|
self.downloadProgress = progress.fractionCompleted
|
||||||
|
if self.isDownloading {
|
||||||
|
self.downloadFilesTotal = progress.totalUnitCount
|
||||||
|
self.downloadFilesCompleted = progress.completedUnitCount
|
||||||
|
if let speed = progress.userInfo[.throughputKey] as? Double {
|
||||||
|
self.downloadSpeed = speed
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -47,20 +74,30 @@ final class ModelManager {
|
|||||||
}
|
}
|
||||||
|
|
||||||
container = try await VLMModelFactory.shared.loadContainer(
|
container = try await VLMModelFactory.shared.loadContainer(
|
||||||
|
hub: Self.hub,
|
||||||
configuration: configuration,
|
configuration: configuration,
|
||||||
progressHandler: progressHandler
|
progressHandler: progressHandler
|
||||||
)
|
)
|
||||||
|
|
||||||
|
self.isDownloading = false
|
||||||
self.modelContainer = container
|
self.modelContainer = container
|
||||||
self.currentModel = config
|
self.currentModel = config
|
||||||
touchActivity()
|
touchActivity()
|
||||||
} catch {
|
} catch {
|
||||||
|
self.isDownloading = false
|
||||||
self.errorMessage = "Failed to load model: \(error.localizedDescription)"
|
self.errorMessage = "Failed to load model: \(error.localizedDescription)"
|
||||||
}
|
}
|
||||||
|
|
||||||
isLoading = false
|
isLoading = false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Re-fetches `config` from scratch: unloads the current model, deletes the
/// local cache for `config.repoId`, then loads the model again.
func redownloadModel(_ config: ModelConfig) async {
    let repoId = config.repoId

    unloadModel()
    // The deletion result is not inspected here; loading proceeds either way.
    LocalModelResolver.deleteLocal(repoId: repoId)
    await loadModel(config)
}
|
||||||
|
|
||||||
/// Unload the current model and free GPU memory.
|
/// Unload the current model and free GPU memory.
|
||||||
func unloadModel() {
|
func unloadModel() {
|
||||||
idleTimer?.invalidate()
|
idleTimer?.invalidate()
|
||||||
|
|||||||
@@ -57,6 +57,7 @@ struct ChatMessagesView: View {
|
|||||||
|
|
||||||
struct MessageBubbleView: View {
|
struct MessageBubbleView: View {
|
||||||
let message: ChatMessage
|
let message: ChatMessage
|
||||||
|
@State private var showThinking = false
|
||||||
|
|
||||||
var body: some View {
|
var body: some View {
|
||||||
HStack {
|
HStack {
|
||||||
@@ -76,11 +77,16 @@ struct MessageBubbleView: View {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Thinking block (collapsible)
|
||||||
|
if !message.thinkingContent.isEmpty || message.isThinking {
|
||||||
|
thinkingView
|
||||||
|
}
|
||||||
|
|
||||||
// Message content
|
// Message content
|
||||||
if !message.content.isEmpty || message.isStreaming {
|
if !message.content.isEmpty || (message.isStreaming && !message.isThinking) {
|
||||||
Group {
|
Group {
|
||||||
if message.role == .assistant {
|
if message.role == .assistant {
|
||||||
Markdown(message.content + (message.isStreaming ? " ●" : ""))
|
Markdown(message.content + (message.isStreaming && !message.isThinking ? " ●" : ""))
|
||||||
.textSelection(.enabled)
|
.textSelection(.enabled)
|
||||||
} else {
|
} else {
|
||||||
Text(message.content)
|
Text(message.content)
|
||||||
@@ -101,4 +107,43 @@ struct MessageBubbleView: View {
|
|||||||
if message.role == .assistant { Spacer(minLength: 60) }
|
if message.role == .assistant { Spacer(minLength: 60) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Collapsible panel for the message's reasoning text.
///
/// The header row toggles `showThinking`. While `message.isThinking` is set it
/// shows a spinner and "Thinking…", otherwise a brain icon and "Thought". The
/// reasoning text itself is only laid out while the panel is expanded.
private var thinkingView: some View {
    let chevronName = showThinking ? "chevron.down" : "chevron.right"
    // A trailing dot marks reasoning that is still being streamed.
    let reasoningText = message.thinkingContent + (message.isThinking ? " ●" : "")
    let panelShape = RoundedRectangle(cornerRadius: 8)

    return VStack(alignment: .leading, spacing: 0) {
        Button(
            action: {
                withAnimation(.easeInOut(duration: 0.15)) {
                    showThinking.toggle()
                }
            },
            label: {
                HStack(spacing: 4) {
                    Image(systemName: chevronName)
                        .font(.caption2)
                    if message.isThinking {
                        ProgressView()
                            .controlSize(.mini)
                        Text("Thinking…")
                    } else {
                        Image(systemName: "brain")
                        Text("Thought")
                    }
                }
                .font(.caption)
                .foregroundStyle(.secondary)
            }
        )
        .buttonStyle(.plain)

        if showThinking {
            Text(reasoningText)
                .font(.caption)
                .foregroundStyle(.tertiary)
                .textSelection(.enabled)
                .padding(.top, 4)
                .padding(.leading, 14)
        }
    }
    .padding(.horizontal, 12)
    .padding(.vertical, 6)
    .background(Color.purple.opacity(0.06))
    .clipShape(panelShape)
}
|
||||||
}
|
}
|
||||||
|
|||||||
68
MLXServer/Views/DownloadModalView.swift
Normal file
68
MLXServer/Views/DownloadModalView.swift
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
import SwiftUI
|
||||||
|
|
||||||
|
/// Card shown on top of the main content while a model is being downloaded.
///
/// Everything displayed is read from the `ModelManager` in the environment:
/// the model name, overall progress, the completed/total counters and the
/// current transfer speed.
struct DownloadModalView: View {
    @Environment(ModelManager.self) private var modelManager

    var body: some View {
        VStack(spacing: 20) {
            Label("Downloading Model", systemImage: "arrow.down.circle")
                .font(.headline)

            Text(modelManager.loadingModelName)
                .font(.title3.weight(.medium))
                .foregroundStyle(.primary)

            progressSection

            Text("The model will be cached locally for future use.")
                .font(.caption)
                .foregroundStyle(.tertiary)
                .multilineTextAlignment(.center)
        }
        .padding(32)
        .frame(width: 380)
        .background(.regularMaterial, in: RoundedRectangle(cornerRadius: 16))
        .shadow(radius: 20)
    }

    /// Progress bar with the counters row and, once known, the transfer speed.
    private var progressSection: some View {
        VStack(spacing: 8) {
            ProgressView(value: modelManager.downloadProgress)
                .progressViewStyle(.linear)

            HStack {
                // The counter is hidden until a total has been reported.
                if modelManager.downloadFilesTotal > 0 {
                    Text("File \(modelManager.downloadFilesCompleted)/\(modelManager.downloadFilesTotal)")
                        .font(.caption.monospacedDigit())
                        .foregroundStyle(.secondary)
                }

                Spacer()

                Text("\(Int(modelManager.downloadProgress * 100))%")
                    .font(.caption.monospacedDigit())
                    .foregroundStyle(.secondary)
            }

            if modelManager.downloadSpeed > 0 {
                Text(formatSpeed(modelManager.downloadSpeed))
                    .font(.caption.monospacedDigit())
                    .foregroundStyle(.tertiary)
            }
        }
    }

    /// Formats a transfer rate in bytes per second using 1024-based units:
    /// one decimal place for GB/s and MB/s, none for KB/s and B/s.
    private func formatSpeed(_ bytesPerSec: Double) -> String {
        switch bytesPerSec {
        case 1_073_741_824...:
            return String(format: "%.1f GB/s", bytesPerSec / 1_073_741_824)
        case 1_048_576...:
            return String(format: "%.1f MB/s", bytesPerSec / 1_048_576)
        case 1024...:
            return String(format: "%.0f KB/s", bytesPerSec / 1024)
        default:
            return String(format: "%.0f B/s", bytesPerSec)
        }
    }
}
|
||||||
@@ -2,6 +2,7 @@ import SwiftUI
|
|||||||
|
|
||||||
struct ModelPickerView: View {
|
struct ModelPickerView: View {
|
||||||
@Environment(ModelManager.self) private var modelManager
|
@Environment(ModelManager.self) private var modelManager
|
||||||
|
@State private var confirmRedownload: ModelConfig?
|
||||||
|
|
||||||
var body: some View {
|
var body: some View {
|
||||||
HStack(spacing: 8) {
|
HStack(spacing: 8) {
|
||||||
@@ -15,6 +16,35 @@ struct ModelPickerView: View {
|
|||||||
}
|
}
|
||||||
.frame(width: 160)
|
.frame(width: 160)
|
||||||
.disabled(modelManager.isLoading)
|
.disabled(modelManager.isLoading)
|
||||||
|
|
||||||
|
// Re-download button (visible when a model is loaded)
|
||||||
|
if let current = modelManager.currentModel, !modelManager.isLoading {
|
||||||
|
Button {
|
||||||
|
confirmRedownload = current
|
||||||
|
} label: {
|
||||||
|
Image(systemName: "arrow.clockwise")
|
||||||
|
.font(.caption)
|
||||||
|
}
|
||||||
|
.buttonStyle(.borderless)
|
||||||
|
.help("Re-download \(current.displayName)")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
.alert("Re-download Model?", isPresented: .init(
|
||||||
|
get: { confirmRedownload != nil },
|
||||||
|
set: { if !$0 { confirmRedownload = nil } }
|
||||||
|
)) {
|
||||||
|
Button("Re-download", role: .destructive) {
|
||||||
|
if let config = confirmRedownload {
|
||||||
|
Task { await modelManager.redownloadModel(config) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Button("Cancel", role: .cancel) {
|
||||||
|
confirmRedownload = nil
|
||||||
|
}
|
||||||
|
} message: {
|
||||||
|
if let config = confirmRedownload {
|
||||||
|
Text("This will delete the local cache for \(config.displayName) and download it again from HuggingFace.")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ struct SettingsView: View {
|
|||||||
@State private var apiAutoStart: Bool = Preferences.apiAutoStart
|
@State private var apiAutoStart: Bool = Preferences.apiAutoStart
|
||||||
@State private var idleUnloadMinutes: String = String(Preferences.idleUnloadMinutes)
|
@State private var idleUnloadMinutes: String = String(Preferences.idleUnloadMinutes)
|
||||||
@State private var defaultModelId: String = Preferences.defaultModelId ?? ModelConfig.default.id
|
@State private var defaultModelId: String = Preferences.defaultModelId ?? ModelConfig.default.id
|
||||||
|
@State private var enableThinking: Bool = Preferences.enableThinking
|
||||||
|
|
||||||
var body: some View {
|
var body: some View {
|
||||||
Form {
|
Form {
|
||||||
@@ -24,6 +25,17 @@ struct SettingsView: View {
|
|||||||
.foregroundStyle(.secondary)
|
.foregroundStyle(.secondary)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Section("Generation") {
|
||||||
|
Toggle("Enable thinking mode", isOn: $enableThinking)
|
||||||
|
.onChange(of: enableThinking) {
|
||||||
|
Preferences.enableThinking = enableThinking
|
||||||
|
}
|
||||||
|
|
||||||
|
Text("When enabled, models like Qwen3.5 reason internally before responding. Produces better answers but slower. Takes effect on the next conversation.")
|
||||||
|
.font(.caption)
|
||||||
|
.foregroundStyle(.secondary)
|
||||||
|
}
|
||||||
|
|
||||||
Section("System Prompt") {
|
Section("System Prompt") {
|
||||||
TextEditor(text: $systemPrompt)
|
TextEditor(text: $systemPrompt)
|
||||||
.font(.body.monospaced())
|
.font(.body.monospaced())
|
||||||
@@ -75,6 +87,6 @@ struct SettingsView: View {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
.formStyle(.grouped)
|
.formStyle(.grouped)
|
||||||
.frame(width: 450, height: 460)
|
.frame(width: 450, height: 550)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user