feat: start of support for thinking mode, qwen 3.5 9b addition and better idle time handling
This commit is contained in:
@@ -26,6 +26,7 @@
|
||||
B1D9BC407DB7DB1489230C20 /* MonitorView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4239CFF94B819C35A8D4D617 /* MonitorView.swift */; };
|
||||
B5AA6E3B4BE21676226B342B /* ChatViewModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = B8BD93859F0291F1A3E09DA5 /* ChatViewModel.swift */; };
|
||||
B6D3662995B885C102876B4A /* MLXLMCommon in Frameworks */ = {isa = PBXBuildFile; productRef = 9090667D4134056AE66DC2F1 /* MLXLMCommon */; };
|
||||
C07A377244DCD67F4FE709FE /* DownloadModalView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2DC8C86D397B1FCA08E07CBD /* DownloadModalView.swift */; };
|
||||
D666A311788375E8A061C832 /* SettingsView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4147321383E94E9F17A0154E /* SettingsView.swift */; };
|
||||
D96DDE66F76FDDA642629E17 /* APIModels.swift in Sources */ = {isa = PBXBuildFile; fileRef = F1A52E2C9964ADA9D841A89B /* APIModels.swift */; };
|
||||
F546CE5955ED253D8A793D5E /* MarkdownUI in Frameworks */ = {isa = PBXBuildFile; productRef = A98257123539E9E738213BFA /* MarkdownUI */; };
|
||||
@@ -36,6 +37,7 @@
|
||||
/* Begin PBXFileReference section */
|
||||
145B888FBDD4F931512C5473 /* Preferences.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Preferences.swift; sourceTree = "<group>"; };
|
||||
16AE82A64D1D07AE3CD8D33A /* ToolPromptBuilder.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ToolPromptBuilder.swift; sourceTree = "<group>"; };
|
||||
2DC8C86D397B1FCA08E07CBD /* DownloadModalView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DownloadModalView.swift; sourceTree = "<group>"; };
|
||||
38DFC212AF4359A45FBE22BA /* ModelConfig.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ModelConfig.swift; sourceTree = "<group>"; };
|
||||
3AF462805202797F61422AEE /* MLXServer.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = MLXServer.entitlements; sourceTree = "<group>"; };
|
||||
3D08828E16B17EF02C14243E /* APIServer.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = APIServer.swift; sourceTree = "<group>"; };
|
||||
@@ -111,6 +113,7 @@
|
||||
children = (
|
||||
E5E6AD02CDF23BDAB64700A7 /* ChatInputView.swift */,
|
||||
DB1A5E8B1C9F2BC4D262C53A /* ChatMessagesView.swift */,
|
||||
2DC8C86D397B1FCA08E07CBD /* DownloadModalView.swift */,
|
||||
C3C3A76C02AF70A9D8F868FC /* ModelPickerView.swift */,
|
||||
4239CFF94B819C35A8D4D617 /* MonitorView.swift */,
|
||||
4147321383E94E9F17A0154E /* SettingsView.swift */,
|
||||
@@ -240,6 +243,7 @@
|
||||
5C1E8FE1C521914CEF98D3AA /* ChatMessagesView.swift in Sources */,
|
||||
B5AA6E3B4BE21676226B342B /* ChatViewModel.swift in Sources */,
|
||||
5946258F1DE88CE904584E0B /* ContentView.swift in Sources */,
|
||||
C07A377244DCD67F4FE709FE /* DownloadModalView.swift in Sources */,
|
||||
2D08769282BD71C170DB0943 /* InferenceStats.swift in Sources */,
|
||||
6828CCA8B78AB40906F87CAB /* LocalModelResolver.swift in Sources */,
|
||||
50B6861FF8610B3ED4FFAD9D /* MLXServerApp.swift in Sources */,
|
||||
|
||||
@@ -56,14 +56,23 @@ struct ContentView: View {
|
||||
|
||||
@ViewBuilder
|
||||
private var mainContent: some View {
|
||||
if let chatVM {
|
||||
if showMonitor {
|
||||
MonitorView(stats: chatVM.apiServer.inferenceStats)
|
||||
ZStack {
|
||||
if let chatVM {
|
||||
if showMonitor {
|
||||
MonitorView(stats: chatVM.apiServer.inferenceStats)
|
||||
} else {
|
||||
ChatView(viewModel: chatVM)
|
||||
}
|
||||
} else {
|
||||
ChatView(viewModel: chatVM)
|
||||
ProgressView("Initializing…")
|
||||
}
|
||||
|
||||
// Download modal overlay
|
||||
if modelManager.isDownloading {
|
||||
Color.black.opacity(0.3)
|
||||
.ignoresSafeArea()
|
||||
DownloadModalView()
|
||||
}
|
||||
} else {
|
||||
ProgressView("Initializing…")
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -10,6 +10,16 @@ struct ChatMessage: Identifiable {
|
||||
var isStreaming: Bool
|
||||
let timestamp: Date
|
||||
|
||||
/// Raw streamed text including <think> tags (only for assistant messages).
|
||||
/// `content` and `thinkingContent` are derived from this.
|
||||
var rawContent: String = ""
|
||||
|
||||
/// The thinking/reasoning content extracted from <think>...</think> tags.
|
||||
var thinkingContent: String = ""
|
||||
|
||||
/// Whether the model is currently in a thinking block.
|
||||
var isThinking: Bool = false
|
||||
|
||||
enum Role: String {
|
||||
case system
|
||||
case user
|
||||
@@ -19,6 +29,7 @@ struct ChatMessage: Identifiable {
|
||||
init(role: Role, content: String, images: [NSImage] = [], isStreaming: Bool = false) {
|
||||
self.role = role
|
||||
self.content = content
|
||||
self.rawContent = content
|
||||
self.images = images
|
||||
self.isStreaming = isStreaming
|
||||
self.timestamp = Date()
|
||||
@@ -43,15 +54,53 @@ final class Conversation {
|
||||
}
|
||||
|
||||
/// Appends a streamed text chunk to the message at `index` and re-derives its display fields.
///
/// The chunk is added to `rawContent`; `content`, `thinkingContent` and `isThinking` are then
/// recomputed by scanning the whole raw text for `<think>` / `</think>` tags. Re-scanning on
/// every chunk costs time proportional to the message length, but it stays correct when a tag
/// is split across two chunks.
///
/// - Parameters:
///   - index: Position of the message in `messages`. Out-of-range values (including negative
///     ones) are ignored.
///   - chunk: Newly streamed text, possibly containing whole or partial think tags.
func appendToMessage(at index: Int, chunk: String) {
    guard messages.indices.contains(index) else { return }

    // Only the raw text is accumulated. `content` is derived from it below, so appending
    // the chunk to `content` as well would just be overwritten.
    messages[index].rawContent += chunk

    let openTag = "<think>"
    let closeTag = "</think>"
    let raw = messages[index].rawContent

    var thinking = ""
    var visible = ""
    var isInThink = false
    var remainder = raw[...]

    // A closing tag that shows up before any opening tag means the reasoning began without
    // an explicit `<think>`. Treat everything before it as thinking instead of leaking the
    // reasoning and a stray `</think>` into the visible reply.
    // NOTE(review): presumably this happens when the chat template pre-fills the opening
    // tag in the prompt — confirm against the templates of the supported models.
    if let firstClose = remainder.range(of: closeTag) {
        let openComesFirst = remainder.range(of: openTag)
            .map { $0.lowerBound < firstClose.lowerBound } ?? false
        if !openComesFirst {
            thinking.append(contentsOf: remainder[..<firstClose.lowerBound])
            remainder = remainder[firstClose.upperBound...]
        }
    }

    while !remainder.isEmpty {
        // Inside a think block we look for its end; outside we look for the next start.
        let nextTag = isInThink ? closeTag : openTag

        guard let tagRange = remainder.range(of: nextTag) else {
            // No further tag: the rest belongs entirely to the current section.
            if isInThink {
                thinking.append(contentsOf: remainder)
            } else {
                visible.append(contentsOf: remainder)
            }
            break
        }

        let section = remainder[..<tagRange.lowerBound]
        if isInThink {
            thinking.append(contentsOf: section)
        } else {
            visible.append(contentsOf: section)
        }
        remainder = remainder[tagRange.upperBound...]
        isInThink.toggle()
    }

    messages[index].thinkingContent = thinking.trimmingCharacters(in: .whitespacesAndNewlines)
    messages[index].content = visible.trimmingCharacters(in: .whitespacesAndNewlines)
    // Still `true` here only when the raw text ends inside an unterminated think block.
    messages[index].isThinking = isInThink
}
|
||||
|
||||
/// Marks the message at `index` as finished by clearing its streaming and thinking flags.
///
/// - Parameter index: Position of the message in `messages`. Out-of-range values
///   (including negative ones) are ignored instead of trapping.
func finalizeMessage(at index: Int) {
    guard messages.indices.contains(index) else { return }
    messages[index].isStreaming = false
    // A stream that ends inside an unterminated `<think>` block leaves `isThinking` set;
    // reset it so a finished message never reports that it is still thinking.
    messages[index].isThinking = false
}
|
||||
|
||||
func clear() {
|
||||
|
||||
@@ -22,6 +22,12 @@ struct ModelConfig: Identifiable, Hashable {
|
||||
displayName: "Qwen3 VL 4B",
|
||||
contextLength: 256_000
|
||||
),
|
||||
ModelConfig(
|
||||
id: "qwen3.5-9b",
|
||||
repoId: "mlx-community/Qwen3.5-9B-4bit",
|
||||
displayName: "Qwen3.5 9B",
|
||||
contextLength: 256_000
|
||||
),
|
||||
]
|
||||
|
||||
static let `default` = availableModels[0]
|
||||
|
||||
@@ -347,18 +347,23 @@ final class APIServer {
|
||||
// Only conversation turns go in `history:` — this avoids replaying the
|
||||
// large tool prompt as history on every new session.
|
||||
let instr = instructions.isEmpty ? nil : instructions
|
||||
let thinkingContext: [String: any Sendable]? = Preferences.enableThinking
|
||||
? nil
|
||||
: ["enable_thinking": false]
|
||||
if !allButLast.isEmpty {
|
||||
session = ChatSession(
|
||||
container,
|
||||
instructions: instr,
|
||||
history: allButLast,
|
||||
generateParameters: generateParams
|
||||
generateParameters: generateParams,
|
||||
additionalContext: thinkingContext
|
||||
)
|
||||
} else {
|
||||
session = ChatSession(
|
||||
container,
|
||||
instructions: instr,
|
||||
generateParameters: generateParams
|
||||
generateParameters: generateParams,
|
||||
additionalContext: thinkingContext
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -464,6 +469,7 @@ final class APIServer {
|
||||
}
|
||||
|
||||
LiveCounters.shared.requestCompleted(generationTokens: completionTokens)
|
||||
modelManager?.touchActivity()
|
||||
|
||||
// Parse tool calls: first check framework-detected ones, then our own text parser
|
||||
var finishReason = "stop"
|
||||
@@ -536,6 +542,7 @@ final class APIServer {
|
||||
}
|
||||
} catch {
|
||||
LiveCounters.shared.requestCompleted(generationTokens: 0)
|
||||
modelManager?.touchActivity()
|
||||
sendResponse(connection: connection, status: 500, body: #"{"error":"\#(error.localizedDescription)"}"#)
|
||||
}
|
||||
}
|
||||
@@ -671,6 +678,7 @@ final class APIServer {
|
||||
))
|
||||
|
||||
LiveCounters.shared.requestCompleted(generationTokens: completionTokens)
|
||||
modelManager?.touchActivity()
|
||||
|
||||
// Send [DONE] and close
|
||||
await Self.sendData(connection: connection, data: "data: [DONE]\n\n".data(using: .utf8)!)
|
||||
|
||||
@@ -3,14 +3,43 @@ import Foundation
|
||||
/// Resolves HuggingFace model repos to local snapshot directories,
|
||||
/// matching the cache layout used by Python's `huggingface_hub`.
|
||||
///
|
||||
/// Checks two locations:
|
||||
/// 1. App sandbox container: ~/Library/Containers/com.mlxserver.app/.../huggingface/hub/
|
||||
/// 2. System-wide cache: ~/.cache/huggingface/hub/ (shared with Python tools)
|
||||
///
|
||||
/// Cache structure:
|
||||
/// ~/.cache/huggingface/hub/models--{org}--{name}/snapshots/{hash}/
|
||||
/// .../huggingface/hub/models--{org}--{name}/snapshots/{hash}/
|
||||
enum LocalModelResolver {
|
||||
|
||||
/// The standard HuggingFace cache directory used by Python's `huggingface_hub`.
|
||||
private static let cacheBase: URL = {
|
||||
FileManager.default.homeDirectoryForCurrentUser
|
||||
/// All HuggingFace cache directories to search, in priority order.
///
/// 1. `Library/Caches/huggingface/hub` under the process home directory
///    (where the app downloads to).
/// 2. `.cache/huggingface/hub` under the user's real home directory
///    (models downloaded via Python `huggingface_hub` / `huggingface-cli`).
/// 3. `.cache/huggingface/hub` under the process home directory, when that differs from (2).
///
/// In a sandboxed process both `NSHomeDirectory()` and
/// `FileManager.homeDirectoryForCurrentUser` return the app container, so the real home
/// directory is looked up in the password database instead. Paths that resolve to the same
/// location are listed only once.
private static let cacheBases: [URL] = {
    let fileManager = FileManager.default
    let processHome = fileManager.homeDirectoryForCurrentUser

    // The password database entry is not redirected by the sandbox.
    // NOTE(review): a sandboxed build can only read this location if its entitlements
    // allow it — confirm; an unreadable directory is simply skipped by `resolve`.
    let realHome: URL
    if let entry = getpwuid(getuid()), let homePath = entry.pointee.pw_dir {
        realHome = URL(fileURLWithPath: String(cString: homePath))
    } else {
        // Lookup failed: fall back to the previous behaviour.
        realHome = URL(fileURLWithPath: NSHomeDirectory())
    }

    let candidates = [
        processHome.appendingPathComponent("Library/Caches/huggingface/hub", isDirectory: true),
        realHome.appendingPathComponent(".cache/huggingface/hub", isDirectory: true),
        processHome.appendingPathComponent(".cache/huggingface/hub", isDirectory: true),
    ]

    // Keep the first occurrence of each path so priority order is preserved.
    var bases: [URL] = []
    for candidate in candidates where !bases.contains(where: { $0.path == candidate.path }) {
        bases.append(candidate)
    }
    return bases
}()
|
||||
|
||||
/// Resolve a HuggingFace repo ID (e.g. "mlx-community/gemma-3-4b-it-4bit")
|
||||
@@ -18,30 +47,71 @@ enum LocalModelResolver {
|
||||
///
|
||||
/// Returns `nil` if the model hasn't been downloaded yet.
|
||||
static func resolve(repoId: String) -> URL? {
|
||||
// Convert "mlx-community/gemma-3-4b-it-4bit" → "models--mlx-community--gemma-3-4b-it-4bit"
|
||||
let dirName = "models--" + repoId.replacingOccurrences(of: "/", with: "--")
|
||||
let snapshotsDir = cacheBase
|
||||
.appendingPathComponent(dirName, isDirectory: true)
|
||||
.appendingPathComponent("snapshots", isDirectory: true)
|
||||
|
||||
// Find the first (usually only) snapshot hash directory
|
||||
guard let contents = try? FileManager.default.contentsOfDirectory(
|
||||
at: snapshotsDir,
|
||||
includingPropertiesForKeys: [.isDirectoryKey],
|
||||
options: [.skipsHiddenFiles]
|
||||
) else {
|
||||
return nil
|
||||
for cacheBase in cacheBases {
|
||||
let snapshotsDir = cacheBase
|
||||
.appendingPathComponent(dirName, isDirectory: true)
|
||||
.appendingPathComponent("snapshots", isDirectory: true)
|
||||
|
||||
guard let contents = try? FileManager.default.contentsOfDirectory(
|
||||
at: snapshotsDir,
|
||||
includingPropertiesForKeys: [.isDirectoryKey],
|
||||
options: [.skipsHiddenFiles]
|
||||
) else {
|
||||
continue
|
||||
}
|
||||
|
||||
if let snapshot = contents
|
||||
.filter({ (try? $0.resourceValues(forKeys: [.isDirectoryKey]).isDirectory) == true })
|
||||
.sorted(by: { $0.lastPathComponent < $1.lastPathComponent })
|
||||
.last {
|
||||
return snapshot
|
||||
}
|
||||
}
|
||||
|
||||
// Return the most recent snapshot (last alphabetically = latest hash)
|
||||
return contents
|
||||
.filter { (try? $0.resourceValues(forKeys: [.isDirectoryKey]).isDirectory) == true }
|
||||
.sorted(by: { $0.lastPathComponent < $1.lastPathComponent })
|
||||
.last
|
||||
return nil
|
||||
}
|
||||
|
||||
/// Reports whether `resolve(repoId:)` finds a local snapshot for the given repo.
static func isAvailable(repoId: String) -> Bool {
    let localSnapshot = resolve(repoId: repoId)
    return localSnapshot != nil
}
|
||||
|
||||
/// Removes every locally cached copy of `repoId` so the next load fetches it again.
///
/// The `models--{org}--{name}` folder is removed from each directory in `cacheBases`, followed
/// by `Library/Caches/models/{repoId}` under the home directory. A failed removal is logged
/// and does not stop the remaining locations from being processed.
///
/// - Parameter repoId: HuggingFace repo ID, e.g. "mlx-community/gemma-3-4b-it-4bit".
/// - Returns: `true` if at least one directory was removed.
@discardableResult
static func deleteLocal(repoId: String) -> Bool {
    let fileManager = FileManager.default

    // Deletes `directory` when it exists; reports whether it was actually removed.
    func removeIfPresent(_ directory: URL) -> Bool {
        guard fileManager.fileExists(atPath: directory.path) else { return false }
        do {
            try fileManager.removeItem(at: directory)
            print("[LocalModelResolver] Deleted \(directory.path)")
            return true
        } catch {
            print("[LocalModelResolver] Failed to delete \(directory.path): \(error)")
            return false
        }
    }

    let folderName = "models--" + repoId.replacingOccurrences(of: "/", with: "--")
    var targets = cacheBases.map { $0.appendingPathComponent(folderName, isDirectory: true) }

    // The per-model cache in the container (used by swift-transformers) goes last.
    targets.append(
        fileManager.homeDirectoryForCurrentUser
            .appendingPathComponent("Library/Caches/models", isDirectory: true)
            .appendingPathComponent(repoId, isDirectory: true)
    )

    var removedAny = false
    for target in targets where removeIfPresent(target) {
        removedAny = true
    }
    return removedAny
}
|
||||
}
|
||||
|
||||
@@ -49,6 +49,17 @@ enum Preferences {
|
||||
set { defaults.set(newValue, forKey: apiAutoStartKey) }
|
||||
}
|
||||
|
||||
// MARK: - Thinking mode
|
||||
|
||||
private static let enableThinkingKey = "enableThinking"
|
||||
|
||||
/// Controls thinking/reasoning mode for models that support it (e.g. Qwen3.5).
///
/// Reads as `true` until a value has been stored under `enableThinkingKey`. When `false`,
/// the model skips internal reasoning and responds directly.
static var enableThinking: Bool {
    get {
        // No stored value yet: thinking is on by default.
        guard defaults.object(forKey: enableThinkingKey) != nil else { return true }
        return defaults.bool(forKey: enableThinkingKey)
    }
    set {
        defaults.set(newValue, forKey: enableThinkingKey)
    }
}
|
||||
|
||||
// MARK: - Idle unload
|
||||
|
||||
private static let idleUnloadMinutesKey = "idleUnloadMinutes"
|
||||
|
||||
@@ -31,10 +31,16 @@ final class ChatViewModel {
|
||||
guard let container = modelManager.modelContainer else { return }
|
||||
if chatSession == nil {
|
||||
let systemPrompt = Preferences.systemPrompt
|
||||
// Pass enable_thinking to the Jinja chat template context.
|
||||
// Qwen3.5 and similar models use this to control reasoning mode.
|
||||
let thinkingContext: [String: any Sendable]? = Preferences.enableThinking
|
||||
? nil
|
||||
: ["enable_thinking": false]
|
||||
chatSession = ChatSession(
|
||||
container,
|
||||
instructions: systemPrompt.isEmpty ? nil : systemPrompt,
|
||||
generateParameters: GenerateParameters(temperature: 0.7)
|
||||
generateParameters: GenerateParameters(temperature: 0.7),
|
||||
additionalContext: thinkingContext
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -113,6 +119,7 @@ final class ChatViewModel {
|
||||
conversation.finalizeMessage(at: assistantIndex)
|
||||
isGenerating = false
|
||||
generationTask = nil
|
||||
modelManager.touchActivity()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import Foundation
|
||||
import Hub
|
||||
import MLX
|
||||
import MLXLMCommon
|
||||
import MLXVLM
|
||||
@@ -7,6 +8,11 @@ import MLXVLM
|
||||
@Observable
|
||||
@MainActor
|
||||
final class ModelManager {
|
||||
|
||||
/// HubApi with blob cache disabled to avoid storing every model twice.
|
||||
/// swift-huggingface defaults to caching in both huggingface/hub/ (snapshots)
|
||||
/// AND models/ (content-addressed blobs). We only need the snapshots.
|
||||
private static let hub = HubApi(cache: nil)
|
||||
var currentModel: ModelConfig?
|
||||
var modelContainer: ModelContainer?
|
||||
var isLoading = false
|
||||
@@ -14,6 +20,12 @@ final class ModelManager {
|
||||
var loadingModelName: String = ""
|
||||
var errorMessage: String?
|
||||
|
||||
// Download-specific state for the modal
|
||||
var isDownloading = false
|
||||
var downloadFilesTotal: Int64 = 0
|
||||
var downloadFilesCompleted: Int64 = 0
|
||||
var downloadSpeed: Double = 0 // bytes/sec
|
||||
|
||||
private var idleTimer: Timer?
|
||||
private(set) var lastUsed: Date?
|
||||
|
||||
@@ -31,11 +43,26 @@ final class ModelManager {
|
||||
loadingModelName = config.displayName
|
||||
errorMessage = nil
|
||||
|
||||
let needsDownload = !config.isLocal
|
||||
if needsDownload {
|
||||
isDownloading = true
|
||||
downloadFilesTotal = 0
|
||||
downloadFilesCompleted = 0
|
||||
downloadSpeed = 0
|
||||
}
|
||||
|
||||
do {
|
||||
let container: ModelContainer
|
||||
let progressHandler: @Sendable (Progress) -> Void = { progress in
|
||||
Task { @MainActor in
|
||||
self.downloadProgress = progress.fractionCompleted
|
||||
if self.isDownloading {
|
||||
self.downloadFilesTotal = progress.totalUnitCount
|
||||
self.downloadFilesCompleted = progress.completedUnitCount
|
||||
if let speed = progress.userInfo[.throughputKey] as? Double {
|
||||
self.downloadSpeed = speed
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -47,20 +74,30 @@ final class ModelManager {
|
||||
}
|
||||
|
||||
container = try await VLMModelFactory.shared.loadContainer(
|
||||
hub: Self.hub,
|
||||
configuration: configuration,
|
||||
progressHandler: progressHandler
|
||||
)
|
||||
|
||||
self.isDownloading = false
|
||||
self.modelContainer = container
|
||||
self.currentModel = config
|
||||
touchActivity()
|
||||
} catch {
|
||||
self.isDownloading = false
|
||||
self.errorMessage = "Failed to load model: \(error.localizedDescription)"
|
||||
}
|
||||
|
||||
isLoading = false
|
||||
}
|
||||
|
||||
/// Delete local cache and re-download a model.
|
||||
func redownloadModel(_ config: ModelConfig) async {
|
||||
unloadModel()
|
||||
LocalModelResolver.deleteLocal(repoId: config.repoId)
|
||||
await loadModel(config)
|
||||
}
|
||||
|
||||
/// Unload the current model and free GPU memory.
|
||||
func unloadModel() {
|
||||
idleTimer?.invalidate()
|
||||
|
||||
@@ -57,6 +57,7 @@ struct ChatMessagesView: View {
|
||||
|
||||
struct MessageBubbleView: View {
|
||||
let message: ChatMessage
|
||||
@State private var showThinking = false
|
||||
|
||||
var body: some View {
|
||||
HStack {
|
||||
@@ -76,11 +77,16 @@ struct MessageBubbleView: View {
|
||||
}
|
||||
}
|
||||
|
||||
// Thinking block (collapsible)
|
||||
if !message.thinkingContent.isEmpty || message.isThinking {
|
||||
thinkingView
|
||||
}
|
||||
|
||||
// Message content
|
||||
if !message.content.isEmpty || message.isStreaming {
|
||||
if !message.content.isEmpty || (message.isStreaming && !message.isThinking) {
|
||||
Group {
|
||||
if message.role == .assistant {
|
||||
Markdown(message.content + (message.isStreaming ? " ●" : ""))
|
||||
Markdown(message.content + (message.isStreaming && !message.isThinking ? " ●" : ""))
|
||||
.textSelection(.enabled)
|
||||
} else {
|
||||
Text(message.content)
|
||||
@@ -101,4 +107,43 @@ struct MessageBubbleView: View {
|
||||
if message.role == .assistant { Spacer(minLength: 60) }
|
||||
}
|
||||
}
|
||||
|
||||
/// Collapsible panel for the message's reasoning text.
///
/// The header button toggles `showThinking`. While `message.isThinking` is set it shows a
/// spinner with "Thinking…", otherwise a brain icon with "Thought". The reasoning text is
/// rendered only when expanded, with a trailing " ●" marker while thinking is in progress.
private var thinkingView: some View {
    let isExpanded = showThinking
    let disclosureIcon = isExpanded ? "chevron.down" : "chevron.right"
    let reasoningText = message.thinkingContent + (message.isThinking ? " ●" : "")

    return VStack(alignment: .leading, spacing: 0) {
        Button(
            action: {
                withAnimation(.easeInOut(duration: 0.15)) {
                    showThinking.toggle()
                }
            },
            label: {
                HStack(spacing: 4) {
                    Image(systemName: disclosureIcon)
                        .font(.caption2)
                    if message.isThinking {
                        ProgressView()
                            .controlSize(.mini)
                        Text("Thinking…")
                    } else {
                        Image(systemName: "brain")
                        Text("Thought")
                    }
                }
                .font(.caption)
                .foregroundStyle(.secondary)
            }
        )
        .buttonStyle(.plain)

        if isExpanded {
            Text(reasoningText)
                .font(.caption)
                .foregroundStyle(.tertiary)
                .textSelection(.enabled)
                .padding(.top, 4)
                .padding(.leading, 14)
        }
    }
    .padding(.horizontal, 12)
    .padding(.vertical, 6)
    .background(Color.purple.opacity(0.06))
    .clipShape(RoundedRectangle(cornerRadius: 8))
}
|
||||
}
|
||||
|
||||
68
MLXServer/Views/DownloadModalView.swift
Normal file
68
MLXServer/Views/DownloadModalView.swift
Normal file
@@ -0,0 +1,68 @@
|
||||
import SwiftUI
|
||||
|
||||
/// Overlay card displayed while a model is being downloaded from HuggingFace.
///
/// Reads the model name, overall progress, file counters and transfer speed from the
/// `ModelManager` in the environment. The file counter is shown only once a total is known
/// and the speed line only while the reported speed is positive.
struct DownloadModalView: View {
    @Environment(ModelManager.self) private var modelManager

    var body: some View {
        VStack(spacing: 20) {
            header
            progressSection
            cacheNote
        }
        .padding(32)
        .frame(width: 380)
        .background(.regularMaterial, in: RoundedRectangle(cornerRadius: 16))
        .shadow(radius: 20)
    }

    /// Title plus the name of the model being fetched.
    @ViewBuilder
    private var header: some View {
        Label("Downloading Model", systemImage: "arrow.down.circle")
            .font(.headline)

        Text(modelManager.loadingModelName)
            .font(.title3.weight(.medium))
            .foregroundStyle(.primary)
    }

    /// Linear progress bar with the file counter, percentage and speed underneath.
    private var progressSection: some View {
        VStack(spacing: 8) {
            ProgressView(value: modelManager.downloadProgress)
                .progressViewStyle(.linear)

            HStack {
                if modelManager.downloadFilesTotal > 0 {
                    Text("File \(modelManager.downloadFilesCompleted)/\(modelManager.downloadFilesTotal)")
                        .font(.caption.monospacedDigit())
                        .foregroundStyle(.secondary)
                }

                Spacer()

                Text("\(Int(modelManager.downloadProgress * 100))%")
                    .font(.caption.monospacedDigit())
                    .foregroundStyle(.secondary)
            }

            if modelManager.downloadSpeed > 0 {
                Text(formatSpeed(modelManager.downloadSpeed))
                    .font(.caption.monospacedDigit())
                    .foregroundStyle(.tertiary)
            }
        }
    }

    /// Reassurance that the download only happens once.
    private var cacheNote: some View {
        Text("The model will be cached locally for future use.")
            .font(.caption)
            .foregroundStyle(.tertiary)
            .multilineTextAlignment(.center)
    }

    /// Formats a transfer rate using binary units (1 KB = 1024 B).
    ///
    /// GB/s and MB/s keep one decimal place; KB/s and B/s are rounded to whole numbers.
    ///
    /// - Parameter bytesPerSec: Transfer rate in bytes per second.
    /// - Returns: A label such as "3.2 MB/s".
    private func formatSpeed(_ bytesPerSec: Double) -> String {
        // Largest unit first; the first threshold the rate reaches wins.
        let units: [(threshold: Double, format: String)] = [
            (1_073_741_824, "%.1f GB/s"),
            (1_048_576, "%.1f MB/s"),
            (1024, "%.0f KB/s"),
        ]

        guard let unit = units.first(where: { bytesPerSec >= $0.threshold }) else {
            return String(format: "%.0f B/s", bytesPerSec)
        }
        return String(format: unit.format, bytesPerSec / unit.threshold)
    }
}
|
||||
@@ -2,6 +2,7 @@ import SwiftUI
|
||||
|
||||
struct ModelPickerView: View {
|
||||
@Environment(ModelManager.self) private var modelManager
|
||||
@State private var confirmRedownload: ModelConfig?
|
||||
|
||||
var body: some View {
|
||||
HStack(spacing: 8) {
|
||||
@@ -15,6 +16,35 @@ struct ModelPickerView: View {
|
||||
}
|
||||
.frame(width: 160)
|
||||
.disabled(modelManager.isLoading)
|
||||
|
||||
// Re-download button (visible when a model is loaded)
|
||||
if let current = modelManager.currentModel, !modelManager.isLoading {
|
||||
Button {
|
||||
confirmRedownload = current
|
||||
} label: {
|
||||
Image(systemName: "arrow.clockwise")
|
||||
.font(.caption)
|
||||
}
|
||||
.buttonStyle(.borderless)
|
||||
.help("Re-download \(current.displayName)")
|
||||
}
|
||||
}
|
||||
.alert("Re-download Model?", isPresented: .init(
|
||||
get: { confirmRedownload != nil },
|
||||
set: { if !$0 { confirmRedownload = nil } }
|
||||
)) {
|
||||
Button("Re-download", role: .destructive) {
|
||||
if let config = confirmRedownload {
|
||||
Task { await modelManager.redownloadModel(config) }
|
||||
}
|
||||
}
|
||||
Button("Cancel", role: .cancel) {
|
||||
confirmRedownload = nil
|
||||
}
|
||||
} message: {
|
||||
if let config = confirmRedownload {
|
||||
Text("This will delete the local cache for \(config.displayName) and download it again from HuggingFace.")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@ struct SettingsView: View {
|
||||
@State private var apiAutoStart: Bool = Preferences.apiAutoStart
|
||||
@State private var idleUnloadMinutes: String = String(Preferences.idleUnloadMinutes)
|
||||
@State private var defaultModelId: String = Preferences.defaultModelId ?? ModelConfig.default.id
|
||||
@State private var enableThinking: Bool = Preferences.enableThinking
|
||||
|
||||
var body: some View {
|
||||
Form {
|
||||
@@ -24,6 +25,17 @@ struct SettingsView: View {
|
||||
.foregroundStyle(.secondary)
|
||||
}
|
||||
|
||||
Section("Generation") {
|
||||
Toggle("Enable thinking mode", isOn: $enableThinking)
|
||||
.onChange(of: enableThinking) {
|
||||
Preferences.enableThinking = enableThinking
|
||||
}
|
||||
|
||||
Text("When enabled, models like Qwen3.5 reason internally before responding. Produces better answers but slower. Takes effect on the next conversation.")
|
||||
.font(.caption)
|
||||
.foregroundStyle(.secondary)
|
||||
}
|
||||
|
||||
Section("System Prompt") {
|
||||
TextEditor(text: $systemPrompt)
|
||||
.font(.body.monospaced())
|
||||
@@ -75,6 +87,6 @@ struct SettingsView: View {
|
||||
}
|
||||
}
|
||||
.formStyle(.grouped)
|
||||
.frame(width: 450, height: 460)
|
||||
.frame(width: 450, height: 550)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user