diff --git a/MLXServer.xcodeproj/project.pbxproj b/MLXServer.xcodeproj/project.pbxproj index ceb748b..2c00ae2 100644 --- a/MLXServer.xcodeproj/project.pbxproj +++ b/MLXServer.xcodeproj/project.pbxproj @@ -11,6 +11,7 @@ 165E8AB6ADAE1D59B1A86420 /* Preferences.swift in Sources */ = {isa = PBXBuildFile; fileRef = 145B888FBDD4F931512C5473 /* Preferences.swift */; }; 189362AAE2CDE5D4B3428334 /* ToolCallParser.swift in Sources */ = {isa = PBXBuildFile; fileRef = E73B165A1822729C907791AE /* ToolCallParser.swift */; }; 2CAAF7129F7CC45200FA9F6B /* ModelPickerView.swift in Sources */ = {isa = PBXBuildFile; fileRef = C3C3A76C02AF70A9D8F868FC /* ModelPickerView.swift */; }; + 2D08769282BD71C170DB0943 /* InferenceStats.swift in Sources */ = {isa = PBXBuildFile; fileRef = E35452B166893B25E765FF70 /* InferenceStats.swift */; }; 4CB13DC1AC7A500DDBB443EC /* ChatInputView.swift in Sources */ = {isa = PBXBuildFile; fileRef = E5E6AD02CDF23BDAB64700A7 /* ChatInputView.swift */; }; 50B6861FF8610B3ED4FFAD9D /* MLXServerApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = C67742651DB486871CEF1612 /* MLXServerApp.swift */; }; 50DD129CCF2843482DEC3B96 /* APIServer.swift in Sources */ = {isa = PBXBuildFile; fileRef = 3D08828E16B17EF02C14243E /* APIServer.swift */; }; @@ -22,6 +23,7 @@ 80646C5066BF79BC76E1D9D7 /* ModelConfig.swift in Sources */ = {isa = PBXBuildFile; fileRef = 38DFC212AF4359A45FBE22BA /* ModelConfig.swift */; }; 84D32315B418B5243E017350 /* ToolPromptBuilder.swift in Sources */ = {isa = PBXBuildFile; fileRef = 16AE82A64D1D07AE3CD8D33A /* ToolPromptBuilder.swift */; }; 945474365D0B3E961811909A /* MLXVLM in Frameworks */ = {isa = PBXBuildFile; productRef = D5E8E1C2DD8D8AABB4306193 /* MLXVLM */; }; + B1D9BC407DB7DB1489230C20 /* MonitorView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4239CFF94B819C35A8D4D617 /* MonitorView.swift */; }; B5AA6E3B4BE21676226B342B /* ChatViewModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = B8BD93859F0291F1A3E09DA5 /* 
ChatViewModel.swift */; }; B6D3662995B885C102876B4A /* MLXLMCommon in Frameworks */ = {isa = PBXBuildFile; productRef = 9090667D4134056AE66DC2F1 /* MLXLMCommon */; }; D666A311788375E8A061C832 /* SettingsView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4147321383E94E9F17A0154E /* SettingsView.swift */; }; @@ -38,6 +40,7 @@ 3AF462805202797F61422AEE /* MLXServer.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = MLXServer.entitlements; sourceTree = ""; }; 3D08828E16B17EF02C14243E /* APIServer.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = APIServer.swift; sourceTree = ""; }; 4147321383E94E9F17A0154E /* SettingsView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SettingsView.swift; sourceTree = ""; }; + 4239CFF94B819C35A8D4D617 /* MonitorView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MonitorView.swift; sourceTree = ""; }; 6EE59189918D06B8D2F588FC /* MLXServer.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = MLXServer.app; sourceTree = BUILT_PRODUCTS_DIR; }; 922CBDC9206737BD04AF2874 /* ModelManager.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ModelManager.swift; sourceTree = ""; }; 944C699FBB76C734C9DF2F2E /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = ""; }; @@ -49,6 +52,7 @@ C67742651DB486871CEF1612 /* MLXServerApp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MLXServerApp.swift; sourceTree = ""; }; D733A0D1D4AC25DDDA6C8684 /* LocalModelResolver.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LocalModelResolver.swift; sourceTree = ""; }; DB1A5E8B1C9F2BC4D262C53A /* ChatMessagesView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = 
ChatMessagesView.swift; sourceTree = ""; }; + E35452B166893B25E765FF70 /* InferenceStats.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = InferenceStats.swift; sourceTree = ""; }; E5E6AD02CDF23BDAB64700A7 /* ChatInputView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ChatInputView.swift; sourceTree = ""; }; E73B165A1822729C907791AE /* ToolCallParser.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ToolCallParser.swift; sourceTree = ""; }; F1A52E2C9964ADA9D841A89B /* APIModels.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = APIModels.swift; sourceTree = ""; }; @@ -108,6 +112,7 @@ E5E6AD02CDF23BDAB64700A7 /* ChatInputView.swift */, DB1A5E8B1C9F2BC4D262C53A /* ChatMessagesView.swift */, C3C3A76C02AF70A9D8F868FC /* ModelPickerView.swift */, + 4239CFF94B819C35A8D4D617 /* MonitorView.swift */, 4147321383E94E9F17A0154E /* SettingsView.swift */, B0EAB35D7130D56B9E7484BA /* StatusBarView.swift */, ); @@ -118,6 +123,7 @@ isa = PBXGroup; children = ( A4B359324B5FD8D106C74338 /* ChatMessage.swift */, + E35452B166893B25E765FF70 /* InferenceStats.swift */, 38DFC212AF4359A45FBE22BA /* ModelConfig.swift */, ); path = Models; @@ -234,11 +240,13 @@ 5C1E8FE1C521914CEF98D3AA /* ChatMessagesView.swift in Sources */, B5AA6E3B4BE21676226B342B /* ChatViewModel.swift in Sources */, 5946258F1DE88CE904584E0B /* ContentView.swift in Sources */, + 2D08769282BD71C170DB0943 /* InferenceStats.swift in Sources */, 6828CCA8B78AB40906F87CAB /* LocalModelResolver.swift in Sources */, 50B6861FF8610B3ED4FFAD9D /* MLXServerApp.swift in Sources */, 80646C5066BF79BC76E1D9D7 /* ModelConfig.swift in Sources */, 0168AEE16009097901363E16 /* ModelManager.swift in Sources */, 2CAAF7129F7CC45200FA9F6B /* ModelPickerView.swift in Sources */, + B1D9BC407DB7DB1489230C20 /* MonitorView.swift in Sources */, 165E8AB6ADAE1D59B1A86420 /* Preferences.swift in Sources */, 
D666A311788375E8A061C832 /* SettingsView.swift in Sources */, 621B7E4382199AC1378F5F9C /* StatusBarView.swift in Sources */, diff --git a/MLXServer/ContentView.swift b/MLXServer/ContentView.swift index 62ced89..4b1d67a 100644 --- a/MLXServer/ContentView.swift +++ b/MLXServer/ContentView.swift @@ -4,86 +4,110 @@ struct ContentView: View { @Environment(ModelManager.self) private var modelManager @State private var chatVM: ChatViewModel? @State private var showLoadError = false + @State private var showMonitor = false var body: some View { - Group { - if let chatVM { - ChatView(viewModel: chatVM) - } else { - ProgressView("Initializing…") - } - } - .navigationTitle(modelManager.currentModel?.displayName ?? "MLX Server") - .onAppear { - if chatVM == nil { - chatVM = ChatViewModel(modelManager: modelManager) - // Auto-start API server if configured - if Preferences.apiAutoStart { - chatVM?.startAPIServer() - } - } - } - .onChange(of: modelManager.currentModel) { - chatVM?.resetSession() - // Persist last used model - if let id = modelManager.currentModel?.id { - Preferences.lastModelId = id - } - } - .onChange(of: modelManager.errorMessage) { - showLoadError = modelManager.errorMessage != nil - } - .alert("Model Error", isPresented: $showLoadError) { - Button("Retry") { - if let config = modelManager.currentModel ?? ModelConfig.availableModels.first { - Task { await modelManager.loadModel(config) } - } - } - Button("Cancel", role: .cancel) { - modelManager.errorMessage = nil - } - } message: { - Text(modelManager.errorMessage ?? "Unknown error loading model.") - } - .toolbar { - ToolbarItem(placement: .principal) { - ModelPickerView() - } - - ToolbarItemGroup(placement: .primaryAction) { - // API server toggle - Button { - if let chatVM { - if chatVM.apiServer.isRunning { - chatVM.stopAPIServer() - } else { - chatVM.startAPIServer() - } + mainContent + .navigationTitle(modelManager.currentModel?.displayName ?? 
"MLX Server") + .onAppear { + if chatVM == nil { + chatVM = ChatViewModel(modelManager: modelManager) + // Auto-start API server if configured + if Preferences.apiAutoStart { + chatVM?.startAPIServer() } - } label: { - // Running → solid globe (green tint), click to stop - // Stopped → slashed globe, click to start - Label( - chatVM?.apiServer.isRunning == true ? "Stop API" : "Start API", - systemImage: chatVM?.apiServer.isRunning == true ? "network" : "network.slash" - ) - .foregroundStyle(chatVM?.apiServer.isRunning == true ? .green : .secondary) } - .help(chatVM?.apiServer.isRunning == true ? "API server running on port \(Preferences.apiPort) — click to stop" : "Click to start API server") - - // New conversation - Button { - chatVM?.newConversation() - } label: { - Label("New Chat", systemImage: "plus.message") - } - .keyboardShortcut("n", modifiers: .command) } + .onChange(of: modelManager.currentModel) { + chatVM?.resetSession() + // Persist last used model + if let id = modelManager.currentModel?.id { + Preferences.lastModelId = id + } + } + .onChange(of: modelManager.errorMessage) { + showLoadError = modelManager.errorMessage != nil + } + .alert("Model Error", isPresented: $showLoadError) { + Button("Retry") { + if let config = modelManager.currentModel ?? ModelConfig.availableModels.first { + Task { await modelManager.loadModel(config) } + } + } + Button("Cancel", role: .cancel) { + modelManager.errorMessage = nil + } + } message: { + Text(modelManager.errorMessage ?? 
"Unknown error loading model.") + } + .toolbar { + ToolbarItem(placement: .principal) { + ModelPickerView() + } + ToolbarItemGroup(placement: .primaryAction) { + toolbarButtons + } + } + // Cmd+1/2/3 model switching + .background { + modelSwitchShortcuts + } + } + + @ViewBuilder + private var mainContent: some View { + if let chatVM { + if showMonitor { + MonitorView(stats: chatVM.apiServer.inferenceStats) + } else { + ChatView(viewModel: chatVM) + } + } else { + ProgressView("Initializing…") } - // Cmd+1/2/3 model switching - .background { - modelSwitchShortcuts + } + + @ViewBuilder + private var toolbarButtons: some View { + // API server toggle + let isRunning = chatVM?.apiServer.isRunning == true + Button { + if let chatVM { + if chatVM.apiServer.isRunning { + chatVM.stopAPIServer() + } else { + chatVM.startAPIServer() + } + } + } label: { + Label( + isRunning ? "Stop API" : "Start API", + systemImage: isRunning ? "network" : "network.slash" + ) + .foregroundStyle(isRunning ? .green : .secondary) } + .help(isRunning ? "API server running on port \(Preferences.apiPort) — click to stop" : "Click to start API server") + + // Monitor toggle + Button { + showMonitor.toggle() + } label: { + Label( + showMonitor ? "Chat" : "Monitor", + systemImage: showMonitor ? "bubble.left.and.text.bubble.right" : "chart.xyaxis.line" + ) + .foregroundStyle(showMonitor ? Color.accentColor : Color.secondary) + } + .help(showMonitor ? 
"Switch to chat" : "Show inference monitor") + .keyboardShortcut("m", modifiers: [.command, .shift]) + + // New conversation + Button { + chatVM?.newConversation() + } label: { + Label("New Chat", systemImage: "plus.message") + } + .keyboardShortcut("n", modifiers: .command) } @ViewBuilder diff --git a/MLXServer/Models/InferenceStats.swift b/MLXServer/Models/InferenceStats.swift new file mode 100644 index 0000000..5efc6a5 --- /dev/null +++ b/MLXServer/Models/InferenceStats.swift @@ -0,0 +1,141 @@ +import Foundation + +/// Lightweight stats collector for inference activity visualization. +/// All mutations happen on @MainActor to avoid locks. +@Observable +@MainActor +final class InferenceStats { + // MARK: - Current request state + + var activeRequests: Int = 0 + var currentPromptTokens: Int = 0 + var currentGenerationTokens: Int = 0 + var isGenerating: Bool = false + var isPrefilling: Bool = false + var currentTokensPerSecond: Double = 0 + var contextUsed: Int = 0 + var contextMax: Int = 0 + + // MARK: - Cumulative counters + + var totalRequests: Int = 0 + var totalPromptTokens: Int = 0 + var totalGenerationTokens: Int = 0 + + // MARK: - Time series data (ring buffers for charts) + + struct DataPoint: Identifiable { + let id = UUID() + let timestamp: Date + let value: Double + } + + private(set) var tokenRateHistory: [DataPoint] = [] + private(set) var promptTokenHistory: [DataPoint] = [] + private(set) var generationTokenHistory: [DataPoint] = [] + + private static let maxHistoryPoints = 120 // ~2 minutes at 1Hz + + // Periodic sampling + private var sampleTimer: Timer? 
+ private var lastGenerationTokenCount: Int = 0 + private var lastPromptTokenCount: Int = 0 + private var lastSampleTime: Date = .now + + func startSampling() { + guard sampleTimer == nil else { return } + lastSampleTime = .now + sampleTimer = Timer.scheduledTimer(withTimeInterval: 1.0, repeats: true) { [weak self] _ in + Task { @MainActor in + self?.recordSample() + } + } + } + + func stopSampling() { + sampleTimer?.invalidate() + sampleTimer = nil + } + + private func recordSample() { + let now = Date.now + + // Token rate: tokens generated since last sample + let genDelta = totalGenerationTokens - lastGenerationTokenCount + let promptDelta = totalPromptTokens - lastPromptTokenCount + lastGenerationTokenCount = totalGenerationTokens + lastPromptTokenCount = totalPromptTokens + + tokenRateHistory.append(DataPoint(timestamp: now, value: currentTokensPerSecond)) + generationTokenHistory.append(DataPoint(timestamp: now, value: Double(genDelta))) + promptTokenHistory.append(DataPoint(timestamp: now, value: Double(promptDelta))) + + // Trim to ring buffer size + if tokenRateHistory.count > Self.maxHistoryPoints { + tokenRateHistory.removeFirst(tokenRateHistory.count - Self.maxHistoryPoints) + } + if generationTokenHistory.count > Self.maxHistoryPoints { + generationTokenHistory.removeFirst(generationTokenHistory.count - Self.maxHistoryPoints) + } + if promptTokenHistory.count > Self.maxHistoryPoints { + promptTokenHistory.removeFirst(promptTokenHistory.count - Self.maxHistoryPoints) + } + } + + // MARK: - Event recording (called from APIServer) + + func requestStarted(contextLength: Int) { + activeRequests += 1 + totalRequests += 1 + isPrefilling = true + isGenerating = false + currentPromptTokens = 0 + currentGenerationTokens = 0 + currentTokensPerSecond = 0 + contextMax = contextLength + contextUsed = 0 + } + + func prefillCompleted(promptTokens: Int) { + isPrefilling = false + isGenerating = true + currentPromptTokens = promptTokens + totalPromptTokens += 
promptTokens + contextUsed = promptTokens + } + + func tokenGenerated(tokensPerSecond: Double, totalGenerated: Int) { + currentGenerationTokens = totalGenerated + currentTokensPerSecond = tokensPerSecond + contextUsed = currentPromptTokens + totalGenerated + } + + func requestCompleted(promptTokens: Int, generationTokens: Int) { + activeRequests = max(0, activeRequests - 1) + totalGenerationTokens += generationTokens + if activeRequests == 0 { + isGenerating = false + isPrefilling = false + currentTokensPerSecond = 0 + } + } + + func reset() { + activeRequests = 0 + currentPromptTokens = 0 + currentGenerationTokens = 0 + isGenerating = false + isPrefilling = false + currentTokensPerSecond = 0 + contextUsed = 0 + contextMax = 0 + totalRequests = 0 + totalPromptTokens = 0 + totalGenerationTokens = 0 + tokenRateHistory.removeAll() + promptTokenHistory.removeAll() + generationTokenHistory.removeAll() + lastGenerationTokenCount = 0 + lastPromptTokenCount = 0 + } +} diff --git a/MLXServer/Server/APIServer.swift b/MLXServer/Server/APIServer.swift index 5944842..17e7d07 100644 --- a/MLXServer/Server/APIServer.swift +++ b/MLXServer/Server/APIServer.swift @@ -11,6 +11,7 @@ final class APIServer { var isRunning = false var port: Int = 1234 var requestCount: Int = 0 + let inferenceStats = InferenceStats() private var listener: NWListener? private var modelManager: ModelManager? 
@@ -54,6 +55,7 @@ final class APIServer { } listener?.start(queue: .global(qos: .userInitiated)) + inferenceStats.startSampling() } catch { print("[APIServer] Failed to start: \(error)") } @@ -66,6 +68,7 @@ final class APIServer { cachedSession = nil cachedMessages = nil cachedModelId = nil + inferenceStats.stopSampling() } // MARK: - Connection handling @@ -341,6 +344,8 @@ final class APIServer { // Extract images from the last message only (ChatSession.streamDetails takes images separately) let lastImages = lastMessage.images + inferenceStats.requestStarted(contextLength: contextLength) + if isStream { await handleStreamingResponse( connection: connection, @@ -421,14 +426,22 @@ final class APIServer { switch generation { case .chunk(let text): fullText += text + completionTokens += 1 + inferenceStats.tokenGenerated(tokensPerSecond: 0, totalGenerated: completionTokens) case .info(let info): promptTokens = info.promptTokenCount completionTokens = info.generationTokenCount + inferenceStats.prefillCompleted(promptTokens: promptTokens) + if info.tokensPerSecond > 0 { + inferenceStats.tokenGenerated(tokensPerSecond: info.tokensPerSecond, totalGenerated: completionTokens) + } case .toolCall(let call): frameworkToolCalls.append(call) } } + inferenceStats.requestCompleted(promptTokens: promptTokens, generationTokens: completionTokens) + // Parse tool calls: first check framework-detected ones, then our own text parser var finishReason = "stop" var responseContent: String? = fullText @@ -499,6 +512,7 @@ final class APIServer { sendResponse(connection: connection, status: 200, body: String(data: json, encoding: .utf8) ?? 
"{}") } } catch { + inferenceStats.requestCompleted(promptTokens: 0, generationTokens: 0) sendResponse(connection: connection, status: 500, body: #"{"error":"\#(error.localizedDescription)"}"#) } } @@ -564,6 +578,7 @@ final class APIServer { case .chunk(let text): completionTokens += 1 fullText += text + inferenceStats.tokenGenerated(tokensPerSecond: 0, totalGenerated: completionTokens) if !bufferForTools { sendSSEEvent(connection: connection, chunk: APIChatCompletionChunk( @@ -579,12 +594,17 @@ final class APIServer { case .info(let info): promptTokens = info.promptTokenCount completionTokens = info.generationTokenCount + inferenceStats.prefillCompleted(promptTokens: promptTokens) + if info.tokensPerSecond > 0 { + inferenceStats.tokenGenerated(tokensPerSecond: info.tokensPerSecond, totalGenerated: completionTokens) + } case .toolCall(let call): frameworkToolCalls.append(call) } } } catch { + inferenceStats.requestCompleted(promptTokens: promptTokens, generationTokens: completionTokens) let errorEvent = "data: {\"error\":\"\(error.localizedDescription)\"}\n\n" connection.send(content: errorEvent.data(using: .utf8), completion: .contentProcessed({ _ in })) } @@ -687,6 +707,8 @@ final class APIServer { ) )) + inferenceStats.requestCompleted(promptTokens: promptTokens, generationTokens: completionTokens) + // Send [DONE] and close let done = "data: [DONE]\n\n" connection.send(content: done.data(using: .utf8), completion: .contentProcessed({ _ in diff --git a/MLXServer/Views/ChatMessagesView.swift b/MLXServer/Views/ChatMessagesView.swift index 8f22d0e..d7476ec 100644 --- a/MLXServer/Views/ChatMessagesView.swift +++ b/MLXServer/Views/ChatMessagesView.swift @@ -7,7 +7,7 @@ struct ChatMessagesView: View { var body: some View { ScrollViewReader { proxy in ScrollView { - LazyVStack(alignment: .leading, spacing: 12) { + VStack(alignment: .leading, spacing: 12) { if viewModel.conversation.messages.isEmpty { emptyState } else { @@ -16,14 +16,20 @@ struct ChatMessagesView: View 
{ .id(message.id) } } + Color.clear + .frame(height: 1) + .id("bottom") } .padding() } .onChange(of: viewModel.conversation.messages.last?.content) { - scrollToBottom(proxy: proxy) + // During streaming, scroll without animation to avoid overlapping animations + proxy.scrollTo("bottom", anchor: .bottom) } .onChange(of: viewModel.conversation.messages.count) { - scrollToBottom(proxy: proxy) + withAnimation(.easeOut(duration: 0.2)) { + proxy.scrollTo("bottom", anchor: .bottom) + } } } } @@ -47,13 +53,6 @@ struct ChatMessagesView: View { .frame(maxWidth: .infinity, minHeight: 300) } - private func scrollToBottom(proxy: ScrollViewProxy) { - if let lastId = viewModel.conversation.messages.last?.id { - withAnimation(.easeOut(duration: 0.2)) { - proxy.scrollTo(lastId, anchor: .bottom) - } - } - } } struct MessageBubbleView: View { diff --git a/MLXServer/Views/MonitorView.swift b/MLXServer/Views/MonitorView.swift new file mode 100644 index 0000000..505110b --- /dev/null +++ b/MLXServer/Views/MonitorView.swift @@ -0,0 +1,352 @@ +import Charts +import MLX +import SwiftUI + +/// Real-time inference monitoring dashboard, shown in place of the chat UI. 
/// Dashboard view that renders the live and historical values exposed by an
/// `InferenceStats` instance: an activity header, two time-series charts, a row
/// of gauges/cards, and cumulative token totals.
///
/// The view only reads from `stats` and from the `ModelManager` in the
/// environment; it never mutates either of them.
struct MonitorView: View {
    /// Source of every number shown on the dashboard.
    let stats: InferenceStats
    /// Used only as a fallback for the context-window size (see `contextGauge`).
    @Environment(ModelManager.self) private var modelManager

    var body: some View {
        ScrollView {
            VStack(spacing: 20) {
                // Live status header
                liveStatusSection

                // Charts
                HStack(alignment: .top, spacing: 16) {
                    tokenRateChart
                    tokenThroughputChart
                }

                // Gauges row
                HStack(spacing: 16) {
                    contextGauge
                    gpuMemoryGauge
                    requestsCard
                }

                // Cumulative stats
                cumulativeSection
            }
            .padding(20)
        }
        .frame(maxWidth: .infinity, maxHeight: .infinity)
        .background(.background)
    }

    // MARK: - Live Status

    /// Header row: coloured activity dot + label on the left, current tok/s and
    /// the current request's prompt/generation token counts on the right.
    @ViewBuilder
    private var liveStatusSection: some View {
        HStack(spacing: 16) {
            // Activity indicator
            HStack(spacing: 8) {
                Circle()
                    .fill(activityColor)
                    .frame(width: 10, height: 10)
                    .overlay {
                        // Static halo ring drawn only while a request is being
                        // prefilled or generated.
                        if stats.isGenerating || stats.isPrefilling {
                            Circle()
                                .stroke(activityColor.opacity(0.5), lineWidth: 2)
                                .scaleEffect(1.8)
                                .opacity(0.6)
                        }
                    }

                Text(activityLabel)
                    .font(.headline)
            }

            Spacer()

            if stats.isGenerating {
                Text(String(format: "%.1f tok/s", stats.currentTokensPerSecond))
                    .font(.title2.monospacedDigit().bold())
                    .foregroundStyle(.green)
            }

            // Prompt (down arrow) and generated (up arrow) token counts for the
            // current request; hidden until a prompt token count is known.
            if stats.currentPromptTokens > 0 {
                HStack(spacing: 4) {
                    Image(systemName: "arrow.down.circle.fill")
                        .foregroundStyle(.blue)
                    Text("\(stats.currentPromptTokens)")
                        .monospacedDigit()
                    Image(systemName: "arrow.up.circle.fill")
                        .foregroundStyle(.orange)
                    Text("\(stats.currentGenerationTokens)")
                        .monospacedDigit()
                }
                .font(.callout)
            }
        }
        .padding(12)
        .background(.regularMaterial, in: RoundedRectangle(cornerRadius: 10))
    }

    /// Colour of the activity dot. Prefilling takes precedence over generating,
    /// which takes precedence over "requests active but neither flag set".
    private var activityColor: Color {
        if stats.isPrefilling { return .blue }
        if stats.isGenerating { return .green }
        if stats.activeRequests > 0 { return .orange }
        return .secondary
    }

    /// Text shown next to the activity dot; same precedence as `activityColor`.
    private var activityLabel: String {
        if stats.isPrefilling { return "Prefilling" }
        if stats.isGenerating { return "Generating" }
        if stats.activeRequests > 0 { return "Processing" }
        return "Idle"
    }

    // MARK: - Token Rate Chart

    /// Line + area chart of `stats.tokenRateHistory` (tokens per second over time).
    @ViewBuilder
    private var tokenRateChart: some View {
        VStack(alignment: .leading, spacing: 6) {
            Text("Generation Speed (tok/s)")
                .font(.caption.bold())
                .foregroundStyle(.secondary)

            Chart(stats.tokenRateHistory) { point in
                LineMark(
                    x: .value("Time", point.timestamp),
                    y: .value("tok/s", point.value)
                )
                .foregroundStyle(.green)
                .interpolationMethod(.monotone)

                AreaMark(
                    x: .value("Time", point.timestamp),
                    y: .value("tok/s", point.value)
                )
                .foregroundStyle(.green.opacity(0.1))
                .interpolationMethod(.monotone)
            }
            .chartXAxis {
                // Grid lines only, no labels, every 30 seconds.
                AxisMarks(values: .stride(by: .second, count: 30)) { _ in
                    AxisGridLine()
                }
            }
            .chartYAxis {
                AxisMarks(position: .leading) { value in
                    AxisGridLine()
                    AxisValueLabel {
                        if let v = value.as(Double.self) {
                            Text(String(format: "%.0f", v))
                                .font(.caption2.monospacedDigit())
                        }
                    }
                }
            }
            // +1 keeps the domain non-empty even when every sample is 0.
            .chartYScale(domain: 0...(maxTokenRate + 1))
            .frame(height: 150)
        }
        .padding(12)
        .background(.regularMaterial, in: RoundedRectangle(cornerRadius: 10))
    }

    /// Largest value in `stats.tokenRateHistory`, or 10 when the history is empty.
    private var maxTokenRate: Double {
        stats.tokenRateHistory.map(\.value).max() ?? 10
    }

    // MARK: - Token Throughput Chart

    /// Bar chart of per-sample prompt tokens (blue) and generated tokens (orange),
    /// with a hand-built legend underneath.
    @ViewBuilder
    private var tokenThroughputChart: some View {
        VStack(alignment: .leading, spacing: 6) {
            Text("Token Throughput (/sec)")
                .font(.caption.bold())
                .foregroundStyle(.secondary)

            Chart {
                ForEach(stats.promptTokenHistory) { point in
                    BarMark(
                        x: .value("Time", point.timestamp),
                        y: .value("Tokens", point.value)
                    )
                    .foregroundStyle(.blue.opacity(0.7))
                }
                ForEach(stats.generationTokenHistory) { point in
                    BarMark(
                        x: .value("Time", point.timestamp),
                        y: .value("Tokens", point.value)
                    )
                    .foregroundStyle(.orange.opacity(0.7))
                }
            }
            .chartXAxis {
                AxisMarks(values: .stride(by: .second, count: 30)) { _ in
                    AxisGridLine()
                }
            }
            .chartYAxis {
                AxisMarks(position: .leading) { value in
                    AxisGridLine()
                    AxisValueLabel {
                        if let v = value.as(Double.self) {
                            Text(String(format: "%.0f", v))
                                .font(.caption2.monospacedDigit())
                        }
                    }
                }
            }
            .frame(height: 150)

            // Legend
            HStack(spacing: 12) {
                Label("Prompt", systemImage: "circle.fill")
                    .font(.caption2)
                    .foregroundStyle(.blue)
                Label("Generation", systemImage: "circle.fill")
                    .font(.caption2)
                    .foregroundStyle(.orange)
            }
        }
        .padding(12)
        .background(.regularMaterial, in: RoundedRectangle(cornerRadius: 10))
    }

    // MARK: - Context Gauge

    /// Circular gauge of `stats.contextUsed` against the context-window size.
    @ViewBuilder
    private var contextGauge: some View {
        VStack(spacing: 8) {
            Text("Context")
                .font(.caption.bold())
                .foregroundStyle(.secondary)

            // Use whichever is larger: the size reported with the last request or
            // the currently selected model's size (0 when no model is selected).
            let maxCtx = max(stats.contextMax, modelManager.currentModel?.contextLength ?? 0)
            let used = stats.contextUsed
            let ratio = maxCtx > 0 ? Double(used) / Double(maxCtx) : 0
            // The gauge is built over the default 0...1 range; clamp so an
            // over-full context never hands it an out-of-range value. The
            // percentage label below intentionally keeps the unclamped ratio.
            let gaugeValue = min(max(ratio, 0), 1)

            Gauge(value: gaugeValue) {
                EmptyView()
            } currentValueLabel: {
                Text(formatTokenCount(used))
                    .font(.title3.monospacedDigit().bold())
            } minimumValueLabel: {
                Text("0")
                    .font(.caption2)
            } maximumValueLabel: {
                Text(formatTokenCount(maxCtx))
                    .font(.caption2)
            }
            .gaugeStyle(.accessoryCircular)
            .scaleEffect(1.3)
            .tint(contextGradient(ratio: ratio))

            Text("\(Int(ratio * 100))%")
                .font(.caption.monospacedDigit())
                .foregroundStyle(.secondary)
        }
        .frame(maxWidth: .infinity)
        .padding(12)
        .background(.regularMaterial, in: RoundedRectangle(cornerRadius: 10))
    }

    /// Gauge tint for a fill ratio: red above 0.9, orange above 0.7, otherwise blue.
    /// Despite the name, this returns a single flat colour.
    private func contextGradient(ratio: Double) -> Color {
        if ratio > 0.9 { return .red }
        if ratio > 0.7 { return .orange }
        return .blue
    }

    // MARK: - GPU Memory Gauge

    /// Card showing `MLX.GPU.activeMemory` and, when non-zero, `MLX.GPU.peakMemory`,
    /// both converted from bytes to MiB (÷ 1_048_576).
    ///
    /// NOTE(review): the values are read while the body is evaluated; presumably
    /// they are not observable, so the card refreshes only when something else
    /// invalidates this view — confirm.
    @ViewBuilder
    private var gpuMemoryGauge: some View {
        VStack(spacing: 8) {
            Text("GPU Memory")
                .font(.caption.bold())
                .foregroundStyle(.secondary)

            let activeMB = Double(MLX.GPU.activeMemory) / 1_048_576
            let peakMB = Double(MLX.GPU.peakMemory) / 1_048_576

            Text(String(format: "%.0f MB", activeMB))
                .font(.title3.monospacedDigit().bold())

            if peakMB > 0 {
                Text(String(format: "Peak: %.0f MB", peakMB))
                    .font(.caption2.monospacedDigit())
                    .foregroundStyle(.tertiary)
            }
        }
        .frame(maxWidth: .infinity)
        .padding(12)
        .background(.regularMaterial, in: RoundedRectangle(cornerRadius: 10))
    }

    // MARK: - Requests Card

    /// Card showing the total request count and how many are currently active.
    @ViewBuilder
    private var requestsCard: some View {
        VStack(spacing: 8) {
            Text("Requests")
                .font(.caption.bold())
                .foregroundStyle(.secondary)

            Text("\(stats.totalRequests)")
                .font(.title3.monospacedDigit().bold())

            if stats.activeRequests > 0 {
                Text("\(stats.activeRequests) active")
                    .font(.caption2)
                    .foregroundStyle(.green)
            } else {
                Text("none active")
                    .font(.caption2)
                    .foregroundStyle(.tertiary)
            }
        }
        .frame(maxWidth: .infinity)
        .padding(12)
        .background(.regularMaterial, in: RoundedRectangle(cornerRadius: 10))
    }

    // MARK: - Cumulative

    /// Footer row with total prompt tokens, total generated tokens, and their sum.
    @ViewBuilder
    private var cumulativeSection: some View {
        HStack(spacing: 24) {
            VStack(spacing: 2) {
                Text("Total Prompt Tokens")
                    .font(.caption2)
                    .foregroundStyle(.secondary)
                Text(formatTokenCount(stats.totalPromptTokens))
                    .font(.callout.monospacedDigit().bold())
                    .foregroundStyle(.blue)
            }

            VStack(spacing: 2) {
                Text("Total Generated Tokens")
                    .font(.caption2)
                    .foregroundStyle(.secondary)
                Text(formatTokenCount(stats.totalGenerationTokens))
                    .font(.callout.monospacedDigit().bold())
                    .foregroundStyle(.orange)
            }

            VStack(spacing: 2) {
                Text("Total Tokens")
                    .font(.caption2)
                    .foregroundStyle(.secondary)
                Text(formatTokenCount(stats.totalPromptTokens + stats.totalGenerationTokens))
                    .font(.callout.monospacedDigit().bold())
            }
        }
        .frame(maxWidth: .infinity)
        .padding(12)
        .background(.regularMaterial, in: RoundedRectangle(cornerRadius: 10))
    }

    // MARK: - Helpers

    /// Abbreviates a token count with one decimal place: `"1.5k"`, `"2.3M"`.
    /// Counts below 1 000 (including negatives) are printed unchanged.
    ///
    /// - Parameter count: Number of tokens.
    /// - Returns: The abbreviated string.
    private func formatTokenCount(_ count: Int) -> String {
        // The "M" branch starts at 999_950 rather than 1_000_000: from there on
        // "%.1f" of the thousands value would round up and print "1000.0k".
        if count >= 999_950 {
            return String(format: "%.1fM", Double(count) / 1_000_000)
        } else if count >= 1_000 {
            return String(format: "%.1fk", Double(count) / 1_000)
        }
        return "\(count)"
    }
}