feat: inference visualisation

2026-03-17 19:30:09 +01:00
parent 5313b7175e
commit 1a67311874
6 changed files with 629 additions and 83 deletions
--- a/MLXServer.xcodeproj/project.pbxproj
+++ b/MLXServer.xcodeproj/project.pbxproj
@@ -11,6 +11,7 @@
 		165E8AB6ADAE1D59B1A86420 /* Preferences.swift in Sources */ = {isa = PBXBuildFile; fileRef = 145B888FBDD4F931512C5473 /* Preferences.swift */; };
 		189362AAE2CDE5D4B3428334 /* ToolCallParser.swift in Sources */ = {isa = PBXBuildFile; fileRef = E73B165A1822729C907791AE /* ToolCallParser.swift */; };
 		2CAAF7129F7CC45200FA9F6B /* ModelPickerView.swift in Sources */ = {isa = PBXBuildFile; fileRef = C3C3A76C02AF70A9D8F868FC /* ModelPickerView.swift */; };
 		2D08769282BD71C170DB0943 /* InferenceStats.swift in Sources */ = {isa = PBXBuildFile; fileRef = E35452B166893B25E765FF70 /* InferenceStats.swift */; };
 		4CB13DC1AC7A500DDBB443EC /* ChatInputView.swift in Sources */ = {isa = PBXBuildFile; fileRef = E5E6AD02CDF23BDAB64700A7 /* ChatInputView.swift */; };
 		50B6861FF8610B3ED4FFAD9D /* MLXServerApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = C67742651DB486871CEF1612 /* MLXServerApp.swift */; };
 		50DD129CCF2843482DEC3B96 /* APIServer.swift in Sources */ = {isa = PBXBuildFile; fileRef = 3D08828E16B17EF02C14243E /* APIServer.swift */; };
@@ -22,6 +23,7 @@
 		80646C5066BF79BC76E1D9D7 /* ModelConfig.swift in Sources */ = {isa = PBXBuildFile; fileRef = 38DFC212AF4359A45FBE22BA /* ModelConfig.swift */; };
 		84D32315B418B5243E017350 /* ToolPromptBuilder.swift in Sources */ = {isa = PBXBuildFile; fileRef = 16AE82A64D1D07AE3CD8D33A /* ToolPromptBuilder.swift */; };
 		945474365D0B3E961811909A /* MLXVLM in Frameworks */ = {isa = PBXBuildFile; productRef = D5E8E1C2DD8D8AABB4306193 /* MLXVLM */; };
 		B1D9BC407DB7DB1489230C20 /* MonitorView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4239CFF94B819C35A8D4D617 /* MonitorView.swift */; };
 		B5AA6E3B4BE21676226B342B /* ChatViewModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = B8BD93859F0291F1A3E09DA5 /* ChatViewModel.swift */; };
 		B6D3662995B885C102876B4A /* MLXLMCommon in Frameworks */ = {isa = PBXBuildFile; productRef = 9090667D4134056AE66DC2F1 /* MLXLMCommon */; };
 		D666A311788375E8A061C832 /* SettingsView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4147321383E94E9F17A0154E /* SettingsView.swift */; };
@@ -38,6 +40,7 @@
 		3AF462805202797F61422AEE /* MLXServer.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = MLXServer.entitlements; sourceTree = "<group>"; };
 		3D08828E16B17EF02C14243E /* APIServer.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = APIServer.swift; sourceTree = "<group>"; };
 		4147321383E94E9F17A0154E /* SettingsView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SettingsView.swift; sourceTree = "<group>"; };
 		4239CFF94B819C35A8D4D617 /* MonitorView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MonitorView.swift; sourceTree = "<group>"; };
 		6EE59189918D06B8D2F588FC /* MLXServer.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = MLXServer.app; sourceTree = BUILT_PRODUCTS_DIR; };
 		922CBDC9206737BD04AF2874 /* ModelManager.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ModelManager.swift; sourceTree = "<group>"; };
 		944C699FBB76C734C9DF2F2E /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; };
@@ -49,6 +52,7 @@
 		C67742651DB486871CEF1612 /* MLXServerApp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MLXServerApp.swift; sourceTree = "<group>"; };
 		D733A0D1D4AC25DDDA6C8684 /* LocalModelResolver.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LocalModelResolver.swift; sourceTree = "<group>"; };
 		DB1A5E8B1C9F2BC4D262C53A /* ChatMessagesView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ChatMessagesView.swift; sourceTree = "<group>"; };
 		E35452B166893B25E765FF70 /* InferenceStats.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = InferenceStats.swift; sourceTree = "<group>"; };
 		E5E6AD02CDF23BDAB64700A7 /* ChatInputView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ChatInputView.swift; sourceTree = "<group>"; };
 		E73B165A1822729C907791AE /* ToolCallParser.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ToolCallParser.swift; sourceTree = "<group>"; };
 		F1A52E2C9964ADA9D841A89B /* APIModels.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = APIModels.swift; sourceTree = "<group>"; };
@@ -108,6 +112,7 @@
 				E5E6AD02CDF23BDAB64700A7 /* ChatInputView.swift */,
 				DB1A5E8B1C9F2BC4D262C53A /* ChatMessagesView.swift */,
 				C3C3A76C02AF70A9D8F868FC /* ModelPickerView.swift */,
 				4239CFF94B819C35A8D4D617 /* MonitorView.swift */,
 				4147321383E94E9F17A0154E /* SettingsView.swift */,
 				B0EAB35D7130D56B9E7484BA /* StatusBarView.swift */,
 			);
@@ -118,6 +123,7 @@
 			isa = PBXGroup;
 			children = (
 				A4B359324B5FD8D106C74338 /* ChatMessage.swift */,
 				E35452B166893B25E765FF70 /* InferenceStats.swift */,
 				38DFC212AF4359A45FBE22BA /* ModelConfig.swift */,
 			);
 			path = Models;
@@ -234,11 +240,13 @@
 				5C1E8FE1C521914CEF98D3AA /* ChatMessagesView.swift in Sources */,
 				B5AA6E3B4BE21676226B342B /* ChatViewModel.swift in Sources */,
 				5946258F1DE88CE904584E0B /* ContentView.swift in Sources */,
 				2D08769282BD71C170DB0943 /* InferenceStats.swift in Sources */,
 				6828CCA8B78AB40906F87CAB /* LocalModelResolver.swift in Sources */,
 				50B6861FF8610B3ED4FFAD9D /* MLXServerApp.swift in Sources */,
 				80646C5066BF79BC76E1D9D7 /* ModelConfig.swift in Sources */,
 				0168AEE16009097901363E16 /* ModelManager.swift in Sources */,
 				2CAAF7129F7CC45200FA9F6B /* ModelPickerView.swift in Sources */,
 				B1D9BC407DB7DB1489230C20 /* MonitorView.swift in Sources */,
 				165E8AB6ADAE1D59B1A86420 /* Preferences.swift in Sources */,
 				D666A311788375E8A061C832 /* SettingsView.swift in Sources */,
 				621B7E4382199AC1378F5F9C /* StatusBarView.swift in Sources */,
--- a/MLXServer/ContentView.swift
+++ b/MLXServer/ContentView.swift
@@ -4,86 +4,110 @@ struct ContentView: View {
    @Environment(ModelManager.self) private var modelManager
    @State private var chatVM: ChatViewModel?
    @State private var showLoadError = false
    @State private var showMonitor = false
    var body: some View {
-        Group {
+        mainContent
-            if let chatVM {
+            .navigationTitle(modelManager.currentModel?.displayName ?? "MLX Server")
-                ChatView(viewModel: chatVM)
+            .onAppear {
-            } else {
+                if chatVM == nil {
-                ProgressView("Initializing…")
+                    chatVM = ChatViewModel(modelManager: modelManager)
-            }
+                    // Auto-start API server if configured
-        }
+                    if Preferences.apiAutoStart {
-        .navigationTitle(modelManager.currentModel?.displayName ?? "MLX Server")
+                        chatVM?.startAPIServer()
        .onAppear {
            if chatVM == nil {
                chatVM = ChatViewModel(modelManager: modelManager)
                // Auto-start API server if configured
                if Preferences.apiAutoStart {
                    chatVM?.startAPIServer()
                }
            }
        }
        .onChange(of: modelManager.currentModel) {
            chatVM?.resetSession()
            // Persist last used model
            if let id = modelManager.currentModel?.id {
                Preferences.lastModelId = id
            }
        }
        .onChange(of: modelManager.errorMessage) {
            showLoadError = modelManager.errorMessage != nil
        }
        .alert("Model Error", isPresented: $showLoadError) {
            Button("Retry") {
                if let config = modelManager.currentModel ?? ModelConfig.availableModels.first {
                    Task { await modelManager.loadModel(config) }
                }
            }
            Button("Cancel", role: .cancel) {
                modelManager.errorMessage = nil
            }
        } message: {
            Text(modelManager.errorMessage ?? "Unknown error loading model.")
        }
        .toolbar {
            ToolbarItem(placement: .principal) {
                ModelPickerView()
            }
            ToolbarItemGroup(placement: .primaryAction) {
                // API server toggle
                Button {
                    if let chatVM {
                        if chatVM.apiServer.isRunning {
                            chatVM.stopAPIServer()
                        } else {
                            chatVM.startAPIServer()
                        }
                    }
                } label: {
                    // Running → solid globe (green tint), click to stop
                    // Stopped → slashed globe, click to start
                    Label(
                        chatVM?.apiServer.isRunning == true ? "Stop API" : "Start API",
                        systemImage: chatVM?.apiServer.isRunning == true ? "network" : "network.slash"
                    )
                    .foregroundStyle(chatVM?.apiServer.isRunning == true ? .green : .secondary)
                }
                .help(chatVM?.apiServer.isRunning == true ? "API server running on port \(Preferences.apiPort) — click to stop" : "Click to start API server")
                // New conversation
                Button {
                    chatVM?.newConversation()
                } label: {
                    Label("New Chat", systemImage: "plus.message")
                }
                .keyboardShortcut("n", modifiers: .command)
            }
            .onChange(of: modelManager.currentModel) {
                chatVM?.resetSession()
                // Persist last used model
                if let id = modelManager.currentModel?.id {
                    Preferences.lastModelId = id
                }
            }
            .onChange(of: modelManager.errorMessage) {
                showLoadError = modelManager.errorMessage != nil
            }
            .alert("Model Error", isPresented: $showLoadError) {
                Button("Retry") {
                    if let config = modelManager.currentModel ?? ModelConfig.availableModels.first {
                        Task { await modelManager.loadModel(config) }
                    }
                }
                Button("Cancel", role: .cancel) {
                    modelManager.errorMessage = nil
                }
            } message: {
                Text(modelManager.errorMessage ?? "Unknown error loading model.")
            }
            .toolbar {
                ToolbarItem(placement: .principal) {
                    ModelPickerView()
                }
                ToolbarItemGroup(placement: .primaryAction) {
                    toolbarButtons
                }
            }
            // Cmd+1/2/3 model switching
            .background {
                modelSwitchShortcuts
            }
    }
    @ViewBuilder
    private var mainContent: some View {
        if let chatVM {
            if showMonitor {
                MonitorView(stats: chatVM.apiServer.inferenceStats)
            } else {
                ChatView(viewModel: chatVM)
            }
        } else {
            ProgressView("Initializing…")
        }
-        // Cmd+1/2/3 model switching
+    }
-        .background {
+
-            modelSwitchShortcuts
+    @ViewBuilder
    /// Primary-action toolbar content: API server toggle, monitor/chat toggle, and "New Chat".
    private var toolbarButtons: some View {
        // A missing view model is treated the same as a stopped server.
        let apiServerRunning = chatVM?.apiServer.isRunning ?? false

        // API server start/stop
        Button {
            guard let chatVM else { return }
            // Read the live flag at click time rather than the value captured for rendering.
            if chatVM.apiServer.isRunning {
                chatVM.stopAPIServer()
            } else {
                chatVM.startAPIServer()
            }
        } label: {
            Label(
                apiServerRunning ? "Stop API" : "Start API",
                systemImage: apiServerRunning ? "network" : "network.slash"
            )
            .foregroundStyle(apiServerRunning ? .green : .secondary)
        }
        .help(apiServerRunning ? "API server running on port \(Preferences.apiPort) — click to stop" : "Click to start API server")

        // Swap between the chat UI and the inference monitor
        Button {
            showMonitor.toggle()
        } label: {
            Label(
                showMonitor ? "Chat" : "Monitor",
                systemImage: showMonitor ? "bubble.left.and.text.bubble.right" : "chart.xyaxis.line"
            )
            .foregroundStyle(showMonitor ? Color.accentColor : Color.secondary)
        }
        .help(showMonitor ? "Switch to chat" : "Show inference monitor")
        .keyboardShortcut("m", modifiers: [.command, .shift])

        // Start a fresh conversation
        Button {
            chatVM?.newConversation()
        } label: {
            Label("New Chat", systemImage: "plus.message")
        }
        .keyboardShortcut("n", modifiers: .command)
    }
    @ViewBuilder
--- a/MLXServer/Models/InferenceStats.swift
+++ b/MLXServer/Models/InferenceStats.swift
@@ -0,0 +1,141 @@
 import Foundation
 /// Lightweight stats collector for inference activity visualization.
 /// All mutations happen on @MainActor to avoid locks.
@Observable
@MainActor
 final class InferenceStats {
    // MARK: - Current request state

    /// Requests that have started and not yet been reported complete.
    var activeRequests: Int = 0
    /// Prompt token count of the most recent request (0 until `prefillCompleted` reports it).
    var currentPromptTokens: Int = 0
    /// Running count of generated tokens for the most recent request.
    var currentGenerationTokens: Int = 0
    var isGenerating: Bool = false
    var isPrefilling: Bool = false
    /// Latest generation speed; reset to 0 once no request is active.
    var currentTokensPerSecond: Double = 0
    var contextUsed: Int = 0
    var contextMax: Int = 0

    // MARK: - Cumulative counters

    var totalRequests: Int = 0
    var totalPromptTokens: Int = 0
    /// Generated tokens of *completed* requests; in-flight tokens are added on completion.
    var totalGenerationTokens: Int = 0

    // MARK: - Time series data (ring buffers for charts)

    /// One timestamped sample of a chart series.
    struct DataPoint: Identifiable {
        let id = UUID()
        let timestamp: Date
        let value: Double
    }

    private(set) var tokenRateHistory: [DataPoint] = []
    private(set) var promptTokenHistory: [DataPoint] = []
    private(set) var generationTokenHistory: [DataPoint] = []

    private static let maxHistoryPoints = 120 // ~2 minutes at 1Hz
    private static let sampleInterval: TimeInterval = 1.0
    /// Minimum observation window before a locally estimated rate is published,
    /// so two chunks arriving back-to-back do not show up as an absurd spike.
    private static let minRateWindow: TimeInterval = 0.25

    // Periodic sampling
    private var sampleTimer: Timer?
    private var lastGenerationTokenCount: Int = 0
    private var lastPromptTokenCount: Int = 0

    // Local rate estimation for callers that do not report a speed per token.
    private var firstTokenTime: Date?
    private var firstTokenCount: Int = 0

    /// Cumulative generated tokens including the progress of an in-flight request.
    /// Sampling from this (rather than `totalGenerationTokens`) spreads a request's
    /// tokens over the seconds in which they were produced.
    private var liveGenerationTokens: Int {
        activeRequests > 0 ? totalGenerationTokens + currentGenerationTokens : totalGenerationTokens
    }

    /// Starts the 1 Hz sampler that feeds the history buffers. No-op if already running.
    func startSampling() {
        guard sampleTimer == nil else { return }
        let timer = Timer(timeInterval: Self.sampleInterval, repeats: true) { [weak self] _ in
            Task { @MainActor in
                self?.recordSample()
            }
        }
        // `.common` keeps the timer firing while the UI is in an event-tracking
        // run-loop mode; the default mode alone would pause sampling there.
        RunLoop.main.add(timer, forMode: .common)
        sampleTimer = timer
    }

    /// Stops the sampler. History and counters are left untouched.
    func stopSampling() {
        sampleTimer?.invalidate()
        sampleTimer = nil
    }

    /// Appends one data point to each history series and trims them to `maxHistoryPoints`.
    private func recordSample() {
        let now = Date.now

        // Tokens produced since the previous sample. The generation delta is clamped
        // because the running count can be corrected downwards when a request completes.
        let liveGenerated = liveGenerationTokens
        let genDelta = max(0, liveGenerated - lastGenerationTokenCount)
        let promptDelta = totalPromptTokens - lastPromptTokenCount
        lastGenerationTokenCount = liveGenerated
        lastPromptTokenCount = totalPromptTokens

        tokenRateHistory.append(DataPoint(timestamp: now, value: currentTokensPerSecond))
        generationTokenHistory.append(DataPoint(timestamp: now, value: Double(genDelta)))
        promptTokenHistory.append(DataPoint(timestamp: now, value: Double(promptDelta)))

        // Trim to ring buffer size
        Self.trim(&tokenRateHistory)
        Self.trim(&generationTokenHistory)
        Self.trim(&promptTokenHistory)
    }

    /// Drops the oldest points so `series` holds at most `maxHistoryPoints` entries.
    private static func trim(_ series: inout [DataPoint]) {
        let overflow = series.count - maxHistoryPoints
        if overflow > 0 {
            series.removeFirst(overflow)
        }
    }

    // MARK: - Event recording (called from APIServer)

    /// Marks the start of a request and resets the per-request fields.
    /// - Parameter contextLength: Context window size used as the gauge maximum.
    func requestStarted(contextLength: Int) {
        activeRequests += 1
        totalRequests += 1
        isPrefilling = true
        isGenerating = false
        currentPromptTokens = 0
        currentGenerationTokens = 0
        currentTokensPerSecond = 0
        contextMax = contextLength
        contextUsed = 0
        firstTokenTime = nil
        firstTokenCount = 0
    }

    /// Records the prompt size and switches the state to "generating".
    /// - Parameter promptTokens: Number of prompt tokens processed for this request.
    func prefillCompleted(promptTokens: Int) {
        isPrefilling = false
        isGenerating = true
        currentPromptTokens = promptTokens
        totalPromptTokens += promptTokens
        // Callers may report the prompt size only after generation has progressed,
        // so keep any tokens already generated in the context figure.
        contextUsed = promptTokens + currentGenerationTokens
    }

    /// Records generation progress for the current request.
    /// - Parameters:
    ///   - tokensPerSecond: Measured speed, or a value `<= 0` when the caller has
    ///     none; in that case the speed is estimated from the running count.
    ///   - totalGenerated: Running count of tokens generated by this request.
    func tokenGenerated(tokensPerSecond: Double, totalGenerated: Int) {
        // A generated token proves prefill is over, even if `prefillCompleted`
        // has not been reported yet.
        if isPrefilling {
            isPrefilling = false
            isGenerating = true
        }

        let now = Date.now
        if firstTokenTime == nil {
            firstTokenTime = now
            firstTokenCount = totalGenerated
        }

        currentGenerationTokens = totalGenerated
        contextUsed = currentPromptTokens + totalGenerated

        if tokensPerSecond > 0 {
            currentTokensPerSecond = tokensPerSecond
        } else if let start = firstTokenTime {
            let elapsed = now.timeIntervalSince(start)
            let produced = totalGenerated - firstTokenCount
            if elapsed >= Self.minRateWindow, produced > 0 {
                currentTokensPerSecond = Double(produced) / elapsed
            }
        }
    }

    /// Marks a request as finished and folds its generated tokens into the totals.
    ///
    /// Calls made while no request is active are ignored, so a request that is
    /// reported complete more than once (for example by an error handler and again
    /// by normal teardown) is only counted once.
    /// - Parameters:
    ///   - promptTokens: Unused here; prompt tokens are counted in `prefillCompleted`.
    ///   - generationTokens: Final number of tokens generated by the request.
    func requestCompleted(promptTokens: Int, generationTokens: Int) {
        guard activeRequests > 0 else { return }
        activeRequests -= 1
        totalGenerationTokens += generationTokens
        if activeRequests == 0 {
            isGenerating = false
            isPrefilling = false
            currentTokensPerSecond = 0
        }
    }

    /// Clears all counters, state flags and chart history. The sampler keeps running.
    func reset() {
        activeRequests = 0
        currentPromptTokens = 0
        currentGenerationTokens = 0
        isGenerating = false
        isPrefilling = false
        currentTokensPerSecond = 0
        contextUsed = 0
        contextMax = 0
        totalRequests = 0
        totalPromptTokens = 0
        totalGenerationTokens = 0
        tokenRateHistory.removeAll()
        promptTokenHistory.removeAll()
        generationTokenHistory.removeAll()
        lastGenerationTokenCount = 0
        lastPromptTokenCount = 0
        firstTokenTime = nil
        firstTokenCount = 0
    }
 }
--- a/MLXServer/Server/APIServer.swift
+++ b/MLXServer/Server/APIServer.swift
@@ -11,6 +11,7 @@ final class APIServer {
    var isRunning = false
    var port: Int = 1234
    var requestCount: Int = 0
    let inferenceStats = InferenceStats()
    private var listener: NWListener?
    private var modelManager: ModelManager?
@@ -54,6 +55,7 @@ final class APIServer {
            }
            listener?.start(queue: .global(qos: .userInitiated))
            inferenceStats.startSampling()
        } catch {
            print("[APIServer] Failed to start: \(error)")
        }
@@ -66,6 +68,7 @@ final class APIServer {
        cachedSession = nil
        cachedMessages = nil
        cachedModelId = nil
        inferenceStats.stopSampling()
    }
    // MARK: - Connection handling
@@ -341,6 +344,8 @@ final class APIServer {
        // Extract images from the last message only (ChatSession.streamDetails takes images separately)
        let lastImages = lastMessage.images
        inferenceStats.requestStarted(contextLength: contextLength)
        if isStream {
            await handleStreamingResponse(
                connection: connection,
@@ -421,14 +426,22 @@ final class APIServer {
                switch generation {
                case .chunk(let text):
                    fullText += text
                    completionTokens += 1
                    inferenceStats.tokenGenerated(tokensPerSecond: 0, totalGenerated: completionTokens)
                case .info(let info):
                    promptTokens = info.promptTokenCount
                    completionTokens = info.generationTokenCount
                    inferenceStats.prefillCompleted(promptTokens: promptTokens)
                    if info.tokensPerSecond > 0 {
                        inferenceStats.tokenGenerated(tokensPerSecond: info.tokensPerSecond, totalGenerated: completionTokens)
                    }
                case .toolCall(let call):
                    frameworkToolCalls.append(call)
                }
            }
            inferenceStats.requestCompleted(promptTokens: promptTokens, generationTokens: completionTokens)
            // Parse tool calls: first check framework-detected ones, then our own text parser
            var finishReason = "stop"
            var responseContent: String? = fullText
@@ -499,6 +512,7 @@ final class APIServer {
                sendResponse(connection: connection, status: 200, body: String(data: json, encoding: .utf8) ?? "{}")
            }
        } catch {
            inferenceStats.requestCompleted(promptTokens: 0, generationTokens: 0)
            sendResponse(connection: connection, status: 500, body: #"{"error":"\#(error.localizedDescription)"}"#)
        }
    }
@@ -564,6 +578,7 @@ final class APIServer {
                case .chunk(let text):
                    completionTokens += 1
                    fullText += text
                    inferenceStats.tokenGenerated(tokensPerSecond: 0, totalGenerated: completionTokens)
                    if !bufferForTools {
                        sendSSEEvent(connection: connection, chunk: APIChatCompletionChunk(
@@ -579,12 +594,17 @@ final class APIServer {
                case .info(let info):
                    promptTokens = info.promptTokenCount
                    completionTokens = info.generationTokenCount
                    inferenceStats.prefillCompleted(promptTokens: promptTokens)
                    if info.tokensPerSecond > 0 {
                        inferenceStats.tokenGenerated(tokensPerSecond: info.tokensPerSecond, totalGenerated: completionTokens)
                    }
                case .toolCall(let call):
                    frameworkToolCalls.append(call)
                }
            }
        } catch {
            inferenceStats.requestCompleted(promptTokens: promptTokens, generationTokens: completionTokens)
            let errorEvent = "data: {\"error\":\"\(error.localizedDescription)\"}\n\n"
            connection.send(content: errorEvent.data(using: .utf8), completion: .contentProcessed({ _ in }))
        }
@@ -687,6 +707,8 @@ final class APIServer {
            )
        ))
        inferenceStats.requestCompleted(promptTokens: promptTokens, generationTokens: completionTokens)
        // Send [DONE] and close
        let done = "data: [DONE]\n\n"
        connection.send(content: done.data(using: .utf8), completion: .contentProcessed({ _ in
--- a/MLXServer/Views/ChatMessagesView.swift
+++ b/MLXServer/Views/ChatMessagesView.swift
@@ -7,7 +7,7 @@ struct ChatMessagesView: View {
    var body: some View {
        ScrollViewReader { proxy in
            ScrollView {
-                LazyVStack(alignment: .leading, spacing: 12) {
+                VStack(alignment: .leading, spacing: 12) {
                    if viewModel.conversation.messages.isEmpty {
                        emptyState
                    } else {
@@ -16,14 +16,20 @@ struct ChatMessagesView: View {
                                .id(message.id)
                        }
                    }
                    Color.clear
                        .frame(height: 1)
                        .id("bottom")
                }
                .padding()
            }
            .onChange(of: viewModel.conversation.messages.last?.content) {
-                scrollToBottom(proxy: proxy)
+                // During streaming, scroll without animation to avoid overlapping animations
                proxy.scrollTo("bottom", anchor: .bottom)
            }
            .onChange(of: viewModel.conversation.messages.count) {
-                scrollToBottom(proxy: proxy)
+                withAnimation(.easeOut(duration: 0.2)) {
                    proxy.scrollTo("bottom", anchor: .bottom)
                }
            }
        }
    }
@@ -47,13 +53,6 @@ struct ChatMessagesView: View {
        .frame(maxWidth: .infinity, minHeight: 300)
    }
    /// Scrolls, with a short ease-out animation, so the newest message sits at the bottom edge.
    /// Does nothing while the conversation has no messages.
    private func scrollToBottom(proxy: ScrollViewProxy) {
        guard let newestID = viewModel.conversation.messages.last?.id else { return }
        withAnimation(.easeOut(duration: 0.2)) {
            proxy.scrollTo(newestID, anchor: .bottom)
        }
    }
 }
 struct MessageBubbleView: View {
--- a/MLXServer/Views/MonitorView.swift
+++ b/MLXServer/Views/MonitorView.swift
@@ -0,0 +1,352 @@
 import Charts
 import MLX
 import SwiftUI
 /// Real-time inference monitoring dashboard, shown in place of the chat UI.
 struct MonitorView: View {
    let stats: InferenceStats
    @Environment(ModelManager.self) private var modelManager

    var body: some View {
        ScrollView {
            VStack(spacing: 20) {
                // Live status header
                liveStatusSection

                // Time-series charts
                HStack(alignment: .top, spacing: 16) {
                    tokenRateChart
                    tokenThroughputChart
                }

                // Gauges and counters
                HStack(spacing: 16) {
                    contextGauge
                    gpuMemoryGauge
                    requestsCard
                }

                // Cumulative totals
                cumulativeSection
            }
            .padding(20)
        }
        .frame(maxWidth: .infinity, maxHeight: .infinity)
        .background(.background)
    }

    // MARK: - Shared building blocks

    /// Wraps `content` in the padded, rounded material background shared by every panel.
    private func card<Content: View>(@ViewBuilder content: () -> Content) -> some View {
        content()
            .padding(12)
            .background(.regularMaterial, in: RoundedRectangle(cornerRadius: 10))
    }

    /// Small bold secondary caption used as a panel heading.
    private func cardTitle(_ title: LocalizedStringKey) -> some View {
        Text(title)
            .font(.caption.bold())
            .foregroundStyle(.secondary)
    }

    // MARK: - Live Status

    /// Coarse activity state derived from the stats flags, in priority order.
    private enum Activity {
        case prefilling, generating, processing, idle
    }

    private var activity: Activity {
        if stats.isPrefilling { return .prefilling }
        if stats.isGenerating { return .generating }
        if stats.activeRequests > 0 { return .processing }
        return .idle
    }

    private var activityColor: Color {
        switch activity {
        case .prefilling: return .blue
        case .generating: return .green
        case .processing: return .orange
        case .idle: return .secondary
        }
    }

    private var activityLabel: String {
        switch activity {
        case .prefilling: return "Prefilling"
        case .generating: return "Generating"
        case .processing: return "Processing"
        case .idle: return "Idle"
        }
    }

    /// Coloured dot (with a halo ring while prefilling or generating) plus the state name.
    private var activityIndicator: some View {
        HStack(spacing: 8) {
            Circle()
                .fill(activityColor)
                .frame(width: 10, height: 10)
                .overlay {
                    if stats.isGenerating || stats.isPrefilling {
                        Circle()
                            .stroke(activityColor.opacity(0.5), lineWidth: 2)
                            .scaleEffect(1.8)
                            .opacity(0.6)
                    }
                }
            Text(activityLabel)
                .font(.headline)
        }
    }

    /// Header row: activity state, current speed, and the current request's token counts.
    private var liveStatusSection: some View {
        card {
            HStack(spacing: 16) {
                activityIndicator

                Spacer()

                if stats.isGenerating {
                    Text(String(format: "%.1f tok/s", stats.currentTokensPerSecond))
                        .font(.title2.monospacedDigit().bold())
                        .foregroundStyle(.green)
                }

                if stats.currentPromptTokens > 0 {
                    HStack(spacing: 4) {
                        Image(systemName: "arrow.down.circle.fill")
                            .foregroundStyle(.blue)
                        Text("\(stats.currentPromptTokens)")
                            .monospacedDigit()
                        Image(systemName: "arrow.up.circle.fill")
                            .foregroundStyle(.orange)
                        Text("\(stats.currentGenerationTokens)")
                            .monospacedDigit()
                    }
                    .font(.callout)
                }
            }
        }
    }

    // MARK: - Token Rate Chart

    /// Largest sampled rate, or 10 while there is no history, used to size the y-axis.
    private var maxTokenRate: Double {
        stats.tokenRateHistory.map(\.value).max() ?? 10
    }

    /// Line + area chart of the sampled generation speed.
    private var tokenRateChart: some View {
        card {
            VStack(alignment: .leading, spacing: 6) {
                cardTitle("Generation Speed (tok/s)")

                Chart(stats.tokenRateHistory) { sample in
                    LineMark(
                        x: .value("Time", sample.timestamp),
                        y: .value("tok/s", sample.value)
                    )
                    .foregroundStyle(.green)
                    .interpolationMethod(.monotone)

                    AreaMark(
                        x: .value("Time", sample.timestamp),
                        y: .value("tok/s", sample.value)
                    )
                    .foregroundStyle(.green.opacity(0.1))
                    .interpolationMethod(.monotone)
                }
                .chartXAxis {
                    AxisMarks(values: .stride(by: .second, count: 30)) { _ in
                        AxisGridLine()
                    }
                }
                .chartYAxis {
                    AxisMarks(position: .leading) { mark in
                        AxisGridLine()
                        AxisValueLabel {
                            if let number = mark.as(Double.self) {
                                Text(String(format: "%.0f", number))
                                    .font(.caption2.monospacedDigit())
                            }
                        }
                    }
                }
                // One unit of headroom keeps the domain non-empty when every sample is 0.
                .chartYScale(domain: 0...(maxTokenRate + 1))
                .frame(height: 150)
            }
        }
    }

    // MARK: - Token Throughput Chart

    /// Dot-and-name legend entry for the throughput chart.
    private func legendEntry(_ title: LocalizedStringKey, color: Color) -> some View {
        Label(title, systemImage: "circle.fill")
            .font(.caption2)
            .foregroundStyle(color)
    }

    /// Bar chart of prompt and generated tokens recorded per sample.
    private var tokenThroughputChart: some View {
        card {
            VStack(alignment: .leading, spacing: 6) {
                cardTitle("Token Throughput (/sec)")

                Chart {
                    ForEach(stats.promptTokenHistory) { sample in
                        BarMark(
                            x: .value("Time", sample.timestamp),
                            y: .value("Tokens", sample.value)
                        )
                        .foregroundStyle(.blue.opacity(0.7))
                    }
                    ForEach(stats.generationTokenHistory) { sample in
                        BarMark(
                            x: .value("Time", sample.timestamp),
                            y: .value("Tokens", sample.value)
                        )
                        .foregroundStyle(.orange.opacity(0.7))
                    }
                }
                .chartXAxis {
                    AxisMarks(values: .stride(by: .second, count: 30)) { _ in
                        AxisGridLine()
                    }
                }
                .chartYAxis {
                    AxisMarks(position: .leading) { mark in
                        AxisGridLine()
                        AxisValueLabel {
                            if let number = mark.as(Double.self) {
                                Text(String(format: "%.0f", number))
                                    .font(.caption2.monospacedDigit())
                            }
                        }
                    }
                }
                .frame(height: 150)

                // Legend
                HStack(spacing: 12) {
                    legendEntry("Prompt", color: .blue)
                    legendEntry("Generation", color: .orange)
                }
            }
        }
    }

    // MARK: - Context Gauge

    /// Gauge tint: blue normally, orange above 70% and red above 90% of the context.
    private func contextGradient(ratio: Double) -> Color {
        switch ratio {
        case let r where r > 0.9: return .red
        case let r where r > 0.7: return .orange
        default: return .blue
        }
    }

    /// Circular gauge of context tokens used against the larger of the request's
    /// reported maximum and the current model's context length.
    private var contextGauge: some View {
        let limit = max(stats.contextMax, modelManager.currentModel?.contextLength ?? 0)
        let consumed = stats.contextUsed
        // Guard against dividing by zero before any model or request is known.
        let fill = limit > 0 ? Double(consumed) / Double(limit) : 0

        return card {
            VStack(spacing: 8) {
                cardTitle("Context")

                Gauge(value: fill) {
                    EmptyView()
                } currentValueLabel: {
                    Text(formatTokenCount(consumed))
                        .font(.title3.monospacedDigit().bold())
                } minimumValueLabel: {
                    Text("0")
                        .font(.caption2)
                } maximumValueLabel: {
                    Text(formatTokenCount(limit))
                        .font(.caption2)
                }
                .gaugeStyle(.accessoryCircular)
                .scaleEffect(1.3)
                .tint(contextGradient(ratio: fill))

                Text("\(Int(fill * 100))%")
                    .font(.caption.monospacedDigit())
                    .foregroundStyle(.secondary)
            }
            .frame(maxWidth: .infinity)
        }
    }

    // MARK: - GPU Memory Gauge

    /// Converts a byte count to mebibytes.
    private func megabytes<Bytes: BinaryInteger>(_ bytes: Bytes) -> Double {
        Double(bytes) / 1_048_576
    }

    /// Active and peak memory as read from `MLX.GPU` each time the view body is evaluated.
    private var gpuMemoryGauge: some View {
        let active = megabytes(MLX.GPU.activeMemory)
        let peak = megabytes(MLX.GPU.peakMemory)

        return card {
            VStack(spacing: 8) {
                cardTitle("GPU Memory")

                Text(String(format: "%.0f MB", active))
                    .font(.title3.monospacedDigit().bold())

                if peak > 0 {
                    Text(String(format: "Peak: %.0f MB", peak))
                        .font(.caption2.monospacedDigit())
                        .foregroundStyle(.tertiary)
                }
            }
            .frame(maxWidth: .infinity)
        }
    }

    // MARK: - Requests Card

    /// Total request count with a line showing how many are currently active.
    private var requestsCard: some View {
        card {
            VStack(spacing: 8) {
                cardTitle("Requests")

                Text("\(stats.totalRequests)")
                    .font(.title3.monospacedDigit().bold())

                if stats.activeRequests > 0 {
                    Text("\(stats.activeRequests) active")
                        .font(.caption2)
                        .foregroundStyle(.green)
                } else {
                    Text("none active")
                        .font(.caption2)
                        .foregroundStyle(.tertiary)
                }
            }
            .frame(maxWidth: .infinity)
        }
    }

    // MARK: - Cumulative

    /// Caption stacked above a caller-supplied value view.
    private func totalColumn<Value: View>(
        _ title: LocalizedStringKey,
        @ViewBuilder value: () -> Value
    ) -> some View {
        VStack(spacing: 2) {
            Text(title)
                .font(.caption2)
                .foregroundStyle(.secondary)
            value()
        }
    }

    /// Row of cumulative prompt, generated and combined token totals.
    private var cumulativeSection: some View {
        card {
            HStack(spacing: 24) {
                totalColumn("Total Prompt Tokens") {
                    Text(formatTokenCount(stats.totalPromptTokens))
                        .font(.callout.monospacedDigit().bold())
                        .foregroundStyle(.blue)
                }
                totalColumn("Total Generated Tokens") {
                    Text(formatTokenCount(stats.totalGenerationTokens))
                        .font(.callout.monospacedDigit().bold())
                        .foregroundStyle(.orange)
                }
                totalColumn("Total Tokens") {
                    Text(formatTokenCount(stats.totalPromptTokens + stats.totalGenerationTokens))
                        .font(.callout.monospacedDigit().bold())
                }
            }
            .frame(maxWidth: .infinity)
        }
    }

    // MARK: - Helpers

    /// Abbreviates a token count: `1.2M` from a million up, `3.4k` from a thousand up,
    /// otherwise the plain number.
    private func formatTokenCount(_ count: Int) -> String {
        switch count {
        case 1_000_000...:
            return String(format: "%.1fM", Double(count) / 1_000_000)
        case 1_000...:
            return String(format: "%.1fk", Double(count) / 1_000)
        default:
            return "\(count)"
        }
    }
 }