feat: inference visualisation
This commit is contained in:
@@ -11,6 +11,7 @@
|
||||
165E8AB6ADAE1D59B1A86420 /* Preferences.swift in Sources */ = {isa = PBXBuildFile; fileRef = 145B888FBDD4F931512C5473 /* Preferences.swift */; };
|
||||
189362AAE2CDE5D4B3428334 /* ToolCallParser.swift in Sources */ = {isa = PBXBuildFile; fileRef = E73B165A1822729C907791AE /* ToolCallParser.swift */; };
|
||||
2CAAF7129F7CC45200FA9F6B /* ModelPickerView.swift in Sources */ = {isa = PBXBuildFile; fileRef = C3C3A76C02AF70A9D8F868FC /* ModelPickerView.swift */; };
|
||||
2D08769282BD71C170DB0943 /* InferenceStats.swift in Sources */ = {isa = PBXBuildFile; fileRef = E35452B166893B25E765FF70 /* InferenceStats.swift */; };
|
||||
4CB13DC1AC7A500DDBB443EC /* ChatInputView.swift in Sources */ = {isa = PBXBuildFile; fileRef = E5E6AD02CDF23BDAB64700A7 /* ChatInputView.swift */; };
|
||||
50B6861FF8610B3ED4FFAD9D /* MLXServerApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = C67742651DB486871CEF1612 /* MLXServerApp.swift */; };
|
||||
50DD129CCF2843482DEC3B96 /* APIServer.swift in Sources */ = {isa = PBXBuildFile; fileRef = 3D08828E16B17EF02C14243E /* APIServer.swift */; };
|
||||
@@ -22,6 +23,7 @@
|
||||
80646C5066BF79BC76E1D9D7 /* ModelConfig.swift in Sources */ = {isa = PBXBuildFile; fileRef = 38DFC212AF4359A45FBE22BA /* ModelConfig.swift */; };
|
||||
84D32315B418B5243E017350 /* ToolPromptBuilder.swift in Sources */ = {isa = PBXBuildFile; fileRef = 16AE82A64D1D07AE3CD8D33A /* ToolPromptBuilder.swift */; };
|
||||
945474365D0B3E961811909A /* MLXVLM in Frameworks */ = {isa = PBXBuildFile; productRef = D5E8E1C2DD8D8AABB4306193 /* MLXVLM */; };
|
||||
B1D9BC407DB7DB1489230C20 /* MonitorView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4239CFF94B819C35A8D4D617 /* MonitorView.swift */; };
|
||||
B5AA6E3B4BE21676226B342B /* ChatViewModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = B8BD93859F0291F1A3E09DA5 /* ChatViewModel.swift */; };
|
||||
B6D3662995B885C102876B4A /* MLXLMCommon in Frameworks */ = {isa = PBXBuildFile; productRef = 9090667D4134056AE66DC2F1 /* MLXLMCommon */; };
|
||||
D666A311788375E8A061C832 /* SettingsView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4147321383E94E9F17A0154E /* SettingsView.swift */; };
|
||||
@@ -38,6 +40,7 @@
|
||||
3AF462805202797F61422AEE /* MLXServer.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = MLXServer.entitlements; sourceTree = "<group>"; };
|
||||
3D08828E16B17EF02C14243E /* APIServer.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = APIServer.swift; sourceTree = "<group>"; };
|
||||
4147321383E94E9F17A0154E /* SettingsView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SettingsView.swift; sourceTree = "<group>"; };
|
||||
4239CFF94B819C35A8D4D617 /* MonitorView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MonitorView.swift; sourceTree = "<group>"; };
|
||||
6EE59189918D06B8D2F588FC /* MLXServer.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = MLXServer.app; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
922CBDC9206737BD04AF2874 /* ModelManager.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ModelManager.swift; sourceTree = "<group>"; };
|
||||
944C699FBB76C734C9DF2F2E /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; };
|
||||
@@ -49,6 +52,7 @@
|
||||
C67742651DB486871CEF1612 /* MLXServerApp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MLXServerApp.swift; sourceTree = "<group>"; };
|
||||
D733A0D1D4AC25DDDA6C8684 /* LocalModelResolver.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LocalModelResolver.swift; sourceTree = "<group>"; };
|
||||
DB1A5E8B1C9F2BC4D262C53A /* ChatMessagesView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ChatMessagesView.swift; sourceTree = "<group>"; };
|
||||
E35452B166893B25E765FF70 /* InferenceStats.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = InferenceStats.swift; sourceTree = "<group>"; };
|
||||
E5E6AD02CDF23BDAB64700A7 /* ChatInputView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ChatInputView.swift; sourceTree = "<group>"; };
|
||||
E73B165A1822729C907791AE /* ToolCallParser.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ToolCallParser.swift; sourceTree = "<group>"; };
|
||||
F1A52E2C9964ADA9D841A89B /* APIModels.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = APIModels.swift; sourceTree = "<group>"; };
|
||||
@@ -108,6 +112,7 @@
|
||||
E5E6AD02CDF23BDAB64700A7 /* ChatInputView.swift */,
|
||||
DB1A5E8B1C9F2BC4D262C53A /* ChatMessagesView.swift */,
|
||||
C3C3A76C02AF70A9D8F868FC /* ModelPickerView.swift */,
|
||||
4239CFF94B819C35A8D4D617 /* MonitorView.swift */,
|
||||
4147321383E94E9F17A0154E /* SettingsView.swift */,
|
||||
B0EAB35D7130D56B9E7484BA /* StatusBarView.swift */,
|
||||
);
|
||||
@@ -118,6 +123,7 @@
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
A4B359324B5FD8D106C74338 /* ChatMessage.swift */,
|
||||
E35452B166893B25E765FF70 /* InferenceStats.swift */,
|
||||
38DFC212AF4359A45FBE22BA /* ModelConfig.swift */,
|
||||
);
|
||||
path = Models;
|
||||
@@ -234,11 +240,13 @@
|
||||
5C1E8FE1C521914CEF98D3AA /* ChatMessagesView.swift in Sources */,
|
||||
B5AA6E3B4BE21676226B342B /* ChatViewModel.swift in Sources */,
|
||||
5946258F1DE88CE904584E0B /* ContentView.swift in Sources */,
|
||||
2D08769282BD71C170DB0943 /* InferenceStats.swift in Sources */,
|
||||
6828CCA8B78AB40906F87CAB /* LocalModelResolver.swift in Sources */,
|
||||
50B6861FF8610B3ED4FFAD9D /* MLXServerApp.swift in Sources */,
|
||||
80646C5066BF79BC76E1D9D7 /* ModelConfig.swift in Sources */,
|
||||
0168AEE16009097901363E16 /* ModelManager.swift in Sources */,
|
||||
2CAAF7129F7CC45200FA9F6B /* ModelPickerView.swift in Sources */,
|
||||
B1D9BC407DB7DB1489230C20 /* MonitorView.swift in Sources */,
|
||||
165E8AB6ADAE1D59B1A86420 /* Preferences.swift in Sources */,
|
||||
D666A311788375E8A061C832 /* SettingsView.swift in Sources */,
|
||||
621B7E4382199AC1378F5F9C /* StatusBarView.swift in Sources */,
|
||||
|
||||
@@ -4,15 +4,10 @@ struct ContentView: View {
|
||||
@Environment(ModelManager.self) private var modelManager
|
||||
@State private var chatVM: ChatViewModel?
|
||||
@State private var showLoadError = false
|
||||
@State private var showMonitor = false
|
||||
|
||||
var body: some View {
|
||||
Group {
|
||||
if let chatVM {
|
||||
ChatView(viewModel: chatVM)
|
||||
} else {
|
||||
ProgressView("Initializing…")
|
||||
}
|
||||
}
|
||||
mainContent
|
||||
.navigationTitle(modelManager.currentModel?.displayName ?? "MLX Server")
|
||||
.onAppear {
|
||||
if chatVM == nil {
|
||||
@@ -49,9 +44,33 @@ struct ContentView: View {
|
||||
ToolbarItem(placement: .principal) {
|
||||
ModelPickerView()
|
||||
}
|
||||
|
||||
ToolbarItemGroup(placement: .primaryAction) {
|
||||
toolbarButtons
|
||||
}
|
||||
}
|
||||
// Cmd+1/2/3 model switching
|
||||
.background {
|
||||
modelSwitchShortcuts
|
||||
}
|
||||
}
|
||||
|
||||
/// The view occupying the window's main area.
///
/// Shows a progress indicator until the chat view model exists; after that,
/// shows either the inference monitor or the chat UI depending on `showMonitor`.
@ViewBuilder
private var mainContent: some View {
    switch (chatVM, showMonitor) {
    case (let viewModel?, true):
        // Monitor mode: visualise the stats collected by the API server.
        MonitorView(stats: viewModel.apiServer.inferenceStats)
    case (let viewModel?, false):
        ChatView(viewModel: viewModel)
    case (nil, _):
        // The view model has not been created yet.
        ProgressView("Initializing…")
    }
}
|
||||
|
||||
@ViewBuilder
|
||||
private var toolbarButtons: some View {
|
||||
// API server toggle
|
||||
let isRunning = chatVM?.apiServer.isRunning == true
|
||||
Button {
|
||||
if let chatVM {
|
||||
if chatVM.apiServer.isRunning {
|
||||
@@ -61,15 +80,26 @@ struct ContentView: View {
|
||||
}
|
||||
}
|
||||
} label: {
|
||||
// Running → solid globe (green tint), click to stop
|
||||
// Stopped → slashed globe, click to start
|
||||
Label(
|
||||
chatVM?.apiServer.isRunning == true ? "Stop API" : "Start API",
|
||||
systemImage: chatVM?.apiServer.isRunning == true ? "network" : "network.slash"
|
||||
isRunning ? "Stop API" : "Start API",
|
||||
systemImage: isRunning ? "network" : "network.slash"
|
||||
)
|
||||
.foregroundStyle(chatVM?.apiServer.isRunning == true ? .green : .secondary)
|
||||
.foregroundStyle(isRunning ? .green : .secondary)
|
||||
}
|
||||
.help(chatVM?.apiServer.isRunning == true ? "API server running on port \(Preferences.apiPort) — click to stop" : "Click to start API server")
|
||||
.help(isRunning ? "API server running on port \(Preferences.apiPort) — click to stop" : "Click to start API server")
|
||||
|
||||
// Monitor toggle
|
||||
Button {
|
||||
showMonitor.toggle()
|
||||
} label: {
|
||||
Label(
|
||||
showMonitor ? "Chat" : "Monitor",
|
||||
systemImage: showMonitor ? "bubble.left.and.text.bubble.right" : "chart.xyaxis.line"
|
||||
)
|
||||
.foregroundStyle(showMonitor ? Color.accentColor : Color.secondary)
|
||||
}
|
||||
.help(showMonitor ? "Switch to chat" : "Show inference monitor")
|
||||
.keyboardShortcut("m", modifiers: [.command, .shift])
|
||||
|
||||
// New conversation
|
||||
Button {
|
||||
@@ -79,12 +109,6 @@ struct ContentView: View {
|
||||
}
|
||||
.keyboardShortcut("n", modifiers: .command)
|
||||
}
|
||||
}
|
||||
// Cmd+1/2/3 model switching
|
||||
.background {
|
||||
modelSwitchShortcuts
|
||||
}
|
||||
}
|
||||
|
||||
@ViewBuilder
|
||||
private var modelSwitchShortcuts: some View {
|
||||
|
||||
141
MLXServer/Models/InferenceStats.swift
Normal file
141
MLXServer/Models/InferenceStats.swift
Normal file
@@ -0,0 +1,141 @@
|
||||
import Foundation
|
||||
|
||||
/// Lightweight stats collector for inference activity visualization.
///
/// The whole class is isolated to the main actor, so no locking is needed.
/// The "current request" properties describe a single in-flight request; they are
/// overwritten by whichever request reported last if several overlap.
@Observable
@MainActor
final class InferenceStats {
    // MARK: - Current request state

    var activeRequests: Int = 0
    var currentPromptTokens: Int = 0
    var currentGenerationTokens: Int = 0
    var isGenerating: Bool = false
    var isPrefilling: Bool = false
    var currentTokensPerSecond: Double = 0
    var contextUsed: Int = 0
    var contextMax: Int = 0

    // MARK: - Cumulative counters

    var totalRequests: Int = 0
    var totalPromptTokens: Int = 0
    var totalGenerationTokens: Int = 0

    // MARK: - Time series data (ring buffers for charts)

    /// One sample of a charted series.
    struct DataPoint: Identifiable {
        let id = UUID()
        let timestamp: Date
        let value: Double
    }

    private(set) var tokenRateHistory: [DataPoint] = []
    private(set) var promptTokenHistory: [DataPoint] = []
    private(set) var generationTokenHistory: [DataPoint] = []

    private static let maxHistoryPoints = 120 // ~2 minutes at 1Hz

    // Periodic sampling
    private var sampleTimer: Timer?
    private var lastGenerationTokenCount: Int = 0
    private var lastPromptTokenCount: Int = 0

    /// Monotonic count of generated tokens as they are reported, feeding the
    /// throughput chart. Kept separate from `totalGenerationTokens`, which is
    /// only settled when a request completes.
    private var streamedGenerationTokens: Int = 0

    /// Wall-clock time and token count of the first token of the current
    /// request, used to estimate a rate when the caller does not supply one.
    private var generationStartTime: Date?
    private var generationStartTokenCount: Int = 0

    /// Starts the 1 Hz sampling timer. Does nothing if it is already running.
    func startSampling() {
        guard sampleTimer == nil else { return }
        let timer = Timer(timeInterval: 1.0, repeats: true) { [weak self] _ in
            Task { @MainActor in
                self?.recordSample()
            }
        }
        // `.common` keeps the samples coming while the UI is in a tracking
        // run-loop mode (scrolling, open menus, window resizing).
        RunLoop.main.add(timer, forMode: .common)
        sampleTimer = timer
    }

    /// Stops the sampling timer. The recorded history is kept.
    func stopSampling() {
        sampleTimer?.invalidate()
        sampleTimer = nil
    }

    /// Appends one point to each history series and trims them to `maxHistoryPoints`.
    private func recordSample() {
        let now = Date.now

        // Tokens seen since the previous sample.
        let generatedDelta = streamedGenerationTokens - lastGenerationTokenCount
        let promptDelta = totalPromptTokens - lastPromptTokenCount
        lastGenerationTokenCount = streamedGenerationTokens
        lastPromptTokenCount = totalPromptTokens

        Self.append(DataPoint(timestamp: now, value: currentTokensPerSecond), to: &tokenRateHistory)
        Self.append(DataPoint(timestamp: now, value: Double(generatedDelta)), to: &generationTokenHistory)
        Self.append(DataPoint(timestamp: now, value: Double(promptDelta)), to: &promptTokenHistory)
    }

    /// Appends `point` to `history`, dropping the oldest entries beyond `maxHistoryPoints`.
    private static func append(_ point: DataPoint, to history: inout [DataPoint]) {
        history.append(point)
        let overflow = history.count - maxHistoryPoints
        if overflow > 0 {
            history.removeFirst(overflow)
        }
    }

    // MARK: - Event recording (called from APIServer)

    /// Records the start of a request and resets the per-request state.
    /// - Parameter contextLength: Maximum context size, stored as `contextMax`.
    func requestStarted(contextLength: Int) {
        activeRequests += 1
        totalRequests += 1
        isPrefilling = true
        isGenerating = false
        currentPromptTokens = 0
        currentGenerationTokens = 0
        currentTokensPerSecond = 0
        contextMax = contextLength
        contextUsed = 0
        generationStartTime = nil
        generationStartTokenCount = 0
    }

    /// Records the prompt size of the current request and adds it to the cumulative total.
    ///
    /// May be called after tokens were already reported through `tokenGenerated`;
    /// those tokens stay included in `contextUsed`.
    /// - Parameter promptTokens: Number of prompt tokens of the current request.
    func prefillCompleted(promptTokens: Int) {
        isPrefilling = false
        isGenerating = true
        currentPromptTokens = promptTokens
        totalPromptTokens += promptTokens
        contextUsed = promptTokens + currentGenerationTokens
    }

    /// Records generation progress for the current request.
    /// - Parameters:
    ///   - tokensPerSecond: Measured generation rate. A value that is not positive
    ///     means "unknown"; the rate is then estimated from the wall-clock time
    ///     since the first token of this request.
    ///   - totalGenerated: Tokens generated so far in the current request.
    func tokenGenerated(tokensPerSecond: Double, totalGenerated: Int) {
        let now = Date.now

        // A generated token implies prefill is over, even when the prompt
        // size has not been reported yet.
        if activeRequests > 0 {
            isPrefilling = false
            isGenerating = true
        }

        // Feed the throughput chart with the newly reported tokens only.
        let fresh = totalGenerated - currentGenerationTokens
        if fresh > 0 {
            streamedGenerationTokens += fresh
        }

        currentGenerationTokens = totalGenerated
        contextUsed = currentPromptTokens + totalGenerated

        if generationStartTime == nil {
            generationStartTime = now
            generationStartTokenCount = totalGenerated
        }

        if tokensPerSecond > 0 {
            currentTokensPerSecond = tokensPerSecond
        } else if let start = generationStartTime {
            let elapsed = now.timeIntervalSince(start)
            let produced = totalGenerated - generationStartTokenCount
            // No estimate is possible on the very first token; keep the previous value.
            if elapsed > 0, produced > 0 {
                currentTokensPerSecond = Double(produced) / elapsed
            }
        }
    }

    /// Records the end of a request (successful or failed).
    /// - Parameters:
    ///   - promptTokens: Currently unused; prompt tokens are accounted for in
    ///     `prefillCompleted`. Kept so existing call sites stay valid.
    ///   - generationTokens: Final number of generated tokens, added to
    ///     `totalGenerationTokens`.
    func requestCompleted(promptTokens: Int, generationTokens: Int) {
        activeRequests = max(0, activeRequests - 1)
        totalGenerationTokens += generationTokens

        // Tokens that were never reported through `tokenGenerated` still
        // belong in the throughput chart.
        let unseen = generationTokens - currentGenerationTokens
        if unseen > 0 {
            streamedGenerationTokens += unseen
            currentGenerationTokens = generationTokens
        }

        if activeRequests == 0 {
            isGenerating = false
            isPrefilling = false
            currentTokensPerSecond = 0
            generationStartTime = nil
        }
    }

    /// Clears all counters, per-request state and chart history.
    /// The sampling timer is left as it is.
    func reset() {
        activeRequests = 0
        currentPromptTokens = 0
        currentGenerationTokens = 0
        isGenerating = false
        isPrefilling = false
        currentTokensPerSecond = 0
        contextUsed = 0
        contextMax = 0
        totalRequests = 0
        totalPromptTokens = 0
        totalGenerationTokens = 0
        tokenRateHistory.removeAll()
        promptTokenHistory.removeAll()
        generationTokenHistory.removeAll()
        lastGenerationTokenCount = 0
        lastPromptTokenCount = 0
        streamedGenerationTokens = 0
        generationStartTime = nil
        generationStartTokenCount = 0
    }
}
|
||||
@@ -11,6 +11,7 @@ final class APIServer {
|
||||
var isRunning = false
|
||||
var port: Int = 1234
|
||||
var requestCount: Int = 0
|
||||
let inferenceStats = InferenceStats()
|
||||
|
||||
private var listener: NWListener?
|
||||
private var modelManager: ModelManager?
|
||||
@@ -54,6 +55,7 @@ final class APIServer {
|
||||
}
|
||||
|
||||
listener?.start(queue: .global(qos: .userInitiated))
|
||||
inferenceStats.startSampling()
|
||||
} catch {
|
||||
print("[APIServer] Failed to start: \(error)")
|
||||
}
|
||||
@@ -66,6 +68,7 @@ final class APIServer {
|
||||
cachedSession = nil
|
||||
cachedMessages = nil
|
||||
cachedModelId = nil
|
||||
inferenceStats.stopSampling()
|
||||
}
|
||||
|
||||
// MARK: - Connection handling
|
||||
@@ -341,6 +344,8 @@ final class APIServer {
|
||||
// Extract images from the last message only (ChatSession.streamDetails takes images separately)
|
||||
let lastImages = lastMessage.images
|
||||
|
||||
inferenceStats.requestStarted(contextLength: contextLength)
|
||||
|
||||
if isStream {
|
||||
await handleStreamingResponse(
|
||||
connection: connection,
|
||||
@@ -421,14 +426,22 @@ final class APIServer {
|
||||
switch generation {
|
||||
case .chunk(let text):
|
||||
fullText += text
|
||||
completionTokens += 1
|
||||
inferenceStats.tokenGenerated(tokensPerSecond: 0, totalGenerated: completionTokens)
|
||||
case .info(let info):
|
||||
promptTokens = info.promptTokenCount
|
||||
completionTokens = info.generationTokenCount
|
||||
inferenceStats.prefillCompleted(promptTokens: promptTokens)
|
||||
if info.tokensPerSecond > 0 {
|
||||
inferenceStats.tokenGenerated(tokensPerSecond: info.tokensPerSecond, totalGenerated: completionTokens)
|
||||
}
|
||||
case .toolCall(let call):
|
||||
frameworkToolCalls.append(call)
|
||||
}
|
||||
}
|
||||
|
||||
inferenceStats.requestCompleted(promptTokens: promptTokens, generationTokens: completionTokens)
|
||||
|
||||
// Parse tool calls: first check framework-detected ones, then our own text parser
|
||||
var finishReason = "stop"
|
||||
var responseContent: String? = fullText
|
||||
@@ -499,6 +512,7 @@ final class APIServer {
|
||||
sendResponse(connection: connection, status: 200, body: String(data: json, encoding: .utf8) ?? "{}")
|
||||
}
|
||||
} catch {
|
||||
inferenceStats.requestCompleted(promptTokens: 0, generationTokens: 0)
|
||||
sendResponse(connection: connection, status: 500, body: #"{"error":"\#(error.localizedDescription)"}"#)
|
||||
}
|
||||
}
|
||||
@@ -564,6 +578,7 @@ final class APIServer {
|
||||
case .chunk(let text):
|
||||
completionTokens += 1
|
||||
fullText += text
|
||||
inferenceStats.tokenGenerated(tokensPerSecond: 0, totalGenerated: completionTokens)
|
||||
|
||||
if !bufferForTools {
|
||||
sendSSEEvent(connection: connection, chunk: APIChatCompletionChunk(
|
||||
@@ -579,12 +594,17 @@ final class APIServer {
|
||||
case .info(let info):
|
||||
promptTokens = info.promptTokenCount
|
||||
completionTokens = info.generationTokenCount
|
||||
inferenceStats.prefillCompleted(promptTokens: promptTokens)
|
||||
if info.tokensPerSecond > 0 {
|
||||
inferenceStats.tokenGenerated(tokensPerSecond: info.tokensPerSecond, totalGenerated: completionTokens)
|
||||
}
|
||||
|
||||
case .toolCall(let call):
|
||||
frameworkToolCalls.append(call)
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
inferenceStats.requestCompleted(promptTokens: promptTokens, generationTokens: completionTokens)
|
||||
let errorEvent = "data: {\"error\":\"\(error.localizedDescription)\"}\n\n"
|
||||
connection.send(content: errorEvent.data(using: .utf8), completion: .contentProcessed({ _ in }))
|
||||
}
|
||||
@@ -687,6 +707,8 @@ final class APIServer {
|
||||
)
|
||||
))
|
||||
|
||||
inferenceStats.requestCompleted(promptTokens: promptTokens, generationTokens: completionTokens)
|
||||
|
||||
// Send [DONE] and close
|
||||
let done = "data: [DONE]\n\n"
|
||||
connection.send(content: done.data(using: .utf8), completion: .contentProcessed({ _ in
|
||||
|
||||
@@ -7,7 +7,7 @@ struct ChatMessagesView: View {
|
||||
var body: some View {
|
||||
ScrollViewReader { proxy in
|
||||
ScrollView {
|
||||
LazyVStack(alignment: .leading, spacing: 12) {
|
||||
VStack(alignment: .leading, spacing: 12) {
|
||||
if viewModel.conversation.messages.isEmpty {
|
||||
emptyState
|
||||
} else {
|
||||
@@ -16,14 +16,20 @@ struct ChatMessagesView: View {
|
||||
.id(message.id)
|
||||
}
|
||||
}
|
||||
Color.clear
|
||||
.frame(height: 1)
|
||||
.id("bottom")
|
||||
}
|
||||
.padding()
|
||||
}
|
||||
.onChange(of: viewModel.conversation.messages.last?.content) {
|
||||
scrollToBottom(proxy: proxy)
|
||||
// During streaming, scroll without animation to avoid overlapping animations
|
||||
proxy.scrollTo("bottom", anchor: .bottom)
|
||||
}
|
||||
.onChange(of: viewModel.conversation.messages.count) {
|
||||
scrollToBottom(proxy: proxy)
|
||||
withAnimation(.easeOut(duration: 0.2)) {
|
||||
proxy.scrollTo("bottom", anchor: .bottom)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -47,13 +53,6 @@ struct ChatMessagesView: View {
|
||||
.frame(maxWidth: .infinity, minHeight: 300)
|
||||
}
|
||||
|
||||
private func scrollToBottom(proxy: ScrollViewProxy) {
|
||||
if let lastId = viewModel.conversation.messages.last?.id {
|
||||
withAnimation(.easeOut(duration: 0.2)) {
|
||||
proxy.scrollTo(lastId, anchor: .bottom)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct MessageBubbleView: View {
|
||||
|
||||
352
MLXServer/Views/MonitorView.swift
Normal file
352
MLXServer/Views/MonitorView.swift
Normal file
@@ -0,0 +1,352 @@
|
||||
import Charts
|
||||
import MLX
|
||||
import SwiftUI
|
||||
|
||||
/// Real-time inference monitoring dashboard, shown in place of the chat UI.
///
/// Everything displayed is read from `stats`, the current model in the
/// environment's `ModelManager`, and MLX's GPU memory counters.
struct MonitorView: View {
    let stats: InferenceStats
    @Environment(ModelManager.self) private var modelManager

    var body: some View {
        ScrollView {
            VStack(spacing: 20) {
                // Live status header
                liveStatusSection

                // Charts
                HStack(alignment: .top, spacing: 16) {
                    tokenRateChart
                    tokenThroughputChart
                }

                // Gauges row
                HStack(spacing: 16) {
                    contextGauge
                    gpuMemoryGauge
                    requestsCard
                }

                // Cumulative stats
                cumulativeSection
            }
            .padding(20)
        }
        .frame(maxWidth: .infinity, maxHeight: .infinity)
        .background(.background)
    }

    // MARK: - Live Status

    /// Coarse activity phase derived from `stats`, in display priority order.
    private enum Activity {
        case prefilling, generating, processing, idle
    }

    private var activity: Activity {
        if stats.isPrefilling { return .prefilling }
        if stats.isGenerating { return .generating }
        if stats.activeRequests > 0 { return .processing }
        return .idle
    }

    /// Header row: activity dot and label, live rate, and per-request token counts.
    private var liveStatusSection: some View {
        card {
            HStack(spacing: 16) {
                // Activity indicator
                HStack(spacing: 8) {
                    Circle()
                        .fill(activityColor)
                        .frame(width: 10, height: 10)
                        .overlay {
                            // Halo ring while a request is being worked on.
                            if stats.isGenerating || stats.isPrefilling {
                                Circle()
                                    .stroke(activityColor.opacity(0.5), lineWidth: 2)
                                    .scaleEffect(1.8)
                                    .opacity(0.6)
                            }
                        }

                    Text(activityLabel)
                        .font(.headline)
                }

                Spacer()

                if stats.isGenerating {
                    Text(String(format: "%.1f tok/s", stats.currentTokensPerSecond))
                        .font(.title2.monospacedDigit().bold())
                        .foregroundStyle(.green)
                }

                if stats.currentPromptTokens > 0 {
                    HStack(spacing: 4) {
                        Image(systemName: "arrow.down.circle.fill")
                            .foregroundStyle(.blue)
                        Text("\(stats.currentPromptTokens)")
                            .monospacedDigit()
                        Image(systemName: "arrow.up.circle.fill")
                            .foregroundStyle(.orange)
                        Text("\(stats.currentGenerationTokens)")
                            .monospacedDigit()
                    }
                    .font(.callout)
                }
            }
        }
    }

    private var activityColor: Color {
        switch activity {
        case .prefilling: return .blue
        case .generating: return .green
        case .processing: return .orange
        case .idle: return .secondary
        }
    }

    private var activityLabel: String {
        switch activity {
        case .prefilling: return "Prefilling"
        case .generating: return "Generating"
        case .processing: return "Processing"
        case .idle: return "Idle"
        }
    }

    // MARK: - Token Rate Chart

    /// Line/area chart of the sampled generation rate.
    private var tokenRateChart: some View {
        card {
            VStack(alignment: .leading, spacing: 6) {
                Text("Generation Speed (tok/s)")
                    .font(.caption.bold())
                    .foregroundStyle(.secondary)

                Chart(stats.tokenRateHistory) { point in
                    LineMark(
                        x: .value("Time", point.timestamp),
                        y: .value("tok/s", point.value)
                    )
                    .foregroundStyle(.green)
                    .interpolationMethod(.monotone)

                    AreaMark(
                        x: .value("Time", point.timestamp),
                        y: .value("tok/s", point.value)
                    )
                    .foregroundStyle(.green.opacity(0.1))
                    .interpolationMethod(.monotone)
                }
                .chartXAxis {
                    AxisMarks(values: .stride(by: .second, count: 30)) { _ in
                        AxisGridLine()
                    }
                }
                .chartYAxis {
                    AxisMarks(position: .leading) { value in
                        AxisGridLine()
                        AxisValueLabel {
                            if let v = value.as(Double.self) {
                                Text(String(format: "%.0f", v))
                                    .font(.caption2.monospacedDigit())
                            }
                        }
                    }
                }
                // +1 keeps the domain non-empty when every sample is 0.
                .chartYScale(domain: 0...(maxTokenRate + 1))
                .frame(height: 150)
            }
        }
    }

    /// Largest sampled rate, or 10 when there is no history yet.
    private var maxTokenRate: Double {
        stats.tokenRateHistory.map(\.value).max() ?? 10
    }

    // MARK: - Token Throughput Chart

    /// Bar chart of prompt and generation tokens per sample, with a small legend.
    private var tokenThroughputChart: some View {
        card {
            VStack(alignment: .leading, spacing: 6) {
                Text("Token Throughput (/sec)")
                    .font(.caption.bold())
                    .foregroundStyle(.secondary)

                Chart {
                    ForEach(stats.promptTokenHistory) { point in
                        BarMark(
                            x: .value("Time", point.timestamp),
                            y: .value("Tokens", point.value)
                        )
                        .foregroundStyle(.blue.opacity(0.7))
                    }
                    ForEach(stats.generationTokenHistory) { point in
                        BarMark(
                            x: .value("Time", point.timestamp),
                            y: .value("Tokens", point.value)
                        )
                        .foregroundStyle(.orange.opacity(0.7))
                    }
                }
                .chartXAxis {
                    AxisMarks(values: .stride(by: .second, count: 30)) { _ in
                        AxisGridLine()
                    }
                }
                .chartYAxis {
                    AxisMarks(position: .leading) { value in
                        AxisGridLine()
                        AxisValueLabel {
                            if let v = value.as(Double.self) {
                                Text(String(format: "%.0f", v))
                                    .font(.caption2.monospacedDigit())
                            }
                        }
                    }
                }
                .frame(height: 150)

                // Legend
                HStack(spacing: 12) {
                    Label("Prompt", systemImage: "circle.fill")
                        .font(.caption2)
                        .foregroundStyle(.blue)
                    Label("Generation", systemImage: "circle.fill")
                        .font(.caption2)
                        .foregroundStyle(.orange)
                }
            }
        }
    }

    // MARK: - Context Gauge

    /// Circular gauge of context usage for the current request.
    private var contextGauge: some View {
        let maxCtx = max(stats.contextMax, modelManager.currentModel?.contextLength ?? 0)
        let used = stats.contextUsed
        let ratio = maxCtx > 0 ? Double(used) / Double(maxCtx) : 0
        // The gauge uses its default 0...1 range; usage can exceed the reported
        // maximum, so the dial is clamped while the labels keep the real figures.
        let gaugeValue = min(max(ratio, 0), 1)

        return card {
            VStack(spacing: 8) {
                Text("Context")
                    .font(.caption.bold())
                    .foregroundStyle(.secondary)

                Gauge(value: gaugeValue) {
                    EmptyView()
                } currentValueLabel: {
                    Text(formatTokenCount(used))
                        .font(.title3.monospacedDigit().bold())
                } minimumValueLabel: {
                    Text("0")
                        .font(.caption2)
                } maximumValueLabel: {
                    Text(formatTokenCount(maxCtx))
                        .font(.caption2)
                }
                .gaugeStyle(.accessoryCircular)
                .scaleEffect(1.3)
                .tint(contextGradient(ratio: ratio))

                Text("\(Int(ratio * 100))%")
                    .font(.caption.monospacedDigit())
                    .foregroundStyle(.secondary)
            }
            .frame(maxWidth: .infinity)
        }
    }

    /// Tint for the context gauge: red above 90 %, orange above 70 %, otherwise blue.
    private func contextGradient(ratio: Double) -> Color {
        if ratio > 0.9 { return .red }
        if ratio > 0.7 { return .orange }
        return .blue
    }

    // MARK: - GPU Memory Gauge

    /// Active and peak GPU memory as reported by MLX, in MB.
    ///
    /// The values are read when this view is evaluated; they are not observable
    /// on their own and refresh only when something else invalidates the view.
    private var gpuMemoryGauge: some View {
        let activeMB = Double(MLX.GPU.activeMemory) / 1_048_576
        let peakMB = Double(MLX.GPU.peakMemory) / 1_048_576

        return card {
            VStack(spacing: 8) {
                Text("GPU Memory")
                    .font(.caption.bold())
                    .foregroundStyle(.secondary)

                Text(String(format: "%.0f MB", activeMB))
                    .font(.title3.monospacedDigit().bold())

                if peakMB > 0 {
                    Text(String(format: "Peak: %.0f MB", peakMB))
                        .font(.caption2.monospacedDigit())
                        .foregroundStyle(.tertiary)
                }
            }
            .frame(maxWidth: .infinity)
        }
    }

    // MARK: - Requests Card

    /// Total request count plus the number currently in flight.
    private var requestsCard: some View {
        card {
            VStack(spacing: 8) {
                Text("Requests")
                    .font(.caption.bold())
                    .foregroundStyle(.secondary)

                Text("\(stats.totalRequests)")
                    .font(.title3.monospacedDigit().bold())

                if stats.activeRequests > 0 {
                    Text("\(stats.activeRequests) active")
                        .font(.caption2)
                        .foregroundStyle(.green)
                } else {
                    Text("none active")
                        .font(.caption2)
                        .foregroundStyle(.tertiary)
                }
            }
            .frame(maxWidth: .infinity)
        }
    }

    // MARK: - Cumulative

    /// Cumulative prompt, generated and combined token counts.
    private var cumulativeSection: some View {
        card {
            HStack(spacing: 24) {
                VStack(spacing: 2) {
                    Text("Total Prompt Tokens")
                        .font(.caption2)
                        .foregroundStyle(.secondary)
                    Text(formatTokenCount(stats.totalPromptTokens))
                        .font(.callout.monospacedDigit().bold())
                        .foregroundStyle(.blue)
                }

                VStack(spacing: 2) {
                    Text("Total Generated Tokens")
                        .font(.caption2)
                        .foregroundStyle(.secondary)
                    Text(formatTokenCount(stats.totalGenerationTokens))
                        .font(.callout.monospacedDigit().bold())
                        .foregroundStyle(.orange)
                }

                VStack(spacing: 2) {
                    Text("Total Tokens")
                        .font(.caption2)
                        .foregroundStyle(.secondary)
                    Text(formatTokenCount(stats.totalPromptTokens + stats.totalGenerationTokens))
                        .font(.callout.monospacedDigit().bold())
                }
            }
            .frame(maxWidth: .infinity)
        }
    }

    // MARK: - Helpers

    /// Wraps `content` in the padded, rounded material background shared by every card.
    private func card<Content: View>(@ViewBuilder content: () -> Content) -> some View {
        content()
            .padding(12)
            .background(.regularMaterial, in: RoundedRectangle(cornerRadius: 10))
    }

    /// Formats a token count compactly: `1.2M`, `3.4k`, or the plain number below 1000.
    private func formatTokenCount(_ count: Int) -> String {
        if count >= 1_000_000 {
            return String(format: "%.1fM", Double(count) / 1_000_000)
        } else if count >= 1_000 {
            return String(format: "%.1fk", Double(count) / 1_000)
        }
        return "\(count)"
    }
}
|
||||
Reference in New Issue
Block a user