feat: inference visualisation
This commit is contained in:
@@ -11,6 +11,7 @@
|
|||||||
165E8AB6ADAE1D59B1A86420 /* Preferences.swift in Sources */ = {isa = PBXBuildFile; fileRef = 145B888FBDD4F931512C5473 /* Preferences.swift */; };
|
165E8AB6ADAE1D59B1A86420 /* Preferences.swift in Sources */ = {isa = PBXBuildFile; fileRef = 145B888FBDD4F931512C5473 /* Preferences.swift */; };
|
||||||
189362AAE2CDE5D4B3428334 /* ToolCallParser.swift in Sources */ = {isa = PBXBuildFile; fileRef = E73B165A1822729C907791AE /* ToolCallParser.swift */; };
|
189362AAE2CDE5D4B3428334 /* ToolCallParser.swift in Sources */ = {isa = PBXBuildFile; fileRef = E73B165A1822729C907791AE /* ToolCallParser.swift */; };
|
||||||
2CAAF7129F7CC45200FA9F6B /* ModelPickerView.swift in Sources */ = {isa = PBXBuildFile; fileRef = C3C3A76C02AF70A9D8F868FC /* ModelPickerView.swift */; };
|
2CAAF7129F7CC45200FA9F6B /* ModelPickerView.swift in Sources */ = {isa = PBXBuildFile; fileRef = C3C3A76C02AF70A9D8F868FC /* ModelPickerView.swift */; };
|
||||||
|
2D08769282BD71C170DB0943 /* InferenceStats.swift in Sources */ = {isa = PBXBuildFile; fileRef = E35452B166893B25E765FF70 /* InferenceStats.swift */; };
|
||||||
4CB13DC1AC7A500DDBB443EC /* ChatInputView.swift in Sources */ = {isa = PBXBuildFile; fileRef = E5E6AD02CDF23BDAB64700A7 /* ChatInputView.swift */; };
|
4CB13DC1AC7A500DDBB443EC /* ChatInputView.swift in Sources */ = {isa = PBXBuildFile; fileRef = E5E6AD02CDF23BDAB64700A7 /* ChatInputView.swift */; };
|
||||||
50B6861FF8610B3ED4FFAD9D /* MLXServerApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = C67742651DB486871CEF1612 /* MLXServerApp.swift */; };
|
50B6861FF8610B3ED4FFAD9D /* MLXServerApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = C67742651DB486871CEF1612 /* MLXServerApp.swift */; };
|
||||||
50DD129CCF2843482DEC3B96 /* APIServer.swift in Sources */ = {isa = PBXBuildFile; fileRef = 3D08828E16B17EF02C14243E /* APIServer.swift */; };
|
50DD129CCF2843482DEC3B96 /* APIServer.swift in Sources */ = {isa = PBXBuildFile; fileRef = 3D08828E16B17EF02C14243E /* APIServer.swift */; };
|
||||||
@@ -22,6 +23,7 @@
|
|||||||
80646C5066BF79BC76E1D9D7 /* ModelConfig.swift in Sources */ = {isa = PBXBuildFile; fileRef = 38DFC212AF4359A45FBE22BA /* ModelConfig.swift */; };
|
80646C5066BF79BC76E1D9D7 /* ModelConfig.swift in Sources */ = {isa = PBXBuildFile; fileRef = 38DFC212AF4359A45FBE22BA /* ModelConfig.swift */; };
|
||||||
84D32315B418B5243E017350 /* ToolPromptBuilder.swift in Sources */ = {isa = PBXBuildFile; fileRef = 16AE82A64D1D07AE3CD8D33A /* ToolPromptBuilder.swift */; };
|
84D32315B418B5243E017350 /* ToolPromptBuilder.swift in Sources */ = {isa = PBXBuildFile; fileRef = 16AE82A64D1D07AE3CD8D33A /* ToolPromptBuilder.swift */; };
|
||||||
945474365D0B3E961811909A /* MLXVLM in Frameworks */ = {isa = PBXBuildFile; productRef = D5E8E1C2DD8D8AABB4306193 /* MLXVLM */; };
|
945474365D0B3E961811909A /* MLXVLM in Frameworks */ = {isa = PBXBuildFile; productRef = D5E8E1C2DD8D8AABB4306193 /* MLXVLM */; };
|
||||||
|
B1D9BC407DB7DB1489230C20 /* MonitorView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4239CFF94B819C35A8D4D617 /* MonitorView.swift */; };
|
||||||
B5AA6E3B4BE21676226B342B /* ChatViewModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = B8BD93859F0291F1A3E09DA5 /* ChatViewModel.swift */; };
|
B5AA6E3B4BE21676226B342B /* ChatViewModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = B8BD93859F0291F1A3E09DA5 /* ChatViewModel.swift */; };
|
||||||
B6D3662995B885C102876B4A /* MLXLMCommon in Frameworks */ = {isa = PBXBuildFile; productRef = 9090667D4134056AE66DC2F1 /* MLXLMCommon */; };
|
B6D3662995B885C102876B4A /* MLXLMCommon in Frameworks */ = {isa = PBXBuildFile; productRef = 9090667D4134056AE66DC2F1 /* MLXLMCommon */; };
|
||||||
D666A311788375E8A061C832 /* SettingsView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4147321383E94E9F17A0154E /* SettingsView.swift */; };
|
D666A311788375E8A061C832 /* SettingsView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4147321383E94E9F17A0154E /* SettingsView.swift */; };
|
||||||
@@ -38,6 +40,7 @@
|
|||||||
3AF462805202797F61422AEE /* MLXServer.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = MLXServer.entitlements; sourceTree = "<group>"; };
|
3AF462805202797F61422AEE /* MLXServer.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = MLXServer.entitlements; sourceTree = "<group>"; };
|
||||||
3D08828E16B17EF02C14243E /* APIServer.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = APIServer.swift; sourceTree = "<group>"; };
|
3D08828E16B17EF02C14243E /* APIServer.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = APIServer.swift; sourceTree = "<group>"; };
|
||||||
4147321383E94E9F17A0154E /* SettingsView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SettingsView.swift; sourceTree = "<group>"; };
|
4147321383E94E9F17A0154E /* SettingsView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SettingsView.swift; sourceTree = "<group>"; };
|
||||||
|
4239CFF94B819C35A8D4D617 /* MonitorView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MonitorView.swift; sourceTree = "<group>"; };
|
||||||
6EE59189918D06B8D2F588FC /* MLXServer.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = MLXServer.app; sourceTree = BUILT_PRODUCTS_DIR; };
|
6EE59189918D06B8D2F588FC /* MLXServer.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = MLXServer.app; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||||
922CBDC9206737BD04AF2874 /* ModelManager.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ModelManager.swift; sourceTree = "<group>"; };
|
922CBDC9206737BD04AF2874 /* ModelManager.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ModelManager.swift; sourceTree = "<group>"; };
|
||||||
944C699FBB76C734C9DF2F2E /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; };
|
944C699FBB76C734C9DF2F2E /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; };
|
||||||
@@ -49,6 +52,7 @@
|
|||||||
C67742651DB486871CEF1612 /* MLXServerApp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MLXServerApp.swift; sourceTree = "<group>"; };
|
C67742651DB486871CEF1612 /* MLXServerApp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MLXServerApp.swift; sourceTree = "<group>"; };
|
||||||
D733A0D1D4AC25DDDA6C8684 /* LocalModelResolver.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LocalModelResolver.swift; sourceTree = "<group>"; };
|
D733A0D1D4AC25DDDA6C8684 /* LocalModelResolver.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LocalModelResolver.swift; sourceTree = "<group>"; };
|
||||||
DB1A5E8B1C9F2BC4D262C53A /* ChatMessagesView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ChatMessagesView.swift; sourceTree = "<group>"; };
|
DB1A5E8B1C9F2BC4D262C53A /* ChatMessagesView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ChatMessagesView.swift; sourceTree = "<group>"; };
|
||||||
|
E35452B166893B25E765FF70 /* InferenceStats.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = InferenceStats.swift; sourceTree = "<group>"; };
|
||||||
E5E6AD02CDF23BDAB64700A7 /* ChatInputView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ChatInputView.swift; sourceTree = "<group>"; };
|
E5E6AD02CDF23BDAB64700A7 /* ChatInputView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ChatInputView.swift; sourceTree = "<group>"; };
|
||||||
E73B165A1822729C907791AE /* ToolCallParser.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ToolCallParser.swift; sourceTree = "<group>"; };
|
E73B165A1822729C907791AE /* ToolCallParser.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ToolCallParser.swift; sourceTree = "<group>"; };
|
||||||
F1A52E2C9964ADA9D841A89B /* APIModels.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = APIModels.swift; sourceTree = "<group>"; };
|
F1A52E2C9964ADA9D841A89B /* APIModels.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = APIModels.swift; sourceTree = "<group>"; };
|
||||||
@@ -108,6 +112,7 @@
|
|||||||
E5E6AD02CDF23BDAB64700A7 /* ChatInputView.swift */,
|
E5E6AD02CDF23BDAB64700A7 /* ChatInputView.swift */,
|
||||||
DB1A5E8B1C9F2BC4D262C53A /* ChatMessagesView.swift */,
|
DB1A5E8B1C9F2BC4D262C53A /* ChatMessagesView.swift */,
|
||||||
C3C3A76C02AF70A9D8F868FC /* ModelPickerView.swift */,
|
C3C3A76C02AF70A9D8F868FC /* ModelPickerView.swift */,
|
||||||
|
4239CFF94B819C35A8D4D617 /* MonitorView.swift */,
|
||||||
4147321383E94E9F17A0154E /* SettingsView.swift */,
|
4147321383E94E9F17A0154E /* SettingsView.swift */,
|
||||||
B0EAB35D7130D56B9E7484BA /* StatusBarView.swift */,
|
B0EAB35D7130D56B9E7484BA /* StatusBarView.swift */,
|
||||||
);
|
);
|
||||||
@@ -118,6 +123,7 @@
|
|||||||
isa = PBXGroup;
|
isa = PBXGroup;
|
||||||
children = (
|
children = (
|
||||||
A4B359324B5FD8D106C74338 /* ChatMessage.swift */,
|
A4B359324B5FD8D106C74338 /* ChatMessage.swift */,
|
||||||
|
E35452B166893B25E765FF70 /* InferenceStats.swift */,
|
||||||
38DFC212AF4359A45FBE22BA /* ModelConfig.swift */,
|
38DFC212AF4359A45FBE22BA /* ModelConfig.swift */,
|
||||||
);
|
);
|
||||||
path = Models;
|
path = Models;
|
||||||
@@ -234,11 +240,13 @@
|
|||||||
5C1E8FE1C521914CEF98D3AA /* ChatMessagesView.swift in Sources */,
|
5C1E8FE1C521914CEF98D3AA /* ChatMessagesView.swift in Sources */,
|
||||||
B5AA6E3B4BE21676226B342B /* ChatViewModel.swift in Sources */,
|
B5AA6E3B4BE21676226B342B /* ChatViewModel.swift in Sources */,
|
||||||
5946258F1DE88CE904584E0B /* ContentView.swift in Sources */,
|
5946258F1DE88CE904584E0B /* ContentView.swift in Sources */,
|
||||||
|
2D08769282BD71C170DB0943 /* InferenceStats.swift in Sources */,
|
||||||
6828CCA8B78AB40906F87CAB /* LocalModelResolver.swift in Sources */,
|
6828CCA8B78AB40906F87CAB /* LocalModelResolver.swift in Sources */,
|
||||||
50B6861FF8610B3ED4FFAD9D /* MLXServerApp.swift in Sources */,
|
50B6861FF8610B3ED4FFAD9D /* MLXServerApp.swift in Sources */,
|
||||||
80646C5066BF79BC76E1D9D7 /* ModelConfig.swift in Sources */,
|
80646C5066BF79BC76E1D9D7 /* ModelConfig.swift in Sources */,
|
||||||
0168AEE16009097901363E16 /* ModelManager.swift in Sources */,
|
0168AEE16009097901363E16 /* ModelManager.swift in Sources */,
|
||||||
2CAAF7129F7CC45200FA9F6B /* ModelPickerView.swift in Sources */,
|
2CAAF7129F7CC45200FA9F6B /* ModelPickerView.swift in Sources */,
|
||||||
|
B1D9BC407DB7DB1489230C20 /* MonitorView.swift in Sources */,
|
||||||
165E8AB6ADAE1D59B1A86420 /* Preferences.swift in Sources */,
|
165E8AB6ADAE1D59B1A86420 /* Preferences.swift in Sources */,
|
||||||
D666A311788375E8A061C832 /* SettingsView.swift in Sources */,
|
D666A311788375E8A061C832 /* SettingsView.swift in Sources */,
|
||||||
621B7E4382199AC1378F5F9C /* StatusBarView.swift in Sources */,
|
621B7E4382199AC1378F5F9C /* StatusBarView.swift in Sources */,
|
||||||
|
|||||||
@@ -4,86 +4,110 @@ struct ContentView: View {
|
|||||||
@Environment(ModelManager.self) private var modelManager
|
@Environment(ModelManager.self) private var modelManager
|
||||||
@State private var chatVM: ChatViewModel?
|
@State private var chatVM: ChatViewModel?
|
||||||
@State private var showLoadError = false
|
@State private var showLoadError = false
|
||||||
|
@State private var showMonitor = false
|
||||||
|
|
||||||
var body: some View {
|
var body: some View {
|
||||||
Group {
|
mainContent
|
||||||
if let chatVM {
|
.navigationTitle(modelManager.currentModel?.displayName ?? "MLX Server")
|
||||||
ChatView(viewModel: chatVM)
|
.onAppear {
|
||||||
} else {
|
if chatVM == nil {
|
||||||
ProgressView("Initializing…")
|
chatVM = ChatViewModel(modelManager: modelManager)
|
||||||
}
|
// Auto-start API server if configured
|
||||||
}
|
if Preferences.apiAutoStart {
|
||||||
.navigationTitle(modelManager.currentModel?.displayName ?? "MLX Server")
|
chatVM?.startAPIServer()
|
||||||
.onAppear {
|
|
||||||
if chatVM == nil {
|
|
||||||
chatVM = ChatViewModel(modelManager: modelManager)
|
|
||||||
// Auto-start API server if configured
|
|
||||||
if Preferences.apiAutoStart {
|
|
||||||
chatVM?.startAPIServer()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
.onChange(of: modelManager.currentModel) {
|
|
||||||
chatVM?.resetSession()
|
|
||||||
// Persist last used model
|
|
||||||
if let id = modelManager.currentModel?.id {
|
|
||||||
Preferences.lastModelId = id
|
|
||||||
}
|
|
||||||
}
|
|
||||||
.onChange(of: modelManager.errorMessage) {
|
|
||||||
showLoadError = modelManager.errorMessage != nil
|
|
||||||
}
|
|
||||||
.alert("Model Error", isPresented: $showLoadError) {
|
|
||||||
Button("Retry") {
|
|
||||||
if let config = modelManager.currentModel ?? ModelConfig.availableModels.first {
|
|
||||||
Task { await modelManager.loadModel(config) }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Button("Cancel", role: .cancel) {
|
|
||||||
modelManager.errorMessage = nil
|
|
||||||
}
|
|
||||||
} message: {
|
|
||||||
Text(modelManager.errorMessage ?? "Unknown error loading model.")
|
|
||||||
}
|
|
||||||
.toolbar {
|
|
||||||
ToolbarItem(placement: .principal) {
|
|
||||||
ModelPickerView()
|
|
||||||
}
|
|
||||||
|
|
||||||
ToolbarItemGroup(placement: .primaryAction) {
|
|
||||||
// API server toggle
|
|
||||||
Button {
|
|
||||||
if let chatVM {
|
|
||||||
if chatVM.apiServer.isRunning {
|
|
||||||
chatVM.stopAPIServer()
|
|
||||||
} else {
|
|
||||||
chatVM.startAPIServer()
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
} label: {
|
|
||||||
// Running → solid globe (green tint), click to stop
|
|
||||||
// Stopped → slashed globe, click to start
|
|
||||||
Label(
|
|
||||||
chatVM?.apiServer.isRunning == true ? "Stop API" : "Start API",
|
|
||||||
systemImage: chatVM?.apiServer.isRunning == true ? "network" : "network.slash"
|
|
||||||
)
|
|
||||||
.foregroundStyle(chatVM?.apiServer.isRunning == true ? .green : .secondary)
|
|
||||||
}
|
}
|
||||||
.help(chatVM?.apiServer.isRunning == true ? "API server running on port \(Preferences.apiPort) — click to stop" : "Click to start API server")
|
|
||||||
|
|
||||||
// New conversation
|
|
||||||
Button {
|
|
||||||
chatVM?.newConversation()
|
|
||||||
} label: {
|
|
||||||
Label("New Chat", systemImage: "plus.message")
|
|
||||||
}
|
|
||||||
.keyboardShortcut("n", modifiers: .command)
|
|
||||||
}
|
}
|
||||||
|
.onChange(of: modelManager.currentModel) {
|
||||||
|
chatVM?.resetSession()
|
||||||
|
// Persist last used model
|
||||||
|
if let id = modelManager.currentModel?.id {
|
||||||
|
Preferences.lastModelId = id
|
||||||
|
}
|
||||||
|
}
|
||||||
|
.onChange(of: modelManager.errorMessage) {
|
||||||
|
showLoadError = modelManager.errorMessage != nil
|
||||||
|
}
|
||||||
|
.alert("Model Error", isPresented: $showLoadError) {
|
||||||
|
Button("Retry") {
|
||||||
|
if let config = modelManager.currentModel ?? ModelConfig.availableModels.first {
|
||||||
|
Task { await modelManager.loadModel(config) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Button("Cancel", role: .cancel) {
|
||||||
|
modelManager.errorMessage = nil
|
||||||
|
}
|
||||||
|
} message: {
|
||||||
|
Text(modelManager.errorMessage ?? "Unknown error loading model.")
|
||||||
|
}
|
||||||
|
.toolbar {
|
||||||
|
ToolbarItem(placement: .principal) {
|
||||||
|
ModelPickerView()
|
||||||
|
}
|
||||||
|
ToolbarItemGroup(placement: .primaryAction) {
|
||||||
|
toolbarButtons
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Cmd+1/2/3 model switching
|
||||||
|
.background {
|
||||||
|
modelSwitchShortcuts
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@ViewBuilder
|
||||||
|
private var mainContent: some View {
|
||||||
|
if let chatVM {
|
||||||
|
if showMonitor {
|
||||||
|
MonitorView(stats: chatVM.apiServer.inferenceStats)
|
||||||
|
} else {
|
||||||
|
ChatView(viewModel: chatVM)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
ProgressView("Initializing…")
|
||||||
}
|
}
|
||||||
// Cmd+1/2/3 model switching
|
}
|
||||||
.background {
|
|
||||||
modelSwitchShortcuts
|
@ViewBuilder
|
||||||
|
private var toolbarButtons: some View {
|
||||||
|
// API server toggle
|
||||||
|
let isRunning = chatVM?.apiServer.isRunning == true
|
||||||
|
Button {
|
||||||
|
if let chatVM {
|
||||||
|
if chatVM.apiServer.isRunning {
|
||||||
|
chatVM.stopAPIServer()
|
||||||
|
} else {
|
||||||
|
chatVM.startAPIServer()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} label: {
|
||||||
|
Label(
|
||||||
|
isRunning ? "Stop API" : "Start API",
|
||||||
|
systemImage: isRunning ? "network" : "network.slash"
|
||||||
|
)
|
||||||
|
.foregroundStyle(isRunning ? .green : .secondary)
|
||||||
}
|
}
|
||||||
|
.help(isRunning ? "API server running on port \(Preferences.apiPort) — click to stop" : "Click to start API server")
|
||||||
|
|
||||||
|
// Monitor toggle
|
||||||
|
Button {
|
||||||
|
showMonitor.toggle()
|
||||||
|
} label: {
|
||||||
|
Label(
|
||||||
|
showMonitor ? "Chat" : "Monitor",
|
||||||
|
systemImage: showMonitor ? "bubble.left.and.text.bubble.right" : "chart.xyaxis.line"
|
||||||
|
)
|
||||||
|
.foregroundStyle(showMonitor ? Color.accentColor : Color.secondary)
|
||||||
|
}
|
||||||
|
.help(showMonitor ? "Switch to chat" : "Show inference monitor")
|
||||||
|
.keyboardShortcut("m", modifiers: [.command, .shift])
|
||||||
|
|
||||||
|
// New conversation
|
||||||
|
Button {
|
||||||
|
chatVM?.newConversation()
|
||||||
|
} label: {
|
||||||
|
Label("New Chat", systemImage: "plus.message")
|
||||||
|
}
|
||||||
|
.keyboardShortcut("n", modifiers: .command)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ViewBuilder
|
@ViewBuilder
|
||||||
|
|||||||
141
MLXServer/Models/InferenceStats.swift
Normal file
141
MLXServer/Models/InferenceStats.swift
Normal file
@@ -0,0 +1,141 @@
|
|||||||
|
import Foundation
|
||||||
|
|
||||||
|
/// Lightweight stats collector for inference activity visualization.
///
/// Holds three groups of state:
/// - live values describing the request currently being served,
/// - cumulative counters,
/// - bounded time series with one point appended per sampling tick.
///
/// The type is `@MainActor`-isolated, so all reads and mutations happen on the
/// main actor and no locking is required.
@Observable
@MainActor
final class InferenceStats {
    // MARK: - Current request state

    var activeRequests: Int = 0
    var currentPromptTokens: Int = 0
    var currentGenerationTokens: Int = 0
    var isGenerating: Bool = false
    var isPrefilling: Bool = false
    var currentTokensPerSecond: Double = 0
    var contextUsed: Int = 0
    var contextMax: Int = 0

    // MARK: - Cumulative counters

    var totalRequests: Int = 0
    var totalPromptTokens: Int = 0
    var totalGenerationTokens: Int = 0

    // MARK: - Time series data (ring buffers for charts)

    /// One sample of a time series; `id` makes every point uniquely identifiable.
    struct DataPoint: Identifiable {
        let id = UUID()
        let timestamp: Date
        let value: Double
    }

    private(set) var tokenRateHistory: [DataPoint] = []
    private(set) var promptTokenHistory: [DataPoint] = []
    private(set) var generationTokenHistory: [DataPoint] = []

    private static let maxHistoryPoints = 120  // ~2 minutes at 1Hz
    private static let sampleInterval: TimeInterval = 1.0

    // MARK: - Periodic sampling

    private var sampleTimer: Timer?
    /// Cumulative counter values seen by the previous sample; used to derive per-tick deltas.
    private var lastGenerationTokenCount: Int = 0
    private var lastPromptTokenCount: Int = 0

    /// Starts the repeating sampler. Does nothing if sampling is already active.
    func startSampling() {
        guard sampleTimer == nil else { return }

        // Re-baseline so the first sample after a (re)start does not report
        // everything that accumulated while sampling was off as one spike.
        lastGenerationTokenCount = totalGenerationTokens
        lastPromptTokenCount = totalPromptTokens

        let timer = Timer(timeInterval: Self.sampleInterval, repeats: true) { [weak self] _ in
            Task { @MainActor in
                self?.recordSample()
            }
        }
        // `.common` keeps the timer firing while the run loop is in UI tracking
        // modes (scrolling, menus); the default mode alone would pause sampling.
        RunLoop.main.add(timer, forMode: .common)
        sampleTimer = timer
    }

    /// Stops the sampler; recorded history is kept.
    func stopSampling() {
        sampleTimer?.invalidate()
        sampleTimer = nil
    }

    /// Appends one point to each history: the current token rate, and the
    /// change in the cumulative generation / prompt counters since the last sample.
    private func recordSample() {
        let now = Date.now

        let generatedSinceLastSample = totalGenerationTokens - lastGenerationTokenCount
        let promptSinceLastSample = totalPromptTokens - lastPromptTokenCount
        lastGenerationTokenCount = totalGenerationTokens
        lastPromptTokenCount = totalPromptTokens

        Self.append(DataPoint(timestamp: now, value: currentTokensPerSecond), to: &tokenRateHistory)
        Self.append(DataPoint(timestamp: now, value: Double(generatedSinceLastSample)), to: &generationTokenHistory)
        Self.append(DataPoint(timestamp: now, value: Double(promptSinceLastSample)), to: &promptTokenHistory)
    }

    /// Appends `point` and drops the oldest entries beyond `maxHistoryPoints`.
    private static func append(_ point: DataPoint, to history: inout [DataPoint]) {
        history.append(point)
        let overflow = history.count - maxHistoryPoints
        if overflow > 0 {
            history.removeFirst(overflow)
        }
    }

    // MARK: - Event recording (called from APIServer)

    /// Marks the start of a request: bumps the request counters, enters the
    /// prefill phase and clears the per-request values.
    /// - Parameter contextLength: Stored as `contextMax`.
    func requestStarted(contextLength: Int) {
        activeRequests += 1
        totalRequests += 1
        isPrefilling = true
        isGenerating = false
        currentPromptTokens = 0
        currentGenerationTokens = 0
        currentTokensPerSecond = 0
        contextMax = contextLength
        contextUsed = 0
    }

    /// Records the prompt size, adds it to the cumulative prompt total and
    /// switches from the prefill phase to the generation phase.
    func prefillCompleted(promptTokens: Int) {
        isPrefilling = false
        isGenerating = true
        currentPromptTokens = promptTokens
        totalPromptTokens += promptTokens
        contextUsed = promptTokens
    }

    /// Updates the live generation values for the current request.
    /// - Parameters:
    ///   - tokensPerSecond: Stored as-is as the current rate.
    ///   - totalGenerated: Tokens generated so far for the current request.
    func tokenGenerated(tokensPerSecond: Double, totalGenerated: Int) {
        // A generated token implies prefill is over. Callers may report
        // `prefillCompleted` only after the token events, so flip the phase
        // here too; otherwise the UI stays in "prefilling" while tokens stream.
        if totalGenerated > 0 {
            if isPrefilling { isPrefilling = false }
            if !isGenerating { isGenerating = true }
        }
        currentGenerationTokens = totalGenerated
        currentTokensPerSecond = tokensPerSecond
        contextUsed = currentPromptTokens + totalGenerated
    }

    /// Marks the end of a request and adds its generated tokens to the cumulative total.
    /// - Parameters:
    ///   - promptTokens: Currently unused; prompt totals are accumulated in `prefillCompleted`.
    ///   - generationTokens: Added to `totalGenerationTokens`.
    func requestCompleted(promptTokens: Int, generationTokens: Int) {
        // Clamp at zero so an unbalanced completion cannot drive the count negative.
        activeRequests = max(0, activeRequests - 1)
        totalGenerationTokens += generationTokens
        if activeRequests == 0 {
            isGenerating = false
            isPrefilling = false
            currentTokensPerSecond = 0
        }
    }

    /// Clears all live values, cumulative counters and histories.
    /// The sampling timer, if running, is left untouched.
    func reset() {
        activeRequests = 0
        currentPromptTokens = 0
        currentGenerationTokens = 0
        isGenerating = false
        isPrefilling = false
        currentTokensPerSecond = 0
        contextUsed = 0
        contextMax = 0
        totalRequests = 0
        totalPromptTokens = 0
        totalGenerationTokens = 0
        tokenRateHistory.removeAll()
        promptTokenHistory.removeAll()
        generationTokenHistory.removeAll()
        lastGenerationTokenCount = 0
        lastPromptTokenCount = 0
    }
}
|
||||||
@@ -11,6 +11,7 @@ final class APIServer {
|
|||||||
var isRunning = false
|
var isRunning = false
|
||||||
var port: Int = 1234
|
var port: Int = 1234
|
||||||
var requestCount: Int = 0
|
var requestCount: Int = 0
|
||||||
|
let inferenceStats = InferenceStats()
|
||||||
|
|
||||||
private var listener: NWListener?
|
private var listener: NWListener?
|
||||||
private var modelManager: ModelManager?
|
private var modelManager: ModelManager?
|
||||||
@@ -54,6 +55,7 @@ final class APIServer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
listener?.start(queue: .global(qos: .userInitiated))
|
listener?.start(queue: .global(qos: .userInitiated))
|
||||||
|
inferenceStats.startSampling()
|
||||||
} catch {
|
} catch {
|
||||||
print("[APIServer] Failed to start: \(error)")
|
print("[APIServer] Failed to start: \(error)")
|
||||||
}
|
}
|
||||||
@@ -66,6 +68,7 @@ final class APIServer {
|
|||||||
cachedSession = nil
|
cachedSession = nil
|
||||||
cachedMessages = nil
|
cachedMessages = nil
|
||||||
cachedModelId = nil
|
cachedModelId = nil
|
||||||
|
inferenceStats.stopSampling()
|
||||||
}
|
}
|
||||||
|
|
||||||
// MARK: - Connection handling
|
// MARK: - Connection handling
|
||||||
@@ -341,6 +344,8 @@ final class APIServer {
|
|||||||
// Extract images from the last message only (ChatSession.streamDetails takes images separately)
|
// Extract images from the last message only (ChatSession.streamDetails takes images separately)
|
||||||
let lastImages = lastMessage.images
|
let lastImages = lastMessage.images
|
||||||
|
|
||||||
|
inferenceStats.requestStarted(contextLength: contextLength)
|
||||||
|
|
||||||
if isStream {
|
if isStream {
|
||||||
await handleStreamingResponse(
|
await handleStreamingResponse(
|
||||||
connection: connection,
|
connection: connection,
|
||||||
@@ -421,14 +426,22 @@ final class APIServer {
|
|||||||
switch generation {
|
switch generation {
|
||||||
case .chunk(let text):
|
case .chunk(let text):
|
||||||
fullText += text
|
fullText += text
|
||||||
|
completionTokens += 1
|
||||||
|
inferenceStats.tokenGenerated(tokensPerSecond: 0, totalGenerated: completionTokens)
|
||||||
case .info(let info):
|
case .info(let info):
|
||||||
promptTokens = info.promptTokenCount
|
promptTokens = info.promptTokenCount
|
||||||
completionTokens = info.generationTokenCount
|
completionTokens = info.generationTokenCount
|
||||||
|
inferenceStats.prefillCompleted(promptTokens: promptTokens)
|
||||||
|
if info.tokensPerSecond > 0 {
|
||||||
|
inferenceStats.tokenGenerated(tokensPerSecond: info.tokensPerSecond, totalGenerated: completionTokens)
|
||||||
|
}
|
||||||
case .toolCall(let call):
|
case .toolCall(let call):
|
||||||
frameworkToolCalls.append(call)
|
frameworkToolCalls.append(call)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inferenceStats.requestCompleted(promptTokens: promptTokens, generationTokens: completionTokens)
|
||||||
|
|
||||||
// Parse tool calls: first check framework-detected ones, then our own text parser
|
// Parse tool calls: first check framework-detected ones, then our own text parser
|
||||||
var finishReason = "stop"
|
var finishReason = "stop"
|
||||||
var responseContent: String? = fullText
|
var responseContent: String? = fullText
|
||||||
@@ -499,6 +512,7 @@ final class APIServer {
|
|||||||
sendResponse(connection: connection, status: 200, body: String(data: json, encoding: .utf8) ?? "{}")
|
sendResponse(connection: connection, status: 200, body: String(data: json, encoding: .utf8) ?? "{}")
|
||||||
}
|
}
|
||||||
} catch {
|
} catch {
|
||||||
|
inferenceStats.requestCompleted(promptTokens: 0, generationTokens: 0)
|
||||||
sendResponse(connection: connection, status: 500, body: #"{"error":"\#(error.localizedDescription)"}"#)
|
sendResponse(connection: connection, status: 500, body: #"{"error":"\#(error.localizedDescription)"}"#)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -564,6 +578,7 @@ final class APIServer {
|
|||||||
case .chunk(let text):
|
case .chunk(let text):
|
||||||
completionTokens += 1
|
completionTokens += 1
|
||||||
fullText += text
|
fullText += text
|
||||||
|
inferenceStats.tokenGenerated(tokensPerSecond: 0, totalGenerated: completionTokens)
|
||||||
|
|
||||||
if !bufferForTools {
|
if !bufferForTools {
|
||||||
sendSSEEvent(connection: connection, chunk: APIChatCompletionChunk(
|
sendSSEEvent(connection: connection, chunk: APIChatCompletionChunk(
|
||||||
@@ -579,12 +594,17 @@ final class APIServer {
|
|||||||
case .info(let info):
|
case .info(let info):
|
||||||
promptTokens = info.promptTokenCount
|
promptTokens = info.promptTokenCount
|
||||||
completionTokens = info.generationTokenCount
|
completionTokens = info.generationTokenCount
|
||||||
|
inferenceStats.prefillCompleted(promptTokens: promptTokens)
|
||||||
|
if info.tokensPerSecond > 0 {
|
||||||
|
inferenceStats.tokenGenerated(tokensPerSecond: info.tokensPerSecond, totalGenerated: completionTokens)
|
||||||
|
}
|
||||||
|
|
||||||
case .toolCall(let call):
|
case .toolCall(let call):
|
||||||
frameworkToolCalls.append(call)
|
frameworkToolCalls.append(call)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch {
|
} catch {
|
||||||
|
inferenceStats.requestCompleted(promptTokens: promptTokens, generationTokens: completionTokens)
|
||||||
let errorEvent = "data: {\"error\":\"\(error.localizedDescription)\"}\n\n"
|
let errorEvent = "data: {\"error\":\"\(error.localizedDescription)\"}\n\n"
|
||||||
connection.send(content: errorEvent.data(using: .utf8), completion: .contentProcessed({ _ in }))
|
connection.send(content: errorEvent.data(using: .utf8), completion: .contentProcessed({ _ in }))
|
||||||
}
|
}
|
||||||
@@ -687,6 +707,8 @@ final class APIServer {
|
|||||||
)
|
)
|
||||||
))
|
))
|
||||||
|
|
||||||
|
inferenceStats.requestCompleted(promptTokens: promptTokens, generationTokens: completionTokens)
|
||||||
|
|
||||||
// Send [DONE] and close
|
// Send [DONE] and close
|
||||||
let done = "data: [DONE]\n\n"
|
let done = "data: [DONE]\n\n"
|
||||||
connection.send(content: done.data(using: .utf8), completion: .contentProcessed({ _ in
|
connection.send(content: done.data(using: .utf8), completion: .contentProcessed({ _ in
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ struct ChatMessagesView: View {
|
|||||||
var body: some View {
|
var body: some View {
|
||||||
ScrollViewReader { proxy in
|
ScrollViewReader { proxy in
|
||||||
ScrollView {
|
ScrollView {
|
||||||
LazyVStack(alignment: .leading, spacing: 12) {
|
VStack(alignment: .leading, spacing: 12) {
|
||||||
if viewModel.conversation.messages.isEmpty {
|
if viewModel.conversation.messages.isEmpty {
|
||||||
emptyState
|
emptyState
|
||||||
} else {
|
} else {
|
||||||
@@ -16,14 +16,20 @@ struct ChatMessagesView: View {
|
|||||||
.id(message.id)
|
.id(message.id)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Color.clear
|
||||||
|
.frame(height: 1)
|
||||||
|
.id("bottom")
|
||||||
}
|
}
|
||||||
.padding()
|
.padding()
|
||||||
}
|
}
|
||||||
.onChange(of: viewModel.conversation.messages.last?.content) {
|
.onChange(of: viewModel.conversation.messages.last?.content) {
|
||||||
scrollToBottom(proxy: proxy)
|
// During streaming, scroll without animation to avoid overlapping animations
|
||||||
|
proxy.scrollTo("bottom", anchor: .bottom)
|
||||||
}
|
}
|
||||||
.onChange(of: viewModel.conversation.messages.count) {
|
.onChange(of: viewModel.conversation.messages.count) {
|
||||||
scrollToBottom(proxy: proxy)
|
withAnimation(.easeOut(duration: 0.2)) {
|
||||||
|
proxy.scrollTo("bottom", anchor: .bottom)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -47,13 +53,6 @@ struct ChatMessagesView: View {
|
|||||||
.frame(maxWidth: .infinity, minHeight: 300)
|
.frame(maxWidth: .infinity, minHeight: 300)
|
||||||
}
|
}
|
||||||
|
|
||||||
private func scrollToBottom(proxy: ScrollViewProxy) {
|
|
||||||
if let lastId = viewModel.conversation.messages.last?.id {
|
|
||||||
withAnimation(.easeOut(duration: 0.2)) {
|
|
||||||
proxy.scrollTo(lastId, anchor: .bottom)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
struct MessageBubbleView: View {
|
struct MessageBubbleView: View {
|
||||||
|
|||||||
352
MLXServer/Views/MonitorView.swift
Normal file
352
MLXServer/Views/MonitorView.swift
Normal file
@@ -0,0 +1,352 @@
|
|||||||
|
import Charts
|
||||||
|
import MLX
|
||||||
|
import SwiftUI
|
||||||
|
|
||||||
|
/// Real-time inference monitoring dashboard, shown in place of the chat UI.
|
||||||
|
struct MonitorView: View {
|
||||||
|
let stats: InferenceStats
|
||||||
|
@Environment(ModelManager.self) private var modelManager
|
||||||
|
|
||||||
|
/// Dashboard layout: a scrollable column holding the live status header,
/// the two charts side by side, the gauge/requests row, and the cumulative
/// totals card. The scroll view fills all available space and is drawn on
/// the standard background style.
var body: some View {
    // Build the padded column of cards first so the scroll container below
    // reads as a single expression.
    let dashboard = VStack(spacing: 20) {
        // Live status header
        liveStatusSection

        // Charts
        HStack(alignment: .top, spacing: 16) {
            tokenRateChart
            tokenThroughputChart
        }

        // Gauges row
        HStack(spacing: 16) {
            contextGauge
            gpuMemoryGauge
            requestsCard
        }

        // Cumulative stats
        cumulativeSection
    }
    .padding(20)

    ScrollView {
        dashboard
    }
    .frame(maxWidth: .infinity, maxHeight: .infinity)
    .background(.background)
}
|
||||||
|
|
||||||
|
// MARK: - Live Status
|
||||||
|
|
||||||
|
/// Header card showing the current activity (colored dot plus label), the
/// live generation speed while `stats.isGenerating` is true, and the
/// prompt/generation token counts once `stats.currentPromptTokens` is
/// greater than zero.
@ViewBuilder
private var liveStatusSection: some View {
    let indicatorColor = activityColor
    let isBusy = stats.isGenerating || stats.isPrefilling
    let cardShape = RoundedRectangle(cornerRadius: 10)

    HStack(spacing: 16) {
        // Activity indicator
        HStack(spacing: 8) {
            Circle()
                .fill(indicatorColor)
                .frame(width: 10, height: 10)
                .overlay {
                    // Halo ring drawn only while prefilling or generating.
                    if isBusy {
                        Circle()
                            .stroke(indicatorColor.opacity(0.5), lineWidth: 2)
                            .scaleEffect(1.8)
                            .opacity(0.6)
                    }
                }

            Text(activityLabel)
                .font(.headline)
        }

        Spacer()

        // Live generation speed
        if stats.isGenerating {
            Text(String(format: "%.1f tok/s", stats.currentTokensPerSecond))
                .font(.title2.monospacedDigit().bold())
                .foregroundStyle(.green)
        }

        // Prompt (blue, arrow down) / generation (orange, arrow up) token counts
        if stats.currentPromptTokens > 0 {
            HStack(spacing: 4) {
                Image(systemName: "arrow.down.circle.fill")
                    .foregroundStyle(.blue)
                Text("\(stats.currentPromptTokens)")
                    .monospacedDigit()
                Image(systemName: "arrow.up.circle.fill")
                    .foregroundStyle(.orange)
                Text("\(stats.currentGenerationTokens)")
                    .monospacedDigit()
            }
            .font(.callout)
        }
    }
    .padding(12)
    .background(.regularMaterial, in: cardShape)
}
|
||||||
|
|
||||||
|
/// Color of the activity indicator. Precedence: prefilling (blue), then
/// generating (green), then any active request (orange), otherwise the
/// secondary color.
private var activityColor: Color {
    guard !stats.isPrefilling else { return .blue }
    guard !stats.isGenerating else { return .green }
    return stats.activeRequests > 0 ? .orange : .secondary
}
|
||||||
|
|
||||||
|
/// Text shown next to the activity indicator. Uses the same precedence as
/// `activityColor`: prefilling, then generating, then active requests,
/// otherwise idle.
private var activityLabel: String {
    guard !stats.isPrefilling else { return "Prefilling" }
    guard !stats.isGenerating else { return "Generating" }
    return stats.activeRequests > 0 ? "Processing" : "Idle"
}
|
||||||
|
|
||||||
|
// MARK: - Token Rate Chart
|
||||||
|
|
||||||
|
/// Card plotting `stats.tokenRateHistory` as a green line with a faint
/// area fill underneath. The y-axis starts at zero and ends one unit above
/// `maxTokenRate`; the x-axis draws grid lines every 30 seconds and no
/// labels.
@ViewBuilder
private var tokenRateChart: some View {
    let yUpperBound = maxTokenRate + 1
    let cardShape = RoundedRectangle(cornerRadius: 10)

    VStack(alignment: .leading, spacing: 6) {
        Text("Generation Speed (tok/s)")
            .font(.caption.bold())
            .foregroundStyle(.secondary)

        Chart(stats.tokenRateHistory) { sample in
            // Line on top of a translucent fill; both use the same samples.
            LineMark(
                x: .value("Time", sample.timestamp),
                y: .value("tok/s", sample.value)
            )
            .foregroundStyle(.green)
            .interpolationMethod(.monotone)

            AreaMark(
                x: .value("Time", sample.timestamp),
                y: .value("tok/s", sample.value)
            )
            .foregroundStyle(.green.opacity(0.1))
            .interpolationMethod(.monotone)
        }
        .chartXAxis {
            AxisMarks(values: .stride(by: .second, count: 30)) { _ in
                AxisGridLine()
            }
        }
        .chartYAxis {
            AxisMarks(position: .leading) { mark in
                AxisGridLine()
                AxisValueLabel {
                    if let tick = mark.as(Double.self) {
                        Text(String(format: "%.0f", tick))
                            .font(.caption2.monospacedDigit())
                    }
                }
            }
        }
        .chartYScale(domain: 0...yUpperBound)
        .frame(height: 150)
    }
    .padding(12)
    .background(.regularMaterial, in: cardShape)
}
|
||||||
|
|
||||||
|
/// Largest finite token rate in `stats.tokenRateHistory`, never below zero.
///
/// Falls back to 10 when the history has no finite samples. Non-finite
/// samples (NaN, infinity) are ignored and the result is floored at zero,
/// because the chart builds its y-domain as `0...(maxTokenRate + 1)`: a NaN
/// or sufficiently negative upper bound would form an invalid `ClosedRange`
/// and trap while the view is being evaluated.
private var maxTokenRate: Double {
    let finiteRates = stats.tokenRateHistory.map(\.value).filter(\.isFinite)
    return max(finiteRates.max() ?? 10, 0)
}
|
||||||
|
|
||||||
|
// MARK: - Token Throughput Chart
|
||||||
|
|
||||||
|
/// Card with a bar chart of `stats.promptTokenHistory` (blue) and
/// `stats.generationTokenHistory` (orange) over time, followed by a
/// hand-built legend for the two series. The x-axis draws grid lines every
/// 30 seconds and no labels.
@ViewBuilder
private var tokenThroughputChart: some View {
    let cardShape = RoundedRectangle(cornerRadius: 10)

    VStack(alignment: .leading, spacing: 6) {
        Text("Token Throughput (/sec)")
            .font(.caption.bold())
            .foregroundStyle(.secondary)

        Chart {
            // Prompt tokens
            ForEach(stats.promptTokenHistory) { sample in
                BarMark(
                    x: .value("Time", sample.timestamp),
                    y: .value("Tokens", sample.value)
                )
                .foregroundStyle(.blue.opacity(0.7))
            }
            // Generated tokens
            ForEach(stats.generationTokenHistory) { sample in
                BarMark(
                    x: .value("Time", sample.timestamp),
                    y: .value("Tokens", sample.value)
                )
                .foregroundStyle(.orange.opacity(0.7))
            }
        }
        .chartXAxis {
            AxisMarks(values: .stride(by: .second, count: 30)) { _ in
                AxisGridLine()
            }
        }
        .chartYAxis {
            AxisMarks(position: .leading) { mark in
                AxisGridLine()
                AxisValueLabel {
                    if let tick = mark.as(Double.self) {
                        Text(String(format: "%.0f", tick))
                            .font(.caption2.monospacedDigit())
                    }
                }
            }
        }
        .frame(height: 150)

        // Legend
        HStack(spacing: 12) {
            Label("Prompt", systemImage: "circle.fill")
                .font(.caption2)
                .foregroundStyle(.blue)
            Label("Generation", systemImage: "circle.fill")
                .font(.caption2)
                .foregroundStyle(.orange)
        }
    }
    .padding(12)
    .background(.regularMaterial, in: cardShape)
}
|
||||||
|
|
||||||
|
// MARK: - Context Gauge
|
||||||
|
|
||||||
|
/// Card with a circular gauge of context-window usage.
///
/// The maximum is the larger of `stats.contextMax` and the current model's
/// `contextLength` (treated as 0 when no model is set). The fill ratio is
/// `stats.contextUsed / maximum`, clamped to `0...1` so the gauge value
/// stays inside its default bounds and the percentage label never reads
/// above 100% when the reported usage exceeds the known maximum. The raw
/// used count is still shown unclamped as the gauge's current-value label.
@ViewBuilder
private var contextGauge: some View {
    let maxCtx = max(stats.contextMax, modelManager.currentModel?.contextLength ?? 0)
    let used = stats.contextUsed
    // Guard against division by zero, then clamp into the gauge's range.
    let ratio: Double = maxCtx > 0
        ? min(max(Double(used) / Double(maxCtx), 0), 1)
        : 0

    VStack(spacing: 8) {
        Text("Context")
            .font(.caption.bold())
            .foregroundStyle(.secondary)

        Gauge(value: ratio) {
            EmptyView()
        } currentValueLabel: {
            Text(formatTokenCount(used))
                .font(.title3.monospacedDigit().bold())
        } minimumValueLabel: {
            Text("0")
                .font(.caption2)
        } maximumValueLabel: {
            Text(formatTokenCount(maxCtx))
                .font(.caption2)
        }
        .gaugeStyle(.accessoryCircular)
        .scaleEffect(1.3)
        .tint(contextGradient(ratio: ratio))

        Text("\(Int(ratio * 100))%")
            .font(.caption.monospacedDigit())
            .foregroundStyle(.secondary)
    }
    .frame(maxWidth: .infinity)
    .padding(12)
    .background(.regularMaterial, in: RoundedRectangle(cornerRadius: 10))
}
|
||||||
|
|
||||||
|
/// Tint for the context gauge based on how full the context is.
///
/// - Parameter ratio: Fraction of the context in use.
/// - Returns: Red above 0.9, orange above 0.7, blue otherwise.
private func contextGradient(ratio: Double) -> Color {
    switch ratio {
    case let fill where fill > 0.9:
        return .red
    case let fill where fill > 0.7:
        return .orange
    default:
        return .blue
    }
}
|
||||||
|
|
||||||
|
// MARK: - GPU Memory Gauge
|
||||||
|
|
||||||
|
/// Card showing `MLX.GPU.activeMemory` and, when it is non-zero,
/// `MLX.GPU.peakMemory`. Both values are divided by 1_048_576 and labelled
/// as MB (presumably the raw values are bytes; confirm against MLX).
@ViewBuilder
private var gpuMemoryGauge: some View {
    let unitsPerMB = 1_048_576.0
    let activeMB = Double(MLX.GPU.activeMemory) / unitsPerMB
    let peakMB = Double(MLX.GPU.peakMemory) / unitsPerMB
    let cardShape = RoundedRectangle(cornerRadius: 10)

    VStack(spacing: 8) {
        Text("GPU Memory")
            .font(.caption.bold())
            .foregroundStyle(.secondary)

        Text(String(format: "%.0f MB", activeMB))
            .font(.title3.monospacedDigit().bold())

        // The peak line is hidden until a non-zero peak is reported.
        if peakMB > 0 {
            Text(String(format: "Peak: %.0f MB", peakMB))
                .font(.caption2.monospacedDigit())
                .foregroundStyle(.tertiary)
        }
    }
    .frame(maxWidth: .infinity)
    .padding(12)
    .background(.regularMaterial, in: cardShape)
}
|
||||||
|
|
||||||
|
// MARK: - Requests Card
|
||||||
|
|
||||||
|
/// Card showing `stats.totalRequests` with a sub-line that reports the
/// number of active requests in green, or "none active" when there are none.
@ViewBuilder
private var requestsCard: some View {
    let activeCount = stats.activeRequests
    let cardShape = RoundedRectangle(cornerRadius: 10)

    VStack(spacing: 8) {
        Text("Requests")
            .font(.caption.bold())
            .foregroundStyle(.secondary)

        Text("\(stats.totalRequests)")
            .font(.title3.monospacedDigit().bold())

        if activeCount > 0 {
            Text("\(activeCount) active")
                .font(.caption2)
                .foregroundStyle(.green)
        } else {
            Text("none active")
                .font(.caption2)
                .foregroundStyle(.tertiary)
        }
    }
    .frame(maxWidth: .infinity)
    .padding(12)
    .background(.regularMaterial, in: cardShape)
}
|
||||||
|
|
||||||
|
// MARK: - Cumulative
|
||||||
|
|
||||||
|
/// Card with three side-by-side totals: prompt tokens (blue), generated
/// tokens (orange), and their sum, each rendered through
/// `formatTokenCount`.
@ViewBuilder
private var cumulativeSection: some View {
    let promptTotal = stats.totalPromptTokens
    let generatedTotal = stats.totalGenerationTokens
    let cardShape = RoundedRectangle(cornerRadius: 10)

    HStack(spacing: 24) {
        // Prompt tokens
        VStack(spacing: 2) {
            Text("Total Prompt Tokens")
                .font(.caption2)
                .foregroundStyle(.secondary)
            Text(formatTokenCount(promptTotal))
                .font(.callout.monospacedDigit().bold())
                .foregroundStyle(.blue)
        }

        // Generated tokens
        VStack(spacing: 2) {
            Text("Total Generated Tokens")
                .font(.caption2)
                .foregroundStyle(.secondary)
            Text(formatTokenCount(generatedTotal))
                .font(.callout.monospacedDigit().bold())
                .foregroundStyle(.orange)
        }

        // Combined total
        VStack(spacing: 2) {
            Text("Total Tokens")
                .font(.caption2)
                .foregroundStyle(.secondary)
            Text(formatTokenCount(promptTotal + generatedTotal))
                .font(.callout.monospacedDigit().bold())
        }
    }
    .frame(maxWidth: .infinity)
    .padding(12)
    .background(.regularMaterial, in: cardShape)
}
|
||||||
|
|
||||||
|
// MARK: - Helpers
|
||||||
|
|
||||||
|
/// Formats a token count compactly with one decimal place: `"1.5k"` for
/// thousands, `"2.5M"` for millions, and the plain integer below 1,000.
///
/// Counts from 999,950 upward are rendered in millions. Without that
/// promotion, `%.1f` would round 999.95k and above up to `"1000.0k"`.
///
/// - Parameter count: The number of tokens to format.
/// - Returns: The abbreviated string.
private func formatTokenCount(_ count: Int) -> String {
    // Smallest count whose thousands form would round to "1000.0k".
    let millionsThreshold = 999_950

    if count >= millionsThreshold {
        return String(format: "%.1fM", Double(count) / 1_000_000)
    }
    if count >= 1_000 {
        return String(format: "%.1fk", Double(count) / 1_000)
    }
    return "\(count)"
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user