Files
MLXServer/MLXServer/Views/MonitorView.swift

539 lines
21 KiB
Swift

import Charts
import MLX
import SwiftUI
/// Real-time system dashboard focused on the stateless API server and prefix cache.
struct MonitorView: View {
let stats: InferenceStats
@Environment(ModelManager.self) private var modelManager
private let chartColumns = [
GridItem(.flexible(minimum: 280), spacing: 16),
GridItem(.flexible(minimum: 280), spacing: 16),
]
private let metricColumns = [
GridItem(.flexible(minimum: 180), spacing: 16),
GridItem(.flexible(minimum: 180), spacing: 16),
GridItem(.flexible(minimum: 180), spacing: 16),
]
var body: some View {
ScrollView {
VStack(spacing: 20) {
systemHeader
LazyVGrid(columns: metricColumns, alignment: .leading, spacing: 16) {
metricCard(
title: "Requests",
value: "\(stats.totalRequests)",
detail: stats.activeRequests > 0 ? "\(stats.activeRequests) active" : "idle",
color: stats.activeRequests > 0 ? .green : .secondary
)
metricCard(
title: "Cache Entries",
value: "\(stats.cacheEntryCount)",
detail: formatTokenCount(stats.cacheEstimatedTokens) + " cached tokens",
color: .orange
)
metricCard(
title: "Cache Hit Rate",
value: String(format: "%.0f%%", stats.cacheHitRatePercent),
detail: "\(stats.totalCacheHits) hits / \(stats.totalCacheMisses) misses • P:\(stats.totalPrefixHits) S:\(stats.totalSupersequenceHits) L:\(stats.totalLCPHits)",
color: .blue
)
metricCard(
title: "Cache Quantization",
value: stats.kvQuantizationEnabled ? "ON" : "OFF",
detail: stats.kvQuantizationEnabled && stats.quantizationBytesSaved > 0
? "saved " + formatByteCount(stats.quantizationBytesSaved)
: "8-bit compression",
color: stats.kvQuantizationEnabled && stats.quantizationBytesSaved > 0 ? .mint : .secondary
)
metricCard(
title: "Cache Match",
value: formatTokenCount(stats.cacheMatchDepth),
detail: stats.currentCacheMatchedPromptTokens > 0
? String(format: "%.0f%% match now", stats.currentCacheMatchQualityPercent)
: String(format: "%.0f%% total quality", stats.totalCacheMatchQualityPercent),
color: .teal
)
metricCard(
title: "TTFT",
value: formatMilliseconds(stats.timeToFirstToken * 1_000),
detail: stats.isGenerating ? "time to first token" : "last completed request",
color: .cyan
)
metricCard(
title: "Prefill Speed",
value: stats.prefillTokensPerSecond > 0
? String(format: "%.1f tok/s", stats.prefillTokensPerSecond)
: "0 tok/s",
detail: formatTokenCount(stats.currentPromptTokens) + " prompt tokens",
color: .blue
)
metricCard(
title: "Context",
value: formatTokenCount(stats.contextUsed),
detail: ofTotalContext,
color: contextColor
)
metricCard(
title: "GPU Memory",
value: formatByteCount(Int(MLX.GPU.activeMemory)),
detail: "peak " + formatByteCount(Int(MLX.GPU.peakMemory)),
color: .purple
)
metricCard(
title: "Generation Speed",
value: stats.isGenerating ? String(format: "%.1f tok/s", stats.currentTokensPerSecond) : "0 tok/s",
detail: "\(stats.currentGenerationTokens) output tokens",
color: .green
)
metricCard(
title: "Disconnects",
value: "\(stats.totalDisconnects)",
detail: stats.totalDisconnects == 0 ? "none detected" : "streams cancelled by clients",
color: .red
)
}
LazyVGrid(columns: chartColumns, alignment: .leading, spacing: 16) {
latencyChart
prefillSpeedChart
throughputChart
cacheMatchChart
memoryChart
if hasVisionSamples {
visionChart
}
}
cumulativeSection
cacheEntriesSection
}
.padding(20)
}
.frame(maxWidth: .infinity, maxHeight: .infinity)
.background(.background)
}
private var systemHeader: some View {
VStack(alignment: .leading, spacing: 12) {
HStack(spacing: 12) {
Circle()
.fill(systemStateColor)
.frame(width: 12, height: 12)
Text(systemStateLabel)
.font(.headline)
Spacer()
if stats.activeRequests > 0 {
Text("phase age " + String(format: "%.0fs", stats.currentPhaseElapsed))
.font(.callout.monospacedDigit())
.foregroundStyle(.secondary)
}
}
HStack(spacing: 8) {
statusChip(title: "Preparing", value: stats.preparingRequests, color: .secondary)
statusChip(title: "Prefill", value: stats.prefillingRequests, color: .blue)
statusChip(title: "Generating", value: stats.generatingRequests, color: .green)
statusChip(title: "Cache", value: stats.cacheEntryCount, color: .orange)
statusChip(title: "Evictions", value: stats.totalCacheEvictions, color: .red)
}
}
.padding(14)
.background(.regularMaterial, in: RoundedRectangle(cornerRadius: 12))
}
private var throughputChart: some View {
chartCard(title: "Token Throughput") {
Chart {
ForEach(stats.promptTokenHistory) { point in
BarMark(
x: .value("Time", point.timestamp),
y: .value("Prompt", point.value)
)
.foregroundStyle(.blue.opacity(0.7))
}
ForEach(stats.generationTokenHistory) { point in
BarMark(
x: .value("Time", point.timestamp),
y: .value("Generation", point.value)
)
.foregroundStyle(.green.opacity(0.7))
}
}
.chartXAxis { timeAxis }
.chartYAxis { leadingValueAxis }
.frame(height: 180)
} footer: {
legendRow(items: [("Prompt", .blue), ("Generation", .green)])
}
}
private var latencyChart: some View {
chartCard(title: "Time To First Token") {
Chart {
ForEach(stats.ttftHistory) { point in
LineMark(
x: .value("Time", point.timestamp),
y: .value("TTFT", point.value)
)
.foregroundStyle(.cyan)
.interpolationMethod(.monotone)
}
}
.chartXAxis { timeAxis }
.chartYAxis { leadingValueAxis }
.frame(height: 180)
} footer: {
legendRow(items: [("TTFT ms", .cyan)])
}
}
private var prefillSpeedChart: some View {
chartCard(title: "Prefill Speed") {
Chart {
ForEach(stats.prefillSpeedHistory) { point in
LineMark(
x: .value("Time", point.timestamp),
y: .value("Prefill Speed", point.value)
)
.foregroundStyle(.blue)
.interpolationMethod(.monotone)
}
}
.chartXAxis { timeAxis }
.chartYAxis { leadingValueAxis }
.frame(height: 180)
} footer: {
legendRow(items: [("Prompt tok/s", .blue)])
}
}
private var cacheMatchChart: some View {
chartCard(title: "Cache Match Depth") {
Chart {
ForEach(stats.cacheReusePromptHistory) { point in
BarMark(
x: .value("Time", point.timestamp),
y: .value("Cached", point.value)
)
.foregroundStyle(.teal.opacity(0.7))
}
ForEach(stats.cacheRebuildPromptHistory) { point in
BarMark(
x: .value("Time", point.timestamp),
y: .value("Prefilled", point.value)
)
.foregroundStyle(.orange.opacity(0.65))
}
ForEach(stats.cacheMatchDepthHistory) { point in
LineMark(
x: .value("Time", point.timestamp),
y: .value("Depth", point.value)
)
.foregroundStyle(.blue)
.interpolationMethod(.monotone)
}
}
.chartXAxis { timeAxis }
.chartYAxis { leadingValueAxis }
.frame(height: 180)
} footer: {
legendRow(items: [("Cached", .teal), ("Prefilled", .orange), ("Matched depth", .blue)])
}
}
private var memoryChart: some View {
chartCard(title: "Estimated Cache Memory") {
Chart {
ForEach(stats.cacheFootprintHistory) { point in
AreaMark(
x: .value("Time", point.timestamp),
y: .value("MB", point.value / 1_048_576)
)
.foregroundStyle(.orange.opacity(0.15))
.interpolationMethod(.monotone)
LineMark(
x: .value("Time", point.timestamp),
y: .value("MB", point.value / 1_048_576)
)
.foregroundStyle(.orange)
.interpolationMethod(.monotone)
}
ForEach(stats.cacheMemoryPressureHistory) { point in
LineMark(
x: .value("Time", point.timestamp),
y: .value("Pressure", point.value)
)
.foregroundStyle(.red)
.interpolationMethod(.monotone)
}
}
.chartXAxis { timeAxis }
.chartYAxis { leadingValueAxis }
.frame(height: 180)
} footer: {
legendRow(items: [("Estimated MB", .orange), ("Estimated budget %", .red)])
}
}
private var visionChart: some View {
chartCard(title: "Vision Prepare Time") {
Chart {
ForEach(stats.visionTimeHistory) { point in
BarMark(
x: .value("Time", point.timestamp),
y: .value("Vision", point.value)
)
.foregroundStyle(.purple.opacity(0.8))
}
}
.chartXAxis { timeAxis }
.chartYAxis { leadingValueAxis }
.frame(height: 180)
} footer: {
legendRow(items: [("Prepare ms", .purple)])
}
}
private var cumulativeSection: some View {
VStack(alignment: .leading, spacing: 10) {
Text("Totals")
.font(.caption.bold())
.foregroundStyle(.secondary)
LazyVGrid(columns: metricColumns, alignment: .leading, spacing: 12) {
compactTile(title: "Prompt Tokens", value: formatTokenCount(stats.totalPromptTokens), color: .blue)
compactTile(title: "Generated Tokens", value: formatTokenCount(stats.totalGenerationTokens), color: .green)
compactTile(title: "Cache Evictions", value: "\(stats.totalCacheEvictions)", color: .red)
compactTile(title: "Tokens From Cache", value: formatTokenCount(stats.totalCacheReusePromptTokens), color: .teal)
compactTile(title: "Tokens Prefilled", value: formatTokenCount(stats.totalCacheRebuildPromptTokens), color: .orange)
compactTile(title: "Match Quality", value: String(format: "%.0f%%", stats.totalCacheMatchQualityPercent), color: .teal)
compactTile(title: "Prefill Time", value: String(format: "%.1fs", stats.totalPrefillDuration), color: .blue)
compactTile(title: "Generation Time", value: String(format: "%.1fs", stats.totalGenerationDuration), color: .green)
compactTile(title: "Vision Time", value: String(format: "%.1fs", stats.totalVisionEncoderDuration), color: .purple)
compactTile(title: "Disconnects", value: "\(stats.totalDisconnects)", color: .red)
compactTile(title: "Cache Budget", value: formatByteCount(stats.cacheMemoryBudgetBytes), color: .orange)
}
}
.padding(14)
.background(.regularMaterial, in: RoundedRectangle(cornerRadius: 12))
}
private var cacheEntriesSection: some View {
VStack(alignment: .leading, spacing: 12) {
HStack {
Text("Prefix Cache Entries")
.font(.headline)
Spacer()
Text("\(stats.cachedEntries.count) visible")
.font(.caption)
.foregroundStyle(.secondary)
}
if stats.cachedEntries.isEmpty {
Text("No cache entries stored yet.")
.font(.callout)
.foregroundStyle(.secondary)
} else {
ForEach(stats.cachedEntries) { entry in
VStack(alignment: .leading, spacing: 10) {
HStack {
Text(entry.modelId)
.font(.callout.weight(.semibold))
.lineLimit(1)
Spacer()
Text(relativeTimeString(entry.lastAccessAt))
.font(.caption.monospacedDigit())
.foregroundStyle(.secondary)
}
HStack(spacing: 16) {
entryMetric("Tokens", formatTokenCount(entry.tokenCount))
entryMetric("Est. Footprint", formatByteCount(entry.estimatedBytes))
entryMetric("Hits", "\(entry.hitCount)")
entryMetric("Created", relativeTimeString(entry.createdAt))
}
}
.padding(12)
.background(Color.primary.opacity(0.035), in: RoundedRectangle(cornerRadius: 10))
}
}
}
.padding(14)
.background(.regularMaterial, in: RoundedRectangle(cornerRadius: 12))
}
private func metricCard(title: String, value: String, detail: String, color: Color) -> some View {
VStack(alignment: .leading, spacing: 6) {
Text(title)
.font(.caption)
.foregroundStyle(.secondary)
Text(value)
.font(.title3.monospacedDigit().bold())
.foregroundStyle(color)
Text(detail)
.font(.caption2)
.foregroundStyle(.secondary)
}
.frame(maxWidth: .infinity, alignment: .leading)
.padding(14)
.background(.regularMaterial, in: RoundedRectangle(cornerRadius: 12))
}
private func compactTile(title: String, value: String, color: Color) -> some View {
VStack(alignment: .leading, spacing: 4) {
Text(title)
.font(.caption2)
.foregroundStyle(.secondary)
Text(value)
.font(.callout.monospacedDigit().bold())
.foregroundStyle(color)
}
.frame(maxWidth: .infinity, alignment: .leading)
.padding(10)
.background(Color.primary.opacity(0.04), in: RoundedRectangle(cornerRadius: 8))
}
private func chartCard<Content: View, Footer: View>(title: String, @ViewBuilder content: () -> Content, @ViewBuilder footer: () -> Footer) -> some View {
VStack(alignment: .leading, spacing: 8) {
Text(title)
.font(.caption.bold())
.foregroundStyle(.secondary)
content()
footer()
}
.padding(14)
.background(.regularMaterial, in: RoundedRectangle(cornerRadius: 12))
}
private func statusChip(title: String, value: Int, color: Color) -> some View {
HStack(spacing: 6) {
Circle()
.fill(color)
.frame(width: 7, height: 7)
Text(title)
Text("\(value)")
.monospacedDigit()
}
.font(.caption)
.padding(.horizontal, 8)
.padding(.vertical, 4)
.background(color.opacity(0.12), in: Capsule())
}
private func entryMetric(_ title: String, _ value: String) -> some View {
VStack(alignment: .leading, spacing: 2) {
Text(title)
.font(.caption2)
.foregroundStyle(.secondary)
Text(value)
.font(.caption.monospacedDigit().bold())
}
}
private func legendRow(items: [(String, Color)]) -> some View {
HStack(spacing: 12) {
ForEach(Array(items.enumerated()), id: \.offset) { _, item in
Label(item.0, systemImage: "circle.fill")
.font(.caption2)
.foregroundStyle(item.1)
}
}
}
private var timeAxis: some AxisContent {
AxisMarks(values: .stride(by: .second, count: 30)) { _ in
AxisGridLine()
}
}
private var leadingValueAxis: some AxisContent {
AxisMarks(position: .leading) { value in
AxisGridLine()
AxisValueLabel {
if let doubleValue = value.as(Double.self) {
Text(String(format: "%.0f", doubleValue))
.font(.caption2.monospacedDigit())
}
}
}
}
private var systemStateColor: Color {
if stats.generatingRequests > 0 { return .green }
if stats.prefillingRequests > 0 { return .blue }
if stats.preparingRequests > 0 { return .orange }
return .secondary
}
private var systemStateLabel: String {
if stats.generatingRequests > 0 { return "Generating" }
if stats.prefillingRequests > 0 { return "Prefilling" }
if stats.preparingRequests > 0 { return "Preparing" }
return "Idle"
}
private var contextColor: Color {
let maxContext = max(stats.contextMax, modelManager.currentModel?.contextLength ?? 0)
guard maxContext > 0 else { return .secondary }
let ratio = Double(stats.contextUsed) / Double(maxContext)
if ratio > 0.9 { return .red }
if ratio > 0.7 { return .orange }
return .blue
}
private var ofTotalContext: String {
let total = max(stats.contextMax, modelManager.currentModel?.contextLength ?? 0)
guard total > 0 else { return "no model" }
return "of " + formatTokenCount(total)
}
private func formatTokenCount(_ count: Int) -> String {
if count >= 1_000_000 {
return String(format: "%.1fM", Double(count) / 1_000_000)
} else if count >= 1_000 {
return String(format: "%.1fk", Double(count) / 1_000)
}
return "\(count)"
}
private func formatByteCount(_ count: Int) -> String {
let bytes = Double(count)
if bytes >= 1_073_741_824 {
return String(format: "%.2f GB", bytes / 1_073_741_824)
}
if bytes >= 1_048_576 {
return String(format: "%.1f MB", bytes / 1_048_576)
}
if bytes >= 1024 {
return String(format: "%.0f KB", bytes / 1024)
}
return "\(count) B"
}
private func relativeTimeString(_ date: Date) -> String {
let seconds = max(0, Int(Date.now.timeIntervalSince(date)))
if seconds < 60 { return "\(seconds)s ago" }
let minutes = seconds / 60
if minutes < 60 { return "\(minutes)m ago" }
return "\(minutes / 60)h ago"
}
private var hasVisionSamples: Bool {
stats.visionTimeHistory.contains { $0.value > 0 }
}
private func formatMilliseconds(_ value: Double) -> String {
guard value > 0 else { return "0 ms" }
if value >= 1_000 {
return String(format: "%.2fs", value / 1_000)
}
return String(format: "%.0f ms", value)
}
}