diff --git a/MLXServer/Models/InferenceStats.swift b/MLXServer/Models/InferenceStats.swift index aaa1208..4e0da16 100644 --- a/MLXServer/Models/InferenceStats.swift +++ b/MLXServer/Models/InferenceStats.swift @@ -9,7 +9,7 @@ final class LiveCounters: @unchecked Sendable { static let shared = LiveCounters() private let lock = OSAllocatedUnfairLock() - private var requestPhases: [String: RequestPhase] = [:] + private var requestPhases: [String: RequestState] = [:] // Current request private var _activeRequests: Int = 0 @@ -23,13 +23,19 @@ final class LiveCounters: @unchecked Sendable { private var _isPrefilling: Bool = false private var _isGenerating: Bool = false private var _contextMax: Int = 0 + private var _currentPhaseElapsed: TimeInterval = 0 // Cumulative private var _totalRequests: Int = 0 private var _totalPromptTokens: Int = 0 private var _totalGenerationTokens: Int = 0 + private var _totalPreparingDuration: TimeInterval = 0 + private var _totalSessionBuildDuration: TimeInterval = 0 + private var _totalPrefillDuration: TimeInterval = 0 + private var _totalGenerationDuration: TimeInterval = 0 func requestStarted(requestId: String, contextLength: Int) { + let now = Date() lock.lock() _activeRequests += 1 _preparingRequests += 1 @@ -40,33 +46,40 @@ final class LiveCounters: @unchecked Sendable { _generationTokens = 0 _tokensPerSecond = 0 _contextMax = contextLength - requestPhases[requestId] = .preparing + requestPhases[requestId] = RequestState(phase: .preparing, phaseStartedAt: now) + refreshCurrentPhaseElapsed(now: now) lock.unlock() } func requestPhaseChanged(requestId: String, phase: RequestPhase) { + let now = Date() lock.lock() if let current = requestPhases[requestId] { - decrementCount(for: current) + decrementCount(for: current.phase) + accumulateDuration(for: current.phase, elapsed: now.timeIntervalSince(current.phaseStartedAt)) } incrementCount(for: phase) - requestPhases[requestId] = phase + requestPhases[requestId] = RequestState(phase: phase, phaseStartedAt: now) _isPrefilling = _prefillRequests > 0 || _sessionBuildRequests > 0 || _preparingRequests > 0 _isGenerating = _generatingRequests > 0 + refreshCurrentPhaseElapsed(now: now) lock.unlock() } func prefillCompleted(requestId: String, promptTokens: Int) { + let now = Date() lock.lock() if let current = requestPhases[requestId] { - decrementCount(for: current) + decrementCount(for: current.phase) + accumulateDuration(for: current.phase, elapsed: now.timeIntervalSince(current.phaseStartedAt)) } incrementCount(for: .generating) - requestPhases[requestId] = .generating + requestPhases[requestId] = RequestState(phase: .generating, phaseStartedAt: now) _promptTokens = promptTokens _totalPromptTokens += promptTokens _isPrefilling = _prefillRequests > 0 || _sessionBuildRequests > 0 || _preparingRequests > 0 _isGenerating = _generatingRequests > 0 + refreshCurrentPhaseElapsed(now: now) lock.unlock() } @@ -78,9 +91,11 @@ final class LiveCounters: @unchecked Sendable { } func requestCompleted(requestId: String, generationTokens: Int) { + let now = Date() lock.lock() if let current = requestPhases.removeValue(forKey: requestId) { - decrementCount(for: current) + decrementCount(for: current.phase) + accumulateDuration(for: current.phase, elapsed: now.timeIntervalSince(current.phaseStartedAt)) } _activeRequests = max(0, _activeRequests - 1) _totalGenerationTokens += generationTokens @@ -92,6 +107,7 @@ final class LiveCounters: @unchecked Sendable { _isPrefilling = _prefillRequests > 0 || _sessionBuildRequests > 0 || _preparingRequests > 0 _isGenerating = _generatingRequests > 0 } + refreshCurrentPhaseElapsed(now: now) lock.unlock() } @@ -109,15 +125,22 @@ final class LiveCounters: @unchecked Sendable { _isPrefilling = false _isGenerating = false _contextMax = 0 + _currentPhaseElapsed = 0 _totalRequests = 0 _totalPromptTokens = 0 _totalGenerationTokens = 0 + _totalPreparingDuration = 0 + _totalSessionBuildDuration = 0 + _totalPrefillDuration = 0 + _totalGenerationDuration = 0 lock.unlock() } /// Atomic snapshot for the UI timer. func snapshot() -> Snapshot { + let now = Date() lock.lock() + refreshCurrentPhaseElapsed(now: now) let s = Snapshot( activeRequests: _activeRequests, preparingRequests: _preparingRequests, @@ -130,9 +153,14 @@ final class LiveCounters: @unchecked Sendable { isPrefilling: _isPrefilling, isGenerating: _isGenerating, contextMax: _contextMax, + currentPhaseElapsed: _currentPhaseElapsed, totalRequests: _totalRequests, totalPromptTokens: _totalPromptTokens, - totalGenerationTokens: _totalGenerationTokens + totalGenerationTokens: _totalGenerationTokens, + totalPreparingDuration: _totalPreparingDuration, + totalSessionBuildDuration: _totalSessionBuildDuration, + totalPrefillDuration: _totalPrefillDuration, + totalGenerationDuration: _totalGenerationDuration ) lock.unlock() return s @@ -150,9 +178,14 @@ final class LiveCounters: @unchecked Sendable { let isPrefilling: Bool let isGenerating: Bool let contextMax: Int + let currentPhaseElapsed: TimeInterval let totalRequests: Int let totalPromptTokens: Int let totalGenerationTokens: Int + let totalPreparingDuration: TimeInterval + let totalSessionBuildDuration: TimeInterval + let totalPrefillDuration: TimeInterval + let totalGenerationDuration: TimeInterval } private func incrementCount(for phase: RequestPhase) { @@ -181,6 +214,28 @@ final class LiveCounters: @unchecked Sendable { } } + private func accumulateDuration(for phase: RequestPhase, elapsed: TimeInterval) { + switch phase { + case .preparing: + _totalPreparingDuration += elapsed + case .sessionBuild: + _totalSessionBuildDuration += elapsed + case .prefilling: + _totalPrefillDuration += elapsed + case .generating: + _totalGenerationDuration += elapsed + } + } + + private func refreshCurrentPhaseElapsed(now: Date) { + _currentPhaseElapsed = requestPhases.values.map { now.timeIntervalSince($0.phaseStartedAt) }.max() ?? 0 + } + + private struct RequestState { + var phase: RequestPhase + var phaseStartedAt: Date + } + enum RequestPhase { case preparing case sessionBuild @@ -208,6 +263,7 @@ final class InferenceStats { var currentTokensPerSecond: Double = 0 var contextUsed: Int = 0 var contextMax: Int = 0 + var currentPhaseElapsed: TimeInterval = 0 // MARK: - Cumulative counters @@ -219,6 +275,10 @@ final class InferenceStats { var totalCacheEvictions: Int = 0 var totalCacheReusePromptTokens: Int = 0 var totalCacheRebuildPromptTokens: Int = 0 + var totalPreparingDuration: TimeInterval = 0 + var totalSessionBuildDuration: TimeInterval = 0 + var totalPrefillDuration: TimeInterval = 0 + var totalGenerationDuration: TimeInterval = 0 // MARK: - Cache state @@ -246,6 +306,9 @@ final class InferenceStats { private(set) var cacheFootprintHistory: [DataPoint] = [] private(set) var cacheReuseHistory: [DataPoint] = [] private(set) var cacheRebuildHistory: [DataPoint] = [] + private(set) var currentPhaseElapsedHistory: [DataPoint] = [] + private(set) var prefillDurationHistory: [DataPoint] = [] + private(set) var sessionBuildDurationHistory: [DataPoint] = [] private static let maxHistoryPoints = 120 // ~2 minutes at 1Hz @@ -255,6 +318,8 @@ final class InferenceStats { private var lastPromptTokenCount: Int = 0 private var lastCacheReuseTokenCount: Int = 0 private var lastCacheRebuildTokenCount: Int = 0 + private var lastPrefillDuration: TimeInterval = 0 + private var lastSessionBuildDuration: TimeInterval = 0 func startSampling() { guard sampleTimer == nil else { return } @@ -287,9 +352,14 @@ final class InferenceStats { isGenerating = snap.isGenerating contextMax = snap.contextMax contextUsed = snap.promptTokens + snap.generationTokens + currentPhaseElapsed = snap.currentPhaseElapsed totalRequests = snap.totalRequests totalPromptTokens = snap.totalPromptTokens totalGenerationTokens = snap.totalGenerationTokens + totalPreparingDuration = snap.totalPreparingDuration + totalSessionBuildDuration = snap.totalSessionBuildDuration + totalPrefillDuration = snap.totalPrefillDuration + totalGenerationDuration = snap.totalGenerationDuration totalCacheHits = cache.totalHits totalCacheMisses = cache.totalMisses totalCacheEvictions = cache.totalEvictions @@ -308,10 +378,14 @@ final class InferenceStats { let promptDelta = snap.totalPromptTokens - lastPromptTokenCount let cacheReuseDelta = cache.totalReusePromptTokens - lastCacheReuseTokenCount let cacheRebuildDelta = cache.totalRebuildPromptTokens - lastCacheRebuildTokenCount + let prefillDurationDelta = snap.totalPrefillDuration - lastPrefillDuration + let sessionBuildDurationDelta = snap.totalSessionBuildDuration - lastSessionBuildDuration lastGenerationTokenCount = snap.totalGenerationTokens lastPromptTokenCount = snap.totalPromptTokens lastCacheReuseTokenCount = cache.totalReusePromptTokens lastCacheRebuildTokenCount = cache.totalRebuildPromptTokens + lastPrefillDuration = snap.totalPrefillDuration + lastSessionBuildDuration = snap.totalSessionBuildDuration tokenRateHistory.append(DataPoint(timestamp: now, value: snap.tokensPerSecond)) generationTokenHistory.append(DataPoint(timestamp: now, value: Double(genDelta))) @@ -321,6 +395,9 @@ final class InferenceStats { cacheFootprintHistory.append(DataPoint(timestamp: now, value: Double(cache.estimatedBytes))) cacheReuseHistory.append(DataPoint(timestamp: now, value: Double(cacheReuseDelta))) cacheRebuildHistory.append(DataPoint(timestamp: now, value: Double(cacheRebuildDelta))) + currentPhaseElapsedHistory.append(DataPoint(timestamp: now, value: snap.currentPhaseElapsed)) + prefillDurationHistory.append(DataPoint(timestamp: now, value: prefillDurationDelta)) + sessionBuildDurationHistory.append(DataPoint(timestamp: now, value: sessionBuildDurationDelta)) if tokenRateHistory.count > Self.maxHistoryPoints { tokenRateHistory.removeFirst(tokenRateHistory.count - Self.maxHistoryPoints) @@ -346,6 +423,15 @@ final class InferenceStats { if cacheRebuildHistory.count > Self.maxHistoryPoints { cacheRebuildHistory.removeFirst(cacheRebuildHistory.count - Self.maxHistoryPoints) } + if currentPhaseElapsedHistory.count > Self.maxHistoryPoints { + currentPhaseElapsedHistory.removeFirst(currentPhaseElapsedHistory.count - Self.maxHistoryPoints) + } + if prefillDurationHistory.count > Self.maxHistoryPoints { + prefillDurationHistory.removeFirst(prefillDurationHistory.count - Self.maxHistoryPoints) + } + if sessionBuildDurationHistory.count > Self.maxHistoryPoints { + sessionBuildDurationHistory.removeFirst(sessionBuildDurationHistory.count - Self.maxHistoryPoints) + } } func reset() { @@ -363,9 +449,14 @@ final class InferenceStats { currentTokensPerSecond = 0 contextUsed = 0 contextMax = 0 + currentPhaseElapsed = 0 totalRequests = 0 totalPromptTokens = 0 totalGenerationTokens = 0 + totalPreparingDuration = 0 + totalSessionBuildDuration = 0 + totalPrefillDuration = 0 + totalGenerationDuration = 0 totalCacheHits = 0 totalCacheMisses = 0 totalCacheEvictions = 0 @@ -386,9 +477,14 @@ final class InferenceStats { cacheFootprintHistory.removeAll() cacheReuseHistory.removeAll() cacheRebuildHistory.removeAll() + currentPhaseElapsedHistory.removeAll() + prefillDurationHistory.removeAll() + sessionBuildDurationHistory.removeAll() lastGenerationTokenCount = 0 lastPromptTokenCount = 0 lastCacheReuseTokenCount = 0 lastCacheRebuildTokenCount = 0 + lastPrefillDuration = 0 + lastSessionBuildDuration = 0 } } diff --git a/MLXServer/Server/APIServer.swift b/MLXServer/Server/APIServer.swift index 26b8eee..bd162cb 100644 --- a/MLXServer/Server/APIServer.swift +++ b/MLXServer/Server/APIServer.swift @@ -393,7 +393,7 @@ final class APIServer { // Extract images from the last message only (ChatSession.streamDetails takes images separately) let lastImages = lastMessage.images - let result: (promptTokens: Int, completionTokens: Int, succeeded: Bool) + let result: (promptTokens: Int, completionTokens: Int, assistantHistoryText: String?, succeeded: Bool) if isStream { result = await handleStreamingResponse( @@ -405,7 +405,8 @@ final class APIServer { images: lastImages, tools: request.tools, created: created, - modelName: modelName + modelName: modelName, + isQwen: isQwen ) } else { result = await handleNonStreamingResponse( @@ -417,16 +418,23 @@ final class APIServer { images: lastImages, tools: request.tools, created: created, - modelName: modelName + modelName: modelName, + isQwen: isQwen ) } if result.succeeded { + var cachedSignatures = messageSignatures + if let assistantHistoryText = result.assistantHistoryText { + cachedSignatures.append( + Self.messageSignature(role: .assistant, content: assistantHistoryText, imageURLs: []) + ) + } ConversationSessionCache.shared.completeRequest( entryId: lease.entryId, session: session, - requestMessageSignatures: messageSignatures, - requestMessageCount: chatMessages.count, + requestMessageSignatures: cachedSignatures, + requestMessageCount: cachedSignatures.count, estimatedPromptTokens: estimatedPromptTokens, estimatedBytes: estimatedBytes, promptTokens: result.promptTokens, @@ -473,8 +481,9 @@ final class APIServer { images: [UserInput.Image], tools: [APIToolDefinition]?, created: Int, - modelName: String - ) async -> (promptTokens: Int, completionTokens: Int, succeeded: Bool) { + modelName: String, + isQwen: Bool + ) async -> (promptTokens: Int, completionTokens: Int, assistantHistoryText: String?, succeeded: Bool) { do { var fullText = "" var promptTokens = 0 @@ -510,48 +519,11 @@ final class APIServer { } } - // Parse tool calls: first check framework-detected ones, then our own text parser - var finishReason = "stop" - var responseContent: String? = fullText - var apiToolCalls: [APIToolCall]? = nil - - if !frameworkToolCalls.isEmpty { - // Framework natively detected tool calls (e.g. Qwen) - finishReason = "tool_calls" - apiToolCalls = frameworkToolCalls.enumerated().map { i, tc in - let argsJSON: String - let argsDict = tc.function.arguments.mapValues { $0.anyValue } - if let data = try? JSONSerialization.data(withJSONObject: argsDict), - let str = String(data: data, encoding: .utf8) { - argsJSON = str - } else { - argsJSON = "{}" - } - let callId = String(format: "call_%d_%08d", i, abs(tc.function.name.hashValue) % 100_000_000) - return APIToolCall( - index: i, - id: callId, - type: "function", - function: APIFunctionCall(name: tc.function.name, arguments: argsJSON) - ) - } - responseContent = fullText.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty ? nil : fullText - } else if let tools, !tools.isEmpty { - // Try our own text parser (e.g. Gemma tool_code blocks) - let (cleanText, parsedCalls) = ToolCallParser.parse(text: fullText, tools: tools) - if !parsedCalls.isEmpty { - finishReason = "tool_calls" - apiToolCalls = parsedCalls.enumerated().map { i, tc in - APIToolCall( - index: i, - id: tc.id, - type: "function", - function: APIFunctionCall(name: tc.name, arguments: tc.arguments) - ) - } - responseContent = cleanText.isEmpty ? nil : cleanText - } - } + let resolved = Self.resolveAssistantResponse( + fullText: fullText, + frameworkToolCalls: frameworkToolCalls, + tools: tools + ) let response = APIChatCompletionResponse( id: requestId, @@ -563,10 +535,10 @@ final class APIServer { index: 0, message: APIChoiceMessage( role: "assistant", - content: responseContent, - tool_calls: apiToolCalls + content: resolved.content, + tool_calls: resolved.toolCalls ), - finish_reason: finishReason + finish_reason: resolved.finishReason ) ], usage: APIUsageInfo( @@ -579,10 +551,15 @@ final class APIServer { if let json = try? JSONEncoder().encode(response) { sendResponse(connection: connection, status: 200, body: String(data: json, encoding: .utf8) ?? "{}") } - return (promptTokens, completionTokens, true) + let assistantHistoryText = Self.normalizedAssistantHistoryContent( + content: resolved.content, + toolCalls: resolved.toolCalls, + isQwen: isQwen + ) + return (promptTokens, completionTokens, assistantHistoryText, true) } catch { sendResponse(connection: connection, status: 500, body: #"{"error":"\#(error.localizedDescription)"}"#) - return (0, 0, false) + return (0, 0, nil, false) } } @@ -597,8 +574,9 @@ final class APIServer { images: [UserInput.Image], tools: [APIToolDefinition]?, created: Int, - modelName: String - ) async -> (promptTokens: Int, completionTokens: Int, succeeded: Bool) { + modelName: String, + isQwen: Bool + ) async -> (promptTokens: Int, completionTokens: Int, assistantHistoryText: String?, succeeded: Bool) { // Send SSE headers let header = [ "HTTP/1.1 200 OK", @@ -657,50 +635,14 @@ final class APIServer { LiveCounters.shared.prefillCompleted(requestId: requestId, promptTokens: promptTokens) } - // Stats were already updated by LiveCounters inside the loop + let resolved = Self.resolveAssistantResponse( + fullText: fullText, + frameworkToolCalls: frameworkToolCalls, + tools: tools + ) - // Post-generation: handle tool calls (framework-detected or text-parsed) - var finishReason = "stop" - - if !frameworkToolCalls.isEmpty { - finishReason = "tool_calls" - for (i, tc) in frameworkToolCalls.enumerated() { - let argsDict = tc.function.arguments.mapValues { $0.anyValue } - let argsJSON: String - if let data = try? JSONSerialization.data(withJSONObject: argsDict), - let str = String(data: data, encoding: .utf8) { - argsJSON = str - } else { - argsJSON = "{}" - } - let callId = String(format: "call_%d_%08d", i, abs(tc.function.name.hashValue) % 100_000_000) - let apiToolCall = APIToolCall( - index: i, - id: callId, - type: "function", - function: APIFunctionCall(name: tc.function.name, arguments: argsJSON) - ) - await Self.sendSSEEvent(connection: connection, chunk: APIChatCompletionChunk( - id: requestId, - object: "chat.completion.chunk", - created: created, - model: modelName, - choices: [APIStreamChoice(index: 0, delta: APIDeltaMessage(role: nil, content: nil, tool_calls: [apiToolCall]), finish_reason: nil)], - usage: nil - )) - } - } else if hasTools { - let (_, parsed) = ToolCallParser.parse(text: fullText, tools: tools) - if !parsed.isEmpty { - finishReason = "tool_calls" - } - for (i, tc) in parsed.enumerated() { - let apiToolCall = APIToolCall( - index: i, - id: tc.id, - type: "function", - function: APIFunctionCall(name: tc.name, arguments: tc.arguments) - ) + if let toolCalls = resolved.toolCalls { + for apiToolCall in toolCalls { await Self.sendSSEEvent(connection: connection, chunk: APIChatCompletionChunk( id: requestId, object: "chat.completion.chunk", @@ -718,7 +660,7 @@ final class APIServer { object: "chat.completion.chunk", created: created, model: modelName, - choices: [APIStreamChoice(index: 0, delta: APIDeltaMessage(role: nil, content: nil, tool_calls: nil), finish_reason: finishReason)], + choices: [APIStreamChoice(index: 0, delta: APIDeltaMessage(role: nil, content: nil, tool_calls: nil), finish_reason: resolved.finishReason)], usage: APIUsageInfo( prompt_tokens: promptTokens, completion_tokens: completionTokens, @@ -729,7 +671,12 @@ final class APIServer { // Send [DONE] and close await Self.sendData(connection: connection, data: "data: [DONE]\n\n".data(using: .utf8)!) connection.cancel() - return (promptTokens, completionTokens, succeeded) + let assistantHistoryText = Self.normalizedAssistantHistoryContent( + content: resolved.content, + toolCalls: resolved.toolCalls, + isQwen: isQwen + ) + return (promptTokens, completionTokens, assistantHistoryText, succeeded) } /// Run the token generation + SSE send loop entirely off MainActor. @@ -876,6 +823,68 @@ final class APIServer { return hash } + + private static func normalizedAssistantHistoryContent( + content: String?, + toolCalls: [APIToolCall]?, + isQwen: Bool + ) -> String? { + var text = content?.trimmingCharacters(in: .whitespacesAndNewlines) ?? "" + if let toolCalls, !toolCalls.isEmpty { + let formattedCalls = isQwen + ? ToolPromptBuilder.formatQwenToolCalls(toolCalls) + : ToolPromptBuilder.formatGemmaToolCalls(toolCalls) + text = text.isEmpty ? formattedCalls : text + "\n" + formattedCalls + } + return text.isEmpty ? nil : text + } + + private static func resolveAssistantResponse( + fullText: String, + frameworkToolCalls: [MLXLMCommon.ToolCall], + tools: [APIToolDefinition]? + ) -> (content: String?, toolCalls: [APIToolCall]?, finishReason: String) { + var finishReason = "stop" + var responseContent: String? = fullText.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty ? nil : fullText + var apiToolCalls: [APIToolCall]? = nil + + if !frameworkToolCalls.isEmpty { + finishReason = "tool_calls" + apiToolCalls = frameworkToolCalls.enumerated().map { i, tc in + let argsJSON: String + let argsDict = tc.function.arguments.mapValues { $0.anyValue } + if let data = try? JSONSerialization.data(withJSONObject: argsDict), + let str = String(data: data, encoding: .utf8) { + argsJSON = str + } else { + argsJSON = "{}" + } + let callId = String(format: "call_%d_%08d", i, abs(tc.function.name.hashValue) % 100_000_000) + return APIToolCall( + index: i, + id: callId, + type: "function", + function: APIFunctionCall(name: tc.function.name, arguments: argsJSON) + ) + } + } else if let tools, !tools.isEmpty { + let (cleanText, parsedCalls) = ToolCallParser.parse(text: fullText, tools: tools) + if !parsedCalls.isEmpty { + finishReason = "tool_calls" + apiToolCalls = parsedCalls.enumerated().map { i, tc in + APIToolCall( + index: i, + id: tc.id, + type: "function", + function: APIFunctionCall(name: tc.name, arguments: tc.arguments) + ) + } + responseContent = cleanText.isEmpty ? nil : cleanText + } + } + + return (responseContent, apiToolCalls, finishReason) + } } private struct DecodedImage { diff --git a/MLXServer/Views/MonitorView.swift b/MLXServer/Views/MonitorView.swift index 0cc3061..6e8d556 100644 --- a/MLXServer/Views/MonitorView.swift +++ b/MLXServer/Views/MonitorView.swift @@ -17,6 +17,7 @@ struct MonitorView: View { LazyVGrid(columns: chartColumns, alignment: .leading, spacing: 16) { tokenRateChart tokenThroughputChart + phaseActivityChart cacheReuseChart cacheFootprintChart cacheSessionChart @@ -90,6 +91,9 @@ struct MonitorView: View { phaseChip(title: "Prefill", count: stats.prefillingRequests, color: .blue) phaseChip(title: "Generating", count: stats.generatingRequests, color: .green) phaseChip(title: "Cache Active", count: stats.activeCacheEntryCount, color: .orange) + if stats.activeRequests > 0 { + phaseChip(title: phaseAgeLabel, count: Int(stats.currentPhaseElapsed.rounded()), color: activityColor) + } } } .padding(12) @@ -161,6 +165,71 @@ struct MonitorView: View { .background(.regularMaterial, in: RoundedRectangle(cornerRadius: 10)) } + @ViewBuilder + private var phaseActivityChart: some View { + VStack(alignment: .leading, spacing: 6) { + Text("Phase Activity") + .font(.caption.bold()) + .foregroundStyle(.secondary) + + Chart { + ForEach(stats.currentPhaseElapsedHistory) { point in + LineMark( + x: .value("Time", point.timestamp), + y: .value("Active s", point.value) + ) + .foregroundStyle(activityColor) + .interpolationMethod(.monotone) + } + ForEach(stats.prefillDurationHistory) { point in + BarMark( + x: .value("Time", point.timestamp), + y: .value("Prefill done", point.value) + ) + .foregroundStyle(.blue.opacity(0.45)) + } + ForEach(stats.sessionBuildDurationHistory) { point in + BarMark( + x: .value("Time", point.timestamp), + y: .value("Build done", point.value) + ) + .foregroundStyle(.purple.opacity(0.45)) + } + } + .chartXAxis { + AxisMarks(values: .stride(by: .second, count: 30)) { _ in + AxisGridLine() + } + } + .chartYAxis { + AxisMarks(position: .leading) { value in + AxisGridLine() + AxisValueLabel { + if let v = value.as(Double.self) { + Text(String(format: "%.0f", v)) + .font(.caption2.monospacedDigit()) + } + } + } + } + .frame(height: 150) + + HStack(spacing: 12) { + Label("Active phase age", systemImage: "circle.fill") + .font(.caption2) + .foregroundStyle(activityColor) + Label("Prefill completed", systemImage: "circle.fill") + .font(.caption2) + .foregroundStyle(.blue) + Label("Session build completed", systemImage: "circle.fill") + .font(.caption2) + .foregroundStyle(.purple) + } + } + .padding(12) + .background(.regularMaterial, in: RoundedRectangle(cornerRadius: 10)) + } + @ViewBuilder private var cacheReuseChart: some View { VStack(alignment: .leading, spacing: 6) { @@ -717,6 +786,13 @@ struct MonitorView: View { return Double(stats.totalCacheHits) / Double(total) } + private var phaseAgeLabel: String { + if stats.generatingRequests > 0 { return "Generating s" } + if stats.prefillingRequests > 0 { return "Prefill s" } + if stats.sessionBuildRequests > 0 { return "Build s" } + return "Preparing s" + } + private func maxContextRatio(for tokens: Int) -> Double { let maxContext = max(stats.contextMax, modelManager.currentModel?.contextLength ?? 0) guard maxContext > 0 else { return 0 }