feat: more visibility of prefilling

This commit is contained in:
2026-03-19 11:36:46 +01:00
parent 49bd165ce7
commit 577fdf8950
3 changed files with 290 additions and 109 deletions

View File

@@ -9,7 +9,7 @@ final class LiveCounters: @unchecked Sendable {
static let shared = LiveCounters()
private let lock = OSAllocatedUnfairLock()
private var requestPhases: [String: RequestPhase] = [:]
private var requestPhases: [String: RequestState] = [:]
// Current request
private var _activeRequests: Int = 0
@@ -23,13 +23,19 @@ final class LiveCounters: @unchecked Sendable {
private var _isPrefilling: Bool = false
private var _isGenerating: Bool = false
private var _contextMax: Int = 0
private var _currentPhaseElapsed: TimeInterval = 0
// Cumulative
private var _totalRequests: Int = 0
private var _totalPromptTokens: Int = 0
private var _totalGenerationTokens: Int = 0
private var _totalPreparingDuration: TimeInterval = 0
private var _totalSessionBuildDuration: TimeInterval = 0
private var _totalPrefillDuration: TimeInterval = 0
private var _totalGenerationDuration: TimeInterval = 0
func requestStarted(requestId: String, contextLength: Int) {
let now = Date()
lock.lock()
_activeRequests += 1
_preparingRequests += 1
@@ -40,33 +46,40 @@ final class LiveCounters: @unchecked Sendable {
_generationTokens = 0
_tokensPerSecond = 0
_contextMax = contextLength
requestPhases[requestId] = .preparing
requestPhases[requestId] = RequestState(phase: .preparing, phaseStartedAt: now)
refreshCurrentPhaseElapsed(now: now)
lock.unlock()
}
func requestPhaseChanged(requestId: String, phase: RequestPhase) {
let now = Date()
lock.lock()
if let current = requestPhases[requestId] {
decrementCount(for: current)
decrementCount(for: current.phase)
accumulateDuration(for: current.phase, elapsed: now.timeIntervalSince(current.phaseStartedAt))
}
incrementCount(for: phase)
requestPhases[requestId] = phase
requestPhases[requestId] = RequestState(phase: phase, phaseStartedAt: now)
_isPrefilling = _prefillRequests > 0 || _sessionBuildRequests > 0 || _preparingRequests > 0
_isGenerating = _generatingRequests > 0
refreshCurrentPhaseElapsed(now: now)
lock.unlock()
}
func prefillCompleted(requestId: String, promptTokens: Int) {
let now = Date()
lock.lock()
if let current = requestPhases[requestId] {
decrementCount(for: current)
decrementCount(for: current.phase)
accumulateDuration(for: current.phase, elapsed: now.timeIntervalSince(current.phaseStartedAt))
}
incrementCount(for: .generating)
requestPhases[requestId] = .generating
requestPhases[requestId] = RequestState(phase: .generating, phaseStartedAt: now)
_promptTokens = promptTokens
_totalPromptTokens += promptTokens
_isPrefilling = _prefillRequests > 0 || _sessionBuildRequests > 0 || _preparingRequests > 0
_isGenerating = _generatingRequests > 0
refreshCurrentPhaseElapsed(now: now)
lock.unlock()
}
@@ -78,9 +91,11 @@ final class LiveCounters: @unchecked Sendable {
}
func requestCompleted(requestId: String, generationTokens: Int) {
let now = Date()
lock.lock()
if let current = requestPhases.removeValue(forKey: requestId) {
decrementCount(for: current)
decrementCount(for: current.phase)
accumulateDuration(for: current.phase, elapsed: now.timeIntervalSince(current.phaseStartedAt))
}
_activeRequests = max(0, _activeRequests - 1)
_totalGenerationTokens += generationTokens
@@ -92,6 +107,7 @@ final class LiveCounters: @unchecked Sendable {
_isPrefilling = _prefillRequests > 0 || _sessionBuildRequests > 0 || _preparingRequests > 0
_isGenerating = _generatingRequests > 0
}
refreshCurrentPhaseElapsed(now: now)
lock.unlock()
}
@@ -109,15 +125,22 @@ final class LiveCounters: @unchecked Sendable {
_isPrefilling = false
_isGenerating = false
_contextMax = 0
_currentPhaseElapsed = 0
_totalRequests = 0
_totalPromptTokens = 0
_totalGenerationTokens = 0
_totalPreparingDuration = 0
_totalSessionBuildDuration = 0
_totalPrefillDuration = 0
_totalGenerationDuration = 0
lock.unlock()
}
/// Atomic snapshot for the UI timer.
func snapshot() -> Snapshot {
let now = Date()
lock.lock()
refreshCurrentPhaseElapsed(now: now)
let s = Snapshot(
activeRequests: _activeRequests,
preparingRequests: _preparingRequests,
@@ -130,9 +153,14 @@ final class LiveCounters: @unchecked Sendable {
isPrefilling: _isPrefilling,
isGenerating: _isGenerating,
contextMax: _contextMax,
currentPhaseElapsed: _currentPhaseElapsed,
totalRequests: _totalRequests,
totalPromptTokens: _totalPromptTokens,
totalGenerationTokens: _totalGenerationTokens
totalGenerationTokens: _totalGenerationTokens,
totalPreparingDuration: _totalPreparingDuration,
totalSessionBuildDuration: _totalSessionBuildDuration,
totalPrefillDuration: _totalPrefillDuration,
totalGenerationDuration: _totalGenerationDuration
)
lock.unlock()
return s
@@ -150,9 +178,14 @@ final class LiveCounters: @unchecked Sendable {
let isPrefilling: Bool
let isGenerating: Bool
let contextMax: Int
let currentPhaseElapsed: TimeInterval
let totalRequests: Int
let totalPromptTokens: Int
let totalGenerationTokens: Int
let totalPreparingDuration: TimeInterval
let totalSessionBuildDuration: TimeInterval
let totalPrefillDuration: TimeInterval
let totalGenerationDuration: TimeInterval
}
private func incrementCount(for phase: RequestPhase) {
@@ -181,6 +214,28 @@ final class LiveCounters: @unchecked Sendable {
}
}
private func accumulateDuration(for phase: RequestPhase, elapsed: TimeInterval) {
switch phase {
case .preparing:
_totalPreparingDuration += elapsed
case .sessionBuild:
_totalSessionBuildDuration += elapsed
case .prefilling:
_totalPrefillDuration += elapsed
case .generating:
_totalGenerationDuration += elapsed
}
}
private func refreshCurrentPhaseElapsed(now: Date) {
_currentPhaseElapsed = requestPhases.values.map { now.timeIntervalSince($0.phaseStartedAt) }.max() ?? 0
}
private struct RequestState {
var phase: RequestPhase
var phaseStartedAt: Date
}
enum RequestPhase {
case preparing
case sessionBuild
@@ -208,6 +263,7 @@ final class InferenceStats {
var currentTokensPerSecond: Double = 0
var contextUsed: Int = 0
var contextMax: Int = 0
var currentPhaseElapsed: TimeInterval = 0
// MARK: - Cumulative counters
@@ -219,6 +275,10 @@ final class InferenceStats {
var totalCacheEvictions: Int = 0
var totalCacheReusePromptTokens: Int = 0
var totalCacheRebuildPromptTokens: Int = 0
var totalPreparingDuration: TimeInterval = 0
var totalSessionBuildDuration: TimeInterval = 0
var totalPrefillDuration: TimeInterval = 0
var totalGenerationDuration: TimeInterval = 0
// MARK: - Cache state
@@ -246,6 +306,9 @@ final class InferenceStats {
private(set) var cacheFootprintHistory: [DataPoint] = []
private(set) var cacheReuseHistory: [DataPoint] = []
private(set) var cacheRebuildHistory: [DataPoint] = []
private(set) var currentPhaseElapsedHistory: [DataPoint] = []
private(set) var prefillDurationHistory: [DataPoint] = []
private(set) var sessionBuildDurationHistory: [DataPoint] = []
private static let maxHistoryPoints = 120 // ~2 minutes at 1Hz
@@ -255,6 +318,8 @@ final class InferenceStats {
private var lastPromptTokenCount: Int = 0
private var lastCacheReuseTokenCount: Int = 0
private var lastCacheRebuildTokenCount: Int = 0
private var lastPrefillDuration: TimeInterval = 0
private var lastSessionBuildDuration: TimeInterval = 0
func startSampling() {
guard sampleTimer == nil else { return }
@@ -287,9 +352,14 @@ final class InferenceStats {
isGenerating = snap.isGenerating
contextMax = snap.contextMax
contextUsed = snap.promptTokens + snap.generationTokens
currentPhaseElapsed = snap.currentPhaseElapsed
totalRequests = snap.totalRequests
totalPromptTokens = snap.totalPromptTokens
totalGenerationTokens = snap.totalGenerationTokens
totalPreparingDuration = snap.totalPreparingDuration
totalSessionBuildDuration = snap.totalSessionBuildDuration
totalPrefillDuration = snap.totalPrefillDuration
totalGenerationDuration = snap.totalGenerationDuration
totalCacheHits = cache.totalHits
totalCacheMisses = cache.totalMisses
totalCacheEvictions = cache.totalEvictions
@@ -308,10 +378,14 @@ final class InferenceStats {
let promptDelta = snap.totalPromptTokens - lastPromptTokenCount
let cacheReuseDelta = cache.totalReusePromptTokens - lastCacheReuseTokenCount
let cacheRebuildDelta = cache.totalRebuildPromptTokens - lastCacheRebuildTokenCount
let prefillDurationDelta = snap.totalPrefillDuration - lastPrefillDuration
let sessionBuildDurationDelta = snap.totalSessionBuildDuration - lastSessionBuildDuration
lastGenerationTokenCount = snap.totalGenerationTokens
lastPromptTokenCount = snap.totalPromptTokens
lastCacheReuseTokenCount = cache.totalReusePromptTokens
lastCacheRebuildTokenCount = cache.totalRebuildPromptTokens
lastPrefillDuration = snap.totalPrefillDuration
lastSessionBuildDuration = snap.totalSessionBuildDuration
tokenRateHistory.append(DataPoint(timestamp: now, value: snap.tokensPerSecond))
generationTokenHistory.append(DataPoint(timestamp: now, value: Double(genDelta)))
@@ -321,6 +395,9 @@ final class InferenceStats {
cacheFootprintHistory.append(DataPoint(timestamp: now, value: Double(cache.estimatedBytes)))
cacheReuseHistory.append(DataPoint(timestamp: now, value: Double(cacheReuseDelta)))
cacheRebuildHistory.append(DataPoint(timestamp: now, value: Double(cacheRebuildDelta)))
currentPhaseElapsedHistory.append(DataPoint(timestamp: now, value: snap.currentPhaseElapsed))
prefillDurationHistory.append(DataPoint(timestamp: now, value: prefillDurationDelta))
sessionBuildDurationHistory.append(DataPoint(timestamp: now, value: sessionBuildDurationDelta))
if tokenRateHistory.count > Self.maxHistoryPoints {
tokenRateHistory.removeFirst(tokenRateHistory.count - Self.maxHistoryPoints)
@@ -346,6 +423,15 @@ final class InferenceStats {
if cacheRebuildHistory.count > Self.maxHistoryPoints {
cacheRebuildHistory.removeFirst(cacheRebuildHistory.count - Self.maxHistoryPoints)
}
if currentPhaseElapsedHistory.count > Self.maxHistoryPoints {
currentPhaseElapsedHistory.removeFirst(currentPhaseElapsedHistory.count - Self.maxHistoryPoints)
}
if prefillDurationHistory.count > Self.maxHistoryPoints {
prefillDurationHistory.removeFirst(prefillDurationHistory.count - Self.maxHistoryPoints)
}
if sessionBuildDurationHistory.count > Self.maxHistoryPoints {
sessionBuildDurationHistory.removeFirst(sessionBuildDurationHistory.count - Self.maxHistoryPoints)
}
}
func reset() {
@@ -363,9 +449,14 @@ final class InferenceStats {
currentTokensPerSecond = 0
contextUsed = 0
contextMax = 0
currentPhaseElapsed = 0
totalRequests = 0
totalPromptTokens = 0
totalGenerationTokens = 0
totalPreparingDuration = 0
totalSessionBuildDuration = 0
totalPrefillDuration = 0
totalGenerationDuration = 0
totalCacheHits = 0
totalCacheMisses = 0
totalCacheEvictions = 0
@@ -386,9 +477,14 @@ final class InferenceStats {
cacheFootprintHistory.removeAll()
cacheReuseHistory.removeAll()
cacheRebuildHistory.removeAll()
currentPhaseElapsedHistory.removeAll()
prefillDurationHistory.removeAll()
sessionBuildDurationHistory.removeAll()
lastGenerationTokenCount = 0
lastPromptTokenCount = 0
lastCacheReuseTokenCount = 0
lastCacheRebuildTokenCount = 0
lastPrefillDuration = 0
lastSessionBuildDuration = 0
}
}

View File

@@ -393,7 +393,7 @@ final class APIServer {
// Extract images from the last message only (ChatSession.streamDetails takes images separately)
let lastImages = lastMessage.images
let result: (promptTokens: Int, completionTokens: Int, succeeded: Bool)
let result: (promptTokens: Int, completionTokens: Int, assistantHistoryText: String?, succeeded: Bool)
if isStream {
result = await handleStreamingResponse(
@@ -405,7 +405,8 @@ final class APIServer {
images: lastImages,
tools: request.tools,
created: created,
modelName: modelName
modelName: modelName,
isQwen: isQwen
)
} else {
result = await handleNonStreamingResponse(
@@ -417,16 +418,23 @@ final class APIServer {
images: lastImages,
tools: request.tools,
created: created,
modelName: modelName
modelName: modelName,
isQwen: isQwen
)
}
if result.succeeded {
var cachedSignatures = messageSignatures
if let assistantHistoryText = result.assistantHistoryText {
cachedSignatures.append(
Self.messageSignature(role: .assistant, content: assistantHistoryText, imageURLs: [])
)
}
ConversationSessionCache.shared.completeRequest(
entryId: lease.entryId,
session: session,
requestMessageSignatures: messageSignatures,
requestMessageCount: chatMessages.count,
requestMessageSignatures: cachedSignatures,
requestMessageCount: cachedSignatures.count,
estimatedPromptTokens: estimatedPromptTokens,
estimatedBytes: estimatedBytes,
promptTokens: result.promptTokens,
@@ -473,8 +481,9 @@ final class APIServer {
images: [UserInput.Image],
tools: [APIToolDefinition]?,
created: Int,
modelName: String
) async -> (promptTokens: Int, completionTokens: Int, succeeded: Bool) {
modelName: String,
isQwen: Bool
) async -> (promptTokens: Int, completionTokens: Int, assistantHistoryText: String?, succeeded: Bool) {
do {
var fullText = ""
var promptTokens = 0
@@ -510,48 +519,11 @@ final class APIServer {
}
}
// Parse tool calls: first check framework-detected ones, then our own text parser
var finishReason = "stop"
var responseContent: String? = fullText
var apiToolCalls: [APIToolCall]? = nil
if !frameworkToolCalls.isEmpty {
// Framework natively detected tool calls (e.g. Qwen)
finishReason = "tool_calls"
apiToolCalls = frameworkToolCalls.enumerated().map { i, tc in
let argsJSON: String
let argsDict = tc.function.arguments.mapValues { $0.anyValue }
if let data = try? JSONSerialization.data(withJSONObject: argsDict),
let str = String(data: data, encoding: .utf8) {
argsJSON = str
} else {
argsJSON = "{}"
}
let callId = String(format: "call_%d_%08d", i, abs(tc.function.name.hashValue) % 100_000_000)
return APIToolCall(
index: i,
id: callId,
type: "function",
function: APIFunctionCall(name: tc.function.name, arguments: argsJSON)
)
}
responseContent = fullText.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty ? nil : fullText
} else if let tools, !tools.isEmpty {
// Try our own text parser (e.g. Gemma tool_code blocks)
let (cleanText, parsedCalls) = ToolCallParser.parse(text: fullText, tools: tools)
if !parsedCalls.isEmpty {
finishReason = "tool_calls"
apiToolCalls = parsedCalls.enumerated().map { i, tc in
APIToolCall(
index: i,
id: tc.id,
type: "function",
function: APIFunctionCall(name: tc.name, arguments: tc.arguments)
)
}
responseContent = cleanText.isEmpty ? nil : cleanText
}
}
let resolved = Self.resolveAssistantResponse(
fullText: fullText,
frameworkToolCalls: frameworkToolCalls,
tools: tools
)
let response = APIChatCompletionResponse(
id: requestId,
@@ -563,10 +535,10 @@ final class APIServer {
index: 0,
message: APIChoiceMessage(
role: "assistant",
content: responseContent,
tool_calls: apiToolCalls
content: resolved.content,
tool_calls: resolved.toolCalls
),
finish_reason: finishReason
finish_reason: resolved.finishReason
)
],
usage: APIUsageInfo(
@@ -579,10 +551,15 @@ final class APIServer {
if let json = try? JSONEncoder().encode(response) {
sendResponse(connection: connection, status: 200, body: String(data: json, encoding: .utf8) ?? "{}")
}
return (promptTokens, completionTokens, true)
let assistantHistoryText = Self.normalizedAssistantHistoryContent(
content: resolved.content,
toolCalls: resolved.toolCalls,
isQwen: isQwen
)
return (promptTokens, completionTokens, assistantHistoryText, true)
} catch {
sendResponse(connection: connection, status: 500, body: #"{"error":"\#(error.localizedDescription)"}"#)
return (0, 0, false)
return (0, 0, nil, false)
}
}
@@ -597,8 +574,9 @@ final class APIServer {
images: [UserInput.Image],
tools: [APIToolDefinition]?,
created: Int,
modelName: String
) async -> (promptTokens: Int, completionTokens: Int, succeeded: Bool) {
modelName: String,
isQwen: Bool
) async -> (promptTokens: Int, completionTokens: Int, assistantHistoryText: String?, succeeded: Bool) {
// Send SSE headers
let header = [
"HTTP/1.1 200 OK",
@@ -657,50 +635,14 @@ final class APIServer {
LiveCounters.shared.prefillCompleted(requestId: requestId, promptTokens: promptTokens)
}
// Stats were already updated by LiveCounters inside the loop
let resolved = Self.resolveAssistantResponse(
fullText: fullText,
frameworkToolCalls: frameworkToolCalls,
tools: tools
)
// Post-generation: handle tool calls (framework-detected or text-parsed)
var finishReason = "stop"
if !frameworkToolCalls.isEmpty {
finishReason = "tool_calls"
for (i, tc) in frameworkToolCalls.enumerated() {
let argsDict = tc.function.arguments.mapValues { $0.anyValue }
let argsJSON: String
if let data = try? JSONSerialization.data(withJSONObject: argsDict),
let str = String(data: data, encoding: .utf8) {
argsJSON = str
} else {
argsJSON = "{}"
}
let callId = String(format: "call_%d_%08d", i, abs(tc.function.name.hashValue) % 100_000_000)
let apiToolCall = APIToolCall(
index: i,
id: callId,
type: "function",
function: APIFunctionCall(name: tc.function.name, arguments: argsJSON)
)
await Self.sendSSEEvent(connection: connection, chunk: APIChatCompletionChunk(
id: requestId,
object: "chat.completion.chunk",
created: created,
model: modelName,
choices: [APIStreamChoice(index: 0, delta: APIDeltaMessage(role: nil, content: nil, tool_calls: [apiToolCall]), finish_reason: nil)],
usage: nil
))
}
} else if hasTools {
let (_, parsed) = ToolCallParser.parse(text: fullText, tools: tools)
if !parsed.isEmpty {
finishReason = "tool_calls"
}
for (i, tc) in parsed.enumerated() {
let apiToolCall = APIToolCall(
index: i,
id: tc.id,
type: "function",
function: APIFunctionCall(name: tc.name, arguments: tc.arguments)
)
if let toolCalls = resolved.toolCalls {
for apiToolCall in toolCalls {
await Self.sendSSEEvent(connection: connection, chunk: APIChatCompletionChunk(
id: requestId,
object: "chat.completion.chunk",
@@ -718,7 +660,7 @@ final class APIServer {
object: "chat.completion.chunk",
created: created,
model: modelName,
choices: [APIStreamChoice(index: 0, delta: APIDeltaMessage(role: nil, content: nil, tool_calls: nil), finish_reason: finishReason)],
choices: [APIStreamChoice(index: 0, delta: APIDeltaMessage(role: nil, content: nil, tool_calls: nil), finish_reason: resolved.finishReason)],
usage: APIUsageInfo(
prompt_tokens: promptTokens,
completion_tokens: completionTokens,
@@ -729,7 +671,12 @@ final class APIServer {
// Send [DONE] and close
await Self.sendData(connection: connection, data: "data: [DONE]\n\n".data(using: .utf8)!)
connection.cancel()
return (promptTokens, completionTokens, succeeded)
let assistantHistoryText = Self.normalizedAssistantHistoryContent(
content: resolved.content,
toolCalls: resolved.toolCalls,
isQwen: isQwen
)
return (promptTokens, completionTokens, assistantHistoryText, succeeded)
}
/// Run the token generation + SSE send loop entirely off MainActor.
@@ -876,6 +823,68 @@ final class APIServer {
return hash
}
private static func normalizedAssistantHistoryContent(
content: String?,
toolCalls: [APIToolCall]?,
isQwen: Bool
) -> String? {
var text = content?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
if let toolCalls, !toolCalls.isEmpty {
let formattedCalls = isQwen
? ToolPromptBuilder.formatQwenToolCalls(toolCalls)
: ToolPromptBuilder.formatGemmaToolCalls(toolCalls)
text = text.isEmpty ? formattedCalls : text + "\n" + formattedCalls
}
return text.isEmpty ? nil : text
}
private static func resolveAssistantResponse(
fullText: String,
frameworkToolCalls: [MLXLMCommon.ToolCall],
tools: [APIToolDefinition]?
) -> (content: String?, toolCalls: [APIToolCall]?, finishReason: String) {
var finishReason = "stop"
var responseContent: String? = fullText.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty ? nil : fullText
var apiToolCalls: [APIToolCall]? = nil
if !frameworkToolCalls.isEmpty {
finishReason = "tool_calls"
apiToolCalls = frameworkToolCalls.enumerated().map { i, tc in
let argsJSON: String
let argsDict = tc.function.arguments.mapValues { $0.anyValue }
if let data = try? JSONSerialization.data(withJSONObject: argsDict),
let str = String(data: data, encoding: .utf8) {
argsJSON = str
} else {
argsJSON = "{}"
}
let callId = String(format: "call_%d_%08d", i, abs(tc.function.name.hashValue) % 100_000_000)
return APIToolCall(
index: i,
id: callId,
type: "function",
function: APIFunctionCall(name: tc.function.name, arguments: argsJSON)
)
}
} else if let tools, !tools.isEmpty {
let (cleanText, parsedCalls) = ToolCallParser.parse(text: fullText, tools: tools)
if !parsedCalls.isEmpty {
finishReason = "tool_calls"
apiToolCalls = parsedCalls.enumerated().map { i, tc in
APIToolCall(
index: i,
id: tc.id,
type: "function",
function: APIFunctionCall(name: tc.name, arguments: tc.arguments)
)
}
responseContent = cleanText.isEmpty ? nil : cleanText
}
}
return (responseContent, apiToolCalls, finishReason)
}
}
private struct DecodedImage {

View File

@@ -17,6 +17,7 @@ struct MonitorView: View {
LazyVGrid(columns: chartColumns, alignment: .leading, spacing: 16) {
tokenRateChart
tokenThroughputChart
phaseActivityChart
cacheReuseChart
cacheFootprintChart
cacheSessionChart
@@ -90,6 +91,9 @@ struct MonitorView: View {
phaseChip(title: "Prefill", count: stats.prefillingRequests, color: .blue)
phaseChip(title: "Generating", count: stats.generatingRequests, color: .green)
phaseChip(title: "Cache Active", count: stats.activeCacheEntryCount, color: .orange)
if stats.activeRequests > 0 {
phaseChip(title: phaseAgeLabel, count: Int(stats.currentPhaseElapsed.rounded()), color: activityColor)
}
}
}
.padding(12)
@@ -161,6 +165,71 @@ struct MonitorView: View {
.background(.regularMaterial, in: RoundedRectangle(cornerRadius: 10))
}
@ViewBuilder
private var phaseActivityChart: some View {
VStack(alignment: .leading, spacing: 6) {
Text("Phase Activity")
.font(.caption.bold())
.foregroundStyle(.secondary)
Chart {
ForEach(stats.currentPhaseElapsedHistory) { point in
LineMark(
x: .value("Time", point.timestamp),
y: .value("Active s", point.value)
)
.foregroundStyle(activityColor)
.interpolationMethod(.monotone)
}
ForEach(stats.prefillDurationHistory) { point in
BarMark(
x: .value("Time", point.timestamp),
y: .value("Prefill done", point.value)
)
.foregroundStyle(.blue.opacity(0.45))
}
ForEach(stats.sessionBuildDurationHistory) { point in
BarMark(
x: .value("Time", point.timestamp),
y: .value("Build done", point.value)
)
.foregroundStyle(.purple.opacity(0.45))
}
}
.chartXAxis {
AxisMarks(values: .stride(by: .second, count: 30)) { _ in
AxisGridLine()
}
}
.chartYAxis {
AxisMarks(position: .leading) { value in
AxisGridLine()
AxisValueLabel {
if let v = value.as(Double.self) {
Text(String(format: "%.0f", v))
.font(.caption2.monospacedDigit())
}
}
}
}
.frame(height: 150)
HStack(spacing: 12) {
Label("Active phase age", systemImage: "circle.fill")
.font(.caption2)
.foregroundStyle(activityColor)
Label("Prefill completed", systemImage: "circle.fill")
.font(.caption2)
.foregroundStyle(.blue)
Label("Session build completed", systemImage: "circle.fill")
.font(.caption2)
.foregroundStyle(.purple)
}
}
.padding(12)
.background(.regularMaterial, in: RoundedRectangle(cornerRadius: 10))
}
@ViewBuilder
private var cacheReuseChart: some View {
VStack(alignment: .leading, spacing: 6) {
@@ -717,6 +786,13 @@ struct MonitorView: View {
return Double(stats.totalCacheHits) / Double(total)
}
private var phaseAgeLabel: String {
if stats.generatingRequests > 0 { return "Generating s" }
if stats.prefillingRequests > 0 { return "Prefill s" }
if stats.sessionBuildRequests > 0 { return "Build s" }
return "Preparing s"
}
private func maxContextRatio(for tokens: Int) -> Double {
let maxContext = max(stats.contextMax, modelManager.currentModel?.contextLength ?? 0)
guard maxContext > 0 else { return 0 }