diff --git a/MLXServer/Models/InferenceStats.swift b/MLXServer/Models/InferenceStats.swift
index aaa1208..4e0da16 100644
--- a/MLXServer/Models/InferenceStats.swift
+++ b/MLXServer/Models/InferenceStats.swift
@@ -9,7 +9,7 @@ final class LiveCounters: @unchecked Sendable {
     static let shared = LiveCounters()
 
     private let lock = OSAllocatedUnfairLock()
-    private var requestPhases: [String: RequestPhase] = [:]
+    private var requestPhases: [String: RequestState] = [:]
 
     // Current request
     private var _activeRequests: Int = 0
@@ -23,13 +23,19 @@ final class LiveCounters: @unchecked Sendable {
     private var _isPrefilling: Bool = false
     private var _isGenerating: Bool = false
     private var _contextMax: Int = 0
+    private var _currentPhaseElapsed: TimeInterval = 0
 
     // Cumulative
     private var _totalRequests: Int = 0
     private var _totalPromptTokens: Int = 0
     private var _totalGenerationTokens: Int = 0
+    private var _totalPreparingDuration: TimeInterval = 0
+    private var _totalSessionBuildDuration: TimeInterval = 0
+    private var _totalPrefillDuration: TimeInterval = 0
+    private var _totalGenerationDuration: TimeInterval = 0
 
     func requestStarted(requestId: String, contextLength: Int) {
+        let now = Date()
         lock.lock()
         _activeRequests += 1
         _preparingRequests += 1
@@ -40,33 +46,40 @@ final class LiveCounters: @unchecked Sendable {
         _generationTokens = 0
         _tokensPerSecond = 0
         _contextMax = contextLength
-        requestPhases[requestId] = .preparing
+        requestPhases[requestId] = RequestState(phase: .preparing, phaseStartedAt: now)
+        refreshCurrentPhaseElapsed(now: now)
         lock.unlock()
     }
 
     func requestPhaseChanged(requestId: String, phase: RequestPhase) {
+        let now = Date()
         lock.lock()
         if let current = requestPhases[requestId] {
-            decrementCount(for: current)
+            decrementCount(for: current.phase)
+            accumulateDuration(for: current.phase, elapsed: now.timeIntervalSince(current.phaseStartedAt))
         }
         incrementCount(for: phase)
-        requestPhases[requestId] = phase
+        requestPhases[requestId] = RequestState(phase: phase, phaseStartedAt: now)
         _isPrefilling = _prefillRequests > 0 || _sessionBuildRequests > 0 || _preparingRequests > 0
         _isGenerating = _generatingRequests > 0
+        refreshCurrentPhaseElapsed(now: now)
         lock.unlock()
     }
 
     func prefillCompleted(requestId: String, promptTokens: Int) {
+        let now = Date()
         lock.lock()
         if let current = requestPhases[requestId] {
-            decrementCount(for: current)
+            decrementCount(for: current.phase)
+            accumulateDuration(for: current.phase, elapsed: now.timeIntervalSince(current.phaseStartedAt))
         }
         incrementCount(for: .generating)
-        requestPhases[requestId] = .generating
+        requestPhases[requestId] = RequestState(phase: .generating, phaseStartedAt: now)
         _promptTokens = promptTokens
         _totalPromptTokens += promptTokens
         _isPrefilling = _prefillRequests > 0 || _sessionBuildRequests > 0 || _preparingRequests > 0
         _isGenerating = _generatingRequests > 0
+        refreshCurrentPhaseElapsed(now: now)
         lock.unlock()
     }
 
@@ -78,9 +91,11 @@ final class LiveCounters: @unchecked Sendable {
     }
 
     func requestCompleted(requestId: String, generationTokens: Int) {
+        let now = Date()
         lock.lock()
         if let current = requestPhases.removeValue(forKey: requestId) {
-            decrementCount(for: current)
+            decrementCount(for: current.phase)
+            accumulateDuration(for: current.phase, elapsed: now.timeIntervalSince(current.phaseStartedAt))
         }
         _activeRequests = max(0, _activeRequests - 1)
         _totalGenerationTokens += generationTokens
@@ -92,6 +107,7 @@ final class LiveCounters: @unchecked Sendable {
             _isPrefilling = _prefillRequests > 0 || _sessionBuildRequests > 0 || _preparingRequests > 0
             _isGenerating = _generatingRequests > 0
         }
+        refreshCurrentPhaseElapsed(now: now)
         lock.unlock()
     }
 
@@ -109,15 +125,22 @@ final class LiveCounters: @unchecked Sendable {
         _isPrefilling = false
         _isGenerating = false
         _contextMax = 0
+        _currentPhaseElapsed = 0
         _totalRequests = 0
         _totalPromptTokens = 0
         _totalGenerationTokens = 0
+        _totalPreparingDuration = 0
+        _totalSessionBuildDuration = 0
+        _totalPrefillDuration = 0
+        _totalGenerationDuration = 0
         lock.unlock()
     }
 
     /// Atomic snapshot for the UI timer.
     func snapshot() -> Snapshot {
+        let now = Date()
         lock.lock()
+        refreshCurrentPhaseElapsed(now: now)
         let s = Snapshot(
             activeRequests: _activeRequests,
             preparingRequests: _preparingRequests,
@@ -130,9 +153,14 @@ final class LiveCounters: @unchecked Sendable {
             isPrefilling: _isPrefilling,
             isGenerating: _isGenerating,
             contextMax: _contextMax,
+            currentPhaseElapsed: _currentPhaseElapsed,
             totalRequests: _totalRequests,
             totalPromptTokens: _totalPromptTokens,
-            totalGenerationTokens: _totalGenerationTokens
+            totalGenerationTokens: _totalGenerationTokens,
+            totalPreparingDuration: _totalPreparingDuration,
+            totalSessionBuildDuration: _totalSessionBuildDuration,
+            totalPrefillDuration: _totalPrefillDuration,
+            totalGenerationDuration: _totalGenerationDuration
         )
         lock.unlock()
         return s
@@ -150,9 +178,14 @@ final class LiveCounters: @unchecked Sendable {
         let isPrefilling: Bool
         let isGenerating: Bool
         let contextMax: Int
+        let currentPhaseElapsed: TimeInterval
         let totalRequests: Int
         let totalPromptTokens: Int
         let totalGenerationTokens: Int
+        let totalPreparingDuration: TimeInterval
+        let totalSessionBuildDuration: TimeInterval
+        let totalPrefillDuration: TimeInterval
+        let totalGenerationDuration: TimeInterval
     }
 
     private func incrementCount(for phase: RequestPhase) {
@@ -181,6 +214,28 @@ final class LiveCounters: @unchecked Sendable {
         }
     }
 
+    private func accumulateDuration(for phase: RequestPhase, elapsed: TimeInterval) {
+        switch phase {
+        case .preparing:
+            _totalPreparingDuration += elapsed
+        case .sessionBuild:
+            _totalSessionBuildDuration += elapsed
+        case .prefilling:
+            _totalPrefillDuration += elapsed
+        case .generating:
+            _totalGenerationDuration += elapsed
+        }
+    }
+
+    private func refreshCurrentPhaseElapsed(now: Date) {
+        _currentPhaseElapsed = requestPhases.values.map { now.timeIntervalSince($0.phaseStartedAt) }.max() ?? 0
+    }
+
+    private struct RequestState {
+        var phase: RequestPhase
+        var phaseStartedAt: Date
+    }
+
     enum RequestPhase {
         case preparing
         case sessionBuild
@@ -208,6 +263,7 @@ final class InferenceStats {
     var currentTokensPerSecond: Double = 0
     var contextUsed: Int = 0
     var contextMax: Int = 0
+    var currentPhaseElapsed: TimeInterval = 0
 
     // MARK: - Cumulative counters
 
@@ -219,6 +275,10 @@ final class InferenceStats {
     var totalCacheEvictions: Int = 0
     var totalCacheReusePromptTokens: Int = 0
     var totalCacheRebuildPromptTokens: Int = 0
+    var totalPreparingDuration: TimeInterval = 0
+    var totalSessionBuildDuration: TimeInterval = 0
+    var totalPrefillDuration: TimeInterval = 0
+    var totalGenerationDuration: TimeInterval = 0
 
     // MARK: - Cache state
 
@@ -246,6 +306,9 @@ final class InferenceStats {
     private(set) var cacheFootprintHistory: [DataPoint] = []
     private(set) var cacheReuseHistory: [DataPoint] = []
     private(set) var cacheRebuildHistory: [DataPoint] = []
+    private(set) var currentPhaseElapsedHistory: [DataPoint] = []
+    private(set) var prefillDurationHistory: [DataPoint] = []
+    private(set) var sessionBuildDurationHistory: [DataPoint] = []
 
     private static let maxHistoryPoints = 120 // ~2 minutes at 1Hz
 
@@ -255,6 +318,8 @@ final class InferenceStats {
     private var lastPromptTokenCount: Int = 0
     private var lastCacheReuseTokenCount: Int = 0
     private var lastCacheRebuildTokenCount: Int = 0
+    private var lastPrefillDuration: TimeInterval = 0
+    private var lastSessionBuildDuration: TimeInterval = 0
 
     func startSampling() {
         guard sampleTimer == nil else { return }
@@ -287,9 +352,14 @@ final class InferenceStats {
         isGenerating = snap.isGenerating
         contextMax = snap.contextMax
         contextUsed = snap.promptTokens + snap.generationTokens
+        currentPhaseElapsed = snap.currentPhaseElapsed
         totalRequests = snap.totalRequests
         totalPromptTokens = snap.totalPromptTokens
         totalGenerationTokens = snap.totalGenerationTokens
+        totalPreparingDuration = snap.totalPreparingDuration
+        totalSessionBuildDuration = snap.totalSessionBuildDuration
+        totalPrefillDuration = snap.totalPrefillDuration
+        totalGenerationDuration = snap.totalGenerationDuration
         totalCacheHits = cache.totalHits
         totalCacheMisses = cache.totalMisses
         totalCacheEvictions = cache.totalEvictions
@@ -308,10 +378,14 @@ final class InferenceStats {
         let promptDelta = snap.totalPromptTokens - lastPromptTokenCount
         let cacheReuseDelta = cache.totalReusePromptTokens - lastCacheReuseTokenCount
         let cacheRebuildDelta = cache.totalRebuildPromptTokens - lastCacheRebuildTokenCount
+        let prefillDurationDelta = snap.totalPrefillDuration - lastPrefillDuration
+        let sessionBuildDurationDelta = snap.totalSessionBuildDuration - lastSessionBuildDuration
         lastGenerationTokenCount = snap.totalGenerationTokens
         lastPromptTokenCount = snap.totalPromptTokens
         lastCacheReuseTokenCount = cache.totalReusePromptTokens
         lastCacheRebuildTokenCount = cache.totalRebuildPromptTokens
+        lastPrefillDuration = snap.totalPrefillDuration
+        lastSessionBuildDuration = snap.totalSessionBuildDuration
 
         tokenRateHistory.append(DataPoint(timestamp: now, value: snap.tokensPerSecond))
         generationTokenHistory.append(DataPoint(timestamp: now, value: Double(genDelta)))
@@ -321,6 +395,9 @@ final class InferenceStats {
         cacheFootprintHistory.append(DataPoint(timestamp: now, value: Double(cache.estimatedBytes)))
         cacheReuseHistory.append(DataPoint(timestamp: now, value: Double(cacheReuseDelta)))
         cacheRebuildHistory.append(DataPoint(timestamp: now, value: Double(cacheRebuildDelta)))
+        currentPhaseElapsedHistory.append(DataPoint(timestamp: now, value: snap.currentPhaseElapsed))
+        prefillDurationHistory.append(DataPoint(timestamp: now, value: prefillDurationDelta))
+        sessionBuildDurationHistory.append(DataPoint(timestamp: now, value: sessionBuildDurationDelta))
 
         if tokenRateHistory.count > Self.maxHistoryPoints {
             tokenRateHistory.removeFirst(tokenRateHistory.count - Self.maxHistoryPoints)
@@ -346,6 +423,15 @@ final class InferenceStats {
         if cacheRebuildHistory.count > Self.maxHistoryPoints {
             cacheRebuildHistory.removeFirst(cacheRebuildHistory.count - Self.maxHistoryPoints)
         }
+        if currentPhaseElapsedHistory.count > Self.maxHistoryPoints {
+            currentPhaseElapsedHistory.removeFirst(currentPhaseElapsedHistory.count - Self.maxHistoryPoints)
+        }
+        if prefillDurationHistory.count > Self.maxHistoryPoints {
+            prefillDurationHistory.removeFirst(prefillDurationHistory.count - Self.maxHistoryPoints)
+        }
+        if sessionBuildDurationHistory.count > Self.maxHistoryPoints {
+            sessionBuildDurationHistory.removeFirst(sessionBuildDurationHistory.count - Self.maxHistoryPoints)
+        }
     }
 
     func reset() {
@@ -363,9 +449,14 @@ final class InferenceStats {
         currentTokensPerSecond = 0
         contextUsed = 0
         contextMax = 0
+        currentPhaseElapsed = 0
         totalRequests = 0
         totalPromptTokens = 0
         totalGenerationTokens = 0
+        totalPreparingDuration = 0
+        totalSessionBuildDuration = 0
+        totalPrefillDuration = 0
+        totalGenerationDuration = 0
         totalCacheHits = 0
         totalCacheMisses = 0
         totalCacheEvictions = 0
@@ -386,9 +477,14 @@ final class InferenceStats {
         cacheFootprintHistory.removeAll()
         cacheReuseHistory.removeAll()
         cacheRebuildHistory.removeAll()
+        currentPhaseElapsedHistory.removeAll()
+        prefillDurationHistory.removeAll()
+        sessionBuildDurationHistory.removeAll()
         lastGenerationTokenCount = 0
         lastPromptTokenCount = 0
         lastCacheReuseTokenCount = 0
         lastCacheRebuildTokenCount = 0
+        lastPrefillDuration = 0
+        lastSessionBuildDuration = 0
     }
 }
diff --git a/MLXServer/Server/APIServer.swift b/MLXServer/Server/APIServer.swift
index 26b8eee..bd162cb 100644
--- a/MLXServer/Server/APIServer.swift
+++ b/MLXServer/Server/APIServer.swift
@@ -393,7 +393,7 @@ final class APIServer {
         // Extract images from the last message only (ChatSession.streamDetails takes images separately)
         let lastImages = lastMessage.images
 
-        let result: (promptTokens: Int, completionTokens: Int, succeeded: Bool)
+        let result: (promptTokens: Int, completionTokens: Int, assistantHistoryText: String?, succeeded: Bool)
 
         if isStream {
             result = await handleStreamingResponse(
@@ -405,7 +405,8 @@ final class APIServer {
                 images: lastImages,
                 tools: request.tools,
                 created: created,
-                modelName: modelName
+                modelName: modelName,
+                isQwen: isQwen
             )
         } else {
             result = await handleNonStreamingResponse(
@@ -417,16 +418,23 @@ final class APIServer {
                 images: lastImages,
                 tools: request.tools,
                 created: created,
-                modelName: modelName
+                modelName: modelName,
+                isQwen: isQwen
             )
         }
 
         if result.succeeded {
+            var cachedSignatures = messageSignatures
+            if let assistantHistoryText = result.assistantHistoryText {
+                cachedSignatures.append(
+                    Self.messageSignature(role: .assistant, content: assistantHistoryText, imageURLs: [])
+                )
+            }
             ConversationSessionCache.shared.completeRequest(
                 entryId: lease.entryId,
                 session: session,
-                requestMessageSignatures: messageSignatures,
-                requestMessageCount: chatMessages.count,
+                requestMessageSignatures: cachedSignatures,
+                requestMessageCount: cachedSignatures.count,
                 estimatedPromptTokens: estimatedPromptTokens,
                 estimatedBytes: estimatedBytes,
                 promptTokens: result.promptTokens,
@@ -473,8 +481,9 @@ final class APIServer {
         images: [UserInput.Image],
         tools: [APIToolDefinition]?,
         created: Int,
-        modelName: String
-    ) async -> (promptTokens: Int, completionTokens: Int, succeeded: Bool) {
+        modelName: String,
+        isQwen: Bool
+    ) async -> (promptTokens: Int, completionTokens: Int, assistantHistoryText: String?, succeeded: Bool) {
         do {
             var fullText = ""
             var promptTokens = 0
@@ -510,48 +519,11 @@ final class APIServer {
                 }
             }
 
-            // Parse tool calls: first check framework-detected ones, then our own text parser
-            var finishReason = "stop"
-            var responseContent: String? = fullText
-            var apiToolCalls: [APIToolCall]? = nil
-
-            if !frameworkToolCalls.isEmpty {
-                // Framework natively detected tool calls (e.g. Qwen)
-                finishReason = "tool_calls"
-                apiToolCalls = frameworkToolCalls.enumerated().map { i, tc in
-                    let argsJSON: String
-                    let argsDict = tc.function.arguments.mapValues { $0.anyValue }
-                    if let data = try? JSONSerialization.data(withJSONObject: argsDict),
-                       let str = String(data: data, encoding: .utf8) {
-                        argsJSON = str
-                    } else {
-                        argsJSON = "{}"
-                    }
-                    let callId = String(format: "call_%d_%08d", i, abs(tc.function.name.hashValue) % 100_000_000)
-                    return APIToolCall(
-                        index: i,
-                        id: callId,
-                        type: "function",
-                        function: APIFunctionCall(name: tc.function.name, arguments: argsJSON)
-                    )
-                }
-                responseContent = fullText.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty ? nil : fullText
-            } else if let tools, !tools.isEmpty {
-                // Try our own text parser (e.g. Gemma tool_code blocks)
-                let (cleanText, parsedCalls) = ToolCallParser.parse(text: fullText, tools: tools)
-                if !parsedCalls.isEmpty {
-                    finishReason = "tool_calls"
-                    apiToolCalls = parsedCalls.enumerated().map { i, tc in
-                        APIToolCall(
-                            index: i,
-                            id: tc.id,
-                            type: "function",
-                            function: APIFunctionCall(name: tc.name, arguments: tc.arguments)
-                        )
-                    }
-                    responseContent = cleanText.isEmpty ? nil : cleanText
-                }
-            }
+            let resolved = Self.resolveAssistantResponse(
+                fullText: fullText,
+                frameworkToolCalls: frameworkToolCalls,
+                tools: tools
+            )
 
             let response = APIChatCompletionResponse(
                 id: requestId,
@@ -563,10 +535,10 @@ final class APIServer {
                         index: 0,
                         message: APIChoiceMessage(
                             role: "assistant",
-                            content: responseContent,
-                            tool_calls: apiToolCalls
+                            content: resolved.content,
+                            tool_calls: resolved.toolCalls
                         ),
-                        finish_reason: finishReason
+                        finish_reason: resolved.finishReason
                     )
                 ],
                 usage: APIUsageInfo(
@@ -579,10 +551,15 @@ final class APIServer {
             if let json = try? JSONEncoder().encode(response) {
                 sendResponse(connection: connection, status: 200, body: String(data: json, encoding: .utf8) ?? "{}")
             }
-            return (promptTokens, completionTokens, true)
+            let assistantHistoryText = Self.normalizedAssistantHistoryContent(
+                content: resolved.content,
+                toolCalls: resolved.toolCalls,
+                isQwen: isQwen
+            )
+            return (promptTokens, completionTokens, assistantHistoryText, true)
         } catch {
             sendResponse(connection: connection, status: 500, body: #"{"error":"\#(error.localizedDescription)"}"#)
-            return (0, 0, false)
+            return (0, 0, nil, false)
         }
     }
 
@@ -597,8 +574,9 @@ final class APIServer {
         images: [UserInput.Image],
         tools: [APIToolDefinition]?,
         created: Int,
-        modelName: String
-    ) async -> (promptTokens: Int, completionTokens: Int, succeeded: Bool) {
+        modelName: String,
+        isQwen: Bool
+    ) async -> (promptTokens: Int, completionTokens: Int, assistantHistoryText: String?, succeeded: Bool) {
         // Send SSE headers
         let header = [
             "HTTP/1.1 200 OK",
@@ -657,50 +635,14 @@ final class APIServer {
             LiveCounters.shared.prefillCompleted(requestId: requestId, promptTokens: promptTokens)
         }
 
-        // Stats were already updated by LiveCounters inside the loop
+        let resolved = Self.resolveAssistantResponse(
+            fullText: fullText,
+            frameworkToolCalls: frameworkToolCalls,
+            tools: tools
+        )
 
-        // Post-generation: handle tool calls (framework-detected or text-parsed)
-        var finishReason = "stop"
-
-        if !frameworkToolCalls.isEmpty {
-            finishReason = "tool_calls"
-            for (i, tc) in frameworkToolCalls.enumerated() {
-                let argsDict = tc.function.arguments.mapValues { $0.anyValue }
-                let argsJSON: String
-                if let data = try? JSONSerialization.data(withJSONObject: argsDict),
-                   let str = String(data: data, encoding: .utf8) {
-                    argsJSON = str
-                } else {
-                    argsJSON = "{}"
-                }
-                let callId = String(format: "call_%d_%08d", i, abs(tc.function.name.hashValue) % 100_000_000)
-                let apiToolCall = APIToolCall(
-                    index: i,
-                    id: callId,
-                    type: "function",
-                    function: APIFunctionCall(name: tc.function.name, arguments: argsJSON)
-                )
-                await Self.sendSSEEvent(connection: connection, chunk: APIChatCompletionChunk(
-                    id: requestId,
-                    object: "chat.completion.chunk",
-                    created: created,
-                    model: modelName,
-                    choices: [APIStreamChoice(index: 0, delta: APIDeltaMessage(role: nil, content: nil, tool_calls: [apiToolCall]), finish_reason: nil)],
-                    usage: nil
-                ))
-            }
-        } else if hasTools {
-            let (_, parsed) = ToolCallParser.parse(text: fullText, tools: tools)
-            if !parsed.isEmpty {
-                finishReason = "tool_calls"
-            }
-            for (i, tc) in parsed.enumerated() {
-                let apiToolCall = APIToolCall(
-                    index: i,
-                    id: tc.id,
-                    type: "function",
-                    function: APIFunctionCall(name: tc.name, arguments: tc.arguments)
-                )
+        if let toolCalls = resolved.toolCalls {
+            for apiToolCall in toolCalls {
                 await Self.sendSSEEvent(connection: connection, chunk: APIChatCompletionChunk(
                     id: requestId,
                     object: "chat.completion.chunk",
@@ -718,7 +660,7 @@ final class APIServer {
             object: "chat.completion.chunk",
             created: created,
             model: modelName,
-            choices: [APIStreamChoice(index: 0, delta: APIDeltaMessage(role: nil, content: nil, tool_calls: nil), finish_reason: finishReason)],
+            choices: [APIStreamChoice(index: 0, delta: APIDeltaMessage(role: nil, content: nil, tool_calls: nil), finish_reason: resolved.finishReason)],
             usage: APIUsageInfo(
                 prompt_tokens: promptTokens,
                 completion_tokens: completionTokens,
@@ -729,7 +671,12 @@ final class APIServer {
         // Send [DONE] and close
         await Self.sendData(connection: connection, data: "data: [DONE]\n\n".data(using: .utf8)!)
         connection.cancel()
-        return (promptTokens, completionTokens, succeeded)
+        let assistantHistoryText = Self.normalizedAssistantHistoryContent(
+            content: resolved.content,
+            toolCalls: resolved.toolCalls,
+            isQwen: isQwen
+        )
+        return (promptTokens, completionTokens, assistantHistoryText, succeeded)
     }
 
     /// Run the token generation + SSE send loop entirely off MainActor.
@@ -876,6 +823,68 @@ final class APIServer {
 
         return hash
     }
+
+    private static func normalizedAssistantHistoryContent(
+        content: String?,
+        toolCalls: [APIToolCall]?,
+        isQwen: Bool
+    ) -> String? {
+        var text = content?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
+        if let toolCalls, !toolCalls.isEmpty {
+            let formattedCalls = isQwen
+                ? ToolPromptBuilder.formatQwenToolCalls(toolCalls)
+                : ToolPromptBuilder.formatGemmaToolCalls(toolCalls)
+            text = text.isEmpty ? formattedCalls : text + "\n" + formattedCalls
+        }
+        return text.isEmpty ? nil : text
+    }
+
+    private static func resolveAssistantResponse(
+        fullText: String,
+        frameworkToolCalls: [MLXLMCommon.ToolCall],
+        tools: [APIToolDefinition]?
+    ) -> (content: String?, toolCalls: [APIToolCall]?, finishReason: String) {
+        var finishReason = "stop"
+        var responseContent: String? = fullText.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty ? nil : fullText
+        var apiToolCalls: [APIToolCall]? = nil
+
+        if !frameworkToolCalls.isEmpty {
+            finishReason = "tool_calls"
+            apiToolCalls = frameworkToolCalls.enumerated().map { i, tc in
+                let argsJSON: String
+                let argsDict = tc.function.arguments.mapValues { $0.anyValue }
+                if let data = try? JSONSerialization.data(withJSONObject: argsDict),
+                   let str = String(data: data, encoding: .utf8) {
+                    argsJSON = str
+                } else {
+                    argsJSON = "{}"
+                }
+                let callId = String(format: "call_%d_%08d", i, abs(tc.function.name.hashValue) % 100_000_000)
+                return APIToolCall(
+                    index: i,
+                    id: callId,
+                    type: "function",
+                    function: APIFunctionCall(name: tc.function.name, arguments: argsJSON)
+                )
+            }
+        } else if let tools, !tools.isEmpty {
+            let (cleanText, parsedCalls) = ToolCallParser.parse(text: fullText, tools: tools)
+            if !parsedCalls.isEmpty {
+                finishReason = "tool_calls"
+                apiToolCalls = parsedCalls.enumerated().map { i, tc in
+                    APIToolCall(
+                        index: i,
+                        id: tc.id,
+                        type: "function",
+                        function: APIFunctionCall(name: tc.name, arguments: tc.arguments)
+                    )
+                }
+                responseContent = cleanText.isEmpty ? nil : cleanText
+            }
+        }
+
+        return (responseContent, apiToolCalls, finishReason)
+    }
 }
 
 private struct DecodedImage {
diff --git a/MLXServer/Views/MonitorView.swift b/MLXServer/Views/MonitorView.swift
index 0cc3061..6e8d556 100644
--- a/MLXServer/Views/MonitorView.swift
+++ b/MLXServer/Views/MonitorView.swift
@@ -17,6 +17,7 @@ struct MonitorView: View {
                 LazyVGrid(columns: chartColumns, alignment: .leading, spacing: 16) {
                     tokenRateChart
                     tokenThroughputChart
+                    phaseActivityChart
                     cacheReuseChart
                     cacheFootprintChart
                     cacheSessionChart
@@ -90,6 +91,9 @@ struct MonitorView: View {
                 phaseChip(title: "Prefill", count: stats.prefillingRequests, color: .blue)
                 phaseChip(title: "Generating", count: stats.generatingRequests, color: .green)
                 phaseChip(title: "Cache Active", count: stats.activeCacheEntryCount, color: .orange)
+                if stats.activeRequests > 0 {
+                    phaseChip(title: phaseAgeLabel, count: Int(stats.currentPhaseElapsed.rounded()), color: activityColor)
+                }
             }
         }
         .padding(12)
@@ -161,6 +165,71 @@ struct MonitorView: View {
         .background(.regularMaterial, in: RoundedRectangle(cornerRadius: 10))
     }
 
+    @ViewBuilder
+    private var phaseActivityChart: some View {
+        VStack(alignment: .leading, spacing: 6) {
+            Text("Phase Activity")
+                .font(.caption.bold())
+                .foregroundStyle(.secondary)
+
+            Chart {
+                ForEach(stats.currentPhaseElapsedHistory) { point in
+                    LineMark(
+                        x: .value("Time", point.timestamp),
+                        y: .value("Active s", point.value)
+                    )
+                    .foregroundStyle(activityColor)
+                    .interpolationMethod(.monotone)
+                }
+                ForEach(stats.prefillDurationHistory) { point in
+                    BarMark(
+                        x: .value("Time", point.timestamp),
+                        y: .value("Prefill done", point.value)
+                    )
+                    .foregroundStyle(.blue.opacity(0.45))
+                }
+                ForEach(stats.sessionBuildDurationHistory) { point in
+                    BarMark(
+                        x: .value("Time", point.timestamp),
+                        y: .value("Build done", point.value)
+                    )
+                    .foregroundStyle(.purple.opacity(0.45))
+                }
+            }
+            .chartXAxis {
+                AxisMarks(values: .stride(by: .second, count: 30)) { _ in
+                    AxisGridLine()
+                }
+            }
+            .chartYAxis {
+                AxisMarks(position: .leading) { value in
+                    AxisGridLine()
+                    AxisValueLabel {
+                        if let v = value.as(Double.self) {
+                            Text(String(format: "%.0f", v))
+                                .font(.caption2.monospacedDigit())
+                        }
+                    }
+                }
+            }
+            .frame(height: 150)
+
+            HStack(spacing: 12) {
+                Label("Active phase age", systemImage: "circle.fill")
+                    .font(.caption2)
+                    .foregroundStyle(activityColor)
+                Label("Prefill completed", systemImage: "circle.fill")
+                    .font(.caption2)
+                    .foregroundStyle(.blue)
+                Label("Session build completed", systemImage: "circle.fill")
+                    .font(.caption2)
+                    .foregroundStyle(.purple)
+            }
+        }
+        .padding(12)
+        .background(.regularMaterial, in: RoundedRectangle(cornerRadius: 10))
+    }
+
     @ViewBuilder
     private var cacheReuseChart: some View {
         VStack(alignment: .leading, spacing: 6) {
@@ -717,6 +786,13 @@ struct MonitorView: View {
         return Double(stats.totalCacheHits) / Double(total)
     }
 
+    private var phaseAgeLabel: String {
+        if stats.generatingRequests > 0 { return "Generating s" }
+        if stats.prefillingRequests > 0 { return "Prefill s" }
+        if stats.sessionBuildRequests > 0 { return "Build s" }
+        return "Preparing s"
+    }
+
     private func maxContextRatio(for tokens: Int) -> Double {
         let maxContext = max(stats.contextMax, modelManager.currentModel?.contextLength ?? 0)
         guard maxContext > 0 else { return 0 }