feat: implemented more of phase 4
This commit is contained in:
@@ -36,6 +36,8 @@ final class APIServerRewriteTests: XCTestCase {
|
||||
let firstSnapshot = TokenPrefixCache.shared.snapshot()
|
||||
let firstLiveSnapshot = LiveCounters.shared.snapshot()
|
||||
XCTAssertGreaterThan(firstSnapshot.totalEntries, 0)
|
||||
XCTAssertGreaterThan(firstLiveSnapshot.prefillTokensPerSecond, 0)
|
||||
XCTAssertGreaterThan(firstLiveSnapshot.timeToFirstToken, 0)
|
||||
|
||||
_ = try await sendChatCompletion(request, port: harness.port)
|
||||
|
||||
@@ -46,6 +48,7 @@ final class APIServerRewriteTests: XCTestCase {
|
||||
let secondLiveSnapshot = LiveCounters.shared.snapshot()
|
||||
XCTAssertGreaterThan(secondSnapshot.totalHits, firstSnapshot.totalHits)
|
||||
XCTAssertGreaterThan(secondLiveSnapshot.totalCacheReusePromptTokens, firstLiveSnapshot.totalCacheReusePromptTokens)
|
||||
XCTAssertGreaterThan(secondLiveSnapshot.cacheMatchDepth, 0)
|
||||
}
|
||||
|
||||
func testStreamingChatCompletionReusesCacheAcrossThreeProgressivelyLongerTurns() async throws {
|
||||
@@ -429,6 +432,7 @@ final class APIServerRewriteTests: XCTestCase {
|
||||
let afterDisconnectSnapshot = TokenPrefixCache.shared.snapshot()
|
||||
let afterDisconnectLiveSnapshot = LiveCounters.shared.snapshot()
|
||||
XCTAssertGreaterThan(afterDisconnectSnapshot.totalEntries, initialSnapshot.totalEntries)
|
||||
XCTAssertGreaterThan(afterDisconnectLiveSnapshot.totalDisconnects, 0)
|
||||
|
||||
_ = try await sendChatCompletion(
|
||||
APIChatCompletionRequest(
|
||||
|
||||
46
MLXServerTests/Server/LiveCountersTests.swift
Normal file
46
MLXServerTests/Server/LiveCountersTests.swift
Normal file
@@ -0,0 +1,46 @@
|
||||
import Foundation
|
||||
import XCTest
|
||||
@testable import MLX_Server
|
||||
|
||||
/// Unit tests for the process-wide `LiveCounters.shared` metrics singleton.
///
/// Because the counters are shared global state, every test is bracketed by a
/// `reset()` in both `setUp()` and `tearDown()` so that no test depends on, or
/// leaks into, whatever another test (in this class or another suite) recorded.
final class LiveCountersTests: XCTestCase {
    // MARK: - Lifecycle

    /// Starts each test from freshly reset counters, regardless of what ran before.
    override func setUp() {
        super.setUp()
        LiveCounters.shared.reset()
    }

    /// Clears anything this test recorded so later suites see reset counters.
    override func tearDown() {
        LiveCounters.shared.reset()
        super.tearDown()
    }

    // MARK: - Tests

    /// Drives one request through start, prefill, first token, a duplicated
    /// disconnect and completion, checking the snapshot while the request is
    /// still in flight and again after it has completed.
    func testTracksRequestMetricsAndDeduplicatesDisconnects() {
        let requestId = "req-1"

        // Arrange: start a request and report a prefill that reuses 40 of 64 prompt tokens.
        LiveCounters.shared.requestStarted(requestId: requestId, contextLength: 8_192)
        LiveCounters.shared.requestPhaseChanged(requestId: requestId, phase: .prefilling)
        LiveCounters.shared.recordPrefillReuse(requestId: requestId, matchedPromptTokens: 40, promptTokenCount: 64)
        LiveCounters.shared.visionProcessingCompleted(requestId: requestId, duration: 0.25)

        // NOTE(review): the short sleeps presumably give the elapsed-time based
        // metrics asserted below (prefill tokens/s, time to first token) a
        // non-zero duration to measure — confirm against LiveCounters.
        Thread.sleep(forTimeInterval: 0.01)
        LiveCounters.shared.prefillCompleted(requestId: requestId, promptTokens: 64)

        Thread.sleep(forTimeInterval: 0.01)
        LiveCounters.shared.firstTokenGenerated(requestId: requestId)
        LiveCounters.shared.tokenGenerated(tokensPerSecond: 12.5, totalGenerated: 3)

        // The same disconnect is reported twice on purpose; it must only count once.
        LiveCounters.shared.disconnectDetected(requestId: requestId)
        LiveCounters.shared.disconnectDetected(requestId: requestId)

        // Assert: state while the request is still in flight.
        let inFlight = LiveCounters.shared.snapshot()
        XCTAssertEqual(inFlight.cacheMatchDepth, 40)
        XCTAssertEqual(inFlight.currentCacheMatchedPromptTokens, 40)
        // 64 prompt tokens - 40 matched = 24 rebuilt.
        XCTAssertEqual(inFlight.currentCacheRebuiltPromptTokens, 24)
        XCTAssertEqual(inFlight.visionEncoderTime, 0.25, accuracy: 0.0001)
        XCTAssertGreaterThan(inFlight.prefillTokensPerSecond, 0)
        XCTAssertGreaterThan(inFlight.timeToFirstToken, 0)
        XCTAssertEqual(inFlight.totalDisconnects, 1)

        // Act: finish the request.
        LiveCounters.shared.requestCompleted(requestId: requestId, generationTokens: 3)

        // Assert: cumulative totals after completion.
        let completed = LiveCounters.shared.snapshot()
        XCTAssertEqual(completed.totalPromptTokens, 64)
        XCTAssertEqual(completed.totalGenerationTokens, 3)
        XCTAssertEqual(completed.totalVisionEncoderDuration, 0.25, accuracy: 0.0001)
        XCTAssertEqual(completed.totalDisconnects, 1)
    }
}
|
||||
Reference in New Issue
Block a user