feat: idle-unload of models

This commit is contained in:
2026-03-17 20:01:44 +01:00
parent 033443589c
commit aa2712555a
5 changed files with 68 additions and 1 deletions

View File

@@ -189,11 +189,23 @@ final class APIServer {
// If we can't resolve the model, continue with whatever is loaded // If we can't resolve the model, continue with whatever is loaded
} }
// Reload model if it was idle-unloaded
if modelManager.modelContainer == nil, let lastModelId = Preferences.lastModelId,
let config = ModelConfig.resolve(lastModelId) {
print("[APIServer] Reloading idle-unloaded model: \(config.repoId)")
cachedSession = nil
cachedMessages = nil
cachedModelId = nil
await modelManager.loadModel(config)
}
guard modelManager.isReady, let container = modelManager.modelContainer else { guard modelManager.isReady, let container = modelManager.modelContainer else {
sendResponse(connection: connection, status: 503, body: #"{"error":"No model loaded"}"#) sendResponse(connection: connection, status: 503, body: #"{"error":"No model loaded"}"#)
return return
} }
modelManager.touchActivity()
let isStream = request.stream ?? false let isStream = request.stream ?? false
let temperature = request.temperature ?? 0.7 let temperature = request.temperature ?? 0.7
let topP = request.top_p ?? 1.0 let topP = request.top_p ?? 1.0

View File

@@ -39,4 +39,16 @@ enum Preferences {
get { defaults.bool(forKey: apiAutoStartKey) } get { defaults.bool(forKey: apiAutoStartKey) }
set { defaults.set(newValue, forKey: apiAutoStartKey) } set { defaults.set(newValue, forKey: apiAutoStartKey) }
} }
// MARK: - Idle unload

private static let idleUnloadMinutesKey = "idleUnloadMinutes"

/// Number of idle minutes after which the loaded model is unloaded.
///
/// Reading falls back to 3 whenever the stored value is zero or negative.
/// NOTE(review): presumably `defaults` is a `UserDefaults`, in which case a
/// never-written key reads as 0 and therefore also yields 3 — confirm.
/// Writing stores the new value as given, without validation.
static var idleUnloadMinutes: Int {
    get {
        let stored = defaults.integer(forKey: idleUnloadMinutesKey)
        guard stored > 0 else { return 3 }
        return stored
    }
    set {
        defaults.set(newValue, forKey: idleUnloadMinutesKey)
    }
}
} }

View File

@@ -43,6 +43,7 @@ final class ChatViewModel {
let text = inputText.trimmingCharacters(in: .whitespacesAndNewlines) let text = inputText.trimmingCharacters(in: .whitespacesAndNewlines)
guard !text.isEmpty, modelManager.isReady else { return } guard !text.isEmpty, modelManager.isReady else { return }
modelManager.touchActivity()
ensureSession() ensureSession()
guard let session = chatSession else { return } guard let session = chatSession else { return }

View File

@@ -14,6 +14,9 @@ final class ModelManager {
var loadingModelName: String = "" var loadingModelName: String = ""
var errorMessage: String? var errorMessage: String?
private var idleTimer: Timer?
private(set) var lastUsed: Date?
/// Load a model, unloading the current one first. /// Load a model, unloading the current one first.
/// Prefers the local snapshot from ~/.cache/huggingface/hub/ (shared with the Python server). /// Prefers the local snapshot from ~/.cache/huggingface/hub/ (shared with the Python server).
/// Only downloads if the model isn't cached locally. /// Only downloads if the model isn't cached locally.
@@ -50,6 +53,7 @@ final class ModelManager {
self.modelContainer = container self.modelContainer = container
self.currentModel = config self.currentModel = config
touchActivity()
} catch { } catch {
self.errorMessage = "Failed to load model: \(error.localizedDescription)" self.errorMessage = "Failed to load model: \(error.localizedDescription)"
} }
@@ -59,11 +63,29 @@ final class ModelManager {
/// Unload the current model and free GPU memory. /// Unload the current model and free GPU memory.
func unloadModel() { func unloadModel() {
idleTimer?.invalidate()
idleTimer = nil
lastUsed = nil
modelContainer = nil modelContainer = nil
currentModel = nil currentModel = nil
MLX.GPU.clearCache() MLX.GPU.clearCache()
} }
/// Record model activity and reset the idle unload timer.
///
/// Stamps `lastUsed` with the current time, cancels any pending idle timer,
/// and schedules a new one-shot timer for `Preferences.idleUnloadMinutes`
/// minutes. When that timer fires and no newer activity has been recorded,
/// the model is unloaded via `unloadModel()`.
///
/// NOTE(review): `Timer.scheduledTimer` attaches to the current thread's run
/// loop, so this presumably must be called on the main thread — confirm
/// against callers.
func touchActivity() {
    let stamp = Date()
    lastUsed = stamp

    idleTimer?.invalidate()
    idleTimer = nil

    let minutes = Preferences.idleUnloadMinutes
    guard minutes > 0 else { return }

    let interval = TimeInterval(minutes) * 60
    idleTimer = Timer.scheduledTimer(withTimeInterval: interval, repeats: false) { [weak self] _ in
        Task { @MainActor [weak self] in
            guard let self, self.modelContainer != nil else { return }
            // The Task runs asynchronously after the timer fires. If activity was
            // recorded in between (touchActivity ran again, or the model was
            // unloaded/reloaded), `lastUsed` no longer matches the stamp this timer
            // was scheduled for, so this callback is stale and must not unload.
            guard self.lastUsed == stamp else { return }
            print("[ModelManager] Idle for \(minutes) min — unloading model")
            self.unloadModel()
        }
    }
}
/// Whether a model is ready for generation. /// Whether a model is ready for generation.
var isReady: Bool { var isReady: Bool {
modelContainer != nil && !isLoading modelContainer != nil && !isLoading

View File

@@ -4,6 +4,7 @@ struct SettingsView: View {
@State private var systemPrompt: String = Preferences.systemPrompt @State private var systemPrompt: String = Preferences.systemPrompt
@State private var apiPort: String = String(Preferences.apiPort) @State private var apiPort: String = String(Preferences.apiPort)
@State private var apiAutoStart: Bool = Preferences.apiAutoStart @State private var apiAutoStart: Bool = Preferences.apiAutoStart
@State private var idleUnloadMinutes: String = String(Preferences.idleUnloadMinutes)
var body: some View { var body: some View {
Form { Form {
@@ -37,8 +38,27 @@ struct SettingsView: View {
Preferences.apiAutoStart = apiAutoStart Preferences.apiAutoStart = apiAutoStart
} }
} }
Section("Memory") {
HStack {
Text("Unload model after idle")
TextField("3", text: $idleUnloadMinutes)
.frame(width: 50)
.onChange(of: idleUnloadMinutes) {
if let mins = Int(idleUnloadMinutes), mins > 0 {
Preferences.idleUnloadMinutes = mins
}
}
Text("minutes")
.foregroundStyle(.secondary)
}
Text("The model is automatically unloaded to free memory after being idle, and reloaded on the next request.")
.font(.caption)
.foregroundStyle(.secondary)
}
} }
.formStyle(.grouped) .formStyle(.grouped)
.frame(width: 450, height: 300) .frame(width: 450, height: 380)
} }
} }