feat: idle-unload of models

This commit is contained in:
2026-03-17 20:01:44 +01:00
parent 033443589c
commit aa2712555a
5 changed files with 68 additions and 1 deletions

View File

@@ -189,11 +189,23 @@ final class APIServer {
// If we can't resolve the model, continue with whatever is loaded
}
// Reload model if it was idle-unloaded
if modelManager.modelContainer == nil, let lastModelId = Preferences.lastModelId,
let config = ModelConfig.resolve(lastModelId) {
print("[APIServer] Reloading idle-unloaded model: \(config.repoId)")
cachedSession = nil
cachedMessages = nil
cachedModelId = nil
await modelManager.loadModel(config)
}
guard modelManager.isReady, let container = modelManager.modelContainer else {
sendResponse(connection: connection, status: 503, body: #"{"error":"No model loaded"}"#)
return
}
modelManager.touchActivity()
let isStream = request.stream ?? false
let temperature = request.temperature ?? 0.7
let topP = request.top_p ?? 1.0

View File

@@ -39,4 +39,16 @@ enum Preferences {
get { defaults.bool(forKey: apiAutoStartKey) }
set { defaults.set(newValue, forKey: apiAutoStartKey) }
}
// MARK: - Idle unload
private static let idleUnloadMinutesKey = "idleUnloadMinutes"
/// Number of idle minutes after which the loaded model should be unloaded.
///
/// Reading returns the stored integer when it is positive and falls back to 3
/// otherwise. Writing stores the new value as given, without validation.
static var idleUnloadMinutes: Int {
    get {
        let storedMinutes = defaults.integer(forKey: idleUnloadMinutesKey)
        // Any non-positive stored value is treated as "not configured".
        guard storedMinutes > 0 else { return 3 }
        return storedMinutes
    }
    set { defaults.set(newValue, forKey: idleUnloadMinutesKey) }
}
}

View File

@@ -43,6 +43,7 @@ final class ChatViewModel {
let text = inputText.trimmingCharacters(in: .whitespacesAndNewlines)
guard !text.isEmpty, modelManager.isReady else { return }
modelManager.touchActivity()
ensureSession()
guard let session = chatSession else { return }

View File

@@ -14,6 +14,9 @@ final class ModelManager {
var loadingModelName: String = ""
var errorMessage: String?
private var idleTimer: Timer?
private(set) var lastUsed: Date?
/// Load a model, unloading the current one first.
/// Prefers the local snapshot from ~/.cache/huggingface/hub/ (shared with the Python server).
/// Only downloads if the model isn't cached locally.
@@ -50,6 +53,7 @@ final class ModelManager {
self.modelContainer = container
self.currentModel = config
touchActivity()
} catch {
self.errorMessage = "Failed to load model: \(error.localizedDescription)"
}
@@ -59,11 +63,29 @@ final class ModelManager {
/// Release the currently loaded model and free GPU memory.
///
/// Cancels the idle timer, clears the last-activity timestamp, drops the model
/// container and the current model config, then clears the MLX GPU cache.
func unloadModel() {
    // Stop and forget any pending idle countdown.
    if let pendingTimer = idleTimer {
        pendingTimer.invalidate()
    }
    idleTimer = nil
    lastUsed = nil

    // Drop the model references first, then clear the GPU cache.
    modelContainer = nil
    currentModel = nil
    MLX.GPU.clearCache()
}
/// Record model activity and reset the idle unload timer.
///
/// Stamps `lastUsed` with the current time, cancels any pending idle timer, and
/// schedules a new one-shot timer for `Preferences.idleUnloadMinutes` minutes.
/// When that timer fires and a model is still loaded, the model is unloaded via
/// `unloadModel()`.
func touchActivity() {
    lastUsed = Date()
    idleTimer?.invalidate()
    let minutes = Preferences.idleUnloadMinutes
    guard minutes > 0 else { return }
    // Convert to floating point *before* scaling to seconds: `minutes * 60` is Int
    // arithmetic, which traps on overflow for a very large stored preference value.
    let idleInterval = TimeInterval(minutes) * 60
    idleTimer = Timer.scheduledTimer(withTimeInterval: idleInterval, repeats: false) { [weak self] _ in
        // The timer callback hands off to a main-actor task before touching manager state.
        Task { @MainActor [weak self] in
            // Nothing to do if the manager is gone or the model was already unloaded.
            guard let self, self.modelContainer != nil else { return }
            print("[ModelManager] Idle for \(minutes) min — unloading model")
            self.unloadModel()
        }
    }
}
/// Whether a model is ready for generation.
var isReady: Bool {
modelContainer != nil && !isLoading

View File

@@ -4,6 +4,7 @@ struct SettingsView: View {
@State private var systemPrompt: String = Preferences.systemPrompt
@State private var apiPort: String = String(Preferences.apiPort)
@State private var apiAutoStart: Bool = Preferences.apiAutoStart
@State private var idleUnloadMinutes: String = String(Preferences.idleUnloadMinutes)
var body: some View {
Form {
@@ -37,8 +38,27 @@ struct SettingsView: View {
Preferences.apiAutoStart = apiAutoStart
}
}
Section("Memory") {
HStack {
Text("Unload model after idle")
TextField("3", text: $idleUnloadMinutes)
.frame(width: 50)
.onChange(of: idleUnloadMinutes) {
if let mins = Int(idleUnloadMinutes), mins > 0 {
Preferences.idleUnloadMinutes = mins
}
}
Text("minutes")
.foregroundStyle(.secondary)
}
Text("The model is automatically unloaded to free memory after being idle, and reloaded on the next request.")
.font(.caption)
.foregroundStyle(.secondary)
}
}
.formStyle(.grouped)
.frame(width: 450, height: 300)
.frame(width: 450, height: 380)
}
}