diff --git a/MLXServer/MLXServer.entitlements b/MLXServer/MLXServer.entitlements
index c1a22b8..e12c0e5 100644
--- a/MLXServer/MLXServer.entitlements
+++ b/MLXServer/MLXServer.entitlements
@@ -3,12 +3,10 @@
com.apple.security.app-sandbox
-
+
com.apple.security.network.client
com.apple.security.network.server
- com.apple.security.files.user-selected.read-write
-
diff --git a/MLXServer/Models/ModelConfig.swift b/MLXServer/Models/ModelConfig.swift
index 5f3cff1..36b14c1 100644
--- a/MLXServer/Models/ModelConfig.swift
+++ b/MLXServer/Models/ModelConfig.swift
@@ -80,6 +80,16 @@ struct ModelConfig: Identifiable, Hashable {
supportsTools: true,
defaultGenerationSettings: .technicalDefault
),
+ ModelConfig(
+ id: "gemma-4",
+ repoId: "mlx-community/gemma-4-e4b-it-4bit",
+ displayName: "Gemma 4 E4B",
+ contextLength: 128_000,
+ loaderKinds: [.vlm],
+ supportsImages: true,
+ supportsTools: true,
+ defaultGenerationSettings: .technicalDefault
+ ),
ModelConfig(
id: "qwen",
repoId: "mlx-community/Qwen3.5-4B-MLX-4bit",
diff --git a/MLXServer/Utilities/LocalModelResolver.swift b/MLXServer/Utilities/LocalModelResolver.swift
index cdcb6b8..a93b5aa 100644
--- a/MLXServer/Utilities/LocalModelResolver.swift
+++ b/MLXServer/Utilities/LocalModelResolver.swift
@@ -1,9 +1,9 @@
import Foundation
/// Resolves HuggingFace model repos to local directories.
-///
-/// HubApi(downloadBase: .cachesDirectory, cache: nil) downloads models to:
-/// ~/Library/Containers/de.rfc1437.mlxserver/Data/Library/Caches/models/{org}/{name}/
+/// Checks multiple locations:
+/// 1. Sandbox cache: ~/Library/Containers/de.rfc1437.mlxserver/Data/Library/Caches/models/{org}/{name}/
+/// 2. System HF cache: ~/.cache/huggingface/hub/
enum LocalModelResolver {
struct LocalModelInfo: Identifiable, Hashable {
@@ -17,23 +17,71 @@ enum LocalModelResolver {
var id: String { repoId }
}
- /// Base directory where HubApi stores downloaded models.
+ /// Base directory where HubApi stores downloaded models (sandbox cache).
private static let modelsBase: URL? = {
FileManager.default.urls(for: .cachesDirectory, in: .userDomainMask).first?
.appendingPathComponent("models", isDirectory: true)
}()
+ /// System HuggingFace cache directory (~/.cache/huggingface/hub/), built from the current user's home directory.
+ /// NOTE(review): the entitlement key formerly cited here is not a documented one; Apple's home-relative read exception is com.apple.security.temporary-exception.files.home-relative-path.read-only — confirm which access mechanism is intended.
+ private static let hfSystemCache: URL? = {
+ // NOTE(review): with App Sandbox enabled, homeDirectoryForCurrentUser presumably resolves to the app container, not the real home — verify this path is reachable in the shipped configuration.
+ return FileManager.default.homeDirectoryForCurrentUser
+ .appendingPathComponent(".cache", isDirectory: true)
+ .appendingPathComponent("huggingface", isDirectory: true)
+ .appendingPathComponent("hub", isDirectory: true)
+ }()
+
/// Resolve a HuggingFace repo ID (e.g. "mlx-community/gemma-3-4b-it-4bit")
/// to its local directory, if it exists.
+ /// Checks the sandbox cache first, then the system HF cache, where the first snapshot directory containing config.json is returned.
///
/// Returns `nil` if the model hasn't been downloaded yet.
static func resolve(repoId: String) -> URL? {
- guard let base = modelsBase else { return nil }
- let modelDir = base.appendingPathComponent(repoId, isDirectory: true)
- var isDir: ObjCBool = false
- if FileManager.default.fileExists(atPath: modelDir.path, isDirectory: &isDir), isDir.boolValue {
- return modelDir
+ print("[LocalModelResolver] Resolving: \(repoId)")
+
+ // Check sandbox cache first: any existing directory at {modelsBase}/{repoId} is accepted as-is
+ if let base = modelsBase {
+ let modelDir = base.appendingPathComponent(repoId, isDirectory: true)
+ var isDir: ObjCBool = false
+ if FileManager.default.fileExists(atPath: modelDir.path, isDirectory: &isDir), isDir.boolValue {
+ print("[LocalModelResolver] Found in sandbox cache: \(modelDir.path)")
+ return modelDir
+ }
}
+
+ // Check system HF cache
+ // Structure: ~/.cache/huggingface/hub/models--{org}--{name}/snapshots/{commit-hash}/
+ if let hfBase = hfSystemCache {
+ let repoSlug = repoId.replacingOccurrences(of: "/", with: "--")
+ let modelBase = hfBase.appendingPathComponent("models--\(repoSlug)", isDirectory: true)
+
+ print("[LocalModelResolver] Checking HF cache: \(modelBase.path)")
+
+ // Look for snapshots directory; when it is missing the whole lookup ends here with nil
+ let snapshotsDir = modelBase.appendingPathComponent("snapshots", isDirectory: true)
+ var isDir: ObjCBool = false
+ guard FileManager.default.fileExists(atPath: snapshotsDir.path, isDirectory: &isDir), isDir.boolValue else {
+ print("[LocalModelResolver] No snapshots directory found")
+ return nil
+ }
+
+ // Return the first snapshot (commit hash directory) that contains config.json; enumeration order is not sorted here, so this is not necessarily the latest commit
+ if let snapshotDirs = try? FileManager.default.contentsOfDirectory(at: snapshotsDir, includingPropertiesForKeys: nil) {
+ print("[LocalModelResolver] Found \(snapshotDirs.count) snapshots")
+ for snapshotDir in snapshotDirs where isDirectory(snapshotDir) {
+ let configPath = snapshotDir.appendingPathComponent("config.json")
+ if FileManager.default.fileExists(atPath: configPath.path) {
+ print("[LocalModelResolver] Found valid snapshot: \(snapshotDir.path)")
+ return snapshotDir
+ }
+ }
+ }
+ print("[LocalModelResolver] No valid snapshot found")
+ }
+
+ print("[LocalModelResolver] Model not found locally")
return nil
}
@@ -43,8 +91,102 @@ enum LocalModelResolver {
}
static func discoveredLocalModels() -> [LocalModelInfo] {
- guard let base = modelsBase else { return [] }
- return discoverModels(in: base)
+ var discovered: [LocalModelInfo] = []
+
+ // Scan sandbox cache first, so its entries come before system-cache entries in `discovered`
+ print("[LocalModelResolver] Scanning sandbox cache: \(modelsBase?.path ?? "N/A")")
+ if let sandboxBase = modelsBase {
+ let sandboxModels = discoverModels(in: sandboxBase)
+ print("[LocalModelResolver] Found \(sandboxModels.count) models in sandbox cache")
+ discovered += sandboxModels
+ }
+
+ // Scan system HF cache
+ print("[LocalModelResolver] Scanning system HF cache: \(hfSystemCache?.path ?? "N/A")")
+ if let hfBase = hfSystemCache {
+ let hfModels = discoverSystemHFModels(in: hfBase)
+ print("[LocalModelResolver] Found \(hfModels.count) models in HF system cache:")
+ for model in hfModels {
+ print("[LocalModelResolver] - \(model.repoId) (\(model.sizeBytes / (1024*1024)) MB)")
+ }
+ discovered += hfModels
+ }
+
+ // De-duplicate by repoId, keeping the first (sandbox) entry, then sort; uniqueKeysWithValues would trap when both caches hold the same repo
+ let byRepoId = Dictionary(discovered.map { ($0.repoId, $0) }, uniquingKeysWith: { first, _ in first })
+ let finalModels = byRepoId.values.sorted { lhs, rhs in
+ lhs.repoId.localizedCaseInsensitiveCompare(rhs.repoId) == .orderedAscending
+ }
+ print("[LocalModelResolver] Total unique models: \(finalModels.count)")
+ return finalModels
+ }
+
+ /// Discover models in the system HF cache (~/.cache/huggingface/hub/): one `LocalModelInfo` per `models--{org}--{name}` directory that has a usable snapshot.
+ private static func discoverSystemHFModels(in base: URL) -> [LocalModelInfo] {
+ let fileManager = FileManager.default
+ let directoryKeys: Set<URLResourceKey> = [.isDirectoryKey]
+ guard let modelBases = try? fileManager.contentsOfDirectory(
+ at: base,
+ includingPropertiesForKeys: Array(directoryKeys),
+ options: [.skipsHiddenFiles]
+ ) else {
+ return []
+ }
+
+ var discovered: [LocalModelInfo] = []
+
+ for modelBase in modelBases {
+ guard isDirectory(modelBase) else { continue }
+ let dirName = modelBase.lastPathComponent
+ // HF uses format: models--{org}--{name}
+ guard dirName.hasPrefix("models--") else { continue }
+
+ let repoId = String(dirName.dropFirst("models--".count)) // Remove "models--" prefix
+ .replacingOccurrences(of: "--", with: "/")
+
+ // Look for snapshots
+ let snapshotsDir = modelBase.appendingPathComponent("snapshots", isDirectory: true)
+ var isDir: ObjCBool = false
+ guard fileManager.fileExists(atPath: snapshotsDir.path, isDirectory: &isDir), isDir.boolValue else {
+ continue
+ }
+
+ // Find snapshot with actual model files
+ if let snapshotDirs = try? fileManager.contentsOfDirectory(at: snapshotsDir, includingPropertiesForKeys: nil) {
+ for snapshotDir in snapshotDirs where isDirectory(snapshotDir) {
+ if let info = localModelInfoFromSystemCache(repoId: repoId, directory: snapshotDir) {
+ discovered.append(info)
+ break // Only add one snapshot per model
+ }
+ }
+ }
+ }
+
+ return discovered
+ }
+
+ /// Builds the `LocalModelInfo` for one snapshot directory of the system HF cache; `nil` when `containsModelArtifacts(at:)` rejects the directory.
+ private static func localModelInfoFromSystemCache(repoId: String, directory: URL) -> LocalModelInfo? {
+ if !containsModelArtifacts(at: directory) { return nil }
+ // Parsed JSON (possibly nil) is handed to the inference helpers below.
+ let modelConfig = readJSONObject(at: directory.appendingPathComponent("config.json"))
+ let tokenizerSettings = readJSONObject(at: directory.appendingPathComponent("tokenizer_config.json"))
+ let hasVision = inferredSupportsImages(repoDirectory: directory, config: modelConfig, tokenizerConfig: tokenizerSettings)
+ let totalBytes = directorySize(at: directory)
+ let maxContext = inferredContextLength(config: modelConfig, tokenizerConfig: tokenizerSettings)
+ // List the loader kind that matches the detected modality first.
+ let orderedLoaders: [ModelConfig.LoaderKind]
+ if hasVision {
+ orderedLoaders = [.vlm, .llm]
+ } else {
+ orderedLoaders = [.llm, .vlm]
+ }
+
+ return LocalModelInfo(
+ repoId: repoId, directory: directory,
+ sizeBytes: totalBytes, contextLength: maxContext,
+ loaderKinds: orderedLoaders, supportsImages: hasVision
+ )
}
static func discoverModels(in base: URL) -> [LocalModelInfo] {
@@ -82,20 +224,12 @@ enum LocalModelResolver {
}
}
- /// Delete the local cache for a model so it will be re-downloaded next time.
- @discardableResult
- static func deleteLocal(repoId: String) -> Bool {
- guard let base = modelsBase else { return false }
- let modelDir = base.appendingPathComponent(repoId, isDirectory: true)
- guard FileManager.default.fileExists(atPath: modelDir.path) else { return false }
- do {
- try FileManager.default.removeItem(at: modelDir)
- print("[LocalModelResolver] Deleted \(modelDir.path)")
- return true
- } catch {
- print("[LocalModelResolver] Failed to delete \(modelDir.path): \(error)")
- return false
+ private static func isDirectory(_ url: URL) -> Bool { // true only when `url` exists and is reported as a directory
+ var isDir: ObjCBool = false
+ if FileManager.default.fileExists(atPath: url.path, isDirectory: &isDir) { // NOTE(review): fileExists(atPath:isDirectory:) is documented to traverse a final symbolic link — confirm that is wanted for cache entries
+ return isDir.boolValue
}
+ return false // nothing exists at the path
}
private static func localModelInfo(ownerDirectory: URL, repoDirectory: URL) -> LocalModelInfo? {
@@ -132,8 +266,43 @@ enum LocalModelResolver {
return requiredPaths.contains { FileManager.default.fileExists(atPath: $0) }
}
- private static func isDirectory(_ url: URL) -> Bool {
- (try? url.resourceValues(forKeys: [.isDirectoryKey]).isDirectory) == true
+ /// Removes every locally cached copy of `repoId` so the model is downloaded again the next time it is needed.
+ /// Both the sandbox cache entry and the `models--{org}--{name}` directory of the system HF cache are removed when present.
+ /// - Parameter repoId: HuggingFace repo ID in `{org}/{name}` form.
+ /// - Returns: `true` when at least one directory was removed.
+ @discardableResult
+ static func deleteLocal(repoId: String) -> Bool {
+ let fm = FileManager.default
+ var removedAny = false
+
+ // Sandbox cache: {modelsBase}/{repoId}
+ // A failed removal is only logged, so the system cache is still attempted afterwards.
+ if let modelDir = modelsBase?.appendingPathComponent(repoId, isDirectory: true),
+ fm.fileExists(atPath: modelDir.path) {
+ do {
+ try fm.removeItem(at: modelDir)
+ print("[LocalModelResolver] Deleted sandbox cache: \(modelDir.path)")
+ removedAny = true
+ } catch {
+ print("[LocalModelResolver] Failed to delete \(modelDir.path): \(error)")
+ }
+ }
+
+ // System HF cache: ~/.cache/huggingface/hub/models--{org}--{name}/
+ // The whole repo directory is removed, not just a single snapshot.
+ let repoSlug = repoId.replacingOccurrences(of: "/", with: "--")
+ if let modelBase = hfSystemCache?.appendingPathComponent("models--\(repoSlug)", isDirectory: true),
+ fm.fileExists(atPath: modelBase.path) {
+ do {
+ try fm.removeItem(at: modelBase)
+ print("[LocalModelResolver] Deleted system cache: \(modelBase.path)")
+ removedAny = true
+ } catch {
+ print("[LocalModelResolver] Failed to delete \(modelBase.path): \(error)")
+ }
+ }
+
+ return removedAny
}
private static func readJSONObject(at url: URL) -> [String: Any]? {
diff --git a/README.md b/README.md
index 5fe6b51..eae3a21 100644
--- a/README.md
+++ b/README.md
@@ -141,7 +141,7 @@ MLXServer/
│ ├── ToolCallParser.swift — Parses tool calls from model output
│ └── ToolPromptBuilder.swift — Model-specific tool prompt formatting
└── Utilities/
- ├── LocalModelResolver.swift — Offline-first HuggingFace cache resolution (sandbox + system)
+ ├── LocalModelResolver.swift — Offline-first HuggingFace cache resolution
├── ChatExporter.swift — Export conversations to Markdown or RTF
├── FocusedValues.swift — FocusedValue keys for menu bar integration
└── Preferences.swift — UserDefaults wrapper, including scene persistence
@@ -153,7 +153,7 @@ build.sh — One-command build script (xcodegen + xcodebuild)
## Key Design Decisions
- Uses `mlx-swift-lm` for inference — `VLMModelFactory` for vision models and `LLMModelFactory` for text-only models
-- **Offline-first**: `LocalModelResolver` checks both the sandboxed app container and `~/.cache/huggingface/hub/` for locally-cached models before downloading
+- **Offline-first**: `LocalModelResolver` checks the app's own cache directory first, then `~/.cache/huggingface/hub/`, for locally-cached models before downloading
- **No duplicate storage**: custom `HubApi` with blob cache disabled — models are stored once in the snapshot cache
- **KV cache reuse** across API requests — reuses `ChatSession` when conversation history prefix matches
- **Thinking mode**: `enable_thinking` passed via Jinja template context; `<think>` tags parsed in real-time during streaming