feat: added gemma 4 (not supported yet in mlx-swift-lm, though)

This commit is contained in:
2026-04-28 21:52:32 +02:00
parent d5b9ae15cc
commit 4ad46ec1ea
4 changed files with 208 additions and 31 deletions

View File

@@ -3,12 +3,10 @@
<plist version="1.0"> <plist version="1.0">
<dict> <dict>
<key>com.apple.security.app-sandbox</key> <key>com.apple.security.app-sandbox</key>
<true/> <false/>
<key>com.apple.security.network.client</key> <key>com.apple.security.network.client</key>
<true/> <true/>
<key>com.apple.security.network.server</key> <key>com.apple.security.network.server</key>
<true/> <true/>
<key>com.apple.security.files.user-selected.read-write</key>
<true/>
</dict> </dict>
</plist> </plist>

View File

@@ -80,6 +80,16 @@ struct ModelConfig: Identifiable, Hashable {
supportsTools: true, supportsTools: true,
defaultGenerationSettings: .technicalDefault defaultGenerationSettings: .technicalDefault
), ),
ModelConfig(
id: "gemma-4",
repoId: "mlx-community/gemma-4-e4b-it-4bit",
displayName: "Gemma 4 E4B",
contextLength: 128_000,
loaderKinds: [.vlm],
supportsImages: true,
supportsTools: true,
defaultGenerationSettings: .technicalDefault
),
ModelConfig( ModelConfig(
id: "qwen", id: "qwen",
repoId: "mlx-community/Qwen3.5-4B-MLX-4bit", repoId: "mlx-community/Qwen3.5-4B-MLX-4bit",

View File

@@ -1,9 +1,9 @@
import Foundation import Foundation
/// Resolves HuggingFace model repos to local directories. /// Resolves HuggingFace model repos to local directories.
/// Checks multiple locations, in order:
/// 1. App cache, where HubApi(downloadBase: .cachesDirectory, cache: nil) downloads models:
///    {cachesDirectory}/models/{org}/{name}/
///    (~/Library/Containers/de.rfc1437.mlxserver/Data/Library/Caches/models/{org}/{name}/ when the app is sandboxed)
/// 2. System HF cache: ~/.cache/huggingface/hub/models--{org}--{name}/snapshots/{commit-hash}/
enum LocalModelResolver { enum LocalModelResolver {
struct LocalModelInfo: Identifiable, Hashable { struct LocalModelInfo: Identifiable, Hashable {
@@ -17,23 +17,71 @@ enum LocalModelResolver {
var id: String { repoId } var id: String { repoId }
} }
/// Root of the app-managed model cache: the `models` folder inside the
/// first user-domain caches directory, or `nil` if none is reported.
///
/// NOTE(review): the concrete location depends on whether the app runs
/// sandboxed (container caches vs. the user's caches directory) — confirm
/// it matches the `downloadBase` handed to HubApi.
private static let modelsBase: URL? = {
    let cacheRoots = FileManager.default.urls(for: .cachesDirectory, in: .userDomainMask)
    return cacheRoots.first?.appendingPathComponent("models", isDirectory: true)
}()
/// System HuggingFace hub cache directory.
///
/// Resolution order (mirrors the Hugging Face tooling):
/// 1. `HF_HUB_CACHE`, when set and non-empty
/// 2. `HF_HOME` + `hub`, when set and non-empty
/// 3. `~/.cache/huggingface/hub/`
///
/// The default location lives outside the app container, so it is only
/// reachable when the process is allowed to read the user's home directory.
/// NOTE(review): if the app sandbox is ever re-enabled, confirm that
/// `homeDirectoryForCurrentUser` still points at the real home directory and
/// that a matching file-access entitlement is present.
private static let hfSystemCache: URL? = {
    let environment = ProcessInfo.processInfo.environment

    // Reads a directory override from the environment, ignoring blank values
    // and expanding a leading "~" so shell-style paths keep working.
    func directoryURL(fromEnvironment key: String) -> URL? {
        guard let rawValue = environment[key]?.trimmingCharacters(in: .whitespacesAndNewlines),
              !rawValue.isEmpty else {
            return nil
        }
        let expandedPath = (rawValue as NSString).expandingTildeInPath
        return URL(fileURLWithPath: expandedPath, isDirectory: true)
    }

    if let hubCache = directoryURL(fromEnvironment: "HF_HUB_CACHE") {
        return hubCache
    }
    if let hfHome = directoryURL(fromEnvironment: "HF_HOME") {
        return hfHome.appendingPathComponent("hub", isDirectory: true)
    }

    return FileManager.default.homeDirectoryForCurrentUser
        .appendingPathComponent(".cache", isDirectory: true)
        .appendingPathComponent("huggingface", isDirectory: true)
        .appendingPathComponent("hub", isDirectory: true)
}()
/// Resolve a HuggingFace repo ID (e.g. "mlx-community/gemma-3-4b-it-4bit")
/// to its local directory, if it exists.
///
/// Lookup order:
/// 1. App cache: `{modelsBase}/{org}/{name}/`
/// 2. System HF cache: `{hfSystemCache}/models--{org}--{name}/snapshots/{commit-hash}/`
///
/// Within the system HF cache the snapshot is chosen deterministically:
/// the commit recorded in `refs/main` wins; otherwise the most recently
/// modified snapshot that contains a `config.json` is used.
///
/// - Parameter repoId: Repository identifier in `{org}/{name}` form.
/// - Returns: The model directory, or `nil` if the model hasn't been downloaded yet.
static func resolve(repoId: String) -> URL? {
    print("[LocalModelResolver] Resolving: \(repoId)")
    let fileManager = FileManager.default

    // Check sandbox cache first
    if let base = modelsBase {
        let modelDir = base.appendingPathComponent(repoId, isDirectory: true)
        if isDirectory(modelDir) {
            print("[LocalModelResolver] Found in sandbox cache: \(modelDir.path)")
            return modelDir
        }
    }

    // Check system HF cache
    if let hfBase = hfSystemCache {
        let repoSlug = repoId.replacingOccurrences(of: "/", with: "--")
        let modelBase = hfBase.appendingPathComponent("models--\(repoSlug)", isDirectory: true)
        print("[LocalModelResolver] Checking HF cache: \(modelBase.path)")

        let snapshotsDir = modelBase.appendingPathComponent("snapshots", isDirectory: true)
        guard isDirectory(snapshotsDir) else {
            print("[LocalModelResolver] No snapshots directory found")
            return nil
        }

        // A snapshot is usable only if it carries a config.json.
        func isValidSnapshot(_ directory: URL) -> Bool {
            isDirectory(directory)
                && fileManager.fileExists(atPath: directory.appendingPathComponent("config.json").path)
        }

        // Prefer the commit that refs/main points at: directory listing order is
        // unspecified, so "first entry" is not a reliable stand-in for "latest".
        let mainRef = modelBase
            .appendingPathComponent("refs", isDirectory: true)
            .appendingPathComponent("main")
        if let refContents = try? String(contentsOf: mainRef, encoding: .utf8) {
            let commit = refContents.trimmingCharacters(in: .whitespacesAndNewlines)
            if !commit.isEmpty {
                let pinned = snapshotsDir.appendingPathComponent(commit, isDirectory: true)
                if isValidSnapshot(pinned) {
                    print("[LocalModelResolver] Found valid snapshot: \(pinned.path)")
                    return pinned
                }
            }
        }

        // Fall back to scanning every snapshot, newest first.
        if let snapshotDirs = try? fileManager.contentsOfDirectory(
            at: snapshotsDir,
            includingPropertiesForKeys: [.contentModificationDateKey]
        ) {
            print("[LocalModelResolver] Found \(snapshotDirs.count) snapshots")

            func modificationDate(of url: URL) -> Date {
                (try? url.resourceValues(forKeys: [.contentModificationDateKey]))?
                    .contentModificationDate ?? .distantPast
            }

            let newestFirst = snapshotDirs.sorted { modificationDate(of: $0) > modificationDate(of: $1) }
            if let snapshotDir = newestFirst.first(where: isValidSnapshot) {
                print("[LocalModelResolver] Found valid snapshot: \(snapshotDir.path)")
                return snapshotDir
            }
        }
        print("[LocalModelResolver] No valid snapshot found")
    }

    print("[LocalModelResolver] Model not found locally")
    return nil
}
@@ -43,8 +91,102 @@ enum LocalModelResolver {
} }
/// Lists every locally available model from the app cache and the system
/// HuggingFace cache.
///
/// A repo present in both caches is reported once; the app-cache entry wins
/// because it is scanned first.
///
/// - Returns: Unique models sorted case-insensitively by `repoId`.
static func discoveredLocalModels() -> [LocalModelInfo] {
    var discovered: [LocalModelInfo] = []

    // Scan sandbox cache
    print("[LocalModelResolver] Scanning sandbox cache: \(modelsBase?.path ?? "N/A")")
    if let sandboxBase = modelsBase {
        let sandboxModels = discoverModels(in: sandboxBase)
        print("[LocalModelResolver] Found \(sandboxModels.count) models in sandbox cache")
        discovered += sandboxModels
    }

    // Scan system HF cache
    print("[LocalModelResolver] Scanning system HF cache: \(hfSystemCache?.path ?? "N/A")")
    if let hfBase = hfSystemCache {
        let hfModels = discoverSystemHFModels(in: hfBase)
        print("[LocalModelResolver] Found \(hfModels.count) models in HF system cache:")
        for model in hfModels {
            print("[LocalModelResolver] - \(model.repoId) (\(model.sizeBytes / (1024*1024)) MB)")
        }
        discovered += hfModels
    }

    // Remove duplicates (same repoId) and sort.
    // `Dictionary(uniqueKeysWithValues:)` traps on a repeated key, which is
    // exactly the case being handled here, so merge explicitly and keep the
    // first occurrence.
    let byRepoId = Dictionary(
        discovered.map { ($0.repoId, $0) },
        uniquingKeysWith: { first, _ in first }
    )
    let finalModels = byRepoId.values.sorted { lhs, rhs in
        lhs.repoId.localizedCaseInsensitiveCompare(rhs.repoId) == .orderedAscending
    }

    print("[LocalModelResolver] Total unique models: \(finalModels.count)")
    return finalModels
}
/// Scans a HuggingFace hub cache root for downloaded models.
///
/// Every visible `models--{org}--{name}` directory under `base` is mapped to
/// the repo ID `{org}/{name}`. For each one, the first snapshot directory
/// that `localModelInfoFromSystemCache` accepts is reported; repos without a
/// `snapshots` directory or without an accepted snapshot are skipped.
///
/// - Parameter base: The hub cache root (e.g. `~/.cache/huggingface/hub/`).
/// - Returns: One entry per discovered repo; empty if `base` cannot be listed.
private static func discoverSystemHFModels(in base: URL) -> [LocalModelInfo] {
    let fm = FileManager.default
    let repoPrefix = "models--"

    let entries: [URL]
    do {
        entries = try fm.contentsOfDirectory(
            at: base,
            includingPropertiesForKeys: [.isDirectoryKey],
            options: [.skipsHiddenFiles]
        )
    } catch {
        return []
    }

    return entries.compactMap { entry -> LocalModelInfo? in
        guard isDirectory(entry) else { return nil }

        // HF uses format: models--{org}--{name}
        let folderName = entry.lastPathComponent
        guard folderName.hasPrefix(repoPrefix) else { return nil }
        let repoId = String(folderName.dropFirst(repoPrefix.count))
            .replacingOccurrences(of: "--", with: "/")

        let snapshotsRoot = entry.appendingPathComponent("snapshots", isDirectory: true)
        var rootIsDirectory: ObjCBool = false
        let rootExists = fm.fileExists(atPath: snapshotsRoot.path, isDirectory: &rootIsDirectory)
        guard rootExists, rootIsDirectory.boolValue else { return nil }

        guard let candidates = try? fm.contentsOfDirectory(
            at: snapshotsRoot,
            includingPropertiesForKeys: nil
        ) else {
            return nil
        }

        // Only one snapshot per model is reported: stop at the first usable one.
        for candidate in candidates {
            guard isDirectory(candidate) else { continue }
            if let info = localModelInfoFromSystemCache(repoId: repoId, directory: candidate) {
                return info
            }
        }
        return nil
    }
}
/// Builds the `LocalModelInfo` for a single snapshot directory of the system
/// HF cache.
///
/// - Parameters:
///   - repoId: Repo identifier to record on the result.
///   - directory: Snapshot directory to inspect.
/// - Returns: `nil` when `containsModelArtifacts(at:)` rejects the directory;
///   otherwise an info value whose loader order puts `.vlm` first for models
///   inferred to support images and `.llm` first for all others.
private static func localModelInfoFromSystemCache(repoId: String, directory: URL) -> LocalModelInfo? {
    if !containsModelArtifacts(at: directory) {
        return nil
    }

    let modelConfig = readJSONObject(at: directory.appendingPathComponent("config.json"))
    let tokenizerSettings = readJSONObject(at: directory.appendingPathComponent("tokenizer_config.json"))
    let handlesImages = inferredSupportsImages(
        repoDirectory: directory,
        config: modelConfig,
        tokenizerConfig: tokenizerSettings
    )

    let preferredLoaders: [ModelConfig.LoaderKind]
    if handlesImages {
        preferredLoaders = [.vlm, .llm]
    } else {
        preferredLoaders = [.llm, .vlm]
    }

    return LocalModelInfo(
        repoId: repoId,
        directory: directory,
        sizeBytes: directorySize(at: directory),
        contextLength: inferredContextLength(config: modelConfig, tokenizerConfig: tokenizerSettings),
        loaderKinds: preferredLoaders,
        supportsImages: handlesImages
    )
}
static func discoverModels(in base: URL) -> [LocalModelInfo] { static func discoverModels(in base: URL) -> [LocalModelInfo] {
@@ -82,20 +224,12 @@ enum LocalModelResolver {
} }
} }
/// Reports whether `url` refers to an existing directory on disk.
///
/// - Returns: `true` only when something exists at the path and it is a
///   directory; `false` for files and for missing paths.
private static func isDirectory(_ url: URL) -> Bool {
    var directoryFlag: ObjCBool = false
    let exists = FileManager.default.fileExists(atPath: url.path, isDirectory: &directoryFlag)
    return exists && directoryFlag.boolValue
}
private static func localModelInfo(ownerDirectory: URL, repoDirectory: URL) -> LocalModelInfo? { private static func localModelInfo(ownerDirectory: URL, repoDirectory: URL) -> LocalModelInfo? {
@@ -132,8 +266,43 @@ enum LocalModelResolver {
return requiredPaths.contains { FileManager.default.fileExists(atPath: $0) } return requiredPaths.contains { FileManager.default.fileExists(atPath: $0) }
} }
/// Removes every local copy of a model so the next load downloads it again.
///
/// Both locations are attempted independently: the app cache entry
/// (`{modelsBase}/{org}/{name}/`) and the whole system HF cache entry
/// (`{hfSystemCache}/models--{org}--{name}/`). A failure in one location is
/// logged and does not prevent the other from being tried.
///
/// - Parameter repoId: Repository identifier in `{org}/{name}` form.
/// - Returns: `true` if at least one location was deleted.
@discardableResult
static func deleteLocal(repoId: String) -> Bool {
    let fm = FileManager.default
    var removedAny = false

    // App (sandbox) cache copy
    if let sandboxDir = modelsBase?.appendingPathComponent(repoId, isDirectory: true),
       fm.fileExists(atPath: sandboxDir.path) {
        do {
            try fm.removeItem(at: sandboxDir)
            print("[LocalModelResolver] Deleted sandbox cache: \(sandboxDir.path)")
            removedAny = true
        } catch {
            print("[LocalModelResolver] Failed to delete \(sandboxDir.path): \(error)")
        }
    }

    // System HF cache copy: ~/.cache/huggingface/hub/models--{org}--{name}/
    let slug = repoId.replacingOccurrences(of: "/", with: "--")
    if let systemDir = hfSystemCache?.appendingPathComponent("models--\(slug)", isDirectory: true),
       fm.fileExists(atPath: systemDir.path) {
        do {
            try fm.removeItem(at: systemDir)
            print("[LocalModelResolver] Deleted system cache: \(systemDir.path)")
            removedAny = true
        } catch {
            print("[LocalModelResolver] Failed to delete \(systemDir.path): \(error)")
        }
    }

    return removedAny
}
private static func readJSONObject(at url: URL) -> [String: Any]? { private static func readJSONObject(at url: URL) -> [String: Any]? {

View File

@@ -141,7 +141,7 @@ MLXServer/
│ ├── ToolCallParser.swift — Parses tool calls from model output │ ├── ToolCallParser.swift — Parses tool calls from model output
│ └── ToolPromptBuilder.swift — Model-specific tool prompt formatting │ └── ToolPromptBuilder.swift — Model-specific tool prompt formatting
└── Utilities/ └── Utilities/
├── LocalModelResolver.swift — Offline-first HuggingFace cache resolution (sandbox + system) ├── LocalModelResolver.swift — Offline-first HuggingFace cache resolution
├── ChatExporter.swift — Export conversations to Markdown or RTF ├── ChatExporter.swift — Export conversations to Markdown or RTF
├── FocusedValues.swift — FocusedValue keys for menu bar integration ├── FocusedValues.swift — FocusedValue keys for menu bar integration
└── Preferences.swift — UserDefaults wrapper, including scene persistence └── Preferences.swift — UserDefaults wrapper, including scene persistence
@@ -153,7 +153,7 @@ build.sh — One-command build script (xcodegen + xcodebuild)
## Key Design Decisions ## Key Design Decisions
- Uses `mlx-swift-lm` for inference — `VLMModelFactory` for vision models and `LLMModelFactory` for text-only models - Uses `mlx-swift-lm` for inference — `VLMModelFactory` for vision models and `LLMModelFactory` for text-only models
- **Offline-first**: `LocalModelResolver` checks both the sandboxed app container and `~/.cache/huggingface/hub/` for locally-cached models before downloading - **Offline-first**: `LocalModelResolver` checks `~/.cache/huggingface/hub/` for locally-cached models before downloading
- **No duplicate storage**: custom `HubApi` with blob cache disabled — models are stored once in the snapshot cache - **No duplicate storage**: custom `HubApi` with blob cache disabled — models are stored once in the snapshot cache
- **KV cache reuse** across API requests — reuses `ChatSession` when conversation history prefix matches - **KV cache reuse** across API requests — reuses `ChatSession` when conversation history prefix matches
- **Thinking mode**: `enable_thinking` passed via Jinja template context; `<think>` tags parsed in real-time during streaming - **Thinking mode**: `enable_thinking` passed via Jinja template context; `<think>` tags parsed in real-time during streaming