Compare commits

...

2 Commits

Author SHA1 Message Date
11300e3034 feat: more o n migration to v3 2026-04-30 11:58:53 +02:00
3502266ff9 feat: migration to mlx-swift-lm v3 2026-04-30 09:18:37 +02:00
17 changed files with 287 additions and 258 deletions

View File

@@ -1,6 +1,7 @@
{
"chat.tools.terminal.autoApprove": {
"./test.sh": true,
"setopt": true
"setopt": true,
"./build.sh": true
}
}

View File

@@ -17,6 +17,7 @@
20FFB5DBF75AA6C359AAE31C /* SceneManagementView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 37FEB592E5E717F817B03151 /* SceneManagementView.swift */; };
221DEC86374902FCFD661A01 /* TokenPrefixCacheTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 64B2EDD5D1881AC9E1E60913 /* TokenPrefixCacheTests.swift */; };
2640EDCA9033D85C0B785557 /* GenerationSettings.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6FAF7455BD387CD2061E0CBF /* GenerationSettings.swift */; };
28A780EEB6DC74B5B0BBF03D /* HuggingFace in Frameworks */ = {isa = PBXBuildFile; productRef = FDBFD829EE956976552514CC /* HuggingFace */; };
29879D696584B96CC56560DF /* ChatExporter.swift in Sources */ = {isa = PBXBuildFile; fileRef = D7C9BAD674E29688ACE53B0B /* ChatExporter.swift */; };
2CAAF7129F7CC45200FA9F6B /* ModelPickerView.swift in Sources */ = {isa = PBXBuildFile; fileRef = C3C3A76C02AF70A9D8F868FC /* ModelPickerView.swift */; };
2D08769282BD71C170DB0943 /* InferenceStats.swift in Sources */ = {isa = PBXBuildFile; fileRef = E35452B166893B25E765FF70 /* InferenceStats.swift */; };
@@ -38,6 +39,7 @@
6828CCA8B78AB40906F87CAB /* LocalModelResolver.swift in Sources */ = {isa = PBXBuildFile; fileRef = D733A0D1D4AC25DDDA6C8684 /* LocalModelResolver.swift */; };
741692862DB1F13EA0B2D14D /* TokenPrefixCache.swift in Sources */ = {isa = PBXBuildFile; fileRef = 1962D530BEABCC7F1E8E0ED1 /* TokenPrefixCache.swift */; };
75E046B4ABB1E6FEF17C1A60 /* ModelManagementWindow.swift in Sources */ = {isa = PBXBuildFile; fileRef = 721D6F203A10434FE0223042 /* ModelManagementWindow.swift */; };
777AEBB3471D8838F0F51D08 /* MarkdownUI in Frameworks */ = {isa = PBXBuildFile; productRef = A98257123539E9E738213BFA /* MarkdownUI */; };
7936325B425DFA2931F6E421 /* ModelBackedQuantizationTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = F7E6F18C80D9859E89D2B4E3 /* ModelBackedQuantizationTests.swift */; };
7CD765C1E2F9F4D7504C8D09 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = B629DA084A9A40E54F8EA5FA /* Assets.xcassets */; };
80646C5066BF79BC76E1D9D7 /* ModelConfig.swift in Sources */ = {isa = PBXBuildFile; fileRef = 38DFC212AF4359A45FBE22BA /* ModelConfig.swift */; };
@@ -67,7 +69,8 @@
E92B6656C251EDA246B8F582 /* ImageDecoderTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = E4573DC9314915F4C7963B4E /* ImageDecoderTests.swift */; };
EC4FC68608DDFA6A3DF133CC /* InferenceEngine.swift in Sources */ = {isa = PBXBuildFile; fileRef = 02EBDE0C72D1C5CE220E5B93 /* InferenceEngine.swift */; };
EDE59C241940E7B9B53D520D /* TokenPrefixCacheQuantizationTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = D50504058693CDE533D755B5 /* TokenPrefixCacheQuantizationTests.swift */; };
F546CE5955ED253D8A793D5E /* MarkdownUI in Frameworks */ = {isa = PBXBuildFile; productRef = A98257123539E9E738213BFA /* MarkdownUI */; };
F2A137B60D5DFCC591A01420 /* Tokenizers in Frameworks */ = {isa = PBXBuildFile; productRef = BD266A137966DB9451C2C352 /* Tokenizers */; };
F546CE5955ED253D8A793D5E /* MLXHuggingFace in Frameworks */ = {isa = PBXBuildFile; productRef = 269A55730E9BDC735F9C2B78 /* MLXHuggingFace */; };
FAF7D4714AC6D02674920208 /* ChatMessage.swift in Sources */ = {isa = PBXBuildFile; fileRef = A4B359324B5FD8D106C74338 /* ChatMessage.swift */; };
FCD48F8C132A2B830A15EEB4 /* MLXLLM in Frameworks */ = {isa = PBXBuildFile; productRef = 3F5A4AC6DBAF7CA686ECA74E /* MLXLLM */; };
FE4405F66873C75CD6FA19A5 /* StreamingSSEEncoderTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 49C383DD5224F3420EB98DB2 /* StreamingSSEEncoderTests.swift */; };
@@ -158,7 +161,10 @@
FCD48F8C132A2B830A15EEB4 /* MLXLLM in Frameworks */,
945474365D0B3E961811909A /* MLXVLM in Frameworks */,
B6D3662995B885C102876B4A /* MLXLMCommon in Frameworks */,
F546CE5955ED253D8A793D5E /* MarkdownUI in Frameworks */,
F546CE5955ED253D8A793D5E /* MLXHuggingFace in Frameworks */,
28A780EEB6DC74B5B0BBF03D /* HuggingFace in Frameworks */,
F2A137B60D5DFCC591A01420 /* Tokenizers in Frameworks */,
777AEBB3471D8838F0F51D08 /* MarkdownUI in Frameworks */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@@ -343,6 +349,9 @@
3F5A4AC6DBAF7CA686ECA74E /* MLXLLM */,
D5E8E1C2DD8D8AABB4306193 /* MLXVLM */,
9090667D4134056AE66DC2F1 /* MLXLMCommon */,
269A55730E9BDC735F9C2B78 /* MLXHuggingFace */,
FDBFD829EE956976552514CC /* HuggingFace */,
BD266A137966DB9451C2C352 /* Tokenizers */,
A98257123539E9E738213BFA /* MarkdownUI */,
);
productName = MLXServer;
@@ -390,6 +399,8 @@
packageReferences = (
D402301668D113A49B6DD32D /* XCRemoteSwiftPackageReference "swift-markdown-ui" */,
1AA4C71F15847A241E418C0C /* XCRemoteSwiftPackageReference "mlx-swift-lm" */,
A6D001FF3D9EA5BA3112F5BF /* XCRemoteSwiftPackageReference "swift-huggingface" */,
5479E9F7A876DC346598E560 /* XCRemoteSwiftPackageReference "swift-transformers" */,
);
preferredProjectObjectVersion = 77;
productRefGroup = 652987C2A419DBFC79E32CDE /* Products */;
@@ -736,8 +747,24 @@
isa = XCRemoteSwiftPackageReference;
repositoryURL = "https://github.com/ml-explore/mlx-swift-lm";
requirement = {
branch = main;
kind = branch;
kind = upToNextMajorVersion;
minimumVersion = 3.31.3;
};
};
5479E9F7A876DC346598E560 /* XCRemoteSwiftPackageReference "swift-transformers" */ = {
isa = XCRemoteSwiftPackageReference;
repositoryURL = "https://github.com/huggingface/swift-transformers";
requirement = {
kind = upToNextMajorVersion;
minimumVersion = 1.2.0;
};
};
A6D001FF3D9EA5BA3112F5BF /* XCRemoteSwiftPackageReference "swift-huggingface" */ = {
isa = XCRemoteSwiftPackageReference;
repositoryURL = "https://github.com/huggingface/swift-huggingface";
requirement = {
kind = upToNextMajorVersion;
minimumVersion = 0.9.0;
};
};
D402301668D113A49B6DD32D /* XCRemoteSwiftPackageReference "swift-markdown-ui" */ = {
@@ -751,6 +778,11 @@
/* End XCRemoteSwiftPackageReference section */
/* Begin XCSwiftPackageProductDependency section */
269A55730E9BDC735F9C2B78 /* MLXHuggingFace */ = {
isa = XCSwiftPackageProductDependency;
package = 1AA4C71F15847A241E418C0C /* XCRemoteSwiftPackageReference "mlx-swift-lm" */;
productName = MLXHuggingFace;
};
3F5A4AC6DBAF7CA686ECA74E /* MLXLLM */ = {
isa = XCSwiftPackageProductDependency;
package = 1AA4C71F15847A241E418C0C /* XCRemoteSwiftPackageReference "mlx-swift-lm" */;
@@ -766,11 +798,21 @@
package = D402301668D113A49B6DD32D /* XCRemoteSwiftPackageReference "swift-markdown-ui" */;
productName = MarkdownUI;
};
BD266A137966DB9451C2C352 /* Tokenizers */ = {
isa = XCSwiftPackageProductDependency;
package = 5479E9F7A876DC346598E560 /* XCRemoteSwiftPackageReference "swift-transformers" */;
productName = Tokenizers;
};
D5E8E1C2DD8D8AABB4306193 /* MLXVLM */ = {
isa = XCSwiftPackageProductDependency;
package = 1AA4C71F15847A241E418C0C /* XCRemoteSwiftPackageReference "mlx-swift-lm" */;
productName = MLXVLM;
};
FDBFD829EE956976552514CC /* HuggingFace */ = {
isa = XCSwiftPackageProductDependency;
package = A6D001FF3D9EA5BA3112F5BF /* XCRemoteSwiftPackageReference "swift-huggingface" */;
productName = HuggingFace;
};
/* End XCSwiftPackageProductDependency section */
};
rootObject = 938BC479816FCA8527B731F9 /* Project object */;

View File

@@ -1,5 +1,5 @@
{
"originHash" : "418f7299ccb303e0e8992dfc960a3df5df98d527f18667aa162699027b29b6cd",
"originHash" : "af28e5c426709ddbdb4b91bab23f3971aba7ff96fb35d16285d757a8f482e340",
"pins" : [
{
"identity" : "eventsource",
@@ -15,8 +15,8 @@
"kind" : "remoteSourceControl",
"location" : "https://github.com/ml-explore/mlx-swift",
"state" : {
"revision" : "6ba4827fb82c97d012eec9ab4b2de21f85c3b33d",
"version" : "0.30.6"
"revision" : "61b9e011e09a62b489f6bd647958f1555bdf2896",
"version" : "0.31.3"
}
},
{
@@ -24,8 +24,8 @@
"kind" : "remoteSourceControl",
"location" : "https://github.com/ml-explore/mlx-swift-lm",
"state" : {
"branch" : "main",
"revision" : "bc3c20ef4644c86f2b347debcfe1efe4308712a6"
"revision" : "1c05248bb0899e2a7a4962b84d319cf12f4e12aa",
"version" : "3.31.3"
}
},
{
@@ -127,6 +127,15 @@
"version" : "1.1.1"
}
},
{
"identity" : "swift-syntax",
"kind" : "remoteSourceControl",
"location" : "https://github.com/swiftlang/swift-syntax.git",
"state" : {
"revision" : "0687f71944021d616d34d922343dcef086855920",
"version" : "600.0.1"
}
},
{
"identity" : "swift-system",
"kind" : "remoteSourceControl",

View File

@@ -335,19 +335,23 @@ final class APIServer {
}
}
// NOTE: repetition / presence / frequency penalties are intentionally
// not forwarded to GenerateParameters. mlx-swift-lm 3.31.3's
// PenaltyProcessor uses TokenRing.loadPrompt, which assumes a 1-D
// prompt MLXArray. VLM models (Gemma3, Qwen-VL, etc.) hand it a 2-D
// [1, N] tokens array, so the ring buffer ends up the wrong size and
// every later MLX.where in TokenRing.append crashes via fatalError.
// Re-enable once upstream fixes TokenRing to flatten the prompt.
let generateParams = GenerateParameters(
maxTokens: maxTokens,
temperature: Float(generationSettings.temperature),
topP: Float(generationSettings.topP),
topK: generationSettings.topK,
minP: Float(generationSettings.minP),
repetitionPenalty: generationSettings.repetitionPenalty.map(Float.init),
repetitionContextSize: 128,
presencePenalty: generationSettings.presencePenalty.map(Float.init),
presenceContextSize: 128,
frequencyPenalty: generationSettings.frequencyPenalty.map(Float.init),
frequencyContextSize: 128
minP: Float(generationSettings.minP)
)
_ = generationSettings.repetitionPenalty
_ = generationSettings.presencePenalty
_ = generationSettings.frequencyPenalty
let currentModelId = modelManager.currentModel?.id ?? modelName
let engine = InferenceEngine(container: container)
let preparedInference: InferenceEngine.PreparedInference

View File

@@ -1,9 +1,6 @@
import Foundation
/// Resolves HuggingFace model repos to local directories.
/// Checks multiple locations:
/// 1. Sandbox cache: ~/Library/Containers/de.rfc1437.mlxserver/Data/Library/Caches/models/{org}/{name}/
/// 2. System HF cache: ~/.cache/huggingface/hub/
/// Resolves HuggingFace model repos to local directories in ~/.cache/huggingface/hub/.
enum LocalModelResolver {
struct LocalModelInfo: Identifiable, Hashable {
@@ -17,16 +14,8 @@ enum LocalModelResolver {
var id: String { repoId }
}
/// Base directory where HubApi stores downloaded models (sandbox cache).
private static let modelsBase: URL? = {
FileManager.default.urls(for: .cachesDirectory, in: .userDomainMask).first?
.appendingPathComponent("models", isDirectory: true)
}()
/// System HuggingFace cache directory (~/.cache/huggingface/hub/).
/// Note: Requires com.apple.security.files.home-relative-directory.read entitlement
private static let hfSystemCache: URL? = {
// Use homeDirectoryForCurrentUser which works in sandbox with proper entitlement
/// HuggingFace cache directory (~/.cache/huggingface/hub/).
private static let hfCacheBase: URL? = {
return FileManager.default.homeDirectoryForCurrentUser
.appendingPathComponent(".cache", isDirectory: true)
.appendingPathComponent("huggingface", isDirectory: true)
@@ -35,50 +24,41 @@ enum LocalModelResolver {
/// Resolve a HuggingFace repo ID (e.g. "mlx-community/gemma-3-4b-it-4bit")
/// to its local directory, if it exists.
/// Checks sandbox cache first, then system HF cache.
///
/// Returns `nil` if the model hasn't been downloaded yet.
static func resolve(repoId: String) -> URL? {
print("[LocalModelResolver] Resolving: \(repoId)")
// Check sandbox cache first
if let base = modelsBase {
let modelDir = base.appendingPathComponent(repoId, isDirectory: true)
var isDir: ObjCBool = false
if FileManager.default.fileExists(atPath: modelDir.path, isDirectory: &isDir), isDir.boolValue {
print("[LocalModelResolver] Found in sandbox cache: \(modelDir.path)")
return modelDir
}
// Structure: ~/.cache/huggingface/hub/models--{org}--{name}/snapshots/{commit-hash}/
guard let hfBase = hfCacheBase else {
print("[LocalModelResolver] No cache base")
return nil
}
// Check system HF cache
// Structure: ~/.cache/huggingface/hub/models--{org}--{name}/snapshots/{commit-hash}/
if let hfBase = hfSystemCache {
let repoSlug = repoId.replacingOccurrences(of: "/", with: "--")
let modelBase = hfBase.appendingPathComponent("models--\(repoSlug)", isDirectory: true)
let repoSlug = repoId.replacingOccurrences(of: "/", with: "--")
let modelBase = hfBase.appendingPathComponent("models--\(repoSlug)", isDirectory: true)
print("[LocalModelResolver] Checking HF cache: \(modelBase.path)")
print("[LocalModelResolver] Checking HF cache: \(modelBase.path)")
// Look for snapshots directory
let snapshotsDir = modelBase.appendingPathComponent("snapshots", isDirectory: true)
var isDir: ObjCBool = false
guard FileManager.default.fileExists(atPath: snapshotsDir.path, isDirectory: &isDir), isDir.boolValue else {
print("[LocalModelResolver] No snapshots directory found")
return nil
}
let snapshotsDir = modelBase.appendingPathComponent("snapshots", isDirectory: true)
var isDir: ObjCBool = false
guard FileManager.default.fileExists(atPath: snapshotsDir.path, isDirectory: &isDir), isDir.boolValue else {
print("[LocalModelResolver] No snapshots directory found")
return nil
}
// Find the latest snapshot (commit hash directories)
if let snapshotDirs = try? FileManager.default.contentsOfDirectory(at: snapshotsDir, includingPropertiesForKeys: nil) {
print("[LocalModelResolver] Found \(snapshotDirs.count) snapshots")
for snapshotDir in snapshotDirs where isDirectory(snapshotDir) {
let configPath = snapshotDir.appendingPathComponent("config.json")
if FileManager.default.fileExists(atPath: configPath.path) {
print("[LocalModelResolver] Found valid snapshot: \(snapshotDir.path)")
return snapshotDir
}
if let snapshotDirs = try? FileManager.default.contentsOfDirectory(at: snapshotsDir, includingPropertiesForKeys: nil) {
print("[LocalModelResolver] Found \(snapshotDirs.count) snapshots")
for snapshotDir in snapshotDirs where isDirectory(snapshotDir) {
let configPath = snapshotDir.appendingPathComponent("config.json")
guard FileManager.default.fileExists(atPath: configPath.path) else { continue }
guard hasCompleteWeights(at: snapshotDir) else {
print("[LocalModelResolver] Snapshot missing weight files (incomplete download): \(snapshotDir.path)")
continue
}
print("[LocalModelResolver] Found valid snapshot: \(snapshotDir.path)")
return snapshotDir
}
print("[LocalModelResolver] No valid snapshot found")
}
print("[LocalModelResolver] Model not found locally")
@@ -91,38 +71,18 @@ enum LocalModelResolver {
}
static func discoveredLocalModels() -> [LocalModelInfo] {
var discovered: [LocalModelInfo] = []
print("[LocalModelResolver] Scanning HF cache: \(hfCacheBase?.path ?? "N/A")")
guard let hfBase = hfCacheBase else { return [] }
// Scan sandbox cache
print("[LocalModelResolver] Scanning sandbox cache: \(modelsBase?.path ?? "N/A")")
if let sandboxBase = modelsBase {
let sandboxModels = discoverModels(in: sandboxBase)
print("[LocalModelResolver] Found \(sandboxModels.count) models in sandbox cache")
discovered += sandboxModels
let models = discoverSystemHFModels(in: hfBase)
print("[LocalModelResolver] Found \(models.count) models:")
for model in models {
print("[LocalModelResolver] - \(model.repoId) (\(model.sizeBytes / (1024*1024)) MB)")
}
// Scan system HF cache
print("[LocalModelResolver] Scanning system HF cache: \(hfSystemCache?.path ?? "N/A")")
if let hfBase = hfSystemCache {
let hfModels = discoverSystemHFModels(in: hfBase)
print("[LocalModelResolver] Found \(hfModels.count) models in HF system cache:")
for model in hfModels {
print("[LocalModelResolver] - \(model.repoId) (\(model.sizeBytes / (1024*1024)) MB)")
}
discovered += hfModels
}
// Remove duplicates (same repoId) and sort
let byRepoId = Dictionary(uniqueKeysWithValues: discovered.map { ($0.repoId, $0) })
let finalModels = byRepoId.values.sorted { lhs, rhs in
lhs.repoId.localizedCaseInsensitiveCompare(rhs.repoId) == .orderedAscending
}
print("[LocalModelResolver] Total unique models: \(finalModels.count)")
return finalModels
return models
}
/// Discover models in the system HF cache (~/.cache/huggingface/hub/)
private static func discoverSystemHFModels(in base: URL) -> [LocalModelInfo] {
static func discoverSystemHFModels(in base: URL) -> [LocalModelInfo] {
let fileManager = FileManager.default
let directoryKeys: Set<URLResourceKey> = [.isDirectoryKey]
guard let modelBases = try? fileManager.contentsOfDirectory(
@@ -189,41 +149,6 @@ enum LocalModelResolver {
)
}
static func discoverModels(in base: URL) -> [LocalModelInfo] {
let fileManager = FileManager.default
let directoryKeys: Set<URLResourceKey> = [.isDirectoryKey]
guard let ownerDirectories = try? fileManager.contentsOfDirectory(
at: base,
includingPropertiesForKeys: Array(directoryKeys),
options: [.skipsHiddenFiles]
) else {
return []
}
var discovered: [LocalModelInfo] = []
for ownerDirectory in ownerDirectories {
guard isDirectory(ownerDirectory) else { continue }
guard let repoDirectories = try? fileManager.contentsOfDirectory(
at: ownerDirectory,
includingPropertiesForKeys: Array(directoryKeys),
options: [.skipsHiddenFiles]
) else {
continue
}
for repoDirectory in repoDirectories where isDirectory(repoDirectory) {
if let info = localModelInfo(ownerDirectory: ownerDirectory, repoDirectory: repoDirectory) {
discovered.append(info)
}
}
}
return discovered.sorted {
$0.repoId.localizedCaseInsensitiveCompare($1.repoId) == .orderedAscending
}
}
private static func isDirectory(_ url: URL) -> Bool {
var isDir: ObjCBool = false
if FileManager.default.fileExists(atPath: url.path, isDirectory: &isDir) {
@@ -232,77 +157,56 @@ enum LocalModelResolver {
return false
}
private static func localModelInfo(ownerDirectory: URL, repoDirectory: URL) -> LocalModelInfo? {
let repoId = "\(ownerDirectory.lastPathComponent)/\(repoDirectory.lastPathComponent)"
guard containsModelArtifacts(at: repoDirectory) else { return nil }
let config = readJSONObject(at: repoDirectory.appendingPathComponent("config.json"))
let tokenizerConfig = readJSONObject(at: repoDirectory.appendingPathComponent("tokenizer_config.json"))
let supportsImages = inferredSupportsImages(
repoDirectory: repoDirectory,
config: config,
tokenizerConfig: tokenizerConfig
)
let sizeBytes = directorySize(at: repoDirectory)
let contextLength = inferredContextLength(config: config, tokenizerConfig: tokenizerConfig)
let loaderKinds: [ModelConfig.LoaderKind] = supportsImages ? [.vlm, .llm] : [.llm, .vlm]
return LocalModelInfo(
repoId: repoId,
directory: repoDirectory,
sizeBytes: sizeBytes,
contextLength: contextLength,
loaderKinds: loaderKinds,
supportsImages: supportsImages
private static func containsModelArtifacts(at directory: URL) -> Bool {
let configExists = FileManager.default.fileExists(
atPath: directory.appendingPathComponent("config.json").path
)
return configExists && hasCompleteWeights(at: directory)
}
private static func containsModelArtifacts(at directory: URL) -> Bool {
let requiredPaths = [
directory.appendingPathComponent("config.json").path,
directory.appendingPathComponent("model.safetensors").path,
directory.appendingPathComponent("model.safetensors.index.json").path,
]
return requiredPaths.contains { FileManager.default.fileExists(atPath: $0) }
/// Reports whether a snapshot directory holds its full set of weight files.
///
/// A snapshot counts as complete when a single `model.safetensors` exists, or
/// when `model.safetensors.index.json` can be read and parsed and every shard
/// file named in its `weight_map` exists in `directory`. Any other situation
/// (no index, unreadable index, no shard names, a missing shard) yields `false`,
/// which is how partial/interrupted downloads are rejected.
///
/// - Parameter directory: The snapshot directory to inspect.
/// - Returns: `true` if the weights are fully present on disk, otherwise `false`.
static func hasCompleteWeights(at directory: URL) -> Bool {
    let fileManager = FileManager.default

    // Existence check for a file name relative to the snapshot directory.
    func exists(_ name: String) -> Bool {
        fileManager.fileExists(atPath: directory.appendingPathComponent(name).path)
    }

    // Unsharded checkpoint: the single file is all that is required.
    if exists("model.safetensors") { return true }

    // Sharded checkpoint: the index must be present, readable, and well-formed.
    let indexName = "model.safetensors.index.json"
    guard exists(indexName) else { return false }
    guard let raw = try? Data(contentsOf: directory.appendingPathComponent(indexName)) else {
        return false
    }
    guard let parsed = try? JSONSerialization.jsonObject(with: raw),
          let root = parsed as? [String: Any],
          let weightMap = root["weight_map"] as? [String: Any]
    else {
        return false
    }

    // Collect the distinct shard file names; non-string values are ignored.
    var shards = Set<String>()
    for case let shard as String in weightMap.values {
        shards.insert(shard)
    }
    if shards.isEmpty { return false }

    // Every referenced shard has to exist.
    for shard in shards where !exists(shard) {
        return false
    }
    return true
}
/// Delete the local cache for a model so it will be re-downloaded next time.
/// Removes from both sandbox cache and system HF cache if present.
@discardableResult
static func deleteLocal(repoId: String) -> Bool {
var deleted = false
guard let hfBase = hfCacheBase else { return false }
// Delete from sandbox cache
if let base = modelsBase {
let modelDir = base.appendingPathComponent(repoId, isDirectory: true)
if FileManager.default.fileExists(atPath: modelDir.path) {
do {
try FileManager.default.removeItem(at: modelDir)
print("[LocalModelResolver] Deleted sandbox cache: \(modelDir.path)")
deleted = true
} catch {
print("[LocalModelResolver] Failed to delete \(modelDir.path): \(error)")
}
}
let repoSlug = repoId.replacingOccurrences(of: "/", with: "--")
let modelBase = hfBase.appendingPathComponent("models--\(repoSlug)", isDirectory: true)
guard FileManager.default.fileExists(atPath: modelBase.path) else { return false }
do {
try FileManager.default.removeItem(at: modelBase)
print("[LocalModelResolver] Deleted cache: \(modelBase.path)")
return true
} catch {
print("[LocalModelResolver] Failed to delete \(modelBase.path): \(error)")
return false
}
// Delete from system HF cache
// Structure: ~/.cache/huggingface/hub/models--{org}--{name}/
if let hfBase = hfSystemCache {
let repoSlug = repoId.replacingOccurrences(of: "/", with: "--")
let modelBase = hfBase.appendingPathComponent("models--\(repoSlug)", isDirectory: true)
if FileManager.default.fileExists(atPath: modelBase.path) {
do {
try FileManager.default.removeItem(at: modelBase)
print("[LocalModelResolver] Deleted system cache: \(modelBase.path)")
deleted = true
} catch {
print("[LocalModelResolver] Failed to delete \(modelBase.path): \(error)")
}
}
}
return deleted
}
private static func readJSONObject(at url: URL) -> [String: Any]? {

View File

@@ -88,18 +88,19 @@ final class ChatViewModel {
let thinkingContext: [String: any Sendable]? = generationSettings.thinkingEnabled
? nil
: ["enable_thinking": false]
// NOTE: repetition / presence / frequency penalties are intentionally
// not forwarded to GenerateParameters. mlx-swift-lm 3.31.3's
// PenaltyProcessor uses TokenRing.loadPrompt, which assumes a 1-D
// prompt MLXArray. VLM models (Gemma3, Qwen-VL, etc.) hand it a 2-D
// [1, N] tokens array, so the ring buffer ends up the wrong size and
// every later MLX.where in TokenRing.append crashes via fatalError.
// Re-enable once upstream fixes TokenRing to flatten the prompt.
let generateParameters = GenerateParameters(
maxTokens: generationSettings.maxTokens,
temperature: Float(generationSettings.temperature),
topP: Float(generationSettings.topP),
topK: generationSettings.topK,
minP: Float(generationSettings.minP),
repetitionPenalty: generationSettings.repetitionPenalty.map(Float.init),
repetitionContextSize: 128,
presencePenalty: generationSettings.presencePenalty.map(Float.init),
presenceContextSize: 128,
frequencyPenalty: generationSettings.frequencyPenalty.map(Float.init),
frequencyContextSize: 128
minP: Float(generationSettings.minP)
)
let history = conversation.messages.compactMap(historyMessage(from:))
if history.isEmpty {

View File

@@ -1,24 +1,18 @@
import Foundation
import Hub
import HuggingFace
import MLX
import MLXHuggingFace
import MLXLLM
import MLXLMCommon
import MLXVLM
import Tokenizers
/// Manages model loading, switching, and generation.
@Observable
@MainActor
final class ModelManager {
/// HubApi with blob cache disabled to avoid storing every model twice.
/// swift-huggingface defaults to caching in both huggingface/hub/ (snapshots)
/// AND models/ (content-addressed blobs). We only need the snapshots.
/// Must use the same downloadBase as defaultHubApi (.cachesDirectory) so
/// LocalModelResolver can find downloaded models.
private static let hub: HubApi = {
let cachesDir = FileManager.default.urls(for: .cachesDirectory, in: .userDomainMask).first
return HubApi(downloadBase: cachesDir, cache: nil)
}()
private static let hubClient = HubClient.default
var currentModel: ModelConfig?
var availableModels: [ModelConfig]
@@ -31,8 +25,8 @@ final class ModelManager {
// Download-specific state for the modal
var isDownloading = false
var downloadFilesTotal: Int64 = 0
var downloadFilesCompleted: Int64 = 0
var downloadBytesTotal: Int64 = 0
var downloadBytesCompleted: Int64 = 0
var downloadSpeed: Double = 0 // bytes/sec
private var idleTimer: Timer?
@@ -93,8 +87,8 @@ final class ModelManager {
isDownloading = false
downloadProgress = 0
loadingModelName = ""
downloadFilesTotal = 0
downloadFilesCompleted = 0
downloadBytesTotal = 0
downloadBytesCompleted = 0
downloadSpeed = 0
}
@@ -122,8 +116,8 @@ final class ModelManager {
let needsDownload = !effectiveConfig.isLocal
if needsDownload {
isDownloading = true
downloadFilesTotal = 0
downloadFilesCompleted = 0
downloadBytesTotal = 0
downloadBytesCompleted = 0
downloadSpeed = 0
}
@@ -132,8 +126,8 @@ final class ModelManager {
Task { @MainActor in
self.downloadProgress = progress.fractionCompleted
if self.isDownloading {
self.downloadFilesTotal = progress.totalUnitCount
self.downloadFilesCompleted = progress.completedUnitCount
self.downloadBytesTotal = progress.totalUnitCount
self.downloadBytesCompleted = progress.completedUnitCount
if let speed = progress.userInfo[.throughputKey] as? Double {
self.downloadSpeed = speed
}
@@ -235,13 +229,15 @@ final class ModelManager {
switch loaderKind {
case .llm:
return try await LLMModelFactory.shared.loadContainer(
hub: Self.hub,
from: #hubDownloader(Self.hubClient),
using: #huggingFaceTokenizerLoader(),
configuration: configuration,
progressHandler: progressHandler
)
case .vlm:
return try await VLMModelFactory.shared.loadContainer(
hub: Self.hub,
from: #hubDownloader(Self.hubClient),
using: #huggingFaceTokenizerLoader(),
configuration: configuration,
progressHandler: progressHandler
)

View File

@@ -20,9 +20,9 @@ struct DownloadModalView: View {
.progressViewStyle(.linear)
HStack {
// Files progress
if modelManager.downloadFilesTotal > 0 {
Text("File \(modelManager.downloadFilesCompleted)/\(modelManager.downloadFilesTotal)")
// Bytes progress
if modelManager.downloadBytesTotal > 0 {
Text("\(formatBytes(modelManager.downloadBytesCompleted)) / \(formatBytes(modelManager.downloadBytesTotal))")
.font(.caption.monospacedDigit())
.foregroundStyle(.secondary)
}
@@ -65,4 +65,17 @@ struct DownloadModalView: View {
return String(format: "%.0f B/s", bytesPerSec)
}
}
/// Renders a byte count as a short human-readable string using binary
/// (1024-based) units: two decimals for GB, whole numbers for MB and KB,
/// and the raw integer followed by "B" for anything below 1024.
///
/// - Parameter bytes: The byte count to format.
/// - Returns: The formatted string, e.g. "1.50 GB", "5 MB", "2 KB", "512 B".
private func formatBytes(_ bytes: Int64) -> String {
    let kib = 1024.0
    let mib = kib * 1024.0
    let gib = mib * 1024.0
    let amount = Double(bytes)

    // Cases are tried largest unit first, so the first match wins.
    switch amount {
    case gib...:
        return String(format: "%.2f GB", amount / gib)
    case mib...:
        return String(format: "%.0f MB", amount / mib)
    case kib...:
        return String(format: "%.0f KB", amount / kib)
    default:
        return "\(bytes) B"
    }
}
}

View File

@@ -3,30 +3,27 @@ import XCTest
@testable import MLX_Server
final class LocalModelResolverTests: XCTestCase {
func testDiscoverModelsInfersTextOnlyMetadataAndDirectorySize() throws {
let base = try makeTempModelsRoot()
let repoDirectory = try makeRepoDirectory(base: base, owner: "example", repo: "text-only")
let configURL = repoDirectory.appendingPathComponent("config.json")
let modelURL = repoDirectory.appendingPathComponent("model.safetensors")
let tokenizerURL = repoDirectory.appendingPathComponent("tokenizer.json")
func testDiscoverSystemHFModelsInfersTextOnlyMetadata() throws {
let base = try makeTempHFCache()
let snapshotDir = try makeHFSnapshot(base: base, repoId: "example/text-only")
try writeJSON(
[
"architectures": ["LlamaForCausalLM"],
"max_position_embeddings": 32768,
],
to: configURL
to: snapshotDir.appendingPathComponent("config.json")
)
try Data(repeating: 0x11, count: 64).write(to: modelURL)
try Data(repeating: 0x22, count: 19).write(to: tokenizerURL)
try Data(repeating: 0x11, count: 64).write(to: snapshotDir.appendingPathComponent("model.safetensors"))
try Data(repeating: 0x22, count: 19).write(to: snapshotDir.appendingPathComponent("tokenizer.json"))
let expectedSize = Int64(
try Data(contentsOf: configURL).count
+ Data(contentsOf: modelURL).count
+ Data(contentsOf: tokenizerURL).count
try Data(contentsOf: snapshotDir.appendingPathComponent("config.json")).count
+ Data(contentsOf: snapshotDir.appendingPathComponent("model.safetensors")).count
+ Data(contentsOf: snapshotDir.appendingPathComponent("tokenizer.json")).count
)
let discovered = LocalModelResolver.discoverModels(in: base)
let discovered = LocalModelResolver.discoverSystemHFModels(in: base)
let model = try XCTUnwrap(discovered.first)
XCTAssertEqual(model.repoId, "example/text-only")
@@ -36,21 +33,25 @@ final class LocalModelResolverTests: XCTestCase {
XCTAssertEqual(model.sizeBytes, expectedSize)
}
func testDiscoverModelsInfersVisionMetadataFromProcessorFiles() throws {
let base = try makeTempModelsRoot()
let repoDirectory = try makeRepoDirectory(base: base, owner: "example", repo: "vision-model")
func testDiscoverSystemHFModelsInfersVisionMetadata() throws {
let base = try makeTempHFCache()
let snapshotDir = try makeHFSnapshot(base: base, repoId: "example/vision-model")
try writeJSON(
[
"text_config": ["max_position_embeddings": 262144],
"vision_config": ["hidden_size": 768],
],
to: repoDirectory.appendingPathComponent("config.json")
to: snapshotDir.appendingPathComponent("config.json")
)
try writeJSON(["processor_class": "Qwen3VLProcessor"], to: repoDirectory.appendingPathComponent("tokenizer_config.json"))
try Data(repeating: 0x33, count: 12).write(to: repoDirectory.appendingPathComponent("processor_config.json"))
try Data(repeating: 0x44, count: 8).write(to: repoDirectory.appendingPathComponent("model.safetensors.index.json"))
try writeJSON(
["processor_class": "Qwen3VLProcessor"],
to: snapshotDir.appendingPathComponent("tokenizer_config.json")
)
try Data(repeating: 0x33, count: 12).write(to: snapshotDir.appendingPathComponent("processor_config.json"))
try Data(repeating: 0x44, count: 8).write(to: snapshotDir.appendingPathComponent("model.safetensors.index.json"))
let discovered = LocalModelResolver.discoverModels(in: base)
let discovered = LocalModelResolver.discoverSystemHFModels(in: base)
let model = try XCTUnwrap(discovered.first)
XCTAssertEqual(model.repoId, "example/vision-model")
@@ -155,7 +156,7 @@ final class LocalModelResolverTests: XCTestCase {
XCTAssertTrue(config.supportsTools)
}
private func makeTempModelsRoot() throws -> URL {
private func makeTempHFCache() throws -> URL {
let root = FileManager.default.temporaryDirectory
.appendingPathComponent(UUID().uuidString, isDirectory: true)
try FileManager.default.createDirectory(at: root, withIntermediateDirectories: true)
@@ -165,12 +166,14 @@ final class LocalModelResolverTests: XCTestCase {
return root
}
private func makeRepoDirectory(base: URL, owner: String, repo: String) throws -> URL {
let directory = base
.appendingPathComponent(owner, isDirectory: true)
.appendingPathComponent(repo, isDirectory: true)
try FileManager.default.createDirectory(at: directory, withIntermediateDirectories: true)
return directory
/// Creates a snapshot directory at
/// `<base>/models--<repoId with "/" replaced by "-->/snapshots/<hash>/`
/// and returns its URL. Intermediate directories are created as needed.
///
/// - Parameters:
///   - base: Root directory standing in for the cache.
///   - repoId: Repository identifier such as "example/text-only".
///   - hash: Name of the snapshot directory; defaults to "abc123".
/// - Returns: URL of the created snapshot directory.
/// - Throws: Any error raised while creating the directory.
private func makeHFSnapshot(base: URL, repoId: String, hash: String = "abc123") throws -> URL {
    let repoFolder = "models--" + repoId.replacingOccurrences(of: "/", with: "--")

    // Walk down the three directory levels one component at a time.
    var snapshotURL = base
    for component in [repoFolder, "snapshots", hash] {
        snapshotURL = snapshotURL.appendingPathComponent(component, isDirectory: true)
    }

    try FileManager.default.createDirectory(at: snapshotURL, withIntermediateDirectories: true)
    return snapshotURL
}
private func writeJSON(_ object: Any, to url: URL) throws {

View File

@@ -1,7 +1,9 @@
import Foundation
import Hub
import HuggingFace
import MLXHuggingFace
import MLXLMCommon
import MLXVLM
import Tokenizers
import XCTest
@testable import MLX_Server
@@ -671,10 +673,9 @@ private actor LocalGemmaFixture {
}
let loadTask = Task<ModelContainer, Error> {
let cachesDir = FileManager.default.urls(for: .cachesDirectory, in: .userDomainMask).first
let hub = HubApi(downloadBase: cachesDir, cache: nil)
return try await VLMModelFactory.shared.loadContainer(
hub: hub,
from: #hubDownloader(HubClient.default),
using: #huggingFaceTokenizerLoader(),
configuration: ModelConfiguration(directory: localDir),
progressHandler: { _ in }
)

View File

@@ -1,8 +1,10 @@
import Foundation
import Hub
import HuggingFace
import MLX
import MLXHuggingFace
import MLXLMCommon
import MLXVLM
import Tokenizers
import XCTest
@testable import MLX_Server
@@ -230,10 +232,9 @@ private actor LocalGemmaFixture {
}
let loadTask = Task<ModelContainer, Error> {
let cachesDir = FileManager.default.urls(for: .cachesDirectory, in: .userDomainMask).first
let hub = HubApi(downloadBase: cachesDir, cache: nil)
return try await VLMModelFactory.shared.loadContainer(
hub: hub,
from: #hubDownloader(HubClient.default),
using: #huggingFaceTokenizerLoader(),
configuration: ModelConfiguration(directory: localDir),
progressHandler: { _ in }
)

View File

@@ -249,4 +249,11 @@ private final class NonStandardCache: KVCache {
) -> MLXFast.ScaledDotProductAttentionMaskMode {
.none
}
/// Returns a new `NonStandardCache` instance that carries over this cache's
/// `state` and `offset`.
func copy() -> any KVCache {
    let duplicate = NonStandardCache(tokenCount: 0, headDim: 0)
    // `state` is assigned before `offset`, preserving the original order.
    duplicate.state = self.state
    duplicate.offset = self.offset
    return duplicate
}
}

View File

@@ -388,4 +388,10 @@ private final class TestTrimRecordingCache: KVCache {
) -> MLXFast.ScaledDotProductAttentionMaskMode {
.none
}
/// Returns a new `TestTrimRecordingCache` built from this cache's `offset`
/// and `trimmable`, with `state` copied over afterwards.
func copy() -> any KVCache {
    let duplicate = TestTrimRecordingCache(offset: self.offset, trimmable: self.trimmable)
    duplicate.state = self.state
    return duplicate
}
}

View File

@@ -19,6 +19,7 @@ xcodebuild \
-scheme MLXServer \
-destination 'platform=macOS' \
-configuration "$CONFIG" \
-skipMacroValidation \
SYMROOT="$BUILD_DIR" \
build 2>&1 | \
grep -E "(CompileSwift .* 'MLXServer'|error:|warning:.*MLXServer/|BUILD )" | \

View File

@@ -9,7 +9,13 @@ options:
packages:
mlx-swift-lm:
url: https://github.com/ml-explore/mlx-swift-lm
branch: main
from: "3.31.3"
swift-huggingface:
url: https://github.com/huggingface/swift-huggingface
from: "0.9.0"
swift-transformers:
url: https://github.com/huggingface/swift-transformers
from: "1.2.0"
MarkdownUI:
url: https://github.com/gonzalezreal/swift-markdown-ui
from: "2.4.0"
@@ -40,6 +46,12 @@ targets:
product: MLXVLM
- package: mlx-swift-lm
product: MLXLMCommon
- package: mlx-swift-lm
product: MLXHuggingFace
- package: swift-huggingface
product: HuggingFace
- package: swift-transformers
product: Tokenizers
- package: MarkdownUI
product: MarkdownUI
MLXServerTests:

27
resolve-packages.sh Executable file
View File

@@ -0,0 +1,27 @@
#!/bin/bash
# Re-resolves the Swift package dependencies of MLXServer.xcodeproj from scratch.
#
# Steps: delete the existing Package.resolved, the script-local SwiftPM cache
# and clone directories, and build/DerivedData; recreate the cache and clone
# directories; then run `xcodebuild -resolvePackageDependencies` against them.
# Abort on any error, unset variable, or failed pipeline stage.
set -euo pipefail
# All paths are derived from the directory that contains this script.
PROJECT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_PATH="$PROJECT_DIR/MLXServer.xcodeproj"
RESOLVED_PATH="$PROJECT_PATH/project.xcworkspace/xcshareddata/swiftpm/Package.resolved"
SPM_STATE_DIR="$PROJECT_DIR/build/swiftpm"
PACKAGE_CACHE_PATH="$SPM_STATE_DIR/cache"
CLONED_SOURCES_PATH="$SPM_STATE_DIR/clones"
DERIVED_DATA_PATH="$PROJECT_DIR/build/DerivedData"
echo "==> Resolving Swift packages from project.yml constraints"
# Force a fresh resolve: drop the lockfile, the local package cache and clones,
# and DerivedData so nothing from a previous resolution is reused.
# NOTE(review): this was written for branch-based dependencies (mlx-swift-lm
# tracking `main`). project.yml now pins mlx-swift-lm with `from: "3.31.3"`, so
# deleting Package.resolved presumably re-resolves within the version
# constraints on every run. Confirm that dropping the lockfile is still intended.
rm -f "$RESOLVED_PATH"
rm -rf "$PACKAGE_CACHE_PATH" "$CLONED_SOURCES_PATH"
rm -rf "$DERIVED_DATA_PATH"
mkdir -p "$PACKAGE_CACHE_PATH" "$CLONED_SOURCES_PATH"
# Resolve only (no build), using the freshly created cache and clone directories.
xcodebuild \
-resolvePackageDependencies \
-project "$PROJECT_PATH" \
-scheme MLXServer \
-disablePackageRepositoryCache \
-packageCachePath "$PACKAGE_CACHE_PATH" \
-clonedSourcePackagesDirPath "$CLONED_SOURCES_PATH"

View File

@@ -21,6 +21,7 @@ XCODEBUILD_ARGS=(
-scheme MLXServer
-destination "$DESTINATION"
-configuration "$CONFIG"
-skipMacroValidation
SYMROOT="$BUILD_DIR"
)