feat: migration to mlx-swift-lm v3

This commit is contained in:
2026-04-30 09:18:37 +02:00
parent 4ad46ec1ea
commit 3502266ff9
13 changed files with 211 additions and 228 deletions

View File

@@ -17,6 +17,7 @@
20FFB5DBF75AA6C359AAE31C /* SceneManagementView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 37FEB592E5E717F817B03151 /* SceneManagementView.swift */; }; 20FFB5DBF75AA6C359AAE31C /* SceneManagementView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 37FEB592E5E717F817B03151 /* SceneManagementView.swift */; };
221DEC86374902FCFD661A01 /* TokenPrefixCacheTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 64B2EDD5D1881AC9E1E60913 /* TokenPrefixCacheTests.swift */; }; 221DEC86374902FCFD661A01 /* TokenPrefixCacheTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 64B2EDD5D1881AC9E1E60913 /* TokenPrefixCacheTests.swift */; };
2640EDCA9033D85C0B785557 /* GenerationSettings.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6FAF7455BD387CD2061E0CBF /* GenerationSettings.swift */; }; 2640EDCA9033D85C0B785557 /* GenerationSettings.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6FAF7455BD387CD2061E0CBF /* GenerationSettings.swift */; };
28A780EEB6DC74B5B0BBF03D /* HuggingFace in Frameworks */ = {isa = PBXBuildFile; productRef = FDBFD829EE956976552514CC /* HuggingFace */; };
29879D696584B96CC56560DF /* ChatExporter.swift in Sources */ = {isa = PBXBuildFile; fileRef = D7C9BAD674E29688ACE53B0B /* ChatExporter.swift */; }; 29879D696584B96CC56560DF /* ChatExporter.swift in Sources */ = {isa = PBXBuildFile; fileRef = D7C9BAD674E29688ACE53B0B /* ChatExporter.swift */; };
2CAAF7129F7CC45200FA9F6B /* ModelPickerView.swift in Sources */ = {isa = PBXBuildFile; fileRef = C3C3A76C02AF70A9D8F868FC /* ModelPickerView.swift */; }; 2CAAF7129F7CC45200FA9F6B /* ModelPickerView.swift in Sources */ = {isa = PBXBuildFile; fileRef = C3C3A76C02AF70A9D8F868FC /* ModelPickerView.swift */; };
2D08769282BD71C170DB0943 /* InferenceStats.swift in Sources */ = {isa = PBXBuildFile; fileRef = E35452B166893B25E765FF70 /* InferenceStats.swift */; }; 2D08769282BD71C170DB0943 /* InferenceStats.swift in Sources */ = {isa = PBXBuildFile; fileRef = E35452B166893B25E765FF70 /* InferenceStats.swift */; };
@@ -38,6 +39,7 @@
6828CCA8B78AB40906F87CAB /* LocalModelResolver.swift in Sources */ = {isa = PBXBuildFile; fileRef = D733A0D1D4AC25DDDA6C8684 /* LocalModelResolver.swift */; }; 6828CCA8B78AB40906F87CAB /* LocalModelResolver.swift in Sources */ = {isa = PBXBuildFile; fileRef = D733A0D1D4AC25DDDA6C8684 /* LocalModelResolver.swift */; };
741692862DB1F13EA0B2D14D /* TokenPrefixCache.swift in Sources */ = {isa = PBXBuildFile; fileRef = 1962D530BEABCC7F1E8E0ED1 /* TokenPrefixCache.swift */; }; 741692862DB1F13EA0B2D14D /* TokenPrefixCache.swift in Sources */ = {isa = PBXBuildFile; fileRef = 1962D530BEABCC7F1E8E0ED1 /* TokenPrefixCache.swift */; };
75E046B4ABB1E6FEF17C1A60 /* ModelManagementWindow.swift in Sources */ = {isa = PBXBuildFile; fileRef = 721D6F203A10434FE0223042 /* ModelManagementWindow.swift */; }; 75E046B4ABB1E6FEF17C1A60 /* ModelManagementWindow.swift in Sources */ = {isa = PBXBuildFile; fileRef = 721D6F203A10434FE0223042 /* ModelManagementWindow.swift */; };
777AEBB3471D8838F0F51D08 /* MarkdownUI in Frameworks */ = {isa = PBXBuildFile; productRef = A98257123539E9E738213BFA /* MarkdownUI */; };
7936325B425DFA2931F6E421 /* ModelBackedQuantizationTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = F7E6F18C80D9859E89D2B4E3 /* ModelBackedQuantizationTests.swift */; }; 7936325B425DFA2931F6E421 /* ModelBackedQuantizationTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = F7E6F18C80D9859E89D2B4E3 /* ModelBackedQuantizationTests.swift */; };
7CD765C1E2F9F4D7504C8D09 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = B629DA084A9A40E54F8EA5FA /* Assets.xcassets */; }; 7CD765C1E2F9F4D7504C8D09 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = B629DA084A9A40E54F8EA5FA /* Assets.xcassets */; };
80646C5066BF79BC76E1D9D7 /* ModelConfig.swift in Sources */ = {isa = PBXBuildFile; fileRef = 38DFC212AF4359A45FBE22BA /* ModelConfig.swift */; }; 80646C5066BF79BC76E1D9D7 /* ModelConfig.swift in Sources */ = {isa = PBXBuildFile; fileRef = 38DFC212AF4359A45FBE22BA /* ModelConfig.swift */; };
@@ -67,7 +69,8 @@
E92B6656C251EDA246B8F582 /* ImageDecoderTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = E4573DC9314915F4C7963B4E /* ImageDecoderTests.swift */; }; E92B6656C251EDA246B8F582 /* ImageDecoderTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = E4573DC9314915F4C7963B4E /* ImageDecoderTests.swift */; };
EC4FC68608DDFA6A3DF133CC /* InferenceEngine.swift in Sources */ = {isa = PBXBuildFile; fileRef = 02EBDE0C72D1C5CE220E5B93 /* InferenceEngine.swift */; }; EC4FC68608DDFA6A3DF133CC /* InferenceEngine.swift in Sources */ = {isa = PBXBuildFile; fileRef = 02EBDE0C72D1C5CE220E5B93 /* InferenceEngine.swift */; };
EDE59C241940E7B9B53D520D /* TokenPrefixCacheQuantizationTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = D50504058693CDE533D755B5 /* TokenPrefixCacheQuantizationTests.swift */; }; EDE59C241940E7B9B53D520D /* TokenPrefixCacheQuantizationTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = D50504058693CDE533D755B5 /* TokenPrefixCacheQuantizationTests.swift */; };
F546CE5955ED253D8A793D5E /* MarkdownUI in Frameworks */ = {isa = PBXBuildFile; productRef = A98257123539E9E738213BFA /* MarkdownUI */; }; F2A137B60D5DFCC591A01420 /* Tokenizers in Frameworks */ = {isa = PBXBuildFile; productRef = BD266A137966DB9451C2C352 /* Tokenizers */; };
F546CE5955ED253D8A793D5E /* MLXHuggingFace in Frameworks */ = {isa = PBXBuildFile; productRef = 269A55730E9BDC735F9C2B78 /* MLXHuggingFace */; };
FAF7D4714AC6D02674920208 /* ChatMessage.swift in Sources */ = {isa = PBXBuildFile; fileRef = A4B359324B5FD8D106C74338 /* ChatMessage.swift */; }; FAF7D4714AC6D02674920208 /* ChatMessage.swift in Sources */ = {isa = PBXBuildFile; fileRef = A4B359324B5FD8D106C74338 /* ChatMessage.swift */; };
FCD48F8C132A2B830A15EEB4 /* MLXLLM in Frameworks */ = {isa = PBXBuildFile; productRef = 3F5A4AC6DBAF7CA686ECA74E /* MLXLLM */; }; FCD48F8C132A2B830A15EEB4 /* MLXLLM in Frameworks */ = {isa = PBXBuildFile; productRef = 3F5A4AC6DBAF7CA686ECA74E /* MLXLLM */; };
FE4405F66873C75CD6FA19A5 /* StreamingSSEEncoderTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 49C383DD5224F3420EB98DB2 /* StreamingSSEEncoderTests.swift */; }; FE4405F66873C75CD6FA19A5 /* StreamingSSEEncoderTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 49C383DD5224F3420EB98DB2 /* StreamingSSEEncoderTests.swift */; };
@@ -158,7 +161,10 @@
FCD48F8C132A2B830A15EEB4 /* MLXLLM in Frameworks */, FCD48F8C132A2B830A15EEB4 /* MLXLLM in Frameworks */,
945474365D0B3E961811909A /* MLXVLM in Frameworks */, 945474365D0B3E961811909A /* MLXVLM in Frameworks */,
B6D3662995B885C102876B4A /* MLXLMCommon in Frameworks */, B6D3662995B885C102876B4A /* MLXLMCommon in Frameworks */,
F546CE5955ED253D8A793D5E /* MarkdownUI in Frameworks */, F546CE5955ED253D8A793D5E /* MLXHuggingFace in Frameworks */,
28A780EEB6DC74B5B0BBF03D /* HuggingFace in Frameworks */,
F2A137B60D5DFCC591A01420 /* Tokenizers in Frameworks */,
777AEBB3471D8838F0F51D08 /* MarkdownUI in Frameworks */,
); );
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
}; };
@@ -343,6 +349,9 @@
3F5A4AC6DBAF7CA686ECA74E /* MLXLLM */, 3F5A4AC6DBAF7CA686ECA74E /* MLXLLM */,
D5E8E1C2DD8D8AABB4306193 /* MLXVLM */, D5E8E1C2DD8D8AABB4306193 /* MLXVLM */,
9090667D4134056AE66DC2F1 /* MLXLMCommon */, 9090667D4134056AE66DC2F1 /* MLXLMCommon */,
269A55730E9BDC735F9C2B78 /* MLXHuggingFace */,
FDBFD829EE956976552514CC /* HuggingFace */,
BD266A137966DB9451C2C352 /* Tokenizers */,
A98257123539E9E738213BFA /* MarkdownUI */, A98257123539E9E738213BFA /* MarkdownUI */,
); );
productName = MLXServer; productName = MLXServer;
@@ -390,6 +399,8 @@
packageReferences = ( packageReferences = (
D402301668D113A49B6DD32D /* XCRemoteSwiftPackageReference "swift-markdown-ui" */, D402301668D113A49B6DD32D /* XCRemoteSwiftPackageReference "swift-markdown-ui" */,
1AA4C71F15847A241E418C0C /* XCRemoteSwiftPackageReference "mlx-swift-lm" */, 1AA4C71F15847A241E418C0C /* XCRemoteSwiftPackageReference "mlx-swift-lm" */,
A6D001FF3D9EA5BA3112F5BF /* XCRemoteSwiftPackageReference "swift-huggingface" */,
5479E9F7A876DC346598E560 /* XCRemoteSwiftPackageReference "swift-transformers" */,
); );
preferredProjectObjectVersion = 77; preferredProjectObjectVersion = 77;
productRefGroup = 652987C2A419DBFC79E32CDE /* Products */; productRefGroup = 652987C2A419DBFC79E32CDE /* Products */;
@@ -736,8 +747,24 @@
isa = XCRemoteSwiftPackageReference; isa = XCRemoteSwiftPackageReference;
repositoryURL = "https://github.com/ml-explore/mlx-swift-lm"; repositoryURL = "https://github.com/ml-explore/mlx-swift-lm";
requirement = { requirement = {
branch = main; kind = upToNextMajorVersion;
kind = branch; minimumVersion = 3.31.3;
};
};
5479E9F7A876DC346598E560 /* XCRemoteSwiftPackageReference "swift-transformers" */ = {
isa = XCRemoteSwiftPackageReference;
repositoryURL = "https://github.com/huggingface/swift-transformers";
requirement = {
kind = upToNextMajorVersion;
minimumVersion = 1.2.0;
};
};
A6D001FF3D9EA5BA3112F5BF /* XCRemoteSwiftPackageReference "swift-huggingface" */ = {
isa = XCRemoteSwiftPackageReference;
repositoryURL = "https://github.com/huggingface/swift-huggingface";
requirement = {
kind = upToNextMajorVersion;
minimumVersion = 0.9.0;
}; };
}; };
D402301668D113A49B6DD32D /* XCRemoteSwiftPackageReference "swift-markdown-ui" */ = { D402301668D113A49B6DD32D /* XCRemoteSwiftPackageReference "swift-markdown-ui" */ = {
@@ -751,6 +778,11 @@
/* End XCRemoteSwiftPackageReference section */ /* End XCRemoteSwiftPackageReference section */
/* Begin XCSwiftPackageProductDependency section */ /* Begin XCSwiftPackageProductDependency section */
269A55730E9BDC735F9C2B78 /* MLXHuggingFace */ = {
isa = XCSwiftPackageProductDependency;
package = 1AA4C71F15847A241E418C0C /* XCRemoteSwiftPackageReference "mlx-swift-lm" */;
productName = MLXHuggingFace;
};
3F5A4AC6DBAF7CA686ECA74E /* MLXLLM */ = { 3F5A4AC6DBAF7CA686ECA74E /* MLXLLM */ = {
isa = XCSwiftPackageProductDependency; isa = XCSwiftPackageProductDependency;
package = 1AA4C71F15847A241E418C0C /* XCRemoteSwiftPackageReference "mlx-swift-lm" */; package = 1AA4C71F15847A241E418C0C /* XCRemoteSwiftPackageReference "mlx-swift-lm" */;
@@ -766,11 +798,21 @@
package = D402301668D113A49B6DD32D /* XCRemoteSwiftPackageReference "swift-markdown-ui" */; package = D402301668D113A49B6DD32D /* XCRemoteSwiftPackageReference "swift-markdown-ui" */;
productName = MarkdownUI; productName = MarkdownUI;
}; };
BD266A137966DB9451C2C352 /* Tokenizers */ = {
isa = XCSwiftPackageProductDependency;
package = 5479E9F7A876DC346598E560 /* XCRemoteSwiftPackageReference "swift-transformers" */;
productName = Tokenizers;
};
D5E8E1C2DD8D8AABB4306193 /* MLXVLM */ = { D5E8E1C2DD8D8AABB4306193 /* MLXVLM */ = {
isa = XCSwiftPackageProductDependency; isa = XCSwiftPackageProductDependency;
package = 1AA4C71F15847A241E418C0C /* XCRemoteSwiftPackageReference "mlx-swift-lm" */; package = 1AA4C71F15847A241E418C0C /* XCRemoteSwiftPackageReference "mlx-swift-lm" */;
productName = MLXVLM; productName = MLXVLM;
}; };
FDBFD829EE956976552514CC /* HuggingFace */ = {
isa = XCSwiftPackageProductDependency;
package = A6D001FF3D9EA5BA3112F5BF /* XCRemoteSwiftPackageReference "swift-huggingface" */;
productName = HuggingFace;
};
/* End XCSwiftPackageProductDependency section */ /* End XCSwiftPackageProductDependency section */
}; };
rootObject = 938BC479816FCA8527B731F9 /* Project object */; rootObject = 938BC479816FCA8527B731F9 /* Project object */;

View File

@@ -1,5 +1,5 @@
{ {
"originHash" : "418f7299ccb303e0e8992dfc960a3df5df98d527f18667aa162699027b29b6cd", "originHash" : "af28e5c426709ddbdb4b91bab23f3971aba7ff96fb35d16285d757a8f482e340",
"pins" : [ "pins" : [
{ {
"identity" : "eventsource", "identity" : "eventsource",
@@ -15,8 +15,8 @@
"kind" : "remoteSourceControl", "kind" : "remoteSourceControl",
"location" : "https://github.com/ml-explore/mlx-swift", "location" : "https://github.com/ml-explore/mlx-swift",
"state" : { "state" : {
"revision" : "6ba4827fb82c97d012eec9ab4b2de21f85c3b33d", "revision" : "61b9e011e09a62b489f6bd647958f1555bdf2896",
"version" : "0.30.6" "version" : "0.31.3"
} }
}, },
{ {
@@ -24,8 +24,8 @@
"kind" : "remoteSourceControl", "kind" : "remoteSourceControl",
"location" : "https://github.com/ml-explore/mlx-swift-lm", "location" : "https://github.com/ml-explore/mlx-swift-lm",
"state" : { "state" : {
"branch" : "main", "revision" : "1c05248bb0899e2a7a4962b84d319cf12f4e12aa",
"revision" : "bc3c20ef4644c86f2b347debcfe1efe4308712a6" "version" : "3.31.3"
} }
}, },
{ {
@@ -127,6 +127,15 @@
"version" : "1.1.1" "version" : "1.1.1"
} }
}, },
{
"identity" : "swift-syntax",
"kind" : "remoteSourceControl",
"location" : "https://github.com/swiftlang/swift-syntax.git",
"state" : {
"revision" : "0687f71944021d616d34d922343dcef086855920",
"version" : "600.0.1"
}
},
{ {
"identity" : "swift-system", "identity" : "swift-system",
"kind" : "remoteSourceControl", "kind" : "remoteSourceControl",

View File

@@ -1,9 +1,6 @@
import Foundation import Foundation
/// Resolves HuggingFace model repos to local directories. /// Resolves HuggingFace model repos to local directories in ~/.cache/huggingface/hub/.
/// Checks multiple locations:
/// 1. Sandbox cache: ~/Library/Containers/de.rfc1437.mlxserver/Data/Library/Caches/models/{org}/{name}/
/// 2. System HF cache: ~/.cache/huggingface/hub/
enum LocalModelResolver { enum LocalModelResolver {
struct LocalModelInfo: Identifiable, Hashable { struct LocalModelInfo: Identifiable, Hashable {
@@ -17,16 +14,8 @@ enum LocalModelResolver {
var id: String { repoId } var id: String { repoId }
} }
/// Base directory where HubApi stores downloaded models (sandbox cache). /// HuggingFace cache directory (~/.cache/huggingface/hub/).
private static let modelsBase: URL? = { private static let hfCacheBase: URL? = {
FileManager.default.urls(for: .cachesDirectory, in: .userDomainMask).first?
.appendingPathComponent("models", isDirectory: true)
}()
/// System HuggingFace cache directory (~/.cache/huggingface/hub/).
/// Note: Requires com.apple.security.files.home-relative-directory.read entitlement
private static let hfSystemCache: URL? = {
// Use homeDirectoryForCurrentUser which works in sandbox with proper entitlement
return FileManager.default.homeDirectoryForCurrentUser return FileManager.default.homeDirectoryForCurrentUser
.appendingPathComponent(".cache", isDirectory: true) .appendingPathComponent(".cache", isDirectory: true)
.appendingPathComponent("huggingface", isDirectory: true) .appendingPathComponent("huggingface", isDirectory: true)
@@ -35,31 +24,22 @@ enum LocalModelResolver {
/// Resolve a HuggingFace repo ID (e.g. "mlx-community/gemma-3-4b-it-4bit") /// Resolve a HuggingFace repo ID (e.g. "mlx-community/gemma-3-4b-it-4bit")
/// to its local directory, if it exists. /// to its local directory, if it exists.
/// Checks sandbox cache first, then system HF cache.
/// ///
/// Returns `nil` if the model hasn't been downloaded yet. /// Returns `nil` if the model hasn't been downloaded yet.
static func resolve(repoId: String) -> URL? { static func resolve(repoId: String) -> URL? {
print("[LocalModelResolver] Resolving: \(repoId)") print("[LocalModelResolver] Resolving: \(repoId)")
// Check sandbox cache first // Structure: ~/.cache/huggingface/hub/models--{org}--{name}/snapshots/{commit-hash}/
if let base = modelsBase { guard let hfBase = hfCacheBase else {
let modelDir = base.appendingPathComponent(repoId, isDirectory: true) print("[LocalModelResolver] No cache base")
var isDir: ObjCBool = false return nil
if FileManager.default.fileExists(atPath: modelDir.path, isDirectory: &isDir), isDir.boolValue {
print("[LocalModelResolver] Found in sandbox cache: \(modelDir.path)")
return modelDir
}
} }
// Check system HF cache
// Structure: ~/.cache/huggingface/hub/models--{org}--{name}/snapshots/{commit-hash}/
if let hfBase = hfSystemCache {
let repoSlug = repoId.replacingOccurrences(of: "/", with: "--") let repoSlug = repoId.replacingOccurrences(of: "/", with: "--")
let modelBase = hfBase.appendingPathComponent("models--\(repoSlug)", isDirectory: true) let modelBase = hfBase.appendingPathComponent("models--\(repoSlug)", isDirectory: true)
print("[LocalModelResolver] Checking HF cache: \(modelBase.path)") print("[LocalModelResolver] Checking HF cache: \(modelBase.path)")
// Look for snapshots directory
let snapshotsDir = modelBase.appendingPathComponent("snapshots", isDirectory: true) let snapshotsDir = modelBase.appendingPathComponent("snapshots", isDirectory: true)
var isDir: ObjCBool = false var isDir: ObjCBool = false
guard FileManager.default.fileExists(atPath: snapshotsDir.path, isDirectory: &isDir), isDir.boolValue else { guard FileManager.default.fileExists(atPath: snapshotsDir.path, isDirectory: &isDir), isDir.boolValue else {
@@ -67,7 +47,6 @@ enum LocalModelResolver {
return nil return nil
} }
// Find the latest snapshot (commit hash directories)
if let snapshotDirs = try? FileManager.default.contentsOfDirectory(at: snapshotsDir, includingPropertiesForKeys: nil) { if let snapshotDirs = try? FileManager.default.contentsOfDirectory(at: snapshotsDir, includingPropertiesForKeys: nil) {
print("[LocalModelResolver] Found \(snapshotDirs.count) snapshots") print("[LocalModelResolver] Found \(snapshotDirs.count) snapshots")
for snapshotDir in snapshotDirs where isDirectory(snapshotDir) { for snapshotDir in snapshotDirs where isDirectory(snapshotDir) {
@@ -78,8 +57,6 @@ enum LocalModelResolver {
} }
} }
} }
print("[LocalModelResolver] No valid snapshot found")
}
print("[LocalModelResolver] Model not found locally") print("[LocalModelResolver] Model not found locally")
return nil return nil
@@ -91,38 +68,18 @@ enum LocalModelResolver {
} }
static func discoveredLocalModels() -> [LocalModelInfo] { static func discoveredLocalModels() -> [LocalModelInfo] {
var discovered: [LocalModelInfo] = [] print("[LocalModelResolver] Scanning HF cache: \(hfCacheBase?.path ?? "N/A")")
guard let hfBase = hfCacheBase else { return [] }
// Scan sandbox cache let models = discoverSystemHFModels(in: hfBase)
print("[LocalModelResolver] Scanning sandbox cache: \(modelsBase?.path ?? "N/A")") print("[LocalModelResolver] Found \(models.count) models:")
if let sandboxBase = modelsBase { for model in models {
let sandboxModels = discoverModels(in: sandboxBase)
print("[LocalModelResolver] Found \(sandboxModels.count) models in sandbox cache")
discovered += sandboxModels
}
// Scan system HF cache
print("[LocalModelResolver] Scanning system HF cache: \(hfSystemCache?.path ?? "N/A")")
if let hfBase = hfSystemCache {
let hfModels = discoverSystemHFModels(in: hfBase)
print("[LocalModelResolver] Found \(hfModels.count) models in HF system cache:")
for model in hfModels {
print("[LocalModelResolver] - \(model.repoId) (\(model.sizeBytes / (1024*1024)) MB)") print("[LocalModelResolver] - \(model.repoId) (\(model.sizeBytes / (1024*1024)) MB)")
} }
discovered += hfModels return models
} }
// Remove duplicates (same repoId) and sort static func discoverSystemHFModels(in base: URL) -> [LocalModelInfo] {
let byRepoId = Dictionary(uniqueKeysWithValues: discovered.map { ($0.repoId, $0) })
let finalModels = byRepoId.values.sorted { lhs, rhs in
lhs.repoId.localizedCaseInsensitiveCompare(rhs.repoId) == .orderedAscending
}
print("[LocalModelResolver] Total unique models: \(finalModels.count)")
return finalModels
}
/// Discover models in the system HF cache (~/.cache/huggingface/hub/)
private static func discoverSystemHFModels(in base: URL) -> [LocalModelInfo] {
let fileManager = FileManager.default let fileManager = FileManager.default
let directoryKeys: Set<URLResourceKey> = [.isDirectoryKey] let directoryKeys: Set<URLResourceKey> = [.isDirectoryKey]
guard let modelBases = try? fileManager.contentsOfDirectory( guard let modelBases = try? fileManager.contentsOfDirectory(
@@ -189,41 +146,6 @@ enum LocalModelResolver {
) )
} }
static func discoverModels(in base: URL) -> [LocalModelInfo] {
let fileManager = FileManager.default
let directoryKeys: Set<URLResourceKey> = [.isDirectoryKey]
guard let ownerDirectories = try? fileManager.contentsOfDirectory(
at: base,
includingPropertiesForKeys: Array(directoryKeys),
options: [.skipsHiddenFiles]
) else {
return []
}
var discovered: [LocalModelInfo] = []
for ownerDirectory in ownerDirectories {
guard isDirectory(ownerDirectory) else { continue }
guard let repoDirectories = try? fileManager.contentsOfDirectory(
at: ownerDirectory,
includingPropertiesForKeys: Array(directoryKeys),
options: [.skipsHiddenFiles]
) else {
continue
}
for repoDirectory in repoDirectories where isDirectory(repoDirectory) {
if let info = localModelInfo(ownerDirectory: ownerDirectory, repoDirectory: repoDirectory) {
discovered.append(info)
}
}
}
return discovered.sorted {
$0.repoId.localizedCaseInsensitiveCompare($1.repoId) == .orderedAscending
}
}
private static func isDirectory(_ url: URL) -> Bool { private static func isDirectory(_ url: URL) -> Bool {
var isDir: ObjCBool = false var isDir: ObjCBool = false
if FileManager.default.fileExists(atPath: url.path, isDirectory: &isDir) { if FileManager.default.fileExists(atPath: url.path, isDirectory: &isDir) {
@@ -232,31 +154,6 @@ enum LocalModelResolver {
return false return false
} }
private static func localModelInfo(ownerDirectory: URL, repoDirectory: URL) -> LocalModelInfo? {
let repoId = "\(ownerDirectory.lastPathComponent)/\(repoDirectory.lastPathComponent)"
guard containsModelArtifacts(at: repoDirectory) else { return nil }
let config = readJSONObject(at: repoDirectory.appendingPathComponent("config.json"))
let tokenizerConfig = readJSONObject(at: repoDirectory.appendingPathComponent("tokenizer_config.json"))
let supportsImages = inferredSupportsImages(
repoDirectory: repoDirectory,
config: config,
tokenizerConfig: tokenizerConfig
)
let sizeBytes = directorySize(at: repoDirectory)
let contextLength = inferredContextLength(config: config, tokenizerConfig: tokenizerConfig)
let loaderKinds: [ModelConfig.LoaderKind] = supportsImages ? [.vlm, .llm] : [.llm, .vlm]
return LocalModelInfo(
repoId: repoId,
directory: repoDirectory,
sizeBytes: sizeBytes,
contextLength: contextLength,
loaderKinds: loaderKinds,
supportsImages: supportsImages
)
}
private static func containsModelArtifacts(at directory: URL) -> Bool { private static func containsModelArtifacts(at directory: URL) -> Bool {
let requiredPaths = [ let requiredPaths = [
directory.appendingPathComponent("config.json").path, directory.appendingPathComponent("config.json").path,
@@ -267,43 +164,23 @@ enum LocalModelResolver {
} }
/// Delete the local cache for a model so it will be re-downloaded next time. /// Delete the local cache for a model so it will be re-downloaded next time.
/// Removes from both sandbox cache and system HF cache if present.
@discardableResult @discardableResult
static func deleteLocal(repoId: String) -> Bool { static func deleteLocal(repoId: String) -> Bool {
var deleted = false guard let hfBase = hfCacheBase else { return false }
// Delete from sandbox cache
if let base = modelsBase {
let modelDir = base.appendingPathComponent(repoId, isDirectory: true)
if FileManager.default.fileExists(atPath: modelDir.path) {
do {
try FileManager.default.removeItem(at: modelDir)
print("[LocalModelResolver] Deleted sandbox cache: \(modelDir.path)")
deleted = true
} catch {
print("[LocalModelResolver] Failed to delete \(modelDir.path): \(error)")
}
}
}
// Delete from system HF cache
// Structure: ~/.cache/huggingface/hub/models--{org}--{name}/
if let hfBase = hfSystemCache {
let repoSlug = repoId.replacingOccurrences(of: "/", with: "--") let repoSlug = repoId.replacingOccurrences(of: "/", with: "--")
let modelBase = hfBase.appendingPathComponent("models--\(repoSlug)", isDirectory: true) let modelBase = hfBase.appendingPathComponent("models--\(repoSlug)", isDirectory: true)
if FileManager.default.fileExists(atPath: modelBase.path) { guard FileManager.default.fileExists(atPath: modelBase.path) else { return false }
do { do {
try FileManager.default.removeItem(at: modelBase) try FileManager.default.removeItem(at: modelBase)
print("[LocalModelResolver] Deleted system cache: \(modelBase.path)") print("[LocalModelResolver] Deleted cache: \(modelBase.path)")
deleted = true return true
} catch { } catch {
print("[LocalModelResolver] Failed to delete \(modelBase.path): \(error)") print("[LocalModelResolver] Failed to delete \(modelBase.path): \(error)")
return false
} }
} }
}
return deleted
}
private static func readJSONObject(at url: URL) -> [String: Any]? { private static func readJSONObject(at url: URL) -> [String: Any]? {
guard let data = try? Data(contentsOf: url) else { return nil } guard let data = try? Data(contentsOf: url) else { return nil }

View File

@@ -1,24 +1,18 @@
import Foundation import Foundation
import Hub import HuggingFace
import MLX import MLX
import MLXHuggingFace
import MLXLLM import MLXLLM
import MLXLMCommon import MLXLMCommon
import MLXVLM import MLXVLM
import Tokenizers
/// Manages model loading, switching, and generation. /// Manages model loading, switching, and generation.
@Observable @Observable
@MainActor @MainActor
final class ModelManager { final class ModelManager {
/// HubApi with blob cache disabled to avoid storing every model twice. private static let hubClient = HubClient.default
/// swift-huggingface defaults to caching in both huggingface/hub/ (snapshots)
/// AND models/ (content-addressed blobs). We only need the snapshots.
/// Must use the same downloadBase as defaultHubApi (.cachesDirectory) so
/// LocalModelResolver can find downloaded models.
private static let hub: HubApi = {
let cachesDir = FileManager.default.urls(for: .cachesDirectory, in: .userDomainMask).first
return HubApi(downloadBase: cachesDir, cache: nil)
}()
var currentModel: ModelConfig? var currentModel: ModelConfig?
var availableModels: [ModelConfig] var availableModels: [ModelConfig]
@@ -235,13 +229,15 @@ final class ModelManager {
switch loaderKind { switch loaderKind {
case .llm: case .llm:
return try await LLMModelFactory.shared.loadContainer( return try await LLMModelFactory.shared.loadContainer(
hub: Self.hub, from: #hubDownloader(Self.hubClient),
using: #huggingFaceTokenizerLoader(),
configuration: configuration, configuration: configuration,
progressHandler: progressHandler progressHandler: progressHandler
) )
case .vlm: case .vlm:
return try await VLMModelFactory.shared.loadContainer( return try await VLMModelFactory.shared.loadContainer(
hub: Self.hub, from: #hubDownloader(Self.hubClient),
using: #huggingFaceTokenizerLoader(),
configuration: configuration, configuration: configuration,
progressHandler: progressHandler progressHandler: progressHandler
) )

View File

@@ -3,30 +3,27 @@ import XCTest
@testable import MLX_Server @testable import MLX_Server
final class LocalModelResolverTests: XCTestCase { final class LocalModelResolverTests: XCTestCase {
func testDiscoverModelsInfersTextOnlyMetadataAndDirectorySize() throws { func testDiscoverSystemHFModelsInfersTextOnlyMetadata() throws {
let base = try makeTempModelsRoot() let base = try makeTempHFCache()
let repoDirectory = try makeRepoDirectory(base: base, owner: "example", repo: "text-only") let snapshotDir = try makeHFSnapshot(base: base, repoId: "example/text-only")
let configURL = repoDirectory.appendingPathComponent("config.json")
let modelURL = repoDirectory.appendingPathComponent("model.safetensors")
let tokenizerURL = repoDirectory.appendingPathComponent("tokenizer.json")
try writeJSON( try writeJSON(
[ [
"architectures": ["LlamaForCausalLM"], "architectures": ["LlamaForCausalLM"],
"max_position_embeddings": 32768, "max_position_embeddings": 32768,
], ],
to: configURL to: snapshotDir.appendingPathComponent("config.json")
) )
try Data(repeating: 0x11, count: 64).write(to: modelURL) try Data(repeating: 0x11, count: 64).write(to: snapshotDir.appendingPathComponent("model.safetensors"))
try Data(repeating: 0x22, count: 19).write(to: tokenizerURL) try Data(repeating: 0x22, count: 19).write(to: snapshotDir.appendingPathComponent("tokenizer.json"))
let expectedSize = Int64( let expectedSize = Int64(
try Data(contentsOf: configURL).count try Data(contentsOf: snapshotDir.appendingPathComponent("config.json")).count
+ Data(contentsOf: modelURL).count + Data(contentsOf: snapshotDir.appendingPathComponent("model.safetensors")).count
+ Data(contentsOf: tokenizerURL).count + Data(contentsOf: snapshotDir.appendingPathComponent("tokenizer.json")).count
) )
let discovered = LocalModelResolver.discoverModels(in: base) let discovered = LocalModelResolver.discoverSystemHFModels(in: base)
let model = try XCTUnwrap(discovered.first) let model = try XCTUnwrap(discovered.first)
XCTAssertEqual(model.repoId, "example/text-only") XCTAssertEqual(model.repoId, "example/text-only")
@@ -36,21 +33,25 @@ final class LocalModelResolverTests: XCTestCase {
XCTAssertEqual(model.sizeBytes, expectedSize) XCTAssertEqual(model.sizeBytes, expectedSize)
} }
func testDiscoverModelsInfersVisionMetadataFromProcessorFiles() throws { func testDiscoverSystemHFModelsInfersVisionMetadata() throws {
let base = try makeTempModelsRoot() let base = try makeTempHFCache()
let repoDirectory = try makeRepoDirectory(base: base, owner: "example", repo: "vision-model") let snapshotDir = try makeHFSnapshot(base: base, repoId: "example/vision-model")
try writeJSON( try writeJSON(
[ [
"text_config": ["max_position_embeddings": 262144], "text_config": ["max_position_embeddings": 262144],
"vision_config": ["hidden_size": 768], "vision_config": ["hidden_size": 768],
], ],
to: repoDirectory.appendingPathComponent("config.json") to: snapshotDir.appendingPathComponent("config.json")
) )
try writeJSON(["processor_class": "Qwen3VLProcessor"], to: repoDirectory.appendingPathComponent("tokenizer_config.json")) try writeJSON(
try Data(repeating: 0x33, count: 12).write(to: repoDirectory.appendingPathComponent("processor_config.json")) ["processor_class": "Qwen3VLProcessor"],
try Data(repeating: 0x44, count: 8).write(to: repoDirectory.appendingPathComponent("model.safetensors.index.json")) to: snapshotDir.appendingPathComponent("tokenizer_config.json")
)
try Data(repeating: 0x33, count: 12).write(to: snapshotDir.appendingPathComponent("processor_config.json"))
try Data(repeating: 0x44, count: 8).write(to: snapshotDir.appendingPathComponent("model.safetensors.index.json"))
let discovered = LocalModelResolver.discoverModels(in: base) let discovered = LocalModelResolver.discoverSystemHFModels(in: base)
let model = try XCTUnwrap(discovered.first) let model = try XCTUnwrap(discovered.first)
XCTAssertEqual(model.repoId, "example/vision-model") XCTAssertEqual(model.repoId, "example/vision-model")
@@ -155,7 +156,7 @@ final class LocalModelResolverTests: XCTestCase {
XCTAssertTrue(config.supportsTools) XCTAssertTrue(config.supportsTools)
} }
private func makeTempModelsRoot() throws -> URL { private func makeTempHFCache() throws -> URL {
let root = FileManager.default.temporaryDirectory let root = FileManager.default.temporaryDirectory
.appendingPathComponent(UUID().uuidString, isDirectory: true) .appendingPathComponent(UUID().uuidString, isDirectory: true)
try FileManager.default.createDirectory(at: root, withIntermediateDirectories: true) try FileManager.default.createDirectory(at: root, withIntermediateDirectories: true)
@@ -165,12 +166,14 @@ final class LocalModelResolverTests: XCTestCase {
return root return root
} }
private func makeRepoDirectory(base: URL, owner: String, repo: String) throws -> URL { private func makeHFSnapshot(base: URL, repoId: String, hash: String = "abc123") throws -> URL {
let directory = base let slug = repoId.replacingOccurrences(of: "/", with: "--")
.appendingPathComponent(owner, isDirectory: true) let snapshotDir = base
.appendingPathComponent(repo, isDirectory: true) .appendingPathComponent("models--\(slug)", isDirectory: true)
try FileManager.default.createDirectory(at: directory, withIntermediateDirectories: true) .appendingPathComponent("snapshots", isDirectory: true)
return directory .appendingPathComponent(hash, isDirectory: true)
try FileManager.default.createDirectory(at: snapshotDir, withIntermediateDirectories: true)
return snapshotDir
} }
private func writeJSON(_ object: Any, to url: URL) throws { private func writeJSON(_ object: Any, to url: URL) throws {

View File

@@ -1,7 +1,9 @@
import Foundation import Foundation
import Hub import HuggingFace
import MLXHuggingFace
import MLXLMCommon import MLXLMCommon
import MLXVLM import MLXVLM
import Tokenizers
import XCTest import XCTest
@testable import MLX_Server @testable import MLX_Server
@@ -671,10 +673,9 @@ private actor LocalGemmaFixture {
} }
let loadTask = Task<ModelContainer, Error> { let loadTask = Task<ModelContainer, Error> {
let cachesDir = FileManager.default.urls(for: .cachesDirectory, in: .userDomainMask).first
let hub = HubApi(downloadBase: cachesDir, cache: nil)
return try await VLMModelFactory.shared.loadContainer( return try await VLMModelFactory.shared.loadContainer(
hub: hub, from: #hubDownloader(HubClient.default),
using: #huggingFaceTokenizerLoader(),
configuration: ModelConfiguration(directory: localDir), configuration: ModelConfiguration(directory: localDir),
progressHandler: { _ in } progressHandler: { _ in }
) )

View File

@@ -1,8 +1,10 @@
import Foundation import Foundation
import Hub import HuggingFace
import MLX import MLX
import MLXHuggingFace
import MLXLMCommon import MLXLMCommon
import MLXVLM import MLXVLM
import Tokenizers
import XCTest import XCTest
@testable import MLX_Server @testable import MLX_Server
@@ -230,10 +232,9 @@ private actor LocalGemmaFixture {
} }
let loadTask = Task<ModelContainer, Error> { let loadTask = Task<ModelContainer, Error> {
let cachesDir = FileManager.default.urls(for: .cachesDirectory, in: .userDomainMask).first
let hub = HubApi(downloadBase: cachesDir, cache: nil)
return try await VLMModelFactory.shared.loadContainer( return try await VLMModelFactory.shared.loadContainer(
hub: hub, from: #hubDownloader(HubClient.default),
using: #huggingFaceTokenizerLoader(),
configuration: ModelConfiguration(directory: localDir), configuration: ModelConfiguration(directory: localDir),
progressHandler: { _ in } progressHandler: { _ in }
) )

View File

@@ -249,4 +249,11 @@ private final class NonStandardCache: KVCache {
) -> MLXFast.ScaledDotProductAttentionMaskMode { ) -> MLXFast.ScaledDotProductAttentionMaskMode {
.none .none
} }
/// Builds a fresh `NonStandardCache` and transfers this cache's `state` and
/// `offset` onto it, returning the result as `any KVCache`.
func copy() -> any KVCache {
// Zero placeholders for the initializer; the real values are assigned below.
let duplicate = NonStandardCache(tokenCount: 0, headDim: 0)
// Order kept as state-then-offset: the setters are not visible here and may interact.
duplicate.state = self.state
duplicate.offset = self.offset
return duplicate
}
} }

View File

@@ -388,4 +388,10 @@ private final class TestTrimRecordingCache: KVCache {
) -> MLXFast.ScaledDotProductAttentionMaskMode { ) -> MLXFast.ScaledDotProductAttentionMaskMode {
.none .none
} }
/// Returns a new `TestTrimRecordingCache` constructed with this cache's
/// `offset` and `trimmable` values and given the same `state`.
func copy() -> any KVCache {
let duplicate = TestTrimRecordingCache(offset: self.offset, trimmable: self.trimmable)
// `state` is not an initializer argument, so it is carried over separately.
duplicate.state = self.state
return duplicate
}
} }

View File

@@ -19,6 +19,7 @@ xcodebuild \
-scheme MLXServer \ -scheme MLXServer \
-destination 'platform=macOS' \ -destination 'platform=macOS' \
-configuration "$CONFIG" \ -configuration "$CONFIG" \
-skipMacroValidation \
SYMROOT="$BUILD_DIR" \ SYMROOT="$BUILD_DIR" \
build 2>&1 | \ build 2>&1 | \
grep -E "(CompileSwift .* 'MLXServer'|error:|warning:.*MLXServer/|BUILD )" | \ grep -E "(CompileSwift .* 'MLXServer'|error:|warning:.*MLXServer/|BUILD )" | \

View File

@@ -9,7 +9,13 @@ options:
packages: packages:
mlx-swift-lm: mlx-swift-lm:
url: https://github.com/ml-explore/mlx-swift-lm url: https://github.com/ml-explore/mlx-swift-lm
branch: main from: "3.31.3"
swift-huggingface:
url: https://github.com/huggingface/swift-huggingface
from: "0.9.0"
swift-transformers:
url: https://github.com/huggingface/swift-transformers
from: "1.2.0"
MarkdownUI: MarkdownUI:
url: https://github.com/gonzalezreal/swift-markdown-ui url: https://github.com/gonzalezreal/swift-markdown-ui
from: "2.4.0" from: "2.4.0"
@@ -40,6 +46,12 @@ targets:
product: MLXVLM product: MLXVLM
- package: mlx-swift-lm - package: mlx-swift-lm
product: MLXLMCommon product: MLXLMCommon
- package: mlx-swift-lm
product: MLXHuggingFace
- package: swift-huggingface
product: HuggingFace
- package: swift-transformers
product: Tokenizers
- package: MarkdownUI - package: MarkdownUI
product: MarkdownUI product: MarkdownUI
MLXServerTests: MLXServerTests:

27
resolve-packages.sh Executable file
View File

@@ -0,0 +1,27 @@
#!/bin/bash
# Re-resolves the Swift package dependencies of MLXServer.xcodeproj from a
# clean slate: the lockfile, the script-local SwiftPM cache/clone directories
# and build/DerivedData are all removed before xcodebuild resolves again.
set -euo pipefail

# Directory containing this script; every other path is derived from it.
root="$(cd "$(dirname "$0")" && pwd)"
xcodeproj="$root/MLXServer.xcodeproj"
lockfile="$xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved"
spm_state="$root/build/swiftpm"
spm_cache="$spm_state/cache"
spm_clones="$spm_state/clones"
derived_data="$root/build/DerivedData"

echo "==> Resolving Swift packages from project.yml constraints"

# Force a fresh resolve. The original rationale: for branch-based dependencies
# (like mlx-swift-lm main) the lockfile should follow the current branch head.
# NOTE(review): if project.yml now pins mlx-swift-lm with a `from:` version
# instead of a branch, confirm that discarding the lockfile is still intended.
rm -f "$lockfile"
rm -rf "$spm_cache" "$spm_clones"
rm -rf "$derived_data"

# Recreate the (now empty) directories handed to xcodebuild below.
mkdir -p "$spm_cache" "$spm_clones"

resolve_args=(
  -resolvePackageDependencies
  -project "$xcodeproj"
  -scheme MLXServer
  -disablePackageRepositoryCache
  -packageCachePath "$spm_cache"
  -clonedSourcePackagesDirPath "$spm_clones"
)

xcodebuild "${resolve_args[@]}"

View File

@@ -21,6 +21,7 @@ XCODEBUILD_ARGS=(
-scheme MLXServer -scheme MLXServer
-destination "$DESTINATION" -destination "$DESTINATION"
-configuration "$CONFIG" -configuration "$CONFIG"
-skipMacroValidation
SYMROOT="$BUILD_DIR" SYMROOT="$BUILD_DIR"
) )