diff --git a/MLXServer.xcodeproj/project.pbxproj b/MLXServer.xcodeproj/project.pbxproj index fb284b8..5240f12 100644 --- a/MLXServer.xcodeproj/project.pbxproj +++ b/MLXServer.xcodeproj/project.pbxproj @@ -17,6 +17,7 @@ 20FFB5DBF75AA6C359AAE31C /* SceneManagementView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 37FEB592E5E717F817B03151 /* SceneManagementView.swift */; }; 221DEC86374902FCFD661A01 /* TokenPrefixCacheTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 64B2EDD5D1881AC9E1E60913 /* TokenPrefixCacheTests.swift */; }; 2640EDCA9033D85C0B785557 /* GenerationSettings.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6FAF7455BD387CD2061E0CBF /* GenerationSettings.swift */; }; + 28A780EEB6DC74B5B0BBF03D /* HuggingFace in Frameworks */ = {isa = PBXBuildFile; productRef = FDBFD829EE956976552514CC /* HuggingFace */; }; 29879D696584B96CC56560DF /* ChatExporter.swift in Sources */ = {isa = PBXBuildFile; fileRef = D7C9BAD674E29688ACE53B0B /* ChatExporter.swift */; }; 2CAAF7129F7CC45200FA9F6B /* ModelPickerView.swift in Sources */ = {isa = PBXBuildFile; fileRef = C3C3A76C02AF70A9D8F868FC /* ModelPickerView.swift */; }; 2D08769282BD71C170DB0943 /* InferenceStats.swift in Sources */ = {isa = PBXBuildFile; fileRef = E35452B166893B25E765FF70 /* InferenceStats.swift */; }; @@ -38,6 +39,7 @@ 6828CCA8B78AB40906F87CAB /* LocalModelResolver.swift in Sources */ = {isa = PBXBuildFile; fileRef = D733A0D1D4AC25DDDA6C8684 /* LocalModelResolver.swift */; }; 741692862DB1F13EA0B2D14D /* TokenPrefixCache.swift in Sources */ = {isa = PBXBuildFile; fileRef = 1962D530BEABCC7F1E8E0ED1 /* TokenPrefixCache.swift */; }; 75E046B4ABB1E6FEF17C1A60 /* ModelManagementWindow.swift in Sources */ = {isa = PBXBuildFile; fileRef = 721D6F203A10434FE0223042 /* ModelManagementWindow.swift */; }; + 777AEBB3471D8838F0F51D08 /* MarkdownUI in Frameworks */ = {isa = PBXBuildFile; productRef = A98257123539E9E738213BFA /* MarkdownUI */; }; 7936325B425DFA2931F6E421 /* 
ModelBackedQuantizationTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = F7E6F18C80D9859E89D2B4E3 /* ModelBackedQuantizationTests.swift */; }; 7CD765C1E2F9F4D7504C8D09 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = B629DA084A9A40E54F8EA5FA /* Assets.xcassets */; }; 80646C5066BF79BC76E1D9D7 /* ModelConfig.swift in Sources */ = {isa = PBXBuildFile; fileRef = 38DFC212AF4359A45FBE22BA /* ModelConfig.swift */; }; @@ -67,7 +69,8 @@ E92B6656C251EDA246B8F582 /* ImageDecoderTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = E4573DC9314915F4C7963B4E /* ImageDecoderTests.swift */; }; EC4FC68608DDFA6A3DF133CC /* InferenceEngine.swift in Sources */ = {isa = PBXBuildFile; fileRef = 02EBDE0C72D1C5CE220E5B93 /* InferenceEngine.swift */; }; EDE59C241940E7B9B53D520D /* TokenPrefixCacheQuantizationTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = D50504058693CDE533D755B5 /* TokenPrefixCacheQuantizationTests.swift */; }; - F546CE5955ED253D8A793D5E /* MarkdownUI in Frameworks */ = {isa = PBXBuildFile; productRef = A98257123539E9E738213BFA /* MarkdownUI */; }; + F2A137B60D5DFCC591A01420 /* Tokenizers in Frameworks */ = {isa = PBXBuildFile; productRef = BD266A137966DB9451C2C352 /* Tokenizers */; }; + F546CE5955ED253D8A793D5E /* MLXHuggingFace in Frameworks */ = {isa = PBXBuildFile; productRef = 269A55730E9BDC735F9C2B78 /* MLXHuggingFace */; }; FAF7D4714AC6D02674920208 /* ChatMessage.swift in Sources */ = {isa = PBXBuildFile; fileRef = A4B359324B5FD8D106C74338 /* ChatMessage.swift */; }; FCD48F8C132A2B830A15EEB4 /* MLXLLM in Frameworks */ = {isa = PBXBuildFile; productRef = 3F5A4AC6DBAF7CA686ECA74E /* MLXLLM */; }; FE4405F66873C75CD6FA19A5 /* StreamingSSEEncoderTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 49C383DD5224F3420EB98DB2 /* StreamingSSEEncoderTests.swift */; }; @@ -158,7 +161,10 @@ FCD48F8C132A2B830A15EEB4 /* MLXLLM in Frameworks */, 945474365D0B3E961811909A /* MLXVLM in Frameworks */, B6D3662995B885C102876B4A /* 
MLXLMCommon in Frameworks */, - F546CE5955ED253D8A793D5E /* MarkdownUI in Frameworks */, + F546CE5955ED253D8A793D5E /* MLXHuggingFace in Frameworks */, + 28A780EEB6DC74B5B0BBF03D /* HuggingFace in Frameworks */, + F2A137B60D5DFCC591A01420 /* Tokenizers in Frameworks */, + 777AEBB3471D8838F0F51D08 /* MarkdownUI in Frameworks */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -343,6 +349,9 @@ 3F5A4AC6DBAF7CA686ECA74E /* MLXLLM */, D5E8E1C2DD8D8AABB4306193 /* MLXVLM */, 9090667D4134056AE66DC2F1 /* MLXLMCommon */, + 269A55730E9BDC735F9C2B78 /* MLXHuggingFace */, + FDBFD829EE956976552514CC /* HuggingFace */, + BD266A137966DB9451C2C352 /* Tokenizers */, A98257123539E9E738213BFA /* MarkdownUI */, ); productName = MLXServer; @@ -390,6 +399,8 @@ packageReferences = ( D402301668D113A49B6DD32D /* XCRemoteSwiftPackageReference "swift-markdown-ui" */, 1AA4C71F15847A241E418C0C /* XCRemoteSwiftPackageReference "mlx-swift-lm" */, + A6D001FF3D9EA5BA3112F5BF /* XCRemoteSwiftPackageReference "swift-huggingface" */, + 5479E9F7A876DC346598E560 /* XCRemoteSwiftPackageReference "swift-transformers" */, ); preferredProjectObjectVersion = 77; productRefGroup = 652987C2A419DBFC79E32CDE /* Products */; @@ -736,8 +747,24 @@ isa = XCRemoteSwiftPackageReference; repositoryURL = "https://github.com/ml-explore/mlx-swift-lm"; requirement = { - branch = main; - kind = branch; + kind = upToNextMajorVersion; + minimumVersion = 3.31.3; + }; + }; + 5479E9F7A876DC346598E560 /* XCRemoteSwiftPackageReference "swift-transformers" */ = { + isa = XCRemoteSwiftPackageReference; + repositoryURL = "https://github.com/huggingface/swift-transformers"; + requirement = { + kind = upToNextMajorVersion; + minimumVersion = 1.2.0; + }; + }; + A6D001FF3D9EA5BA3112F5BF /* XCRemoteSwiftPackageReference "swift-huggingface" */ = { + isa = XCRemoteSwiftPackageReference; + repositoryURL = "https://github.com/huggingface/swift-huggingface"; + requirement = { + kind = upToNextMajorVersion; + minimumVersion = 0.9.0; }; }; 
D402301668D113A49B6DD32D /* XCRemoteSwiftPackageReference "swift-markdown-ui" */ = { @@ -751,6 +778,11 @@ /* End XCRemoteSwiftPackageReference section */ /* Begin XCSwiftPackageProductDependency section */ + 269A55730E9BDC735F9C2B78 /* MLXHuggingFace */ = { + isa = XCSwiftPackageProductDependency; + package = 1AA4C71F15847A241E418C0C /* XCRemoteSwiftPackageReference "mlx-swift-lm" */; + productName = MLXHuggingFace; + }; 3F5A4AC6DBAF7CA686ECA74E /* MLXLLM */ = { isa = XCSwiftPackageProductDependency; package = 1AA4C71F15847A241E418C0C /* XCRemoteSwiftPackageReference "mlx-swift-lm" */; @@ -766,11 +798,21 @@ package = D402301668D113A49B6DD32D /* XCRemoteSwiftPackageReference "swift-markdown-ui" */; productName = MarkdownUI; }; + BD266A137966DB9451C2C352 /* Tokenizers */ = { + isa = XCSwiftPackageProductDependency; + package = 5479E9F7A876DC346598E560 /* XCRemoteSwiftPackageReference "swift-transformers" */; + productName = Tokenizers; + }; D5E8E1C2DD8D8AABB4306193 /* MLXVLM */ = { isa = XCSwiftPackageProductDependency; package = 1AA4C71F15847A241E418C0C /* XCRemoteSwiftPackageReference "mlx-swift-lm" */; productName = MLXVLM; }; + FDBFD829EE956976552514CC /* HuggingFace */ = { + isa = XCSwiftPackageProductDependency; + package = A6D001FF3D9EA5BA3112F5BF /* XCRemoteSwiftPackageReference "swift-huggingface" */; + productName = HuggingFace; + }; /* End XCSwiftPackageProductDependency section */ }; rootObject = 938BC479816FCA8527B731F9 /* Project object */; diff --git a/MLXServer.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved b/MLXServer.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved index a814384..88e8a84 100644 --- a/MLXServer.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved +++ b/MLXServer.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved @@ -1,5 +1,5 @@ { - "originHash" : "418f7299ccb303e0e8992dfc960a3df5df98d527f18667aa162699027b29b6cd", + "originHash" : 
"af28e5c426709ddbdb4b91bab23f3971aba7ff96fb35d16285d757a8f482e340", "pins" : [ { "identity" : "eventsource", @@ -15,8 +15,8 @@ "kind" : "remoteSourceControl", "location" : "https://github.com/ml-explore/mlx-swift", "state" : { - "revision" : "6ba4827fb82c97d012eec9ab4b2de21f85c3b33d", - "version" : "0.30.6" + "revision" : "61b9e011e09a62b489f6bd647958f1555bdf2896", + "version" : "0.31.3" } }, { @@ -24,8 +24,8 @@ "kind" : "remoteSourceControl", "location" : "https://github.com/ml-explore/mlx-swift-lm", "state" : { - "branch" : "main", - "revision" : "bc3c20ef4644c86f2b347debcfe1efe4308712a6" + "revision" : "1c05248bb0899e2a7a4962b84d319cf12f4e12aa", + "version" : "3.31.3" } }, { @@ -127,6 +127,15 @@ "version" : "1.1.1" } }, + { + "identity" : "swift-syntax", + "kind" : "remoteSourceControl", + "location" : "https://github.com/swiftlang/swift-syntax.git", + "state" : { + "revision" : "0687f71944021d616d34d922343dcef086855920", + "version" : "600.0.1" + } + }, { "identity" : "swift-system", "kind" : "remoteSourceControl", diff --git a/MLXServer/Utilities/LocalModelResolver.swift b/MLXServer/Utilities/LocalModelResolver.swift index a93b5aa..a946089 100644 --- a/MLXServer/Utilities/LocalModelResolver.swift +++ b/MLXServer/Utilities/LocalModelResolver.swift @@ -1,9 +1,6 @@ import Foundation -/// Resolves HuggingFace model repos to local directories. -/// Checks multiple locations: -/// 1. Sandbox cache: ~/Library/Containers/de.rfc1437.mlxserver/Data/Library/Caches/models/{org}/{name}/ -/// 2. System HF cache: ~/.cache/huggingface/hub/ +/// Resolves HuggingFace model repos to local directories in ~/.cache/huggingface/hub/. enum LocalModelResolver { struct LocalModelInfo: Identifiable, Hashable { @@ -17,16 +14,8 @@ enum LocalModelResolver { var id: String { repoId } } - /// Base directory where HubApi stores downloaded models (sandbox cache). - private static let modelsBase: URL? = { - FileManager.default.urls(for: .cachesDirectory, in: .userDomainMask).first? 
- .appendingPathComponent("models", isDirectory: true) - }() - - /// System HuggingFace cache directory (~/.cache/huggingface/hub/). - /// Note: Requires com.apple.security.files.home-relative-directory.read entitlement - private static let hfSystemCache: URL? = { - // Use homeDirectoryForCurrentUser which works in sandbox with proper entitlement + /// HuggingFace cache directory (~/.cache/huggingface/hub/). + private static let hfCacheBase: URL? = { return FileManager.default.homeDirectoryForCurrentUser .appendingPathComponent(".cache", isDirectory: true) .appendingPathComponent("huggingface", isDirectory: true) @@ -35,50 +24,38 @@ enum LocalModelResolver { /// Resolve a HuggingFace repo ID (e.g. "mlx-community/gemma-3-4b-it-4bit") /// to its local directory, if it exists. - /// Checks sandbox cache first, then system HF cache. /// /// Returns `nil` if the model hasn't been downloaded yet. static func resolve(repoId: String) -> URL? { print("[LocalModelResolver] Resolving: \(repoId)") - - // Check sandbox cache first - if let base = modelsBase { - let modelDir = base.appendingPathComponent(repoId, isDirectory: true) - var isDir: ObjCBool = false - if FileManager.default.fileExists(atPath: modelDir.path, isDirectory: &isDir), isDir.boolValue { - print("[LocalModelResolver] Found in sandbox cache: \(modelDir.path)") - return modelDir - } + + // Structure: ~/.cache/huggingface/hub/models--{org}--{name}/snapshots/{commit-hash}/ + guard let hfBase = hfCacheBase else { + print("[LocalModelResolver] No cache base") + return nil } - // Check system HF cache - // Structure: ~/.cache/huggingface/hub/models--{org}--{name}/snapshots/{commit-hash}/ - if let hfBase = hfSystemCache { - let repoSlug = repoId.replacingOccurrences(of: "/", with: "--") - let modelBase = hfBase.appendingPathComponent("models--\(repoSlug)", isDirectory: true) - - print("[LocalModelResolver] Checking HF cache: \(modelBase.path)") - - // Look for snapshots directory - let snapshotsDir = 
modelBase.appendingPathComponent("snapshots", isDirectory: true) - var isDir: ObjCBool = false - guard FileManager.default.fileExists(atPath: snapshotsDir.path, isDirectory: &isDir), isDir.boolValue else { - print("[LocalModelResolver] No snapshots directory found") - return nil - } + let repoSlug = repoId.replacingOccurrences(of: "/", with: "--") + let modelBase = hfBase.appendingPathComponent("models--\(repoSlug)", isDirectory: true) - // Find the latest snapshot (commit hash directories) - if let snapshotDirs = try? FileManager.default.contentsOfDirectory(at: snapshotsDir, includingPropertiesForKeys: nil) { - print("[LocalModelResolver] Found \(snapshotDirs.count) snapshots") - for snapshotDir in snapshotDirs where isDirectory(snapshotDir) { - let configPath = snapshotDir.appendingPathComponent("config.json") - if FileManager.default.fileExists(atPath: configPath.path) { - print("[LocalModelResolver] Found valid snapshot: \(snapshotDir.path)") - return snapshotDir - } + print("[LocalModelResolver] Checking HF cache: \(modelBase.path)") + + let snapshotsDir = modelBase.appendingPathComponent("snapshots", isDirectory: true) + var isDir: ObjCBool = false + guard FileManager.default.fileExists(atPath: snapshotsDir.path, isDirectory: &isDir), isDir.boolValue else { + print("[LocalModelResolver] No snapshots directory found") + return nil + } + + if let snapshotDirs = try? 
FileManager.default.contentsOfDirectory(at: snapshotsDir, includingPropertiesForKeys: nil) { + print("[LocalModelResolver] Found \(snapshotDirs.count) snapshots") + for snapshotDir in snapshotDirs where isDirectory(snapshotDir) { + let configPath = snapshotDir.appendingPathComponent("config.json") + if FileManager.default.fileExists(atPath: configPath.path) { + print("[LocalModelResolver] Found valid snapshot: \(snapshotDir.path)") + return snapshotDir } } - print("[LocalModelResolver] No valid snapshot found") } print("[LocalModelResolver] Model not found locally") @@ -91,38 +68,18 @@ enum LocalModelResolver { } static func discoveredLocalModels() -> [LocalModelInfo] { - var discovered: [LocalModelInfo] = [] + print("[LocalModelResolver] Scanning HF cache: \(hfCacheBase?.path ?? "N/A")") + guard let hfBase = hfCacheBase else { return [] } - // Scan sandbox cache - print("[LocalModelResolver] Scanning sandbox cache: \(modelsBase?.path ?? "N/A")") - if let sandboxBase = modelsBase { - let sandboxModels = discoverModels(in: sandboxBase) - print("[LocalModelResolver] Found \(sandboxModels.count) models in sandbox cache") - discovered += sandboxModels + let models = discoverSystemHFModels(in: hfBase) + print("[LocalModelResolver] Found \(models.count) models:") + for model in models { + print("[LocalModelResolver] - \(model.repoId) (\(model.sizeBytes / (1024*1024)) MB)") } - - // Scan system HF cache - print("[LocalModelResolver] Scanning system HF cache: \(hfSystemCache?.path ?? 
"N/A")") - if let hfBase = hfSystemCache { - let hfModels = discoverSystemHFModels(in: hfBase) - print("[LocalModelResolver] Found \(hfModels.count) models in HF system cache:") - for model in hfModels { - print("[LocalModelResolver] - \(model.repoId) (\(model.sizeBytes / (1024*1024)) MB)") - } - discovered += hfModels - } - - // Remove duplicates (same repoId) and sort - let byRepoId = Dictionary(uniqueKeysWithValues: discovered.map { ($0.repoId, $0) }) - let finalModels = byRepoId.values.sorted { lhs, rhs in - lhs.repoId.localizedCaseInsensitiveCompare(rhs.repoId) == .orderedAscending - } - print("[LocalModelResolver] Total unique models: \(finalModels.count)") - return finalModels + return models } - /// Discover models in the system HF cache (~/.cache/huggingface/hub/) - private static func discoverSystemHFModels(in base: URL) -> [LocalModelInfo] { + static func discoverSystemHFModels(in base: URL) -> [LocalModelInfo] { let fileManager = FileManager.default let directoryKeys: Set = [.isDirectoryKey] guard let modelBases = try? fileManager.contentsOfDirectory( @@ -189,41 +146,6 @@ enum LocalModelResolver { ) } - static func discoverModels(in base: URL) -> [LocalModelInfo] { - let fileManager = FileManager.default - let directoryKeys: Set = [.isDirectoryKey] - guard let ownerDirectories = try? fileManager.contentsOfDirectory( - at: base, - includingPropertiesForKeys: Array(directoryKeys), - options: [.skipsHiddenFiles] - ) else { - return [] - } - - var discovered: [LocalModelInfo] = [] - - for ownerDirectory in ownerDirectories { - guard isDirectory(ownerDirectory) else { continue } - guard let repoDirectories = try? 
fileManager.contentsOfDirectory( - at: ownerDirectory, - includingPropertiesForKeys: Array(directoryKeys), - options: [.skipsHiddenFiles] - ) else { - continue - } - - for repoDirectory in repoDirectories where isDirectory(repoDirectory) { - if let info = localModelInfo(ownerDirectory: ownerDirectory, repoDirectory: repoDirectory) { - discovered.append(info) - } - } - } - - return discovered.sorted { - $0.repoId.localizedCaseInsensitiveCompare($1.repoId) == .orderedAscending - } - } - private static func isDirectory(_ url: URL) -> Bool { var isDir: ObjCBool = false if FileManager.default.fileExists(atPath: url.path, isDirectory: &isDir) { @@ -232,31 +154,6 @@ enum LocalModelResolver { return false } - private static func localModelInfo(ownerDirectory: URL, repoDirectory: URL) -> LocalModelInfo? { - let repoId = "\(ownerDirectory.lastPathComponent)/\(repoDirectory.lastPathComponent)" - guard containsModelArtifacts(at: repoDirectory) else { return nil } - - let config = readJSONObject(at: repoDirectory.appendingPathComponent("config.json")) - let tokenizerConfig = readJSONObject(at: repoDirectory.appendingPathComponent("tokenizer_config.json")) - let supportsImages = inferredSupportsImages( - repoDirectory: repoDirectory, - config: config, - tokenizerConfig: tokenizerConfig - ) - let sizeBytes = directorySize(at: repoDirectory) - let contextLength = inferredContextLength(config: config, tokenizerConfig: tokenizerConfig) - let loaderKinds: [ModelConfig.LoaderKind] = supportsImages ? 
[.vlm, .llm] : [.llm, .vlm] - - return LocalModelInfo( - repoId: repoId, - directory: repoDirectory, - sizeBytes: sizeBytes, - contextLength: contextLength, - loaderKinds: loaderKinds, - supportsImages: supportsImages - ) - } - private static func containsModelArtifacts(at directory: URL) -> Bool { let requiredPaths = [ directory.appendingPathComponent("config.json").path, @@ -267,42 +164,22 @@ enum LocalModelResolver { } /// Delete the local cache for a model so it will be re-downloaded next time. - /// Removes from both sandbox cache and system HF cache if present. @discardableResult static func deleteLocal(repoId: String) -> Bool { - var deleted = false + guard let hfBase = hfCacheBase else { return false } - // Delete from sandbox cache - if let base = modelsBase { - let modelDir = base.appendingPathComponent(repoId, isDirectory: true) - if FileManager.default.fileExists(atPath: modelDir.path) { - do { - try FileManager.default.removeItem(at: modelDir) - print("[LocalModelResolver] Deleted sandbox cache: \(modelDir.path)") - deleted = true - } catch { - print("[LocalModelResolver] Failed to delete \(modelDir.path): \(error)") - } - } + let repoSlug = repoId.replacingOccurrences(of: "/", with: "--") + let modelBase = hfBase.appendingPathComponent("models--\(repoSlug)", isDirectory: true) + guard FileManager.default.fileExists(atPath: modelBase.path) else { return false } + + do { + try FileManager.default.removeItem(at: modelBase) + print("[LocalModelResolver] Deleted cache: \(modelBase.path)") + return true + } catch { + print("[LocalModelResolver] Failed to delete \(modelBase.path): \(error)") + return false } - - // Delete from system HF cache - // Structure: ~/.cache/huggingface/hub/models--{org}--{name}/ - if let hfBase = hfSystemCache { - let repoSlug = repoId.replacingOccurrences(of: "/", with: "--") - let modelBase = hfBase.appendingPathComponent("models--\(repoSlug)", isDirectory: true) - if FileManager.default.fileExists(atPath: modelBase.path) { - do 
{ - try FileManager.default.removeItem(at: modelBase) - print("[LocalModelResolver] Deleted system cache: \(modelBase.path)") - deleted = true - } catch { - print("[LocalModelResolver] Failed to delete \(modelBase.path): \(error)") - } - } - } - - return deleted } private static func readJSONObject(at url: URL) -> [String: Any]? { diff --git a/MLXServer/ViewModels/ModelManager.swift b/MLXServer/ViewModels/ModelManager.swift index db0ca38..ba00223 100644 --- a/MLXServer/ViewModels/ModelManager.swift +++ b/MLXServer/ViewModels/ModelManager.swift @@ -1,24 +1,18 @@ import Foundation -import Hub +import HuggingFace import MLX +import MLXHuggingFace import MLXLLM import MLXLMCommon import MLXVLM +import Tokenizers /// Manages model loading, switching, and generation. @Observable @MainActor final class ModelManager { - /// HubApi with blob cache disabled to avoid storing every model twice. - /// swift-huggingface defaults to caching in both huggingface/hub/ (snapshots) - /// AND models/ (content-addressed blobs). We only need the snapshots. - /// Must use the same downloadBase as defaultHubApi (.cachesDirectory) so - /// LocalModelResolver can find downloaded models. - private static let hub: HubApi = { - let cachesDir = FileManager.default.urls(for: .cachesDirectory, in: .userDomainMask).first - return HubApi(downloadBase: cachesDir, cache: nil) - }() + private static let hubClient = HubClient.default var currentModel: ModelConfig? 
var availableModels: [ModelConfig] @@ -235,13 +229,15 @@ final class ModelManager { switch loaderKind { case .llm: return try await LLMModelFactory.shared.loadContainer( - hub: Self.hub, + from: #hubDownloader(Self.hubClient), + using: #huggingFaceTokenizerLoader(), configuration: configuration, progressHandler: progressHandler ) case .vlm: return try await VLMModelFactory.shared.loadContainer( - hub: Self.hub, + from: #hubDownloader(Self.hubClient), + using: #huggingFaceTokenizerLoader(), configuration: configuration, progressHandler: progressHandler ) diff --git a/MLXServerTests/Server/LocalModelResolverTests.swift b/MLXServerTests/Server/LocalModelResolverTests.swift index 44c3333..1a29d30 100644 --- a/MLXServerTests/Server/LocalModelResolverTests.swift +++ b/MLXServerTests/Server/LocalModelResolverTests.swift @@ -3,30 +3,27 @@ import XCTest @testable import MLX_Server final class LocalModelResolverTests: XCTestCase { - func testDiscoverModelsInfersTextOnlyMetadataAndDirectorySize() throws { - let base = try makeTempModelsRoot() - let repoDirectory = try makeRepoDirectory(base: base, owner: "example", repo: "text-only") - let configURL = repoDirectory.appendingPathComponent("config.json") - let modelURL = repoDirectory.appendingPathComponent("model.safetensors") - let tokenizerURL = repoDirectory.appendingPathComponent("tokenizer.json") + func testDiscoverSystemHFModelsInfersTextOnlyMetadata() throws { + let base = try makeTempHFCache() + let snapshotDir = try makeHFSnapshot(base: base, repoId: "example/text-only") try writeJSON( [ "architectures": ["LlamaForCausalLM"], "max_position_embeddings": 32768, ], - to: configURL + to: snapshotDir.appendingPathComponent("config.json") ) - try Data(repeating: 0x11, count: 64).write(to: modelURL) - try Data(repeating: 0x22, count: 19).write(to: tokenizerURL) + try Data(repeating: 0x11, count: 64).write(to: snapshotDir.appendingPathComponent("model.safetensors")) + try Data(repeating: 0x22, count: 19).write(to: 
snapshotDir.appendingPathComponent("tokenizer.json")) let expectedSize = Int64( - try Data(contentsOf: configURL).count - + Data(contentsOf: modelURL).count - + Data(contentsOf: tokenizerURL).count + try Data(contentsOf: snapshotDir.appendingPathComponent("config.json")).count + + Data(contentsOf: snapshotDir.appendingPathComponent("model.safetensors")).count + + Data(contentsOf: snapshotDir.appendingPathComponent("tokenizer.json")).count ) - let discovered = LocalModelResolver.discoverModels(in: base) + let discovered = LocalModelResolver.discoverSystemHFModels(in: base) let model = try XCTUnwrap(discovered.first) XCTAssertEqual(model.repoId, "example/text-only") @@ -36,21 +33,25 @@ final class LocalModelResolverTests: XCTestCase { XCTAssertEqual(model.sizeBytes, expectedSize) } - func testDiscoverModelsInfersVisionMetadataFromProcessorFiles() throws { - let base = try makeTempModelsRoot() - let repoDirectory = try makeRepoDirectory(base: base, owner: "example", repo: "vision-model") + func testDiscoverSystemHFModelsInfersVisionMetadata() throws { + let base = try makeTempHFCache() + let snapshotDir = try makeHFSnapshot(base: base, repoId: "example/vision-model") + try writeJSON( [ "text_config": ["max_position_embeddings": 262144], "vision_config": ["hidden_size": 768], ], - to: repoDirectory.appendingPathComponent("config.json") + to: snapshotDir.appendingPathComponent("config.json") ) - try writeJSON(["processor_class": "Qwen3VLProcessor"], to: repoDirectory.appendingPathComponent("tokenizer_config.json")) - try Data(repeating: 0x33, count: 12).write(to: repoDirectory.appendingPathComponent("processor_config.json")) - try Data(repeating: 0x44, count: 8).write(to: repoDirectory.appendingPathComponent("model.safetensors.index.json")) + try writeJSON( + ["processor_class": "Qwen3VLProcessor"], + to: snapshotDir.appendingPathComponent("tokenizer_config.json") + ) + try Data(repeating: 0x33, count: 12).write(to: 
snapshotDir.appendingPathComponent("processor_config.json")) + try Data(repeating: 0x44, count: 8).write(to: snapshotDir.appendingPathComponent("model.safetensors.index.json")) - let discovered = LocalModelResolver.discoverModels(in: base) + let discovered = LocalModelResolver.discoverSystemHFModels(in: base) let model = try XCTUnwrap(discovered.first) XCTAssertEqual(model.repoId, "example/vision-model") @@ -155,7 +156,7 @@ final class LocalModelResolverTests: XCTestCase { XCTAssertTrue(config.supportsTools) } - private func makeTempModelsRoot() throws -> URL { + private func makeTempHFCache() throws -> URL { let root = FileManager.default.temporaryDirectory .appendingPathComponent(UUID().uuidString, isDirectory: true) try FileManager.default.createDirectory(at: root, withIntermediateDirectories: true) @@ -165,16 +166,18 @@ final class LocalModelResolverTests: XCTestCase { return root } - private func makeRepoDirectory(base: URL, owner: String, repo: String) throws -> URL { - let directory = base - .appendingPathComponent(owner, isDirectory: true) - .appendingPathComponent(repo, isDirectory: true) - try FileManager.default.createDirectory(at: directory, withIntermediateDirectories: true) - return directory + private func makeHFSnapshot(base: URL, repoId: String, hash: String = "abc123") throws -> URL { + let slug = repoId.replacingOccurrences(of: "/", with: "--") + let snapshotDir = base + .appendingPathComponent("models--\(slug)", isDirectory: true) + .appendingPathComponent("snapshots", isDirectory: true) + .appendingPathComponent(hash, isDirectory: true) + try FileManager.default.createDirectory(at: snapshotDir, withIntermediateDirectories: true) + return snapshotDir } private func writeJSON(_ object: Any, to url: URL) throws { let data = try JSONSerialization.data(withJSONObject: object, options: [.prettyPrinted, .sortedKeys]) try data.write(to: url) } -} \ No newline at end of file +} diff --git a/MLXServerTests/Server/ModelBackedInferenceValidationTests.swift 
b/MLXServerTests/Server/ModelBackedInferenceValidationTests.swift index 78dba6d..d327ad8 100644 --- a/MLXServerTests/Server/ModelBackedInferenceValidationTests.swift +++ b/MLXServerTests/Server/ModelBackedInferenceValidationTests.swift @@ -1,7 +1,9 @@ import Foundation -import Hub +import HuggingFace +import MLXHuggingFace import MLXLMCommon import MLXVLM +import Tokenizers import XCTest @testable import MLX_Server @@ -671,10 +673,9 @@ private actor LocalGemmaFixture { } let loadTask = Task { - let cachesDir = FileManager.default.urls(for: .cachesDirectory, in: .userDomainMask).first - let hub = HubApi(downloadBase: cachesDir, cache: nil) return try await VLMModelFactory.shared.loadContainer( - hub: hub, + from: #hubDownloader(HubClient.default), + using: #huggingFaceTokenizerLoader(), configuration: ModelConfiguration(directory: localDir), progressHandler: { _ in } ) diff --git a/MLXServerTests/Server/ModelBackedQuantizationTests.swift b/MLXServerTests/Server/ModelBackedQuantizationTests.swift index 131e312..e9c04dd 100644 --- a/MLXServerTests/Server/ModelBackedQuantizationTests.swift +++ b/MLXServerTests/Server/ModelBackedQuantizationTests.swift @@ -1,8 +1,10 @@ import Foundation -import Hub +import HuggingFace import MLX +import MLXHuggingFace import MLXLMCommon import MLXVLM +import Tokenizers import XCTest @testable import MLX_Server @@ -230,10 +232,9 @@ private actor LocalGemmaFixture { } let loadTask = Task { - let cachesDir = FileManager.default.urls(for: .cachesDirectory, in: .userDomainMask).first - let hub = HubApi(downloadBase: cachesDir, cache: nil) return try await VLMModelFactory.shared.loadContainer( - hub: hub, + from: #hubDownloader(HubClient.default), + using: #huggingFaceTokenizerLoader(), configuration: ModelConfiguration(directory: localDir), progressHandler: { _ in } ) diff --git a/MLXServerTests/Server/TokenPrefixCacheQuantizationTests.swift b/MLXServerTests/Server/TokenPrefixCacheQuantizationTests.swift index 3a7738d..806cded 100644 --- 
a/MLXServerTests/Server/TokenPrefixCacheQuantizationTests.swift +++ b/MLXServerTests/Server/TokenPrefixCacheQuantizationTests.swift @@ -249,4 +249,11 @@ private final class NonStandardCache: KVCache { ) -> MLXFast.ScaledDotProductAttentionMaskMode { .none } + + func copy() -> any KVCache { + let c = NonStandardCache(tokenCount: 0, headDim: 0) + c.state = state + c.offset = offset + return c + } } diff --git a/MLXServerTests/Server/TokenPrefixCacheTests.swift b/MLXServerTests/Server/TokenPrefixCacheTests.swift index 503700d..4222384 100644 --- a/MLXServerTests/Server/TokenPrefixCacheTests.swift +++ b/MLXServerTests/Server/TokenPrefixCacheTests.swift @@ -388,4 +388,10 @@ private final class TestTrimRecordingCache: KVCache { ) -> MLXFast.ScaledDotProductAttentionMaskMode { .none } + + func copy() -> any KVCache { + let c = TestTrimRecordingCache(offset: offset, trimmable: trimmable) + c.state = state + return c + } } \ No newline at end of file diff --git a/build.sh b/build.sh index aacc921..bc60afe 100755 --- a/build.sh +++ b/build.sh @@ -19,6 +19,7 @@ xcodebuild \ -scheme MLXServer \ -destination 'platform=macOS' \ -configuration "$CONFIG" \ + -skipMacroValidation \ SYMROOT="$BUILD_DIR" \ build 2>&1 | \ grep -E "(CompileSwift .* 'MLXServer'|error:|warning:.*MLXServer/|BUILD )" | \ diff --git a/project.yml b/project.yml index 6201c08..8c267f6 100644 --- a/project.yml +++ b/project.yml @@ -9,7 +9,13 @@ options: packages: mlx-swift-lm: url: https://github.com/ml-explore/mlx-swift-lm - branch: main + from: "3.31.3" + swift-huggingface: + url: https://github.com/huggingface/swift-huggingface + from: "0.9.0" + swift-transformers: + url: https://github.com/huggingface/swift-transformers + from: "1.2.0" MarkdownUI: url: https://github.com/gonzalezreal/swift-markdown-ui from: "2.4.0" @@ -40,6 +46,12 @@ targets: product: MLXVLM - package: mlx-swift-lm product: MLXLMCommon + - package: mlx-swift-lm + product: MLXHuggingFace + - package: swift-huggingface + product: HuggingFace 
+ - package: swift-transformers + product: Tokenizers - package: MarkdownUI product: MarkdownUI MLXServerTests: diff --git a/resolve-packages.sh b/resolve-packages.sh new file mode 100755 index 0000000..abb196b --- /dev/null +++ b/resolve-packages.sh @@ -0,0 +1,27 @@ +#!/bin/bash +set -euo pipefail + +PROJECT_DIR="$(cd "$(dirname "$0")" && pwd)" +PROJECT_PATH="$PROJECT_DIR/MLXServer.xcodeproj" +RESOLVED_PATH="$PROJECT_PATH/project.xcworkspace/xcshareddata/swiftpm/Package.resolved" +SPM_STATE_DIR="$PROJECT_DIR/build/swiftpm" +PACKAGE_CACHE_PATH="$SPM_STATE_DIR/cache" +CLONED_SOURCES_PATH="$SPM_STATE_DIR/clones" +DERIVED_DATA_PATH="$PROJECT_DIR/build/DerivedData" + +echo "==> Resolving Swift packages from project.yml constraints" + +# Force a fresh resolve: remove the lockfile, SwiftPM caches and DerivedData so +# Package.resolved is regenerated from the version constraints in project.yml. +rm -f "$RESOLVED_PATH" +rm -rf "$PACKAGE_CACHE_PATH" "$CLONED_SOURCES_PATH" +rm -rf "$DERIVED_DATA_PATH" +mkdir -p "$PACKAGE_CACHE_PATH" "$CLONED_SOURCES_PATH" + +xcodebuild \ + -resolvePackageDependencies \ + -project "$PROJECT_PATH" \ + -scheme MLXServer \ + -disablePackageRepositoryCache \ + -packageCachePath "$PACKAGE_CACHE_PATH" \ + -clonedSourcePackagesDirPath "$CLONED_SOURCES_PATH" diff --git a/test.sh b/test.sh index b059a70..b3889ce 100755 --- a/test.sh +++ b/test.sh @@ -21,6 +21,7 @@ XCODEBUILD_ARGS=( -scheme MLXServer -destination "$DESTINATION" -configuration "$CONFIG" + -skipMacroValidation SYMROOT="$BUILD_DIR" )