feat: more on migration to v3

feat: migration to mlx-swift-lm v3
2026-04-30 11:58:53 +02:00 · 2026-04-30 09:18:37 +02:00
17 changed files with 287 additions and 258 deletions
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,6 +1,7 @@
 {
    "chat.tools.terminal.autoApprove": {
        "./test.sh": true,
-        "setopt": true
+        "setopt": true,
+        "./build.sh": true
    }
 }
--- a/MLXServer.xcodeproj/project.pbxproj
+++ b/MLXServer.xcodeproj/project.pbxproj
@@ -17,6 +17,7 @@
 		20FFB5DBF75AA6C359AAE31C /* SceneManagementView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 37FEB592E5E717F817B03151 /* SceneManagementView.swift */; };
 		221DEC86374902FCFD661A01 /* TokenPrefixCacheTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 64B2EDD5D1881AC9E1E60913 /* TokenPrefixCacheTests.swift */; };
 		2640EDCA9033D85C0B785557 /* GenerationSettings.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6FAF7455BD387CD2061E0CBF /* GenerationSettings.swift */; };
+		28A780EEB6DC74B5B0BBF03D /* HuggingFace in Frameworks */ = {isa = PBXBuildFile; productRef = FDBFD829EE956976552514CC /* HuggingFace */; };
 		29879D696584B96CC56560DF /* ChatExporter.swift in Sources */ = {isa = PBXBuildFile; fileRef = D7C9BAD674E29688ACE53B0B /* ChatExporter.swift */; };
 		2CAAF7129F7CC45200FA9F6B /* ModelPickerView.swift in Sources */ = {isa = PBXBuildFile; fileRef = C3C3A76C02AF70A9D8F868FC /* ModelPickerView.swift */; };
 		2D08769282BD71C170DB0943 /* InferenceStats.swift in Sources */ = {isa = PBXBuildFile; fileRef = E35452B166893B25E765FF70 /* InferenceStats.swift */; };
@@ -38,6 +39,7 @@
 		6828CCA8B78AB40906F87CAB /* LocalModelResolver.swift in Sources */ = {isa = PBXBuildFile; fileRef = D733A0D1D4AC25DDDA6C8684 /* LocalModelResolver.swift */; };
 		741692862DB1F13EA0B2D14D /* TokenPrefixCache.swift in Sources */ = {isa = PBXBuildFile; fileRef = 1962D530BEABCC7F1E8E0ED1 /* TokenPrefixCache.swift */; };
 		75E046B4ABB1E6FEF17C1A60 /* ModelManagementWindow.swift in Sources */ = {isa = PBXBuildFile; fileRef = 721D6F203A10434FE0223042 /* ModelManagementWindow.swift */; };
+		777AEBB3471D8838F0F51D08 /* MarkdownUI in Frameworks */ = {isa = PBXBuildFile; productRef = A98257123539E9E738213BFA /* MarkdownUI */; };
 		7936325B425DFA2931F6E421 /* ModelBackedQuantizationTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = F7E6F18C80D9859E89D2B4E3 /* ModelBackedQuantizationTests.swift */; };
 		7CD765C1E2F9F4D7504C8D09 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = B629DA084A9A40E54F8EA5FA /* Assets.xcassets */; };
 		80646C5066BF79BC76E1D9D7 /* ModelConfig.swift in Sources */ = {isa = PBXBuildFile; fileRef = 38DFC212AF4359A45FBE22BA /* ModelConfig.swift */; };
@@ -67,7 +69,8 @@
 		E92B6656C251EDA246B8F582 /* ImageDecoderTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = E4573DC9314915F4C7963B4E /* ImageDecoderTests.swift */; };
 		EC4FC68608DDFA6A3DF133CC /* InferenceEngine.swift in Sources */ = {isa = PBXBuildFile; fileRef = 02EBDE0C72D1C5CE220E5B93 /* InferenceEngine.swift */; };
 		EDE59C241940E7B9B53D520D /* TokenPrefixCacheQuantizationTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = D50504058693CDE533D755B5 /* TokenPrefixCacheQuantizationTests.swift */; };
-		F546CE5955ED253D8A793D5E /* MarkdownUI in Frameworks */ = {isa = PBXBuildFile; productRef = A98257123539E9E738213BFA /* MarkdownUI */; };
+		F2A137B60D5DFCC591A01420 /* Tokenizers in Frameworks */ = {isa = PBXBuildFile; productRef = BD266A137966DB9451C2C352 /* Tokenizers */; };
+		F546CE5955ED253D8A793D5E /* MLXHuggingFace in Frameworks */ = {isa = PBXBuildFile; productRef = 269A55730E9BDC735F9C2B78 /* MLXHuggingFace */; };
 		FAF7D4714AC6D02674920208 /* ChatMessage.swift in Sources */ = {isa = PBXBuildFile; fileRef = A4B359324B5FD8D106C74338 /* ChatMessage.swift */; };
 		FCD48F8C132A2B830A15EEB4 /* MLXLLM in Frameworks */ = {isa = PBXBuildFile; productRef = 3F5A4AC6DBAF7CA686ECA74E /* MLXLLM */; };
 		FE4405F66873C75CD6FA19A5 /* StreamingSSEEncoderTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 49C383DD5224F3420EB98DB2 /* StreamingSSEEncoderTests.swift */; };
@@ -158,7 +161,10 @@
 				FCD48F8C132A2B830A15EEB4 /* MLXLLM in Frameworks */,
 				945474365D0B3E961811909A /* MLXVLM in Frameworks */,
 				B6D3662995B885C102876B4A /* MLXLMCommon in Frameworks */,
-				F546CE5955ED253D8A793D5E /* MarkdownUI in Frameworks */,
+				F546CE5955ED253D8A793D5E /* MLXHuggingFace in Frameworks */,
+				28A780EEB6DC74B5B0BBF03D /* HuggingFace in Frameworks */,
+				F2A137B60D5DFCC591A01420 /* Tokenizers in Frameworks */,
+				777AEBB3471D8838F0F51D08 /* MarkdownUI in Frameworks */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
@@ -343,6 +349,9 @@
 				3F5A4AC6DBAF7CA686ECA74E /* MLXLLM */,
 				D5E8E1C2DD8D8AABB4306193 /* MLXVLM */,
 				9090667D4134056AE66DC2F1 /* MLXLMCommon */,
+				269A55730E9BDC735F9C2B78 /* MLXHuggingFace */,
+				FDBFD829EE956976552514CC /* HuggingFace */,
+				BD266A137966DB9451C2C352 /* Tokenizers */,
 				A98257123539E9E738213BFA /* MarkdownUI */,
 			);
 			productName = MLXServer;
@@ -390,6 +399,8 @@
 			packageReferences = (
 				D402301668D113A49B6DD32D /* XCRemoteSwiftPackageReference "swift-markdown-ui" */,
 				1AA4C71F15847A241E418C0C /* XCRemoteSwiftPackageReference "mlx-swift-lm" */,
+				A6D001FF3D9EA5BA3112F5BF /* XCRemoteSwiftPackageReference "swift-huggingface" */,
+				5479E9F7A876DC346598E560 /* XCRemoteSwiftPackageReference "swift-transformers" */,
 			);
 			preferredProjectObjectVersion = 77;
 			productRefGroup = 652987C2A419DBFC79E32CDE /* Products */;
@@ -736,8 +747,24 @@
 			isa = XCRemoteSwiftPackageReference;
 			repositoryURL = "https://github.com/ml-explore/mlx-swift-lm";
 			requirement = {
-				branch = main;
-				kind = branch;
+				kind = upToNextMajorVersion;
+				minimumVersion = 3.31.3;
+			};
+		};
+		5479E9F7A876DC346598E560 /* XCRemoteSwiftPackageReference "swift-transformers" */ = {
+			isa = XCRemoteSwiftPackageReference;
+			repositoryURL = "https://github.com/huggingface/swift-transformers";
+			requirement = {
+				kind = upToNextMajorVersion;
+				minimumVersion = 1.2.0;
+			};
+		};
+		A6D001FF3D9EA5BA3112F5BF /* XCRemoteSwiftPackageReference "swift-huggingface" */ = {
+			isa = XCRemoteSwiftPackageReference;
+			repositoryURL = "https://github.com/huggingface/swift-huggingface";
+			requirement = {
+				kind = upToNextMajorVersion;
+				minimumVersion = 0.9.0;
 			};
 		};
 		D402301668D113A49B6DD32D /* XCRemoteSwiftPackageReference "swift-markdown-ui" */ = {
@@ -751,6 +778,11 @@
 /* End XCRemoteSwiftPackageReference section */

 /* Begin XCSwiftPackageProductDependency section */
+		269A55730E9BDC735F9C2B78 /* MLXHuggingFace */ = {
+			isa = XCSwiftPackageProductDependency;
+			package = 1AA4C71F15847A241E418C0C /* XCRemoteSwiftPackageReference "mlx-swift-lm" */;
+			productName = MLXHuggingFace;
+		};
 		3F5A4AC6DBAF7CA686ECA74E /* MLXLLM */ = {
 			isa = XCSwiftPackageProductDependency;
 			package = 1AA4C71F15847A241E418C0C /* XCRemoteSwiftPackageReference "mlx-swift-lm" */;
@@ -766,11 +798,21 @@
 			package = D402301668D113A49B6DD32D /* XCRemoteSwiftPackageReference "swift-markdown-ui" */;
 			productName = MarkdownUI;
 		};
+		BD266A137966DB9451C2C352 /* Tokenizers */ = {
+			isa = XCSwiftPackageProductDependency;
+			package = 5479E9F7A876DC346598E560 /* XCRemoteSwiftPackageReference "swift-transformers" */;
+			productName = Tokenizers;
+		};
 		D5E8E1C2DD8D8AABB4306193 /* MLXVLM */ = {
 			isa = XCSwiftPackageProductDependency;
 			package = 1AA4C71F15847A241E418C0C /* XCRemoteSwiftPackageReference "mlx-swift-lm" */;
 			productName = MLXVLM;
 		};
+		FDBFD829EE956976552514CC /* HuggingFace */ = {
+			isa = XCSwiftPackageProductDependency;
+			package = A6D001FF3D9EA5BA3112F5BF /* XCRemoteSwiftPackageReference "swift-huggingface" */;
+			productName = HuggingFace;
+		};
 /* End XCSwiftPackageProductDependency section */
 	};
 	rootObject = 938BC479816FCA8527B731F9 /* Project object */;
--- a/MLXServer.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved
+++ b/MLXServer.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved
@@ -1,5 +1,5 @@
 {
-  "originHash" : "418f7299ccb303e0e8992dfc960a3df5df98d527f18667aa162699027b29b6cd",
+  "originHash" : "af28e5c426709ddbdb4b91bab23f3971aba7ff96fb35d16285d757a8f482e340",
  "pins" : [
    {
      "identity" : "eventsource",
@@ -15,8 +15,8 @@
      "kind" : "remoteSourceControl",
      "location" : "https://github.com/ml-explore/mlx-swift",
      "state" : {
-        "revision" : "6ba4827fb82c97d012eec9ab4b2de21f85c3b33d",
-        "version" : "0.30.6"
+        "revision" : "61b9e011e09a62b489f6bd647958f1555bdf2896",
+        "version" : "0.31.3"
      }
    },
    {
@@ -24,8 +24,8 @@
      "kind" : "remoteSourceControl",
      "location" : "https://github.com/ml-explore/mlx-swift-lm",
      "state" : {
-        "branch" : "main",
-        "revision" : "bc3c20ef4644c86f2b347debcfe1efe4308712a6"
+        "revision" : "1c05248bb0899e2a7a4962b84d319cf12f4e12aa",
+        "version" : "3.31.3"
      }
    },
    {
@@ -127,6 +127,15 @@
        "version" : "1.1.1"
      }
    },
+    {
+      "identity" : "swift-syntax",
+      "kind" : "remoteSourceControl",
+      "location" : "https://github.com/swiftlang/swift-syntax.git",
+      "state" : {
+        "revision" : "0687f71944021d616d34d922343dcef086855920",
+        "version" : "600.0.1"
+      }
+    },
    {
      "identity" : "swift-system",
      "kind" : "remoteSourceControl",
--- a/MLXServer/Server/APIServer.swift
+++ b/MLXServer/Server/APIServer.swift
@@ -335,19 +335,23 @@ final class APIServer {
            }
        }

+        // NOTE: repetition / presence / frequency penalties are intentionally
+        // not forwarded to GenerateParameters. mlx-swift-lm 3.31.3's
+        // PenaltyProcessor uses TokenRing.loadPrompt, which assumes a 1-D
+        // prompt MLXArray. VLM models (Gemma3, Qwen-VL, …) hand it a 2-D
+        // [1, N] tokens array, so the ring buffer ends up the wrong size and
+        // every later MLX.where in TokenRing.append crashes via fatalError.
+        // Re-enable once upstream fixes TokenRing to flatten the prompt.
        let generateParams = GenerateParameters(
            maxTokens: maxTokens,
            temperature: Float(generationSettings.temperature),
            topP: Float(generationSettings.topP),
            topK: generationSettings.topK,
-            minP: Float(generationSettings.minP),
-            repetitionPenalty: generationSettings.repetitionPenalty.map(Float.init),
-            repetitionContextSize: 128,
-            presencePenalty: generationSettings.presencePenalty.map(Float.init),
-            presenceContextSize: 128,
-            frequencyPenalty: generationSettings.frequencyPenalty.map(Float.init),
-            frequencyContextSize: 128
+            minP: Float(generationSettings.minP)
        )
+        _ = generationSettings.repetitionPenalty
+        _ = generationSettings.presencePenalty
+        _ = generationSettings.frequencyPenalty
        let currentModelId = modelManager.currentModel?.id ?? modelName
        let engine = InferenceEngine(container: container)
        let preparedInference: InferenceEngine.PreparedInference
--- a/MLXServer/Utilities/LocalModelResolver.swift
+++ b/MLXServer/Utilities/LocalModelResolver.swift
@@ -1,9 +1,6 @@
 import Foundation

-/// Resolves HuggingFace model repos to local directories.
-/// Checks multiple locations:
-/// 1. Sandbox cache: ~/Library/Containers/de.rfc1437.mlxserver/Data/Library/Caches/models/{org}/{name}/
-/// 2. System HF cache: ~/.cache/huggingface/hub/
+/// Resolves HuggingFace model repos to local directories in ~/.cache/huggingface/hub/.
 enum LocalModelResolver {

    struct LocalModelInfo: Identifiable, Hashable {
@@ -17,16 +14,8 @@ enum LocalModelResolver {
        var id: String { repoId }
    }

-    /// Base directory where HubApi stores downloaded models (sandbox cache).
-    private static let modelsBase: URL? = {
-        FileManager.default.urls(for: .cachesDirectory, in: .userDomainMask).first?
-            .appendingPathComponent("models", isDirectory: true)
-    }()
-
-    /// System HuggingFace cache directory (~/.cache/huggingface/hub/).
-    /// Note: Requires com.apple.security.files.home-relative-directory.read entitlement
-    private static let hfSystemCache: URL? = {
-        // Use homeDirectoryForCurrentUser which works in sandbox with proper entitlement
+    /// HuggingFace cache directory (~/.cache/huggingface/hub/).
+    private static let hfCacheBase: URL? = {
        return FileManager.default.homeDirectoryForCurrentUser
            .appendingPathComponent(".cache", isDirectory: true)
            .appendingPathComponent("huggingface", isDirectory: true)
@@ -35,50 +24,41 @@ enum LocalModelResolver {

    /// Resolve a HuggingFace repo ID (e.g. "mlx-community/gemma-3-4b-it-4bit")
    /// to its local directory, if it exists.
-    /// Checks sandbox cache first, then system HF cache.
    ///
    /// Returns `nil` if the model hasn't been downloaded yet.
    static func resolve(repoId: String) -> URL? {
        print("[LocalModelResolver] Resolving: \(repoId)")

-        // Check sandbox cache first
-        if let base = modelsBase {
-            let modelDir = base.appendingPathComponent(repoId, isDirectory: true)
-            var isDir: ObjCBool = false
-            if FileManager.default.fileExists(atPath: modelDir.path, isDirectory: &isDir), isDir.boolValue {
-                print("[LocalModelResolver]   Found in sandbox cache: \(modelDir.path)")
-                return modelDir
-            }
+        // Structure: ~/.cache/huggingface/hub/models--{org}--{name}/snapshots/{commit-hash}/
+        guard let hfBase = hfCacheBase else {
+            print("[LocalModelResolver]   No cache base")
+            return nil
        }

-        // Check system HF cache
-        // Structure: ~/.cache/huggingface/hub/models--{org}--{name}/snapshots/{commit-hash}/
-        if let hfBase = hfSystemCache {
-            let repoSlug = repoId.replacingOccurrences(of: "/", with: "--")
-            let modelBase = hfBase.appendingPathComponent("models--\(repoSlug)", isDirectory: true)
+        let repoSlug = repoId.replacingOccurrences(of: "/", with: "--")
+        let modelBase = hfBase.appendingPathComponent("models--\(repoSlug)", isDirectory: true)

-            print("[LocalModelResolver]   Checking HF cache: \(modelBase.path)")
+        print("[LocalModelResolver]   Checking HF cache: \(modelBase.path)")

-            // Look for snapshots directory
-            let snapshotsDir = modelBase.appendingPathComponent("snapshots", isDirectory: true)
-            var isDir: ObjCBool = false
-            guard FileManager.default.fileExists(atPath: snapshotsDir.path, isDirectory: &isDir), isDir.boolValue else {
-                print("[LocalModelResolver]   No snapshots directory found")
-                return nil
-            }
+        let snapshotsDir = modelBase.appendingPathComponent("snapshots", isDirectory: true)
+        var isDir: ObjCBool = false
+        guard FileManager.default.fileExists(atPath: snapshotsDir.path, isDirectory: &isDir), isDir.boolValue else {
+            print("[LocalModelResolver]   No snapshots directory found")
+            return nil
+        }

-            // Find the latest snapshot (commit hash directories)
-            if let snapshotDirs = try? FileManager.default.contentsOfDirectory(at: snapshotsDir, includingPropertiesForKeys: nil) {
-                print("[LocalModelResolver]   Found \(snapshotDirs.count) snapshots")
-                for snapshotDir in snapshotDirs where isDirectory(snapshotDir) {
-                    let configPath = snapshotDir.appendingPathComponent("config.json")
-                    if FileManager.default.fileExists(atPath: configPath.path) {
-                        print("[LocalModelResolver]   Found valid snapshot: \(snapshotDir.path)")
-                        return snapshotDir
-                    }
+        if let snapshotDirs = try? FileManager.default.contentsOfDirectory(at: snapshotsDir, includingPropertiesForKeys: nil) {
+            print("[LocalModelResolver]   Found \(snapshotDirs.count) snapshots")
+            for snapshotDir in snapshotDirs where isDirectory(snapshotDir) {
+                let configPath = snapshotDir.appendingPathComponent("config.json")
+                guard FileManager.default.fileExists(atPath: configPath.path) else { continue }
+                guard hasCompleteWeights(at: snapshotDir) else {
+                    print("[LocalModelResolver]   Snapshot missing weight files (incomplete download): \(snapshotDir.path)")
+                    continue
                }
+                print("[LocalModelResolver]   Found valid snapshot: \(snapshotDir.path)")
+                return snapshotDir
            }
-            print("[LocalModelResolver]   No valid snapshot found")
        }

        print("[LocalModelResolver]   Model not found locally")
@@ -91,38 +71,18 @@ enum LocalModelResolver {
    }

    static func discoveredLocalModels() -> [LocalModelInfo] {
-        var discovered: [LocalModelInfo] = []
+        print("[LocalModelResolver] Scanning HF cache: \(hfCacheBase?.path ?? "N/A")")
+        guard let hfBase = hfCacheBase else { return [] }

-        // Scan sandbox cache
-        print("[LocalModelResolver] Scanning sandbox cache: \(modelsBase?.path ?? "N/A")")
-        if let sandboxBase = modelsBase {
-            let sandboxModels = discoverModels(in: sandboxBase)
-            print("[LocalModelResolver] Found \(sandboxModels.count) models in sandbox cache")
-            discovered += sandboxModels
+        let models = discoverSystemHFModels(in: hfBase)
+        print("[LocalModelResolver] Found \(models.count) models:")
+        for model in models {
+            print("[LocalModelResolver]   - \(model.repoId) (\(model.sizeBytes / (1024*1024)) MB)")
        }
-
-        // Scan system HF cache
-        print("[LocalModelResolver] Scanning system HF cache: \(hfSystemCache?.path ?? "N/A")")
-        if let hfBase = hfSystemCache {
-            let hfModels = discoverSystemHFModels(in: hfBase)
-            print("[LocalModelResolver] Found \(hfModels.count) models in HF system cache:")
-            for model in hfModels {
-                print("[LocalModelResolver]   - \(model.repoId) (\(model.sizeBytes / (1024*1024)) MB)")
-            }
-            discovered += hfModels
-        }
-
-        // Remove duplicates (same repoId) and sort
-        let byRepoId = Dictionary(uniqueKeysWithValues: discovered.map { ($0.repoId, $0) })
-        let finalModels = byRepoId.values.sorted { lhs, rhs in
-            lhs.repoId.localizedCaseInsensitiveCompare(rhs.repoId) == .orderedAscending
-        }
-        print("[LocalModelResolver] Total unique models: \(finalModels.count)")
-        return finalModels
+        return models
    }

-    /// Discover models in the system HF cache (~/.cache/huggingface/hub/)
-    private static func discoverSystemHFModels(in base: URL) -> [LocalModelInfo] {
+    static func discoverSystemHFModels(in base: URL) -> [LocalModelInfo] {
        let fileManager = FileManager.default
        let directoryKeys: Set<URLResourceKey> = [.isDirectoryKey]
        guard let modelBases = try? fileManager.contentsOfDirectory(
@@ -189,41 +149,6 @@ enum LocalModelResolver {
        )
    }

-    static func discoverModels(in base: URL) -> [LocalModelInfo] {
-        let fileManager = FileManager.default
-        let directoryKeys: Set<URLResourceKey> = [.isDirectoryKey]
-        guard let ownerDirectories = try? fileManager.contentsOfDirectory(
-            at: base,
-            includingPropertiesForKeys: Array(directoryKeys),
-            options: [.skipsHiddenFiles]
-        ) else {
-            return []
-        }
-
-        var discovered: [LocalModelInfo] = []
-
-        for ownerDirectory in ownerDirectories {
-            guard isDirectory(ownerDirectory) else { continue }
-            guard let repoDirectories = try? fileManager.contentsOfDirectory(
-                at: ownerDirectory,
-                includingPropertiesForKeys: Array(directoryKeys),
-                options: [.skipsHiddenFiles]
-            ) else {
-                continue
-            }
-
-            for repoDirectory in repoDirectories where isDirectory(repoDirectory) {
-                if let info = localModelInfo(ownerDirectory: ownerDirectory, repoDirectory: repoDirectory) {
-                    discovered.append(info)
-                }
-            }
-        }
-
-        return discovered.sorted {
-            $0.repoId.localizedCaseInsensitiveCompare($1.repoId) == .orderedAscending
-        }
-    }
-
    private static func isDirectory(_ url: URL) -> Bool {
        var isDir: ObjCBool = false
        if FileManager.default.fileExists(atPath: url.path, isDirectory: &isDir) {
@@ -232,77 +157,56 @@ enum LocalModelResolver {
        return false
    }

-    private static func localModelInfo(ownerDirectory: URL, repoDirectory: URL) -> LocalModelInfo? {
-        let repoId = "\(ownerDirectory.lastPathComponent)/\(repoDirectory.lastPathComponent)"
-        guard containsModelArtifacts(at: repoDirectory) else { return nil }
-
-        let config = readJSONObject(at: repoDirectory.appendingPathComponent("config.json"))
-        let tokenizerConfig = readJSONObject(at: repoDirectory.appendingPathComponent("tokenizer_config.json"))
-        let supportsImages = inferredSupportsImages(
-            repoDirectory: repoDirectory,
-            config: config,
-            tokenizerConfig: tokenizerConfig
-        )
-        let sizeBytes = directorySize(at: repoDirectory)
-        let contextLength = inferredContextLength(config: config, tokenizerConfig: tokenizerConfig)
-        let loaderKinds: [ModelConfig.LoaderKind] = supportsImages ? [.vlm, .llm] : [.llm, .vlm]
-
-        return LocalModelInfo(
-            repoId: repoId,
-            directory: repoDirectory,
-            sizeBytes: sizeBytes,
-            contextLength: contextLength,
-            loaderKinds: loaderKinds,
-            supportsImages: supportsImages
+    private static func containsModelArtifacts(at directory: URL) -> Bool {
+        let configExists = FileManager.default.fileExists(
+            atPath: directory.appendingPathComponent("config.json").path
        )
+        return configExists && hasCompleteWeights(at: directory)
    }

-    private static func containsModelArtifacts(at directory: URL) -> Bool {
-        let requiredPaths = [
-            directory.appendingPathComponent("config.json").path,
-            directory.appendingPathComponent("model.safetensors").path,
-            directory.appendingPathComponent("model.safetensors.index.json").path,
-        ]
-        return requiredPaths.contains { FileManager.default.fileExists(atPath: $0) }
+    /// Returns true when the snapshot has the actual weight files on disk:
+    /// either a single `model.safetensors`, or every shard listed in
+    /// `model.safetensors.index.json`. Returns false for partial/interrupted downloads.
+    static func hasCompleteWeights(at directory: URL) -> Bool {
+        let fm = FileManager.default
+        let single = directory.appendingPathComponent("model.safetensors")
+        if fm.fileExists(atPath: single.path) {
+            return true
+        }
+
+        let indexURL = directory.appendingPathComponent("model.safetensors.index.json")
+        guard fm.fileExists(atPath: indexURL.path),
+              let data = try? Data(contentsOf: indexURL),
+              let json = (try? JSONSerialization.jsonObject(with: data)) as? [String: Any],
+              let weightMap = json["weight_map"] as? [String: Any]
+        else {
+            return false
+        }
+
+        let shardNames = Set(weightMap.values.compactMap { $0 as? String })
+        guard !shardNames.isEmpty else { return false }
+        return shardNames.allSatisfy { name in
+            fm.fileExists(atPath: directory.appendingPathComponent(name).path)
+        }
    }

    /// Delete the local cache for a model so it will be re-downloaded next time.
-    /// Removes from both sandbox cache and system HF cache if present.
    @discardableResult
    static func deleteLocal(repoId: String) -> Bool {
-        var deleted = false
+        guard let hfBase = hfCacheBase else { return false }

-        // Delete from sandbox cache
-        if let base = modelsBase {
-            let modelDir = base.appendingPathComponent(repoId, isDirectory: true)
-            if FileManager.default.fileExists(atPath: modelDir.path) {
-                do {
-                    try FileManager.default.removeItem(at: modelDir)
-                    print("[LocalModelResolver] Deleted sandbox cache: \(modelDir.path)")
-                    deleted = true
-                } catch {
-                    print("[LocalModelResolver] Failed to delete \(modelDir.path): \(error)")
-                }
-            }
+        let repoSlug = repoId.replacingOccurrences(of: "/", with: "--")
+        let modelBase = hfBase.appendingPathComponent("models--\(repoSlug)", isDirectory: true)
+        guard FileManager.default.fileExists(atPath: modelBase.path) else { return false }
+
+        do {
+            try FileManager.default.removeItem(at: modelBase)
+            print("[LocalModelResolver] Deleted cache: \(modelBase.path)")
+            return true
+        } catch {
+            print("[LocalModelResolver] Failed to delete \(modelBase.path): \(error)")
+            return false
        }
-
-        // Delete from system HF cache
-        // Structure: ~/.cache/huggingface/hub/models--{org}--{name}/
-        if let hfBase = hfSystemCache {
-            let repoSlug = repoId.replacingOccurrences(of: "/", with: "--")
-            let modelBase = hfBase.appendingPathComponent("models--\(repoSlug)", isDirectory: true)
-            if FileManager.default.fileExists(atPath: modelBase.path) {
-                do {
-                    try FileManager.default.removeItem(at: modelBase)
-                    print("[LocalModelResolver] Deleted system cache: \(modelBase.path)")
-                    deleted = true
-                } catch {
-                    print("[LocalModelResolver] Failed to delete \(modelBase.path): \(error)")
-                }
-            }
-        }
-
-        return deleted
    }

    private static func readJSONObject(at url: URL) -> [String: Any]? {
--- a/MLXServer/ViewModels/ChatViewModel.swift
+++ b/MLXServer/ViewModels/ChatViewModel.swift
@@ -88,18 +88,19 @@ final class ChatViewModel {
            let thinkingContext: [String: any Sendable]? = generationSettings.thinkingEnabled
                ? nil
                : ["enable_thinking": false]
+            // NOTE: repetition / presence / frequency penalties are intentionally
+            // not forwarded to GenerateParameters. mlx-swift-lm 3.31.3's
+            // PenaltyProcessor uses TokenRing.loadPrompt, which assumes a 1-D
+            // prompt MLXArray. VLM models (Gemma3, Qwen-VL, …) hand it a 2-D
+            // [1, N] tokens array, so the ring buffer ends up the wrong size and
+            // every later MLX.where in TokenRing.append crashes via fatalError.
+            // Re-enable once upstream fixes TokenRing to flatten the prompt.
            let generateParameters = GenerateParameters(
                maxTokens: generationSettings.maxTokens,
                temperature: Float(generationSettings.temperature),
                topP: Float(generationSettings.topP),
                topK: generationSettings.topK,
-                minP: Float(generationSettings.minP),
-                repetitionPenalty: generationSettings.repetitionPenalty.map(Float.init),
-                repetitionContextSize: 128,
-                presencePenalty: generationSettings.presencePenalty.map(Float.init),
-                presenceContextSize: 128,
-                frequencyPenalty: generationSettings.frequencyPenalty.map(Float.init),
-                frequencyContextSize: 128
+                minP: Float(generationSettings.minP)
            )
            let history = conversation.messages.compactMap(historyMessage(from:))
            if history.isEmpty {
--- a/MLXServer/ViewModels/ModelManager.swift
+++ b/MLXServer/ViewModels/ModelManager.swift
@@ -1,24 +1,18 @@
 import Foundation
-import Hub
+import HuggingFace
 import MLX
+import MLXHuggingFace
 import MLXLLM
 import MLXLMCommon
 import MLXVLM
+import Tokenizers

 /// Manages model loading, switching, and generation.
@Observable
@MainActor
 final class ModelManager {

-    /// HubApi with blob cache disabled to avoid storing every model twice.
-    /// swift-huggingface defaults to caching in both huggingface/hub/ (snapshots)
-    /// AND models/ (content-addressed blobs). We only need the snapshots.
-    /// Must use the same downloadBase as defaultHubApi (.cachesDirectory) so
-    /// LocalModelResolver can find downloaded models.
-    private static let hub: HubApi = {
-        let cachesDir = FileManager.default.urls(for: .cachesDirectory, in: .userDomainMask).first
-        return HubApi(downloadBase: cachesDir, cache: nil)
-    }()
+    private static let hubClient = HubClient.default

    var currentModel: ModelConfig?
    var availableModels: [ModelConfig]
@@ -31,8 +25,8 @@ final class ModelManager {

    // Download-specific state for the modal
    var isDownloading = false
-    var downloadFilesTotal: Int64 = 0
-    var downloadFilesCompleted: Int64 = 0
+    var downloadBytesTotal: Int64 = 0
+    var downloadBytesCompleted: Int64 = 0
    var downloadSpeed: Double = 0 // bytes/sec

    private var idleTimer: Timer?
@@ -93,8 +87,8 @@ final class ModelManager {
        isDownloading = false
        downloadProgress = 0
        loadingModelName = ""
-        downloadFilesTotal = 0
-        downloadFilesCompleted = 0
+        downloadBytesTotal = 0
+        downloadBytesCompleted = 0
        downloadSpeed = 0
    }

@@ -122,8 +116,8 @@ final class ModelManager {
        let needsDownload = !effectiveConfig.isLocal
        if needsDownload {
            isDownloading = true
-            downloadFilesTotal = 0
-            downloadFilesCompleted = 0
+            downloadBytesTotal = 0
+            downloadBytesCompleted = 0
            downloadSpeed = 0
        }

@@ -132,8 +126,8 @@ final class ModelManager {
                Task { @MainActor in
                    self.downloadProgress = progress.fractionCompleted
                    if self.isDownloading {
-                        self.downloadFilesTotal = progress.totalUnitCount
-                        self.downloadFilesCompleted = progress.completedUnitCount
+                        self.downloadBytesTotal = progress.totalUnitCount
+                        self.downloadBytesCompleted = progress.completedUnitCount
                        if let speed = progress.userInfo[.throughputKey] as? Double {
                            self.downloadSpeed = speed
                        }
@@ -235,13 +229,15 @@ final class ModelManager {
                switch loaderKind {
                case .llm:
                    return try await LLMModelFactory.shared.loadContainer(
-                        hub: Self.hub,
+                        from: #hubDownloader(Self.hubClient),
+                        using: #huggingFaceTokenizerLoader(),
                        configuration: configuration,
                        progressHandler: progressHandler
                    )
                case .vlm:
                    return try await VLMModelFactory.shared.loadContainer(
-                        hub: Self.hub,
+                        from: #hubDownloader(Self.hubClient),
+                        using: #huggingFaceTokenizerLoader(),
                        configuration: configuration,
                        progressHandler: progressHandler
                    )
--- a/MLXServer/Views/DownloadModalView.swift
+++ b/MLXServer/Views/DownloadModalView.swift
@@ -20,9 +20,9 @@ struct DownloadModalView: View {
                    .progressViewStyle(.linear)

                HStack {
-                    // Files progress
-                    if modelManager.downloadFilesTotal > 0 {
-                        Text("File \(modelManager.downloadFilesCompleted)/\(modelManager.downloadFilesTotal)")
+                    // Bytes progress
+                    if modelManager.downloadBytesTotal > 0 {
+                        Text("\(formatBytes(modelManager.downloadBytesCompleted)) / \(formatBytes(modelManager.downloadBytesTotal))")
                            .font(.caption.monospacedDigit())
                            .foregroundStyle(.secondary)
                    }
@@ -65,4 +65,17 @@ struct DownloadModalView: View {
            return String(format: "%.0f B/s", bytesPerSec)
        }
    }
+
+    /// Renders a byte count as a short human-readable label.
+    /// Units step at powers of 1024: plain "B" below 1 KB, whole-number "KB"
+    /// and "MB", and "GB" with two decimals.
+    private func formatBytes(_ bytes: Int64) -> String {
+        let size = Double(bytes)
+        // Smallest range first; each guard returns as soon as its unit fits.
+        guard size >= 1024 else { return "\(bytes) B" }
+        guard size >= 1_048_576 else { return String(format: "%.0f KB", size / 1024) }
+        guard size >= 1_073_741_824 else { return String(format: "%.0f MB", size / 1_048_576) }
+        // Only the gigabyte range keeps fractional digits.
+        return String(format: "%.2f GB", size / 1_073_741_824)
+    }
 }
--- a/MLXServerTests/Server/LocalModelResolverTests.swift
+++ b/MLXServerTests/Server/LocalModelResolverTests.swift
@@ -3,30 +3,27 @@ import XCTest
@testable import MLX_Server

 final class LocalModelResolverTests: XCTestCase {
-    func testDiscoverModelsInfersTextOnlyMetadataAndDirectorySize() throws {
-        let base = try makeTempModelsRoot()
-        let repoDirectory = try makeRepoDirectory(base: base, owner: "example", repo: "text-only")
-        let configURL = repoDirectory.appendingPathComponent("config.json")
-        let modelURL = repoDirectory.appendingPathComponent("model.safetensors")
-        let tokenizerURL = repoDirectory.appendingPathComponent("tokenizer.json")
+    func testDiscoverSystemHFModelsInfersTextOnlyMetadata() throws {
+        let base = try makeTempHFCache()
+        let snapshotDir = try makeHFSnapshot(base: base, repoId: "example/text-only")

        try writeJSON(
            [
                "architectures": ["LlamaForCausalLM"],
                "max_position_embeddings": 32768,
            ],
-            to: configURL
+            to: snapshotDir.appendingPathComponent("config.json")
        )
-        try Data(repeating: 0x11, count: 64).write(to: modelURL)
-        try Data(repeating: 0x22, count: 19).write(to: tokenizerURL)
+        try Data(repeating: 0x11, count: 64).write(to: snapshotDir.appendingPathComponent("model.safetensors"))
+        try Data(repeating: 0x22, count: 19).write(to: snapshotDir.appendingPathComponent("tokenizer.json"))

        let expectedSize = Int64(
-            try Data(contentsOf: configURL).count
-            + Data(contentsOf: modelURL).count
-            + Data(contentsOf: tokenizerURL).count
+            try Data(contentsOf: snapshotDir.appendingPathComponent("config.json")).count
+            + Data(contentsOf: snapshotDir.appendingPathComponent("model.safetensors")).count
+            + Data(contentsOf: snapshotDir.appendingPathComponent("tokenizer.json")).count
        )

-        let discovered = LocalModelResolver.discoverModels(in: base)
+        let discovered = LocalModelResolver.discoverSystemHFModels(in: base)
        let model = try XCTUnwrap(discovered.first)

        XCTAssertEqual(model.repoId, "example/text-only")
@@ -36,21 +33,25 @@ final class LocalModelResolverTests: XCTestCase {
        XCTAssertEqual(model.sizeBytes, expectedSize)
    }

-    func testDiscoverModelsInfersVisionMetadataFromProcessorFiles() throws {
-        let base = try makeTempModelsRoot()
-        let repoDirectory = try makeRepoDirectory(base: base, owner: "example", repo: "vision-model")
+    func testDiscoverSystemHFModelsInfersVisionMetadata() throws {
+        let base = try makeTempHFCache()
+        let snapshotDir = try makeHFSnapshot(base: base, repoId: "example/vision-model")
+
        try writeJSON(
            [
                "text_config": ["max_position_embeddings": 262144],
                "vision_config": ["hidden_size": 768],
            ],
-            to: repoDirectory.appendingPathComponent("config.json")
+            to: snapshotDir.appendingPathComponent("config.json")
        )
-        try writeJSON(["processor_class": "Qwen3VLProcessor"], to: repoDirectory.appendingPathComponent("tokenizer_config.json"))
-        try Data(repeating: 0x33, count: 12).write(to: repoDirectory.appendingPathComponent("processor_config.json"))
-        try Data(repeating: 0x44, count: 8).write(to: repoDirectory.appendingPathComponent("model.safetensors.index.json"))
+        try writeJSON(
+            ["processor_class": "Qwen3VLProcessor"],
+            to: snapshotDir.appendingPathComponent("tokenizer_config.json")
+        )
+        try Data(repeating: 0x33, count: 12).write(to: snapshotDir.appendingPathComponent("processor_config.json"))
+        try Data(repeating: 0x44, count: 8).write(to: snapshotDir.appendingPathComponent("model.safetensors.index.json"))

-        let discovered = LocalModelResolver.discoverModels(in: base)
+        let discovered = LocalModelResolver.discoverSystemHFModels(in: base)
        let model = try XCTUnwrap(discovered.first)

        XCTAssertEqual(model.repoId, "example/vision-model")
@@ -155,7 +156,7 @@ final class LocalModelResolverTests: XCTestCase {
        XCTAssertTrue(config.supportsTools)
    }

-    private func makeTempModelsRoot() throws -> URL {
+    private func makeTempHFCache() throws -> URL {
        let root = FileManager.default.temporaryDirectory
            .appendingPathComponent(UUID().uuidString, isDirectory: true)
        try FileManager.default.createDirectory(at: root, withIntermediateDirectories: true)
@@ -165,12 +166,14 @@ final class LocalModelResolverTests: XCTestCase {
        return root
    }

-    private func makeRepoDirectory(base: URL, owner: String, repo: String) throws -> URL {
-        let directory = base
-            .appendingPathComponent(owner, isDirectory: true)
-            .appendingPathComponent(repo, isDirectory: true)
-        try FileManager.default.createDirectory(at: directory, withIntermediateDirectories: true)
-        return directory
+    /// Creates `<base>/models--<slug>/snapshots/<hash>` (slug = repoId with "/" replaced by "--") and returns it.
+    private func makeHFSnapshot(base: URL, repoId: String, hash: String = "abc123") throws -> URL {
+        let repoFolder = "models--" + repoId.replacingOccurrences(of: "/", with: "--")
+        let target = [repoFolder, "snapshots", hash].reduce(base) { partial, component in
+            partial.appendingPathComponent(component, isDirectory: true)
+        }
+        try FileManager.default.createDirectory(at: target, withIntermediateDirectories: true)
+        return target
     }

    private func writeJSON(_ object: Any, to url: URL) throws {
--- a/MLXServerTests/Server/ModelBackedInferenceValidationTests.swift
+++ b/MLXServerTests/Server/ModelBackedInferenceValidationTests.swift
@@ -1,7 +1,9 @@
 import Foundation
-import Hub
+import HuggingFace
+import MLXHuggingFace
 import MLXLMCommon
 import MLXVLM
+import Tokenizers
 import XCTest
@testable import MLX_Server

@@ -671,10 +673,9 @@ private actor LocalGemmaFixture {
        }

        let loadTask = Task<ModelContainer, Error> {
-            let cachesDir = FileManager.default.urls(for: .cachesDirectory, in: .userDomainMask).first
-            let hub = HubApi(downloadBase: cachesDir, cache: nil)
            return try await VLMModelFactory.shared.loadContainer(
-                hub: hub,
+                from: #hubDownloader(HubClient.default),
+                using: #huggingFaceTokenizerLoader(),
                configuration: ModelConfiguration(directory: localDir),
                progressHandler: { _ in }
            )
--- a/MLXServerTests/Server/ModelBackedQuantizationTests.swift
+++ b/MLXServerTests/Server/ModelBackedQuantizationTests.swift
@@ -1,8 +1,10 @@
 import Foundation
-import Hub
+import HuggingFace
 import MLX
+import MLXHuggingFace
 import MLXLMCommon
 import MLXVLM
+import Tokenizers
 import XCTest
@testable import MLX_Server

@@ -230,10 +232,9 @@ private actor LocalGemmaFixture {
        }

        let loadTask = Task<ModelContainer, Error> {
-            let cachesDir = FileManager.default.urls(for: .cachesDirectory, in: .userDomainMask).first
-            let hub = HubApi(downloadBase: cachesDir, cache: nil)
            return try await VLMModelFactory.shared.loadContainer(
-                hub: hub,
+                from: #hubDownloader(HubClient.default),
+                using: #huggingFaceTokenizerLoader(),
                configuration: ModelConfiguration(directory: localDir),
                progressHandler: { _ in }
            )
--- a/MLXServerTests/Server/TokenPrefixCacheQuantizationTests.swift
+++ b/MLXServerTests/Server/TokenPrefixCacheQuantizationTests.swift
@@ -249,4 +249,11 @@ private final class NonStandardCache: KVCache {
    ) -> MLXFast.ScaledDotProductAttentionMaskMode {
        .none
    }
+
+    func copy() -> any KVCache {
+        let duplicate = NonStandardCache(tokenCount: 0, headDim: 0)  // zero-sized shell, filled in below
+        duplicate.state = state
+        duplicate.offset = offset
+        return duplicate
+    }
 }
--- a/MLXServerTests/Server/TokenPrefixCacheTests.swift
+++ b/MLXServerTests/Server/TokenPrefixCacheTests.swift
@@ -388,4 +388,10 @@ private final class TestTrimRecordingCache: KVCache {
    ) -> MLXFast.ScaledDotProductAttentionMaskMode {
        .none
    }
+
+    func copy() -> any KVCache {
+        let duplicate = TestTrimRecordingCache(offset: offset, trimmable: trimmable)
+        duplicate.state = state  // the initializer call above does not take state, so assign it here
+        return duplicate
+    }
 }
--- a/build.sh
+++ b/build.sh
@@ -19,6 +19,7 @@ xcodebuild \
    -scheme MLXServer \
    -destination 'platform=macOS' \
    -configuration "$CONFIG" \
+    -skipMacroValidation \
    SYMROOT="$BUILD_DIR" \
    build 2>&1 | \
    grep -E "(CompileSwift .* 'MLXServer'|error:|warning:.*MLXServer/|BUILD )" | \
--- a/project.yml
+++ b/project.yml
@@ -9,7 +9,13 @@ options:
 packages:
  mlx-swift-lm:
    url: https://github.com/ml-explore/mlx-swift-lm
-    branch: main
+    from: "3.31.3"
+  swift-huggingface:
+    url: https://github.com/huggingface/swift-huggingface
+    from: "0.9.0"
+  swift-transformers:
+    url: https://github.com/huggingface/swift-transformers
+    from: "1.2.0"
  MarkdownUI:
    url: https://github.com/gonzalezreal/swift-markdown-ui
    from: "2.4.0"
@@ -40,6 +46,12 @@ targets:
        product: MLXVLM
      - package: mlx-swift-lm
        product: MLXLMCommon
+      - package: mlx-swift-lm
+        product: MLXHuggingFace
+      - package: swift-huggingface
+        product: HuggingFace
+      - package: swift-transformers
+        product: Tokenizers
      - package: MarkdownUI
        product: MarkdownUI
  MLXServerTests:
--- a/resolve-packages.sh
+++ b/resolve-packages.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+set -euo pipefail
+
+PROJECT_DIR="$(cd "$(dirname "$0")" && pwd)"
+PROJECT_PATH="$PROJECT_DIR/MLXServer.xcodeproj"
+RESOLVED_PATH="$PROJECT_PATH/project.xcworkspace/xcshareddata/swiftpm/Package.resolved"
+SPM_STATE_DIR="$PROJECT_DIR/build/swiftpm"
+PACKAGE_CACHE_PATH="$SPM_STATE_DIR/cache"
+CLONED_SOURCES_PATH="$SPM_STATE_DIR/clones"
+DERIVED_DATA_PATH="$PROJECT_DIR/build/DerivedData"
+
+echo "==> Resolving Swift packages from project.yml constraints"
+
+# Force a fresh resolve so the lockfile is regenerated from the current project.yml
+# constraints (mlx-swift-lm now uses a `from:` version range rather than branch main).
+rm -f "$RESOLVED_PATH"
+rm -rf "$PACKAGE_CACHE_PATH" "$CLONED_SOURCES_PATH"
+rm -rf "$DERIVED_DATA_PATH"
+mkdir -p "$PACKAGE_CACHE_PATH" "$CLONED_SOURCES_PATH"
+
+xcodebuild \
+    -resolvePackageDependencies \
+    -project "$PROJECT_PATH" \
+    -scheme MLXServer \
+    -disablePackageRepositoryCache \
+    -packageCachePath "$PACKAGE_CACHE_PATH" \
+    -clonedSourcePackagesDirPath "$CLONED_SOURCES_PATH"
--- a/test.sh
+++ b/test.sh
@@ -21,6 +21,7 @@ XCODEBUILD_ARGS=(
    -scheme MLXServer
    -destination "$DESTINATION"
    -configuration "$CONFIG"
+    -skipMacroValidation
    SYMROOT="$BUILD_DIR"
 )
Author	SHA1	Message	Date
Chili Palmer	11300e3034	feat: more o n migration to v3	2026-04-30 11:58:53 +02:00
Chili Palmer	3502266ff9	feat: migration to mlx-swift-lm v3	2026-04-30 09:18:37 +02:00