feat: more on migration to v3
This commit is contained in:
3
.vscode/settings.json
vendored
3
.vscode/settings.json
vendored
@@ -1,6 +1,7 @@
|
|||||||
{
|
{
|
||||||
"chat.tools.terminal.autoApprove": {
|
"chat.tools.terminal.autoApprove": {
|
||||||
"./test.sh": true,
|
"./test.sh": true,
|
||||||
"setopt": true
|
"setopt": true,
|
||||||
|
"./build.sh": true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -335,19 +335,23 @@ final class APIServer {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NOTE: repetition / presence / frequency penalties are intentionally
|
||||||
|
// not forwarded to GenerateParameters. mlx-swift-lm 3.31.3's
|
||||||
|
// PenaltyProcessor uses TokenRing.loadPrompt, which assumes a 1-D
|
||||||
|
// prompt MLXArray. VLM models (Gemma3, Qwen-VL, …) hand it a 2-D
|
||||||
|
// [1, N] tokens array, so the ring buffer ends up the wrong size and
|
||||||
|
// every later MLX.where in TokenRing.append crashes via fatalError.
|
||||||
|
// Re-enable once upstream fixes TokenRing to flatten the prompt.
|
||||||
let generateParams = GenerateParameters(
|
let generateParams = GenerateParameters(
|
||||||
maxTokens: maxTokens,
|
maxTokens: maxTokens,
|
||||||
temperature: Float(generationSettings.temperature),
|
temperature: Float(generationSettings.temperature),
|
||||||
topP: Float(generationSettings.topP),
|
topP: Float(generationSettings.topP),
|
||||||
topK: generationSettings.topK,
|
topK: generationSettings.topK,
|
||||||
minP: Float(generationSettings.minP),
|
minP: Float(generationSettings.minP)
|
||||||
repetitionPenalty: generationSettings.repetitionPenalty.map(Float.init),
|
|
||||||
repetitionContextSize: 128,
|
|
||||||
presencePenalty: generationSettings.presencePenalty.map(Float.init),
|
|
||||||
presenceContextSize: 128,
|
|
||||||
frequencyPenalty: generationSettings.frequencyPenalty.map(Float.init),
|
|
||||||
frequencyContextSize: 128
|
|
||||||
)
|
)
|
||||||
|
_ = generationSettings.repetitionPenalty
|
||||||
|
_ = generationSettings.presencePenalty
|
||||||
|
_ = generationSettings.frequencyPenalty
|
||||||
let currentModelId = modelManager.currentModel?.id ?? modelName
|
let currentModelId = modelManager.currentModel?.id ?? modelName
|
||||||
let engine = InferenceEngine(container: container)
|
let engine = InferenceEngine(container: container)
|
||||||
let preparedInference: InferenceEngine.PreparedInference
|
let preparedInference: InferenceEngine.PreparedInference
|
||||||
|
|||||||
@@ -51,10 +51,13 @@ enum LocalModelResolver {
|
|||||||
print("[LocalModelResolver] Found \(snapshotDirs.count) snapshots")
|
print("[LocalModelResolver] Found \(snapshotDirs.count) snapshots")
|
||||||
for snapshotDir in snapshotDirs where isDirectory(snapshotDir) {
|
for snapshotDir in snapshotDirs where isDirectory(snapshotDir) {
|
||||||
let configPath = snapshotDir.appendingPathComponent("config.json")
|
let configPath = snapshotDir.appendingPathComponent("config.json")
|
||||||
if FileManager.default.fileExists(atPath: configPath.path) {
|
guard FileManager.default.fileExists(atPath: configPath.path) else { continue }
|
||||||
print("[LocalModelResolver] Found valid snapshot: \(snapshotDir.path)")
|
guard hasCompleteWeights(at: snapshotDir) else {
|
||||||
return snapshotDir
|
print("[LocalModelResolver] Snapshot missing weight files (incomplete download): \(snapshotDir.path)")
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
|
print("[LocalModelResolver] Found valid snapshot: \(snapshotDir.path)")
|
||||||
|
return snapshotDir
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -155,12 +158,36 @@ enum LocalModelResolver {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private static func containsModelArtifacts(at directory: URL) -> Bool {
|
private static func containsModelArtifacts(at directory: URL) -> Bool {
|
||||||
let requiredPaths = [
|
let configExists = FileManager.default.fileExists(
|
||||||
directory.appendingPathComponent("config.json").path,
|
atPath: directory.appendingPathComponent("config.json").path
|
||||||
directory.appendingPathComponent("model.safetensors").path,
|
)
|
||||||
directory.appendingPathComponent("model.safetensors.index.json").path,
|
return configExists && hasCompleteWeights(at: directory)
|
||||||
]
|
}
|
||||||
return requiredPaths.contains { FileManager.default.fileExists(atPath: $0) }
|
|
||||||
|
/// Returns true when the snapshot has the actual weight files on disk:
|
||||||
|
/// either a single `model.safetensors`, or every shard listed in
|
||||||
|
/// `model.safetensors.index.json`. Returns false for partial/interrupted downloads.
///
/// - Parameter directory: Directory whose weight files are checked.
/// - Returns: `true` if `model.safetensors` exists in `directory`, or if the
///   index's `weight_map` names at least one shard and every named shard
///   exists in `directory`; `false` otherwise.
|
||||||
|
static func hasCompleteWeights(at directory: URL) -> Bool {
|
||||||
|
let fm = FileManager.default
|
||||||
|
let single = directory.appendingPathComponent("model.safetensors")
|
||||||
|
// A single-file checkpoint short-circuits: its presence alone counts as
// complete and the index file is never consulted.
if fm.fileExists(atPath: single.path) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
let indexURL = directory.appendingPathComponent("model.safetensors.index.json")
|
||||||
|
// The index must exist, be readable, parse as a JSON object, and contain a
// "weight_map" dictionary. Any failure (including read/parse errors, which
// `try?` turns into nil) is reported as "not complete".
guard fm.fileExists(atPath: indexURL.path),
|
||||||
|
let data = try? Data(contentsOf: indexURL),
|
||||||
|
let json = (try? JSONSerialization.jsonObject(with: data)) as? [String: Any],
|
||||||
|
let weightMap = json["weight_map"] as? [String: Any]
|
||||||
|
else {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// Distinct string values of `weight_map`, used below as file names relative
// to `directory`. Non-string values are silently dropped by `compactMap`.
let shardNames = Set(weightMap.values.compactMap { $0 as? String })
|
||||||
|
// An index that names no shard files is treated as incomplete.
guard !shardNames.isEmpty else { return false }
|
||||||
|
// Complete only if every referenced shard file exists under `directory`.
return shardNames.allSatisfy { name in
|
||||||
|
fm.fileExists(atPath: directory.appendingPathComponent(name).path)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Delete the local cache for a model so it will be re-downloaded next time.
|
/// Delete the local cache for a model so it will be re-downloaded next time.
|
||||||
|
|||||||
@@ -88,18 +88,19 @@ final class ChatViewModel {
|
|||||||
let thinkingContext: [String: any Sendable]? = generationSettings.thinkingEnabled
|
let thinkingContext: [String: any Sendable]? = generationSettings.thinkingEnabled
|
||||||
? nil
|
? nil
|
||||||
: ["enable_thinking": false]
|
: ["enable_thinking": false]
|
||||||
|
// NOTE: repetition / presence / frequency penalties are intentionally
|
||||||
|
// not forwarded to GenerateParameters. mlx-swift-lm 3.31.3's
|
||||||
|
// PenaltyProcessor uses TokenRing.loadPrompt, which assumes a 1-D
|
||||||
|
// prompt MLXArray. VLM models (Gemma3, Qwen-VL, …) hand it a 2-D
|
||||||
|
// [1, N] tokens array, so the ring buffer ends up the wrong size and
|
||||||
|
// every later MLX.where in TokenRing.append crashes via fatalError.
|
||||||
|
// Re-enable once upstream fixes TokenRing to flatten the prompt.
|
||||||
let generateParameters = GenerateParameters(
|
let generateParameters = GenerateParameters(
|
||||||
maxTokens: generationSettings.maxTokens,
|
maxTokens: generationSettings.maxTokens,
|
||||||
temperature: Float(generationSettings.temperature),
|
temperature: Float(generationSettings.temperature),
|
||||||
topP: Float(generationSettings.topP),
|
topP: Float(generationSettings.topP),
|
||||||
topK: generationSettings.topK,
|
topK: generationSettings.topK,
|
||||||
minP: Float(generationSettings.minP),
|
minP: Float(generationSettings.minP)
|
||||||
repetitionPenalty: generationSettings.repetitionPenalty.map(Float.init),
|
|
||||||
repetitionContextSize: 128,
|
|
||||||
presencePenalty: generationSettings.presencePenalty.map(Float.init),
|
|
||||||
presenceContextSize: 128,
|
|
||||||
frequencyPenalty: generationSettings.frequencyPenalty.map(Float.init),
|
|
||||||
frequencyContextSize: 128
|
|
||||||
)
|
)
|
||||||
let history = conversation.messages.compactMap(historyMessage(from:))
|
let history = conversation.messages.compactMap(historyMessage(from:))
|
||||||
if history.isEmpty {
|
if history.isEmpty {
|
||||||
|
|||||||
@@ -25,8 +25,8 @@ final class ModelManager {
|
|||||||
|
|
||||||
// Download-specific state for the modal
|
// Download-specific state for the modal
|
||||||
var isDownloading = false
|
var isDownloading = false
|
||||||
var downloadFilesTotal: Int64 = 0
|
var downloadBytesTotal: Int64 = 0
|
||||||
var downloadFilesCompleted: Int64 = 0
|
var downloadBytesCompleted: Int64 = 0
|
||||||
var downloadSpeed: Double = 0 // bytes/sec
|
var downloadSpeed: Double = 0 // bytes/sec
|
||||||
|
|
||||||
private var idleTimer: Timer?
|
private var idleTimer: Timer?
|
||||||
@@ -87,8 +87,8 @@ final class ModelManager {
|
|||||||
isDownloading = false
|
isDownloading = false
|
||||||
downloadProgress = 0
|
downloadProgress = 0
|
||||||
loadingModelName = ""
|
loadingModelName = ""
|
||||||
downloadFilesTotal = 0
|
downloadBytesTotal = 0
|
||||||
downloadFilesCompleted = 0
|
downloadBytesCompleted = 0
|
||||||
downloadSpeed = 0
|
downloadSpeed = 0
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -116,8 +116,8 @@ final class ModelManager {
|
|||||||
let needsDownload = !effectiveConfig.isLocal
|
let needsDownload = !effectiveConfig.isLocal
|
||||||
if needsDownload {
|
if needsDownload {
|
||||||
isDownloading = true
|
isDownloading = true
|
||||||
downloadFilesTotal = 0
|
downloadBytesTotal = 0
|
||||||
downloadFilesCompleted = 0
|
downloadBytesCompleted = 0
|
||||||
downloadSpeed = 0
|
downloadSpeed = 0
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -126,8 +126,8 @@ final class ModelManager {
|
|||||||
Task { @MainActor in
|
Task { @MainActor in
|
||||||
self.downloadProgress = progress.fractionCompleted
|
self.downloadProgress = progress.fractionCompleted
|
||||||
if self.isDownloading {
|
if self.isDownloading {
|
||||||
self.downloadFilesTotal = progress.totalUnitCount
|
self.downloadBytesTotal = progress.totalUnitCount
|
||||||
self.downloadFilesCompleted = progress.completedUnitCount
|
self.downloadBytesCompleted = progress.completedUnitCount
|
||||||
if let speed = progress.userInfo[.throughputKey] as? Double {
|
if let speed = progress.userInfo[.throughputKey] as? Double {
|
||||||
self.downloadSpeed = speed
|
self.downloadSpeed = speed
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -20,9 +20,9 @@ struct DownloadModalView: View {
|
|||||||
.progressViewStyle(.linear)
|
.progressViewStyle(.linear)
|
||||||
|
|
||||||
HStack {
|
HStack {
|
||||||
// Files progress
|
// Bytes progress
|
||||||
if modelManager.downloadFilesTotal > 0 {
|
if modelManager.downloadBytesTotal > 0 {
|
||||||
Text("File \(modelManager.downloadFilesCompleted)/\(modelManager.downloadFilesTotal)")
|
Text("\(formatBytes(modelManager.downloadBytesCompleted)) / \(formatBytes(modelManager.downloadBytesTotal))")
|
||||||
.font(.caption.monospacedDigit())
|
.font(.caption.monospacedDigit())
|
||||||
.foregroundStyle(.secondary)
|
.foregroundStyle(.secondary)
|
||||||
}
|
}
|
||||||
@@ -65,4 +65,17 @@ struct DownloadModalView: View {
|
|||||||
return String(format: "%.0f B/s", bytesPerSec)
|
return String(format: "%.0f B/s", bytesPerSec)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Formats a byte count as a short human-readable string using binary units.
///
/// Output shapes: `"N B"` below 1 KiB, `"N KB"` and `"N MB"` with no decimals,
/// and `"N.NN GB"` with two decimals.
///
/// - Parameter bytes: The number of bytes to format.
/// - Returns: The formatted size string.
private func formatBytes(_ bytes: Int64) -> String {
    let kib = 1024.0
    let mib = kib * 1024
    let gib = mib * 1024
    let value = Double(bytes)

    // KB and MB are printed with "%.0f", so a value within half a unit of the
    // next boundary would round up to "1024 KB" / "1024 MB". Switch to the
    // larger unit half a display step early so the label never shows 1024.
    if value >= gib - mib / 2 {
        return String(format: "%.2f GB", value / gib)
    } else if value >= mib - kib / 2 {
        return String(format: "%.0f MB", value / mib)
    } else if value >= kib {
        return String(format: "%.0f KB", value / kib)
    } else {
        // Below 1 KiB the exact integer count is shown.
        return "\(bytes) B"
    }
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user