diff --git a/.gitleaks.toml b/.gitleaks.toml new file mode 100644 index 0000000..a95f2b2 --- /dev/null +++ b/.gitleaks.toml @@ -0,0 +1,33 @@ +[extend] +useDefault = true + +[[allowlists]] +description = "Swift MLX module metadata labels are not secrets." +targetRules = ["generic-api-key"] +condition = "AND" +paths = [ + '''(^|/)Packages/mlx-audio-swift-valar/.+\.swift$''', +] +regexTarget = "match" +regexes = [ + '''key:\s*"[A-Za-z0-9_.-]+"''', +] + +[[allowlists]] +description = "Voice protection tests declare a fixture environment variable name, not a secret value." +targetRules = ["generic-api-key"] +condition = "AND" +paths = [ + '''(^|/)Packages/ValarPersistence/Tests/ValarPersistenceTests/VoiceLibraryProtectionTests\.swift$''', +] +regexTarget = "match" +regexes = [ + '''testKeyEnvironmentVariable = "VALARTTS_TEST_VOICE_KEY_B64"''', +] + +[[allowlists]] +description = "Public-audit rule patterns intentionally include private-key sentinels." +targetRules = ["private-key"] +paths = [ + '''(^|/)tools/public_repo_rules\.sh$''', +] diff --git a/PRIVACY.md b/PRIVACY.md index 16541cd..336a315 100644 --- a/PRIVACY.md +++ b/PRIVACY.md @@ -9,6 +9,7 @@ Valar is a local speech stack for macOS and Apple Silicon. This document describ - model downloads happen only when the user explicitly installs a model - the daemon listens on loopback by default - Valar does not install background services for you by default +- benchmark, validation, and generated audio files are written only where you choose or under local temporary directories used by the scripts ## Local Data @@ -19,15 +20,15 @@ Valar stores local state on your Mac for things like: - project or document state - generated outputs that you choose to write to disk -Saved voice material and other local state remain on your machine unless you choose to export or share those files yourself. +Saved voice material and other local state remain on your machine unless you choose to export or share those files yourself. Local files are protected by your macOS account and filesystem settings; do not assume generated outputs or model packs are encrypted unless you have enabled disk encryption or another explicit protection layer. If you use the MCP bridge in `bridge/`, you may choose to read or write local media under `~/Library/Application Support/Valar/bridge-storage`. The public bridge does not persist channel identifiers, sender metadata, or transcript/reply sidecars by default. Override this location with `VALARTTS_BRIDGE_STORAGE_ROOT` if needed. ## Network Access -The normal workflow uses the network only for model downloads from upstream hosts such as Hugging Face. Valar does not require a cloud inference backend. +The normal workflow uses the network only for dependency resolution and model downloads from upstream hosts such as Hugging Face. Valar does not require a cloud inference backend. -The public daemon binds to `127.0.0.1:8787` only. It is loopback-only in this repo and is not intended to be exposed as a network service. +The public daemon binds to `127.0.0.1:8787` by default. Do not expose it on a routable interface unless you add your own authentication, authorization, and network controls. ## Model Licenses diff --git a/Packages/ValarCore/Sources/ValarCore/ValarCatalog.swift b/Packages/ValarCore/Sources/ValarCore/ValarCatalog.swift index 7a739b8..d95e9ba 100644 --- a/Packages/ValarCore/Sources/ValarCore/ValarCatalog.swift +++ b/Packages/ValarCore/Sources/ValarCore/ValarCatalog.swift @@ -251,6 +251,7 @@ public actor ModelCatalog { private let capabilityRegistry: (any CapabilityRegistryManaging)? private let visibilityPolicyProvider: @Sendable () -> CatalogVisibilityPolicy private let hfCacheRoot: URL? + private let appPaths: ValarAppPaths? private var cachedModels: [ModelIdentifier: CatalogModel] public init( @@ -259,6 +260,7 @@ public actor ModelCatalog { packStore: (any ModelPackStore)? = nil, capabilityRegistry: (any CapabilityRegistryManaging)? = nil, hfCacheRoot: URL? = nil, + appPaths: ValarAppPaths? = nil, visibilityPolicyProvider: @escaping @Sendable () -> CatalogVisibilityPolicy = CatalogVisibilityPolicy.currentProcess ) { self.supportedSource = supportedSource @@ -267,6 +269,7 @@ public actor ModelCatalog { self.capabilityRegistry = capabilityRegistry self.visibilityPolicyProvider = visibilityPolicyProvider self.hfCacheRoot = hfCacheRoot + self.appPaths = appPaths self.cachedModels = [:] } @@ -290,7 +293,7 @@ public actor ModelCatalog { let installedRecord = try await packStore?.installedRecord(for: entry.id.rawValue) let supportedPersistenceManifest = Self.makePersistenceManifest(from: entry.manifest) let installPathStatus = installedRecord.map { - Self.installPathStatus($0, manifest: supportedPersistenceManifest) + Self.installPathStatus($0, manifest: supportedPersistenceManifest, appPaths: appPaths) } let materializedInstalledRecord = installPathStatus?.isValid == true ? installedRecord : nil let resolvedManifest = entry.manifest @@ -426,6 +429,7 @@ public actor ModelCatalog { fileManager: FileManager = .default, hfCacheRoot: URL? = nil ) -> Bool { + let resolvedHFCacheRoot = resolveHFHubCacheRoot(fileManager: fileManager, hfCacheRoot: hfCacheRoot) let hubArtifactURL: (String, String) -> URL? = { modelID, relativePath in guard let snapshotDirectory = hfHubSnapshotDirectory( modelID: modelID, @@ -442,7 +446,7 @@ public actor ModelCatalog { if !requiredArtifacts.isEmpty, requiredArtifacts.allSatisfy({ artifact in if let direct = hubArtifactURL(entry.id.rawValue, artifact.relativePath) { - return nonEmptyFileExists(at: direct, fileManager: fileManager) + return nonEmptyFileExists(at: direct, containedIn: resolvedHFCacheRoot, fileManager: fileManager) } let fallbackModelID: String? @@ -459,7 +463,7 @@ public actor ModelCatalog { return false } - return nonEmptyFileExists(at: fallback, fileManager: fileManager) + return nonEmptyFileExists(at: fallback, containedIn: resolvedHFCacheRoot, fileManager: fileManager) }) { return true } @@ -469,7 +473,8 @@ public actor ModelCatalog { relativePaths: requiredArtifactRelativePaths(for: entry.manifest), under: URL(fileURLWithPath: cachePath, isDirectory: true), fileManager: fileManager, - allowBasenameFallback: true + allowBasenameFallback: true, + containmentRoot: resolvedHFCacheRoot ) { return true } @@ -480,13 +485,43 @@ public actor ModelCatalog { private static func installPathStatus( _ record: InstalledModelRecord, manifest: ValarPersistence.ModelPackManifest, + appPaths: ValarAppPaths? = nil, fileManager: FileManager = .default ) -> CatalogInstallPathStatus { - let installedRoot = URL(fileURLWithPath: record.installedPath, isDirectory: true) - let manifestURL = URL(fileURLWithPath: record.manifestPath, isDirectory: false) + let installedRoot: URL + let manifestURL: URL + if let appPaths { + do { + installedRoot = try appPaths.modelPackDirectory( + familyID: manifest.familyID, + modelID: manifest.modelID + ) + manifestURL = try appPaths.modelPackManifestURL( + familyID: manifest.familyID, + modelID: manifest.modelID + ) + } catch { + return .missingInstalledPath + } + } else { + installedRoot = URL(fileURLWithPath: record.installedPath, isDirectory: true) + manifestURL = URL(fileURLWithPath: record.manifestPath, isDirectory: false) + } + guard fileManager.fileExists(atPath: installedRoot.path) else { return .missingInstalledPath } + if ValarAppPaths.isSymbolicLink(installedRoot, fileManager: fileManager) { + return .missingInstalledPath + } + if let appPaths, + (try? ValarAppPaths.validateContainment( + installedRoot, + within: appPaths.modelPacksDirectory, + fileManager: fileManager + )) == nil { + return .missingInstalledPath + } guard fileManager.fileExists(atPath: manifestURL.path) else { return .missingManifest } @@ -528,7 +563,8 @@ public actor ModelCatalog { relativePaths: [String], under root: URL, fileManager: FileManager = .default, - allowBasenameFallback: Bool = false + allowBasenameFallback: Bool = false, + containmentRoot: URL? = nil ) -> Bool { guard fileManager.fileExists(atPath: root.path) else { return false @@ -536,7 +572,11 @@ public actor ModelCatalog { return relativePaths.allSatisfy { relativePath in let artifactURL = root.appendingPathComponent(relativePath, isDirectory: false) - if nonEmptyFileExists(at: artifactURL, fileManager: fileManager) { + if nonEmptyFileExists( + at: artifactURL, + containedIn: containmentRoot ?? root, + fileManager: fileManager + ) { return true } @@ -545,16 +585,41 @@ public actor ModelCatalog { } let basenameURL = root.appendingPathComponent(URL(fileURLWithPath: relativePath).lastPathComponent, isDirectory: false) - return nonEmptyFileExists(at: basenameURL, fileManager: fileManager) + return nonEmptyFileExists( + at: basenameURL, + containedIn: containmentRoot ?? root, + fileManager: fileManager + ) } } - private static func nonEmptyFileExists(at url: URL, fileManager: FileManager = .default) -> Bool { + private static func nonEmptyFileExists( + at url: URL, + containedIn containmentRoot: URL? = nil, + fileManager: FileManager = .default + ) -> Bool { guard fileManager.fileExists(atPath: url.path) else { return false } - let size = (try? url.resourceValues(forKeys: [.fileSizeKey]))?.fileSize ?? 0 + let resolvedURL = url.resolvingSymlinksInPath() + if let containmentRoot { + do { + try ValarAppPaths.validateContainment( + resolvedURL, + within: containmentRoot, + fileManager: fileManager + ) + } catch { + return false + } + } + + let values = try? resolvedURL.resourceValues(forKeys: [.fileSizeKey, .isRegularFileKey]) + guard values?.isRegularFile == true else { + return false + } + let size = values?.fileSize ?? 0 return size > 0 } @@ -893,6 +958,7 @@ public enum ModelInstallerError: Error, Equatable, LocalizedError { case installedPackMissing(String) case invalidRemoteSourceLocation(String) case downloadFailed(String) + case unsafeFilesystemPath(String) case checksumMismatch(artifactPath: String, expected: String, actual: String) case missingChecksum(artifactPath: String) @@ -908,6 +974,8 @@ public enum ModelInstallerError: Error, Equatable, LocalizedError { return "Invalid remote model source: \(location)." case .downloadFailed(let message): return message + case .unsafeFilesystemPath(let message): + return message case .checksumMismatch(let artifactPath, let expected, let actual): return "Checksum mismatch for \(artifactPath): expected \(expected), got \(actual)." case .missingChecksum(let artifactPath): @@ -1216,9 +1284,23 @@ public actor ModelInstaller { return nil } - if FileManager.default.fileExists(atPath: existingRecord.installedPath) { - try FileManager.default.removeItem(atPath: existingRecord.installedPath) - try pruneEmptyModelPackDirectories(startingAt: URL(fileURLWithPath: existingRecord.installedPath, isDirectory: true)) + let manifest = try await registry.manifest(for: modelID.rawValue) + let packDirectory: URL + do { + packDirectory = try expectedModelPackDirectory( + for: existingRecord, + manifest: manifest, + requestedModelID: modelID.rawValue + ) + } catch { + throw ModelInstallerError.unsafeFilesystemPath( + "Refusing to remove unsafe model pack directory for '\(modelID.rawValue)': \(error.localizedDescription)" + ) + } + try validateDeletableModelPackDirectory(packDirectory) + if fileManager.fileExists(atPath: packDirectory.path) { + try fileManager.removeItem(at: packDirectory) + try pruneEmptyModelPackDirectories(startingAt: packDirectory) } guard let record = try await registry.uninstall(modelID: modelID.rawValue) else { @@ -1274,13 +1356,11 @@ public actor ModelInstaller { .appendingPathComponent(Self.hfMLXAudioDirectoryName(for: modelID.rawValue), isDirectory: true) var removedPaths: [String] = [] - if fileManager.fileExists(atPath: standardDirectory.path) { - try fileManager.removeItem(at: standardDirectory) - removedPaths.append(standardDirectory.path) - } - if fileManager.fileExists(atPath: legacyDirectory.path) { - try fileManager.removeItem(at: legacyDirectory) - removedPaths.append(legacyDirectory.path) + for directory in [standardDirectory, legacyDirectory] { + guard fileManager.fileExists(atPath: directory.path) else { continue } + try validateRemovableCacheDirectory(directory, within: hubRoot) + try fileManager.removeItem(at: directory) + removedPaths.append(directory.standardizedFileURL.path) } return removedPaths } @@ -1975,7 +2055,13 @@ public actor ModelInstaller { } private func removeIfPresent(_ url: URL) throws { + if ValarAppPaths.isSymbolicLink(url, fileManager: fileManager) { + throw ModelInstallerError.unsafeFilesystemPath( + "Refusing to remove unsafe model pack directory '\(url.path)': symbolic links are not allowed" + ) + } guard fileManager.fileExists(atPath: url.path) else { return } + try validateDeletableModelPackDirectory(url) try fileManager.removeItem(at: url) } @@ -1986,6 +2072,12 @@ public actor ModelInstaller { "Cached artifact is missing or points to a broken symlink: \(sourceURL.path)" ) } + try validateCachedArtifactSource(sourceURL, resolvedSource: resolvedSource) + try ValarAppPaths.validateContainment( + destinationURL, + within: paths.modelPacksDirectory, + fileManager: fileManager + ) do { try fileManager.linkItem(at: resolvedSource, to: destinationURL) return @@ -1994,12 +2086,73 @@ public actor ModelInstaller { } } + private func expectedModelPackDirectory( + for record: InstalledModelRecord, + manifest: ValarPersistence.ModelPackManifest?, + requestedModelID: String + ) throws -> URL { + try paths.modelPackDirectory( + familyID: manifest?.familyID ?? record.familyID, + modelID: requestedModelID + ) + } + + private func validateDeletableModelPackDirectory(_ directory: URL) throws { + do { + try ValarAppPaths.validateContainment(directory, within: paths.modelPacksDirectory, fileManager: fileManager) + if fileManager.fileExists(atPath: directory.path) { + try ValarAppPaths.validateDirectoryIsNotSymbolicLink(directory, fileManager: fileManager) + } + } catch { + throw ModelInstallerError.unsafeFilesystemPath( + "Refusing to remove unsafe model pack directory '\(directory.path)': \(error.localizedDescription)" + ) + } + } + + private func validateCachedArtifactSource(_ sourceURL: URL, resolvedSource: URL) throws { + let cacheRoot = Self.resolveHFHubCacheRoot( + fileManager: fileManager, + hfCacheRoot: hfCacheRoot + ) + do { + try ValarAppPaths.validateContainment( + resolvedSource.standardizedFileURL, + within: cacheRoot, + fileManager: fileManager + ) + } catch { + throw ModelInstallerError.unsafeFilesystemPath( + "Cached artifact resolves outside the Hugging Face cache: \(sourceURL.path)" + ) + } + + let values = try resolvedSource.resourceValues(forKeys: [.isRegularFileKey]) + guard values.isRegularFile == true else { + throw ModelInstallerError.unsafeFilesystemPath( + "Cached artifact is not a regular file: \(sourceURL.path)" + ) + } + } + + private func validateRemovableCacheDirectory(_ directory: URL, within cacheRoot: URL) throws { + do { + try ValarAppPaths.validateContainment(directory, within: cacheRoot, fileManager: fileManager) + try ValarAppPaths.validateDirectoryIsNotSymbolicLink(directory, fileManager: fileManager) + } catch { + throw ModelInstallerError.unsafeFilesystemPath( + "Refusing to remove cache directory outside the Hugging Face cache root: \(directory.path)" + ) + } + } + private func pruneEmptyModelPackDirectories(startingAt directory: URL) throws { - let allowedRoot = paths.modelPacksDirectory.standardizedFileURL - var current = directory.deletingLastPathComponent().standardizedFileURL + let allowedRoot = paths.modelPacksDirectory.resolvingSymlinksInPath().standardizedFileURL + var current = directory.deletingLastPathComponent().resolvingSymlinksInPath().standardizedFileURL while current.path != allowedRoot.path { try ValarAppPaths.validateContainment(current, within: allowedRoot, fileManager: fileManager) + try ValarAppPaths.validateDirectoryIsNotSymbolicLink(current, fileManager: fileManager) let children = try fileManager.contentsOfDirectory( at: current, includingPropertiesForKeys: nil, diff --git a/Packages/ValarCore/Sources/ValarCore/ValarRuntime+Daemon.swift b/Packages/ValarCore/Sources/ValarCore/ValarRuntime+Daemon.swift index 6544e6b..de40d94 100644 --- a/Packages/ValarCore/Sources/ValarCore/ValarRuntime+Daemon.swift +++ b/Packages/ValarCore/Sources/ValarCore/ValarRuntime+Daemon.swift @@ -459,6 +459,11 @@ public extension ValarRuntime { let sourceLocation = entry.remoteURL?.absoluteString ?? "catalog:\(id)" let manifest = ModelCatalog.makePersistenceManifest(from: entry.manifest) let mode: ModelInstallMode = sourceKind == .remoteURL ? .downloadArtifacts : .metadataOnly + let currentModel = try await modelCatalog.refresh().first { $0.id == identifier } + + if !refreshCache, currentModel?.installState == .installed { + return + } if refreshCache { if sourceKind == .remoteURL, !allowDownload { @@ -475,6 +480,12 @@ public extension ValarRuntime { _ = try await modelInstaller.purgeSharedCaches(for: identifier) } + if sourceKind == .remoteURL, + !allowDownload, + currentModel?.installState != .cached { + throw RouteModelError.refreshRequiresDownload(id) + } + _ = try await modelInstaller.install( manifest: manifest, sourceKind: sourceKind, @@ -784,7 +795,7 @@ public enum RouteModelError: LocalizedError, Sendable, Equatable { case .modelHidden(let message): return message case .refreshRequiresDownload(let id): - return "Refreshing shared cache for model '\(id)' requires network download. Retry with allow_download=true." + return "Model '\(id)' requires network download. Retry with allow_download=true." } } } diff --git a/Packages/ValarCore/Sources/ValarCore/ValarRuntime+Maintenance.swift b/Packages/ValarCore/Sources/ValarCore/ValarRuntime+Maintenance.swift index 066a65b..db75f4b 100644 --- a/Packages/ValarCore/Sources/ValarCore/ValarRuntime+Maintenance.swift +++ b/Packages/ValarCore/Sources/ValarCore/ValarRuntime+Maintenance.swift @@ -136,13 +136,20 @@ public extension ValarRuntime { paths orphanedPaths: [String], fileManager: FileManager = .default ) async throws -> [String] { + let currentOrphans = Set(try await orphanedModelPackPaths(fileManager: fileManager)) var removedPaths: [String] = [] for path in orphanedPaths { - guard fileManager.fileExists(atPath: path) else { continue } - try fileManager.removeItem(atPath: path) - removedPaths.append(path) + let url = URL(fileURLWithPath: path, isDirectory: true).standardizedFileURL + guard fileManager.fileExists(atPath: url.path) else { continue } + try validateModelPackDirectoryForRemoval(url, fileManager: fileManager) + guard currentOrphans.contains(url.path) else { + continue + } + try fileManager.removeItem(at: url) + removedPaths.append(url.path) try pruneEmptyModelPackDirectories( - startingAt: URL(fileURLWithPath: path, isDirectory: true) + startingAt: url, + fileManager: fileManager ) } return removedPaths.sorted() @@ -150,7 +157,19 @@ public extension ValarRuntime { func installedModelPackPaths() async throws -> Set { let receipts = try await modelPackRegistry.receipts() - return Set(receipts.map(\.installedModelPath)) + var paths: Set = [] + for receipt in receipts { + do { + let packDirectory = try self.paths.modelPackDirectory( + familyID: receipt.familyID, + modelID: receipt.modelID + ) + paths.formUnion(modelPackPathComparisonKeys(for: packDirectory)) + } catch { + continue + } + } + return paths } func performVoiceLibraryMaintenance( @@ -222,6 +241,14 @@ private extension ValarRuntime { guard fileManager.fileExists(atPath: familyDirectory.path, isDirectory: &isDirectory), isDirectory.boolValue else { continue } + guard !ValarAppPaths.isSymbolicLink(familyDirectory, fileManager: fileManager) else { + continue + } + try ValarAppPaths.validateContainment( + familyDirectory, + within: root, + fileManager: fileManager + ) let modelDirectories = try fileManager.contentsOfDirectory( at: familyDirectory, @@ -229,13 +256,22 @@ private extension ValarRuntime { options: [.skipsHiddenFiles] ) for modelDirectory in modelDirectories { + guard !ValarAppPaths.isSymbolicLink(modelDirectory, fileManager: fileManager) else { + continue + } + try ValarAppPaths.validateContainment( + modelDirectory, + within: root, + fileManager: fileManager + ) guard fileManager.fileExists( atPath: modelDirectory.appendingPathComponent("manifest.json").path ) else { continue } - let standardizedPath = modelDirectory.standardizedFileURL.path - if !registeredPaths.contains(standardizedPath) { + let pathKeys = modelPackPathComparisonKeys(for: modelDirectory) + if registeredPaths.isDisjoint(with: pathKeys) { + let standardizedPath = modelDirectory.standardizedFileURL.path orphanedPaths.append(standardizedPath) } } @@ -244,17 +280,36 @@ private extension ValarRuntime { return orphanedPaths } - func pruneEmptyModelPackDirectories(startingAt url: URL) throws { - let root = paths.modelPacksDirectory.standardizedFileURL - var current = url.standardizedFileURL.deletingLastPathComponent() - while current.path.hasPrefix(root.path), current != root { - let contents = try FileManager.default.contentsOfDirectory( + func modelPackPathComparisonKeys(for url: URL) -> Set { + let standardized = url.standardizedFileURL + return [ + standardized.path, + standardized.resolvingSymlinksInPath().standardizedFileURL.path + ] + } + + func validateModelPackDirectoryForRemoval(_ url: URL, fileManager: FileManager) throws { + let root = paths.modelPacksDirectory + try ValarAppPaths.validateContainment(url, within: root, fileManager: fileManager) + try ValarAppPaths.validateDirectoryIsNotSymbolicLink(url, fileManager: fileManager) + } + + func pruneEmptyModelPackDirectories( + startingAt url: URL, + fileManager: FileManager = .default + ) throws { + let root = paths.modelPacksDirectory.resolvingSymlinksInPath().standardizedFileURL + var current = url.deletingLastPathComponent().resolvingSymlinksInPath().standardizedFileURL + while current != root { + try ValarAppPaths.validateContainment(current, within: root, fileManager: fileManager) + try ValarAppPaths.validateDirectoryIsNotSymbolicLink(current, fileManager: fileManager) + let contents = try fileManager.contentsOfDirectory( at: current, includingPropertiesForKeys: nil, options: [.skipsHiddenFiles] ) guard contents.isEmpty else { break } - try FileManager.default.removeItem(at: current) + try fileManager.removeItem(at: current) let parent = current.deletingLastPathComponent() if parent.path == current.path { break } current = parent diff --git a/Packages/ValarCore/Sources/ValarCore/ValarRuntime.swift b/Packages/ValarCore/Sources/ValarCore/ValarRuntime.swift index 0bf81b3..2e0151b 100644 --- a/Packages/ValarCore/Sources/ValarCore/ValarRuntime.swift +++ b/Packages/ValarCore/Sources/ValarCore/ValarRuntime.swift @@ -221,7 +221,8 @@ public final class ValarRuntime: Sendable { supportedSource: SupportedCatalogSource.curated(), catalogStore: modelPackRegistry, packStore: modelPackRegistry, - capabilityRegistry: capabilityRegistry + capabilityRegistry: capabilityRegistry, + appPaths: paths ) let grdbProjectStore = GRDBProjectStore(db: database, paths: paths) let projectStore = GRDBBackedProjectStore(store: grdbProjectStore) diff --git a/Packages/ValarCore/Tests/ValarCoreTests/ValarCoreCatalogTests.swift b/Packages/ValarCore/Tests/ValarCoreTests/ValarCoreCatalogTests.swift index 50ab3a9..356e830 100644 --- a/Packages/ValarCore/Tests/ValarCoreTests/ValarCoreCatalogTests.swift +++ b/Packages/ValarCore/Tests/ValarCoreTests/ValarCoreCatalogTests.swift @@ -186,6 +186,37 @@ final class ValarCoreCatalogTests: XCTestCase { XCTAssertFalse(tada.cachedOnDisk) } + func testModelCatalogDoesNotMarkHFHubSymlinkEscapeAsCached() async throws { + let cacheRoot = FileManager.default.temporaryDirectory + .appendingPathComponent(UUID().uuidString, isDirectory: true) + try FileManager.default.createDirectory(at: cacheRoot, withIntermediateDirectories: true) + let outsideDirectory = FileManager.default.temporaryDirectory + .appendingPathComponent(UUID().uuidString, isDirectory: true) + try FileManager.default.createDirectory(at: outsideDirectory, withIntermediateDirectories: true) + let outsideArtifact = outsideDirectory.appendingPathComponent("model.safetensors", isDirectory: false) + try Data("outside-cache".utf8).write(to: outsideArtifact) + + let entry = makeSingleArtifactCatalogEntry(modelID: "example/Symlinked-Model") + try writeHFHubSnapshotSymlink( + cacheRoot: cacheRoot, + modelID: entry.id.rawValue, + relativePath: "model.safetensors", + destination: outsideArtifact + ) + + let catalog = ModelCatalog( + supportedSource: StaticSupportedCatalogSource(records: [entry]), + hfCacheRoot: cacheRoot, + visibilityPolicyProvider: { CatalogVisibilityPolicy(allowsNonCommercialModels: false) } + ) + + let models = try await catalog.refresh() + let model = try XCTUnwrap(models.first(where: { $0.id == entry.id })) + + XCTAssertEqual(model.installState, .supported) + XCTAssertFalse(model.cachedOnDisk) + } + func testModelCatalogMarksVibeVoiceSnapshotAsCachedWhenQwenTokenizerFallbackIsCached() async throws { let cacheRoot = FileManager.default.temporaryDirectory .appendingPathComponent(UUID().uuidString, isDirectory: true) @@ -589,6 +620,110 @@ final class ValarCoreCatalogTests: XCTestCase { XCTAssertFalse(capabilitiesAfterUninstall.contains(result.descriptor)) } + func testModelInstallerUninstallDerivesPackDirectoryInsteadOfTrustingPersistedPath() async throws { + let manifest = makePersistenceManifest( + modelID: "mlx-community/Qwen3-TTS-12Hz-1.7B-Base-bf16", + familyID: "qwen3_tts", + displayName: "Qwen3 TTS Base", + capabilities: ["speech.synthesis", "text.tokenization"], + backendKinds: ["mlx"] + ) + let paths = try makeAppPaths() + try materializeInstalledPack(paths: paths, manifest: manifest) + + let outsideDirectory = FileManager.default.temporaryDirectory + .appendingPathComponent(UUID().uuidString, isDirectory: true) + let tamperedPackDirectory = outsideDirectory.appendingPathComponent("do-not-remove", isDirectory: true) + try FileManager.default.createDirectory(at: tamperedPackDirectory, withIntermediateDirectories: true) + try Data("outside".utf8).write(to: tamperedPackDirectory.appendingPathComponent("sentinel.txt")) + + let receipt = ModelInstallReceipt( + modelID: manifest.modelID, + familyID: manifest.familyID, + sourceKind: .localFile, + sourceLocation: "/tmp/qwen-base.valarmodel", + installedModelPath: tamperedPackDirectory.path, + manifestPath: tamperedPackDirectory.appendingPathComponent("manifest.json").path, + artifactCount: manifest.artifactSpecs.count + ) + let record = InstalledModelRecord( + id: receipt.id, + familyID: manifest.familyID, + modelID: manifest.modelID, + displayName: manifest.displayName, + installedPath: tamperedPackDirectory.path, + manifestPath: tamperedPackDirectory.appendingPathComponent("manifest.json").path, + artifactCount: manifest.artifactSpecs.count, + sourceKind: .localFile + ) + let registry = ModelPackRegistry( + paths: paths, + manifests: [manifest], + records: [record], + receipts: [receipt] + ) + let installer = ModelInstaller(registry: registry, paths: paths) + + let removed = try await installer.uninstall(modelID: ModelIdentifier(manifest.modelID)) + let expectedPackDirectory = try paths.modelPackDirectory( + familyID: manifest.familyID, + modelID: manifest.modelID + ) + + XCTAssertEqual(removed?.installedPath, tamperedPackDirectory.path) + XCTAssertFalse(FileManager.default.fileExists(atPath: expectedPackDirectory.path)) + XCTAssertTrue(FileManager.default.fileExists(atPath: tamperedPackDirectory.path)) + let installedAfterRemoval = await registry.installedRecord(for: manifest.modelID) + XCTAssertNil(installedAfterRemoval) + } + + func testModelInstallerUninstallRejectsSymlinkedPackDirectory() async throws { + let manifest = makePersistenceManifest( + modelID: "mlx-community/Qwen3-TTS-12Hz-1.7B-Base-bf16", + familyID: "qwen3_tts", + displayName: "Qwen3 TTS Base", + capabilities: ["speech.synthesis", "text.tokenization"], + backendKinds: ["mlx"] + ) + let paths = try makeAppPaths() + let registry = ModelPackRegistry(paths: paths) + _ = try await registry.install( + manifest: manifest, + sourceKind: .localFile, + sourceLocation: "/tmp/qwen-base.valarmodel" + ) + + let packDirectory = try paths.modelPackDirectory( + familyID: manifest.familyID, + modelID: manifest.modelID + ) + let outsideDirectory = FileManager.default.temporaryDirectory + .appendingPathComponent(UUID().uuidString, isDirectory: true) + try FileManager.default.createDirectory(at: outsideDirectory, withIntermediateDirectories: true) + try FileManager.default.createDirectory( + at: packDirectory.deletingLastPathComponent(), + withIntermediateDirectories: true + ) + try FileManager.default.createSymbolicLink(at: packDirectory, withDestinationURL: outsideDirectory) + + let installer = ModelInstaller(registry: registry, paths: paths) + + do { + _ = try await installer.uninstall(modelID: ModelIdentifier(manifest.modelID)) + XCTFail("Expected symlinked pack removal to be rejected") + } catch let error as ModelInstallerError { + guard case .unsafeFilesystemPath(let message) = error else { + return XCTFail("Expected unsafe filesystem path error, got \(error)") + } + XCTAssertTrue(message.contains("Refusing to remove unsafe model pack directory")) + } + + let installedAfterFailedRemoval = await registry.installedRecord(for: manifest.modelID) + XCTAssertNotNil(installedAfterFailedRemoval) + XCTAssertTrue(ValarAppPaths.isSymbolicLink(packDirectory)) + XCTAssertTrue(FileManager.default.fileExists(atPath: outsideDirectory.path)) + } + func testModelInstallerDownloadsArtifactsPublishesProgressAndWritesManifest() async throws { let paths = try makeAppPaths() let cacheRoot = FileManager.default.temporaryDirectory @@ -696,6 +831,8 @@ final class ValarCoreCatalogTests: XCTestCase { let cacheRoot = FileManager.default.temporaryDirectory .appendingPathComponent(UUID().uuidString, isDirectory: true) try FileManager.default.createDirectory(at: cacheRoot, withIntermediateDirectories: true) + let configData = Data(#"{"model_type":"qwen3"}"#.utf8) + let weightsData = Data("pretend-weights".utf8) let manifest = ValarPersistence.ModelPackManifest( familyID: "qwen3_tts", @@ -709,20 +846,22 @@ final class ValarCoreCatalogTests: XCTestCase { ModelPackArtifact( id: "config", kind: "config", - relativePath: "config.json" + relativePath: "config.json", + checksum: sha256Hex(for: configData), + byteCount: configData.count ), ModelPackArtifact( id: "weights", kind: "weights", - relativePath: "model.safetensors" + relativePath: "model.safetensors", + checksum: sha256Hex(for: weightsData), + byteCount: weightsData.count ), ], licenseName: "Model license", licenseURL: "https://example.com/license" ) - let configData = Data(#"{"model_type":"qwen3"}"#.utf8) - let weightsData = Data("pretend-weights".utf8) try writeHFHubSnapshotArtifact( cacheRoot: cacheRoot, modelID: manifest.modelID, @@ -767,11 +906,78 @@ final class ValarCoreCatalogTests: XCTestCase { XCTAssertEqual(installedFileID.inode, cachedFileID.inode) } + func testModelInstallerRejectsCachedArtifactSymlinkEscapingHFCache() async throws { + let paths = try makeAppPaths() + let cacheRoot = FileManager.default.temporaryDirectory + .appendingPathComponent(UUID().uuidString, isDirectory: true) + try FileManager.default.createDirectory(at: cacheRoot, withIntermediateDirectories: true) + + let outsideDirectory = FileManager.default.temporaryDirectory + .appendingPathComponent(UUID().uuidString, isDirectory: true) + try FileManager.default.createDirectory(at: outsideDirectory, withIntermediateDirectories: true) + let outsideData = Data("outside-cache".utf8) + let outsideArtifact = outsideDirectory.appendingPathComponent("model.safetensors", isDirectory: false) + try outsideData.write(to: outsideArtifact) + + let manifest = ValarPersistence.ModelPackManifest( + familyID: "qwen3_tts", + modelID: "mlx-community/Qwen3-TTS-12Hz-1.7B-Base-bf16", + displayName: "Qwen3 TTS Base", + capabilities: ["speech.synthesis"], + backendKinds: ["mlx"], + artifactSpecs: [ + ModelPackArtifact( + id: "weights", + kind: "weights", + relativePath: "model.safetensors", + checksum: sha256Hex(for: outsideData), + byteCount: outsideData.count + ), + ], + licenseName: "Model license", + licenseURL: "https://example.com/license" + ) + try writeHFHubSnapshotSymlink( + cacheRoot: cacheRoot, + modelID: manifest.modelID, + relativePath: "model.safetensors", + destination: outsideArtifact + ) + + let installer = ModelInstaller( + registry: ModelPackRegistry(paths: paths), + paths: paths, + hfCacheRoot: cacheRoot, + sessionFactory: makeMockDownloadSession + ) + + do { + _ = try await installer.install( + manifest: manifest, + sourceKind: .remoteURL, + sourceLocation: "https://huggingface.co/\(manifest.modelID)", + mode: .downloadArtifacts + ) + XCTFail("Expected escaped cached artifact symlink to be rejected") + } catch let error as ModelInstallerError { + guard case .unsafeFilesystemPath(let message) = error else { + return XCTFail("Expected unsafe filesystem path error, got \(error)") + } + XCTAssertTrue(message.contains("outside the Hugging Face cache")) + } + } + func testModelInstallerPrefersHFHubSnapshotOverMLXAudioCacheWhenBothExist() async throws { let paths = try makeAppPaths() let cacheRoot = FileManager.default.temporaryDirectory .appendingPathComponent(UUID().uuidString, isDirectory: true) try FileManager.default.createDirectory(at: cacheRoot, withIntermediateDirectories: true) + let configData = Data(#"{"model_type":"qwen3"}"#.utf8) + let hubWeightsData = Data("hub-weights".utf8) + let tokenizerData = Data(#"{"tokenizer":"hub"}"#.utf8) + let mlxAudioConfigData = Data(#"{"model_type":"qwen3"}"#.utf8) + let mlxAudioWeightsData = Data("mlx-audio-weights".utf8) + let mlxAudioTokenizerData = Data(#"{"tokenizer":"mlx-audio"}"#.utf8) let manifest = ValarPersistence.ModelPackManifest( familyID: "qwen3_tts", @@ -782,9 +988,27 @@ final class ValarCoreCatalogTests: XCTestCase { tokenizerType: "huggingface", sampleRate: 24_000, artifactSpecs: [ - ModelPackArtifact(id: "config", kind: "config", relativePath: "config.json"), - ModelPackArtifact(id: "weights", kind: "weights", relativePath: "model.safetensors"), - ModelPackArtifact(id: "tokenizer", kind: "tokenizer", relativePath: "tokenizer.json"), + ModelPackArtifact( + id: "config", + kind: "config", + relativePath: "config.json", + checksum: sha256Hex(for: configData), + byteCount: configData.count + ), + ModelPackArtifact( + id: "weights", + kind: "weights", + relativePath: "model.safetensors", + checksum: sha256Hex(for: hubWeightsData), + byteCount: hubWeightsData.count + ), + ModelPackArtifact( + id: "tokenizer", + kind: "tokenizer", + relativePath: "tokenizer.json", + checksum: sha256Hex(for: tokenizerData), + byteCount: tokenizerData.count + ), ], licenseName: "Model license", licenseURL: "https://example.com/license" @@ -794,38 +1018,38 @@ final class ValarCoreCatalogTests: XCTestCase { cacheRoot: cacheRoot, modelID: manifest.modelID, relativePath: "config.json", - data: Data(#"{"model_type":"qwen3"}"#.utf8) + data: configData ) try writeHFHubSnapshotArtifact( cacheRoot: cacheRoot, modelID: manifest.modelID, relativePath: "model.safetensors", - data: Data("hub-weights".utf8) + data: hubWeightsData ) try writeHFHubSnapshotArtifact( cacheRoot: cacheRoot, modelID: manifest.modelID, relativePath: "tokenizer.json", - data: Data(#"{"tokenizer":"hub"}"#.utf8) + data: tokenizerData ) try writeMLXAudioCacheArtifact( cacheRoot: cacheRoot, modelID: manifest.modelID, relativePath: "config.json", - data: Data(#"{"model_type":"qwen3"}"#.utf8) + data: mlxAudioConfigData ) try writeMLXAudioCacheArtifact( cacheRoot: cacheRoot, modelID: manifest.modelID, relativePath: "model.safetensors", - data: Data("mlx-audio-weights".utf8) + data: mlxAudioWeightsData ) try writeMLXAudioCacheArtifact( cacheRoot: cacheRoot, modelID: manifest.modelID, relativePath: "tokenizer.json", - data: Data(#"{"tokenizer":"mlx-audio"}"#.utf8) + data: mlxAudioTokenizerData ) let installer = ModelInstaller( @@ -1075,15 +1299,17 @@ final class ValarCoreCatalogTests: XCTestCase { ) } - // Models without pre-computed checksums are now trusted when downloaded - // from HuggingFace. Install should succeed. - let record = try await installer.install( - manifest: manifest, - sourceKind: ModelPackSourceKind.remoteURL, - sourceLocation: sourceLocation, - mode: ModelInstallMode.downloadArtifacts - ) - XCTAssertNotNil(record) + do { + _ = try await installer.install( + manifest: manifest, + sourceKind: ModelPackSourceKind.remoteURL, + sourceLocation: sourceLocation, + mode: ModelInstallMode.downloadArtifacts + ) + XCTFail("Expected missing checksum") + } catch let error as ModelInstallerError { + XCTAssertEqual(error, .missingChecksum(artifactPath: "weights/model.safetensors")) + } } func testModelInstallerRejectsMissingConfigChecksumOnRemoteDownload() async throws { @@ -1126,15 +1352,17 @@ final class ValarCoreCatalogTests: XCTestCase { ) } - // Models without pre-computed checksums are now trusted when downloaded - // from HuggingFace. Install should succeed. - let record = try await installer.install( - manifest: manifest, - sourceKind: ModelPackSourceKind.remoteURL, - sourceLocation: sourceLocation, - mode: ModelInstallMode.downloadArtifacts - ) - XCTAssertNotNil(record) + do { + _ = try await installer.install( + manifest: manifest, + sourceKind: ModelPackSourceKind.remoteURL, + sourceLocation: sourceLocation, + mode: ModelInstallMode.downloadArtifacts + ) + XCTFail("Expected missing checksum") + } catch let error as ModelInstallerError { + XCTAssertEqual(error, .missingChecksum(artifactPath: "config.json")) + } } func testModelInstallerRejectsTokenizerChecksumMismatchBeforeRegistryInstall() async throws { @@ -1313,19 +1541,18 @@ final class ValarCoreCatalogTests: XCTestCase { XCTAssertTrue(report.issues.contains { $0.severity == .warning && $0.message.contains("Config artifact 'config'") - && $0.message.contains("cannot locally verify the downloaded file") + && $0.message.contains("Valar will not install the file from a remote source") }) XCTAssertTrue(report.issues.contains { $0.severity == .warning && $0.message.contains("Tokenizer artifact 'tokenizer'") - && $0.message.contains("cannot locally verify the downloaded file") + && $0.message.contains("Valar will not install the file from a remote source") }) XCTAssertTrue(report.issues.contains { $0.severity == .warning && $0.message.contains("Weight artifact 'weights'") - && $0.message.contains("cannot locally verify the downloaded file") + && $0.message.contains("Valar will not install the file from a remote source") }) - XCTAssertFalse(report.issues.contains { $0.message.contains("remote downloads will be rejected") }) } func testQwenCatalogEntriesIncludeWeightChecksums() { @@ -1405,6 +1632,24 @@ final class ValarCoreCatalogTests: XCTestCase { ) } + private func makeSingleArtifactCatalogEntry(modelID: String) -> SupportedModelCatalogEntry { + SupportedModelCatalogEntry( + manifest: ValarModelKit.ModelPackManifest( + id: ModelIdentifier(modelID), + familyID: .qwen3TTS, + displayName: "Symlinked Model", + domain: .tts, + capabilities: [.speechSynthesis], + supportedBackends: [BackendRequirement(backendKind: .mlx)], + artifacts: [ + ArtifactSpec(id: "weights", role: .weights, relativePath: "model.safetensors"), + ], + licenses: [] + ), + remoteURL: URL(string: "https://huggingface.co/\(modelID)") + ) + } + private func makeAppPaths() throws -> ValarAppPaths { let root = FileManager.default.temporaryDirectory .appendingPathComponent(UUID().uuidString, isDirectory: true) @@ -1437,6 +1682,31 @@ final class ValarCoreCatalogTests: XCTestCase { try data.write(to: artifactURL) } + private func writeHFHubSnapshotSymlink( + cacheRoot: URL, + modelID: String, + relativePath: String, + destination: URL + ) throws { + let repoDirectory = cacheRoot.appendingPathComponent( + "models--" + modelID.replacingOccurrences(of: "/", with: "--"), + isDirectory: true + ) + let revision = "test-revision" + let snapshotRoot = repoDirectory + .appendingPathComponent("snapshots", isDirectory: true) + .appendingPathComponent(revision, isDirectory: true) + let refsRoot = repoDirectory.appendingPathComponent("refs", isDirectory: true) + + try FileManager.default.createDirectory(at: snapshotRoot, withIntermediateDirectories: true) + try FileManager.default.createDirectory(at: refsRoot, withIntermediateDirectories: true) + try Data(revision.utf8).write(to: refsRoot.appendingPathComponent("main", isDirectory: false)) + + let artifactURL = snapshotRoot.appendingPathComponent(relativePath, isDirectory: false) + try FileManager.default.createDirectory(at: artifactURL.deletingLastPathComponent(), withIntermediateDirectories: true) + try FileManager.default.createSymbolicLink(at: artifactURL, withDestinationURL: destination) + } + private func writeHFHubSnapshotArtifacts( cacheRoot: URL, modelID: String, diff --git a/Packages/ValarCore/Tests/ValarCoreTests/ValarCoreTests.swift b/Packages/ValarCore/Tests/ValarCoreTests/ValarCoreTests.swift index cc9837e..83ea5e7 100644 --- a/Packages/ValarCore/Tests/ValarCoreTests/ValarCoreTests.swift +++ b/Packages/ValarCore/Tests/ValarCoreTests/ValarCoreTests.swift @@ -803,6 +803,100 @@ final class ValarCoreTests: XCTestCase { XCTAssertEqual(report.orphanedModelPackPaths, [packDirectory.standardizedFileURL.path]) } + func testReconcileLocalModelPackStateIgnoresSymlinkedOrphanDirectories() async throws { + let fileManager = FileManager.default + let baseURL = fileManager.temporaryDirectory.appendingPathComponent(UUID().uuidString, isDirectory: true) + let runtime = try ValarRuntime( + paths: ValarAppPaths(baseURL: baseURL), + runtimeConfiguration: RuntimeConfiguration(), + inferenceBackend: LocalStubInferenceBackend() + ) + let modelID = ValarRuntime.defaultVoiceCloneRuntimeModelID + let manifestOptional = try await runtime.modelCatalog.installationManifest(for: modelID) + let manifest = try XCTUnwrap(manifestOptional) + let packDirectory = try runtime.paths.modelPackDirectory( + familyID: manifest.familyID, + modelID: manifest.modelID + ) + let outsideDirectory = fileManager.temporaryDirectory.appendingPathComponent(UUID().uuidString, isDirectory: true) + try fileManager.createDirectory(at: outsideDirectory, withIntermediateDirectories: true) + try fileManager.createDirectory(at: packDirectory.deletingLastPathComponent(), withIntermediateDirectories: true) + try JSONEncoder().encode(manifest).write( + to: outsideDirectory.appendingPathComponent("manifest.json", isDirectory: false) + ) + try fileManager.createSymbolicLink(at: packDirectory, withDestinationURL: outsideDirectory) + + let report = try await runtime.auditLocalModelPackState(fileManager: fileManager) + + XCTAssertFalse(report.orphanedModelPackPaths.contains(packDirectory.standardizedFileURL.path)) + XCTAssertTrue(ValarAppPaths.isSymbolicLink(packDirectory, fileManager: fileManager)) + XCTAssertTrue(fileManager.fileExists(atPath: outsideDirectory.path)) + } + + func testRemoveOrphanedModelPacksDoesNotRemoveRegisteredPackFromArbitraryInput() async throws { + let fileManager = FileManager.default + let baseURL = fileManager.temporaryDirectory.appendingPathComponent(UUID().uuidString, isDirectory: true) + let paths = ValarAppPaths(baseURL: baseURL) + let runtime = try ValarRuntime( + paths: paths, + runtimeConfiguration: RuntimeConfiguration(), + inferenceBackend: LocalStubInferenceBackend() + ) + let modelID = ValarRuntime.defaultVoiceCloneRuntimeModelID + let manifestOptional = try await runtime.modelCatalog.installationManifest(for: modelID) + let manifest = try XCTUnwrap(manifestOptional) + _ = try await runtime.modelPackRegistry.install( + manifest: manifest, + sourceKind: .remoteURL, + sourceLocation: "https://example.com/\(modelID.rawValue)", + notes: nil + ) + try materializeInstalledPack(paths: paths, manifest: manifest) + let packDirectory = try paths.modelPackDirectory(familyID: manifest.familyID, modelID: manifest.modelID) + + let removed = try await runtime.removeOrphanedModelPacks( + paths: [packDirectory.path], + fileManager: fileManager + ) + + XCTAssertTrue(removed.isEmpty) + XCTAssertTrue(fileManager.fileExists(atPath: packDirectory.path)) + let installedRecord = try await runtime.modelPackRegistry.installedRecord(for: modelID.rawValue) + XCTAssertNotNil(installedRecord) + } + + func testInstallRouteModelRequiresAllowDownloadWhenRemoteModelIsNotCached() async throws { + let fileManager = FileManager.default + let baseURL = fileManager.temporaryDirectory.appendingPathComponent(UUID().uuidString, isDirectory: true) + let cacheRoot = fileManager.temporaryDirectory.appendingPathComponent(UUID().uuidString, isDirectory: true) + try fileManager.createDirectory(at: cacheRoot, withIntermediateDirectories: true) + let previousHFHubCache = ProcessInfo.processInfo.environment["HF_HUB_CACHE"] + setenv("HF_HUB_CACHE", cacheRoot.path, 1) + defer { + if let previousHFHubCache { + setenv("HF_HUB_CACHE", previousHFHubCache, 1) + } else { + unsetenv("HF_HUB_CACHE") + } + } + let runtime = try ValarRuntime( + paths: ValarAppPaths(baseURL: baseURL), + runtimeConfiguration: RuntimeConfiguration(), + inferenceBackend: LocalStubInferenceBackend() + ) + let modelID = ValarRuntime.defaultVoiceCloneRuntimeModelID + + do { + try await runtime.installRouteModel(id: modelID.rawValue, allowDownload: false) + XCTFail("Expected install without download consent to fail") + } catch let error as RouteModelError { + XCTAssertEqual(error, .refreshRequiresDownload(modelID.rawValue)) + } + + let installedRecord = try await runtime.modelPackRegistry.installedRecord(for: modelID.rawValue) + XCTAssertNil(installedRecord) + } + func testDaemonReadyStatusIncludesInstalledAvailabilityWhenModelsAreResident() async throws { let baseURL = FileManager.default.temporaryDirectory.appendingPathComponent(UUID().uuidString, isDirectory: true) let paths = ValarAppPaths(baseURL: baseURL) diff --git a/Packages/ValarPersistence/Sources/ValarPersistence/ProjectBundle.swift b/Packages/ValarPersistence/Sources/ValarPersistence/ProjectBundle.swift index fe761bd..6de3a2a 100644 --- a/Packages/ValarPersistence/Sources/ValarPersistence/ProjectBundle.swift +++ b/Packages/ValarPersistence/Sources/ValarPersistence/ProjectBundle.swift @@ -633,6 +633,8 @@ public final class ProjectBundleWriter { using fileManager: FileManager, at directoryURL: URL ) throws { + try ValarAppPaths.validateDirectoryIsNotSymbolicLink(directoryURL, fileManager: fileManager) + var isDirectory: ObjCBool = false if fileManager.fileExists(atPath: directoryURL.path, isDirectory: &isDirectory), isDirectory.boolValue { return diff --git a/Packages/ValarPersistence/Sources/ValarPersistence/ValarPersistence.swift b/Packages/ValarPersistence/Sources/ValarPersistence/ValarPersistence.swift index 1744653..e54b424 100644 --- a/Packages/ValarPersistence/Sources/ValarPersistence/ValarPersistence.swift +++ b/Packages/ValarPersistence/Sources/ValarPersistence/ValarPersistence.swift @@ -6,6 +6,7 @@ public enum ValarPathValidationError: Error, LocalizedError, Equatable { case absolutePathNotAllowed(label: String, value: String) case pathTraversalDetected(label: String, value: String) case pathEscapesContainment(path: String, allowedDirectory: String) + case symbolicLinkDirectoryNotAllowed(path: String) case applicationSupportDirectoryUnavailable public var errorDescription: String? { @@ -18,6 +19,8 @@ public enum ValarPathValidationError: Error, LocalizedError, Equatable { return "\(label) contains path traversal components" case .pathEscapesContainment: return "Resolved path escapes the allowed directory" + case .symbolicLinkDirectoryNotAllowed: + return "Directory must not be a symbolic link" case .applicationSupportDirectoryUnavailable: return "Application Support directory is unavailable; cannot safely determine storage location" } @@ -121,6 +124,22 @@ public struct ValarAppPaths: Sendable, Equatable { try Self.validateContainment(candidate, within: allowedDirectory, fileManager: fileManager) } + public static func isSymbolicLink( + _ url: URL, + fileManager: FileManager = .default + ) -> Bool { + (try? fileManager.destinationOfSymbolicLink(atPath: url.path)) != nil + } + + public static func validateDirectoryIsNotSymbolicLink( + _ url: URL, + fileManager: FileManager = .default + ) throws { + if isSymbolicLink(url, fileManager: fileManager) { + throw ValarPathValidationError.symbolicLinkDirectoryNotAllowed(path: url.path) + } + } + public static func validateRelativePath(_ path: String, label: String = "path") throws { let trimmed = path.trimmingCharacters(in: .whitespacesAndNewlines) guard !trimmed.isEmpty else { diff --git a/Packages/ValarPersistence/Tests/ValarPersistenceTests/ValarPersistenceTests.swift b/Packages/ValarPersistence/Tests/ValarPersistenceTests/ValarPersistenceTests.swift index b167101..a9d47f2 100644 --- a/Packages/ValarPersistence/Tests/ValarPersistenceTests/ValarPersistenceTests.swift +++ b/Packages/ValarPersistence/Tests/ValarPersistenceTests/ValarPersistenceTests.swift @@ -317,6 +317,63 @@ final class ValarPersistenceTests: XCTestCase { XCTAssertFalse(FileManager.default.fileExists(atPath: bundleURL.appendingPathExtension("saving").path)) } + func testProjectBundleWriterRejectsSymlinkedChildDirectoryOnResave() throws { + let root = try makeTemporaryDirectory() + defer { try? FileManager.default.removeItem(at: root) } + + let project = ProjectRecord( + id: UUID(uuidString: "AAAAAAAA-BBBB-CCCC-DDDD-EEEEEEEEEEEE")!, + title: "Narrated Chapter One", + createdAt: Date(timeIntervalSince1970: 1_710_000_000), + updatedAt: Date(timeIntervalSince1970: 1_710_000_600), + notes: "Opening chapter" + ) + let chapter = ChapterRecord( + id: UUID(uuidString: "11111111-2222-3333-4444-555555555555")!, + projectID: project.id, + index: 0, + title: "Chapter 1", + script: "A beginning.", + speakerLabel: "Narrator", + estimatedDurationSeconds: 12.5 + ) + let snapshot = ProjectBundleSnapshot( + project: project, + modelID: "mlx-community/Qwen3-TTS-12Hz-1.7B-Base-bf16", + renderSynthesisOptions: RenderSynthesisOptions(), + chapters: [chapter], + renderJobs: [], + exports: [] + ) + let bundleURL = root + .appendingPathComponent("Narrated Chapter One", isDirectory: true) + .appendingPathExtension("valarproject") + let location = ValarProjectBundleLocation( + projectID: project.id, + title: project.title, + bundleURL: bundleURL + ) + let writer = ProjectBundleWriter() + _ = try writer.write(snapshot, to: location, createdAt: Date(timeIntervalSince1970: 1_710_004_000)) + + let outsideDirectory = try makeTemporaryDirectory() + defer { try? FileManager.default.removeItem(at: outsideDirectory) } + try FileManager.default.removeItem(at: location.assetsDirectory) + try FileManager.default.createSymbolicLink(at: location.assetsDirectory, withDestinationURL: outsideDirectory) + + XCTAssertThrowsError( + try writer.write(snapshot, to: location, createdAt: Date(timeIntervalSince1970: 1_710_005_000)) + ) { error in + XCTAssertEqual( + error as? ValarPathValidationError, + .symbolicLinkDirectoryNotAllowed(path: location.assetsDirectory.path) + ) + } + XCTAssertTrue(ValarAppPaths.isSymbolicLink(location.assetsDirectory)) + XCTAssertTrue(FileManager.default.fileExists(atPath: outsideDirectory.path)) + XCTAssertFalse(FileManager.default.fileExists(atPath: bundleURL.appendingPathExtension("saving").path)) + } + func testProjectBundleSaveFailureCleansTempAndKeepsOriginalBundle() throws { let root = try makeTemporaryDirectory() defer { try? FileManager.default.removeItem(at: root) } diff --git a/README.md b/README.md index 0c971de..c25490d 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ That repo-local state directory is gitignored in this public repo. | Goal | Start with | Why | | --- | --- | --- | -| Prove your machine works | `Soprano` | Smallest, fastest first clip | +| Prove your machine works | `Soprano` | Smallest first clip and lowest-friction install | | Main narration / stable speech | `Qwen Base` | Primary supported TTS lane | | Voice design and saved speakers | `Qwen VoiceDesign` | Text-driven voice creation that stays inside the main Qwen family | | Transcription or timestamps | `Qwen ASR` / `Qwen ForcedAligner` | Main supported ASR and alignment lane | @@ -73,18 +73,9 @@ The bridge is optional. Bun is only required if you want MCP or advanced automat Valar app preview

-## Performance +## Performance Expectations -Benchmarked on Apple Silicon with warm model cache: - -| Model | First Audio | Real-Time Factor | Footprint | -|---|---|---|---| -| Soprano | Instant | < 1.0x | ~285 MB | -| Qwen Base | < 1 s | < 1.5x | ~1.0-4.2 GB | -| VibeVoice | < 500 ms | < 1.5x | ~700 MB | -| Voxtral | < 1 s | < 1.5x | ~2.4 GB | - -First Audio = time to first audio byte (warm start). Real-Time Factor = synthesis time / audio duration (lower is faster). See [tests/vibevoice_corpus/README.md](./tests/vibevoice_corpus/README.md) for detailed benchmark targets and methodology. +The public docs do not publish universal real-time-factor targets. Local speed varies with Mac model, memory pressure, cold versus warm model cache, text length, selected voice path, and upstream model changes. Treat validation clips and locally generated benchmark output as machine-specific evidence, not a project-wide performance guarantee. ## App Source diff --git a/SECURITY.md b/SECURITY.md index 2d7f63f..e5e1573 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -9,6 +9,9 @@ Valar runs locally on Apple Silicon. Audio, text, model execution, and project d - there is no telemetry or analytics pipeline in the repo - model downloads are user-initiated and fetched over HTTPS - the public MCP bridge depends on the same local loopback daemon rather than a hosted inference service +- the public validation gate runs a public-content audit, secret scan, and git-history scan in CI + +Do not expose `valarttsd` outside loopback without adding your own authentication, authorization, TLS, logging, and rate controls. The public daemon is intended as a local developer and automation surface, not a hardened internet service. ## Reporting A Vulnerability @@ -17,6 +20,7 @@ If you discover a security issue in Valar, report it privately rather than openi Preferred path: - use GitHub Security Advisories on the public `Valar` repo +- if advisories are unavailable, open a minimal public issue that says you need private security contact without exploit details - do not publish exploit details in public issues while the report is untriaged Please include: @@ -26,6 +30,8 @@ Please include: - expected impact - the commit or release you tested +Maintainers should acknowledge new reports within 5 business days and keep the reporter updated when a fix, mitigation, or non-issue determination is ready. + ## Repo Scope This public repo focuses on: diff --git a/docs/model-quickstart.md b/docs/model-quickstart.md index c639d73..ff82619 100644 --- a/docs/model-quickstart.md +++ b/docs/model-quickstart.md @@ -6,7 +6,7 @@ Use `swift run --package-path apps/ValarCLI valartts ...` if you have not instal ## Two-Command Newcomer Path -For the fastest first success in a clean clone: +For the smallest first success in a clean clone: ```bash make quickstart @@ -67,7 +67,7 @@ make validate-bridge-live make validate-bridge-live-blessed ``` -## Soprano: Fastest First Run +## Soprano: Smallest First Run ```bash make first-clip @@ -123,7 +123,7 @@ swift run --package-path apps/ValarCLI valartts speak \ --output "${TMPDIR:-/tmp}/qwen-stable.wav" ``` -Qwen `CustomVoice` remains the named-speaker lane, but the fastest public voice-creation path is `VoiceDesign` first and optional `stabilize` second. +Qwen `CustomVoice` remains the named-speaker lane, but the simplest public voice-creation path is `VoiceDesign` first and optional `stabilize` second. ### Qwen ASR diff --git a/docs/prerequisites-and-expectations.md b/docs/prerequisites-and-expectations.md index 214bad8..d5ec903 100644 --- a/docs/prerequisites-and-expectations.md +++ b/docs/prerequisites-and-expectations.md @@ -53,7 +53,7 @@ The public v1 working set is intentionally narrow. | Family | Public posture | Rough local footprint | Best first use | | --- | --- | --- | --- | -| `Soprano` | Supported | About `285 MB` | Fastest first success | +| `Soprano` | Supported | About `285 MB` | Smallest first success | | `Qwen` | Supported | About `1.0 GB` to `4.2 GB`, depending on the lane | Main TTS, ASR, and alignment lane | | `VibeVoice` | Compatibility preview | About `700 MB` | Preset-voice realtime TTS, English-first | | `Voxtral` | Preview, explicit non-commercial opt-in | About `2.4 GB` | Optional preset-voice multilingual lane | @@ -62,7 +62,7 @@ Footprint values are rough planning numbers and can drift as upstream packs chan ## What To Install First -- Install `Soprano` first if you want the fastest proof that your machine is working. +- Install `Soprano` first if you want the smallest proof that your machine is working. - Install `Qwen Base` first if your real target is long-form narration or the main public TTS lane. - Install `Qwen ASR` or `Qwen ForcedAligner` only when you need transcription or timestamps. - Install `VibeVoice` only when you specifically want preset voices and accept preview-quality multilingual behavior. diff --git a/docs/working-models.md b/docs/working-models.md index 61220b5..2ff9747 100644 --- a/docs/working-models.md +++ b/docs/working-models.md @@ -15,7 +15,7 @@ Only the exact IDs below are part of the main public onboarding path. | Family | Exact install ID | Support status | Download posture | Rough local footprint | License | Best use | | --- | --- | --- | --- | --- | --- | --- | -| Soprano 1.1 80M | `mlx-community/Soprano-1.1-80M-bf16` | Supported | Recommended first install | About `285 MB` | Apache 2.0 | Fastest first run and starter demo lane | +| Soprano 1.1 80M | `mlx-community/Soprano-1.1-80M-bf16` | Supported | Recommended first install | About `285 MB` | Apache 2.0 | Smallest first run and starter demo lane | | Qwen3-TTS 1.7B Base | `mlx-community/Qwen3-TTS-12Hz-1.7B-Base-bf16` | Supported | Optional install | About `4.2 GB` | Qwen License Agreement | Main narrator lane and stable long-form speech | | Qwen3-TTS 1.7B CustomVoice | `mlx-community/Qwen3-TTS-12Hz-1.7B-CustomVoice-bf16` | Supported | Optional install | About `4.2 GB` | Qwen License Agreement | Named speakers and saved voices | | Qwen3-TTS 1.7B VoiceDesign | `mlx-community/Qwen3-TTS-12Hz-1.7B-VoiceDesign-bf16` | Supported | Optional install | About `4.2 GB` | Qwen License Agreement | Text-described voice creation | diff --git a/tools/public_repo_audit.sh b/tools/public_repo_audit.sh index 805f0ba..8b4e269 100644 --- a/tools/public_repo_audit.sh +++ b/tools/public_repo_audit.sh @@ -94,7 +94,12 @@ path_is_excluded() { collect_files() { local root="$1" - if git -C "$root" rev-parse --show-toplevel >/dev/null 2>&1 \ + local root_abs + local git_top + root_abs="$(cd "$root" && pwd -P)" + git_top="$(git -C "$root" rev-parse --show-toplevel 2>/dev/null || true)" + if [[ -n "$git_top" ]] \ + && [[ "$(cd "$git_top" && pwd -P)" == "$root_abs" ]] \ && git -C "$root" rev-parse --verify HEAD >/dev/null 2>&1; then git -C "$root" ls-files else @@ -131,7 +136,10 @@ while IFS= read -r rel; do [[ -n "$rel" ]] || continue [[ -f "$SCAN_ROOT/$rel" ]] || continue case "$rel" in - tools/public_repo_audit.sh|tools/public_repo_rules.sh|tools/public_repo_secret_scan.sh) + .gitleaks.toml|.gitleaksignore) + continue + ;; + tools/public_repo_audit.sh|tools/public_repo_history_scan.sh|tools/public_repo_rules.sh|tools/public_repo_secret_scan.sh) continue ;; esac diff --git a/tools/public_repo_history_scan.sh b/tools/public_repo_history_scan.sh index 3aae8f6..7ab46fc 100755 --- a/tools/public_repo_history_scan.sh +++ b/tools/public_repo_history_scan.sh @@ -68,17 +68,18 @@ while IFS= read -r regex; do CONTENT_BLOCK_REGEXES+=("$regex") done < <(valar_public_repo_content_block_regexes) -declare -a SECRET_BLOCK_REGEXES=() +declare -a HISTORY_BLOCK_REGEXES=() while IFS= read -r regex; do [[ -n "$regex" ]] || continue - SECRET_BLOCK_REGEXES+=("$regex") -done < <(valar_public_repo_secret_block_regexes) + HISTORY_BLOCK_REGEXES+=("$regex") +done < <(valar_public_repo_history_block_regexes) TMP_PATH_HITS="$(mktemp)" TMP_CONTENT_HITS="$(mktemp)" +TMP_MESSAGE_HITS="$(mktemp)" TMP_FILTERED_CONTENT_HITS="$(mktemp)" cleanup() { - rm -f "$TMP_PATH_HITS" "$TMP_CONTENT_HITS" "$TMP_FILTERED_CONTENT_HITS" + rm -f "$TMP_PATH_HITS" "$TMP_CONTENT_HITS" "$TMP_MESSAGE_HITS" "$TMP_FILTERED_CONTENT_HITS" } trap cleanup EXIT @@ -92,7 +93,7 @@ declare -a GREP_ARGS=() for regex in "${CONTENT_BLOCK_REGEXES[@]}"; do GREP_ARGS+=(-e "$regex") done -for regex in "${SECRET_BLOCK_REGEXES[@]}"; do +for regex in "${HISTORY_BLOCK_REGEXES[@]}"; do GREP_ARGS+=(-e "$regex") done @@ -129,6 +130,16 @@ while IFS= read -r rev; do echo "git grep failed while scanning revision $rev" >&2 exit "$grep_status" fi + + set +e + git -C "$SCAN_ROOT" log -1 --format=%B "$rev" | grep -E -n "${GREP_ARGS[@]}" \ + | sed "s#^#$rev:commit-message:#" >> "$TMP_MESSAGE_HITS" + grep_status=$? + set -e + if [[ "$grep_status" -gt 1 ]]; then + echo "git log/grep failed while scanning commit message $rev" >&2 + exit "$grep_status" + fi done < <(git -C "$SCAN_ROOT" rev-list --all) if [[ -s "$TMP_PATH_HITS" ]]; then @@ -144,6 +155,13 @@ while IFS= read -r hit; do fi printf '%s\n' "$hit" >> "$TMP_FILTERED_CONTENT_HITS" done < "$TMP_CONTENT_HITS" +while IFS= read -r hit; do + [[ -n "$hit" ]] || continue + if history_content_hit_is_allowed "$hit"; then + continue + fi + printf '%s\n' "$hit" >> "$TMP_FILTERED_CONTENT_HITS" +done < "$TMP_MESSAGE_HITS" if [[ -s "$TMP_FILTERED_CONTENT_HITS" ]]; then echo "Public history scan failed. Found private or secret-like content in git history:" >&2 diff --git a/tools/public_repo_rules.sh b/tools/public_repo_rules.sh index 6d021ed..2bab9ac 100644 --- a/tools/public_repo_rules.sh +++ b/tools/public_repo_rules.sh @@ -22,6 +22,13 @@ valar_public_repo_path_block_regexes() { (^|/)docs/analysis-[^/]+\.md$ (^|/)docs/agent-loop-playbook\.md$ (^|/)docs/repo-lanes\.md$ +(^|/)\.env(\..*)?$ +(^|/)\.netrc$ +(^|/)\.npmrc$ +(^|/)\.pypirc$ +(^|/)\.aws/(credentials|config)$ +(^|/)(id_rsa|id_dsa|id_ecdsa|id_ed25519)$ +(^|/)[^/]*\.(pem|p12|pfx|key)$ (^|/)artifacts/ (^|/)mlx_audio/ (^|/)MANIFEST\.in$ @@ -77,25 +84,74 @@ forwardToClaude claude-fast claude-quality ghp_[A-Za-z0-9]{36} +gh[osru]_[A-Za-z0-9_]{36,} github_pat_[A-Za-z0-9_]{20,} hf_[A-Za-z0-9]{32,} -sk-[A-Za-z0-9]{20,} +sk-[A-Za-z0-9_-]{20,} +sk-proj-[A-Za-z0-9_-]{20,} +sk-ant-[A-Za-z0-9_-]{20,} xox[baprs]-[A-Za-z0-9-]+ +xapp-[A-Za-z0-9-]+ +AKIA[0-9A-Z]{16} +ASIA[0-9A-Z]{16} +AIza[0-9A-Za-z_-]{35} +(OPENAI|ANTHROPIC|LINEAR|TELEGRAM|SLACK|GITHUB|HF|HUGGINGFACE|HUGGING_FACE|RUNPOD|AWS|GOOGLE|GEMINI|DISCORD|ELEVENLABS)[A-Z0-9_]*(KEY|TOKEN|SECRET|PASSWORD)[[:space:]]*[:=][[:space:]]*["']?[^[:space:]'"]{8,} -----BEGIN [A-Z ]*PRIVATE KEY----- +-----BEGIN OPENSSH PRIVATE KEY----- EOF } valar_public_repo_secret_block_regexes() { cat <<'EOF' ghp_[A-Za-z0-9]{36} +gh[osru]_[A-Za-z0-9_]{36,} +github_pat_[A-Za-z0-9_]{20,} +hf_[A-Za-z0-9]{32,} +sk-[A-Za-z0-9_-]{20,} +sk-proj-[A-Za-z0-9_-]{20,} +sk-ant-[A-Za-z0-9_-]{20,} +xox[baprs]-[A-Za-z0-9-]+ +xapp-[A-Za-z0-9-]+ +AKIA[0-9A-Z]{16} +ASIA[0-9A-Z]{16} +AIza[0-9A-Za-z_-]{35} +(OPENAI|ANTHROPIC|LINEAR|TELEGRAM|SLACK|GITHUB|HF|HUGGINGFACE|HUGGING_FACE|RUNPOD|AWS|GOOGLE|GEMINI|DISCORD|ELEVENLABS)[A-Z0-9_]*(KEY|TOKEN|SECRET|PASSWORD)[[:space:]]*[:=][[:space:]]*["']?[^[:space:]'"]{8,} +-----BEGIN [A-Z ]*PRIVATE KEY----- +-----BEGIN OPENSSH PRIVATE KEY----- +EOF +} + +valar_public_repo_history_block_regexes() { + cat <<'EOF' +/Users/[A-Za-z0-9._-]+/ +/Volumes/[A-Za-z0-9._-]+/ +SSK_Symphony +Library/LaunchAgents +private snapshot +snapshot-and-corpus +Capture private snapshot +private-local-ops +LINEAR_API_KEY +\.claude/ +claude-plugins-official +Claude Code +forwardToClaude +claude-fast +claude-quality +ghp_[A-Za-z0-9]{36} +gh[osru]_[A-Za-z0-9_]{36,} github_pat_[A-Za-z0-9_]{20,} hf_[A-Za-z0-9]{32,} -sk-[A-Za-z0-9]{20,} +sk-[A-Za-z0-9_-]{20,} +sk-proj-[A-Za-z0-9_-]{20,} +sk-ant-[A-Za-z0-9_-]{20,} xox[baprs]-[A-Za-z0-9-]+ +xapp-[A-Za-z0-9-]+ AKIA[0-9A-Z]{16} +ASIA[0-9A-Z]{16} AIza[0-9A-Za-z_-]{35} -(OPENAI_API_KEY|ANTHROPIC_API_KEY|LINEAR_API_KEY|TELEGRAM_BOT_TOKEN|SLACK_BOT_TOKEN|GITHUB_TOKEN)[[:space:]]*=[[:space:]]*[^[:space:]]+ -(RUNPOD_API_KEY|HF_TOKEN|HUGGINGFACE_HUB_TOKEN|AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|GEMINI_API_KEY|GOOGLE_API_KEY)[[:space:]]*=[[:space:]]*[^[:space:]]+ +(OPENAI|ANTHROPIC|LINEAR|TELEGRAM|SLACK|GITHUB|HF|HUGGINGFACE|HUGGING_FACE|RUNPOD|AWS|GOOGLE|GEMINI|DISCORD|ELEVENLABS)[A-Z0-9_]*(KEY|TOKEN|SECRET|PASSWORD)[[:space:]]*[:=][[:space:]]*["']?[^[:space:]'"]{8,} -----BEGIN [A-Z ]*PRIVATE KEY----- +-----BEGIN OPENSSH PRIVATE KEY----- EOF } diff --git a/tools/public_repo_secret_scan.sh b/tools/public_repo_secret_scan.sh index 71f3ebd..abc4420 100644 --- a/tools/public_repo_secret_scan.sh +++ b/tools/public_repo_secret_scan.sh @@ -63,8 +63,13 @@ source "$RULES_FILE" collect_files() { local root="$1" + local root_abs + local git_top + root_abs="$(cd "$root" && pwd -P)" + git_top="$(git -C "$root" rev-parse --show-toplevel 2>/dev/null || true)" if [[ "$scan_ignored" == "0" ]] \ - && git -C "$root" rev-parse --show-toplevel >/dev/null 2>&1 \ + && [[ -n "$git_top" ]] \ + && [[ "$(cd "$git_top" && pwd -P)" == "$root_abs" ]] \ && git -C "$root" rev-parse --verify HEAD >/dev/null 2>&1; then git -C "$root" ls-files else @@ -94,7 +99,10 @@ while IFS= read -r rel; do [[ -n "$rel" ]] || continue [[ -f "$SCAN_ROOT/$rel" ]] || continue case "$rel" in - tools/public_repo_audit.sh|tools/public_repo_rules.sh|tools/public_repo_secret_scan.sh) + .gitleaks.toml|.gitleaksignore) + continue + ;; + tools/public_repo_audit.sh|tools/public_repo_history_scan.sh|tools/public_repo_rules.sh|tools/public_repo_secret_scan.sh) continue ;; esac