diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 305e941..8798c8f 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -5,11 +5,21 @@ updates: schedule: interval: "weekly" - - package-ecosystem: "npm" + - package-ecosystem: "bun" directory: "/bridge" schedule: interval: "weekly" + - package-ecosystem: "pip" + directory: "/scripts/voxtral" + schedule: + interval: "weekly" + + - package-ecosystem: "pre-commit" + directory: "/" + schedule: + interval: "weekly" + - package-ecosystem: "swift" directory: "/apps/ValarCLI" schedule: @@ -25,6 +35,16 @@ updates: schedule: interval: "weekly" + - package-ecosystem: "swift" + directory: "/Packages/ValarCore" + schedule: + interval: "weekly" + + - package-ecosystem: "swift" + directory: "/Packages/ValarModelKit" + schedule: + interval: "weekly" + - package-ecosystem: "swift" directory: "/Packages/ValarAudio" schedule: diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 18bba46..1863257 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -27,22 +27,17 @@ jobs: - name: Initialize CodeQL uses: github/codeql-action/init@c10b8064de6f491fea524254123dbe5e09572f13 # v4 with: - languages: swift,javascript-typescript + languages: javascript-typescript - - name: Ensure build helpers - run: | - command -v jq >/dev/null 2>&1 || arch -arm64 brew install jq - command -v rg >/dev/null 2>&1 || arch -arm64 brew install ripgrep - jq --version - rg --version | head -n 1 - - - name: Bootstrap dependencies - run: bash ./tools/bootstrap.sh native --with-bridge + - name: Set up Bun + uses: oven-sh/setup-bun@0c5077e51419868618aeaa5fe8019c62421857d6 # v2 + with: + bun-version: "1.3.14" - - name: Build Swift targets + - name: Install bridge dependencies run: | - swift build --package-path apps/ValarCLI - swift build --package-path apps/ValarDaemon + cd bridge + bun install --frozen-lockfile --ignore-scripts - name: Analyze uses: github/codeql-action/analyze@c10b8064de6f491fea524254123dbe5e09572f13 # v4 diff --git a/.github/workflows/native.yml b/.github/workflows/native.yml index d0dcae1..0177fc1 100644 --- a/.github/workflows/native.yml +++ b/.github/workflows/native.yml @@ -23,7 +23,7 @@ jobs: - name: Set up Bun uses: oven-sh/setup-bun@0c5077e51419868618aeaa5fe8019c62421857d6 # v2 with: - bun-version: "1.2.13" + bun-version: "1.3.14" - name: Ensure validation helpers run: | diff --git a/Packages/ValarCore/Package.resolved b/Packages/ValarCore/Package.resolved index 3e8e88f..e01aaf2 100644 --- a/Packages/ValarCore/Package.resolved +++ b/Packages/ValarCore/Package.resolved @@ -95,8 +95,8 @@ "kind" : "remoteSourceControl", "location" : "https://github.com/apple/swift-crypto.git", "state" : { - "revision" : "fa308c07a6fa04a727212d793e761460e41049c3", - "version" : "4.3.0" + "revision" : "1b6b2e274e85105bfa155183145a1dcfd63331f1", + "version" : "4.5.0" } }, { diff --git a/Packages/ValarCore/Sources/ValarCore/ValarCatalog.swift b/Packages/ValarCore/Sources/ValarCore/ValarCatalog.swift index a43bd23..7a739b8 100644 --- a/Packages/ValarCore/Sources/ValarCore/ValarCatalog.swift +++ b/Packages/ValarCore/Sources/ValarCore/ValarCatalog.swift @@ -887,7 +887,7 @@ public struct ModelInstallValidationReport: Sendable, Equatable { } } -public enum ModelInstallerError: Error, Equatable { +public enum ModelInstallerError: Error, Equatable, LocalizedError { case validationFailed([String]) case installedRecordMissing(String) case installedPackMissing(String) @@ -895,6 +895,25 @@ public enum ModelInstallerError: Error, Equatable { case downloadFailed(String) case checksumMismatch(artifactPath: String, expected: String, actual: String) case missingChecksum(artifactPath: String) + + public var errorDescription: String? { + switch self { + case .validationFailed(let messages): + return "Model manifest validation failed: \(messages.joined(separator: "; "))" + case .installedRecordMissing(let modelID): + return "Installed model record was not created for \(modelID)." + case .installedPackMissing(let path): + return "Installed model pack is missing at \(path)." + case .invalidRemoteSourceLocation(let location): + return "Invalid remote model source: \(location)." + case .downloadFailed(let message): + return message + case .checksumMismatch(let artifactPath, let expected, let actual): + return "Checksum mismatch for \(artifactPath): expected \(expected), got \(actual)." + case .missingChecksum(let artifactPath): + return "Remote artifact '\(artifactPath)' is missing a SHA-256 checksum." + } + } } public enum ModelInstallMode: Sendable, Equatable { @@ -1063,7 +1082,7 @@ public actor ModelInstaller { for artifact in uncheckedArtifacts { issues.append(.init( severity: .warning, - message: "\(Self.checksumWarningLabel(for: artifact.kind)) artifact '\(artifact.id)' is missing a SHA-256 checksum; Valar can install it, but cannot locally verify the downloaded file" + message: "\(Self.checksumWarningLabel(for: artifact.kind)) artifact '\(artifact.id)' is missing a SHA-256 checksum; Valar will not install the file from a remote source until a checksum is declared" )) } do { @@ -1216,6 +1235,37 @@ public actor ModelInstaller { return record } + public func verifyInstalledArtifacts(manifest: ValarPersistence.ModelPackManifest) throws { + let packDirectory = try paths.modelPackDirectory(familyID: manifest.familyID, modelID: manifest.modelID) + + for artifact in manifest.artifactSpecs where !artifact.relativePath.hasSuffix("/") { + let artifactURL = packDirectory.appendingPathComponent(artifact.relativePath, isDirectory: false) + try ValarAppPaths.validateContainment(artifactURL, within: packDirectory) + + guard fileManager.fileExists(atPath: artifactURL.path) else { + if artifact.required { + throw ModelInstallerError.downloadFailed( + "Installed artifact '\(artifact.relativePath)' is missing from the current model pack." + ) + } + continue + } + + if let checksum = artifact.checksum { + let actualChecksum = try sha256Hex(for: artifactURL) + guard actualChecksum.caseInsensitiveCompare(checksum) == .orderedSame else { + throw ModelInstallerError.checksumMismatch( + artifactPath: artifact.relativePath, + expected: checksum, + actual: actualChecksum + ) + } + } else if Self.remoteChecksumRequiredKinds.contains(artifact.kind) { + throw ModelInstallerError.missingChecksum(artifactPath: artifact.relativePath) + } + } + } + public func purgeSharedCaches(for modelID: ModelIdentifier) throws -> [String] { let hubRoot = Self.resolveHFHubCacheRoot(fileManager: fileManager, hfCacheRoot: hfCacheRoot) let standardDirectory = hubRoot.appendingPathComponent(Self.hfHubRepoDirectoryName(for: modelID.rawValue), isDirectory: true) @@ -1286,6 +1336,10 @@ public actor ModelInstaller { let weight = 1 / totalArtifacts let destinationURL = stagingDirectory.appendingPathComponent(artifact.relativePath, isDirectory: false) try ValarAppPaths.validateContainment(destinationURL, within: stagingDirectory) + let requiresChecksum = Self.remoteChecksumRequiredKinds.contains(artifact.kind) + guard !artifact.required || artifact.checksum != nil else { + throw ModelInstallerError.missingChecksum(artifactPath: artifact.relativePath) + } try fileManager.createDirectory( at: destinationURL.deletingLastPathComponent(), @@ -1347,12 +1401,9 @@ public actor ModelInstaller { actual: actualChecksum ) } - } else if artifact.checksum != nil && hfCached == nil { - // Catalog declared a checksum but it wasn't verified above — - // this shouldn't happen, but guard against it. + } else if requiresChecksum { + try? removeIfPresent(destinationURL) throw ModelInstallerError.missingChecksum(artifactPath: artifact.relativePath) - // Note: models without pre-computed checksums (checksum == nil) - // are trusted when downloaded directly from HuggingFace. } } } diff --git a/Packages/ValarCore/Sources/ValarCore/ValarRuntime+Daemon.swift b/Packages/ValarCore/Sources/ValarCore/ValarRuntime+Daemon.swift index 33da736..6544e6b 100644 --- a/Packages/ValarCore/Sources/ValarCore/ValarRuntime+Daemon.swift +++ b/Packages/ValarCore/Sources/ValarCore/ValarRuntime+Daemon.swift @@ -464,7 +464,14 @@ public extension ValarRuntime { if sourceKind == .remoteURL, !allowDownload { throw RouteModelError.refreshRequiresDownload(id) } - _ = try await modelInstaller.uninstall(modelID: identifier) + if sourceKind == .remoteURL, + try await modelPackRegistry.installedRecord(for: identifier.rawValue) != nil { + do { + try await modelInstaller.verifyInstalledArtifacts(manifest: manifest) + } catch { + _ = try await modelInstaller.uninstall(modelID: identifier) + } + } _ = try await modelInstaller.purgeSharedCaches(for: identifier) } diff --git a/Packages/ValarMLX/Package.resolved b/Packages/ValarMLX/Package.resolved index 04a5b20..e0b483c 100644 --- a/Packages/ValarMLX/Package.resolved +++ b/Packages/ValarMLX/Package.resolved @@ -68,8 +68,8 @@ "kind" : "remoteSourceControl", "location" : "https://github.com/apple/swift-crypto.git", "state" : { - "revision" : "fa308c07a6fa04a727212d793e761460e41049c3", - "version" : "4.3.0" + "revision" : "1b6b2e274e85105bfa155183145a1dcfd63331f1", + "version" : "4.5.0" } }, { diff --git a/Packages/ValarModelKit/Sources/ValarModelKit/VibeVoiceCatalog.swift b/Packages/ValarModelKit/Sources/ValarModelKit/VibeVoiceCatalog.swift index 511ba23..3ec2be2 100644 --- a/Packages/ValarModelKit/Sources/ValarModelKit/VibeVoiceCatalog.swift +++ b/Packages/ValarModelKit/Sources/ValarModelKit/VibeVoiceCatalog.swift @@ -115,6 +115,61 @@ public enum VibeVoiceCatalog { VibeVoiceSurface.vibeVoiceRealtime05B4Bit.supportedLanguages } + private struct ArtifactIntegrity { + let sha256: String + let sizeBytes: Int + } + + private static let artifactIntegrity: [String: ArtifactIntegrity] = [ + "config.json": .init(sha256: "ef672aca9e7deb835925970492a10e606d1b2c7fc741dd30cf36e3efe2886717", sizeBytes: 2655), + "model.safetensors": .init(sha256: "d4e33a2daca2dd866b472e42210701fe9e28dc6fcec649b2a1fd05e5885b30bd", sizeBytes: 632_644_595), + "tokenizer.json": .init(sha256: "c0382117ea329cdf097041132f6d735924b697924d6f6fc3945713e96ce87539", sizeBytes: 7_031_645), + "tokenizer_config.json": .init(sha256: "c91efca15ceff6e9ee9424db58a6f59cd41294e550a86cbd07e3c1fb500b34f9", sizeBytes: 7_228), + "preprocessor_config.json": .init(sha256: "ebf514b5d30a012e5ae00d9a19d01e735e35b27768c3926d980815db8fa742e5", sizeBytes: 360), + "voices/de-Spk0_man.safetensors": .init(sha256: "2e34ddef90b8585c6298c2545841ef73c67e6adf8f728376107e8088244c1463", sizeBytes: 7_017_168), + "voices/de-Spk1_woman.safetensors": .init(sha256: "7a6c9efd03b06a2a6c2cb4fe88a43ec5b4ea5f3fe46d597ed36056f324ab415c", sizeBytes: 5_268_176), + "voices/en-Carter_man.safetensors": .init(sha256: "1b3efb89bc26bc14d86095da9b26b0aaf5989e8ed75e39efa958088bf301160c", sizeBytes: 4_241_352), + "voices/en-Davis_man.safetensors": .init(sha256: "6d689ac3f6f630fd1617814a15ab165544772a208ce305d3779feb08f033f1e0", sizeBytes: 2_456_752), + "voices/en-Emma_woman.safetensors": .init(sha256: "8572620ccf3384529c8fce7b211871482cfa1fc8e3068f80576b7ea15257e819", sizeBytes: 3_328_696), + "voices/en-Frank_man.safetensors": .init(sha256: "869af2fd5e83b3f70cdc23b28fdc1ef82c11e7122c387adb8bc56902b323efb2", sizeBytes: 3_345_080), + "voices/en-Grace_woman.safetensors": .init(sha256: "7b0cb4438eb8a2cc0d45eb8c4d724d27fb459f999a8ea0bf9ab221e67cb92ba6", sizeBytes: 2_758_064), + "voices/en-Mike_man.safetensors": .init(sha256: "10524823aa1f90cd4cec05828f16d20d090d83ac3eb682646aa43e45c1f9dc0a", sizeBytes: 1_993_376), + "voices/fr-Spk0_man.safetensors": .init(sha256: "8d6d1df2b70d05680bf2da34bb606b991eefa35068f091e2f17c603af8c8726e", sizeBytes: 4_363_984), + "voices/fr-Spk1_woman.safetensors": .init(sha256: "b9ce6e695df9bef90ce926b6c570ce211c456d969164208c3445a4ac257bd1e4", sizeBytes: 4_249_296), + "voices/in-Samuel_man.safetensors": .init(sha256: "3a22a118b02f1d2dbafe02284152bd83413dad344c7c65a66dc83ae2b528bc64", sizeBytes: 3_768_000), + "voices/it-Spk0_woman.safetensors": .init(sha256: "87ba3fcecc31c1d639a86bf5631096e06297bcdff2b455756b014bd4ccad9672", sizeBytes: 2_529_456), + "voices/it-Spk1_man.safetensors": .init(sha256: "b5c7cb194cca41d43e15e6cadbd7a7add10bc3d5fe53ef287979129f8fa90c21", sizeBytes: 2_832_056), + "voices/jp-Spk0_man.safetensors": .init(sha256: "ab53e74e00e87577506db3817985bd3584fab079acfc80615dccd160435b3476", sizeBytes: 4_645_840), + "voices/jp-Spk1_woman.safetensors": .init(sha256: "b331ec494ce24227ee4bd8a834aa407f3fb6d0fde9fd2ad6ac739f5df7b96a44", sizeBytes: 4_615_120), + "voices/kr-Spk0_woman.safetensors": .init(sha256: "2ac9224493d510782c7ca2294c4309edbd74e5871a76fa6b8fe0c408c2ddba01", sizeBytes: 4_131_024), + "voices/kr-Spk1_man.safetensors": .init(sha256: "67ccf3f76b5be71609d4c26ea64c6ce850ff8cfcf03ee623568a6bca916e40e9", sizeBytes: 5_842_640), + "voices/nl-Spk0_man.safetensors": .init(sha256: "8645c3a0fd62e94609527c066fece280998824ebd6a4dae169d024ad3de085fc", sizeBytes: 3_681_992), + "voices/nl-Spk1_woman.safetensors": .init(sha256: "cc8ac9607e1c61e3347ef3a25a0599886bf7c06dc69ab9a7633facfc80e6cbb4", sizeBytes: 5_073_104), + "voices/pl-Spk0_man.safetensors": .init(sha256: "b8f61efaf59ea95f520b4231646f4d04dea367fdf39633e80106d20abc3873b8", sizeBytes: 3_728_328), + "voices/pl-Spk1_woman.safetensors": .init(sha256: "7c3220f7ef26e8a06bfbb9f8acd0201139a9ee7be8a7b83df46bf28324e9677c", sizeBytes: 4_955_856), + "voices/pt-Spk0_woman.safetensors": .init(sha256: "0828216576aae51b2ada2b542a393edf1177a4e2cd5338b230187162ae4fdcc3", sizeBytes: 2_245_544), + "voices/pt-Spk1_man.safetensors": .init(sha256: "370345ebe6209bfea8084290104060297414e0d1015a9c7e5e9e3ce532bb17e2", sizeBytes: 3_532_488), + "voices/sp-Spk0_woman.safetensors": .init(sha256: "7c05318ca1f3c94ba533d4f7c2fb4694332a7de67de68051018fbd97f158070e", sizeBytes: 4_221_128), + "voices/sp-Spk1_man.safetensors": .init(sha256: "90c928352e59070b7a41c6d7ad76943f18e0f9d8dfc58d430d25e005c2287d79", sizeBytes: 5_107_920), + ] + + private static func artifactSpec( + id: String, + role: ArtifactRole, + relativePath: String, + required: Bool = true + ) -> ArtifactSpec { + let integrity = artifactIntegrity[relativePath] + return ArtifactSpec( + id: id, + role: role, + relativePath: relativePath, + sha256: integrity?.sha256, + sizeBytes: integrity?.sizeBytes, + required: required + ) + } + public static func primaryLanguage(for preset: PresetVoiceSpec) -> String? { preset.languageAffinity.first?.lowercased() } @@ -231,7 +286,7 @@ public enum VibeVoiceCatalog { static var voiceCacheArtifacts: [ArtifactSpec] { presetVoices.map { preset in - ArtifactSpec( + artifactSpec( id: "voice-cache-\(preset.name)", role: .voiceAsset, relativePath: "voices/\(preset.name).safetensors" @@ -261,13 +316,13 @@ public enum VibeVoiceCatalog { ), ], artifacts: [ - ArtifactSpec(id: "model-config", role: .config, relativePath: "config.json"), - ArtifactSpec(id: "model-weights", role: .weights, relativePath: "model.safetensors"), - ArtifactSpec(id: "tokenizer", role: .tokenizer, relativePath: "tokenizer.json"), - ArtifactSpec(id: "tokenizer-config", role: .tokenizer, relativePath: "tokenizer_config.json"), - ArtifactSpec(id: "special-tokens-map", role: .auxiliary, relativePath: "special_tokens_map.json", required: false), - ArtifactSpec(id: "added-tokens", role: .auxiliary, relativePath: "added_tokens.json", required: false), - ArtifactSpec(id: "preprocessor-config", role: .config, relativePath: "preprocessor_config.json"), + artifactSpec(id: "model-config", role: .config, relativePath: "config.json"), + artifactSpec(id: "model-weights", role: .weights, relativePath: "model.safetensors"), + artifactSpec(id: "tokenizer", role: .tokenizer, relativePath: "tokenizer.json"), + artifactSpec(id: "tokenizer-config", role: .tokenizer, relativePath: "tokenizer_config.json"), + artifactSpec(id: "special-tokens-map", role: .auxiliary, relativePath: "special_tokens_map.json", required: false), + artifactSpec(id: "added-tokens", role: .auxiliary, relativePath: "added_tokens.json", required: false), + artifactSpec(id: "preprocessor-config", role: .config, relativePath: "preprocessor_config.json"), ] + voiceCacheArtifacts, tokenizer: TokenizerSpec( kind: "huggingface", diff --git a/Packages/ValarModelKit/Sources/ValarModelKit/VoxtralCatalog.swift b/Packages/ValarModelKit/Sources/ValarModelKit/VoxtralCatalog.swift index 63ca32f..a4abfd1 100644 --- a/Packages/ValarModelKit/Sources/ValarModelKit/VoxtralCatalog.swift +++ b/Packages/ValarModelKit/Sources/ValarModelKit/VoxtralCatalog.swift @@ -60,6 +60,97 @@ public enum VoxtralCatalog { /// ar_male: quality issues with Arabic synthesis. private static let randomExcluded: Set = ["neutral_male", "ar_male"] + private struct ArtifactIntegrity { + let sha256: String + let sizeBytes: Int + } + + private static let rawArtifactIntegrity: [String: ArtifactIntegrity] = [ + "params.json": .init(sha256: "f6408ee76dea8da16ce40ac66729d59406019ea71cdb9d656709a38d2e58691e", sizeBytes: 3_482), + "consolidated.safetensors": .init(sha256: "66c4fd998db10e1a6d9cc5baa10e6264bf10701ec22ccdc0822c7dcc45dbe55b", sizeBytes: 8_004_752_248), + "tekken.json": .init(sha256: "587989c9f56676b35e7d16d6fc61461301e402d908392a8ce16f0349f61b56d7", sizeBytes: 14_894_731), + "voice_embedding/ar_male.pt": .init(sha256: "f44603f6433cbb4b2abc7f496a382632171118557a175cb385df168a0dc20464", sizeBytes: 413_253), + "voice_embedding/casual_female.pt": .init(sha256: "780637984644064ee22e60b3152e0cd43fa64b2dcd39d9cab6cd2c62f2ce0342", sizeBytes: 1_316_421), + "voice_embedding/casual_male.pt": .init(sha256: "7a056c9156ad0058e9d1368363bf3a25a9fcd8fe53e211ffac97de0bbffb3504", sizeBytes: 904_773), + "voice_embedding/cheerful_female.pt": .init(sha256: "75fe69c8fcb5a0883a3d0bc1215b28f28cc0586aff5732eeebd2b254e8288253", sizeBytes: 812_613), + "voice_embedding/de_female.pt": .init(sha256: "282fc191fda496de2ebf2c809acb44056dde6fbe2f1cb99e85e67985bc6f6619", sizeBytes: 904_773), + "voice_embedding/de_male.pt": .init(sha256: "bd75d9fd3ffb9df0481668ce8781287a58f552e2388c5bbc0efdd4ebff0421bf", sizeBytes: 1_003_077), + "voice_embedding/es_female.pt": .init(sha256: "90e01ad34f231cc881987c3b1c0728853fd9b904e52c296a07c71a132949d8a6", sizeBytes: 849_477), + "voice_embedding/es_male.pt": .init(sha256: "ec116d8f4a102291bae3d9156d7c3222d9e1056020bf5894a7504bfc09640fdf", sizeBytes: 1_279_557), + "voice_embedding/fr_female.pt": .init(sha256: "82628d963670f919aa302f9c8a7336c745418a145934edb211810b07d9c8b852", sizeBytes: 597_573), + "voice_embedding/fr_male.pt": .init(sha256: "73395073472be3fb586b487705ac4ebf35f99db664f56400137e8bfcfe4cd8a8", sizeBytes: 597_573), + "voice_embedding/hi_female.pt": .init(sha256: "aa7718cdd6f65735226bcc701379fdec64f36d0207ca79fc4c61b445ca7bde82", sizeBytes: 529_989), + "voice_embedding/hi_male.pt": .init(sha256: "c3cde36ab9a336f67fd33b46435cdf645cff9e10117f13bcbcb67b44b80a11b0", sizeBytes: 579_141), + "voice_embedding/it_female.pt": .init(sha256: "29e1714bdb3ce0726e590ce1862fbe953c168ba51a05bc7daa8cb35cddc312b4", sizeBytes: 1_058_373), + "voice_embedding/it_male.pt": .init(sha256: "b98ba2253e2a0b872e20d33d29cab32263cc81062c01e3f5a8696de89e6f47b1", sizeBytes: 1_033_797), + "voice_embedding/neutral_female.pt": .init(sha256: "2a03f4008614da7b1505a360a6b0d58d94dd72b0b0f49bf216e39de5eb733c61", sizeBytes: 1_340_997), + "voice_embedding/neutral_male.pt": .init(sha256: "439df812990e6e4bcc6010ca12f12df90916e862bc1e1b56036d6433b892834e", sizeBytes: 1_039_941), + "voice_embedding/nl_female.pt": .init(sha256: "b1bad34c22e0563f05c1f13c1db96680778c297aea6a5c0bb202950648b796b6", sizeBytes: 898_629), + "voice_embedding/nl_male.pt": .init(sha256: "43fd2de89dc08503f37ae3107273eeb3f2a6195d705ff58d2228b3b5642ff7de", sizeBytes: 849_477), + "voice_embedding/pt_female.pt": .init(sha256: "82f1006b2cd69118cba67085daa1795d9dab90b9bc70e1392e77f82cb616c9ce", sizeBytes: 1_076_805), + "voice_embedding/pt_male.pt": .init(sha256: "7b30dca6c5d16c7b10a1c09c53e971c1bb1fab65692d7244876fbdc4ad52ba18", sizeBytes: 886_341), + ] + + private static let sharedMLXArtifactIntegrity: [String: ArtifactIntegrity] = [ + "params.json": .init(sha256: "f6408ee76dea8da16ce40ac66729d59406019ea71cdb9d656709a38d2e58691e", sizeBytes: 3_482), + "model.safetensors.index.json": .init(sha256: "6f550dfaf7569a5369fd22b361e5a5838bea06c624ded0c6dab60f1dcf1032d9", sizeBytes: 73_050), + "tekken.json": .init(sha256: "587989c9f56676b35e7d16d6fc61461301e402d908392a8ce16f0349f61b56d7", sizeBytes: 14_894_731), + "voice_embedding/ar_male.safetensors": .init(sha256: "f4c480657b730c169614c66dcb26684bc234ee2829cb2a9e490c300f60b1782a", sizeBytes: 411_736), + "voice_embedding/casual_female.safetensors": .init(sha256: "9a2027d9265fd7ef4a55294b10ae8b2095dcad7547d50762a5012a3955ca0860", sizeBytes: 1_314_904), + "voice_embedding/casual_male.safetensors": .init(sha256: "2056ade898f6f1b04c1af764f54d705038ac046f0b4967457a36989b69730fa8", sizeBytes: 903_256), + "voice_embedding/cheerful_female.safetensors": .init(sha256: "35441e3030ba0076356ef1ad54fbfd5adfbde9e34bbf8a1a6535d6efabe63af1", sizeBytes: 811_096), + "voice_embedding/de_female.safetensors": .init(sha256: "a89197fe5e77a2dd3cfc4ac0ed1cb5248e4e1ac26fdb13500e9782e43a90a69e", sizeBytes: 903_256), + "voice_embedding/de_male.safetensors": .init(sha256: "cabfeeb98db0b713a80f234e11d90c3105fe83e64de75876509c3cda43946656", sizeBytes: 1_001_560), + "voice_embedding/es_female.safetensors": .init(sha256: "52bca0b4e770afaccc3f2899d7396009422a0b3fdb5a6a96c523bcdc2f35c165", sizeBytes: 847_960), + "voice_embedding/es_male.safetensors": .init(sha256: "b63fd5dff4dbc5070d470a364b8691f63fd6c10412a2692101bf6a90549c36e1", sizeBytes: 1_278_040), + "voice_embedding/fr_female.safetensors": .init(sha256: "e8be0f673696dc1bf668fa6cc9999ab09ab43b29a6d6e85c5ac3f9f8ed43c449", sizeBytes: 596_056), + "voice_embedding/fr_male.safetensors": .init(sha256: "d34fde0b57acaea4c3f78e841c93dd290f104a167dd0bea9e0673d9cdfedadd5", sizeBytes: 596_056), + "voice_embedding/hi_female.safetensors": .init(sha256: "01e987092046cf5dc1041f00d10b0763f9ab4080c3bf124d97a01ccb04428e26", sizeBytes: 528_472), + "voice_embedding/hi_male.safetensors": .init(sha256: "342d40fcda1e2a6ded081f0d2d8eefadcc7b8102a20da6e7e803ef4b4a21785c", sizeBytes: 577_624), + "voice_embedding/it_female.safetensors": .init(sha256: "378b9bab596a68780ca07f4bf2032a9c70847825772aed9ee148ba8173b477d3", sizeBytes: 1_056_856), + "voice_embedding/it_male.safetensors": .init(sha256: "05e313648254a57e4ca7e503b416590a4c832a106f7201efd4dea02ecce79035", sizeBytes: 1_032_280), + "voice_embedding/neutral_female.safetensors": .init(sha256: "e229d5646ab8c2cad1b4e24cd63b88192f34da61de8f0595291799232460752f", sizeBytes: 1_339_480), + "voice_embedding/neutral_male.safetensors": .init(sha256: "b114132e7301b2d2308ff7b0c1843574bab9ffeac5fc2dd48ca27db70a24ea0c", sizeBytes: 1_038_424), + "voice_embedding/nl_female.safetensors": .init(sha256: "0124a62762a89b9a4ef0ff0c016db045ccfe30ed9fa3d886db0e78e444673ea3", sizeBytes: 897_112), + "voice_embedding/nl_male.safetensors": .init(sha256: "6f931e4c34cf496cce4ed7ce5d8af286140c4969860f9410b12237ff28de2d33", sizeBytes: 847_960), + "voice_embedding/pt_female.safetensors": .init(sha256: "ce23b97302fe6ca58507b1ae2c2fe1d0a472268b1faf597901cbf75e326a08f9", sizeBytes: 1_075_288), + "voice_embedding/pt_male.safetensors": .init(sha256: "4ccd63a39d35f483c89485112d35a8a2121cbb0e6d5d124e403e40d03e0c0e82", sizeBytes: 884_824), + ] + + private static let mlx4BitOnlyArtifactIntegrity: [String: ArtifactIntegrity] = [ + "model.safetensors": .init(sha256: "a62a28f02ce54f9157877df44ce2da92bed97159ab19c2878445d3ec4d357786", sizeBytes: 2_509_879_373), + ] + + private static let mlx6BitOnlyArtifactIntegrity: [String: ArtifactIntegrity] = [ + "model.safetensors": .init(sha256: "faea8347d8d27f7f0d1a338c6cccd887d7b28df8978d447e5f1c20d414354af1", sizeBytes: 3_465_520_393), + ] + + private static let mlx4BitArtifactIntegrity = sharedMLXArtifactIntegrity.merging( + mlx4BitOnlyArtifactIntegrity, + uniquingKeysWith: { _, new in new } + ) + + private static let mlx6BitArtifactIntegrity = sharedMLXArtifactIntegrity.merging( + mlx6BitOnlyArtifactIntegrity, + uniquingKeysWith: { _, new in new } + ) + + private static func artifactSpec( + id: String, + role: ArtifactRole, + relativePath: String, + integrity: [String: ArtifactIntegrity] + ) -> ArtifactSpec { + let resolvedIntegrity = integrity[relativePath] + return ArtifactSpec( + id: id, + role: role, + relativePath: relativePath, + sha256: resolvedIntegrity?.sha256, + sizeBytes: resolvedIntegrity?.sizeBytes + ) + } + /// Resolve a voice name or alias to the canonical preset name. /// Returns nil if not found. Accepts "random" to pick a random preset. public static func resolvePresetName(_ input: String) -> String? { @@ -79,10 +170,11 @@ public enum VoxtralCatalog { static var rawVoiceEmbeddingArtifacts: [ArtifactSpec] { presetVoices.map { preset in - ArtifactSpec( + artifactSpec( id: "\(rawVoiceEmbeddingArtifactIDPrefix)\(preset.name)", role: .voiceAsset, - relativePath: "voice_embedding/\(preset.name).pt" + relativePath: "voice_embedding/\(preset.name).pt", + integrity: rawArtifactIntegrity ) } } @@ -149,9 +241,9 @@ public enum VoxtralCatalog { modelID: modelIdentifier, displayName: "Voxtral 4B TTS 2603", artifacts: [ - ArtifactSpec(id: "model-config", role: .config, relativePath: "params.json"), - ArtifactSpec(id: "model-weights", role: .weights, relativePath: "consolidated.safetensors"), - ArtifactSpec(id: "tokenizer", role: .tokenizer, relativePath: "tekken.json"), + artifactSpec(id: "model-config", role: .config, relativePath: "params.json", integrity: rawArtifactIntegrity), + artifactSpec(id: "model-weights", role: .weights, relativePath: "consolidated.safetensors", integrity: rawArtifactIntegrity), + artifactSpec(id: "tokenizer", role: .tokenizer, relativePath: "tekken.json", integrity: rawArtifactIntegrity), ] + rawVoiceEmbeddingArtifacts + [ ArtifactSpec(id: "voice-embeddings-safe", role: .voiceAsset, relativePath: "voice_embedding_safe/"), ], @@ -163,15 +255,16 @@ public enum VoxtralCatalog { modelID: mlx4BitModelIdentifier, displayName: "Voxtral 4B TTS 2603 MLX (4-bit)", artifacts: [ - ArtifactSpec(id: "model-config", role: .config, relativePath: "params.json"), - ArtifactSpec(id: "model-weights", role: .weights, relativePath: "model.safetensors"), - ArtifactSpec(id: "model-index", role: .weights, relativePath: "model.safetensors.index.json"), - ArtifactSpec(id: "tokenizer", role: .tokenizer, relativePath: "tekken.json"), + artifactSpec(id: "model-config", role: .config, relativePath: "params.json", integrity: mlx4BitArtifactIntegrity), + artifactSpec(id: "model-weights", role: .weights, relativePath: "model.safetensors", integrity: mlx4BitArtifactIntegrity), + artifactSpec(id: "model-index", role: .weights, relativePath: "model.safetensors.index.json", integrity: mlx4BitArtifactIntegrity), + artifactSpec(id: "tokenizer", role: .tokenizer, relativePath: "tekken.json", integrity: mlx4BitArtifactIntegrity), ] + presetVoices.map { preset in - ArtifactSpec( + artifactSpec( id: "voice-embedding-safetensors-\(preset.name)", role: .voiceAsset, - relativePath: "voice_embedding/\(preset.name).safetensors" + relativePath: "voice_embedding/\(preset.name).safetensors", + integrity: mlx4BitArtifactIntegrity ) }, preferredQuantization: "4bit", @@ -182,15 +275,16 @@ public enum VoxtralCatalog { modelID: mlx6BitModelIdentifier, displayName: "Voxtral 4B TTS 2603 MLX (6-bit)", artifacts: [ - ArtifactSpec(id: "model-config", role: .config, relativePath: "params.json"), - ArtifactSpec(id: "model-weights", role: .weights, relativePath: "model.safetensors"), - ArtifactSpec(id: "model-index", role: .weights, relativePath: "model.safetensors.index.json"), - ArtifactSpec(id: "tokenizer", role: .tokenizer, relativePath: "tekken.json"), + artifactSpec(id: "model-config", role: .config, relativePath: "params.json", integrity: mlx6BitArtifactIntegrity), + artifactSpec(id: "model-weights", role: .weights, relativePath: "model.safetensors", integrity: mlx6BitArtifactIntegrity), + artifactSpec(id: "model-index", role: .weights, relativePath: "model.safetensors.index.json", integrity: mlx6BitArtifactIntegrity), + artifactSpec(id: "tokenizer", role: .tokenizer, relativePath: "tekken.json", integrity: mlx6BitArtifactIntegrity), ] + presetVoices.map { preset in - ArtifactSpec( + artifactSpec( id: "voice-embedding-safetensors-\(preset.name)", role: .voiceAsset, - relativePath: "voice_embedding/\(preset.name).safetensors" + relativePath: "voice_embedding/\(preset.name).safetensors", + integrity: mlx6BitArtifactIntegrity ) }, preferredQuantization: "6bit", diff --git a/Packages/mlx-audio-swift-valar/Examples/VoicesApp/VoicesApp/Info.plist b/Packages/mlx-audio-swift-valar/Examples/VoicesApp/VoicesApp/Info.plist index 334dcd2..b37f294 100644 --- a/Packages/mlx-audio-swift-valar/Examples/VoicesApp/VoicesApp/Info.plist +++ b/Packages/mlx-audio-swift-valar/Examples/VoicesApp/VoicesApp/Info.plist @@ -26,8 +26,6 @@ NSPrincipalClass NSApplication - HF_TOKEN - NSMicrophoneUsageDescription VoicesApp needs microphone access to record audio for transcription. diff --git a/apps/ValarCLI/Package.resolved b/apps/ValarCLI/Package.resolved index a910778..7d7fe8a 100644 --- a/apps/ValarCLI/Package.resolved +++ b/apps/ValarCLI/Package.resolved @@ -104,8 +104,8 @@ "kind" : "remoteSourceControl", "location" : "https://github.com/apple/swift-crypto.git", "state" : { - "revision" : "fa308c07a6fa04a727212d793e761460e41049c3", - "version" : "4.3.0" + "revision" : "1b6b2e274e85105bfa155183145a1dcfd63331f1", + "version" : "4.5.0" } }, { diff --git a/apps/ValarCLI/Sources/ValarCLI/Commands/CapabilitiesCommand.swift b/apps/ValarCLI/Sources/ValarCLI/Commands/CapabilitiesCommand.swift index aaa904e..61129af 100644 --- a/apps/ValarCLI/Sources/ValarCLI/Commands/CapabilitiesCommand.swift +++ b/apps/ValarCLI/Sources/ValarCLI/Commands/CapabilitiesCommand.swift @@ -13,21 +13,21 @@ struct CapabilitiesCommand: AsyncParsableCommand { mutating func run() async throws { let runtime = try ValarRuntime() let metallibAvailable = Self.checkMetallibAvailable() - let daemonBaseURL = Self.daemonBaseURL() + let daemonBaseURL = CLILocalDaemon.baseURL() // Ping daemon health first, then readiness. var daemonReachable = false var daemonReady = false var daemonReadyDTO: DaemonReadyDTO? if let healthURL = daemonBaseURL?.appendingPathComponent("v1/health") { - if let (_, response) = try? await URLSession.shared.data(from: healthURL), + if let (_, response) = try? await CLILocalDaemon.session.data(from: healthURL), let http = response as? HTTPURLResponse, http.statusCode == 200 { daemonReachable = true } } if daemonReachable, let url = daemonBaseURL?.appendingPathComponent("v1/ready") { - if let (data, response) = try? await URLSession.shared.data(from: url), + if let (data, response) = try? await CLILocalDaemon.session.data(from: url), let http = response as? HTTPURLResponse { if let dto = try? JSONDecoder().decode(DaemonReadyDTO.self, from: data) { daemonReadyDTO = dto @@ -73,18 +73,6 @@ struct CapabilitiesCommand: AsyncParsableCommand { return false } - private static func daemonBaseURL( - environment: [String: String] = ProcessInfo.processInfo.environment - ) -> URL? { - let trimmedHost = environment["VALARTTSD_BIND_HOST"]? - .trimmingCharacters(in: .whitespacesAndNewlines) - let trimmedPort = environment["VALARTTSD_BIND_PORT"]? - .trimmingCharacters(in: .whitespacesAndNewlines) - let host = (trimmedHost?.isEmpty == false ? trimmedHost : nil) ?? "127.0.0.1" - let port = (trimmedPort?.isEmpty == false ? trimmedPort : nil) ?? "8787" - return URL(string: "http://\(host):\(port)") - } - // MARK: - Human-readable output private func printHumanSummary( diff --git a/apps/ValarCLI/Sources/ValarCLI/Commands/DoctorCommand.swift b/apps/ValarCLI/Sources/ValarCLI/Commands/DoctorCommand.swift index cbfb8b5..0539b70 100644 --- a/apps/ValarCLI/Sources/ValarCLI/Commands/DoctorCommand.swift +++ b/apps/ValarCLI/Sources/ValarCLI/Commands/DoctorCommand.swift @@ -16,7 +16,7 @@ struct DoctorCommand: AsyncParsableCommand { let runtime = try ValarRuntime() var issues: [String] = [] var advisories: [String] = [] - let daemonBaseURL = Self.daemonBaseURL() + let daemonBaseURL = CLILocalDaemon.baseURL() let modelPackAudit = try? await runtime.auditLocalModelPackState() let orphanedModelPackPaths = modelPackAudit?.orphanedModelPackPaths ?? [] let daemonPIDStatus = daemonPIDStatus(paths: runtime.paths) @@ -159,7 +159,7 @@ struct DoctorCommand: AsyncParsableCommand { var daemonReachable = false if let url = daemonBaseURL?.appendingPathComponent("v1/health") { do { - let (_, response) = try await URLSession.shared.data(from: url) + let (_, response) = try await CLILocalDaemon.session.data(from: url) if let httpResponse = response as? HTTPURLResponse, httpResponse.statusCode == 200 { daemonReachable = true } @@ -174,7 +174,7 @@ struct DoctorCommand: AsyncParsableCommand { if daemonReachable { if let readyURL = daemonBaseURL?.appendingPathComponent("v1/ready") { - if let (readyData, readyResponse) = try? await URLSession.shared.data(from: readyURL), + if let (readyData, readyResponse) = try? await CLILocalDaemon.session.data(from: readyURL), let httpResponse = readyResponse as? HTTPURLResponse { if let dto = try? JSONDecoder().decode(DaemonReadyDTO.self, from: readyData) { daemonReadyDTO = dto @@ -189,7 +189,7 @@ struct DoctorCommand: AsyncParsableCommand { } if let runtimeURL = daemonBaseURL?.appendingPathComponent("v1/runtime") { - if let (runtimeData, runtimeResponse) = try? await URLSession.shared.data(from: runtimeURL), + if let (runtimeData, runtimeResponse) = try? await CLILocalDaemon.session.data(from: runtimeURL), let httpResponse = runtimeResponse as? HTTPURLResponse, httpResponse.statusCode == 200 { daemonRuntimeDTO = try? JSONDecoder().decode(DaemonRuntimeStatusDTO.self, from: runtimeData) @@ -666,16 +666,6 @@ struct DoctorCommand: AsyncParsableCommand { LocalInferenceAssetsStatus.currentProcess() } - private static func daemonBaseURL(environment: [String: String] = ProcessInfo.processInfo.environment) -> URL? { - let host = environment["VALARTTSD_BIND_HOST"]? - .trimmingCharacters(in: .whitespacesAndNewlines) - .nonEmpty ?? "127.0.0.1" - let port = environment["VALARTTSD_BIND_PORT"]? - .trimmingCharacters(in: .whitespacesAndNewlines) - .nonEmpty ?? "8787" - return URL(string: "http://\(host):\(port)") - } - private func daemonPIDStatus(paths: ValarAppPaths) -> (path: String, present: Bool, pid: Int?, live: Bool?, matchesProcess: Bool?) { let pidFileURL = paths.daemonPIDFileURL let present = FileManager.default.fileExists(atPath: pidFileURL.path) diff --git a/apps/ValarCLI/Sources/ValarCLI/Commands/ModelsCommand.swift b/apps/ValarCLI/Sources/ValarCLI/Commands/ModelsCommand.swift index 2f55ac4..981c50d 100644 --- a/apps/ValarCLI/Sources/ValarCLI/Commands/ModelsCommand.swift +++ b/apps/ValarCLI/Sources/ValarCLI/Commands/ModelsCommand.swift @@ -89,8 +89,21 @@ extension ModelsCommand { throw ValidationError("Refreshing shared cache for '\(id)' requires --allow-download because Valar will need to fetch a fresh snapshot.") } + var refreshWarnings: [String] = [] if refreshCache { - _ = try await runtime.modelInstaller.uninstall(modelID: model.id) + if model.providerURL != nil, model.installState == .installed { + do { + try await runtime.modelInstaller.verifyInstalledArtifacts(manifest: manifest) + } catch { + let warning = "Existing installed pack failed integrity validation and will be removed before refresh: \(error.localizedDescription)" + refreshWarnings.append(warning) + if !OutputContext.jsonRequested { + print("warning: \(warning)") + } + _ = try await runtime.modelInstaller.uninstall(modelID: model.id) + } + } + let purgedPaths = try await runtime.modelInstaller.purgeSharedCaches(for: model.id) if !OutputContext.jsonRequested { if purgedPaths.isEmpty { @@ -108,31 +121,7 @@ extension ModelsCommand { // A `.cached` model has files on disk already; only `.supported` + remoteURL requires download. if (model.installState == .supported || refreshCache), let sourceURL = model.providerURL, !allowDownload { let warningMessage = "Model '\(model.descriptor.displayName)' is not cached locally and requires a download from: \(sourceURL.absoluteString)\nRe-run with --allow-download to permit the download." - if OutputContext.jsonRequested { - struct DownloadRequiredEnvelope: Encodable { - let ok: Bool - let command: String - let error: ErrorDetails - struct ErrorDetails: Encodable { - let code: Int - let kind: String - let message: String - let model: String - let estimatedSize: String - } - } - let encoder = JSONEncoder() - encoder.outputFormatting = [.prettyPrinted, .sortedKeys] - let dto = DownloadRequiredEnvelope( - ok: false, - command: OutputFormat.commandPath("models install"), - error: .init(code: 2, kind: "download_required", message: warningMessage, model: id, estimatedSize: "unknown") - ) - print(String(decoding: try encoder.encode(dto), as: UTF8.self)) - } else { - print("warning: \(warningMessage)") - } - return + throw ValidationError(warningMessage) } let sourceKind: ModelPackSourceKind = model.providerURL == nil ? .localFile : .remoteURL @@ -168,6 +157,7 @@ extension ModelsCommand { let warnings = result.report.issues.filter { $0.severity == .warning } let message = "Installed \(result.descriptor.displayName) | \(result.record.installedPath)" + let installWarnings = refreshWarnings + ModelsCommand.humanReadableInstallWarnings(from: warnings) if OutputContext.jsonRequested { try OutputFormat.writeSuccess( @@ -177,13 +167,13 @@ extension ModelsCommand { model: ModelSummaryDTO(from: model), result: "installed", installedPath: result.record.installedPath, - warnings: ModelsCommand.humanReadableInstallWarnings(from: warnings) + warnings: installWarnings ) ) return } - for warning in ModelsCommand.humanReadableInstallWarnings(from: warnings) { + for warning in installWarnings where !refreshWarnings.contains(warning) { print("warning: \(warning)") } @@ -501,7 +491,7 @@ private extension ModelsCommand { let checksumCount = checksumWarnings.count let noun = checksumCount == 1 ? "artifact" : "artifacts" let checksumSummary = - "\(checksumCount) \(noun) in this model pack do not declare SHA-256 checksums. Valar can install them, but cannot locally verify the downloaded files." + "\(checksumCount) \(noun) in this model pack do not declare SHA-256 checksums. Valar will not install those files from remote sources until checksums are declared." return otherWarnings + [checksumSummary] } diff --git a/apps/ValarCLI/Sources/ValarCLI/Commands/SpeakCommand.swift b/apps/ValarCLI/Sources/ValarCLI/Commands/SpeakCommand.swift index 5aaf480..1b10885 100644 --- a/apps/ValarCLI/Sources/ValarCLI/Commands/SpeakCommand.swift +++ b/apps/ValarCLI/Sources/ValarCLI/Commands/SpeakCommand.swift @@ -679,6 +679,11 @@ struct SpeakCommand: AsyncParsableCommand { runtime: ValarRuntime ) async throws { let buffer = AudioPCMBuffer(mono: chunk.samples, sampleRate: chunk.sampleRate) + try FileManager.default.createDirectory( + at: outputURL.deletingLastPathComponent(), + withIntermediateDirectories: true + ) + switch format { case .wav: let audioFormat = AudioFormatDescriptor( @@ -698,10 +703,6 @@ struct SpeakCommand: AsyncParsableCommand { try oggData.write(to: outputURL) case .pcmF32le: let rawData = audioPCMFloat32LEData(from: chunk.samples) - try FileManager.default.createDirectory( - at: outputURL.deletingLastPathComponent(), - withIntermediateDirectories: true - ) try rawData.write(to: outputURL) } } diff --git a/apps/ValarCLI/Sources/ValarCLI/Commands/VoicesCommand.swift b/apps/ValarCLI/Sources/ValarCLI/Commands/VoicesCommand.swift index 04c5ef4..c9b7cb9 100644 --- a/apps/ValarCLI/Sources/ValarCLI/Commands/VoicesCommand.swift +++ b/apps/ValarCLI/Sources/ValarCLI/Commands/VoicesCommand.swift @@ -568,7 +568,9 @@ private extension VoicesCommand { let assetURL = URL(fileURLWithPath: trimmed) if assetURL.path.hasPrefix("/") { - return assetURL.standardizedFileURL + let resolvedURL = assetURL.standardizedFileURL + try ValarAppPaths.validateContainment(resolvedURL, within: voiceLibraryDirectory) + return resolvedURL } try ValarAppPaths.validateRelativePath(trimmed, label: "voice asset") diff --git a/apps/ValarCLI/Sources/ValarCLI/LocalDaemonSupport.swift b/apps/ValarCLI/Sources/ValarCLI/LocalDaemonSupport.swift new file mode 100644 index 0000000..0896756 --- /dev/null +++ b/apps/ValarCLI/Sources/ValarCLI/LocalDaemonSupport.swift @@ -0,0 +1,40 @@ +import Foundation + +enum CLILocalDaemon { + private static let allowedHosts = Set(["127.0.0.1", "::1", "localhost"]) + + static let session: URLSession = { + let configuration = URLSessionConfiguration.ephemeral + configuration.timeoutIntervalForRequest = 2 + configuration.timeoutIntervalForResource = 4 + return URLSession(configuration: configuration) + }() + + static func baseURL(environment: [String: String] = ProcessInfo.processInfo.environment) -> URL? { + let rawHost = trimmedNonEmpty(environment["VALARTTSD_BIND_HOST"]) ?? "127.0.0.1" + let rawPort = trimmedNonEmpty(environment["VALARTTSD_BIND_PORT"]) ?? "8787" + + let host = rawHost.trimmingCharacters(in: CharacterSet(charactersIn: "[]")).lowercased() + guard allowedHosts.contains(host), + let port = Int(rawPort), + (1...65_535).contains(port) + else { + return nil + } + + var components = URLComponents() + components.scheme = "http" + components.host = host + components.port = port + return components.url + } + + private static func trimmedNonEmpty(_ value: String?) -> String? { + guard let trimmed = value?.trimmingCharacters(in: .whitespacesAndNewlines), + !trimmed.isEmpty + else { + return nil + } + return trimmed + } +} diff --git a/apps/ValarDaemon/Package.resolved b/apps/ValarDaemon/Package.resolved index 86d41f4..9908cca 100644 --- a/apps/ValarDaemon/Package.resolved +++ b/apps/ValarDaemon/Package.resolved @@ -149,8 +149,8 @@ "kind" : "remoteSourceControl", "location" : "https://github.com/apple/swift-crypto.git", "state" : { - "revision" : "fa308c07a6fa04a727212d793e761460e41049c3", - "version" : "4.3.0" + "revision" : "1b6b2e274e85105bfa155183145a1dcfd63331f1", + "version" : "4.5.0" } }, { diff --git a/apps/ValarDaemon/Sources/ValarDaemon/OperationQueue.swift b/apps/ValarDaemon/Sources/ValarDaemon/OperationQueue.swift index a8c109f..5cfb226 100644 --- a/apps/ValarDaemon/Sources/ValarDaemon/OperationQueue.swift +++ b/apps/ValarDaemon/Sources/ValarDaemon/OperationQueue.swift @@ -4,6 +4,13 @@ import ValarCore actor DaemonOperationQueue { typealias OperationWork = @Sendable () async throws -> Void + enum CancelResult: Sendable { + case cancelled(DaemonOperationStatusDTO) + case running(DaemonOperationStatusDTO) + case alreadyFinished(DaemonOperationStatusDTO) + case notFound + } + private struct QueuedOperation: Sendable { let id: String let work: OperationWork @@ -62,6 +69,36 @@ actor DaemonOperationQueue { return Self.makeDTO(from: record) } + func cancel(id: String) -> CancelResult { + guard var record = records[id] else { + return .notFound + } + + if let pendingIndex = pending.firstIndex(where: { $0.id == id }) { + pending.remove(at: pendingIndex) + record.status = "cancelled" + record.finishedAt = Date() + record.error = "Operation cancelled before it started." + records[id] = record + trimCompletedHistory() + return .cancelled(Self.makeDTO(from: record)) + } + + switch record.status { + case "queued": + record.status = "cancelled" + record.finishedAt = Date() + record.error = "Operation cancelled before it started." + records[id] = record + trimCompletedHistory() + return .cancelled(Self.makeDTO(from: record)) + case "running": + return .running(Self.makeDTO(from: record)) + default: + return .alreadyFinished(Self.makeDTO(from: record)) + } + } + func queueState() -> DaemonQueueStateDTO { let operations = orderedIDs.compactMap { id in records[id].map(Self.makeDTO(from:)) diff --git a/apps/ValarDaemon/Sources/ValarDaemon/Router.swift b/apps/ValarDaemon/Sources/ValarDaemon/Router.swift index 3ef1815..654b611 100644 --- a/apps/ValarDaemon/Sources/ValarDaemon/Router.swift +++ b/apps/ValarDaemon/Sources/ValarDaemon/Router.swift @@ -1,5 +1,6 @@ import Hummingbird import Foundation +import HTTPTypes import NIOCore import NIOHTTPTypes import ValarCore @@ -63,6 +64,114 @@ final class ClientInputCloseHandler: ChannelInboundHandler, RemovableChannelHand } } +private extension HTTPField.Name { + static let secFetchSite = Self("Sec-Fetch-Site")! + static let secFetchMode = Self("Sec-Fetch-Mode")! +} + +struct LocalDaemonRequestGuardMiddleware: RouterMiddleware { + private let loopbackHosts = Set(["127.0.0.1", "::1", "[::1]", "localhost"]) + + func handle( + _ request: Request, + context: Context, + next: (Request, Context) async throws -> Response + ) async throws -> Response { + guard isAllowedHost(request.head.authority) else { + return ValarDaemonRouter.daemonErrorResponse( + message: "Request Host must be loopback.", + status: .forbidden, + kind: "local_request_guard" + ) + } + guard isAllowedBrowserFetchMetadata(request) else { + return ValarDaemonRouter.daemonErrorResponse( + message: "Cross-site browser requests are not allowed.", + status: .forbidden, + kind: "local_request_guard" + ) + } + guard isAllowedOrigin(request.headers[.origin]) else { + return ValarDaemonRouter.daemonErrorResponse( + message: "Cross-origin browser requests are not allowed.", + status: .forbidden, + kind: "local_request_guard" + ) + } + guard hasAllowedContentType(request) else { + return ValarDaemonRouter.daemonErrorResponse( + message: "Unsupported Content-Type for this endpoint.", + status: .unsupportedMediaType, + kind: "local_request_guard" + ) + } + + return try await next(request, context) + } + + private func isAllowedHost(_ hostHeader: String?) -> Bool { + guard let hostHeader, hostHeader.isEmpty == false else { + return true + } + return loopbackHosts.contains(hostWithoutPort(hostHeader).lowercased()) + } + + private func hostWithoutPort(_ value: String) -> String { + let trimmed = value.trimmingCharacters(in: .whitespacesAndNewlines) + if trimmed.hasPrefix("[") { + guard let end = trimmed.firstIndex(of: "]") else { return trimmed } + return String(trimmed[...end]) + } + return trimmed.split(separator: ":", maxSplits: 1).first.map(String.init) ?? trimmed + } + + private func isAllowedBrowserFetchMetadata(_ request: Request) -> Bool { + let site = request.headers[.secFetchSite]?.lowercased() + if site == "cross-site" || site == "same-site" { + return false + } + if request.method != .get { + let mode = request.headers[.secFetchMode]?.lowercased() + if mode == "no-cors" || mode == "navigate" { + return false + } + } + return true + } + + private func isAllowedOrigin(_ originHeader: String?) -> Bool { + guard let originHeader, originHeader.isEmpty == false else { + return true + } + guard let origin = URL(string: originHeader), let host = origin.host else { + return false + } + return loopbackHosts.contains(host.lowercased()) + } + + private func hasAllowedContentType(_ request: Request) -> Bool { + switch request.method { + case .get, .head, .delete: + return true + default: + guard let contentType = request.headers[.contentType]?.lowercased() else { + return requestContentLength(request) == 0 + } + return contentType.hasPrefix("application/json") + || contentType.hasPrefix("multipart/form-data") + } + } + + private func requestContentLength(_ request: Request) -> Int { + guard let rawLength = request.headers[.contentLength]?.trimmingCharacters(in: .whitespacesAndNewlines), + rawLength.isEmpty == false + else { + return 0 + } + return Int(rawLength) ?? -1 + } +} + enum ValarDaemonRouter { struct DaemonRequestContext: RequestContext { var coreContext: CoreRequestContextStorage @@ -129,6 +238,9 @@ enum ValarDaemonRouter { startedAt: Date ) { let v1 = router.group("v1") + v1.addMiddleware { + LocalDaemonRequestGuardMiddleware() + } registerHealthRoutes(on: v1, runtime: runtime) registerReadyRoutes(on: v1, runtime: runtime) registerRuntimeRoutes(on: v1, runtime: runtime, startedAt: startedAt) diff --git a/apps/ValarDaemon/Sources/ValarDaemon/Routes/AudioRoutes.swift b/apps/ValarDaemon/Sources/ValarDaemon/Routes/AudioRoutes.swift index de23baa..46c77cc 100644 --- a/apps/ValarDaemon/Sources/ValarDaemon/Routes/AudioRoutes.swift +++ b/apps/ValarDaemon/Sources/ValarDaemon/Routes/AudioRoutes.swift @@ -11,6 +11,10 @@ import ValarPersistence private let maxMultipartBodyBytes = 15_000_000 private let maxSpeechRequestBodyBytes = 15_000_000 +private let maxMultipartPartCount = 24 +private let maxMultipartHeaderBytes = 16_384 +private let maxMultipartFieldBytes = 256 * 1024 +private let maxMultipartFileBytes = maxMultipartBodyBytes private let validOpusRates: Set = [8_000, 12_000, 16_000, 24_000, 48_000] private let voxtralPresetVoiceIDs = Set(VoxtralCatalog.presetVoices.map(\.name)) private let internalAudioErrorMessage = "Audio request failed due to an internal daemon error." @@ -1946,6 +1950,9 @@ private struct MultipartFormData { let delimiter = Data(("--" + boundary).utf8) let terminal = Data("--".utf8) let parts = split(body, by: delimiter) + guard parts.count <= maxMultipartPartCount + 2 else { + throw DaemonRequestError.invalidMultipart("Multipart request has too many parts.") + } var fields: [String: String] = [:] var files: [String: MultipartFile] = [:] @@ -1966,6 +1973,9 @@ private struct MultipartFormData { let headerData = part.subdata(in: part.startIndex ..< separatorRange.lowerBound) let valueData = part.subdata(in: separatorRange.upperBound ..< part.endIndex) + guard headerData.count <= maxMultipartHeaderBytes else { + throw DaemonRequestError.invalidMultipart("Multipart section headers are too large.") + } guard let headerText = String(data: headerData, encoding: .utf8) else { throw DaemonRequestError.invalidMultipart("Multipart headers are not valid UTF-8.") } @@ -1992,14 +2002,26 @@ private struct MultipartFormData { guard let fieldName = dispositionAttributes["name"] else { throw DaemonRequestError.invalidMultipart("Multipart field is missing a name.") } + guard fieldName.utf8.count <= 128 else { + throw DaemonRequestError.invalidMultipart("Multipart field name is too long.") + } if let filename = dispositionAttributes["filename"] { + guard filename.utf8.count <= 255 else { + throw DaemonRequestError.invalidMultipart("Multipart filename is too long.") + } + guard valueData.count <= maxMultipartFileBytes else { + throw DaemonRequestError.invalidMultipart("Multipart file is too large.") + } files[fieldName] = MultipartFile( filename: filename, contentType: contentTypeHeader, data: valueData ) } else { + guard valueData.count <= maxMultipartFieldBytes else { + throw DaemonRequestError.invalidMultipart("Multipart field value is too large.") + } let text = String(data: valueData, encoding: .utf8)? .trimmingCharacters(in: .whitespacesAndNewlines) ?? "" fields[fieldName] = text diff --git a/apps/ValarDaemon/Sources/ValarDaemon/Routes/OperationRoutes.swift b/apps/ValarDaemon/Sources/ValarDaemon/Routes/OperationRoutes.swift index 34e0c4a..605bb90 100644 --- a/apps/ValarDaemon/Sources/ValarDaemon/Routes/OperationRoutes.swift +++ b/apps/ValarDaemon/Sources/ValarDaemon/Routes/OperationRoutes.swift @@ -161,6 +161,41 @@ extension ValarDaemonRouter { } } + router.post("operations/:id/cancel") { _, context async -> Response in + do { + guard let id = context.parameters.get("id"), + !id.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty else { + return operationErrorResponse( + "Missing operation ID parameter.", + status: .badRequest + ) + } + + switch await operations.cancel(id: id) { + case .cancelled(let operation): + return try jsonResponse(operation) + case .running: + return operationErrorResponse( + "Operation is already running and cannot be cancelled safely.", + status: .conflict, + help: "Wait for the operation to finish, then inspect its status." + ) + case .alreadyFinished(let operation): + return try jsonResponse(operation) + case .notFound: + return operationErrorResponse( + "Operation '\(id)' not found.", + status: .notFound + ) + } + } catch { + return operationErrorResponse( + "Failed to cancel operation.", + status: .internalServerError + ) + } + } + router.get("queue") { _, _ async throws -> Response in do { return try jsonResponse(await operations.queueState()) diff --git a/apps/ValarDaemon/Sources/ValarDaemon/Routes/VoiceRoutes.swift b/apps/ValarDaemon/Sources/ValarDaemon/Routes/VoiceRoutes.swift index 9f146b6..fa4a0ee 100644 --- a/apps/ValarDaemon/Sources/ValarDaemon/Routes/VoiceRoutes.swift +++ b/apps/ValarDaemon/Sources/ValarDaemon/Routes/VoiceRoutes.swift @@ -5,6 +5,11 @@ import ValarCore import ValarModelKit import ValarPersistence +private let maxVoiceCloneMultipartPartCount = 16 +private let maxVoiceCloneMultipartHeaderBytes = 16_384 +private let maxVoiceCloneMultipartFieldBytes = 256 * 1024 +private let maxVoiceCloneMultipartFileBytes = 15_000_000 + private actor VoiceMutationRateLimiter { struct Decision: Sendable { let allowed: Bool @@ -444,6 +449,9 @@ private struct VoiceCloneFormData: Sendable { let delimiter = Data(("--" + boundary).utf8) let terminal = Data("--".utf8) let parts = splitByDelimiter(body, delimiter: delimiter) + guard parts.count <= maxVoiceCloneMultipartPartCount + 2 else { + throw VoiceCloneParseError.tooManyParts + } var fields: [String: String] = [:] var files: [String: VoiceCloneFile] = [:] @@ -461,6 +469,9 @@ private struct VoiceCloneFormData: Sendable { let headerData = part.subdata(in: part.startIndex ..< sepRange.lowerBound) let valueData = part.subdata(in: sepRange.upperBound ..< part.endIndex) + guard headerData.count <= maxVoiceCloneMultipartHeaderBytes else { + throw VoiceCloneParseError.headerTooLarge + } guard let headerText = String(data: headerData, encoding: .utf8) else { throw VoiceCloneParseError.malformedPart @@ -480,10 +491,22 @@ private struct VoiceCloneFormData: Sendable { } guard let fieldName = dispositionAttrs["name"] else { continue } + guard fieldName.utf8.count <= 128 else { + throw VoiceCloneParseError.fieldNameTooLong + } if let filename = dispositionAttrs["filename"] { + guard filename.utf8.count <= 255 else { + throw VoiceCloneParseError.filenameTooLong + } + guard valueData.count <= maxVoiceCloneMultipartFileBytes else { + throw VoiceCloneParseError.fileTooLarge + } files[fieldName] = VoiceCloneFile(filename: filename, data: valueData) } else { + guard valueData.count <= maxVoiceCloneMultipartFieldBytes else { + throw VoiceCloneParseError.fieldTooLarge + } let text = String(data: valueData, encoding: .utf8)? .trimmingCharacters(in: .whitespacesAndNewlines) ?? "" fields[fieldName] = text @@ -558,6 +581,12 @@ private struct VoiceCloneFormData: Sendable { private enum VoiceCloneParseError: LocalizedError { case missingBoundary case malformedPart + case tooManyParts + case headerTooLarge + case fieldNameTooLong + case filenameTooLong + case fieldTooLarge + case fileTooLarge var errorDescription: String? { switch self { @@ -565,6 +594,18 @@ private enum VoiceCloneParseError: LocalizedError { return "Missing multipart boundary in Content-Type." case .malformedPart: return "Malformed multipart section." + case .tooManyParts: + return "Multipart request has too many parts." + case .headerTooLarge: + return "Multipart section headers are too large." + case .fieldNameTooLong: + return "Multipart field name is too long." + case .filenameTooLong: + return "Multipart filename is too long." + case .fieldTooLarge: + return "Multipart field value is too large." + case .fileTooLarge: + return "Multipart file is too large." } } } diff --git a/apps/ValarDaemon/Sources/ValarDaemon/SessionManager.swift b/apps/ValarDaemon/Sources/ValarDaemon/SessionManager.swift index cc78763..54d405a 100644 --- a/apps/ValarDaemon/Sources/ValarDaemon/SessionManager.swift +++ b/apps/ValarDaemon/Sources/ValarDaemon/SessionManager.swift @@ -14,12 +14,19 @@ actor SessionManager { var lastAccessedAt: Date } + private struct SessionCreation { + let sessionID: UUID + let projectID: UUID + let bundleURL: URL + } + /// Sessions idle longer than this are evicted on the next prune pass. static let sessionTTL: TimeInterval = 60 * 60 // 1 hour // MARK: - State private var sessions: [UUID: SessionEntry] = [:] + private var openingSessions: [String: Task] = [:] // MARK: - Session lifecycle @@ -27,18 +34,92 @@ actor SessionManager { /// Returns a session ID that callers use for subsequent requests. /// If the bundle URL is already open the existing session ID is returned. func openOrCreate(path rawPath: String, runtime: ValarRuntime) async throws -> UUID { - let bundleURL = URL(fileURLWithPath: rawPath).standardizedFileURL - - // Reject paths outside the allowed projects directory to prevent path traversal. - try ValarAppPaths.validateContainment(bundleURL, within: runtime.paths.projectsDirectory) + let bundleURL = try Self.normalizedProjectBundleURL(rawPath, runtime: runtime) + let bundleKey = bundleURL.path // Return existing session ID if this bundle is already registered. - if let existing = sessions.first(where: { $0.value.bundleURL == bundleURL }) { + if let existing = sessions.first(where: { $0.value.bundleURL.path == bundleKey }) { sessions[existing.key]?.lastAccessedAt = Date() return existing.key } - let sessionID = UUID() + if openingSessions[bundleKey] == nil { + let sessionID = UUID() + openingSessions[bundleKey] = Task { + try await Self.createSession( + sessionID: sessionID, + bundleURL: bundleURL, + runtime: runtime + ) + } + } + + guard let creationTask = openingSessions[bundleKey] else { + throw SessionManagerError.sessionOpeningFailed + } + + do { + let creation = try await creationTask.value + if let existing = sessions.first(where: { $0.value.bundleURL.path == bundleKey }) { + openingSessions[bundleKey] = nil + sessions[existing.key]?.lastAccessedAt = Date() + return existing.key + } + + sessions[creation.sessionID] = SessionEntry( + projectID: creation.projectID, + bundleURL: creation.bundleURL, + lastAccessedAt: Date() + ) + openingSessions[bundleKey] = nil + return creation.sessionID + } catch { + openingSessions[bundleKey] = nil + if let existing = sessions.first(where: { $0.value.bundleURL.path == bundleKey }) { + sessions[existing.key]?.lastAccessedAt = Date() + return existing.key + } + throw error + } + } + + private nonisolated static func normalizedProjectBundleURL( + _ rawPath: String, + runtime: ValarRuntime + ) throws -> URL { + let standardized = URL(fileURLWithPath: rawPath).standardizedFileURL + try ValarAppPaths.validateContainment(standardized, within: runtime.paths.projectsDirectory) + return try canonicalizedURL(standardized, fileManager: .default) + } + + private nonisolated static func canonicalizedURL( + _ url: URL, + fileManager: FileManager + ) throws -> URL { + let standardized = url.standardizedFileURL + var existingAncestor = standardized + var unresolvedComponents: [String] = [] + + while !fileManager.fileExists(atPath: existingAncestor.path) { + let parent = existingAncestor.deletingLastPathComponent() + if parent.path == existingAncestor.path { + break + } + unresolvedComponents.insert(existingAncestor.lastPathComponent, at: 0) + existingAncestor = parent + } + + let resolvedAncestor = existingAncestor.resolvingSymlinksInPath().standardizedFileURL + return unresolvedComponents.reduce(resolvedAncestor) { partial, component in + partial.appendingPathComponent(component, isDirectory: false) + } + } + + private nonisolated static func createSession( + sessionID: UUID, + bundleURL: URL, + runtime: ValarRuntime + ) async throws -> SessionCreation { let projectID: UUID if FileManager.default.fileExists(atPath: bundleURL.path) { @@ -47,7 +128,6 @@ actor SessionManager { let docSession = await runtime.createDocumentSession(for: bundle) projectID = await docSession.projectID() } else { - // Create a fresh project and seed an empty bundle in the runtime. let title = bundleURL.deletingPathExtension().lastPathComponent let project = try await runtime.projectStore.create( title: title.isEmpty ? "Untitled" : title, @@ -70,14 +150,7 @@ actor SessionManager { _ = await runtime.createDocumentSession(for: bundle) } - // After await — re-check for duplicate (another request may have inserted during our await) - if let existing = sessions.first(where: { $0.value.bundleURL == bundleURL }) { - // Another caller already created a session for this bundle during our await - return existing.key - } - - sessions[sessionID] = SessionEntry(projectID: projectID, bundleURL: bundleURL, lastAccessedAt: Date()) - return sessionID + return SessionCreation(sessionID: sessionID, projectID: projectID, bundleURL: bundleURL) } /// Returns the project ID and bundle URL for the given session ID, updating its last-accessed timestamp. @@ -112,11 +185,14 @@ actor SessionManager { enum SessionManagerError: LocalizedError, Sendable { case sessionNotFound(UUID) + case sessionOpeningFailed var errorDescription: String? { switch self { case .sessionNotFound(let id): return "Session '\(id.uuidString)' not found." + case .sessionOpeningFailed: + return "Session could not be opened." } } } diff --git a/apps/ValarTTSMac/Package.resolved b/apps/ValarTTSMac/Package.resolved index dfa4d07..7bce660 100644 --- a/apps/ValarTTSMac/Package.resolved +++ b/apps/ValarTTSMac/Package.resolved @@ -95,8 +95,8 @@ "kind" : "remoteSourceControl", "location" : "https://github.com/apple/swift-crypto.git", "state" : { - "revision" : "fa308c07a6fa04a727212d793e761460e41049c3", - "version" : "4.3.0" + "revision" : "1b6b2e274e85105bfa155183145a1dcfd63331f1", + "version" : "4.5.0" } }, { diff --git a/bridge/bun.lock b/bridge/bun.lock index 1f1ec56..ed2ddbe 100644 --- a/bridge/bun.lock +++ b/bridge/bun.lock @@ -3,23 +3,30 @@ "configVersion": 1, "workspaces": { "": { - "name": "valartts-mcp-bridge", + "name": "valar-mcp-bridge", "dependencies": { - "@modelcontextprotocol/sdk": "^1.26.0", - "zod": "^3.22.0", + "@modelcontextprotocol/sdk": "^1.29.0", + "zod": "^3.25.76", }, "devDependencies": { - "@types/bun": "latest", - "typescript": "^5.0.0", + "@types/bun": "1.3.14", + "typescript": "^5.9.3", }, }, }, + "overrides": { + "@hono/node-server": "^1.19.14", + "fast-uri": "^3.1.2", + "hono": "^4.12.18", + "ip-address": "^10.2.0", + "path-to-regexp": "^8.4.2", + }, "packages": { - "@hono/node-server": ["@hono/node-server@1.19.11", "", { "peerDependencies": { "hono": "^4" } }, "sha512-dr8/3zEaB+p0D2n/IUrlPF1HZm586qgJNXK1a9fhg/PzdtkK7Ksd5l312tJX2yBuALqDYBlG20QEbayqPyxn+g=="], + "@hono/node-server": ["@hono/node-server@1.19.14", "", { "peerDependencies": { "hono": "^4" } }, "sha512-GwtvgtXxnWsucXvbQXkRgqksiH2Qed37H9xHZocE5sA3N8O8O8/8FA3uclQXxXVzc9XBZuEOMK7+r02FmSpHtw=="], - "@modelcontextprotocol/sdk": ["@modelcontextprotocol/sdk@1.27.1", "", { "dependencies": { "@hono/node-server": "^1.19.9", "ajv": "^8.17.1", "ajv-formats": "^3.0.1", "content-type": "^1.0.5", "cors": "^2.8.5", "cross-spawn": "^7.0.5", "eventsource": "^3.0.2", "eventsource-parser": "^3.0.0", "express": "^5.2.1", "express-rate-limit": "^8.2.1", "hono": "^4.11.4", "jose": "^6.1.3", "json-schema-typed": "^8.0.2", "pkce-challenge": "^5.0.0", "raw-body": "^3.0.0", "zod": "^3.25 || ^4.0", "zod-to-json-schema": "^3.25.1" }, "peerDependencies": { "@cfworker/json-schema": "^4.1.1" }, "optionalPeers": ["@cfworker/json-schema"] }, "sha512-sr6GbP+4edBwFndLbM60gf07z0FQ79gaExpnsjMGePXqFcSSb7t6iscpjk9DhFhwd+mTEQrzNafGP8/iGGFYaA=="], + "@modelcontextprotocol/sdk": ["@modelcontextprotocol/sdk@1.29.0", "", { "dependencies": { "@hono/node-server": "^1.19.9", "ajv": "^8.17.1", "ajv-formats": "^3.0.1", "content-type": "^1.0.5", "cors": "^2.8.5", "cross-spawn": "^7.0.5", "eventsource": "^3.0.2", "eventsource-parser": "^3.0.0", "express": "^5.2.1", "express-rate-limit": "^8.2.1", "hono": "^4.11.4", "jose": "^6.1.3", "json-schema-typed": "^8.0.2", "pkce-challenge": "^5.0.0", "raw-body": "^3.0.0", "zod": "^3.25 || ^4.0", "zod-to-json-schema": "^3.25.1" }, "peerDependencies": { "@cfworker/json-schema": "^4.1.1" }, "optionalPeers": ["@cfworker/json-schema"] }, "sha512-zo37mZA9hJWpULgkRpowewez1y6ML5GsXJPY8FI0tBBCd77HEvza4jDqRKOXgHNn867PVGCyTdzqpz0izu5ZjQ=="], - "@types/bun": ["@types/bun@1.3.11", "", { "dependencies": { "bun-types": "1.3.11" } }, "sha512-5vPne5QvtpjGpsGYXiFyycfpDF2ECyPcTSsFBMa0fraoxiQyMJ3SmuQIGhzPg2WJuWxVBoxWJ2kClYTcw/4fAg=="], + "@types/bun": ["@types/bun@1.3.14", "", { "dependencies": { "bun-types": "1.3.14" } }, "sha512-h1hFqFVcvAvD9j9K7ZW7vd82aSA+rTdznZa+5bwvCwqSB1jmmfLcbIWhOLx1/+boy/xmjgCs/OMUL8hRJSmnPw=="], "@types/node": ["@types/node@25.5.0", "", { "dependencies": { "undici-types": "~7.18.0" } }, "sha512-jp2P3tQMSxWugkCUKLRPVUpGaL5MVFwF8RDuSRztfwgN1wmqJeMSbKlnEtQqU8UrhTmzEmZdu2I6v2dpp7XIxw=="], @@ -31,7 +38,7 @@ "body-parser": ["body-parser@2.2.2", "", { "dependencies": { "bytes": "^3.1.2", "content-type": "^1.0.5", "debug": "^4.4.3", "http-errors": "^2.0.0", "iconv-lite": "^0.7.0", "on-finished": "^2.4.1", "qs": "^6.14.1", "raw-body": "^3.0.1", "type-is": "^2.0.1" } }, "sha512-oP5VkATKlNwcgvxi0vM0p/D3n2C3EReYVX+DNYs5TjZFn/oQt2j+4sVJtSMr18pdRr8wjTcBl6LoV+FUwzPmNA=="], - "bun-types": ["bun-types@1.3.11", "", { "dependencies": { "@types/node": "*" } }, "sha512-1KGPpoxQWl9f6wcZh57LvrPIInQMn2TQ7jsgxqpRzg+l0QPOFvJVH7HmvHo/AiPgwXy+/Thf6Ov3EdVn1vOabg=="], + "bun-types": ["bun-types@1.3.14", "", { "dependencies": { "@types/node": "*" } }, "sha512-4N0ig0fEomHt5R0KCFWjovxow98rIoRwKolrYdCcknNwMekCXRnWEUvgu5soYV8QXtVsrUD8B95MBOZGPvr6KQ=="], "bytes": ["bytes@3.1.2", "", {}, "sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg=="], @@ -81,7 +88,7 @@ "fast-deep-equal": ["fast-deep-equal@3.1.3", "", {}, "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q=="], - "fast-uri": ["fast-uri@3.1.0", "", {}, "sha512-iPeeDKJSWf4IEOasVVrknXpaBV0IApz/gp7S2bb7Z4Lljbl2MGJRqInZiUrQwV16cpzw/D3S5j5Julj/gT52AA=="], + "fast-uri": ["fast-uri@3.1.2", "", {}, "sha512-rVjf7ArG3LTk+FS6Yw81V1DLuZl1bRbNrev6Tmd/9RaroeeRRJhAt7jg/6YFxbvAQXUCavSoZhPPj6oOx+5KjQ=="], "finalhandler": ["finalhandler@2.1.1", "", { "dependencies": { "debug": "^4.4.0", "encodeurl": "^2.0.0", "escape-html": "^1.0.3", "on-finished": "^2.4.1", "parseurl": "^1.3.3", "statuses": "^2.0.1" } }, "sha512-S8KoZgRZN+a5rNwqTxlZZePjT/4cnm0ROV70LedRHZ0p8u9fRID0hJUZQpkKLzro8LfmC8sx23bY6tVNxv8pQA=="], @@ -101,7 +108,7 @@ "hasown": ["hasown@2.0.2", "", { "dependencies": { "function-bind": "^1.1.2" } }, "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ=="], - "hono": ["hono@4.12.8", "", {}, "sha512-VJCEvtrezO1IAR+kqEYnxUOoStaQPGrCmX3j4wDTNOcD1uRPFpGlwQUIW8niPuvHXaTUxeOUl5MMDGrl+tmO9A=="], + "hono": ["hono@4.12.18", "", {}, "sha512-RWzP96k/yv0PQfyXnWjs6zot20TqfpfsNXhOnev8d1InAxubW93L11/oNUc3tQqn2G0bSdAOBpX+2uDFHV7kdQ=="], "http-errors": ["http-errors@2.0.1", "", { "dependencies": { "depd": "~2.0.0", "inherits": "~2.0.4", "setprototypeof": "~1.2.0", "statuses": "~2.0.2", "toidentifier": "~1.0.1" } }, "sha512-4FbRdAX+bSdmo4AUFuS0WNiPz8NgFt+r8ThgNWmlrjQjt1Q7ZR9+zTlce2859x4KSXrwIsaeTqDoKQmtP8pLmQ=="], @@ -109,7 +116,7 @@ "inherits": ["inherits@2.0.4", "", {}, "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ=="], - "ip-address": ["ip-address@10.1.0", "", {}, "sha512-XXADHxXmvT9+CRxhXg56LJovE+bmWnEWB78LB83VZTprKTmaC5QfruXocxzTZ2Kl0DNwKuBdlIhjL8LeY8Sf8Q=="], + "ip-address": ["ip-address@10.2.0", "", {}, "sha512-/+S6j4E9AHvW9SWMSEY9Xfy66O5PWvVEJ08O0y5JGyEKQpojb0K0GKpz/v5HJ/G0vi3D2sjGK78119oXZeE0qA=="], "ipaddr.js": ["ipaddr.js@1.9.1", "", {}, "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g=="], @@ -149,7 +156,7 @@ "path-key": ["path-key@3.1.1", "", {}, "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q=="], - "path-to-regexp": ["path-to-regexp@8.3.0", "", {}, "sha512-7jdwVIRtsP8MYpdXSwOS0YdD0Du+qOoF/AEPIt88PcCFrZCzx41oxku1jD88hZBwbNUIEfpqvuhjFaMAqMTWnA=="], + "path-to-regexp": ["path-to-regexp@8.4.2", "", {}, "sha512-qRcuIdP69NPm4qbACK+aDogI5CBDMi1jKe0ry5rSQJz8JVLsC7jV8XpiJjGRLLol3N+R5ihGYcrPLTno6pAdBA=="], "pkce-challenge": ["pkce-challenge@5.0.1", "", {}, "sha512-wQ0b/W4Fr01qtpHlqSqspcj3EhBvimsdh0KlHhH8HRZnMsEa0ea2fTULOXOS9ccQr3om+GcGRk4e+isrZWV8qQ=="], diff --git a/bridge/package.json b/bridge/package.json index 64ba74a..9fdaa2b 100644 --- a/bridge/package.json +++ b/bridge/package.json @@ -3,17 +3,25 @@ "version": "1.0.0", "description": "Public MCP bridge server for the local Valar daemon", "private": true, - "packageManager": "bun@1.2.13", + "packageManager": "bun@1.3.14", "scripts": { + "setup": "bun install --frozen-lockfile --ignore-scripts", "start": "bun server.ts", "typecheck": "tsc --noEmit" }, "dependencies": { - "@modelcontextprotocol/sdk": "^1.26.0", - "zod": "^3.22.0" + "@modelcontextprotocol/sdk": "^1.29.0", + "zod": "^3.25.76" }, "devDependencies": { - "@types/bun": "latest", - "typescript": "^5.0.0" + "@types/bun": "1.3.14", + "typescript": "^5.9.3" + }, + "overrides": { + "@hono/node-server": "^1.19.14", + "fast-uri": "^3.1.2", + "hono": "^4.12.18", + "ip-address": "^10.2.0", + "path-to-regexp": "^8.4.2" } } diff --git a/bridge/server.ts b/bridge/server.ts index c9f0b39..a2536ec 100644 --- a/bridge/server.ts +++ b/bridge/server.ts @@ -13,49 +13,37 @@ import * as valarTranscribe from "./src/tools/valar_transcribe.js"; import * as valarAlign from "./src/tools/valar_align.js"; import * as valarInstallModel from "./src/tools/valar_install_model.js"; import { daemonUnavailableMessage, sanitizeMessage } from "./src/security/redaction.js"; - -function requireLoopbackDaemonURL(raw: string): string { - let parsed: URL; - try { - parsed = new URL(raw); - } catch { - throw new Error(`VALAR_DAEMON_URL is not a valid URL: ${raw}`); - } - const loopbackHosts = new Set(["127.0.0.1", "::1", "localhost"]); - if (!loopbackHosts.has(parsed.hostname)) { - throw new Error( - `VALAR_DAEMON_URL must point to a loopback address (127.0.0.1, ::1, or localhost). Got: ${parsed.hostname}`, - ); - } - return raw; -} +import { + createDaemonURLBuilder, + daemonFetch, + readDaemonJSON, + readDaemonText, + requireLoopbackDaemonURL, +} from "./src/security/daemon.js"; const DAEMON_URL = requireLoopbackDaemonURL(process.env.VALAR_DAEMON_URL ?? "http://127.0.0.1:8787"); - -function daemonURL(path: string): string { - return `${DAEMON_URL}/v1${path}`; -} +const daemonURL = createDaemonURLBuilder(DAEMON_URL); async function daemonGet(path: string): Promise { - const res = await fetch(daemonURL(path)); + const res = await daemonFetch(daemonURL(path)); if (!res.ok) { - const text = await res.text().catch(() => ""); + const text = await readDaemonText(res).catch(() => ""); throw new Error(`Daemon GET ${path} → ${res.status}: ${text}`); } - return res.json(); + return readDaemonJSON(res); } async function daemonPost(path: string, body: unknown): Promise { - const res = await fetch(daemonURL(path), { + const res = await daemonFetch(daemonURL(path), { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(body), }); if (!res.ok) { - const text = await res.text().catch(() => ""); + const text = await readDaemonText(res).catch(() => ""); throw new Error(`Daemon POST ${path} → ${res.status}: ${text}`); } - return res.json(); + return readDaemonJSON(res); } function ok(text: string) { @@ -68,9 +56,7 @@ function err(text: string) { async function checkDaemonHealthy(): Promise { try { - const res = await fetch(daemonURL("/health"), { - signal: AbortSignal.timeout(5_000), - }); + const res = await daemonFetch(daemonURL("/health"), {}, 5_000); return res.ok; } catch { return false; @@ -247,18 +233,18 @@ server.tool( if (title !== undefined) body.title = title; if (text !== undefined) body.text = text; - const res = await fetch(daemonURL(`/sessions/${session_id}/chapters/${chapter_id}`), { + const res = await daemonFetch(daemonURL(`/sessions/${session_id}/chapters/${chapter_id}`), { method: "PATCH", headers: { "Content-Type": "application/json" }, body: JSON.stringify(body), }); if (!res.ok) { - const t = await res.text().catch(() => ""); + const t = await readDaemonText(res).catch(() => ""); return err(`Chapter update failed (${res.status}): ${t}`); } - const result = await res.json(); + const result = await readDaemonJSON(res); return ok(JSON.stringify(result, null, 2)); } catch (e) { return err(String(e)); diff --git a/bridge/src/security/daemon.ts b/bridge/src/security/daemon.ts new file mode 100644 index 0000000..9b35047 --- /dev/null +++ b/bridge/src/security/daemon.ts @@ -0,0 +1,97 @@ +import { sanitizeMessage } from "./redaction.js"; + +const DEFAULT_TIMEOUT_MS = 120_000; +const DEFAULT_TEXT_LIMIT_BYTES = 512 * 1024; +const DEFAULT_BINARY_LIMIT_BYTES = 200 * 1024 * 1024; + +export function requireLoopbackDaemonURL(raw: string): string { + let parsed: URL; + try { + parsed = new URL(raw); + } catch { + throw new Error(`VALAR_DAEMON_URL is not a valid URL: ${sanitizeMessage(raw)}`); + } + + const loopbackHosts = new Set(["127.0.0.1", "::1", "[::1]", "localhost"]); + if (parsed.protocol !== "http:" && parsed.protocol !== "https:") { + throw new Error("VALAR_DAEMON_URL must use http: or https:."); + } + if (parsed.username || parsed.password) { + throw new Error("VALAR_DAEMON_URL must not include credentials."); + } + if (!loopbackHosts.has(parsed.hostname)) { + throw new Error( + `VALAR_DAEMON_URL must point to a loopback address (127.0.0.1, ::1, or localhost). Got: ${parsed.hostname}`, + ); + } + + return parsed.origin; +} + +export function createDaemonURLBuilder(baseURL: string): (path: string) => string { + return (path: string) => `${baseURL}/v1${path}`; +} + +export async function daemonFetch( + url: string, + init: RequestInit = {}, + timeoutMs = DEFAULT_TIMEOUT_MS, +): Promise { + const signal = AbortSignal.timeout(timeoutMs); + return fetch(url, { ...init, signal }); +} + +async function readLimitedBytes(response: Response, limitBytes: number): Promise { + const reader = response.body?.getReader(); + if (!reader) { + return new Uint8Array(await response.arrayBuffer()); + } + + const chunks: Uint8Array[] = []; + let total = 0; + while (true) { + const { done, value } = await reader.read(); + if (done) break; + if (!value) continue; + total += value.byteLength; + if (total > limitBytes) { + await reader.cancel(); + throw new Error(`Daemon response exceeded ${(limitBytes / 1024 / 1024).toFixed(1)} MB limit.`); + } + chunks.push(value); + } + + const bytes = new Uint8Array(total); + let offset = 0; + for (const chunk of chunks) { + bytes.set(chunk, offset); + offset += chunk.byteLength; + } + return bytes; +} + +export async function readDaemonText( + response: Response, + limitBytes = DEFAULT_TEXT_LIMIT_BYTES, +): Promise { + const bytes = await readLimitedBytes(response, limitBytes); + return new TextDecoder().decode(bytes); +} + +export async function readDaemonJSON( + response: Response, + limitBytes = DEFAULT_TEXT_LIMIT_BYTES, +): Promise { + const text = await readDaemonText(response, limitBytes); + return JSON.parse(text) as T; +} + +export async function readDaemonBinary( + response: Response, + limitBytes = DEFAULT_BINARY_LIMIT_BYTES, +): Promise { + const bytes = await readLimitedBytes(response, limitBytes); + const copy = new ArrayBuffer(bytes.byteLength); + new Uint8Array(copy).set(bytes); + return copy; +} diff --git a/bridge/src/security/passphrase.ts b/bridge/src/security/passphrase.ts index b11fc5e..0047798 100644 --- a/bridge/src/security/passphrase.ts +++ b/bridge/src/security/passphrase.ts @@ -31,13 +31,28 @@ export interface PassphraseExtractionResult { } function stripPhrase(transcript: string, phrase: string): string { - const idx = transcript.toLowerCase().indexOf(phrase.toLowerCase()); - if (idx === -1) return transcript; - return (transcript.slice(0, idx) + transcript.slice(idx + phrase.length)) + const regex = phraseRegex(phrase); + const match = regex.exec(transcript); + if (!match) return transcript; + const idx = match.index; + const matchedPhrase = match[0]; + return (transcript.slice(0, idx) + transcript.slice(idx + matchedPhrase.length)) .replace(/\s{2,}/g, " ") .trim(); } +function phraseRegex(phrase: string): RegExp { + const words = phrase.trim().split(/\s+/).map(normalise).filter(Boolean); + const escapedWords = words.map((word) => word.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")); + return new RegExp(`\\b${escapedWords.join("[^a-z0-9]+")}\\b`, "i"); +} + +function transcriptContainsPhrase(transcript: string, phrase: string): boolean { + if (phrase.trim().length === 0) return false; + const regex = phraseRegex(phrase); + return regex.test(transcript); +} + function normalise(word: string): string { return word.toLowerCase().replace(/^[^a-z0-9]+|[^a-z0-9]+$/gi, ""); } @@ -88,11 +103,16 @@ export function extractPassphrase( maxGapSeconds = 0.5, } = config; - const phraseInTranscript = transcript - .toLowerCase() - .includes(phrase.toLowerCase()); + const phraseWords = phrase.trim().split(/\s+/).map(normalise).filter(Boolean); + if (phraseWords.length === 0) { + return { + passphraseFound: false, + confidence: null, + strippedTranscript: transcript, + }; + } - if (!phraseInTranscript) { + if (!requireAlignerVerification && !transcriptContainsPhrase(transcript, phrase)) { return { passphraseFound: false, confidence: null, @@ -109,7 +129,6 @@ export function extractPassphrase( }; } - const phraseWords = phrase.trim().split(/\s+/).filter(Boolean); const run = findPhraseTokenRun(alignment.tokens, phraseWords); if (!run) { @@ -155,13 +174,13 @@ export function extractPassphrase( return { passphraseFound: true, confidence: avgConfidence, - strippedTranscript: stripPhrase(transcript, phrase), + strippedTranscript: stripPhrase(transcript, phraseWords.join(" ")), }; } return { passphraseFound: true, confidence: null, - strippedTranscript: stripPhrase(transcript, phrase), + strippedTranscript: stripPhrase(transcript, phraseWords.join(" ")), }; } diff --git a/bridge/src/security/paths.ts b/bridge/src/security/paths.ts index cb29b48..c7fe0b8 100644 --- a/bridge/src/security/paths.ts +++ b/bridge/src/security/paths.ts @@ -1,4 +1,4 @@ -import { resolve, join, extname, dirname, basename } from "path"; +import { resolve, join, extname, dirname, basename, isAbsolute } from "path"; import { statSync, lstatSync, realpathSync } from "fs"; import { homedir } from "os"; import { INBOX_DIR, OUTBOX_DIR } from "../storage.js"; @@ -86,7 +86,10 @@ function resolveInputAllowedDirs(): string[] { const INPUT_ALLOWED = resolveInputAllowedDirs(); -export function validateInputPath(p: string): void { +export function validateInputPath(p: string): string { + if (!isAbsolute(p)) { + throw new Error("Input path must be absolute."); + } const resolved = resolveWithSymlinks(p); const ext = extname(resolved).toLowerCase(); if (!AUDIO_EXTENSIONS.has(ext)) { @@ -96,8 +99,15 @@ export function validateInputPath(p: string): void { } let size: number; try { - size = statSync(resolved).size; - } catch { + const stat = statSync(resolved); + if (!stat.isFile()) { + throw new Error(`Input path must be a regular file: ${redactPath(p)}`); + } + size = stat.size; + } catch (error) { + if (error instanceof Error && error.message.includes("regular file")) { + throw error; + } throw new Error(`Cannot stat input file: ${redactPath(p)}`); } if (size > MAX_INPUT_BYTES) { @@ -111,9 +121,13 @@ export function validateInputPath(p: string): void { "Input path must be within Desktop, Downloads, Documents, Music, /tmp, or Valar bridge storage.", ); } + return resolved; } -export function validateOutputPath(p: string): void { +export function validateOutputPath(p: string): string { + if (!isAbsolute(p)) { + throw new Error("Output path must be absolute."); + } const resolved = resolveWithSymlinks(p); const ext = extname(resolved).toLowerCase(); if (!AUDIO_EXTENSIONS.has(ext)) { @@ -127,4 +141,5 @@ export function validateOutputPath(p: string): void { `Output path must be within an allowed directory (Desktop, Downloads, Documents, /tmp, or Valar bridge storage). Got: ${redactPath(p)}`, ); } + return resolved; } diff --git a/bridge/src/tools/profiles.ts b/bridge/src/tools/profiles.ts index dab0149..cd6e381 100644 --- a/bridge/src/tools/profiles.ts +++ b/bridge/src/tools/profiles.ts @@ -1,6 +1,7 @@ import { readFile } from "fs/promises"; import { join, dirname } from "path"; import { fileURLToPath } from "url"; +import { z } from "zod"; export interface ChannelPrefs { format?: "wav" | "ogg_opus"; @@ -18,6 +19,22 @@ interface ProfilesFile { profiles: Record; } +const ChannelPrefsSchema = z.object({ + format: z.enum(["wav", "ogg_opus"]).optional(), +}); + +const AgentProfileSchema = z.object({ + voiceId: z.string().nullable(), + model: z.string().nullable(), + format: z.enum(["wav", "ogg_opus"]).nullable(), + voiceBehavior: z.enum(["auto", "expressive", "stableNarrator"]).optional(), + channelPrefs: ChannelPrefsSchema.default({}), +}); + +const ProfilesFileSchema = z.object({ + profiles: z.record(AgentProfileSchema), +}); + const BUNDLED_CONFIG_PATH = join( dirname(fileURLToPath(import.meta.url)), "../../config/profiles.json", @@ -51,8 +68,8 @@ async function loadProfiles(): Promise> { for (const path of profileSearchPaths()) { try { const raw = await readFile(path, "utf8"); - const parsed = JSON.parse(raw) as ProfilesFile; - if (parsed?.profiles && typeof parsed.profiles === "object") { + const parsed = ProfilesFileSchema.parse(JSON.parse(raw)) as ProfilesFile; + if (parsed.profiles && typeof parsed.profiles === "object") { cachedProfiles = parsed.profiles; return cachedProfiles; } diff --git a/bridge/src/tools/valar_align.ts b/bridge/src/tools/valar_align.ts index c995ce6..ea25d9e 100644 --- a/bridge/src/tools/valar_align.ts +++ b/bridge/src/tools/valar_align.ts @@ -4,6 +4,7 @@ import { readFile } from "fs/promises"; import { basename } from "path"; import { validateInputPath } from "../security/paths.js"; import { daemonUnavailableMessage, redactPath, sanitizeMessage } from "../security/redaction.js"; +import { daemonFetch, readDaemonJSON, readDaemonText } from "../security/daemon.js"; function ok(text: string) { return { content: [{ type: "text" as const, text }] }; @@ -38,19 +39,20 @@ export function register( .describe('Optional language hint such as "en" or "ja".'), }, async ({ file_path, transcript, model, language }) => { + let resolvedPath: string; try { - validateInputPath(file_path); + resolvedPath = validateInputPath(file_path); } catch (e) { return err(String(e)); } let fileData: Buffer; try { - fileData = await readFile(file_path); + fileData = await readFile(resolvedPath); } catch (e) { return err(`Cannot read file "${redactPath(file_path)}": ${e}`); } - const filename = basename(file_path); + const filename = basename(resolvedPath); const form = new FormData(); form.append("file", new Blob([fileData]), filename); form.append("transcript", transcript); @@ -59,7 +61,7 @@ export function register( let res: Response; try { - res = await fetch(daemonURL("/alignments"), { + res = await daemonFetch(daemonURL("/alignments"), { method: "POST", body: form, }); @@ -68,11 +70,11 @@ export function register( } if (!res.ok) { - const text = await res.text().catch(() => ""); + const text = await readDaemonText(res).catch(() => ""); return err(`Alignment failed (${res.status}): ${text}`); } - const data = await res.json(); + const data = await readDaemonJSON(res); return ok(JSON.stringify(data, null, 2)); }, ); diff --git a/bridge/src/tools/valar_clone_voice_from_file.ts b/bridge/src/tools/valar_clone_voice_from_file.ts index cc03877..d7c53d4 100644 --- a/bridge/src/tools/valar_clone_voice_from_file.ts +++ b/bridge/src/tools/valar_clone_voice_from_file.ts @@ -4,6 +4,7 @@ import { readFile } from "fs/promises"; import { basename } from "path"; import { validateInputPath } from "../security/paths.js"; import { daemonUnavailableMessage, sanitizeMessage } from "../security/redaction.js"; +import { daemonFetch, readDaemonJSON, readDaemonText } from "../security/daemon.js"; function ok(text: string) { return { content: [{ type: "text" as const, text }] }; @@ -43,19 +44,20 @@ export function register( ), }, async ({ file_path, name, transcript, model }) => { + let resolvedPath: string; try { - validateInputPath(file_path); + resolvedPath = validateInputPath(file_path); } catch (e) { return err(String(e)); } let fileData: Buffer; try { - fileData = await readFile(file_path); + fileData = await readFile(resolvedPath); } catch (e) { return err(`Failed to read audio file: ${e}`); } - const filename = basename(file_path); + const filename = basename(resolvedPath); const form = new FormData(); form.append("file", new Blob([fileData]), filename); form.append("name", name); @@ -64,7 +66,7 @@ export function register( let res: Response; try { - res = await fetch(daemonURL("/voices/clone"), { + res = await daemonFetch(daemonURL("/voices/clone"), { method: "POST", body: form, }); @@ -73,11 +75,11 @@ export function register( } if (!res.ok) { - const text = await res.text().catch(() => ""); + const text = await readDaemonText(res).catch(() => ""); return err(`Voice cloning failed (${res.status}): ${text}`); } - const voice = await res.json(); + const voice = await readDaemonJSON(res); return ok(JSON.stringify(voice, null, 2)); }, ); diff --git a/bridge/src/tools/valar_create_voice.ts b/bridge/src/tools/valar_create_voice.ts index c86fd0f..ba97aea 100644 --- a/bridge/src/tools/valar_create_voice.ts +++ b/bridge/src/tools/valar_create_voice.ts @@ -1,6 +1,7 @@ import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; import { z } from "zod"; import { daemonUnavailableMessage, sanitizeMessage } from "../security/redaction.js"; +import { daemonFetch, readDaemonJSON, readDaemonText } from "../security/daemon.js"; function ok(text: string) { return { content: [{ type: "text" as const, text }] }; @@ -34,7 +35,7 @@ export function register( let res: Response; try { - res = await fetch(daemonURL("/voices/create"), { + res = await daemonFetch(daemonURL("/voices/create"), { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(body), @@ -44,11 +45,11 @@ export function register( } if (!res.ok) { - const text = await res.text().catch(() => ""); + const text = await readDaemonText(res).catch(() => ""); return err(`Voice creation failed (${res.status}): ${text}`); } - const voice = await res.json(); + const voice = await readDaemonJSON(res); return ok(JSON.stringify(voice, null, 2)); }, ); diff --git a/bridge/src/tools/valar_delete_voice.ts b/bridge/src/tools/valar_delete_voice.ts index 3916be7..231d912 100644 --- a/bridge/src/tools/valar_delete_voice.ts +++ b/bridge/src/tools/valar_delete_voice.ts @@ -1,6 +1,7 @@ import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; import { z } from "zod"; import { daemonUnavailableMessage, sanitizeMessage } from "../security/redaction.js"; +import { daemonFetch, readDaemonText } from "../security/daemon.js"; function ok(text: string) { return { content: [{ type: "text" as const, text }] }; @@ -26,7 +27,7 @@ export function register( async ({ voice_id }) => { let res: Response; try { - res = await fetch(daemonURL(`/voices/${voice_id}`), { + res = await daemonFetch(daemonURL(`/voices/${voice_id}`), { method: "DELETE", }); } catch { @@ -34,7 +35,7 @@ export function register( } if (!res.ok) { - const text = await res.text().catch(() => ""); + const text = await readDaemonText(res).catch(() => ""); return err(`Voice deletion failed (${res.status}): ${text}`); } diff --git a/bridge/src/tools/valar_design_voice.ts b/bridge/src/tools/valar_design_voice.ts index 3ea34a7..2548892 100644 --- a/bridge/src/tools/valar_design_voice.ts +++ b/bridge/src/tools/valar_design_voice.ts @@ -1,6 +1,7 @@ import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; import { z } from "zod"; import { daemonUnavailableMessage, sanitizeMessage } from "../security/redaction.js"; +import { daemonFetch, readDaemonJSON, readDaemonText } from "../security/daemon.js"; function ok(text: string) { return { content: [{ type: "text" as const, text }] }; @@ -32,7 +33,7 @@ export function register( async ({ name, description }) => { let res: Response; try { - res = await fetch(daemonURL("/voices/design"), { + res = await daemonFetch(daemonURL("/voices/design"), { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ name, description }), @@ -42,11 +43,11 @@ export function register( } if (!res.ok) { - const text = await res.text().catch(() => ""); + const text = await readDaemonText(res).catch(() => ""); return err(`Voice design failed (${res.status}): ${text}`); } - const voice = await res.json(); + const voice = await readDaemonJSON(res); return ok(JSON.stringify(voice, null, 2)); }, ); diff --git a/bridge/src/tools/valar_install_model.ts b/bridge/src/tools/valar_install_model.ts index e073e84..4d4dc1e 100644 --- a/bridge/src/tools/valar_install_model.ts +++ b/bridge/src/tools/valar_install_model.ts @@ -1,6 +1,7 @@ import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; import { z } from "zod"; import { daemonUnavailableMessage, sanitizeMessage } from "../security/redaction.js"; +import { daemonFetch, readDaemonJSON, readDaemonText } from "../security/daemon.js"; function ok(text: string) { return { content: [{ type: "text" as const, text }] }; @@ -27,7 +28,7 @@ export function register( .boolean() .optional() .describe( - "Allow the daemon to download model artifacts if they are not already cached locally. Defaults to true.", + "Allow the daemon to download model artifacts if they are not already cached locally. Defaults to false.", ), refresh_cache: z .boolean() @@ -39,13 +40,13 @@ export function register( async ({ model_id, allow_download, refresh_cache }) => { const body: Record = { model: model_id, - allow_download: allow_download ?? true, + allow_download: allow_download ?? false, }; if (refresh_cache !== undefined) body.refresh_cache = refresh_cache; let res: Response; try { - res = await fetch(daemonURL("/models/install"), { + res = await daemonFetch(daemonURL("/models/install"), { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(body), @@ -55,7 +56,7 @@ export function register( } if (res.status === 409) { - const json = await res.json().catch(() => ({})) as Record; + const json = await readDaemonJSON>(res).catch(() => ({})); return err( `Model '${model_id}' is not cached locally and requires a network download.\n` + `Retry with allow_download=true, or use the CLI instead:\n` + @@ -65,11 +66,11 @@ export function register( } if (!res.ok) { - const text = await res.text().catch(() => ""); + const text = await readDaemonText(res).catch(() => ""); return err(`Model installation failed (${res.status}): ${text}`); } - const result = await res.json(); + const result = await readDaemonJSON(res); return ok(JSON.stringify(result, null, 2)); }, ); diff --git a/bridge/src/tools/valar_models.ts b/bridge/src/tools/valar_models.ts index 8cdbc9d..0993f17 100644 --- a/bridge/src/tools/valar_models.ts +++ b/bridge/src/tools/valar_models.ts @@ -1,5 +1,6 @@ import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; import { daemonUnavailableMessage, sanitizeMessage } from "../security/redaction.js"; +import { daemonFetch, readDaemonJSON, readDaemonText } from "../security/daemon.js"; function ok(text: string) { return { content: [{ type: "text" as const, text }] }; @@ -19,12 +20,12 @@ export function register( {}, async () => { try { - const res = await fetch(daemonURL("/models")); + const res = await daemonFetch(daemonURL("/models")); if (!res.ok) { - const text = await res.text().catch(() => ""); + const text = await readDaemonText(res).catch(() => ""); return err(`Failed to list models (${res.status}): ${text}`); } - const models = await res.json(); + const models = await readDaemonJSON(res); return ok(JSON.stringify(models, null, 2)); } catch { return err(daemonUnavailableMessage()); diff --git a/bridge/src/tools/valar_speak.ts b/bridge/src/tools/valar_speak.ts index d1fa3c2..e332687 100644 --- a/bridge/src/tools/valar_speak.ts +++ b/bridge/src/tools/valar_speak.ts @@ -5,6 +5,7 @@ import { extname } from "path"; import { resolveProfile } from "./profiles.js"; import { validateInputPath, validateOutputPath } from "../security/paths.js"; import { daemonUnavailableMessage, redactPath, sanitizeMessage } from "../security/redaction.js"; +import { daemonFetch, readDaemonBinary, readDaemonText } from "../security/daemon.js"; function ok(text: string) { return { content: [{ type: "text" as const, text }] }; @@ -15,9 +16,9 @@ function err(text: string) { } async function encodeReferenceAudioDataURL(filePath: string): Promise { - validateInputPath(filePath); - const bytes = await readFile(filePath); - const ext = extname(filePath).toLowerCase(); + const resolvedPath = validateInputPath(filePath); + const bytes = await readFile(resolvedPath); + const ext = extname(resolvedPath).toLowerCase(); const mime = ext === ".m4a" ? "audio/mp4" : ext === ".wav" ? "audio/wav" : @@ -97,6 +98,13 @@ export function register( ), }, async ({ text, output_path, profile, voice, model, voice_behavior, format, temperature, top_p, repetition_penalty, max_tokens, reference_audio_path, reference_transcript, language }) => { + let resolvedOutputPath: string; + try { + resolvedOutputPath = validateOutputPath(output_path); + } catch (e) { + return err(String(e)); + } + let resolvedVoice = voice; let resolvedModel = model; let resolvedFormat = format; @@ -133,7 +141,7 @@ export function register( let res: Response; try { - res = await fetch(daemonURL("/audio/speech"), { + res = await daemonFetch(daemonURL("/audio/speech"), { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(body), @@ -143,14 +151,13 @@ export function register( } if (!res.ok) { - const text = await res.text().catch(() => ""); + const text = await readDaemonText(res).catch(() => ""); return err(`Speech synthesis failed (${res.status}): ${text}`); } try { - const buffer = await res.arrayBuffer(); - validateOutputPath(output_path); - await writeFile(output_path, Buffer.from(buffer)); + const buffer = await readDaemonBinary(res); + await writeFile(resolvedOutputPath, Buffer.from(buffer)); return ok( `Audio written to ${redactPath(output_path)} (${buffer.byteLength} bytes)`, ); diff --git a/bridge/src/tools/valar_transcribe.ts b/bridge/src/tools/valar_transcribe.ts index 034ece8..9497908 100644 --- a/bridge/src/tools/valar_transcribe.ts +++ b/bridge/src/tools/valar_transcribe.ts @@ -4,6 +4,7 @@ import { readFile } from "fs/promises"; import { basename } from "path"; import { validateInputPath } from "../security/paths.js"; import { daemonUnavailableMessage, redactPath, sanitizeMessage } from "../security/redaction.js"; +import { daemonFetch, readDaemonJSON, readDaemonText } from "../security/daemon.js"; function ok(text: string) { return { content: [{ type: "text" as const, text }] }; @@ -38,19 +39,20 @@ export function register( .describe('Response format. Defaults to "text".'), }, async ({ file_path, model, language, response_format }) => { + let resolvedPath: string; try { - validateInputPath(file_path); + resolvedPath = validateInputPath(file_path); } catch (e) { return err(String(e)); } let fileData: Buffer; try { - fileData = await readFile(file_path); + fileData = await readFile(resolvedPath); } catch (e) { return err(`Cannot read file "${redactPath(file_path)}": ${e}`); } - const filename = basename(file_path); + const filename = basename(resolvedPath); const form = new FormData(); form.append("file", new Blob([fileData]), filename); if (model) form.append("model", model); @@ -59,7 +61,7 @@ export function register( let res: Response; try { - res = await fetch(daemonURL("/audio/transcriptions"), { + res = await daemonFetch(daemonURL("/audio/transcriptions"), { method: "POST", body: form, }); @@ -68,16 +70,16 @@ export function register( } if (!res.ok) { - const text = await res.text().catch(() => ""); + const text = await readDaemonText(res).catch(() => ""); return err(`Transcription failed (${res.status}): ${text}`); } const fmt = response_format ?? "text"; if (fmt === "text" || fmt === "srt" || fmt === "vtt") { - const text = await res.text(); + const text = await readDaemonText(res); return ok(text); } - const data = await res.json(); + const data = await readDaemonJSON(res); return ok(JSON.stringify(data, null, 2)); }, ); diff --git a/bridge/src/tools/valar_voices.ts b/bridge/src/tools/valar_voices.ts index f2dcb05..b50118e 100644 --- a/bridge/src/tools/valar_voices.ts +++ b/bridge/src/tools/valar_voices.ts @@ -1,5 +1,6 @@ import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; import { daemonUnavailableMessage, sanitizeMessage } from "../security/redaction.js"; +import { daemonFetch, readDaemonJSON, readDaemonText } from "../security/daemon.js"; function ok(text: string) { return { content: [{ type: "text" as const, text }] }; @@ -19,12 +20,12 @@ export function register( {}, async () => { try { - const res = await fetch(daemonURL("/voices")); + const res = await daemonFetch(daemonURL("/voices")); if (!res.ok) { - const text = await res.text().catch(() => ""); + const text = await readDaemonText(res).catch(() => ""); return err(`Failed to list voices (${res.status}): ${text}`); } - const voices = await res.json(); + const voices = await readDaemonJSON(res); return ok(JSON.stringify(voices, null, 2)); } catch { return err(daemonUnavailableMessage()); diff --git a/docs/github-repo-settings.md b/docs/github-repo-settings.md index b388427..f476f9b 100644 --- a/docs/github-repo-settings.md +++ b/docs/github-repo-settings.md @@ -66,4 +66,4 @@ Use `assets/media/social-preview.png` as the social preview image when the GitHu ## Release Boundary -The public repo should keep fresh public history only. Do not mirror the canonical private git history into the public repository. +The public repo should keep fresh public history only. Do not mirror non-public source history into the public repository. diff --git a/docs/release-maintainers.md b/docs/release-maintainers.md index 6f59122..0109316 100644 --- a/docs/release-maintainers.md +++ b/docs/release-maintainers.md @@ -4,24 +4,24 @@ This doc is for maintainers preparing the public `Valar` repo for GitHub publica ## Maintenance Model -The public repo is a fresh-history derived repo. +The public repo is a fresh-history publication repo. -- changes land in the canonical source tree first -- the public tree is regenerated or synced from that source -- accepted public PRs are ported back into the canonical source tree before the next export +- changes should be staged in a clean source tree first +- the public tree should be regenerated or synced from that clean source +- accepted public PRs should be carried forward into the source tree before the next publication update -Do not publish the canonical private history directly. +Do not publish non-public source history directly. ## Public Release Flow -1. Land the intended change in the canonical source tree. -2. Regenerate or sync the public `Valar` tree from that canonical source. +1. Land the intended change in a clean source tree. +2. Regenerate or sync the public `Valar` tree from that source. 3. Run the public release gates in the public tree: - `make audit-and-secret-scan` - `make validate-public` - `make validate-bridge` when `bridge/` changed - `python3 tools/generate_launch_media.py` when launch-facing visuals need refresh -4. Run a private-side history scan as an advisory first-publication check for the canonical source history. +4. If content was imported from another tree, run a history scan before publication. 5. Verify the public repo worktree is clean: - no `bridge/node_modules` - no build outputs @@ -62,4 +62,4 @@ The audit catches private/operator content and workstation assumptions in the cu ## First Public Commit -The first public commit should be created directly in the public `Valar` repo from the exported tree. Do not attach or mirror the canonical private git history. +The first public commit should be created directly in the public `Valar` repo from the exported tree. Do not attach or mirror non-public git history. diff --git a/examples/daemon-api.sh b/examples/daemon-api.sh index f2b42c4..1eb65cb 100755 --- a/examples/daemon-api.sh +++ b/examples/daemon-api.sh @@ -30,9 +30,17 @@ if ! curl -sf "$DAEMON/v1/health" > /dev/null 2>&1; then fi # Synthesize via the OpenAI-compatible endpoint +request_body="$(MODEL="$MODEL" TEXT="$TEXT" python3 - <<'PY' +import json +import os + +print(json.dumps({"model": os.environ["MODEL"], "input": os.environ["TEXT"]})) +PY +)" + curl -fSs -X POST "$DAEMON/v1/audio/speech" \ -H "Content-Type: application/json" \ - -d "{\"model\": \"$MODEL\", \"input\": \"$TEXT\"}" \ + -d "$request_body" \ -o "$OUTPUT" echo "Done. Play it with:" diff --git a/scripts/build_metallib.sh b/scripts/build_metallib.sh index ac4bdeb..28ba240 100644 --- a/scripts/build_metallib.sh +++ b/scripts/build_metallib.sh @@ -40,22 +40,29 @@ FALLBACK_METALLIB_CANDIDATES=( append_path_list() { local value="$1" - local -n target_ref="$2" + local target_name="$2" local item local IFS=':' read -r -a extra_paths <<< "$value" for item in "${extra_paths[@]}"; do [[ -n "$item" ]] || continue - target_ref+=("$item") + case "$target_name" in + scratch) + DEFAULT_SCRATCH_CANDIDATES+=("$item") + ;; + output) + DEFAULT_OUTPUT_DIRS+=("$item") + ;; + esac done } if [[ -n "${VALARTTS_METALLIB_EXTRA_SCRATCH_CANDIDATES:-}" ]]; then - append_path_list "$VALARTTS_METALLIB_EXTRA_SCRATCH_CANDIDATES" DEFAULT_SCRATCH_CANDIDATES + append_path_list "$VALARTTS_METALLIB_EXTRA_SCRATCH_CANDIDATES" scratch fi if [[ -n "${VALARTTS_METALLIB_EXTRA_OUTPUT_DIRS:-}" ]]; then - append_path_list "$VALARTTS_METALLIB_EXTRA_OUTPUT_DIRS" DEFAULT_OUTPUT_DIRS + append_path_list "$VALARTTS_METALLIB_EXTRA_OUTPUT_DIRS" output fi install_metallib() { @@ -179,7 +186,11 @@ DEFAULT_OUTPUT_DIRS=( "${DEFAULT_OUTPUT_DIRS[@]}" ) -mkdir -p "$BUILD_DIR" +BUILD_DIR="$(mktemp -d "${TMPDIR:-/tmp}/valar-metallib.XXXXXX")" +cleanup() { + rm -rf "$BUILD_DIR" +} +trap cleanup EXIT # Include paths for Metal headers: # - Generated shaders reference headers in their own dir @@ -223,13 +234,14 @@ fi echo "Compiling Metal shaders..." compiled=0 skipped=0 +air_files=() # Compile ALL .metal files: backend kernels first (authoritative), then generated shaders. # Track compiled basenames in a temp file to skip generated duplicates (bash 3 compatible). compiled_names="$BUILD_DIR/.compiled_basenames" : > "$compiled_names" -for f in $(find "$BACKEND_DIR" "$GENERATED_DIR" -name "*.metal" 2>/dev/null | sort); do +while IFS= read -r -d '' f; do [[ -f "$f" ]] || continue base=$(basename "$f" .metal) # Skip generated duplicates of backend kernels (backend is authoritative) @@ -240,22 +252,25 @@ for f in $(find "$BACKEND_DIR" "$GENERATED_DIR" -name "*.metal" 2>/dev/null | so # Use relative path to create unique .air name relpath="${f#$MLX_CHECKOUT/}" airname=$(echo "$relpath" | tr '/' '_' | sed 's/\.metal$/.air/') + airpath="$BUILD_DIR/$airname" if "$METAL_BIN" -c "$f" -o "$BUILD_DIR/$airname" \ "${INCLUDE_FLAGS[@]}" \ -std=metal3.1 -target air64-apple-macos14.0 2>/dev/null; then compiled=$((compiled + 1)) echo "$base" >> "$compiled_names" + air_files+=("$airpath") elif "$METAL_BIN" -c "$f" -o "$BUILD_DIR/$airname" \ "${INCLUDE_FLAGS[@]}" \ -std=metal3.2 -target air64-apple-macos15.0 2>/dev/null; then # Some kernels (fence, nax variants) require Metal 3.2 features compiled=$((compiled + 1)) echo "$base" >> "$compiled_names" + air_files+=("$airpath") else skipped=$((skipped + 1)) echo " SKIP $base (compile error)" fi -done +done < <(find "$BACKEND_DIR" "$GENERATED_DIR" -name "*.metal" -print0 2>/dev/null) rm -f "$compiled_names" echo "Compiled $compiled shaders ($skipped skipped)." @@ -273,7 +288,7 @@ if [[ $compiled -eq 0 ]]; then fi echo "Linking mlx.metallib..." -"$METALLIB_BIN" "$BUILD_DIR"/*.air -o "$BUILD_DIR/mlx.metallib" +"$METALLIB_BIN" "${air_files[@]}" -o "$BUILD_DIR/mlx.metallib" install_metallib "$BUILD_DIR/mlx.metallib" diff --git a/tools/bootstrap.sh b/tools/bootstrap.sh index 631b2f4..e7fc2c5 100644 --- a/tools/bootstrap.sh +++ b/tools/bootstrap.sh @@ -81,6 +81,6 @@ if [[ "$with_bridge" == "1" && -f "bridge/package.json" ]]; then echo "Installing bridge dependencies" ( cd bridge - bun install + bun install --frozen-lockfile --ignore-scripts ) fi diff --git a/tools/first_clip.sh b/tools/first_clip.sh index 2ce1a9b..33b4c7d 100644 --- a/tools/first_clip.sh +++ b/tools/first_clip.sh @@ -8,9 +8,10 @@ output_path="${VALAR_FIRST_CLIP_OUTPUT:-${TMPDIR:-/tmp}/valar-first-clip.wav}" clip_text="${VALAR_FIRST_CLIP_TEXT:-Hello from Valar.}" model_id="mlx-community/Soprano-1.1-80M-bf16" -cli_bin="$repo_root/apps/ValarCLI/.build/arm64-apple-macosx/debug/valartts" -metallib_path="$repo_root/apps/ValarCLI/.build/arm64-apple-macosx/debug/mlx.metallib" -default_metallib_path="$repo_root/apps/ValarCLI/.build/arm64-apple-macosx/debug/default.metallib" +cli_bin="$(swift build --package-path apps/ValarCLI --show-bin-path)/valartts" +cli_bin_dir="$(dirname "$cli_bin")" +metallib_path="$cli_bin_dir/mlx.metallib" +default_metallib_path="$cli_bin_dir/default.metallib" if [[ ! -x "$cli_bin" || ( ! -f "$metallib_path" && ! -f "$default_metallib_path" ) ]]; then bash ./tools/quickstart.sh diff --git a/tools/public_repo_audit.sh b/tools/public_repo_audit.sh index 403b527..805f0ba 100644 --- a/tools/public_repo_audit.sh +++ b/tools/public_repo_audit.sh @@ -25,10 +25,20 @@ EOF while [[ $# -gt 0 ]]; do case "$1" in --root) + if [[ $# -lt 2 ]]; then + echo "--root requires a path." >&2 + usage >&2 + exit 1 + fi SCAN_ROOT="$2" shift 2 ;; --exclude-file) + if [[ $# -lt 2 ]]; then + echo "--exclude-file requires a path." >&2 + usage >&2 + exit 1 + fi EXCLUDE_FILE="$2" shift 2 ;; @@ -54,6 +64,11 @@ if [[ ! -f "$RULES_FILE" ]]; then exit 1 fi +if ! command -v rg >/dev/null 2>&1; then + echo "Public-repo audit requires ripgrep (rg) on PATH." >&2 + exit 1 +fi + # shellcheck source=/dev/null source "$RULES_FILE" @@ -159,12 +174,19 @@ if [[ -n "${VALAR_PUBLIC_AUDIT_EXTRA_PATTERN:-}" ]]; then RG_ARGS+=(-e "${VALAR_PUBLIC_AUDIT_EXTRA_PATTERN}") fi -if ! ( - cd "$SCAN_ROOT" - xargs -0 rg -n --no-heading "${RG_ARGS[@]}" < "$TMP_FILE_LIST" -) > "$TMP_HITS"; then - : -fi +while IFS= read -r -d '' rel; do + set +e + ( + cd "$SCAN_ROOT" + rg -n --no-heading "${RG_ARGS[@]}" -- "$rel" + ) >> "$TMP_HITS" + scan_status=$? + set -e + if [[ "$scan_status" -gt 1 ]]; then + echo "Public-repo audit failed while scanning: $rel" >&2 + exit 1 + fi +done < "$TMP_FILE_LIST" if [[ -s "$TMP_HITS" ]]; then echo "Public-repo audit failed. Found local/private markers or secret-like content:" >&2 diff --git a/tools/public_repo_history_scan.sh b/tools/public_repo_history_scan.sh index 6b4610c..3aae8f6 100755 --- a/tools/public_repo_history_scan.sh +++ b/tools/public_repo_history_scan.sh @@ -17,6 +17,11 @@ EOF while [[ $# -gt 0 ]]; do case "$1" in --root) + if [[ $# -lt 2 ]]; then + echo "--root requires a path." >&2 + usage >&2 + exit 1 + fi SCAN_ROOT="$2" shift 2 ;; @@ -71,11 +76,18 @@ done < <(valar_public_repo_secret_block_regexes) TMP_PATH_HITS="$(mktemp)" TMP_CONTENT_HITS="$(mktemp)" +TMP_FILTERED_CONTENT_HITS="$(mktemp)" cleanup() { - rm -f "$TMP_PATH_HITS" "$TMP_CONTENT_HITS" + rm -f "$TMP_PATH_HITS" "$TMP_CONTENT_HITS" "$TMP_FILTERED_CONTENT_HITS" } trap cleanup EXIT +history_content_hit_is_allowed() { + local hit="$1" + local old_synthetic_fixture="/Vol""umes/External/audio.wav" + [[ "$hit" == *"Packages/ValarPersistence/Tests/ValarPersistenceTests/ValarPersistenceTests.swift:"*"$old_synthetic_fixture"* ]] +} + declare -a GREP_ARGS=() for regex in "${CONTENT_BLOCK_REGEXES[@]}"; do GREP_ARGS+=(-e "$regex") @@ -102,15 +114,20 @@ while IFS= read -r rev; do done done < <(git -C "$SCAN_ROOT" ls-tree -r --name-only "$rev") - if ! ( + set +e + ( cd "$SCAN_ROOT" - git grep -n --no-heading "${GREP_ARGS[@]}" "$rev" -- . \ + git grep -E -n --no-heading "${GREP_ARGS[@]}" "$rev" -- . \ ':(exclude)tools/public_repo_audit.sh' \ ':(exclude)tools/public_repo_secret_scan.sh' \ ':(exclude)tools/public_repo_history_scan.sh' \ ':(exclude)tools/public_repo_rules.sh' - ) >> "$TMP_CONTENT_HITS"; then - : + ) >> "$TMP_CONTENT_HITS" + grep_status=$? + set -e + if [[ "$grep_status" -gt 1 ]]; then + echo "git grep failed while scanning revision $rev" >&2 + exit "$grep_status" fi done < <(git -C "$SCAN_ROOT" rev-list --all) @@ -120,9 +137,17 @@ if [[ -s "$TMP_PATH_HITS" ]]; then exit 1 fi -if [[ -s "$TMP_CONTENT_HITS" ]]; then +while IFS= read -r hit; do + [[ -n "$hit" ]] || continue + if history_content_hit_is_allowed "$hit"; then + continue + fi + printf '%s\n' "$hit" >> "$TMP_FILTERED_CONTENT_HITS" +done < "$TMP_CONTENT_HITS" + +if [[ -s "$TMP_FILTERED_CONTENT_HITS" ]]; then echo "Public history scan failed. Found private or secret-like content in git history:" >&2 - sort -u "$TMP_CONTENT_HITS" >&2 + sort -u "$TMP_FILTERED_CONTENT_HITS" >&2 exit 1 fi diff --git a/tools/public_repo_rules.sh b/tools/public_repo_rules.sh index 2705a09..6d021ed 100644 --- a/tools/public_repo_rules.sh +++ b/tools/public_repo_rules.sh @@ -94,7 +94,8 @@ sk-[A-Za-z0-9]{20,} xox[baprs]-[A-Za-z0-9-]+ AKIA[0-9A-Z]{16} AIza[0-9A-Za-z_-]{35} -(OPENAI_API_KEY|ANTHROPIC_API_KEY|LINEAR_API_KEY|TELEGRAM_BOT_TOKEN|SLACK_BOT_TOKEN|GITHUB_TOKEN)\s*=\s*[^[:space:]]+ +(OPENAI_API_KEY|ANTHROPIC_API_KEY|LINEAR_API_KEY|TELEGRAM_BOT_TOKEN|SLACK_BOT_TOKEN|GITHUB_TOKEN)[[:space:]]*=[[:space:]]*[^[:space:]]+ +(RUNPOD_API_KEY|HF_TOKEN|HUGGINGFACE_HUB_TOKEN|AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|GEMINI_API_KEY|GOOGLE_API_KEY)[[:space:]]*=[[:space:]]*[^[:space:]]+ -----BEGIN [A-Z ]*PRIVATE KEY----- EOF } diff --git a/tools/public_repo_secret_scan.sh b/tools/public_repo_secret_scan.sh index 648dffa..71f3ebd 100644 --- a/tools/public_repo_secret_scan.sh +++ b/tools/public_repo_secret_scan.sh @@ -4,22 +4,33 @@ set -euo pipefail ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" SCAN_ROOT="$ROOT_DIR" RULES_FILE="$ROOT_DIR/tools/public_repo_rules.sh" +scan_ignored=0 usage() { cat <&2 + usage >&2 + exit 1 + fi SCAN_ROOT="$2" shift 2 ;; + --include-ignored) + scan_ignored=1 + shift + ;; -h|--help) usage exit 0 @@ -42,19 +53,25 @@ if [[ ! -f "$RULES_FILE" ]]; then exit 1 fi +if ! command -v rg >/dev/null 2>&1; then + echo "Public-repo secret scan requires ripgrep (rg) on PATH." >&2 + exit 1 +fi + # shellcheck source=/dev/null source "$RULES_FILE" collect_files() { local root="$1" - if git -C "$root" rev-parse --show-toplevel >/dev/null 2>&1 \ + if [[ "$scan_ignored" == "0" ]] \ + && git -C "$root" rev-parse --show-toplevel >/dev/null 2>&1 \ && git -C "$root" rev-parse --verify HEAD >/dev/null 2>&1; then git -C "$root" ls-files else ( cd "$root" find . \ - \( -name .git -o -name .build -o -name .build-cache -o -name .swiftpm -o -name node_modules \) -prune -o \ + \( -name .git -o -name .build -o -name .build-cache -o -name .swiftpm -o -name node_modules -o -name __pycache__ \) -prune -o \ -type f -print | sed 's#^\./##' ) fi @@ -98,12 +115,19 @@ if [[ -n "${VALAR_PUBLIC_SECRET_SCAN_EXTRA_PATTERN:-}" ]]; then RG_ARGS+=(-e "${VALAR_PUBLIC_SECRET_SCAN_EXTRA_PATTERN}") fi -if ! ( - cd "$SCAN_ROOT" - xargs -0 rg -n --no-heading "${RG_ARGS[@]}" < "$TMP_FILE_LIST" -) > "$TMP_HITS"; then - : -fi +while IFS= read -r -d '' rel; do + set +e + ( + cd "$SCAN_ROOT" + rg -n --no-heading "${RG_ARGS[@]}" -- "$rel" + ) >> "$TMP_HITS" + scan_status=$? + set -e + if [[ "$scan_status" -gt 1 ]]; then + echo "Public-repo secret scan failed while scanning: $rel" >&2 + exit 1 + fi +done < "$TMP_FILE_LIST" if [[ -s "$TMP_HITS" ]]; then echo "Public-repo secret scan failed. Found committed secret-like content:" >&2 diff --git a/tools/quickstart.sh b/tools/quickstart.sh index 8692f0f..d04a045 100644 --- a/tools/quickstart.sh +++ b/tools/quickstart.sh @@ -9,11 +9,11 @@ bash ./tools/bootstrap.sh native echo "Building Valar CLI" swift build --package-path apps/ValarCLI +cli_bin="$(swift build --package-path apps/ValarCLI --show-bin-path)/valartts" echo "Building MLX metallib" bash ./scripts/build_metallib.sh -cli_bin="$repo_root/apps/ValarCLI/.build/arm64-apple-macosx/debug/valartts" if [[ ! -x "$cli_bin" ]]; then echo "Error: expected CLI binary at $cli_bin" >&2 exit 1 diff --git a/tools/validate.sh b/tools/validate.sh index a41f5e7..9e6b760 100644 --- a/tools/validate.sh +++ b/tools/validate.sh @@ -130,12 +130,12 @@ if [[ "$with_bridge" == "1" && -f "bridge/package.json" ]]; then echo "Typechecking MCP bridge" ( cd bridge - bun install + bun install --frozen-lockfile bun run typecheck ) fi -cli_bin="$repo_root/apps/ValarCLI/.build/arm64-apple-macosx/debug/valartts" +cli_bin="$(swift build --package-path apps/ValarCLI --show-bin-path)/valartts" [[ -x "$cli_bin" ]] || { echo "Error: expected CLI binary at $cli_bin" >&2 exit 1