Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 87 additions & 1 deletion swift/Sources/CoreAILanguageModels/Bundle/LanguageConfig.swift
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
// be found in the LICENSE file or at https://opensource.org/licenses/BSD-3-Clause

import CoreAIShared
import Foundation
import Tokenizers

/// `language` block of `metadata.json` schema 0.2 — LLM-specific config.
public struct LanguageConfig: Codable, Sendable, Equatable {
Expand Down Expand Up @@ -42,12 +44,96 @@ public struct LanguageConfig: Codable, Sendable, Equatable {
case functionMap = "function_map"
}

public init(from decoder: Decoder) throws {
/// Decodes the language configuration from a keyed container.
///
/// `tokenizer`, `vocabSize` and `maxContextLength` are required keys.
/// `embeddedTokenizer` falls back to `true` when the key is absent, and
/// `functionMap` is left `nil` when the key is absent.
///
/// NOTE(review): the parameter is spelled `Swift.Decoder` — presumably to
/// disambiguate from a `Decoder` type exported by another imported module;
/// confirm before simplifying.
///
/// - Parameter decoder: Decoder positioned at the `language` block.
/// - Throws: Any `DecodingError` raised while reading the keys.
public init(from decoder: Swift.Decoder) throws {
    let container = try decoder.container(keyedBy: CodingKeys.self)

    // Required fields, decoded in declaration order.
    tokenizer = try container.decode(String.self, forKey: .tokenizer)
    vocabSize = try container.decode(Int.self, forKey: .vocabSize)
    maxContextLength = try container.decode(Int.self, forKey: .maxContextLength)

    // Optional fields.
    let embeddedFlag = try container.decodeIfPresent(Bool.self, forKey: .embeddedTokenizer)
    embeddedTokenizer = embeddedFlag ?? true
    functionMap = try container.decodeIfPresent(FunctionMap.self, forKey: .functionMap)
}

// MARK: - Additional Stop Tokens

/// Collects extra stop-token IDs declared in `tokenizer_config.json`.
///
/// Three places in the config are consulted:
/// 1. `additional_special_tokens` — each entry is either a plain string or a
///    dictionary with a `content` string; the string is resolved to an ID
///    through `tokenizer.convertTokenToId`.
/// 2. `eos_token`, when it is an array of strings (some models list multiple).
/// 3. `added_tokens_decoder` — entries flagged `special` whose content
///    contains a known turn-ending marker (`end_of_turn`, `im_end`, `eot_id`);
///    the dictionary key is parsed as the token ID.
///
/// The tokenizer's own `eosTokenId` is never included in the result.
///
/// Best-effort: returns an empty array if the file doesn't exist or can't be
/// parsed as a JSON object.
///
/// TODO: Upstream this to swift-transformers as `Tokenizer.additionalEosTokenIds`
/// so we don't need to parse tokenizer_config.json ourselves.
///
/// - Parameters:
///   - tokenizerDir: Directory that contains `tokenizer_config.json`.
///   - tokenizer: Tokenizer used to map token strings to IDs.
/// - Returns: Unique stop-token IDs in ascending order, excluding the main EOS ID.
public static func additionalStopTokenIds(
    from tokenizerDir: URL,
    tokenizer: any Tokenizer
) -> [Int32] {
    let configURL = tokenizerDir.appending(path: "tokenizer_config.json")
    guard let data = try? Data(contentsOf: configURL),
        let json = try? JSONSerialization.jsonObject(with: data) as? [String: Any]
    else {
        return []
    }

    // `Int32(exactly:)` yields nil instead of trapping on an out-of-range ID.
    let mainEos = tokenizer.eosTokenId.flatMap { Int32(exactly: $0) }
    var result = Set<Int32>()

    // Resolves a token string through the tokenizer and records its ID,
    // skipping unknown tokens, IDs that do not fit in Int32, and the main EOS.
    func record(_ token: String) {
        guard let id = tokenizer.convertTokenToId(token),
            let id32 = Int32(exactly: id),
            id32 != mainEos
        else { return }
        result.insert(id32)
    }

    // 1. Check additional_special_tokens array
    if let specials = json["additional_special_tokens"] as? [Any] {
        for item in specials {
            // Each item can be a string or a dict with a "content" key
            if let text = item as? String {
                record(text)
            } else if let entry = item as? [String: Any],
                let content = entry["content"] as? String
            {
                record(content)
            }
        }
    }

    // 2. Check if eos_token is an array (some models list multiple)
    if let eosArray = json["eos_token"] as? [String] {
        for token in eosArray {
            record(token)
        }
    }

    // 3. Check added_tokens_decoder for turn-ending special tokens
    //    (e.g. Gemma's <end_of_turn> ID 106, Qwen's <|im_end|>)
    //    Only include tokens whose content matches known turn-ending patterns.
    let turnEndPatterns = ["end_of_turn", "im_end", "eot_id"]
    if let addedTokens = json["added_tokens_decoder"] as? [String: Any] {
        for (idString, value) in addedTokens {
            guard let dict = value as? [String: Any],
                let isSpecial = dict["special"] as? Bool, isSpecial,
                let content = dict["content"] as? String,
                let id = Int32(idString)
            else { continue }
            let lower = content.lowercased()
            if id != mainEos && turnEndPatterns.contains(where: { lower.contains($0) }) {
                result.insert(id)
            }
        }
    }

    // Sort so callers (and log output) see a stable order across runs;
    // iterating a Set directly gives no ordering guarantee.
    return result.sorted()
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,13 @@ public struct StopSequences: Sendable {
/// Initialize with tokenizer, automatically including EOS tokens
/// - Parameter tokenizer: Tokenizer to extract EOS token from
/// - Parameter additionalSequences: Optional additional stop sequences to include
public init(for tokenizer: any Tokenizer, additionalSequences: [[Int32]] = []) {
/// - Parameter additionalEosTokenIds: Optional additional single-token EOS IDs
/// (e.g. from tokenizer_config.json's `additional_special_tokens`)
public init(
for tokenizer: any Tokenizer,
additionalSequences: [[Int32]] = [],
additionalEosTokenIds: [Int32] = []
) {
var allSequences = additionalSequences

// Collect existing single-token sequences to avoid duplicates
Expand All @@ -94,6 +100,14 @@ public struct StopSequences: Sendable {
}
}

// Add additional EOS token IDs (e.g. from tokenizer_config.json)
for token in additionalEosTokenIds {
if !existingTokens.contains(token) {
existingTokens.insert(token)
allSequences.append([token])
}
}

self.sequences = allSequences
self.maxLength = allSequences.map { $0.count }.max() ?? 0
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ public struct CoreAILanguageModel: LanguageModel {
private let vocabSize: Int?
private let supportsToolCalling: Bool
private let supportsReasoning: Bool
private let additionalEosTokenIds: [Int32]

// MARK: - Protocol Requirements

Expand All @@ -56,7 +57,8 @@ public struct CoreAILanguageModel: LanguageModel {
tokenizer: tokenizer,
modelIdentifier: modelIdentifier,
samplingConfig: samplingConfig,
vocabSize: vocabSize
vocabSize: vocabSize,
additionalEosTokenIds: additionalEosTokenIds
)
}

Expand Down Expand Up @@ -97,13 +99,15 @@ public struct CoreAILanguageModel: LanguageModel {
tokenizer: any Tokenizer,
modelIdentifier: String = "coreai-model",
samplingConfig: SamplingConfiguration = .greedy,
vocabSize: Int? = nil
vocabSize: Int? = nil,
additionalEosTokenIds: [Int32] = []
) {
self.engine = engine
self.tokenizer = tokenizer
self.modelIdentifier = modelIdentifier
self.samplingConfig = samplingConfig
self.vocabSize = vocabSize
self.additionalEosTokenIds = additionalEosTokenIds
self.supportsToolCalling = CoreAIExecutor.detectToolCallMarkers(using: tokenizer) != nil
self.supportsReasoning =
tokenizer.convertTokenToId("<think>") != nil
Expand All @@ -121,6 +125,7 @@ public struct CoreAILanguageModel: LanguageModel {
fileprivate let modelIdentifier: String
fileprivate let samplingConfig: SamplingConfiguration
fileprivate let vocabSize: Int?
fileprivate let additionalEosTokenIds: [Int32]

public static func == (lhs: Configuration, rhs: Configuration) -> Bool {
lhs.modelIdentifier == rhs.modelIdentifier
Expand All @@ -140,6 +145,9 @@ public struct CoreAILanguageModel: LanguageModel {
private let modelIdentifier: String
private let samplingConfig: SamplingConfiguration
private let vocabSize: Int?
/// All EOS-like token IDs: the main `eosTokenId` plus any additional
/// stop tokens from tokenizer_config.json (e.g. Gemma's `<end_of_turn>`).
private let eosTokenIds: Set<Int32>
/// Open / close marker pair the model uses for chain-of-thought
/// blocks, discovered from the tokenizer's known token ids at init
/// (see `detectThinkingMarkers`). For models that don't emit
Expand All @@ -162,6 +170,14 @@ public struct CoreAILanguageModel: LanguageModel {
self.vocabSize = configuration.vocabSize
self.thinkingMarkers = Self.detectThinkingMarkers(using: configuration.tokenizer)
self.toolCallMarkers = Self.detectToolCallMarkers(using: configuration.tokenizer)

// Build the full set of EOS-like token IDs
var eos = Set<Int32>()
if let id = configuration.tokenizer.eosTokenId {
eos.insert(Int32(id))
}
eos.formUnion(configuration.additionalEosTokenIds)
self.eosTokenIds = eos
}

/// Probes the tokenizer for known reasoning marker pairs. Each
Expand Down Expand Up @@ -328,7 +344,8 @@ public struct CoreAILanguageModel: LanguageModel {
inferenceOptions: InferenceOptions(maxTokens: maxTokens)
)

let eosTokenId = tokenizer.eosTokenId
// Use pre-computed set of all EOS-like tokens (main + additional)
let eosTokens = eosTokenIds
// Incremental-decode buffer. After a clean emit, one token is
// retained as context for the next step (see below). During a
// multi-byte sequence that hasn't decoded cleanly yet, multiple
Expand Down Expand Up @@ -359,7 +376,7 @@ public struct CoreAILanguageModel: LanguageModel {

for try await output in tokenStream {
let token = output.tokenId
if let eos = eosTokenId, Int(token) == eos {
if eosTokens.contains(token) {
tokenStream.setStopReason(.eos)
break
}
Expand Down Expand Up @@ -523,7 +540,10 @@ public struct CoreAILanguageModel: LanguageModel {
}

let strategy = ConstrainedDecodingStrategy(jsonSchema: jsonSchema, vocabSize: vocabSize)
let stopSequences = StopSequences(for: tokenizer)
let stopSequences = StopSequences(
for: tokenizer,
additionalEosTokenIds: Array(eosTokenIds)
)

let stream = try await strategy.decode(
from: .tokens(promptTokens),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,12 +80,22 @@ public struct CoreAIRunner {
let tokenizer = try await bundle.loadTokenizer()
tokenizerLoadSpan.end()

// Read additional stop token IDs from tokenizer_config.json
let additionalEos: [Int32]
if let tokenizerDir = bundle.tokenizerPath {
additionalEos = LanguageConfig.additionalStopTokenIds(
from: tokenizerDir, tokenizer: tokenizer)
} else {
additionalEos = []
}

return CoreAILanguageModel(
engine: engine,
tokenizer: tokenizer,
modelIdentifier: bundle.name,
samplingConfig: SamplingConfiguration.greedy,
vocabSize: bundle.vocabSize
vocabSize: bundle.vocabSize,
additionalEosTokenIds: additionalEos
)
}

Expand Down
40 changes: 33 additions & 7 deletions swift/Sources/Tools/llm-runner/LLMRunnerMain.swift
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,20 @@ struct LLMRunner: AsyncParsableCommand, Sendable {
"Tokenizer loaded from \(bundle.hasEmbeddedTokenizer ? "embedded bundle" : "HuggingFace")",
component: "Main")

// Read additional stop token IDs from tokenizer_config.json (e.g. <end_of_turn> for Gemma)
let additionalEosTokenIds: [Int32]
if let tokenizerDir = bundle.tokenizerPath {
additionalEosTokenIds = LanguageConfig.additionalStopTokenIds(
from: tokenizerDir, tokenizer: tokenizer)
if !additionalEosTokenIds.isEmpty {
CLILogger.log(
"Found \(additionalEosTokenIds.count) additional stop token(s) from tokenizer config: \(additionalEosTokenIds)",
component: "Main")
}
} else {
additionalEosTokenIds = []
}

CLILogger.log("Model loaded successfully:", component: "Main")
CLILogger.log(" Name: \(modelName)", component: "Main")
CLILogger.log(" Source: model bundle", component: "Main")
Expand Down Expand Up @@ -494,7 +508,8 @@ struct LLMRunner: AsyncParsableCommand, Sendable {
samplingConfiguration: samplingConfiguration,
maxTokens: maxTokens,
actualInputTokens: actualInputTokens,
modelVocabSize: modelVocabSize
modelVocabSize: modelVocabSize,
additionalEosTokenIds: additionalEosTokenIds
)
} else {
// Generate text (timing handled by decoding strategies)
Expand All @@ -505,7 +520,8 @@ struct LLMRunner: AsyncParsableCommand, Sendable {
// Encode stop tokens to sequences
let stopSequences = try validateAndEncodeStopTokens(
stopTokens: stopTokens,
tokenizer: tokenizer
tokenizer: tokenizer,
additionalEosTokenIds: additionalEosTokenIds
)

// Check if logits are requested
Expand Down Expand Up @@ -590,7 +606,8 @@ struct LLMRunner: AsyncParsableCommand, Sendable {
samplingConfiguration: SamplingConfiguration,
maxTokens: Int,
actualInputTokens: Int,
modelVocabSize: Int?
modelVocabSize: Int?,
additionalEosTokenIds: [Int32] = []
) async throws {
let schema: String
if FileManager.default.fileExists(atPath: schemaInput) {
Expand All @@ -603,7 +620,8 @@ struct LLMRunner: AsyncParsableCommand, Sendable {

let stopSequences = try validateAndEncodeStopTokens(
stopTokens: stopTokens,
tokenizer: tokenizer
tokenizer: tokenizer,
additionalEosTokenIds: additionalEosTokenIds
)

guard let vocabSize = modelVocabSize else {
Expand Down Expand Up @@ -676,15 +694,19 @@ struct LLMRunner: AsyncParsableCommand, Sendable {
/// - Parameters:
/// - stopTokens: Array of stop token strings from CLI
/// - tokenizer: Tokenizer to use for encoding
/// - additionalEosTokenIds: Additional EOS token IDs from tokenizer config
/// - Returns: StopSequences containing all valid sequences plus tokenizer EOS tokens
func validateAndEncodeStopTokens(
stopTokens: [String],
tokenizer: any Tokenizer
tokenizer: any Tokenizer,
additionalEosTokenIds: [Int32] = []
) throws -> StopSequences {
var sequences: [[Int32]] = []

for stopString in stopTokens {
let tokens = tokenizer.encode(text: stopString).map { Int32($0) }
// Encode without adding BOS/EOS so special token strings like
// "<end_of_turn>" resolve to their single token ID, not [BOS, id].
let tokens = tokenizer.encode(text: stopString, addSpecialTokens: false).map { Int32($0) }

// Fatal error for empty encodings - user explicitly requested this stop token
guard !tokens.isEmpty else {
Expand All @@ -710,7 +732,11 @@ struct LLMRunner: AsyncParsableCommand, Sendable {
}

// Use new initializer that automatically includes EOS tokens from tokenizer
return StopSequences(for: tokenizer, additionalSequences: sequences)
return StopSequences(
for: tokenizer,
additionalSequences: sequences,
additionalEosTokenIds: additionalEosTokenIds
)
}

// MARK: - Asset Type Label
Expand Down