apple · stikves · Jun 14, 2026 · Jun 13, 2026
diff --git a/swift/Sources/CoreAILanguageModels/Bundle/LanguageConfig.swift b/swift/Sources/CoreAILanguageModels/Bundle/LanguageConfig.swift
@@ -4,6 +4,8 @@
 // be found in the LICENSE file or at https://opensource.org/licenses/BSD-3-Clause
 
 import CoreAIShared
+import Foundation
+import Tokenizers
 
 /// `language` block of `metadata.json` schema 0.2 — LLM-specific config.
 public struct LanguageConfig: Codable, Sendable, Equatable {
@@ -42,12 +44,106 @@ public struct LanguageConfig: Codable, Sendable, Equatable {
         case functionMap = "function_map"
     }
 
-    public init(from decoder: Decoder) throws {
+    public init(from decoder: Swift.Decoder) throws {
         let c = try decoder.container(keyedBy: CodingKeys.self)
         self.tokenizer = try c.decode(String.self, forKey: .tokenizer)
         self.vocabSize = try c.decode(Int.self, forKey: .vocabSize)
         self.maxContextLength = try c.decode(Int.self, forKey: .maxContextLength)
         self.embeddedTokenizer = try c.decodeIfPresent(Bool.self, forKey: .embeddedTokenizer) ?? true
         self.functionMap = try c.decodeIfPresent(FunctionMap.self, forKey: .functionMap)
     }
+
+    // MARK: - Additional Stop Tokens
+
+    /// Collects extra stop-token IDs declared in `tokenizer_config.json`.
+    ///
+    /// Three sources inside the file are consulted, in order:
+    /// 1. `additional_special_tokens` — entries may be plain strings or
+    ///    dictionaries carrying a `content` string; each is resolved to an ID
+    ///    through `tokenizer.convertTokenToId`.
+    /// 2. `eos_token`, when it is an array of strings (some models list several).
+    /// 3. `added_tokens_decoder` — entries flagged `special` whose content
+    ///    contains a known turn-ending marker (`end_of_turn`, `im_end`, `eot_id`);
+    ///    the ID is taken from the dictionary key.
+    ///
+    /// The tokenizer's primary `eosTokenId` is never included in the result.
+    ///
+    /// Best-effort: returns an empty array when the file cannot be read or is not
+    /// a JSON object.
+    ///
+    /// TODO: Upstream this to swift-transformers as `Tokenizer.additionalEosTokenIds`
+    /// so we don't need to parse tokenizer_config.json ourselves.
+    ///
+    /// - Parameters:
+    ///   - tokenizerDir: Directory that contains `tokenizer_config.json`.
+    ///   - tokenizer: Tokenizer used to map token strings to IDs.
+    /// - Returns: De-duplicated stop-token IDs in ascending order.
+    public static func additionalStopTokenIds(
+        from tokenizerDir: URL,
+        tokenizer: any Tokenizer
+    ) -> [Int32] {
+        let configURL = tokenizerDir.appending(path: "tokenizer_config.json")
+        guard let data = try? Data(contentsOf: configURL),
+            let json = try? JSONSerialization.jsonObject(with: data) as? [String: Any]
+        else {
+            return []
+        }
+
+        // `Int32(exactly:)` avoids a trap should an ID ever fall outside Int32.
+        let mainEos = tokenizer.eosTokenId.flatMap { Int32(exactly: $0) }
+        var result = Set<Int32>()
+
+        // Resolves a token string to its ID and records it unless it is the main EOS.
+        func insertStopToken(_ token: String) {
+            guard let id = tokenizer.convertTokenToId(token),
+                let id32 = Int32(exactly: id),
+                id32 != mainEos
+            else { return }
+            result.insert(id32)
+        }
+
+        // 1. additional_special_tokens: each item is a string or a dict with "content".
+        // NOTE(review): every resolvable entry is treated as a stop token, including
+        // any non-terminating markers a model lists here — confirm this is intended.
+        if let specials = json["additional_special_tokens"] as? [Any] {
+            for item in specials {
+                if let token = item as? String {
+                    insertStopToken(token)
+                } else if let dict = item as? [String: Any],
+                    let content = dict["content"] as? String
+                {
+                    insertStopToken(content)
+                }
+            }
+        }
+
+        // 2. eos_token given as an array (some models list multiple).
+        if let eosArray = json["eos_token"] as? [String] {
+            for token in eosArray {
+                insertStopToken(token)
+            }
+        }
+
+        // 3. added_tokens_decoder: keys are token IDs; keep only special tokens whose
+        //    content matches a turn-ending pattern
+        //    (e.g. Gemma's <end_of_turn> ID 106, Qwen's <|im_end|>).
+        let turnEndPatterns = ["end_of_turn", "im_end", "eot_id"]
+        if let addedTokens = json["added_tokens_decoder"] as? [String: Any] {
+            for (idString, value) in addedTokens {
+                guard let dict = value as? [String: Any],
+                    let isSpecial = dict["special"] as? Bool, isSpecial,
+                    let content = dict["content"] as? String,
+                    let id = Int32(idString),
+                    id != mainEos
+                else { continue }
+                let lower = content.lowercased()
+                if turnEndPatterns.contains(where: { lower.contains($0) }) {
+                    result.insert(id)
+                }
+            }
+        }
+
+        // Sorted so the result (and anything logged from it) is deterministic.
+        return result.sorted()
+    }
 }
diff --git a/swift/Sources/CoreAILanguageModels/DecodingStrategies/DecodingStrategy.swift b/swift/Sources/CoreAILanguageModels/DecodingStrategies/DecodingStrategy.swift
@@ -76,7 +76,13 @@ public struct StopSequences: Sendable {
     /// Initialize with tokenizer, automatically including EOS tokens
     /// - Parameter tokenizer: Tokenizer to extract EOS token from
     /// - Parameter additionalSequences: Optional additional stop sequences to include
-    public init(for tokenizer: any Tokenizer, additionalSequences: [[Int32]] = []) {
+    /// - Parameter additionalEosTokenIds: Optional additional single-token EOS IDs
+    ///   (e.g. from tokenizer_config.json's `additional_special_tokens`)
+    public init(
+        for tokenizer: any Tokenizer,
+        additionalSequences: [[Int32]] = [],
+        additionalEosTokenIds: [Int32] = []
+    ) {
         var allSequences = additionalSequences
 
         // Collect existing single-token sequences to avoid duplicates
@@ -94,6 +100,14 @@ public struct StopSequences: Sendable {
             }
         }
 
+        // Add additional EOS token IDs (e.g. from tokenizer_config.json)
+        for token in additionalEosTokenIds {
+            if !existingTokens.contains(token) {
+                existingTokens.insert(token)
+                allSequences.append([token])
+            }
+        }
+
         self.sequences = allSequences
         self.maxLength = allSequences.map { $0.count }.max() ?? 0
     }

diff --git a/swift/Sources/CoreAILanguageModels/LanguageModel/CoreAILanguageModel.swift b/swift/Sources/CoreAILanguageModels/LanguageModel/CoreAILanguageModel.swift
@@ -37,6 +37,7 @@ public struct CoreAILanguageModel: LanguageModel {
     private let vocabSize: Int?
     private let supportsToolCalling: Bool
     private let supportsReasoning: Bool
+    private let additionalEosTokenIds: [Int32]
 
     // MARK: - Protocol Requirements
 
@@ -56,7 +57,8 @@ public struct CoreAILanguageModel: LanguageModel {
             tokenizer: tokenizer,
             modelIdentifier: modelIdentifier,
             samplingConfig: samplingConfig,
-            vocabSize: vocabSize
+            vocabSize: vocabSize,
+            additionalEosTokenIds: additionalEosTokenIds
         )
     }
 
@@ -97,13 +99,15 @@ public struct CoreAILanguageModel: LanguageModel {
         tokenizer: any Tokenizer,
         modelIdentifier: String = "coreai-model",
         samplingConfig: SamplingConfiguration = .greedy,
-        vocabSize: Int? = nil
+        vocabSize: Int? = nil,
+        additionalEosTokenIds: [Int32] = []
     ) {
         self.engine = engine
         self.tokenizer = tokenizer
         self.modelIdentifier = modelIdentifier
         self.samplingConfig = samplingConfig
         self.vocabSize = vocabSize
+        self.additionalEosTokenIds = additionalEosTokenIds
         self.supportsToolCalling = CoreAIExecutor.detectToolCallMarkers(using: tokenizer) != nil
         self.supportsReasoning =
             tokenizer.convertTokenToId("<think>") != nil
@@ -121,6 +125,7 @@ public struct CoreAILanguageModel: LanguageModel {
             fileprivate let modelIdentifier: String
             fileprivate let samplingConfig: SamplingConfiguration
             fileprivate let vocabSize: Int?
+            fileprivate let additionalEosTokenIds: [Int32]
 
             public static func == (lhs: Configuration, rhs: Configuration) -> Bool {
                 lhs.modelIdentifier == rhs.modelIdentifier
@@ -140,6 +145,9 @@ public struct CoreAILanguageModel: LanguageModel {
         private let modelIdentifier: String
         private let samplingConfig: SamplingConfiguration
         private let vocabSize: Int?
+        /// All EOS-like token IDs: the main `eosTokenId` plus any additional
+        /// stop tokens from tokenizer_config.json (e.g. Gemma's `<end_of_turn>`).
+        private let eosTokenIds: Set<Int32>
         /// Open / close marker pair the model uses for chain-of-thought
         /// blocks, discovered from the tokenizer's known token ids at init
         /// (see `detectThinkingMarkers`). For models that don't emit
@@ -162,6 +170,14 @@ public struct CoreAILanguageModel: LanguageModel {
             self.vocabSize = configuration.vocabSize
             self.thinkingMarkers = Self.detectThinkingMarkers(using: configuration.tokenizer)
             self.toolCallMarkers = Self.detectToolCallMarkers(using: configuration.tokenizer)
+
+            // Build the full set of EOS-like token IDs
+            var eos = Set<Int32>()
+            if let id = configuration.tokenizer.eosTokenId {
+                eos.insert(Int32(id))
+            }
+            eos.formUnion(configuration.additionalEosTokenIds)
+            self.eosTokenIds = eos
         }
 
         /// Probes the tokenizer for known reasoning marker pairs. Each
@@ -328,7 +344,8 @@ public struct CoreAILanguageModel: LanguageModel {
                 inferenceOptions: InferenceOptions(maxTokens: maxTokens)
             )
 
-            let eosTokenId = tokenizer.eosTokenId
+            // Use pre-computed set of all EOS-like tokens (main + additional)
+            let eosTokens = eosTokenIds
             // Incremental-decode buffer. After a clean emit, one token is
             // retained as context for the next step (see below). During a
             // multi-byte sequence that hasn't decoded cleanly yet, multiple
@@ -359,7 +376,7 @@ public struct CoreAILanguageModel: LanguageModel {
 
             for try await output in tokenStream {
                 let token = output.tokenId
-                if let eos = eosTokenId, Int(token) == eos {
+                if eosTokens.contains(token) {
                     tokenStream.setStopReason(.eos)
                     break
                 }
@@ -523,7 +540,10 @@ public struct CoreAILanguageModel: LanguageModel {
             }
 
             let strategy = ConstrainedDecodingStrategy(jsonSchema: jsonSchema, vocabSize: vocabSize)
-            let stopSequences = StopSequences(for: tokenizer)
+            let stopSequences = StopSequences(
+                for: tokenizer,
+                additionalEosTokenIds: Array(eosTokenIds)
+            )
 
             let stream = try await strategy.decode(
                 from: .tokens(promptTokens),

diff --git a/swift/Sources/CoreAILanguageModels/LanguageModel/CoreAIRunner.swift b/swift/Sources/CoreAILanguageModels/LanguageModel/CoreAIRunner.swift
@@ -80,12 +80,22 @@ public struct CoreAIRunner {
         let tokenizer = try await bundle.loadTokenizer()
         tokenizerLoadSpan.end()
 
+        // Read additional stop token IDs from tokenizer_config.json
+        let additionalEos: [Int32]
+        if let tokenizerDir = bundle.tokenizerPath {
+            additionalEos = LanguageConfig.additionalStopTokenIds(
+                from: tokenizerDir, tokenizer: tokenizer)
+        } else {
+            additionalEos = []
+        }
+
         return CoreAILanguageModel(
             engine: engine,
             tokenizer: tokenizer,
             modelIdentifier: bundle.name,
             samplingConfig: SamplingConfiguration.greedy,
-            vocabSize: bundle.vocabSize
+            vocabSize: bundle.vocabSize,
+            additionalEosTokenIds: additionalEos
         )
     }
 

diff --git a/swift/Sources/Tools/llm-runner/LLMRunnerMain.swift b/swift/Sources/Tools/llm-runner/LLMRunnerMain.swift
@@ -361,6 +361,20 @@ struct LLMRunner: AsyncParsableCommand, Sendable {
             "Tokenizer loaded from \(bundle.hasEmbeddedTokenizer ? "embedded bundle" : "HuggingFace")",
             component: "Main")
 
+        // Read additional stop token IDs from tokenizer_config.json (e.g. <end_of_turn> for Gemma)
+        let additionalEosTokenIds: [Int32]
+        if let tokenizerDir = bundle.tokenizerPath {
+            additionalEosTokenIds = LanguageConfig.additionalStopTokenIds(
+                from: tokenizerDir, tokenizer: tokenizer)
+            if !additionalEosTokenIds.isEmpty {
+                CLILogger.log(
+                    "Found \(additionalEosTokenIds.count) additional stop token(s) from tokenizer config: \(additionalEosTokenIds)",
+                    component: "Main")
+            }
+        } else {
+            additionalEosTokenIds = []
+        }
+
         CLILogger.log("Model loaded successfully:", component: "Main")
         CLILogger.log("   Name: \(modelName)", component: "Main")
         CLILogger.log("   Source: model bundle", component: "Main")
@@ -494,7 +508,8 @@ struct LLMRunner: AsyncParsableCommand, Sendable {
                 samplingConfiguration: samplingConfiguration,
                 maxTokens: maxTokens,
                 actualInputTokens: actualInputTokens,
-                modelVocabSize: modelVocabSize
+                modelVocabSize: modelVocabSize,
+                additionalEosTokenIds: additionalEosTokenIds
             )
         } else {
             // Generate text (timing handled by decoding strategies)
@@ -505,7 +520,8 @@ struct LLMRunner: AsyncParsableCommand, Sendable {
             // Encode stop tokens to sequences
             let stopSequences = try validateAndEncodeStopTokens(
                 stopTokens: stopTokens,
-                tokenizer: tokenizer
+                tokenizer: tokenizer,
+                additionalEosTokenIds: additionalEosTokenIds
             )
 
             // Check if logits are requested
@@ -590,7 +606,8 @@ struct LLMRunner: AsyncParsableCommand, Sendable {
         samplingConfiguration: SamplingConfiguration,
         maxTokens: Int,
         actualInputTokens: Int,
-        modelVocabSize: Int?
+        modelVocabSize: Int?,
+        additionalEosTokenIds: [Int32] = []
     ) async throws {
         let schema: String
         if FileManager.default.fileExists(atPath: schemaInput) {
@@ -603,7 +620,8 @@ struct LLMRunner: AsyncParsableCommand, Sendable {
 
         let stopSequences = try validateAndEncodeStopTokens(
             stopTokens: stopTokens,
-            tokenizer: tokenizer
+            tokenizer: tokenizer,
+            additionalEosTokenIds: additionalEosTokenIds
         )
 
         guard let vocabSize = modelVocabSize else {
@@ -676,15 +694,19 @@ struct LLMRunner: AsyncParsableCommand, Sendable {
     /// - Parameters:
     ///   - stopTokens: Array of stop token strings from CLI
     ///   - tokenizer: Tokenizer to use for encoding
+    ///   - additionalEosTokenIds: Additional EOS token IDs from tokenizer config
     /// - Returns: StopSequences containing all valid sequences plus tokenizer EOS tokens
     func validateAndEncodeStopTokens(
         stopTokens: [String],
-        tokenizer: any Tokenizer
+        tokenizer: any Tokenizer,
+        additionalEosTokenIds: [Int32] = []
     ) throws -> StopSequences {
         var sequences: [[Int32]] = []
 
         for stopString in stopTokens {
-            let tokens = tokenizer.encode(text: stopString).map { Int32($0) }
+            // Encode without adding BOS/EOS so special token strings like
+            // "<end_of_turn>" resolve to their single token ID, not [BOS, id].
+            let tokens = tokenizer.encode(text: stopString, addSpecialTokens: false).map { Int32($0) }
 
             // Fatal error for empty encodings - user explicitly requested this stop token
             guard !tokens.isEmpty else {
@@ -710,7 +732,11 @@ struct LLMRunner: AsyncParsableCommand, Sendable {
         }
 
         // Use new initializer that automatically includes EOS tokens from tokenizer
-        return StopSequences(for: tokenizer, additionalSequences: sequences)
+        return StopSequences(
+            for: tokenizer,
+            additionalSequences: sequences,
+            additionalEosTokenIds: additionalEosTokenIds
+        )
     }
 
     // MARK: - Asset Type Label