36 changes: 20 additions & 16 deletions common/arg.cpp
@@ -435,6 +435,25 @@ static bool parse_bool_value(const std::string & value) {
 // CLI argument parsing functions
 //
 
+void common_params_handle_models(common_params & params, llama_example curr_ex) {
+    auto res = common_params_handle_model(params.model, params.hf_token, params.offline);
+    if (params.no_mmproj) {
+        params.mmproj = {};
+    } else if (res.found_mmproj && params.mmproj.path.empty() && params.mmproj.url.empty()) {
+        // optionally, handle mmproj model when -hf is specified
+        params.mmproj = res.mmproj;
+    }
+    // only download mmproj if the current example is using it
+    for (const auto & ex : mmproj_examples) {
+        if (curr_ex == ex) {
+            common_params_handle_model(params.mmproj, params.hf_token, params.offline);
+            break;
+        }
+    }
+    common_params_handle_model(params.speculative.draft.mparams, params.hf_token, params.offline);
+    common_params_handle_model(params.vocoder.model, params.hf_token, params.offline);
+}
+
 static bool common_params_parse_ex(int argc, char ** argv, common_params_context & ctx_arg) {
     common_params & params = ctx_arg.params;
 
@@ -588,22 +607,7 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
 
     // handle model and download
     if (!skip_model_download) {
-        auto res = common_params_handle_model(params.model, params.hf_token, params.offline);
-        if (params.no_mmproj) {
-            params.mmproj = {};
-        } else if (res.found_mmproj && params.mmproj.path.empty() && params.mmproj.url.empty()) {
-            // optionally, handle mmproj model when -hf is specified
-            params.mmproj = res.mmproj;
-        }
-        // only download mmproj if the current example is using it
-        for (const auto & ex : mmproj_examples) {
-            if (ctx_arg.ex == ex) {
-                common_params_handle_model(params.mmproj, params.hf_token, params.offline);
-                break;
-            }
-        }
-        common_params_handle_model(params.speculative.draft.mparams, params.hf_token, params.offline);
-        common_params_handle_model(params.vocoder.model, params.hf_token, params.offline);
+        common_params_handle_models(params, ctx_arg.ex);
     }
 
     // model is required (except for server)
3 changes: 3 additions & 0 deletions common/arg.h
@@ -129,5 +129,8 @@ bool common_params_to_map(int argc, char ** argv, llama_example ex, std::map<com
 // see: https://github.com/ggml-org/llama.cpp/issues/18163
 void common_params_add_preset_options(std::vector<common_arg> & args);
 
+// Populate model paths (main model, mmproj, etc) from -hf if necessary
+void common_params_handle_models(common_params & params, llama_example curr_ex);
+
 // initialize argument parser context - used by test-arg-parser and preset
 common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);
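
For reference, a minimal usage sketch of the new helper (not part of this PR; the example enum value, field names, and repo string below are assumptions used only for illustration):

    // sketch only: assumes common.h / arg.h are included and params was
    // populated elsewhere (e.g. from a preset file) with -hf style fields
    common_params params;
    params.model.hf_repo = "ggml-org/some-model-GGUF";      // hypothetical repo
    common_params_handle_models(params, LLAMA_EXAMPLE_SERVER);
    // params.model.path should now point at the resolved local file; the mmproj
    // model is only fetched when the current example is listed in mmproj_examples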
7 changes: 6 additions & 1 deletion common/preset.cpp
@@ -163,8 +163,13 @@ void common_preset::merge(const common_preset & other) {
     }
 }
 
-void common_preset::apply_to_params(common_params & params) const {
+void common_preset::apply_to_params(common_params & params, const std::set<std::string> & handled_keys) const {
     for (const auto & [opt, val] : options) {
+        if (!handled_keys.empty()) {
+            if (!opt.env || handled_keys.find(opt.env) == handled_keys.end()) {
+                continue;
+            }
+        }
         // apply each option to params
         if (opt.handler_string) {
             opt.handler_string(params, val);
3 changes: 2 additions & 1 deletion common/preset.h
@@ -43,7 +43,8 @@ struct common_preset {
     void merge(const common_preset & other);
 
     // apply preset options to common_params
-    void apply_to_params(common_params & params) const;
+    // optionally specify handled_keys to only apply a subset of options (identified by their env), if empty, apply all options
+    void apply_to_params(common_params & params, const std::set<std::string> & handled_keys = std::set<std::string>()) const;
 };
 
 // interface for multiple presets in one file
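A rough sketch of how the new handled_keys parameter could be combined with common_params_handle_models (illustrative only; the env key names and the call order are assumptions, not part of this PR):

    // sketch only: assumes preset.h / arg.h are included
    common_preset preset;   // assume already populated from a preset file
    common_params params;
    // apply just the model-related options first, identified by their env names
    preset.apply_to_params(params, {"LLAMA_ARG_MODEL", "LLAMA_ARG_HF_REPO"});
    common_params_handle_models(params, LLAMA_EXAMPLE_SERVER);  // resolve -hf/model paths early
    // an empty set (the default) applies every option in the preset
    preset.apply_to_params(params);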
14 changes: 11 additions & 3 deletions convert_hf_to_gguf.py
@@ -2865,8 +2865,12 @@ def __init__(self, *args, **kwargs):
         # fix for SmolVLM2, missing `num_attention_heads` in config.json
         if self.hf_arch == "VLlama3ForCausalLM":
             self.hparams["num_attention_heads"] = self.hparams.get("num_attention_heads", 32)
-        hparams = ModelBase.load_hparams(self.dir_model, is_mistral_format=False)
-        self.origin_hf_arch = hparams.get('architectures', [None])[0]
+        # Mistral consolidated format has no config.json; origin_hf_arch is HF-only.
+        if self.is_mistral_format:
+            self.origin_hf_arch = None
+        else:
+            hparams = ModelBase.load_hparams(self.dir_model, is_mistral_format=False)
+            self.origin_hf_arch = hparams.get('architectures', [None])[0]
 
     def set_vocab(self):
         if self.origin_hf_arch == "GlmasrModel":
@@ -13409,16 +13413,20 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_vision_use_silu(True)
 
         # spatial_merge_size
-        if self.find_vparam(["mm_projector_id"]) == "patch_merge":
+        if self.find_vparam(["mm_projector_id"], optional=True) == "patch_merge":
             self.gguf_writer.add_vision_spatial_merge_size(
                 self.find_vparam(["spatial_merge_size"])
             )
 
     def map_tensor_name(self, name: str, try_suffixes: Sequence[str] = (".weight", ".bias")) -> str:
         if name == "vision_language_adapter.w_in.weight":
             return "mm.1.weight"
+        elif name == "vision_language_adapter.w_in.bias":
+            return "mm.1.bias"
         elif name == "vision_language_adapter.w_out.weight":
             return "mm.2.weight"
+        elif name == "vision_language_adapter.w_out.bias":
+            return "mm.2.bias"
         return super().map_tensor_name(name, try_suffixes)
 
 
4 changes: 4 additions & 0 deletions ggml/src/ggml-opencl/CMakeLists.txt
@@ -176,6 +176,10 @@ set(GGML_OPENCL_KERNELS
     flash_attn_f32
 )
 
+if (GGML_OPENCL_USE_ADRENO_KERNELS)
+    list(APPEND GGML_OPENCL_KERNELS gemm_xmem_f16_f32_os8)
+endif ()
+
 foreach (K ${GGML_OPENCL_KERNELS})
     ggml_opencl_add_kernel(${K})
 endforeach()