36 changes: 20 additions & 16 deletions common/arg.cpp
@@ -435,6 +435,25 @@ static bool parse_bool_value(const std::string & value) {
 // CLI argument parsing functions
 //
 
+void common_params_handle_models(common_params & params, llama_example curr_ex) {
+    auto res = common_params_handle_model(params.model, params.hf_token, params.offline);
+    if (params.no_mmproj) {
+        params.mmproj = {};
+    } else if (res.found_mmproj && params.mmproj.path.empty() && params.mmproj.url.empty()) {
+        // optionally, handle mmproj model when -hf is specified
+        params.mmproj = res.mmproj;
+    }
+    // only download mmproj if the current example is using it
+    for (const auto & ex : mmproj_examples) {
+        if (curr_ex == ex) {
+            common_params_handle_model(params.mmproj, params.hf_token, params.offline);
+            break;
+        }
+    }
+    common_params_handle_model(params.speculative.draft.mparams, params.hf_token, params.offline);
+    common_params_handle_model(params.vocoder.model, params.hf_token, params.offline);
+}
+
 static bool common_params_parse_ex(int argc, char ** argv, common_params_context & ctx_arg) {
     common_params & params = ctx_arg.params;
 
@@ -588,22 +607,7 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
 
     // handle model and download
     if (!skip_model_download) {
-        auto res = common_params_handle_model(params.model, params.hf_token, params.offline);
-        if (params.no_mmproj) {
-            params.mmproj = {};
-        } else if (res.found_mmproj && params.mmproj.path.empty() && params.mmproj.url.empty()) {
-            // optionally, handle mmproj model when -hf is specified
-            params.mmproj = res.mmproj;
-        }
-        // only download mmproj if the current example is using it
-        for (const auto & ex : mmproj_examples) {
-            if (ctx_arg.ex == ex) {
-                common_params_handle_model(params.mmproj, params.hf_token, params.offline);
-                break;
-            }
-        }
-        common_params_handle_model(params.speculative.draft.mparams, params.hf_token, params.offline);
-        common_params_handle_model(params.vocoder.model, params.hf_token, params.offline);
+        common_params_handle_models(params, ctx_arg.ex);
     }
 
     // model is required (except for server)
3 changes: 3 additions & 0 deletions common/arg.h
@@ -129,5 +129,8 @@ bool common_params_to_map(int argc, char ** argv, llama_example ex, std::map<com
 // see: https://github.com/ggml-org/llama.cpp/issues/18163
 void common_params_add_preset_options(std::vector<common_arg> & args);
 
+// Populate model paths (main model, mmproj, etc) from -hf if necessary
+void common_params_handle_models(common_params & params, llama_example curr_ex);
+
 // initialize argument parser context - used by test-arg-parser and preset
 common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);
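
For reference, a minimal usage sketch of the new helper (not part of this PR; the example enum value, field names, and repo string below are assumptions used only for illustration):

    // sketch only: assumes common.h / arg.h are included and params was
    // populated elsewhere (e.g. from a preset file) with -hf style fields
    common_params params;
    params.model.hf_repo = "ggml-org/some-model-GGUF";      // hypothetical repo
    common_params_handle_models(params, LLAMA_EXAMPLE_SERVER);
    // params.model.path should now point at the resolved local file; the mmproj
    // model is only fetched when the current example is listed in mmproj_examples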
7 changes: 6 additions & 1 deletion common/preset.cpp
@@ -163,8 +163,13 @@ void common_preset::merge(const common_preset & other) {
     }
 }
 
-void common_preset::apply_to_params(common_params & params) const {
+void common_preset::apply_to_params(common_params & params, const std::set<std::string> & handled_keys) const {
     for (const auto & [opt, val] : options) {
+        if (!handled_keys.empty()) {
+            if (!opt.env || handled_keys.find(opt.env) == handled_keys.end()) {
+                continue;
+            }
+        }
         // apply each option to params
         if (opt.handler_string) {
             opt.handler_string(params, val);
3 changes: 2 additions & 1 deletion common/preset.h
@@ -43,7 +43,8 @@ struct common_preset {
     void merge(const common_preset & other);
 
     // apply preset options to common_params
-    void apply_to_params(common_params & params) const;
+    // optionally specify handled_keys to only apply a subset of options (identified by their env), if empty, apply all options
+    void apply_to_params(common_params & params, const std::set<std::string> & handled_keys = std::set<std::string>()) const;
 };
 
 // interface for multiple presets in one file
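A rough sketch of how the new handled_keys parameter could be combined with common_params_handle_models (illustrative only; the env key names and the call order are assumptions, not part of this PR):

    // sketch only: assumes preset.h / arg.h are included
    common_preset preset;   // assume already populated from a preset file
    common_params params;
    // apply just the model-related options first, identified by their env names
    preset.apply_to_params(params, {"LLAMA_ARG_MODEL", "LLAMA_ARG_HF_REPO"});
    common_params_handle_models(params, LLAMA_EXAMPLE_SERVER);  // resolve -hf/model paths early
    // an empty set (the default) applies every option in the preset
    preset.apply_to_params(params);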
14 changes: 11 additions & 3 deletions convert_hf_to_gguf.py
@@ -2865,8 +2865,12 @@ def __init__(self, *args, **kwargs):
         # fix for SmolVLM2, missing `num_attention_heads` in config.json
         if self.hf_arch == "VLlama3ForCausalLM":
             self.hparams["num_attention_heads"] = self.hparams.get("num_attention_heads", 32)
-        hparams = ModelBase.load_hparams(self.dir_model, is_mistral_format=False)
-        self.origin_hf_arch = hparams.get('architectures', [None])[0]
+        # Mistral consolidated format has no config.json; origin_hf_arch is HF-only.
+        if self.is_mistral_format:
+            self.origin_hf_arch = None
+        else:
+            hparams = ModelBase.load_hparams(self.dir_model, is_mistral_format=False)
+            self.origin_hf_arch = hparams.get('architectures', [None])[0]
 
     def set_vocab(self):
         if self.origin_hf_arch == "GlmasrModel":
@@ -13409,16 +13413,20 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_vision_use_silu(True)
 
         # spatial_merge_size
-        if self.find_vparam(["mm_projector_id"]) == "patch_merge":
+        if self.find_vparam(["mm_projector_id"], optional=True) == "patch_merge":
             self.gguf_writer.add_vision_spatial_merge_size(
                 self.find_vparam(["spatial_merge_size"])
             )
 
     def map_tensor_name(self, name: str, try_suffixes: Sequence[str] = (".weight", ".bias")) -> str:
         if name == "vision_language_adapter.w_in.weight":
             return "mm.1.weight"
+        elif name == "vision_language_adapter.w_in.bias":
+            return "mm.1.bias"
         elif name == "vision_language_adapter.w_out.weight":
             return "mm.2.weight"
+        elif name == "vision_language_adapter.w_out.bias":
+            return "mm.2.bias"
         return super().map_tensor_name(name, try_suffixes)
 
 
4 changes: 4 additions & 0 deletions ggml/src/ggml-opencl/CMakeLists.txt
@@ -176,6 +176,10 @@ set(GGML_OPENCL_KERNELS
     flash_attn_f32
 )
 
+if (GGML_OPENCL_USE_ADRENO_KERNELS)
+    list(APPEND GGML_OPENCL_KERNELS gemm_xmem_f16_f32_os8)
+endif ()
+
 foreach (K ${GGML_OPENCL_KERNELS})
     ggml_opencl_add_kernel(${K})
 endforeach()