From 76ca50eacc152a77e683a4bd0e48b45d7d3a0a4c Mon Sep 17 00:00:00 2001
From: Ding Wang <30425407+playaswd@users.noreply.github.com>
Date: Thu, 26 Mar 2026 17:57:54 +0800
Subject: [PATCH] fix(docs): correct LLM_TYPE for MiniCPM-V-4 from "llama" to "llama3" (#1049)

The finetune documentation and shell scripts incorrectly stated that
MiniCPM-V-4 should use LLM_TYPE="llama", but the code in dataset.py only
handles "llama3", "qwen", and "minicpm" (default). There is no "llama"
handler, so users following the docs would silently get the wrong
tokenization (minicpm-style instead of llama3-style).

Since MiniCPM-V-4 uses a Llama-based LLM, the correct LLM_TYPE is
"llama3". This commit updates the documentation in readme.md,
finetune_ds.sh, and finetune_lora.sh accordingly.

Co-Authored-By: Claude Opus 4.6
---
 finetune/finetune_ds.sh   | 2 +-
 finetune/finetune_lora.sh | 2 +-
 finetune/readme.md        | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/finetune/finetune_ds.sh b/finetune/finetune_ds.sh
index 93ff7599..c5ad086c 100644
--- a/finetune/finetune_ds.sh
+++ b/finetune/finetune_ds.sh
@@ -13,7 +13,7 @@ MODEL="openbmb/MiniCPM-o-2_6"
 
 DATA="path/to/trainging_data"
 EVAL_DATA="path/to/test_data"
-# if use openbmb/MiniCPM-V-2, please set LLM_TYPE=minicpm, if use openbmb/MiniCPM-Llama3-V-2_5, please set LLM_TYPE="llama3",
+# if use openbmb/MiniCPM-V-2, please set LLM_TYPE=minicpm, if use openbmb/MiniCPM-Llama3-V-2_5 or MiniCPM-V-4, please set LLM_TYPE="llama3",
 # if use openbmb/MiniCPM-o-2_6 or openbmb/MiniCPM-V-2_6, please set LLM_TYPE=qwen
 LLM_TYPE="qwen"
 MODEL_MAX_Length=2048 # if conduct multi-images sft, please set MODEL_MAX_Length=4096
diff --git a/finetune/finetune_lora.sh b/finetune/finetune_lora.sh
index df3140a4..c9d18216 100644
--- a/finetune/finetune_lora.sh
+++ b/finetune/finetune_lora.sh
@@ -12,7 +12,7 @@ MODEL="openbmb/MiniCPM-o-2_6"
 # See the section for finetuning in README for more information.
 DATA="path/to/trainging_data"
 EVAL_DATA="path/to/test_data"
-# if use openbmb/MiniCPM-V-2, please set LLM_TYPE=minicpm, if use openbmb/MiniCPM-Llama3-V-2_5, please set LLM_TYPE="llama3",
+# if use openbmb/MiniCPM-V-2, please set LLM_TYPE=minicpm, if use openbmb/MiniCPM-Llama3-V-2_5 or MiniCPM-V-4, please set LLM_TYPE="llama3",
 # if use openbmb/MiniCPM-o-2_6 or openbmb/MiniCPM-V-2_6, please set LLM_TYPE=qwen
 LLM_TYPE="qwen"
 MODEL_MAX_Length=2048 # if conduct multi-images sft, please set MODEL_MAX_Length=4096
diff --git a/finetune/readme.md b/finetune/readme.md
index 188670f5..f874bffb 100644
--- a/finetune/readme.md
+++ b/finetune/readme.md
@@ -99,7 +99,7 @@ Full-parameter parameter finetuning requires updating all parameters of LLM in t
 MODEL="MiniCPM-o-2_6" # or "openbmb/MiniCPM-V-2_6", "openbmb/MiniCPM-Llama3-V-2_5", "openbmb/MiniCPM-V-2"
 DATA="path/to/training_data.json"
 EVAL_DATA="path/to/test_data.json"
-LLM_TYPE="qwen" # llama for MiniCPM-V-4, minicpm for MiniCPM-V-2, llama3 for MiniCPM-Llama3-V-2_5, qwen for MiniCPM-o-2_6/MiniCPM-V-2_6
+LLM_TYPE="qwen" # llama3 for MiniCPM-V-4, minicpm for MiniCPM-V-2, llama3 for MiniCPM-Llama3-V-2_5, qwen for MiniCPM-o-2_6/MiniCPM-V-2_6
 ```
 
 To launch your training, run the following script: