From 19bcaa2b070b504ae808e22fe19047520afd753d Mon Sep 17 00:00:00 2001 From: Nic van Dessel <51134175+nvandessel@users.noreply.github.com> Date: Sat, 4 Apr 2026 04:43:30 +0000 Subject: [PATCH 1/3] fix: add formatting_func for SFTTrainer compatibility Newer unsloth/trl requires a formatting_func when passing raw datasets. Use tokenizer.apply_chat_template to convert the messages list to a chat-formatted string. Co-Authored-By: Claude Opus 4.6 (1M context) --- notebooks/train-hippofloop.ipynb | 38 +----------------------------- src/hippofloop/training/trainer.py | 6 +++++ 2 files changed, 7 insertions(+), 37 deletions(-) diff --git a/notebooks/train-hippofloop.ipynb b/notebooks/train-hippofloop.ipynb index 0c56293..e75e85b 100644 --- a/notebooks/train-hippofloop.ipynb +++ b/notebooks/train-hippofloop.ipynb @@ -227,43 +227,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "from transformers import TrainingArguments\n", - "from trl import SFTTrainer\n", - "\n", - "OUTPUT_DIR = \"checkpoints/qwen25-3b-hippofloop\"\n", - "\n", - "training_args = TrainingArguments(\n", - " output_dir=OUTPUT_DIR,\n", - " num_train_epochs=3,\n", - " per_device_train_batch_size=1,\n", - " gradient_accumulation_steps=16,\n", - " learning_rate=2e-4,\n", - " lr_scheduler_type=\"cosine\",\n", - " warmup_ratio=0.03,\n", - " weight_decay=0.01,\n", - " bf16=False,\n", - " fp16=True,\n", - " eval_strategy=\"epoch\",\n", - " save_strategy=\"epoch\",\n", - " load_best_model_at_end=True,\n", - " metric_for_best_model=\"eval_loss\",\n", - " logging_steps=10,\n", - " seed=SEED,\n", - ")\n", - "\n", - "sft_trainer = SFTTrainer(\n", - " model=model,\n", - " tokenizer=tokenizer,\n", - " train_dataset=train_dataset,\n", - " eval_dataset=val_dataset,\n", - " args=training_args,\n", - ")\n", - "\n", - "print(f\"Training {len(train_dataset)} examples for {training_args.num_train_epochs} epochs...\")\n", - "print(f\"Effective batch size: {training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps}\")\n", - "sft_trainer.train()" - ] + "source": "from transformers import TrainingArguments\nfrom trl import SFTTrainer\n\nOUTPUT_DIR = \"checkpoints/qwen25-3b-hippofloop\"\n\ntraining_args = TrainingArguments(\n output_dir=OUTPUT_DIR,\n num_train_epochs=3,\n per_device_train_batch_size=1,\n gradient_accumulation_steps=16,\n learning_rate=2e-4,\n lr_scheduler_type=\"cosine\",\n warmup_ratio=0.03,\n weight_decay=0.01,\n bf16=False,\n fp16=True,\n eval_strategy=\"epoch\",\n save_strategy=\"epoch\",\n load_best_model_at_end=True,\n metric_for_best_model=\"eval_loss\",\n logging_steps=10,\n seed=SEED,\n)\n\ndef formatting_func(example):\n return tokenizer.apply_chat_template(example[\"messages\"], tokenize=False)\n\nsft_trainer = SFTTrainer(\n model=model,\n tokenizer=tokenizer,\n train_dataset=train_dataset,\n eval_dataset=val_dataset,\n args=training_args,\n formatting_func=formatting_func,\n)\n\nprint(f\"Training {len(train_dataset)} examples for {training_args.num_train_epochs} epochs...\")\nprint(f\"Effective batch size: {training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps}\")\nsft_trainer.train()" }, { "cell_type": "code", diff --git a/src/hippofloop/training/trainer.py b/src/hippofloop/training/trainer.py index 69a948e..fb6254c 100644 --- a/src/hippofloop/training/trainer.py +++ b/src/hippofloop/training/trainer.py @@ -88,12 +88,18 @@ def train( seed=self._config.seed, ) + def formatting_func(example: dict) -> str: + return tokenizer.apply_chat_template( + example["messages"], tokenize=False, + ) + trainer = SFTTrainer( model=model, tokenizer=tokenizer, train_dataset=train_dataset, eval_dataset=val_dataset, args=training_args, + formatting_func=formatting_func, ) logger.info("Starting training (%d epochs)", self._config.epochs) From 046188ded199a091cf957bead6d49209e09a7d99 Mon Sep 17 00:00:00 2001 From: Nic van Dessel <51134175+nvandessel@users.noreply.github.com> Date: Sat, 4 Apr 2026 04:59:11 +0000 Subject: [PATCH 2/3] fix: formatting_func must return list, warmup_ratio deprecated Unsloth expects formatting_func to return a list of strings, not a single string. Also replace deprecated warmup_ratio with warmup_steps. Co-Authored-By: Claude Opus 4.6 (1M context) --- notebooks/train-hippofloop.ipynb | 2 +- src/hippofloop/training/trainer.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/notebooks/train-hippofloop.ipynb b/notebooks/train-hippofloop.ipynb index e75e85b..2bfb853 100644 --- a/notebooks/train-hippofloop.ipynb +++ b/notebooks/train-hippofloop.ipynb @@ -227,7 +227,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": "from transformers import TrainingArguments\nfrom trl import SFTTrainer\n\nOUTPUT_DIR = \"checkpoints/qwen25-3b-hippofloop\"\n\ntraining_args = TrainingArguments(\n output_dir=OUTPUT_DIR,\n num_train_epochs=3,\n per_device_train_batch_size=1,\n gradient_accumulation_steps=16,\n learning_rate=2e-4,\n lr_scheduler_type=\"cosine\",\n warmup_ratio=0.03,\n weight_decay=0.01,\n bf16=False,\n fp16=True,\n eval_strategy=\"epoch\",\n save_strategy=\"epoch\",\n load_best_model_at_end=True,\n metric_for_best_model=\"eval_loss\",\n logging_steps=10,\n seed=SEED,\n)\n\ndef formatting_func(example):\n return tokenizer.apply_chat_template(example[\"messages\"], tokenize=False)\n\nsft_trainer = SFTTrainer(\n model=model,\n tokenizer=tokenizer,\n train_dataset=train_dataset,\n eval_dataset=val_dataset,\n args=training_args,\n formatting_func=formatting_func,\n)\n\nprint(f\"Training {len(train_dataset)} examples for {training_args.num_train_epochs} epochs...\")\nprint(f\"Effective batch size: {training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps}\")\nsft_trainer.train()" + "source": "from transformers import TrainingArguments\nfrom trl import SFTTrainer\n\nOUTPUT_DIR = \"checkpoints/qwen25-3b-hippofloop\"\n\ntraining_args = TrainingArguments(\n output_dir=OUTPUT_DIR,\n num_train_epochs=3,\n per_device_train_batch_size=1,\n gradient_accumulation_steps=16,\n learning_rate=2e-4,\n lr_scheduler_type=\"cosine\",\n warmup_steps=50,\n weight_decay=0.01,\n bf16=False,\n fp16=True,\n eval_strategy=\"epoch\",\n save_strategy=\"epoch\",\n load_best_model_at_end=True,\n metric_for_best_model=\"eval_loss\",\n logging_steps=10,\n seed=SEED,\n)\n\ndef formatting_func(example):\n return [tokenizer.apply_chat_template(example[\"messages\"], tokenize=False)]\n\nsft_trainer = SFTTrainer(\n model=model,\n tokenizer=tokenizer,\n train_dataset=train_dataset,\n eval_dataset=val_dataset,\n args=training_args,\n formatting_func=formatting_func,\n)\n\nprint(f\"Training {len(train_dataset)} examples for {training_args.num_train_epochs} epochs...\")\nprint(f\"Effective batch size: {training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps}\")\nsft_trainer.train()" }, { "cell_type": "code", diff --git a/src/hippofloop/training/trainer.py b/src/hippofloop/training/trainer.py index fb6254c..5231316 100644 --- a/src/hippofloop/training/trainer.py +++ b/src/hippofloop/training/trainer.py @@ -76,7 +76,7 @@ def train( gradient_accumulation_steps=self._config.gradient_accumulation_steps, learning_rate=self._config.learning_rate, lr_scheduler_type=self._config.lr_scheduler, - warmup_ratio=self._config.warmup_ratio, + warmup_steps=50, weight_decay=self._config.weight_decay, bf16=self._config.bf16, fp16=self._config.fp16, @@ -88,10 +88,10 @@ def train( seed=self._config.seed, ) - def formatting_func(example: dict) -> str: - return tokenizer.apply_chat_template( + def formatting_func(example: dict) -> list[str]: + return [tokenizer.apply_chat_template( example["messages"], tokenize=False, - ) + )] trainer = SFTTrainer( model=model, From c8013502e6fb515a2415358cc01769965dfdb917 Mon Sep 17 00:00:00 2001 From: Nic van Dessel <51134175+nvandessel@users.noreply.github.com> Date: Sat, 4 Apr 2026 05:36:26 +0000 Subject: [PATCH 3/3] fix: formatting_func receives batched examples, not single When batched=True (unsloth default), formatting_func gets a dict of lists. Iterate over examples["messages"] and return a list of formatted strings. Co-Authored-By: Claude Opus 4.6 (1M context) --- notebooks/train-hippofloop.ipynb | 2 +- src/hippofloop/training/trainer.py | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/notebooks/train-hippofloop.ipynb b/notebooks/train-hippofloop.ipynb index 2bfb853..b4c7de2 100644 --- a/notebooks/train-hippofloop.ipynb +++ b/notebooks/train-hippofloop.ipynb @@ -227,7 +227,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": "from transformers import TrainingArguments\nfrom trl import SFTTrainer\n\nOUTPUT_DIR = \"checkpoints/qwen25-3b-hippofloop\"\n\ntraining_args = TrainingArguments(\n output_dir=OUTPUT_DIR,\n num_train_epochs=3,\n per_device_train_batch_size=1,\n gradient_accumulation_steps=16,\n learning_rate=2e-4,\n lr_scheduler_type=\"cosine\",\n warmup_steps=50,\n weight_decay=0.01,\n bf16=False,\n fp16=True,\n eval_strategy=\"epoch\",\n save_strategy=\"epoch\",\n load_best_model_at_end=True,\n metric_for_best_model=\"eval_loss\",\n logging_steps=10,\n seed=SEED,\n)\n\ndef formatting_func(example):\n return [tokenizer.apply_chat_template(example[\"messages\"], tokenize=False)]\n\nsft_trainer = SFTTrainer(\n model=model,\n tokenizer=tokenizer,\n train_dataset=train_dataset,\n eval_dataset=val_dataset,\n args=training_args,\n formatting_func=formatting_func,\n)\n\nprint(f\"Training {len(train_dataset)} examples for {training_args.num_train_epochs} epochs...\")\nprint(f\"Effective batch size: {training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps}\")\nsft_trainer.train()" + "source": "from transformers import TrainingArguments\nfrom trl import SFTTrainer\n\nOUTPUT_DIR = \"checkpoints/qwen25-3b-hippofloop\"\n\ntraining_args = TrainingArguments(\n output_dir=OUTPUT_DIR,\n num_train_epochs=3,\n per_device_train_batch_size=1,\n gradient_accumulation_steps=16,\n learning_rate=2e-4,\n lr_scheduler_type=\"cosine\",\n warmup_steps=50,\n weight_decay=0.01,\n bf16=False,\n fp16=True,\n eval_strategy=\"epoch\",\n save_strategy=\"epoch\",\n load_best_model_at_end=True,\n metric_for_best_model=\"eval_loss\",\n logging_steps=10,\n seed=SEED,\n)\n\ndef formatting_func(examples):\n texts = []\n for msgs in examples[\"messages\"]:\n texts.append(tokenizer.apply_chat_template(msgs, tokenize=False))\n return texts\n\nsft_trainer = SFTTrainer(\n model=model,\n tokenizer=tokenizer,\n train_dataset=train_dataset,\n eval_dataset=val_dataset,\n args=training_args,\n formatting_func=formatting_func,\n)\n\nprint(f\"Training {len(train_dataset)} examples for {training_args.num_train_epochs} epochs...\")\nprint(f\"Effective batch size: {training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps}\")\nsft_trainer.train()" }, { "cell_type": "code", diff --git a/src/hippofloop/training/trainer.py b/src/hippofloop/training/trainer.py index 5231316..4011d3e 100644 --- a/src/hippofloop/training/trainer.py +++ b/src/hippofloop/training/trainer.py @@ -88,10 +88,11 @@ def train( seed=self._config.seed, ) - def formatting_func(example: dict) -> list[str]: - return [tokenizer.apply_chat_template( - example["messages"], tokenize=False, - )] + def formatting_func(examples: dict) -> list[str]: + return [ + tokenizer.apply_chat_template(msgs, tokenize=False) + for msgs in examples["messages"] + ] trainer = SFTTrainer( model=model,