diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f2d5c4c..fc181e9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,9 +6,6 @@ on: - main - master pull_request: - branches: - - main - - master jobs: test: diff --git a/README.md b/README.md index 60c22fd..53cbd4d 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # MLX Ruby Examples +[![CI](https://github.com/skryl/mlx-ruby-examples/actions/workflows/ci.yml/badge.svg)](https://github.com/skryl/mlx-ruby-examples/actions/workflows/ci.yml) + Ruby/MLX example ports that depend on Apple Metal when building the `mlx` gem. ## Prerequisites (macOS) diff --git a/lora/lora.rb b/lora/lora.rb index 4622d4f..18c9986 100644 --- a/lora/lora.rb +++ b/lora/lora.rb @@ -155,7 +155,7 @@ def train(model, train_set, valid_set, optimizer, tokenizer, options) seed: options[:seed] ) - options[:iters].times do |it| + options[:iters].times do |iter| inputs, targets, lengths = batches.next (lvalue, toks), grad = loss_value_and_grad.call(inputs, targets, lengths) optimizer.update(model, grad) @@ -164,12 +164,12 @@ def train(model, train_set, valid_set, optimizer, tokenizer, options) losses << lvalue.item.to_f n_tokens += toks.item.to_f - if ((it + 1) % options[:steps_per_report]).zero? + if ((iter + 1) % options[:steps_per_report]).zero? stop = Process.clock_gettime(Process::CLOCK_MONOTONIC) train_loss = losses.sum / [losses.length, 1].max.to_f puts format( "Iter %d: Train loss %.3f, It/sec %.3f, Tokens/sec %.3f", - it + 1, + iter + 1, train_loss, options[:steps_per_report] / [stop - start, 1e-9].max, n_tokens / [stop - start, 1e-9].max @@ -179,7 +179,7 @@ def train(model, train_set, valid_set, optimizer, tokenizer, options) start = Process.clock_gettime(Process::CLOCK_MONOTONIC) end - if it.zero? || ((it + 1) % options[:steps_per_eval]).zero? + if iter.zero? || ((iter + 1) % options[:steps_per_eval]).zero? eval_start = Process.clock_gettime(Process::CLOCK_MONOTONIC) val_loss = evaluate( model, @@ -191,16 +191,16 @@ def train(model, train_set, valid_set, optimizer, tokenizer, options) ) puts format( "Iter %d: Val loss %.3f, Val took %.3fs", - it + 1, + iter + 1, val_loss, Process.clock_gettime(Process::CLOCK_MONOTONIC) - eval_start ) start = Process.clock_gettime(Process::CLOCK_MONOTONIC) end - if ((it + 1) % options[:save_every]).zero? + if ((iter + 1) % options[:save_every]).zero? save_trainable_adapters(options[:adapter_file], model) - puts "Iter #{it + 1}: Saved adapter weights to #{options[:adapter_file]}." + puts "Iter #{iter + 1}: Saved adapter weights to #{options[:adapter_file]}." end end end diff --git a/no_dsl/lora/lora.rb b/no_dsl/lora/lora.rb index 862aed6..e182679 100644 --- a/no_dsl/lora/lora.rb +++ b/no_dsl/lora/lora.rb @@ -155,7 +155,7 @@ def train(model, train_set, valid_set, optimizer, tokenizer, options) seed: options[:seed] ) - options[:iters].times do |it| + options[:iters].times do |iter| inputs, targets, lengths = batches.next (lvalue, toks), grad = loss_value_and_grad.call(inputs, targets, lengths) optimizer.update(model, grad) @@ -164,12 +164,12 @@ def train(model, train_set, valid_set, optimizer, tokenizer, options) losses << lvalue.item.to_f n_tokens += toks.item.to_f - if ((it + 1) % options[:steps_per_report]).zero? + if ((iter + 1) % options[:steps_per_report]).zero? stop = Process.clock_gettime(Process::CLOCK_MONOTONIC) train_loss = losses.sum / [losses.length, 1].max.to_f puts format( "Iter %d: Train loss %.3f, It/sec %.3f, Tokens/sec %.3f", - it + 1, + iter + 1, train_loss, options[:steps_per_report] / [stop - start, 1e-9].max, n_tokens / [stop - start, 1e-9].max @@ -179,7 +179,7 @@ def train(model, train_set, valid_set, optimizer, tokenizer, options) start = Process.clock_gettime(Process::CLOCK_MONOTONIC) end - if it.zero? || ((it + 1) % options[:steps_per_eval]).zero? + if iter.zero? || ((iter + 1) % options[:steps_per_eval]).zero? eval_start = Process.clock_gettime(Process::CLOCK_MONOTONIC) val_loss = evaluate( model, @@ -191,16 +191,16 @@ def train(model, train_set, valid_set, optimizer, tokenizer, options) ) puts format( "Iter %d: Val loss %.3f, Val took %.3fs", - it + 1, + iter + 1, val_loss, Process.clock_gettime(Process::CLOCK_MONOTONIC) - eval_start ) start = Process.clock_gettime(Process::CLOCK_MONOTONIC) end - if ((it + 1) % options[:save_every]).zero? + if ((iter + 1) % options[:save_every]).zero? save_trainable_adapters(options[:adapter_file], model) - puts "Iter #{it + 1}: Saved adapter weights to #{options[:adapter_file]}." + puts "Iter #{iter + 1}: Saved adapter weights to #{options[:adapter_file]}." end end end diff --git a/no_dsl/normalizing_flow/main.rb b/no_dsl/normalizing_flow/main.rb index 98fddaa..91b1ecd 100644 --- a/no_dsl/normalizing_flow/main.rb +++ b/no_dsl/normalizing_flow/main.rb @@ -74,19 +74,19 @@ def run(options) rng = Random.new(options[:seed] + 1) all_indices = (0...x.shape[0]).to_a tic = Process.clock_gettime(Process::CLOCK_MONOTONIC) - options[:n_steps].times do |it| + options[:n_steps].times do |iter| ids = all_indices.sample(options[:n_batch], random: rng) batch = MLX::Core.take(x, MLX::Core.array(ids, MLX::Core.int32), 0) loss, grads = loss_and_grad_fn.call(batch) optimizer.update(model, grads) MLX::Core.eval(loss, model.parameters, optimizer.state) - next unless ((it + 1) % options[:report_every]).zero? + next unless ((iter + 1) % options[:report_every]).zero? toc = Process.clock_gettime(Process::CLOCK_MONOTONIC) puts format( "Step %d: Loss %.4f | It/sec %.2f", - it + 1, + iter + 1, loss.item.to_f, options[:report_every] / [toc - tic, 1e-9].max ) diff --git a/no_dsl/transformer_lm/main.rb b/no_dsl/transformer_lm/main.rb index 3c4d694..705e523 100644 --- a/no_dsl/transformer_lm/main.rb +++ b/no_dsl/transformer_lm/main.rb @@ -146,11 +146,11 @@ def run(options) losses = [] tic = Process.clock_gettime(Process::CLOCK_MONOTONIC) - options[:num_iters].times do |it| + options[:num_iters].times do |iter| warmup = if options[:lr_warmup] <= 0 1.0 else - [1.0, it.to_f / options[:lr_warmup].to_f].min + [1.0, iter.to_f / options[:lr_warmup].to_f].min end optimizer.learning_rate = warmup * options[:learning_rate] @@ -160,12 +160,12 @@ def run(options) MLX::Core.eval(loss, model.parameters, optimizer.state) losses << loss.item.to_f - if ((it + 1) % options[:steps_per_report]).zero? + if ((iter + 1) % options[:steps_per_report]).zero? toc = Process.clock_gettime(Process::CLOCK_MONOTONIC) train_loss = losses.sum / [losses.length, 1].max.to_f puts format( "Iter %d: Train loss %.3f, It/sec %.3f", - it + 1, + iter + 1, train_loss, options[:steps_per_report] / [toc - tic, 1e-9].max ) @@ -173,7 +173,7 @@ def run(options) tic = Process.clock_gettime(Process::CLOCK_MONOTONIC) end - next unless ((it + 1) % options[:steps_per_eval]).zero? + next unless ((iter + 1) % options[:steps_per_eval]).zero? eval_tic = Process.clock_gettime(Process::CLOCK_MONOTONIC) val_loss = eval_fn( @@ -186,7 +186,7 @@ def run(options) val_ppl = Math.exp(val_loss) puts format( "Iter %d: Val loss %.3f, Val ppl %.3f, Val took %.3fs", - it + 1, + iter + 1, val_loss, val_ppl, eval_toc - eval_tic diff --git a/transformer_lm/main.rb b/transformer_lm/main.rb index 9225e19..0bdb87b 100644 --- a/transformer_lm/main.rb +++ b/transformer_lm/main.rb @@ -153,11 +153,11 @@ def run(options) end trainer.before_epoch do |ctx| - it = ctx.fetch(:epoch).to_i + iter = ctx.fetch(:epoch).to_i warmup = if options[:lr_warmup] <= 0 1.0 else - [1.0, it.to_f / options[:lr_warmup].to_f].min + [1.0, iter.to_f / options[:lr_warmup].to_f].min end optimizer.learning_rate = warmup * options[:learning_rate] end @@ -165,15 +165,15 @@ def run(options) losses = [] tic = Process.clock_gettime(Process::CLOCK_MONOTONIC) trainer.after_epoch do |ctx| - it = ctx.fetch(:epoch).to_i + 1 + iter = ctx.fetch(:epoch).to_i + 1 losses << ctx.fetch(:epoch_loss).to_f - if (it % options[:steps_per_report]).zero? + if (iter % options[:steps_per_report]).zero? toc = Process.clock_gettime(Process::CLOCK_MONOTONIC) train_loss = losses.sum / [losses.length, 1].max.to_f puts format( "Iter %d: Train loss %.3f, It/sec %.3f", - it, + iter, train_loss, options[:steps_per_report] / [toc - tic, 1e-9].max ) @@ -181,7 +181,7 @@ def run(options) tic = Process.clock_gettime(Process::CLOCK_MONOTONIC) end - next unless (it % options[:steps_per_eval]).zero? + next unless (iter % options[:steps_per_eval]).zero? eval_tic = Process.clock_gettime(Process::CLOCK_MONOTONIC) val_loss = eval_fn( @@ -194,7 +194,7 @@ def run(options) val_ppl = Math.exp(val_loss) puts format( "Iter %d: Val loss %.3f, Val ppl %.3f, Val took %.3fs", - it, + iter, val_loss, val_ppl, eval_toc - eval_tic