Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 9 additions & 10 deletions finetuning/lightning_modules/datasets/mathqa_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
from finetuning.lightning_modules.datasets.base_reader import NL2CodeDataset, NL2CodeDataModule

class MathQADataset(NL2CodeDataset):

def __init__(self, **kwargs):
super().__init__(**kwargs)

@overrides
def get_train_instance(self, example: Dict[str, Any]) -> List[Dict[str, Any]]:
Expand All @@ -20,16 +23,12 @@ def setup(self, stage: Optional[str] = None):
# OPTIONAL, called for every GPU/machine (assigning state is OK)
assert stage in ["fit", "validate", "test"]

train_data = MathQADataset(file_path=self.train_file_path,
transformer_model_name=self.transformer_model_name,
max_instances=self.train_max_instances,
mask_context_loss=self.mask_context_loss,
mode="train", few_shot_n=self.few_shot_n)
train_data = MathQADataset(transformer_model_name=self.transformer_model_name,
mode="train",
**self.train_set_init_args)
self.train_data = train_data

val_data = MathQADataset(file_path=self.val_file_path,
transformer_model_name=self.transformer_model_name,
max_instances=self.val_max_instances,
mask_context_loss=self.mask_context_loss,
mode="test", few_shot_n=self.few_shot_n)
val_data = MathQADataset(transformer_model_name=self.transformer_model_name,
mode="test",
**self.val_set_init_args)
self.val_data = val_data
64 changes: 35 additions & 29 deletions finetuning/training_configs/gsmath_gpt_finetuning.yaml
Original file line number Diff line number Diff line change
@@ -1,61 +1,63 @@
seed_everything: 333
trainer:
gpus: 0, 1
gpus: 2, 3
gradient_clip_val: 1.0
default_root_dir: &exp_name results/gsmath-gpt_neo_125M-finetuning
# default_root_dir: &exp_name results/debug-tmp
val_check_interval: 1.0
# default_root_dir: &exp_name results/gsmath-gpt_neo_125M-finetuning
default_root_dir: &exp_name results/debug-tmp
# val_check_interval: 1.0
check_val_every_n_epoch: 4
max_steps: &max_steps 50000
# progress_bar_refresh_rate: 1
num_sanity_val_steps: 0
log_every_n_steps: 1
logger:
logger+:
- class_path: finetuning.lightning_modules.patches.patched_loggers.PatchedWandbLogger
init_args:
entity: yale-lily
project: unified-codegen
entity: niansong1996
project: cot-codegen
save_dir: *exp_name
name: *exp_name
log_model: False
save_code: True
offline: True
callbacks:
offline: False
# offline: True
callbacks+:
- class_path: pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint
init_args:
monitor: exec_acc
# monitor: pass@100
mode: max
filename: '{step}-{exec_acc:.4f}-{exec_rate:.4f}'
save_top_k: 5
# filename: '{step}-{pass@100:.4f}-{exec_acc:.4f}'
save_top_k: 3
save_last: True
- class_path: pytorch_lightning.callbacks.LearningRateMonitor
init_args:
logging_interval: step
- class_path: pytorch_lightning.callbacks.progress.TQDMProgressBar
init_args:
refresh_rate: 1
# - class_path: pytorch_lightning.callbacks.gpu_stats_monitor.GPUStatsMonitor
# init_args:
# memory_utilization: true
# gpu_utilization: true

accelerator: gpu
# replace_sampler_ddp: False
# https://github.com/PyTorchLightning/pytorch-lightning/issues/8262
# strategy: deepspeed_stage_2
strategy: ddp_find_unused_parameters_false
precision: 16
# precision: 16
# accumulate_grad_batches: 4

model:
class_path: lightning_modules.models.seq2seq_model.Seq2SeqModel
init_args:
transformer_model_name: &transformer EleutherAI/gpt-neo-125M
exec_func: execution.mathqa_execution.mathqa_execution
answer_eq_func: execution.mathqa_execution.mathqa_answer_eq
get_gold_prog_func: finetuning.lightning_modules.datasets.mathqa_reader.get_gold_program_func
get_gold_answer_func: finetuning.lightning_modules.datasets.mathqa_reader.get_gold_answer_func
program_len_func: finetuning.lightning_modules.models.seq2seq_model_util.python_program_len
max_gen_len: 256
sampling_temp: 0.2
# sampling_temp_at_k: 0.8
# pass_at_k: 80
executor_cls: execution.executors.SpiderExecutor
# categorize_func: execution.spider_execution.spider_categorize_complexity
# category_list: ["JOIN", "NESTED", "COMPOUND", "SIMPLE"]
max_gen_len: 128
sampling_temp: 0.01
# sampling_temp_at_k: 1.0
# pass_at_k: 10
# load_ckpt_file: results/spider-t5_base-finetuning-skg_data-linear_lrs-bs_32/cot-codegen/29q9irfu/checkpoints/step=12862-exec_acc=0.5742-exec_rate=0.6855.ckpt
# eval_pass_at_k_every_n_epochs: 1
# max_generation_batches: 10
gradient_ckpt: true
Expand All @@ -68,7 +70,7 @@ model:
- 0.9
- 0.999
eps: 1.0e-8
weight_decay: 0.1
weight_decay: 0.01
lr_scheduler:
name: linear
init_args:
Expand All @@ -81,7 +83,11 @@ data:
transformer_model_name: *transformer
batch_size: 1
val_batch_size: 2
train_file_path: data/gsmath/gsmath_train_annotated.jsonl
val_file_path: data/gsmath/gsmath_val.jsonl
# train_max_instances: 20
# val_max_instances: 40
train_max_instances: 200
val_max_instances: 100
train_set_init_args:
file_path: data/gsmath/gsmath_train_annotated.jsonl
val_set_init_args:
file_path: data/gsmath/gsmath_val.jsonl
set_common_init_args:
use_skg_format: false
70 changes: 40 additions & 30 deletions finetuning/training_configs/mathqa_codet5_finetuning.yaml
Original file line number Diff line number Diff line change
@@ -1,71 +1,76 @@
seed_everything: 333
trainer:
gpus: 2
gpus: 0, 1
gradient_clip_val: 1.0
default_root_dir: debug-tmp
val_check_interval: 1.0
# default_root_dir: &exp_name results/mathqa-codet5-finetuning
default_root_dir: &exp_name results/debug-tmp
# val_check_interval: 1.0
check_val_every_n_epoch: 4
max_steps: &max_steps 100000
# progress_bar_refresh_rate: 1
num_sanity_val_steps: 0
log_every_n_steps: 1
logger:
logger+:
- class_path: finetuning.lightning_modules.patches.patched_loggers.PatchedWandbLogger
init_args:
entity: yale-lily
project: unified-codegen
name: mathqa-gpt-finetuning
entity: niansong1996
project: cot-codegen
save_dir: *exp_name
name: *exp_name
log_model: False
save_code: True
tags:
- mathqa
- gpt-neo-125M
- finetuning
callbacks:
offline: False
# offline: True
callbacks+:
- class_path: pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint
init_args:
monitor: exec_acc
# monitor: pass@100
mode: max
filename: '{step}-{exec_acc:.4f}-{exec_rate:.4f}'
save_top_k: 5
# filename: '{step}-{pass@100:.4f}-{exec_acc:.4f}'
save_top_k: 3
save_last: True
- class_path: pytorch_lightning.callbacks.LearningRateMonitor
init_args:
logging_interval: step
- class_path: pytorch_lightning.callbacks.progress.TQDMProgressBar
init_args:
refresh_rate: 1
# - class_path: pytorch_lightning.callbacks.gpu_stats_monitor.GPUStatsMonitor
# init_args:
# memory_utilization: true
# gpu_utilization: true

accelerator: gpu
# replace_sampler_ddp: False
# https://github.com/PyTorchLightning/pytorch-lightning/issues/8262
# strategy: deepspeed_stage_2_offload
# strategy: deepspeed_stage_2
strategy: ddp_find_unused_parameters_false
precision: 16
# precision: 16
# accumulate_grad_batches: 4

model:
class_path: lightning_modules.models.seq2seq_model.Seq2SeqModel
init_args:
transformer_model_name: &transformer Salesforce/codet5-base
max_gen_len: 256
sampling_temp: 0.2
# sampling_temp_at_k: 0.8
# pass_at_k: 80
executor_cls: execution.executors.SpiderExecutor
# categorize_func: execution.spider_execution.spider_categorize_complexity
# category_list: ["JOIN", "NESTED", "COMPOUND", "SIMPLE"]
max_gen_len: 128
sampling_temp: 0.01
# sampling_temp_at_k: 1.0
# pass_at_k: 10
# load_ckpt_file: results/spider-t5_base-finetuning-skg_data-linear_lrs-bs_32/cot-codegen/29q9irfu/checkpoints/step=12862-exec_acc=0.5742-exec_rate=0.6855.ckpt
# eval_pass_at_k_every_n_epochs: 1
# max_generation_batches: 10
gradient_ckpt: false
gradient_ckpt: true
# eval_greedy_search: true
optimizer:
init_args:
lr: 1.0e-4
lr: 5.0e-5
# lr: 0.0
betas:
- 0.9
- 0.999
eps: 1.0e-8
weight_decay: 0.1
weight_decay: 0.01
lr_scheduler:
name: linear
init_args:
Expand All @@ -78,7 +83,12 @@ data:
transformer_model_name: *transformer
batch_size: 4
val_batch_size: 4
train_file_path: data/mathqa/train-python.jsonl
val_file_path: data/mathqa/val-python.jsonl
# train_max_instances: 20
# val_max_instances: 40
train_max_instances: 200
val_max_instances: 100
train_set_init_args:
file_path: data/mathqa/train-python.jsonl
val_set_init_args:
file_path: data/mathqa/val-python.jsonl
# file_path: data/mathqa/val_python_with_states.jsonl
# set_common_init_args:
# use_skg_format: false