diff --git a/finetuning/lightning_modules/datasets/mathqa_reader.py b/finetuning/lightning_modules/datasets/mathqa_reader.py
index c9f3a873..0bf7c37c 100644
--- a/finetuning/lightning_modules/datasets/mathqa_reader.py
+++ b/finetuning/lightning_modules/datasets/mathqa_reader.py
@@ -4,6 +4,9 @@
 from finetuning.lightning_modules.datasets.base_reader import NL2CodeDataset, NL2CodeDataModule
 
 class MathQADataset(NL2CodeDataset):
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
 
     @overrides
     def get_train_instance(self, example: Dict[str, Any]) -> List[Dict[str, Any]]:
@@ -20,16 +23,12 @@ def setup(self, stage: Optional[str] = None):
         # OPTIONAL, called for every GPU/machine (assigning state is OK)
         assert stage in ["fit", "validate", "test"]
 
-        train_data = MathQADataset(file_path=self.train_file_path,
-                                   transformer_model_name=self.transformer_model_name,
-                                   max_instances=self.train_max_instances,
-                                   mask_context_loss=self.mask_context_loss,
-                                   mode="train", few_shot_n=self.few_shot_n)
+        train_data = MathQADataset(transformer_model_name=self.transformer_model_name,
+                                   mode="train",
+                                   **self.train_set_init_args)
         self.train_data = train_data
 
-        val_data = MathQADataset(file_path=self.val_file_path,
-                                 transformer_model_name=self.transformer_model_name,
-                                 max_instances=self.val_max_instances,
-                                 mask_context_loss=self.mask_context_loss,
-                                 mode="test", few_shot_n=self.few_shot_n)
+        val_data = MathQADataset(transformer_model_name=self.transformer_model_name,
+                                 mode="test",
+                                 **self.val_set_init_args)
         self.val_data = val_data
\ No newline at end of file
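The refactor above routes every split-specific constructor argument (file_path, max_instances, mask_context_loss, few_shot_n) through a single train_set_init_args / val_set_init_args dict instead of individual data-module fields. The base class that assembles those dicts (base_reader.py) is not part of this diff, so the sketch below is only a guess at its shape; every name in it is an assumption, not the repo's actual code:

    # Hypothetical sketch only -- the real NL2CodeDataModule lives in
    # base_reader.py, which this diff does not touch.
    from typing import Any, Dict, Optional

    class NL2CodeDataModule:
        def __init__(self,
                     transformer_model_name: str,
                     train_max_instances: int = -1,
                     val_max_instances: int = -1,
                     train_set_init_args: Optional[Dict[str, Any]] = None,
                     val_set_init_args: Optional[Dict[str, Any]] = None,
                     set_common_init_args: Optional[Dict[str, Any]] = None):
            self.transformer_model_name = transformer_model_name
            common = set_common_init_args or {}
            # shared args (e.g. use_skg_format) apply to both splits;
            # split-specific args such as file_path take precedence, and
            # max_instances is folded in so setup() no longer reads it
            self.train_set_init_args = {**common,
                                        "max_instances": train_max_instances,
                                        **(train_set_init_args or {})}
            self.val_set_init_args = {**common,
                                      "max_instances": val_max_instances,
                                      **(val_set_init_args or {})}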
diff --git a/finetuning/training_configs/gsmath_gpt_finetuning.yaml b/finetuning/training_configs/gsmath_gpt_finetuning.yaml
index 1299288d..82417002 100755
--- a/finetuning/training_configs/gsmath_gpt_finetuning.yaml
+++ b/finetuning/training_configs/gsmath_gpt_finetuning.yaml
@@ -1,61 +1,63 @@
 seed_everything: 333
 trainer:
-  gpus: 0, 1
+  gpus: 2, 3
   gradient_clip_val: 1.0
-  default_root_dir: &exp_name results/gsmath-gpt_neo_125M-finetuning
-  # default_root_dir: &exp_name results/debug-tmp
-  val_check_interval: 1.0
+  # default_root_dir: &exp_name results/gsmath-gpt_neo_125M-finetuning
+  default_root_dir: &exp_name results/debug-tmp
+  # val_check_interval: 1.0
+  check_val_every_n_epoch: 4
   max_steps: &max_steps 50000
   # progress_bar_refresh_rate: 1
+  num_sanity_val_steps: 0
   log_every_n_steps: 1
-  logger:
+  logger+:
     - class_path: finetuning.lightning_modules.patches.patched_loggers.PatchedWandbLogger
       init_args:
-        entity: yale-lily
-        project: unified-codegen
+        entity: niansong1996
+        project: cot-codegen
+        save_dir: *exp_name
         name: *exp_name
         log_model: False
         save_code: True
-        offline: True
-  callbacks:
+        offline: False
+        # offline: True
+  callbacks+:
     - class_path: pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint
       init_args:
         monitor: exec_acc
+        # monitor: pass@100
         mode: max
         filename: '{step}-{exec_acc:.4f}-{exec_rate:.4f}'
-        save_top_k: 5
+        # filename: '{step}-{pass@100:.4f}-{exec_acc:.4f}'
+        save_top_k: 3
+        save_last: True
     - class_path: pytorch_lightning.callbacks.LearningRateMonitor
       init_args:
         logging_interval: step
     - class_path: pytorch_lightning.callbacks.progress.TQDMProgressBar
       init_args:
         refresh_rate: 1
-    # - class_path: pytorch_lightning.callbacks.gpu_stats_monitor.GPUStatsMonitor
-    #   init_args:
-    #     memory_utilization: true
-    #     gpu_utilization: true
   accelerator: gpu
   # replace_sampler_ddp: False # https://github.com/PyTorchLightning/pytorch-lightning/issues/8262
   # strategy: deepspeed_stage_2
   strategy: ddp_find_unused_parameters_false
-  precision: 16
+  # precision: 16
   # accumulate_grad_batches: 4
 
 model:
   class_path: lightning_modules.models.seq2seq_model.Seq2SeqModel
   init_args:
     transformer_model_name: &transformer EleutherAI/gpt-neo-125M
-    exec_func: execution.mathqa_execution.mathqa_execution
-    answer_eq_func: execution.mathqa_execution.mathqa_answer_eq
-    get_gold_prog_func: finetuning.lightning_modules.datasets.mathqa_reader.get_gold_program_func
-    get_gold_answer_func: finetuning.lightning_modules.datasets.mathqa_reader.get_gold_answer_func
-    program_len_func: finetuning.lightning_modules.models.seq2seq_model_util.python_program_len
-    max_gen_len: 256
-    sampling_temp: 0.2
-    # sampling_temp_at_k: 0.8
-    # pass_at_k: 80
+    executor_cls: execution.executors.SpiderExecutor
+    # categorize_func: execution.spider_execution.spider_categorize_complexity
+    # category_list: ["JOIN", "NESTED", "COMPOUND", "SIMPLE"]
+    max_gen_len: 128
+    sampling_temp: 0.01
+    # sampling_temp_at_k: 1.0
+    # pass_at_k: 10
+    # load_ckpt_file: results/spider-t5_base-finetuning-skg_data-linear_lrs-bs_32/cot-codegen/29q9irfu/checkpoints/step=12862-exec_acc=0.5742-exec_rate=0.6855.ckpt
     # eval_pass_at_k_every_n_epochs: 1
     # max_generation_batches: 10
     gradient_ckpt: true
@@ -68,7 +70,7 @@ model:
         - 0.9
         - 0.999
       eps: 1.0e-8
-      weight_decay: 0.1
+      weight_decay: 0.01
     lr_scheduler:
       name: linear
       init_args:
@@ -81,7 +83,11 @@ data:
     transformer_model_name: *transformer
     batch_size: 1
     val_batch_size: 2
-    train_file_path: data/gsmath/gsmath_train_annotated.jsonl
-    val_file_path: data/gsmath/gsmath_val.jsonl
-    # train_max_instances: 20
-    # val_max_instances: 40
+    train_max_instances: 200
+    val_max_instances: 100
+    train_set_init_args:
+      file_path: data/gsmath/gsmath_train_annotated.jsonl
+    val_set_init_args:
+      file_path: data/gsmath/gsmath_val.jsonl
+    set_common_init_args:
+      use_skg_format: false
\ No newline at end of file
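The logger+ / callbacks+ keys appear to use jsonargparse's "+" suffix, which appends the listed entries to existing defaults instead of replacing them. A config in this class_path/init_args shape is normally consumed through Lightning's CLI; the entry point below is a minimal sketch under that assumption, and the repo's actual trainer script may well differ:

    # Sketch: parse the YAML above with LightningCLI in subclass mode
    # (import path as in PyTorch Lightning 1.x).
    from pytorch_lightning import LightningDataModule, LightningModule
    from pytorch_lightning.utilities.cli import LightningCLI

    if __name__ == "__main__":
        # e.g. python trainer.py fit --config finetuning/training_configs/gsmath_gpt_finetuning.yaml
        cli = LightningCLI(LightningModule, LightningDataModule,
                           subclass_mode_model=True, subclass_mode_data=True)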
diff --git a/finetuning/training_configs/mathqa_codet5_finetuning.yaml b/finetuning/training_configs/mathqa_codet5_finetuning.yaml
index abd3e1a3..ee030e6c 100755
--- a/finetuning/training_configs/mathqa_codet5_finetuning.yaml
+++ b/finetuning/training_configs/mathqa_codet5_finetuning.yaml
@@ -1,71 +1,76 @@
 seed_everything: 333
 trainer:
-  gpus: 2
+  gpus: 0, 1
   gradient_clip_val: 1.0
-  default_root_dir: debug-tmp
-  val_check_interval: 1.0
+  # default_root_dir: &exp_name results/mathqa-codet5-finetuning
+  default_root_dir: &exp_name results/debug-tmp
+  # val_check_interval: 1.0
+  check_val_every_n_epoch: 4
   max_steps: &max_steps 100000
   # progress_bar_refresh_rate: 1
+  num_sanity_val_steps: 0
   log_every_n_steps: 1
-  logger:
+  logger+:
     - class_path: finetuning.lightning_modules.patches.patched_loggers.PatchedWandbLogger
       init_args:
-        entity: yale-lily
-        project: unified-codegen
-        name: mathqa-gpt-finetuning
+        entity: niansong1996
+        project: cot-codegen
+        save_dir: *exp_name
+        name: *exp_name
         log_model: False
         save_code: True
-        tags:
-          - mathqa
-          - gpt-neo-125M
-          - finetuning
-  callbacks:
+        offline: False
+        # offline: True
+  callbacks+:
     - class_path: pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint
       init_args:
         monitor: exec_acc
+        # monitor: pass@100
         mode: max
         filename: '{step}-{exec_acc:.4f}-{exec_rate:.4f}'
-        save_top_k: 5
+        # filename: '{step}-{pass@100:.4f}-{exec_acc:.4f}'
+        save_top_k: 3
+        save_last: True
     - class_path: pytorch_lightning.callbacks.LearningRateMonitor
      init_args:
        logging_interval: step
    - class_path: pytorch_lightning.callbacks.progress.TQDMProgressBar
      init_args:
        refresh_rate: 1
-    # - class_path: pytorch_lightning.callbacks.gpu_stats_monitor.GPUStatsMonitor
-    #   init_args:
-    #     memory_utilization: true
-    #     gpu_utilization: true
   accelerator: gpu
   # replace_sampler_ddp: False # https://github.com/PyTorchLightning/pytorch-lightning/issues/8262
-  # strategy: deepspeed_stage_2_offload
+  # strategy: deepspeed_stage_2
   strategy: ddp_find_unused_parameters_false
-  precision: 16
+  # precision: 16
   # accumulate_grad_batches: 4
 
 model:
   class_path: lightning_modules.models.seq2seq_model.Seq2SeqModel
   init_args:
     transformer_model_name: &transformer Salesforce/codet5-base
-    max_gen_len: 256
-    sampling_temp: 0.2
-    # sampling_temp_at_k: 0.8
-    # pass_at_k: 80
+    executor_cls: execution.executors.SpiderExecutor
+    # categorize_func: execution.spider_execution.spider_categorize_complexity
+    # category_list: ["JOIN", "NESTED", "COMPOUND", "SIMPLE"]
+    max_gen_len: 128
+    sampling_temp: 0.01
+    # sampling_temp_at_k: 1.0
+    # pass_at_k: 10
+    # load_ckpt_file: results/spider-t5_base-finetuning-skg_data-linear_lrs-bs_32/cot-codegen/29q9irfu/checkpoints/step=12862-exec_acc=0.5742-exec_rate=0.6855.ckpt
     # eval_pass_at_k_every_n_epochs: 1
     # max_generation_batches: 10
-    gradient_ckpt: false
+    gradient_ckpt: true
     # eval_greedy_search: true
     optimizer:
       init_args:
-        lr: 1.0e-4
+        lr: 5.0e-5
         # lr: 0.0
         betas:
         - 0.9
         - 0.999
        eps: 1.0e-8
-        weight_decay: 0.1
+        weight_decay: 0.01
     lr_scheduler:
       name: linear
       init_args:
@@ -78,7 +83,12 @@ data:
     transformer_model_name: *transformer
     batch_size: 4
     val_batch_size: 4
-    train_file_path: data/mathqa/train-python.jsonl
-    val_file_path: data/mathqa/val-python.jsonl
-    # train_max_instances: 20
-    # val_max_instances: 40
\ No newline at end of file
+    train_max_instances: 200
+    val_max_instances: 100
+    train_set_init_args:
+      file_path: data/mathqa/train-python.jsonl
+    val_set_init_args:
+      file_path: data/mathqa/val-python.jsonl
+      # file_path: data/mathqa/val_python_with_states.jsonl
+    # set_common_init_args:
+    #   use_skg_format: false
\ No newline at end of file
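Both configs lower sampling_temp from 0.2 to 0.01, which makes sampling nearly deterministic (close to greedy decoding), and both switch from keeping the top 5 checkpoints to the top 3 plus the last one. For reference, the ModelCheckpoint entry corresponds to the following standard pytorch_lightning construction, shown only to make the filename template concrete:

    from pytorch_lightning.callbacks import ModelCheckpoint

    # Keep the 3 best checkpoints by execution accuracy, plus the latest;
    # exec_acc / exec_rate must match keys the model logs via self.log(...)
    # during validation for the monitor and filename template to resolve.
    checkpoint_cb = ModelCheckpoint(
        monitor="exec_acc",
        mode="max",
        filename="{step}-{exec_acc:.4f}-{exec_rate:.4f}",
        save_top_k=3,
        save_last=True,
    )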