diff --git a/finetuning/lightning_modules/datasets/mathqa_reader.py b/finetuning/lightning_modules/datasets/mathqa_reader.py
index c9f3a873..0bf7c37c 100644
--- a/finetuning/lightning_modules/datasets/mathqa_reader.py
+++ b/finetuning/lightning_modules/datasets/mathqa_reader.py
@@ -4,6 +4,9 @@
 from finetuning.lightning_modules.datasets.base_reader import NL2CodeDataset, NL2CodeDataModule
 
 class MathQADataset(NL2CodeDataset):
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
 
     @overrides
     def get_train_instance(self, example: Dict[str, Any]) -> List[Dict[str, Any]]:
@@ -20,16 +23,12 @@ def setup(self, stage: Optional[str] = None):
         # OPTIONAL, called for every GPU/machine (assigning state is OK)
         assert stage in ["fit", "validate", "test"]
 
-        train_data = MathQADataset(file_path=self.train_file_path,
-                                   transformer_model_name=self.transformer_model_name,
-                                   max_instances=self.train_max_instances,
-                                   mask_context_loss=self.mask_context_loss,
-                                   mode="train", few_shot_n=self.few_shot_n)
+        train_data = MathQADataset(transformer_model_name=self.transformer_model_name,
+                                   mode="train",
+                                   **self.train_set_init_args)
         self.train_data = train_data
 
-        val_data = MathQADataset(file_path=self.val_file_path,
-                                 transformer_model_name=self.transformer_model_name,
-                                 max_instances=self.val_max_instances,
-                                 mask_context_loss=self.mask_context_loss,
-                                 mode="test", few_shot_n=self.few_shot_n)
+        val_data = MathQADataset(transformer_model_name=self.transformer_model_name,
+                                 mode="test",
+                                 **self.val_set_init_args)
         self.val_data = val_data
\ No newline at end of file
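The refactor above routes every split-specific constructor argument (file_path, max_instances, mask_context_loss, few_shot_n) through a single train_set_init_args / val_set_init_args dict instead of individual data-module fields. The base class that assembles those dicts (base_reader.py) is not part of this diff, so the sketch below is only a guess at its shape; every name in it is an assumption, not the repo's actual code:

    # Hypothetical sketch only -- the real NL2CodeDataModule lives in
    # base_reader.py, which this diff does not touch.
    from typing import Any, Dict, Optional

    class NL2CodeDataModule:
        def __init__(self,
                     transformer_model_name: str,
                     train_max_instances: int = -1,
                     val_max_instances: int = -1,
                     train_set_init_args: Optional[Dict[str, Any]] = None,
                     val_set_init_args: Optional[Dict[str, Any]] = None,
                     set_common_init_args: Optional[Dict[str, Any]] = None):
            self.transformer_model_name = transformer_model_name
            common = set_common_init_args or {}
            # shared args (e.g. use_skg_format) apply to both splits;
            # split-specific args such as file_path take precedence, and
            # max_instances is folded in so setup() no longer reads it
            self.train_set_init_args = {**common,
                                        "max_instances": train_max_instances,
                                        **(train_set_init_args or {})}
            self.val_set_init_args = {**common,
                                      "max_instances": val_max_instances,
                                      **(val_set_init_args or {})}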
diff --git a/finetuning/training_configs/gsmath_gpt_finetuning.yaml b/finetuning/training_configs/gsmath_gpt_finetuning.yaml
index 1299288d..82417002 100755
--- a/finetuning/training_configs/gsmath_gpt_finetuning.yaml
+++ b/finetuning/training_configs/gsmath_gpt_finetuning.yaml
@@ -1,61 +1,63 @@
 seed_everything: 333
 trainer:
-  gpus: 0, 1
+  gpus: 2, 3
   gradient_clip_val: 1.0
-  default_root_dir: &exp_name results/gsmath-gpt_neo_125M-finetuning
-  # default_root_dir: &exp_name results/debug-tmp
-  val_check_interval: 1.0
+  # default_root_dir: &exp_name results/gsmath-gpt_neo_125M-finetuning
+  default_root_dir: &exp_name results/debug-tmp
+  # val_check_interval: 1.0
+  check_val_every_n_epoch: 4
   max_steps: &max_steps 50000
   # progress_bar_refresh_rate: 1
+  num_sanity_val_steps: 0
   log_every_n_steps: 1
-  logger:
+  logger+:
     - class_path: finetuning.lightning_modules.patches.patched_loggers.PatchedWandbLogger
       init_args:
-        entity: yale-lily
-        project: unified-codegen
+        entity: niansong1996
+        project: cot-codegen
+        save_dir: *exp_name
         name: *exp_name
         log_model: False
         save_code: True
-        offline: True
-  callbacks:
+        offline: False
+        # offline: True
+  callbacks+:
     - class_path: pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint
       init_args:
         monitor: exec_acc
+        # monitor: pass@100
         mode: max
         filename: '{step}-{exec_acc:.4f}-{exec_rate:.4f}'
-        save_top_k: 5
+        # filename: '{step}-{pass@100:.4f}-{exec_acc:.4f}'
+        save_top_k: 3
+        save_last: True
     - class_path: pytorch_lightning.callbacks.LearningRateMonitor
       init_args:
         logging_interval: step
     - class_path: pytorch_lightning.callbacks.progress.TQDMProgressBar
       init_args:
         refresh_rate: 1
-    # - class_path: pytorch_lightning.callbacks.gpu_stats_monitor.GPUStatsMonitor
-    #   init_args:
-    #     memory_utilization: true
-    #     gpu_utilization: true
   accelerator: gpu
   # replace_sampler_ddp: False # https://github.com/PyTorchLightning/pytorch-lightning/issues/8262
   # strategy: deepspeed_stage_2
   strategy: ddp_find_unused_parameters_false
-  precision: 16
+  # precision: 16
   # accumulate_grad_batches: 4
 
 model:
   class_path: lightning_modules.models.seq2seq_model.Seq2SeqModel
   init_args:
     transformer_model_name: &transformer EleutherAI/gpt-neo-125M
-    exec_func: execution.mathqa_execution.mathqa_execution
-    answer_eq_func: execution.mathqa_execution.mathqa_answer_eq
-    get_gold_prog_func: finetuning.lightning_modules.datasets.mathqa_reader.get_gold_program_func
-    get_gold_answer_func: finetuning.lightning_modules.datasets.mathqa_reader.get_gold_answer_func
-    program_len_func: finetuning.lightning_modules.models.seq2seq_model_util.python_program_len
-    max_gen_len: 256
-    sampling_temp: 0.2
-    # sampling_temp_at_k: 0.8
-    # pass_at_k: 80
+    executor_cls: execution.executors.SpiderExecutor
+    # categorize_func: execution.spider_execution.spider_categorize_complexity
+    # category_list: ["JOIN", "NESTED", "COMPOUND", "SIMPLE"]
+    max_gen_len: 128
+    sampling_temp: 0.01
+    # sampling_temp_at_k: 1.0
+    # pass_at_k: 10
+    # load_ckpt_file: results/spider-t5_base-finetuning-skg_data-linear_lrs-bs_32/cot-codegen/29q9irfu/checkpoints/step=12862-exec_acc=0.5742-exec_rate=0.6855.ckpt
     # eval_pass_at_k_every_n_epochs: 1
     # max_generation_batches: 10
     gradient_ckpt: true
@@ -68,7 +70,7 @@ model:
         - 0.9
         - 0.999
       eps: 1.0e-8
-      weight_decay: 0.1
+      weight_decay: 0.01
     lr_scheduler:
       name: linear
       init_args:
@@ -81,7 +83,11 @@ data:
     transformer_model_name: *transformer
     batch_size: 1
     val_batch_size: 2
-    train_file_path: data/gsmath/gsmath_train_annotated.jsonl
-    val_file_path: data/gsmath/gsmath_val.jsonl
-    # train_max_instances: 20
-    # val_max_instances: 40
+    train_max_instances: 200
+    val_max_instances: 100
+    train_set_init_args:
+      file_path: data/gsmath/gsmath_train_annotated.jsonl
+    val_set_init_args:
+      file_path: data/gsmath/gsmath_val.jsonl
+    set_common_init_args:
+      use_skg_format: false
\ No newline at end of file
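The logger+ / callbacks+ keys appear to use jsonargparse's "+" suffix, which appends the listed entries to existing defaults instead of replacing them. A config in this class_path/init_args shape is normally consumed through Lightning's CLI; the entry point below is a minimal sketch under that assumption, and the repo's actual trainer script may well differ:

    # Sketch: parse the YAML above with LightningCLI in subclass mode
    # (import path as in PyTorch Lightning 1.x).
    from pytorch_lightning import LightningDataModule, LightningModule
    from pytorch_lightning.utilities.cli import LightningCLI

    if __name__ == "__main__":
        # e.g. python trainer.py fit --config finetuning/training_configs/gsmath_gpt_finetuning.yaml
        cli = LightningCLI(LightningModule, LightningDataModule,
                           subclass_mode_model=True, subclass_mode_data=True)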
diff --git a/finetuning/training_configs/mathqa_codet5_finetuning.yaml b/finetuning/training_configs/mathqa_codet5_finetuning.yaml
index abd3e1a3..ee030e6c 100755
--- a/finetuning/training_configs/mathqa_codet5_finetuning.yaml
+++ b/finetuning/training_configs/mathqa_codet5_finetuning.yaml
@@ -1,71 +1,76 @@
 seed_everything: 333
 trainer:
-  gpus: 2
+  gpus: 0, 1
   gradient_clip_val: 1.0
-  default_root_dir: debug-tmp
-  val_check_interval: 1.0
+  # default_root_dir: &exp_name results/mathqa-codet5-finetuning
+  default_root_dir: &exp_name results/debug-tmp
+  # val_check_interval: 1.0
+  check_val_every_n_epoch: 4
   max_steps: &max_steps 100000
   # progress_bar_refresh_rate: 1
+  num_sanity_val_steps: 0
   log_every_n_steps: 1
-  logger:
+  logger+:
     - class_path: finetuning.lightning_modules.patches.patched_loggers.PatchedWandbLogger
       init_args:
-        entity: yale-lily
-        project: unified-codegen
-        name: mathqa-gpt-finetuning
+        entity: niansong1996
+        project: cot-codegen
+        save_dir: *exp_name
+        name: *exp_name
         log_model: False
         save_code: True
-        tags:
-          - mathqa
-          - gpt-neo-125M
-          - finetuning
-  callbacks:
+        offline: False
+        # offline: True
+  callbacks+:
     - class_path: pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint
       init_args:
         monitor: exec_acc
+        # monitor: pass@100
         mode: max
         filename: '{step}-{exec_acc:.4f}-{exec_rate:.4f}'
-        save_top_k: 5
+        # filename: '{step}-{pass@100:.4f}-{exec_acc:.4f}'
+        save_top_k: 3
+        save_last: True
     - class_path: pytorch_lightning.callbacks.LearningRateMonitor
      init_args:
        logging_interval: step
    - class_path: pytorch_lightning.callbacks.progress.TQDMProgressBar
      init_args:
        refresh_rate: 1
-    # - class_path: pytorch_lightning.callbacks.gpu_stats_monitor.GPUStatsMonitor
-    #   init_args:
-    #     memory_utilization: true
-    #     gpu_utilization: true
   accelerator: gpu
   # replace_sampler_ddp: False # https://github.com/PyTorchLightning/pytorch-lightning/issues/8262
-  # strategy: deepspeed_stage_2_offload
+  # strategy: deepspeed_stage_2
   strategy: ddp_find_unused_parameters_false
-  precision: 16
+  # precision: 16
   # accumulate_grad_batches: 4
 
 model:
   class_path: lightning_modules.models.seq2seq_model.Seq2SeqModel
   init_args:
     transformer_model_name: &transformer Salesforce/codet5-base
-    max_gen_len: 256
-    sampling_temp: 0.2
-    # sampling_temp_at_k: 0.8
-    # pass_at_k: 80
+    executor_cls: execution.executors.SpiderExecutor
+    # categorize_func: execution.spider_execution.spider_categorize_complexity
+    # category_list: ["JOIN", "NESTED", "COMPOUND", "SIMPLE"]
+    max_gen_len: 128
+    sampling_temp: 0.01
+    # sampling_temp_at_k: 1.0
+    # pass_at_k: 10
+    # load_ckpt_file: results/spider-t5_base-finetuning-skg_data-linear_lrs-bs_32/cot-codegen/29q9irfu/checkpoints/step=12862-exec_acc=0.5742-exec_rate=0.6855.ckpt
     # eval_pass_at_k_every_n_epochs: 1
     # max_generation_batches: 10
-    gradient_ckpt: false
+    gradient_ckpt: true
     # eval_greedy_search: true
     optimizer:
       init_args:
-        lr: 1.0e-4
+        lr: 5.0e-5
         # lr: 0.0
         betas:
         - 0.9
         - 0.999
        eps: 1.0e-8
-        weight_decay: 0.1
+        weight_decay: 0.01
     lr_scheduler:
       name: linear
       init_args:
@@ -78,7 +83,12 @@ data:
     transformer_model_name: *transformer
     batch_size: 4
     val_batch_size: 4
-    train_file_path: data/mathqa/train-python.jsonl
-    val_file_path: data/mathqa/val-python.jsonl
-    # train_max_instances: 20
-    # val_max_instances: 40
\ No newline at end of file
+    train_max_instances: 200
+    val_max_instances: 100
+    train_set_init_args:
+      file_path: data/mathqa/train-python.jsonl
+    val_set_init_args:
+      file_path: data/mathqa/val-python.jsonl
+      # file_path: data/mathqa/val_python_with_states.jsonl
+    # set_common_init_args:
+    #   use_skg_format: false
\ No newline at end of file
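Both configs lower sampling_temp from 0.2 to 0.01, which makes sampling nearly deterministic (close to greedy decoding), and both switch from keeping the top 5 checkpoints to the top 3 plus the last one. For reference, the ModelCheckpoint entry corresponds to the following standard pytorch_lightning construction, shown only to make the filename template concrete:

    from pytorch_lightning.callbacks import ModelCheckpoint

    # Keep the 3 best checkpoints by execution accuracy, plus the latest;
    # exec_acc / exec_rate must match keys the model logs via self.log(...)
    # during validation for the monitor and filename template to resolve.
    checkpoint_cb = ModelCheckpoint(
        monitor="exec_acc",
        mode="max",
        filename="{step}-{exec_acc:.4f}-{exec_rate:.4f}",
        save_top_k=3,
        save_last=True,
    )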