10 changes: 10 additions & 0 deletions .gitignore
@@ -124,3 +124,13 @@ dmypy.json

# Pyre type checker
.pyre/

final_combined_commits.json
large_mined_templates.json
patches
prompt_patches
transformers
beam_search.py
requirements.txt
distances_*.csv
typefix/benchmarks
21 changes: 17 additions & 4 deletions README.md
@@ -20,6 +20,18 @@ For more details, please check `final_combined_commits.json`.

All source code is included in the `typefix/` directory.

### Step 0: Initial Preparation
Download the [code](https://zenodo.org/records/6819444/files/code.zip) for AlphaRepair, extract the archive, copy `bert_beam_search.py` into the `typefix/` directory, and rename it to `beam_search.py`.
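If you prefer a scripted version, here is a minimal Python sketch of this step; it assumes `bert_beam_search.py` sits at the top level of the extracted archive, so adjust the source path if the layout differs:

```python
import urllib.request
import zipfile
import shutil

# Fetch the AlphaRepair archive and place bert_beam_search.py into
# typefix/ under the name beam_search.py. The extracted layout is an
# assumption; adjust the source path if the file sits elsewhere.
urllib.request.urlretrieve(
    "https://zenodo.org/records/6819444/files/code.zip", "code.zip")
with zipfile.ZipFile("code.zip") as archive:
    archive.extractall("alpharepair")
shutil.copy("alpharepair/bert_beam_search.py", "typefix/beam_search.py")
```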

```
cd typefix
pip install pipreqs
pipreqs .
pip install -r requirements.txt
cp -rpv ../benchmarks .
```

The above commands install `pipreqs`, use it to generate `requirements.txt`, install the listed Python dependencies, and copy `benchmarks` into the `typefix/` directory.

### Step 1: Mining Fix Template

```
@@ -37,6 +49,7 @@ python patch_generator.py
The above command generates code prompts based on the fix templates mined into `large_mined_templates.json` and stores them as several Python files under the directory `patches/{benchmark_name}`, for both the `BugsInPy` and `TypeBugs` benchmarks.

### Step 3: Generating Candidate Patches
Run `nvidia-smi` and check the numbers in the `GPU` column. If no GPU is numbered `1` (for example, your machine has a single GPU numbered `0`), change `DEVICE = "cuda:1"` in [repair.py](https://github.com/JohnnyPeng18/TypeFix/blob/55d9368bc5f37d51a2abd56f325806e5989a82d8/typefix/repair.py#L16) to one of the numbers you see (e.g., `DEVICE = "cuda:0"`).
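If you would rather not hard-code the index, a minimal sketch of an adaptive fallback (assuming PyTorch is already installed; note that `repair.py` itself hard-codes the constant):

```python
import torch

# Use the first visible GPU if one exists, otherwise fall back to CPU.
# Illustrative only: repair.py hard-codes DEVICE at module level.
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
```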

```
python repair.py
@@ -58,17 +71,17 @@ You can run the above two function calls in `evaluate.py` to evaluate the templa
**Exact Match:**

```python
evaluate_exactmatch('prompt_patches/typebugs', 'patches/typebugs', 'benchmarks/typebugs', 'benchmarks/all_bug_info_typebugs.json', mask_all = False, benchmark = 'typebugs')
evaluate_exactmatch('prompt_patches/bugsinpy', 'patches/bugsinpy', 'benchmarks/bugsinpy', 'benchmarks/all_bug_info_bugsinpy.json', mask_all = False, benchmark = 'bugsinpy')
evaluate_exactmatch('prompt_patches/typebugs', 'patches/typebugs', 'benchmarks/typebugs', 'benchmarks/all_bug_info_typebugs.json', benchmark = 'typebugs')
evaluate_exactmatch('prompt_patches/bugsinpy', 'patches/bugsinpy', 'benchmarks/bugsinpy', 'benchmarks/all_bug_info_bugsinpy.json', benchmark = 'bugsinpy')
```

You can run the above two function calls in `evaluate.py` to evaluate in how many cases TypeFix generates patches that exactly match the developer patches (i.e., have identical ASTs). Note that this result is neither the **Correct** metric nor the **Plausible** metric, both of which require human inspection or test-case validation. This step speeds up the validation of generated patches: a patch that exactly matches the developer patch is both correct and plausible, so no further validation is required. For the definitions of correct and plausible patches, please refer to Section 3.2.2 of the paper.
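The exact-match criterion reduces to AST equality. A minimal sketch of the idea, not TypeFix's actual implementation:

```python
import ast

def same_ast(patch_source: str, developer_source: str) -> bool:
    # Parsing discards formatting and comments, so comparing the
    # dumped ASTs checks structural equality only.
    return ast.dump(ast.parse(patch_source)) == ast.dump(ast.parse(developer_source))
```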

**Check Plausible Patches:**

```python
gen_test_script('prompt_patches/typebugs/correctness_failed_cases.json', split = 5, benchmark = "typebugs")
gen_test_script('prompt_patches/bugsinpy/correctness_failed_cases.json', split = 5, benchmark = "bugsinpy")
gen_test_script('prompt_patches/typebugs/exactmatch_failed_cases.json', split = 5, benchmark = "typebugs")
gen_test_script('prompt_patches/bugsinpy/exactmatch_failed_cases.json', split = 5, benchmark = "bugsinpy")
```

You can run the above two function calls in `evaluate.py` to generate test scripts, and then follow the instructions in [PyTER](https://github.com/kupl/PyTER/blob/main/INSTALL.md) to build the Docker images and run the test cases. Patches that pass all test cases are considered plausible.
13 changes: 7 additions & 6 deletions typefix/evaluate.py
@@ -1,6 +1,7 @@
import json
import os
import ast
import math
from tqdm import tqdm
from difflib import Differ
from patch_generator import PatchGenerator
Expand Down Expand Up @@ -131,7 +132,7 @@ def evaluate_template_coverage(metafile, benchmark_path, template_file, benchmar
buggy_file = os.path.join(path, bf)
logger.debug(f'------------------------------Evaluating buggy file #{buggy_file}------------------------------')
try:
patches[r][i][buggy_file] = generator.run_one(buggy_file, buglines = buglines, added = added)
patches[r][i][buggy_file] = generator.run_one(f'{r}/{i}', buggy_file, buglines = buglines, added = added)
except Exception as e:
traceback.print_exc()
logger.debug('Error occurred when generating patches, reason: {}.'.format(e))
@@ -216,7 +217,7 @@ def evaluate_template_coverage(metafile, benchmark_path, template_file, benchmar
buggy_file = os.path.join(path, bf)
logger.debug(f'------------------------------Evaluating buggy file #{buggy_file}------------------------------')
try:
patches[r][buggy_file] = generator.run_one(buggy_file, buglines = buglines, added = added)
patches[r][buggy_file] = generator.run_one(r, buggy_file, buglines = buglines, added = added)
except Exception as e:
traceback.print_exc()
logger.debug('Error occurred when generating patches, reason: {}.'.format(e))
Expand Down Expand Up @@ -630,10 +631,10 @@ def gen_test_script(failed_file, split = 1, benchmark = "bugsinpy"):


if __name__ == "__main__":
evaluate_template_coverage('benchmarks/all_bug_info_typebugs.json', 'benchmarks/typebugs', 'large_min5_templates.json', benchmark = 'typebugs', remove_comment = True)#, patch_path = '/Users/py/workspace/typefix/patches_v2/typebugs')
evaluate_template_coverage('benchmarks/all_bug_info_bugsinpy.json', 'benchmarks/bugsinpy', 'large_min5_templates.json', benchmark = 'bugsinpy', remove_comment = True)#, patch_path = '/Users/py/workspace/typefix/patches_v2/bugsinpy')
evaluate_exactmatch('prompt_patches/typebugs', 'patches/typebugs', 'benchmarks/typebugs', 'benchmarks/all_bug_info_typebugs.json', mask_all = False, benchmark = 'typebugs')
evaluate_exactmatch('prompt_patches/bugsinpy', 'patches/bugsinpy', 'benchmarks/bugsinpy', 'benchmarks/all_bug_info_bugsinpy.json', mask_all = False, benchmark = 'bugsinpy')
evaluate_template_coverage('benchmarks/all_bug_info_typebugs.json', 'benchmarks/typebugs', 'large_mined_templates.json', benchmark = 'typebugs', remove_comment = True)#, patch_path = '/Users/py/workspace/typefix/patches_v2/typebugs')
evaluate_template_coverage('benchmarks/all_bug_info_bugsinpy.json', 'benchmarks/bugsinpy', 'large_mined_templates.json', benchmark = 'bugsinpy', remove_comment = True)#, patch_path = '/Users/py/workspace/typefix/patches_v2/bugsinpy')
evaluate_exactmatch('prompt_patches/typebugs', 'patches/typebugs', 'benchmarks/typebugs', 'benchmarks/all_bug_info_typebugs.json', benchmark = 'typebugs')
evaluate_exactmatch('prompt_patches/bugsinpy', 'patches/bugsinpy', 'benchmarks/bugsinpy', 'benchmarks/all_bug_info_bugsinpy.json', benchmark = 'bugsinpy')
gen_test_script('prompt_patches/typebugs/exactmatch_failed_cases.json', split = 5, benchmark = "typebugs")
gen_test_script('prompt_patches/bugsinpy/exactmatch_failed_cases.json', split = 5, benchmark = "bugsinpy")

28 changes: 17 additions & 11 deletions typefix/patch_generator.py
@@ -20,6 +20,8 @@ def __init__(self, template_file, remove_comment = False):
self.format_templates()
self.remove_comment = remove_comment
self.benchmark = 'bugsinpy'
if not os.path.exists('patches'):
os.system('mkdir -p patches/typebugs patches/bugsinpy')



@@ -643,8 +645,10 @@ def print_ast_changes(self, ori2news):
logger.debug(' From: {} to {}'.format(ast.dump(o), ast.dump(ori2new[o]) if ori2new[o] else None))

def dump_patches(self, patches, filerepo):
if not os.path.exists(filerepo):
os.system(f'mkdir -p {filerepo}')
for i, p in enumerate(patches):
with open(os.path.join(filerepo, 'Patch_{}_from_{}.py'.format(i, patches[p][1])), 'w', encoding = 'utf-8') as pf:
with open(os.path.join(filerepo, '{}_from_{}.py'.format(i, patches[p][1])), 'w', encoding = 'utf-8') as pf:
pf.write(ast.unparse(patches[p][0]))


@@ -761,7 +765,8 @@ def implement_templates(self, parsed_info):
patches[index] = [new_root, t.id, t.action, newsource]
index += 1
cur_num += 1
self.dump_patches(patches, 'patches/{}'.format(self.benchmark))
patches_path = os.path.join('patches', self.benchmark, self.patch_folder, f'TypeErrorFix/{self.buggy_file}'.replace('/', '_'))
self.dump_patches(patches, patches_path)
return patches


@@ -775,9 +780,10 @@ def print_info(self, parsed_info):
for g in p["selected_templates"][k]:
logger.debug("{}".format([(t.id, round(t.before_within.cal_abstract_ratio(), 2) if t.before_within else 0, len(t.instances)) for t in g]))

def run_one(self, buggy_file, buglines = None, added = None):
def run_one(self, patch_folder, buggy_file, buglines = None, added = None):
#os.system('rm -rf figures2/*')
logger.info('Generating patches for buggy file {}'.format(buggy_file))
self.patch_folder = patch_folder
self.buggy_file = buggy_file
try:
self.buggy_source = open(self.buggy_file, "r", encoding = "utf-8").read()
@@ -810,24 +816,24 @@ def run_all(self, metafile, benchmark_path, benchmark = 'bugsinpy'):
for r in metadata:
for i in metadata[r]:
path = os.path.join(benchmark_path, r, f'{r}-{i}')
for f in metadata[r][i]['code_files']:
for f in metadata[r][i]['buglines']:
if not f.endswith('.py'):
continue
buggy_file = os.path.join(path, f)
patch_file = self.run_one(buggy_file, buglines = metadata[r][i]['buglines'][f], added = metadata[r][i]['added'][f])
patch_file = self.run_one(f'{r}/{i}', buggy_file, buglines = metadata[r][i]['buglines'][f], added = metadata[r][i]['added'][f])
if patch_file == None:
logger.error('Cannot generate patch files for buggy file {}.'.format(buggy_file))
continue
elif benchmark == 'typebugs':
for r in metadata:
if r != 'numpy/numpy-9999':
continue
# if r != 'numpy/numpy-9999':
# continue
path = os.path.join(benchmark_path, r)
for f in metadata[r]['code_files']:
for f in metadata[r]['buglines']:
if not f.endswith('.py'):
continue
buggy_file = os.path.join(path, f)
patches = self.run_one(buggy_file, buglines = metadata[r]['buglines'][f], added = metadata[r]['added'][f])
patches = self.run_one(r, buggy_file, buglines = metadata[r]['buglines'][f], added = metadata[r]['added'][f])

def test_one(self, metadata, template):
for index, i in enumerate(template.instances):
@@ -907,5 +913,5 @@ def test_all(self, metafile):

if __name__ == "__main__":
generator = PatchGenerator('large_mined_templates.json')
generator.run_all('all_bug_info_bugsinpy.json', 'benchmarks/bugsinpy')
generator.run_all('all_bug_info_typebugs.json', 'benchmarks/typebugs', benchmark = 'typebugs')
generator.run_all('benchmarks/all_bug_info_bugsinpy.json', 'benchmarks/bugsinpy')
generator.run_all('benchmarks/all_bug_info_typebugs.json', 'benchmarks/typebugs', benchmark = 'typebugs')
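With this change, `run_one` takes the patch folder (the `repo/bug-id` pair) as its first argument and routes generated patches to `patches/{benchmark}/{patch_folder}/...`. A usage sketch based on the diff above; the buggy-file path and the `buglines`/`added` values are hypothetical placeholders:

```python
from patch_generator import PatchGenerator

generator = PatchGenerator('large_mined_templates.json')
# New signature: patch_folder comes first, then the buggy file.
# The path and bug metadata below are placeholder values.
patches = generator.run_one(
    'numpy/numpy-9999',
    'benchmarks/typebugs/numpy/numpy-9999/some_buggy_file.py',
    buglines=[42],
    added=[False],
)
```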
2 changes: 2 additions & 0 deletions typefix/repair.py
@@ -25,6 +25,8 @@ def __init__(self):
self.tokenizer = RobertaTokenizer.from_pretrained("Salesforce/codet5-base", cache_dir = './transformers')
self.mask_token = '<mask>'
self.model_type = "codet5"
if not os.path.exists('prompt_patches'):
os.system('mkdir -p prompt_patches/typebugs prompt_patches/bugsinpy')

def process_file(self, patch_source, buggy_lines = None, added = None):
if buggy_lines == None:
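A side note on the two `os.system('mkdir -p ...')` calls added in this PR: they assume a POSIX shell. A portable standard-library sketch of the same setup:

```python
import os

# Equivalent to `mkdir -p`, and works on any platform.
for directory in ('prompt_patches/typebugs', 'prompt_patches/bugsinpy'):
    os.makedirs(directory, exist_ok=True)
```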