-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcollect_metrics.py
More file actions
101 lines (87 loc) · 3.82 KB
/
collect_metrics.py
File metadata and controls
101 lines (87 loc) · 3.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import os
import json
import subprocess
from datetime import datetime
from typing import Dict, List, Optional
class BugMetricsCollector:
    """Collect per-bug baseline metrics from a bug-dataset checkout.

    The checkout is expected to be laid out as
    ``<repo_path>/projects/<project>/bugs/<bug>/bug.info``, where each
    ``bug.info`` file holds ``key="value"`` lines.
    """

    def __init__(self, repo_path: str):
        """Remember the dataset root directory.

        Args:
            repo_path: Directory that contains the ``projects`` folder.
        """
        self.repo_path = repo_path
        # Size category per project directory, so the tree is walked once
        # per project instead of once per bug.
        self._size_cache: Dict[str, str] = {}

    def get_project_size(self, project_path: str) -> str:
        """Classify a project by the number of lines in its ``.py`` files.

        Args:
            project_path: Directory to walk recursively.

        Returns:
            ``'S'`` for fewer than 1000 lines, ``'M'`` for fewer than 10000,
            otherwise ``'L'``. A missing directory counts as zero lines.
        """
        total_lines = 0
        for root, _, files in os.walk(project_path):
            for name in files:
                if not name.endswith('.py'):
                    continue
                try:
                    # errors='replace' keeps a stray non-UTF-8 byte from
                    # aborting the whole count; it never changes the number
                    # of lines.
                    with open(os.path.join(root, name), 'r',
                              encoding='utf-8', errors='replace') as f:
                        total_lines += sum(1 for _ in f)
                except OSError:
                    # Unreadable entry (permissions, dangling symlink):
                    # skip it rather than lose the whole project.
                    continue
        if total_lines < 1000:
            return 'S'
        if total_lines < 10000:
            return 'M'
        return 'L'

    def get_commit_info(self, commit_hash: str) -> Dict:
        """Return the files touched by a commit.

        ``git`` is run in the current working directory, so that directory
        must be inside a clone that contains the commit.

        Args:
            commit_hash: Commit id (or any revision ``git show`` accepts).

        Returns:
            ``{'hash': commit_hash, 'affected_files': [...]}``. The file list
            is empty when git is unavailable or the commit cannot be shown.
        """
        files: List[str] = []
        # A value starting with '-' would be parsed by git as an option.
        if commit_hash and not commit_hash.startswith('-'):
            # Argument list (no shell) so the commit id is never interpreted
            # by a shell; an empty pretty format suppresses the commit
            # header so only file names are printed.
            cmd = ['git', 'show', '--name-only', '--pretty=format:', commit_hash]
            try:
                result = subprocess.run(cmd, capture_output=True, text=True)
            except OSError:
                result = None  # git executable not found / not runnable
            if result is not None and result.returncode == 0:
                files = [line for line in result.stdout.splitlines()
                         if line.strip()]
        return {
            'hash': commit_hash,
            'affected_files': files
        }

    @staticmethod
    def _read_info_file(path: str) -> Dict[str, str]:
        """Parse a ``key="value"`` file into a dict, skipping malformed lines."""
        info: Dict[str, str] = {}
        with open(path, 'r', encoding='utf-8') as f:
            for raw_line in f:
                line = raw_line.strip()
                if '=' not in line:
                    continue  # blank or malformed line
                # Split on the first '=' only: values may contain '='.
                key, _, value = line.partition('=')
                info[key.strip()] = value.strip().strip('"')
        return info

    @staticmethod
    def _project_dir_for(bug_dir: str) -> str:
        """Return the project directory that owns ``bug_dir``."""
        parent = os.path.dirname(os.path.normpath(bug_dir))
        grandparent = os.path.dirname(parent)
        # Layout is <project>/bugs/<bug>: step over the intermediate 'bugs'
        # folder so the project name is not reported as "bugs".
        if os.path.basename(parent) == 'bugs' and os.path.basename(grandparent):
            return grandparent
        return parent

    def _cached_project_size(self, project_path: str) -> str:
        """Return the size category for ``project_path``, computing it once."""
        if project_path not in self._size_cache:
            self._size_cache[project_path] = self.get_project_size(project_path)
        return self._size_cache[project_path]

    @staticmethod
    def _bug_sort_key(name: str):
        """Order numeric bug folder names numerically, others alphabetically."""
        if name.isdecimal():
            return (0, int(name), name)
        return (1, 0, name)

    def collect_bug_data(self, bug_dir: str) -> Dict:
        """Collect the metrics record for a single bug.

        Args:
            bug_dir: Bug directory containing a ``bug.info`` file.

        Returns:
            A dict with a ``project`` section (repo URL, size category,
            language) and a ``change`` section (commit ids, affected files,
            test paths, timestamp).

        Raises:
            OSError: If ``bug.info`` cannot be read.
            KeyError: If ``bug.info`` lacks ``fixed_commit_id``,
                ``buggy_commit_id`` or ``test_file``.
        """
        bug_info = self._read_info_file(os.path.join(bug_dir, 'bug.info'))

        project_dir = self._project_dir_for(bug_dir)
        project_name = os.path.basename(project_dir)

        # Prefer the URL recorded next to the project (project.info with a
        # github_url entry); otherwise keep the historical default URL.
        repo_url = f"https://github.com/python/{project_name}"
        project_info_path = os.path.join(project_dir, 'project.info')
        if os.path.isfile(project_info_path):
            try:
                repo_url = self._read_info_file(project_info_path).get(
                    'github_url') or repo_url
            except (OSError, UnicodeDecodeError):
                pass  # unreadable project.info: fall back to the default URL

        test_file = bug_info['test_file']
        fixed_commit = bug_info['fixed_commit_id']
        return {
            "project": {
                "repo_url": repo_url,
                "size_category": self._cached_project_size(
                    os.path.join(self.repo_path, 'projects', project_name)),
                "language": "Python"
            },
            "change": {
                "type": "bugfix",
                "description": f"Bug fix in {project_name}",
                "task_link": "",
                "branch": "master",
                "affected_files": self.get_commit_info(fixed_commit)['affected_files'],
                "commit": {
                    "before": bug_info['buggy_commit_id'],
                    "after": fixed_commit
                },
                "tests": {
                    "file_paths": [test_file],
                    "modules": [os.path.dirname(test_file)]
                },
                "timestamp": datetime.now().isoformat()
            }
        }

    def collect_all_metrics(self) -> List[Dict]:
        """Collect the metrics records of every bug under ``projects``.

        Bugs whose data cannot be collected are reported on stdout and
        skipped, so one broken entry does not abort the run.

        Returns:
            One record per bug directory, in sorted project/bug order.

        Raises:
            OSError: If ``<repo_path>/projects`` cannot be listed.
        """
        all_metrics = []
        projects_dir = os.path.join(self.repo_path, 'projects')

        # Sorted so the output is stable across runs and platforms.
        for project in sorted(os.listdir(projects_dir)):
            bugs_dir = os.path.join(projects_dir, project, 'bugs')
            if not os.path.isdir(bugs_dir):
                continue
            for bug in sorted(os.listdir(bugs_dir), key=self._bug_sort_key):
                bug_dir = os.path.join(bugs_dir, bug)
                if not os.path.isdir(bug_dir):
                    continue
                try:
                    all_metrics.append(self.collect_bug_data(bug_dir))
                except Exception as e:
                    # Deliberately broad: this is the per-bug boundary of a
                    # best-effort batch job.
                    print(f"Error collecting data for {bug_dir}: {e}")

        return all_metrics
def main(repo_path: str = 'd:\\Code_cpp_py\\githubs\\BugsInPy',
         output_path: str = 'baseline_metrics.json') -> None:
    """Collect metrics for every bug in the checkout and write them as JSON.

    Args:
        repo_path: Dataset root directory handed to ``BugMetricsCollector``.
            Defaults to the original author's local checkout.
        output_path: File that receives the JSON array of metrics records.
    """
    collector = BugMetricsCollector(repo_path)
    metrics = collector.collect_all_metrics()

    # Save to a JSON file; ensure_ascii=False keeps non-ASCII text readable.
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(metrics, f, indent=2, ensure_ascii=False)
# Run the collection only when executed as a script, not when imported.
if "__main__" == __name__:
    main()