fix_accuracy_display.py
#!/usr/bin/env python3
"""
Fix Accuracy Display Script
Recalculates and displays correct accuracy values from existing benchmark results
"""

import json
import glob


def fix_accuracy_display(result):
    """Fix accuracy display based on the framework and values"""
    accuracy = result['quality_metrics']['accuracy']

    if accuracy is None:
        return 0.0

    # If accuracy is > 1, it's likely already in percentage format
    # If accuracy is <= 1, it's in decimal format (0.0-1.0)
    if accuracy > 1:
        return accuracy  # Already in percentage
    else:
        return accuracy * 100  # Convert decimal to percentage
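
# Illustrative behavior of fix_accuracy_display (the input dicts below are
# hypothetical and show only the key the function reads):
#   {'quality_metrics': {'accuracy': 0.85}}  ->  85.0  (decimal scaled to percent)
#   {'quality_metrics': {'accuracy': 92.5}}  ->  92.5  (already a percentage, unchanged)
#   {'quality_metrics': {'accuracy': None}}  ->  0.0   (missing value treated as zero)
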

def analyze_fixed_results():
    """Analyze results with corrected accuracy display"""
    # Load all result files
    pattern = "*_complete_training_results.json"
    files = glob.glob(pattern)

    print("🔧 CORRECTED ACCURACY ANALYSIS")
    print("=" * 80)
    print(f"📊 Analyzing {len(files)} result files...")
    print()

    results = []
    for file in files:
        try:
            with open(file, 'r') as f:
                data = json.load(f)
                results.append(data)
        except Exception as e:
            print(f"❌ Error loading {file}: {e}")

    # Extract and fix metrics
    fixed_metrics = []

    print("📋 CORRECTED ACCURACY TABLE")
    print("-" * 100)
    print(f"{'Language':<8} {'Dataset':<12} {'Architecture':<12} {'Device':<6} {'Time(s)':<8} {'Accuracy(%)':<12} {'Loss':<8}")
    print("-" * 100)

    for result in results:
        try:
            # Extract basic info
            language = result['language'].title()
            dataset = result['dataset'].title()
            architecture = result['metadata']['architecture']
            device = result['metadata'].get('device', 'Unknown').replace('Cuda(0)', 'GPU').replace('Cpu', 'CPU')
            training_time = result['performance_metrics']['training_time_seconds']
            loss = result['quality_metrics']['loss']

            # Fix accuracy calculation
            corrected_accuracy = fix_accuracy_display(result)

            # Store corrected metrics
            fixed_metrics.append({
                'language': language,
                'dataset': dataset,
                'architecture': architecture,
                'device': device,
                'training_time': training_time,
                'accuracy': corrected_accuracy,
                'loss': loss,
                'run_id': result['run_id']
            })

            # Print row
            print(f"{language:<8} {dataset:<12} {architecture:<12} {device:<6} {training_time:<8.3f} {corrected_accuracy:<12.2f} {loss:<8.3f}")

        except KeyError as e:
            print(f"❌ Missing key {e} in result: {result.get('run_id', 'unknown')}")

    # Analysis with corrected accuracies
    print(f"\n🎯 ACCURACY RANKINGS (CORRECTED)")
    print("-" * 50)

    # Sort by corrected accuracy
    accuracy_sorted = sorted(fixed_metrics, key=lambda x: x['accuracy'], reverse=True)

    print(f"{'Rank':<4} {'Language':<8} {'Dataset':<12} {'Architecture':<12} {'Device':<6} {'Accuracy(%)':<12}")
    print("-" * 70)

    for i, metrics in enumerate(accuracy_sorted[:10], 1):
        print(f"{i:<4} {metrics['language']:<8} {metrics['dataset']:<12} {metrics['architecture']:<12} "
              f"{metrics['device']:<6} {metrics['accuracy']:<12.2f}")

    # Language comparison with corrected accuracies
    print(f"\n🐍🦀 PYTHON vs RUST - CORRECTED COMPARISON")
    print("-" * 60)

    python_results = [m for m in fixed_metrics if m['language'] == 'Python']
    rust_results = [m for m in fixed_metrics if m['language'] == 'Rust']

    if python_results and rust_results:
        py_avg_acc = sum(r['accuracy'] for r in python_results) / len(python_results)
        rust_avg_acc = sum(r['accuracy'] for r in rust_results) / len(rust_results)
        py_avg_time = sum(r['training_time'] for r in python_results) / len(python_results)
        rust_avg_time = sum(r['training_time'] for r in rust_results) / len(rust_results)

        print(f"Python ({len(python_results)} tests):")
        print(f"  Average Accuracy: {py_avg_acc:.2f}%")
        print(f"  Average Training Time: {py_avg_time:.3f}s")

        print(f"\nRust ({len(rust_results)} tests):")
        print(f"  Average Accuracy: {rust_avg_acc:.2f}%")
        print(f"  Average Training Time: {rust_avg_time:.3f}s")

        print(f"\nCORRECTED COMPARISON:")
        if py_avg_acc > rust_avg_acc:
            acc_diff = py_avg_acc - rust_avg_acc
            print(f"  🎯 Python is {acc_diff:.2f}% MORE ACCURATE")
        else:
            acc_diff = rust_avg_acc - py_avg_acc
            print(f"  🎯 Rust is {acc_diff:.2f}% MORE ACCURATE")

        speed_ratio = py_avg_time / rust_avg_time
        print(f"  ⚡ Rust is {speed_ratio:.1f}x FASTER")

    # Dataset-specific corrected analysis
    print(f"\n📈 DATASET ANALYSIS (CORRECTED ACCURACIES)")
    print("-" * 60)

    datasets = set(m['dataset'] for m in fixed_metrics)
    for dataset in sorted(datasets):
        dataset_results = [m for m in fixed_metrics if m['dataset'] == dataset]

        print(f"\n📊 {dataset} Dataset:")

        # Best accuracy for this dataset
        best_accuracy = max(dataset_results, key=lambda x: x['accuracy'])
        fastest = min(dataset_results, key=lambda x: x['training_time'])

        print(f"  🏆 Best Accuracy: {best_accuracy['language']} {best_accuracy['architecture']} {best_accuracy['device']} ({best_accuracy['accuracy']:.2f}%)")
        print(f"  ⚡ Fastest: {fastest['language']} {fastest['architecture']} {fastest['device']} ({fastest['training_time']:.3f}s)")

    print(f"\n✅ ACCURACY ANALYSIS COMPLETE!")
    print(f"🎯 The accuracy values should now be displayed correctly!")


if __name__ == "__main__":
    analyze_fixed_results()