-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathexecute_benchmark.py
More file actions
2956 lines (2528 loc) · 134 KB
/
execute_benchmark.py
File metadata and controls
2956 lines (2528 loc) · 134 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import os
import dotenv
from typing import Dict, List, Tuple
import re
import subprocess
import sys
import json
import argparse
import time
dotenv.load_dotenv()
def comb(n, k):
    """Return the binomial coefficient "n choose k".

    The result is 0 whenever k falls outside the range [0, n].
    """
    if k < 0 or n < k:
        return 0
    if k in (0, n):
        return 1
    # C(n, k) == C(n, n - k): iterate over the shorter of the two products.
    terms = min(k, n - k)
    value = 1
    # Multiply before dividing so every intermediate result stays an exact integer.
    for denominator, numerator in enumerate(range(n, n - terms, -1), start=1):
        value = value * numerator // denominator
    return value
def find_jsonl_files(directory):
    """
    Recursively collect the JSONL files below a directory.

    Files whose name ends in ``_formatted.jsonl`` are left out.

    Args:
        directory: Path to the directory to search

    Returns:
        List of JSONL file paths, in the order os.walk visits them
    """
    matches = []
    for folder, _subdirs, names in os.walk(directory):
        matches.extend(
            os.path.join(folder, name)
            for name in names
            if name.endswith('.jsonl') and not name.endswith('_formatted.jsonl')
        )
    return matches
def calculate_pass_at_k(n: int, c: int, k: int) -> float:
    """
    Calculate pass@k using the formula: pass@k := E[1 - comb(n-c, k) / comb(n, k)]

    Args:
        n: Total number of samples
        c: Number of correct samples
        k: Number of samples to consider

    Returns:
        pass@k score: 0.0 when there is no correct sample, 1.0 when fewer
        than k samples are incorrect, otherwise the value of the formula.
    """
    # Without a single correct sample no draw can pass. This guard must come
    # first: the shortcut below would otherwise report a perfect score
    # whenever k exceeds n (e.g. n=1, c=0, k=5).
    if c <= 0:
        return 0.0
    # Fewer than k incorrect samples: every draw of k contains a correct one.
    if n - c < k:
        return 1.0
    return 1.0 - (comb(n - c, k) / comb(n, k))
def run_python_test_case(prefix: str, golden_completion: str, suffix: str,
                         assertions: str = "", verbose=True, timeout=30) -> Tuple[bool, str]:
    """
    Run a Python test case by creating a temporary file and executing it.

    The script is assembled from a matplotlib/tornado preamble followed by
    prefix, golden_completion, suffix and assertions. It is written to a
    uniquely named file in the current working directory and executed with
    the current interpreter. When the run fails because a module is missing,
    that module is installed with pip and the same script is executed again.
    Each module is installed at most once, so the retry cycle always ends
    (a module that is still missing after a successful install is reported
    as an execution failure).

    Args:
        prefix: Prefix code
        golden_completion: Golden completion code
        suffix: Suffix code
        assertions: Assertion code
        verbose: Whether to print detailed information
        timeout: Maximum execution time in seconds before killing the process

    Returns:
        Tuple containing success flag and error message if any
    """
    import uuid
    import random

    # Generate unique identifiers to avoid race conditions
    unique_id = str(uuid.uuid4())[:8]

    # Add matplotlib non-interactive mode to prevent plt.show() from blocking
    matplotlib_header = f"""
# Added automatically to prevent matplotlib from blocking
import matplotlib
matplotlib.use('Agg') # Use non-interactive backend
import matplotlib.pyplot as plt
# Override plt.show to prevent blocking
original_show = plt.show
def non_blocking_show(*args, **kwargs):
    plt.savefig('temp_plot_{unique_id}.png') # Use unique filename
    plt.close()
plt.show = non_blocking_show
"""

    # Preamble that relaxes tornado HTTP timeouts when tornado is available
    env_vars_header = """
# Added automatically to provide access to environment variables
import os
# Configure more robust HTTP client settings to reduce timeouts
import asyncio
try:
    # Increase default timeouts for HTTP operations
    import tornado.httpclient
    original_fetch = tornado.httpclient.AsyncHTTPClient.fetch
    async def robust_fetch(self, request, *args, **kwargs):
        # Add timeout and retry logic
        if isinstance(request, str):
            request = tornado.httpclient.HTTPRequest(request, connect_timeout=10, request_timeout=20)
        elif hasattr(request, 'connect_timeout'):
            request.connect_timeout = max(request.connect_timeout or 0, 10)
            request.request_timeout = max(request.request_timeout or 0, 20)
        return await original_fetch(self, request, *args, **kwargs)
    tornado.httpclient.AsyncHTTPClient.fetch = robust_fetch
except:
    pass # Ignore if tornado is not available
"""

    # Environment variables are inherited from the parent process,
    # so nothing has to be injected into the script itself.
    combined_code = f"""{matplotlib_header}
{env_vars_header}
{prefix}
{golden_completion}
{suffix}
# Run assertions
{assertions}
"""

    # Create a temporary python file to execute with unique name
    temp_file = f"temp_test_execution_{unique_id}.py"

    try:
        # Add a small random delay to reduce race conditions on network requests
        time.sleep(random.uniform(0.1, 0.5))

        with open(temp_file, 'w', encoding='utf-8') as f:
            f.write(combined_code)

        # Modules we already tried to install; guarantees the loop terminates.
        attempted_installs = set()

        while True:
            try:
                process = subprocess.run(
                    [sys.executable, temp_file],
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    text=True,
                    check=False,
                    env=os.environ.copy(),  # Pass the current environment variables to the subprocess
                    timeout=timeout
                )
            except subprocess.TimeoutExpired:
                if verbose:
                    print(f" Test case execution timed out after {timeout} seconds")
                return False, f"Execution timed out after {timeout} seconds"

            if process.returncode == 0:
                return True, ""

            error = process.stderr.strip()

            # Work out whether the failure is a missing top-level module
            module_name = None
            if "ImportError" in error or "ModuleNotFoundError" in error:
                match = re.search(r"No module named '([^']+)'", error)
                if match:
                    module_name = match.group(1).split('.')[0]

            # Not an import problem, or installing the module did not help
            if module_name is None or module_name in attempted_installs:
                return False, f"Execution failed: {error}"

            attempted_installs.add(module_name)
            if verbose:
                print(f" Missing dependency: {module_name}, attempting to install...")

            install_process = subprocess.run(
                [sys.executable, "-m", "pip", "install", module_name],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                check=False
            )
            if install_process.returncode != 0:
                return False, f"Failed to install dependency {module_name}: {install_process.stderr.strip()}"

            if verbose:
                print(f" Successfully installed {module_name}, retrying test case...")
            # Loop around and execute the same script again
    except Exception as e:
        return False, f"Error: {str(e)}"
    finally:
        # Clean up temporary files - with retry logic for Windows
        for attempt in range(5):
            try:
                if os.path.exists(temp_file):
                    os.remove(temp_file)
                break
            except PermissionError:
                # If file is still in use, wait briefly and retry
                time.sleep(0.5)
                if attempt == 4:  # Last attempt
                    print(f"Warning: Could not remove temporary file {temp_file}")

        # Clean up unique plot file
        plot_file = f'temp_plot_{unique_id}.png'
        if os.path.exists(plot_file):
            try:
                os.remove(plot_file)
            except OSError:
                print(f"Warning: Could not remove temporary plot file {plot_file}")
def run_java_test_case_simple(prefix: str, golden_completion: str, suffix: str,
                              assertions: str = "", verbose=True, timeout=30) -> Tuple[bool, str]:
    """
    Compile a single-file Java test case with javac and run it with assertions enabled.

    The source is written to a throw-away directory, named after its public
    class ("TestCase" when none is found), and the directory is removed
    afterwards.

    Args:
        prefix: Prefix code (before the completion)
        golden_completion: Golden completion code
        suffix: Suffix code (after the completion)
        assertions: Assertion code (currently unused, handled in suffix)
        verbose: Whether to print detailed information
        timeout: Maximum time in seconds for the compile step and for the run step

    Returns:
        Tuple containing success flag and error message if any
    """
    import uuid
    import tempfile
    import shutil

    # Unique tag so concurrent runs never share a directory
    run_tag = str(uuid.uuid4())[:8]
    work_dir = tempfile.mkdtemp(prefix=f"java_test_{run_tag}_")

    def capture(command):
        # Run a command, collecting stdout/stderr as text, bounded by the timeout
        return subprocess.run(command, capture_output=True, text=True,
                              check=False, timeout=timeout)

    try:
        # Insert a newline between prefix and completion only when neither side
        # already supplies one; otherwise concatenate the parts untouched.
        trimmed_prefix, trimmed_suffix = prefix.rstrip(), suffix.lstrip()
        if prefix.endswith('\n') or golden_completion.startswith('\n'):
            source_text = f"{prefix}{golden_completion}{suffix}"
        else:
            source_text = f"{trimmed_prefix}\n{golden_completion}{trimmed_suffix}"

        # The file name has to match the public class; fall back to a default
        found_class = re.search(r'public\s+class\s+(\w+)', source_text)
        class_name = found_class.group(1) if found_class else "TestCase"

        java_file = os.path.join(work_dir, f"{class_name}.java")
        if verbose:
            print(f" Creating Java file: {java_file}")
            print(f" Class name: {class_name}")
        with open(java_file, 'w', encoding='utf-8') as handle:
            handle.write(source_text)

        compile_command = ["javac", java_file]
        # -ea turns Java assertions on for the run
        run_command = ["java", "-ea", "-cp", work_dir, class_name]
        if verbose:
            print(f" Compile command: {' '.join(compile_command)}")
            print(f" Run command: {' '.join(run_command)}")

        compiled = capture(compile_command)
        if compiled.returncode != 0:
            compile_error = compiled.stderr.strip()
            if verbose:
                print(f" Compilation failed: {compile_error}")
            return False, f"Compilation failed: {compile_error}"
        if verbose:
            print(" Compilation successful, running test...")

        executed = capture(run_command)
        if executed.returncode != 0:
            runtime_error = executed.stderr.strip()
            if verbose:
                print(f" Runtime error: {runtime_error}")
            # Distinguish failed assertions from every other runtime failure
            label = "Assertion failed" if "AssertionError" in runtime_error else "Runtime error"
            return False, f"{label}: {runtime_error}"

        if verbose:
            program_output = executed.stdout.strip()
            if program_output:
                print(f" Program output: {program_output}")
            print(" Test completed successfully")
        return True, ""
    except subprocess.TimeoutExpired:
        if verbose:
            print(f" Test execution timed out after {timeout} seconds")
        return False, f"Execution timed out after {timeout} seconds"
    except Exception as exc:
        if verbose:
            print(f" Unexpected error: {str(exc)}")
        return False, f"Unexpected error: {str(exc)}"
    finally:
        # Always remove the scratch directory, reporting (not raising) on failure
        try:
            if os.path.exists(work_dir):
                shutil.rmtree(work_dir)
                if verbose:
                    print(f" Cleaned up temporary directory: {work_dir}")
        except Exception as exc:
            print(f"Warning: Could not remove temporary directory {work_dir}: {str(exc)}")
# Ordered (regex, dependency lines) rules used to build the Gradle dependency
# block. A rule applies when its regex is found anywhere in the Java source.
# The legacy DBCP rule uses a negative lookahead so that DBCP2 imports do not
# also pull in commons-dbcp 1.4.
_GRADLE_DEPENDENCY_RULES = (
    (r"org\.apache\.commons", (
        "implementation 'org.apache.commons:commons-lang3:3.12.0'",
    )),
    (r"org\.apache\.commons\.io", (
        "implementation 'commons-io:commons-io:2.11.0'",
    )),
    (r"org\.apache\.commons\.dbcp2", (
        "implementation 'org.apache.commons:commons-dbcp2:2.9.0'",
        "implementation 'com.h2database:h2:2.1.214'",  # H2 database for testing
    )),
    (r"org\.apache\.commons\.dbcp(?!2)", (
        "implementation 'commons-dbcp:commons-dbcp:1.4'",
        "implementation 'com.h2database:h2:2.1.214'",  # H2 database for testing
    )),
    (r"org\.apache\.commons\.compress", (
        "implementation 'org.apache.commons:commons-compress:1.21'",
    )),
    (r"org\.junit", (
        "implementation 'junit:junit:4.13.2'",
    )),
    (r"com\.fasterxml\.jackson", (
        "implementation 'com.fasterxml.jackson.core:jackson-core:2.15.2'",
        "implementation 'com.fasterxml.jackson.core:jackson-databind:2.15.2'",
        "implementation 'com.fasterxml.jackson.core:jackson-annotations:2.15.2'",
    )),
    (r"com\.google\.common", (
        "implementation 'com.google.guava:guava:31.1-jre'",
    )),
    (r"org\.json", (
        "implementation 'org.json:json:20210307'",
    )),
    # NOTE(review): jakarta.xml.bind sources get the javax JAXB artifacts, as
    # in the original code - confirm this is what the benchmark cases need.
    (r"javax\.xml\.bind|jakarta\.xml\.bind", (
        "implementation 'javax.xml.bind:jaxb-api:2.3.1'",
        "implementation 'com.sun.xml.bind:jaxb-core:2.3.0.1'",
        "implementation 'com.sun.xml.bind:jaxb-impl:2.3.1'",
        "implementation 'javax.activation:activation:1.1.1'",
    )),
    (r"org\.hibernate", (
        "implementation 'org.hibernate:hibernate-core:5.6.15.Final'",
        "implementation 'com.h2database:h2:2.1.214'",  # H2 database for testing
    )),
    (r"org\.jdom2", (
        "implementation 'org.jdom:jdom2:2.0.6'",
    )),
    (r"org\.apache\.poi", (
        "implementation 'org.apache.poi:poi:5.2.3'",
        "implementation 'org.apache.poi:poi-ooxml:5.2.3'",
    )),
    (r"com\.google\.gson", (
        "implementation 'com.google.code.gson:gson:2.10.1'",
    )),
    (r"org\.dom4j", (
        "implementation 'org.dom4j:dom4j:2.1.4'",
    )),
    (r"org\.apache\.logging\.log4j", (
        "implementation 'org.apache.logging.log4j:log4j-core:2.20.0'",
        "implementation 'org.apache.logging.log4j:log4j-api:2.20.0'",
    )),
    (r"org\.springframework", (
        "implementation 'org.springframework:spring-context:5.3.23'",
        "implementation 'org.springframework:spring-core:5.3.23'",
        "implementation 'org.springframework:spring-beans:5.3.23'",
    )),
)


def _detect_gradle_dependencies(java_source: str) -> List[str]:
    """Return the Gradle dependency lines needed by java_source, in rule order, without duplicates."""
    selected: List[str] = []
    for pattern, dependency_lines in _GRADLE_DEPENDENCY_RULES:
        if not re.search(pattern, java_source):
            continue
        for dependency_line in dependency_lines:
            if dependency_line not in selected:
                selected.append(dependency_line)
    return selected


def run_java_test_case_gradle(prefix: str, golden_completion: str, suffix: str,
                              assertions: str = "", verbose=True, timeout=60) -> Tuple[bool, str]:
    """
    Run a Java test case using Gradle for complex cases with dependencies and packages.
    Uses Gradle to build/compile with dependencies, then runs the main method directly.

    Args:
        prefix: Prefix code (before the completion)
        golden_completion: Golden completion code
        suffix: Suffix code (after the completion)
        assertions: Assertion code (not used by this function)
        verbose: Whether to print detailed information
        timeout: Maximum time in seconds for the Gradle build step; the
            classpath query and the program run are each limited to 30 seconds

    Returns:
        Tuple containing success flag and error message if any
    """
    import uuid
    import tempfile
    import shutil

    unique_id = str(uuid.uuid4())[:8]
    temp_dir = tempfile.mkdtemp(prefix=f"java_gradle_test_{unique_id}_")

    try:
        # Combine code sections, inserting a newline between prefix and
        # completion only when neither side already provides one
        prefix_clean = prefix.rstrip()
        suffix_clean = suffix.lstrip()
        if not prefix.endswith('\n') and not golden_completion.startswith('\n'):
            combined_code = f"{prefix_clean}\n{golden_completion}{suffix_clean}"
        else:
            combined_code = f"{prefix}{golden_completion}{suffix}"

        # Extract package and class name
        package_match = re.search(r'^\s*package\s+([\w.]+);', combined_code, re.MULTILINE)
        package_name = package_match.group(1) if package_match else "devbench.test"
        class_match = re.search(r'public\s+class\s+(\w+)', combined_code)
        class_name = class_match.group(1) if class_match else "TestCase"

        # If no package declaration, add one
        if not package_match:
            combined_code = f"package {package_name};\n\n{combined_code}"

        # Create Gradle directory structure - use src/main/java instead of src/test/java
        src_dir = os.path.join(temp_dir, "src", "main", "java", *package_name.split('.'))
        os.makedirs(src_dir, exist_ok=True)

        # Write Java file
        java_file = os.path.join(src_dir, f"{class_name}.java")
        with open(java_file, 'w', encoding='utf-8') as f:
            f.write(combined_code)

        # Dependencies are inferred from the package names used in the source
        dependencies = _detect_gradle_dependencies(combined_code)

        # Create build.gradle - simplified without test configuration
        build_gradle_content = f"""
plugins {{
id 'java'
id 'application'
}}
repositories {{
mavenCentral()
}}
dependencies {{
{chr(10).join(' ' + dep for dep in dependencies)}
}}
java {{
sourceCompatibility = JavaVersion.VERSION_11
targetCompatibility = JavaVersion.VERSION_11
}}
application {{
mainClass = '{package_name}.{class_name}'
}}
"""
        build_gradle_path = os.path.join(temp_dir, "build.gradle")
        with open(build_gradle_path, 'w', encoding='utf-8') as f:
            f.write(build_gradle_content)

        if verbose:
            print(" Using Gradle for dependencies")
            print(f" Package: {package_name}")
            print(f" Class: {class_name}")
            print(f" Dependencies: {len(dependencies)}")

        # Step 1: Build with Gradle to compile and resolve dependencies
        build_cmd = ['gradle', 'build', '--no-daemon', '--console=plain']
        if verbose:
            print(f" Building: {' '.join(build_cmd)}")
        build_result = subprocess.run(
            build_cmd,
            cwd=temp_dir,
            capture_output=True,
            text=True,
            timeout=timeout
        )
        if build_result.returncode != 0:
            error_output = build_result.stdout + build_result.stderr
            # Only the tail of the (long) Gradle output is reported
            if "cannot find symbol" in error_output or "package does not exist" in error_output:
                return False, f"Compilation failed: {error_output[-300:]}"
            else:
                return False, f"Gradle build failed: {error_output[-200:]}"

        # Step 2: Get the classpath from Gradle
        classpath_cmd = ['gradle', 'printClasspath', '--no-daemon', '--console=plain']

        # Add a task to print classpath to build.gradle
        with open(build_gradle_path, 'a', encoding='utf-8') as f:
            f.write("""
task printClasspath {
doLast {
println configurations.runtimeClasspath.asPath
}
}
""")
        classpath_result = subprocess.run(
            classpath_cmd,
            cwd=temp_dir,
            capture_output=True,
            text=True,
            timeout=30
        )

        # Extract classpath from output (last line that looks like a classpath)
        classpath = ""
        if classpath_result.returncode == 0:
            lines = classpath_result.stdout.strip().split('\n')
            for line in reversed(lines):
                if '.jar' in line and ('/' in line or '\\' in line):
                    classpath = line.strip()
                    break

        # Add build output directory to classpath
        build_classes_dir = os.path.join(temp_dir, "build", "classes", "java", "main")
        if classpath:
            full_classpath = f"{build_classes_dir}{os.pathsep}{classpath}"
        else:
            full_classpath = build_classes_dir

        # Step 3: Run the Java class directly with assertions enabled
        java_cmd = ['java', '-ea', '-cp', full_classpath, f'{package_name}.{class_name}']
        if verbose:
            print(f" Running: {' '.join(java_cmd[:4])} [classpath] {java_cmd[-1]}")
        run_result = subprocess.run(
            java_cmd,
            cwd=temp_dir,
            capture_output=True,
            text=True,
            timeout=30
        )
        if verbose and run_result.stdout:
            print(f" Program output: {run_result.stdout.strip()}")

        if run_result.returncode == 0:
            if verbose:
                print(" Gradle test completed successfully")
            return True, ""

        error_output = run_result.stdout + run_result.stderr
        # Check for common error patterns
        if "AssertionError" in error_output:
            return False, f"Assertion failed: {error_output.strip()}"
        elif "Exception" in error_output:
            return False, f"Runtime exception: {error_output.strip()}"
        else:
            return False, f"Program failed: {error_output.strip()}"
    except subprocess.TimeoutExpired as exc:
        # Report the limit that actually expired: the build uses `timeout`,
        # the classpath query and the run use a fixed 30 seconds.
        return False, f"Gradle execution timed out after {exc.timeout} seconds"
    except Exception as e:
        return False, f"Gradle error: {str(e)}"
    finally:
        # Clean up
        try:
            if os.path.exists(temp_dir):
                shutil.rmtree(temp_dir)
                if verbose:
                    print(" Cleaned up Gradle temp directory")
        except Exception as e:
            print(f"Warning: Could not remove Gradle temp directory {temp_dir}: {str(e)}")
def run_java_test_case(prefix: str, golden_completion: str, suffix: str,
                       assertions: str = "", verbose=True, timeout=30) -> Tuple[bool, str]:
    """
    Run a Java test case, choosing between plain javac and Gradle.

    Gradle is used when the code mentions one of the known third-party
    packages or declares a package; everything else is compiled and run
    directly with javac/java. All arguments are forwarded unchanged to the
    selected runner, whose (success, error message) result is returned.
    """
    source_text = f"{prefix}{golden_completion}{suffix}"

    # Package prefixes that are not part of the standard Java library
    third_party_markers = (
        'org.apache.commons',
        'com.google.common',
        'org.junit',
        'org.json',
        'com.fasterxml.jackson',
        'org.springframework',
        'org.hibernate',
        'org.jdom2',
        'org.apache.poi',
        'com.google.gson',
        'org.dom4j',
        'org.apache.logging.log4j',  # Apache Log4j2
        'javax.xml.bind',  # JAXB was removed from JDK 11+, needs external dependency
        'jakarta.xml.bind',  # Jakarta JAXB (modern replacement)
    )

    # Only the first marker found is reported
    first_marker = next(
        (marker for marker in third_party_markers if marker in source_text),
        None,
    )
    if first_marker is not None and verbose:
        print(f" Detected external dependency: {first_marker}")

    # A package declaration requires a matching directory layout
    declared_package = re.search(r'^\s*package\s+([\w.]+);', source_text, re.MULTILINE)
    if declared_package and verbose:
        print(f" Detected package declaration: {declared_package.group(1)}")

    if first_marker is not None or declared_package:
        if verbose:
            print(" Using Gradle for complex case...")
        return run_java_test_case_gradle(prefix, golden_completion, suffix, assertions, verbose, timeout)

    if verbose:
        print(" Using simple javac for basic case...")
    return run_java_test_case_simple(prefix, golden_completion, suffix, assertions, verbose, timeout)
# Patterns that capture the module specifier from a Node.js "module not found" error.
_NODE_MISSING_MODULE_PATTERNS = (
    r"Cannot find module '([^']+)'",
    r"Cannot find package '([^']+)' imported from",
    r"MODULE_NOT_FOUND.*'([^']+)'",
)


def _missing_npm_package(error: str):
    """Return the npm package named by a Node.js missing-module error, or None if nothing installable is named."""
    for pattern in _NODE_MISSING_MODULE_PATTERNS:
        match = re.search(pattern, error)
        if not match:
            continue
        specifier = match.group(1)
        # A relative or absolute path is a missing local file, not an npm package
        if specifier.startswith(('.', '/')) or os.path.isabs(specifier):
            return None
        parts = specifier.split('/')
        if specifier.startswith('@'):
            # Scoped package: keep @scope/package
            return '/'.join(parts[:2])
        # Regular package: drop any sub-path
        return parts[0]
    return None


def run_javascript_test_case(prefix: str, golden_completion: str, suffix: str,
                             assertions: str = "", verbose=True, timeout=30) -> Tuple[bool, str]:
    """
    Run a JavaScript test case by creating a temporary file and executing it with Node.js.

    The script runs inside a throw-away directory. When Node.js reports a
    missing npm package, the package is installed into that directory and
    the script is executed again, for at most 5 executions in total. Missing
    local files (relative or absolute paths) are reported as execution
    failures instead of being passed to npm.

    Args:
        prefix: Prefix code
        golden_completion: Golden completion code
        suffix: Suffix code
        assertions: Assertion code
        verbose: Whether to print detailed information
        timeout: Maximum execution time in seconds before killing the process

    Returns:
        Tuple containing success flag and error message if any
    """
    import uuid
    import random
    import tempfile
    import shutil

    # Generate unique identifiers to avoid race conditions
    unique_id = str(uuid.uuid4())[:8]
    temp_dir = tempfile.mkdtemp(prefix=f"js_test_{unique_id}_")

    try:
        # Combine all code sections
        # Environment variables are inherited from parent process
        combined_code = f"""{prefix}
{golden_completion}
{suffix}
// Run assertions
{assertions}
"""

        # Create a temporary JavaScript file
        js_file = os.path.join(temp_dir, f"test_{unique_id}.js")
        if verbose:
            print(f" Creating JavaScript file: {js_file}")

        # Add a small random delay to reduce race conditions on network requests
        time.sleep(random.uniform(0.1, 0.3))

        with open(js_file, 'w', encoding='utf-8') as f:
            f.write(combined_code)

        env = os.environ.copy()

        # IMPORTANT: Replace [NODE-BIN-PATH] with your Node.js binary path
        # Example: "/Users/yourname/.nvm/versions/node/v22.17.0/bin" or "/usr/local/bin"
        nvm_bin_path = "[NODE-BIN-PATH]"

        # Prepend Node path to ensure we get the working node/npm
        if nvm_bin_path != "[NODE-BIN-PATH]":  # Only prepend if user has configured it
            if "PATH" in env:
                # os.pathsep keeps this correct on Windows (';') as well as POSIX (':')
                env["PATH"] = f"{nvm_bin_path}{os.pathsep}{env['PATH']}"
            else:
                env["PATH"] = nvm_bin_path

        if verbose:
            # Resolve against the child's PATH without touching os.environ;
            # path=None makes shutil.which fall back to its default search path.
            search_path = env.get("PATH")
            node_path = shutil.which("node", path=search_path)
            npm_path = shutil.which("npm", path=search_path)
            print(f" Node path: {node_path}")
            print(f" npm path: {npm_path}")

        # Allow up to 5 attempts to handle multiple missing dependencies
        for _attempt in range(5):
            try:
                # Run the code with Node.js
                process = subprocess.run(
                    ["node", js_file],
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    text=True,
                    check=False,
                    env=env,
                    timeout=timeout,
                    cwd=temp_dir
                )
                if process.returncode == 0:
                    return True, ""

                error = process.stderr.strip()

                # Check if it's a module not found error (allow installation on any attempt)
                if "Cannot find module" in error or "MODULE_NOT_FOUND" in error or "Cannot find package" in error:
                    module_name = _missing_npm_package(error)
                    if module_name:
                        if verbose:
                            print(f" Missing dependency: {module_name}, attempting to install...")
                            print(f" Installing with: npm install {module_name}")

                        install_process = subprocess.run(
                            ["npm", "install", module_name],
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            text=True,
                            check=False,
                            cwd=temp_dir,
                            env=env
                        )
                        if install_process.returncode != 0:
                            return False, f"Failed to install dependency {module_name}: {install_process.stderr.strip()}"

                        if verbose:
                            print(f" Successfully installed {module_name}, retrying execution...")
                        # Continue to next attempt
                        continue

                # Any other failure: classify it by the error type Node.js reported
                if "SyntaxError" in error:
                    return False, f"JavaScript syntax error: {error}"
                elif "ReferenceError" in error:
                    return False, f"JavaScript reference error: {error}"
                elif "TypeError" in error:
                    return False, f"JavaScript type error: {error}"
                else:
                    return False, f"JavaScript execution failed: {error}"
            except subprocess.TimeoutExpired:
                if verbose:
                    print(f" Test case execution timed out after {timeout} seconds")
                return False, f"Execution timed out after {timeout} seconds"

        # Every attempt ended with another successful dependency installation
        return False, "Failed to execute test case after multiple dependency installation attempts"
    except Exception as e:
        return False, f"Error: {str(e)}"
    finally:
        # Clean up temporary directory
        try:
            if os.path.exists(temp_dir):
                shutil.rmtree(temp_dir)
                if verbose:
                    print(f" Cleaned up temporary directory: {temp_dir}")
        except Exception as e:
            print(f"Warning: Could not remove temporary directory {temp_dir}: {str(e)}")
def run_typescript_test_case(prefix: str, golden_completion: str, suffix: str,
assertions: str = "", verbose=True, timeout=30) -> Tuple[bool, str]:
"""
Run a TypeScript test case by compiling it to JavaScript and executing with Node.js.
Args:
prefix: Prefix code
golden_completion: Golden completion code
suffix: Suffix code
assertions: Assertion code
verbose: Whether to print detailed information
timeout: Maximum execution time in seconds before killing the process
Returns:
Tuple containing success flag and error message if any
"""
import uuid
import random
import tempfile
import shutil
# Generate unique identifiers to avoid race conditions
unique_id = str(uuid.uuid4())[:8]
temp_dir = tempfile.mkdtemp(prefix=f"ts_test_{unique_id}_")
try:
# Combine all code sections for TypeScript
# Environment variables are inherited from parent process
combined_code = f"""{prefix}
{golden_completion}
{suffix}
// Run assertions
{assertions}
"""
# Create a temporary TypeScript file
ts_file = os.path.join(temp_dir, f"test_{unique_id}.ts")
js_file = os.path.join(temp_dir, f"test_{unique_id}.js")
if verbose:
print(f" Creating TypeScript file: {ts_file}")
# Add a small random delay to reduce race conditions on network requests
time.sleep(random.uniform(0.1, 0.3))
with open(ts_file, 'w', encoding='utf-8') as f:
f.write(combined_code)
# Ensure we get the same PATH as your shell by prioritizing NVM paths
env = os.environ.copy()
# IMPORTANT: Replace [NODE-BIN-PATH] with your Node.js binary path
# Example: "/Users/yourname/.nvm/versions/node/v22.17.0/bin" or "/usr/local/bin"
nvm_bin_path = "[NODE-BIN-PATH]"
# Prepend Node path to ensure we get the working node/npm/npx
if nvm_bin_path != "[NODE-BIN-PATH]": # Only prepend if user has configured it
if "PATH" in env:
env["PATH"] = f"{nvm_bin_path}:{env['PATH']}"
else:
env["PATH"] = nvm_bin_path
# First, install TypeScript and Node types in the temp directory
if verbose:
print(f" Installing TypeScript and Node types in temp directory...")
install_ts_process = subprocess.run(
["npm", "install", "typescript", "@types/node"],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
check=False,
env=env,
timeout=30,
cwd=temp_dir
)
if install_ts_process.returncode != 0:
if verbose:
print(f" Warning: Could not install TypeScript locally, will try to use global installation")
# Try compilation with dependency installation if needed
# Allow up to 5 attempts to handle multiple missing dependencies
for compile_attempt in range(5):
if verbose and compile_attempt > 0:
print(f" Retrying TypeScript compilation (attempt {compile_attempt + 1})...")
elif verbose:
print(f" Compiling TypeScript to JavaScript...")
# Compile TypeScript to JavaScript using tsc
compile_process = subprocess.run(
["npx", "tsc", ts_file, "--outDir", temp_dir, "--target", "ES2020", "--module", "commonjs", "--esModuleInterop", "--allowSyntheticDefaultImports", "--skipLibCheck"],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
check=False,
env=env,
timeout=30,
cwd=temp_dir
)
if compile_process.returncode == 0:
# Compilation succeeded!
break
# Compilation failed - check if it's due to missing modules
compile_error = compile_process.stderr.strip()
compile_output = compile_process.stdout.strip()
full_error = compile_error if compile_error else compile_output
# Check for missing module errors
if "Cannot find module" in full_error or "Cannot resolve" in full_error:
# Extract module name from TypeScript error
patterns = [
r"Cannot find module '([^']+)'",
r"Cannot resolve '([^']+)'",
r"Could not find a declaration file for module '([^']+)'"
]
module_name = None
for pattern in patterns:
match = re.search(pattern, full_error)
if match:
module_name = match.group(1)
# Remove any path components or file extensions
# For scoped packages (@scope/package), preserve both scope and package
if '/' in module_name:
if module_name.startswith('@'):
# Scoped package: keep @scope/package
parts = module_name.split('/')
if len(parts) >= 2:
module_name = parts[0] + '/' + parts[1]
else:
# Regular package: just take first part
module_name = module_name.split('/')[0]
if module_name.endswith('.js'):
module_name = module_name[:-3]
break
if module_name:
if verbose:
print(f" Missing dependency: {module_name}, attempting to install...")
# Install both the module and its type definitions
# Also try to install @types package (may not exist, that's OK)
types_package = f"@types/{module_name}"
if verbose:
print(f" Installing: npm install {module_name}")
install_process = subprocess.run(
["npm", "install", module_name],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
check=False,
cwd=temp_dir,
env=env
)
if install_process.returncode == 0:
if verbose:
print(f" Successfully installed {module_name}")
# Try to install types package (don't fail if it doesn't exist)
install_types = subprocess.run(
["npm", "install", types_package],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
check=False,
cwd=temp_dir,
env=env
)
if install_types.returncode == 0 and verbose:
print(f" Also installed {types_package}")
# Continue to retry compilation
continue
else:
# Couldn't install the package
if verbose: