devbench/execute_benchmark.py at main · microsoft/devbench · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import os
import dotenv
from typing import Dict, List, Tuple
import re
import subprocess
import sys
import json
import argparse
import time

# Load variables from a .env file (if one is found) into os.environ at import
# time, so they are part of the environment that the test subprocesses started
# by the runners in this module receive.
dotenv.load_dotenv()

def comb(n, k):
    """Return the binomial coefficient "n choose k" (0 when k lies outside 0..n)."""
    if not 0 <= k <= n:
        return 0
    # C(n, k) == C(n, n - k), so iterate over whichever side is shorter.
    # This also makes the k == 0 and k == n cases fall out as an empty loop.
    shorter = k if k <= n - k else n - k
    coefficient = 1
    # Multiply by the falling factors n, n-1, ... and divide by 1, 2, ...;
    # every intermediate quotient is itself a binomial coefficient, so the
    # floor division is exact.
    for divisor, factor in enumerate(range(n, n - shorter, -1), start=1):
        coefficient = coefficient * factor // divisor
    return coefficient

def find_jsonl_files(directory):
    """
    Recursively collect the JSONL files located under a directory.

    Files whose names end in ``_formatted.jsonl`` are left out.

    Args:
        directory: Root of the directory tree to walk

    Returns:
        List of file paths (directory component joined with the file name),
        in the order os.walk produces them
    """
    def is_unformatted_jsonl(filename):
        return filename.endswith('.jsonl') and not filename.endswith('_formatted.jsonl')

    return [
        os.path.join(parent, filename)
        for parent, _subdirs, filenames in os.walk(directory)
        for filename in filenames
        if is_unformatted_jsonl(filename)
    ]

def calculate_pass_at_k(n: int, c: int, k: int) -> float:
    """
    Compute the pass@k estimate 1 - comb(n - c, k) / comb(n, k).

    Args:
        n: Total number of samples
        c: Number of correct samples
        k: Number of samples drawn

    Returns:
        pass@k score; 1.0 whenever fewer than k samples are incorrect, since
        any draw of k samples must then include a correct one
    """
    incorrect = n - c
    if incorrect >= k:
        all_wrong_draws = comb(incorrect, k)
        possible_draws = comb(n, k)
        return 1.0 - (all_wrong_draws / possible_draws)
    return 1.0

def run_python_test_case(prefix: str, golden_completion: str, suffix: str,
                         assertions: str = "", verbose=True, timeout=30) -> Tuple[bool, str]:
    """
    Run a Python test case by creating a temporary file and executing it.

    Two automatically generated headers (a non-blocking matplotlib setup and a
    tornado HTTP timeout patch) are placed in front of the prefix, completion,
    suffix and assertions. The result is written to a uniquely named script in
    the current working directory and executed with the current interpreter.

    If a run fails because a module is missing, the module is installed with
    pip and the script is executed again. The number of executions is bounded
    and the same module is never installed twice, so a package whose install
    succeeds but whose import keeps failing cannot cause endless retries.

    Args:
        prefix: Prefix code
        golden_completion: Golden completion code
        suffix: Suffix code
        assertions: Assertion code appended after the suffix
        verbose: Whether to print detailed information
        timeout: Maximum time in seconds allowed for each execution of the script

    Returns:
        Tuple containing success flag and error message if any
    """
    import uuid
    import random

    # Upper bound on executions of the script (initial run plus retries after
    # installing a missing dependency).
    max_runs = 5

    # Generate unique identifiers to avoid race conditions
    unique_id = str(uuid.uuid4())[:8]

    # Add matplotlib non-interactive mode to prevent plt.show() from blocking
    matplotlib_header = f"""
# Added automatically to prevent matplotlib from blocking
import matplotlib
matplotlib.use('Agg')  # Use non-interactive backend
import matplotlib.pyplot as plt
# Override plt.show to prevent blocking
original_show = plt.show
def non_blocking_show(*args, **kwargs):
    plt.savefig('temp_plot_{unique_id}.png')  # Use unique filename
    plt.close()
plt.show = non_blocking_show
"""

    # Header that patches tornado's HTTP client (when available) with larger timeouts
    env_vars_header = """
# Added automatically to provide access to environment variables
import os

# Configure more robust HTTP client settings to reduce timeouts
import asyncio
try:
    # Increase default timeouts for HTTP operations
    import tornado.httpclient
    original_fetch = tornado.httpclient.AsyncHTTPClient.fetch
    async def robust_fetch(self, request, *args, **kwargs):
        # Add timeout and retry logic
        if isinstance(request, str):
            request = tornado.httpclient.HTTPRequest(request, connect_timeout=10, request_timeout=20)
        elif hasattr(request, 'connect_timeout'):
            request.connect_timeout = max(request.connect_timeout or 0, 10)
            request.request_timeout = max(request.request_timeout or 0, 20)
        return await original_fetch(self, request, *args, **kwargs)
    tornado.httpclient.AsyncHTTPClient.fetch = robust_fetch
except:
    pass  # Ignore if tornado is not available
"""

    # Environment variables are inherited from the parent process
    # No need to manually inject them

    # Combine all code sections
    combined_code = f"""{matplotlib_header}
{env_vars_header}

{prefix}
{golden_completion}
{suffix}

# Run assertions
{assertions}
"""

    # Create a temporary python file to execute with unique name
    temp_file = f"temp_test_execution_{unique_id}.py"
    try:
        # Add a small random delay to reduce race conditions on network requests
        time.sleep(random.uniform(0.1, 0.5))

        with open(temp_file, 'w', encoding='utf-8') as f:
            f.write(combined_code)

        # Modules already installed during this call; seeing one of them
        # reported missing again means installing it did not help.
        installed_modules = set()
        error = ""

        try:
            for run_index in range(max_runs):
                # Run the code with the current environment variables and a timeout
                process = subprocess.run(
                    [sys.executable, temp_file],
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    text=True,
                    check=False,
                    env=os.environ.copy(),  # Pass the current environment variables to the subprocess
                    timeout=timeout
                )

                if process.returncode == 0:
                    return True, ""

                error = process.stderr.strip()

                # Work out whether the failure is a missing top-level module
                module_name = None
                if "ImportError" in error or "ModuleNotFoundError" in error:
                    match = re.search(r"No module named '([^']+)'", error)
                    if match:
                        module_name = match.group(1).split('.')[0]

                # Stop retrying when the failure is not a missing module, when
                # the module was already installed once, or when no runs remain.
                if (module_name is None
                        or module_name in installed_modules
                        or run_index == max_runs - 1):
                    break

                if verbose:
                    print(f"  Missing dependency: {module_name}, attempting to install...")

                # NOTE(security): the package name is parsed from the output of
                # the executed test code; only run this on trusted benchmark data.
                install_process = subprocess.run(
                    [sys.executable, "-m", "pip", "install", module_name],
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    text=True,
                    check=False
                )

                if install_process.returncode != 0:
                    return False, f"Failed to install dependency {module_name}: {install_process.stderr.strip()}"

                installed_modules.add(module_name)
                if verbose:
                    print(f"  Successfully installed {module_name}, retrying test case...")

            return False, f"Execution failed: {error}"

        except subprocess.TimeoutExpired:
            # Handle timeout case
            if verbose:
                print(f"  Test case execution timed out after {timeout} seconds")
            return False, f"Execution timed out after {timeout} seconds"

    except Exception as e:
        return False, f"Error: {str(e)}"
    finally:
        # Clean up temporary files - with retry logic for Windows
        for attempt in range(5):
            try:
                if os.path.exists(temp_file):
                    os.remove(temp_file)
                break
            except PermissionError:
                # If file is still in use, wait briefly and retry
                time.sleep(0.5)
                if attempt == 4:  # Last attempt
                    print(f"Warning: Could not remove temporary file {temp_file}")

        # Clean up unique plot file
        plot_file = f'temp_plot_{unique_id}.png'
        if os.path.exists(plot_file):
            try:
                os.remove(plot_file)
            except OSError:
                # Best-effort cleanup: report, but never mask the test result
                print(f"Warning: Could not remove temporary plot file {plot_file}")

def run_java_test_case_simple(prefix: str, golden_completion: str, suffix: str,
                       assertions: str = "", verbose=True, timeout=30) -> Tuple[bool, str]:
    """
    Compile a single-file Java test case with javac and run it with assertions enabled.

    The source is written to a fresh temporary directory, named after the first
    ``public class`` found in the code (``TestCase`` if there is none), and the
    directory is removed again before returning.

    Args:
        prefix: Code that precedes the completion
        golden_completion: Completion code placed between prefix and suffix
        suffix: Code that follows the completion
        assertions: Assertion code (currently unused; assertions are expected in the suffix)
        verbose: Whether to print progress details
        timeout: Seconds allowed for the compile step and, separately, for the run step

    Returns:
        Tuple of success flag and error message (empty string on success)
    """
    import shutil
    import tempfile
    import uuid

    # A short random tag keeps concurrently running test cases apart.
    run_tag = str(uuid.uuid4())[:8]
    temp_dir = tempfile.mkdtemp(prefix=f"java_test_{run_tag}_")

    # javac and java are launched with identical capture/timeout settings.
    capture_opts = dict(
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        check=False,
        timeout=timeout,
    )

    try:
        trimmed_prefix = prefix.rstrip()
        trimmed_suffix = suffix.lstrip()

        # Insert a newline between prefix and completion when neither side
        # supplies one, so the two pieces are not fused onto a single line.
        if prefix.endswith('\n') or golden_completion.startswith('\n'):
            combined_code = f"{prefix}{golden_completion}{suffix}"
        else:
            combined_code = f"{trimmed_prefix}\n{golden_completion}{trimmed_suffix}"

        # The file must carry the name of its public class.
        public_class = re.search(r'public\s+class\s+(\w+)', combined_code)
        class_name = public_class.group(1) if public_class else "TestCase"

        java_file = os.path.join(temp_dir, f"{class_name}.java")
        if verbose:
            print(f"  Creating Java file: {java_file}")
            print(f"  Class name: {class_name}")

        with open(java_file, 'w', encoding='utf-8') as source_handle:
            source_handle.write(combined_code)

        compile_command = ["javac", java_file]
        # -ea turns on Java assert statements at run time.
        run_command = ["java", "-ea", "-cp", temp_dir, class_name]
        if verbose:
            print(f"  Compile command: {' '.join(compile_command)}")
            print(f"  Run command: {' '.join(run_command)}")

        try:
            compiled = subprocess.run(compile_command, **capture_opts)
            if compiled.returncode != 0:
                compile_error = compiled.stderr.strip()
                if verbose:
                    print(f"  Compilation failed: {compile_error}")
                return False, f"Compilation failed: {compile_error}"

            if verbose:
                print("  Compilation successful, running test...")

            executed = subprocess.run(run_command, **capture_opts)
            if executed.returncode != 0:
                runtime_error = executed.stderr.strip()
                if verbose:
                    print(f"  Runtime error: {runtime_error}")
                # Failed assertions are reported under their own label.
                label = "Assertion failed" if "AssertionError" in runtime_error else "Runtime error"
                return False, f"{label}: {runtime_error}"

            if verbose:
                program_output = executed.stdout.strip()
                if program_output:
                    print(f"  Program output: {program_output}")
                print("  Test completed successfully")
            return True, ""

        except subprocess.TimeoutExpired:
            if verbose:
                print(f"  Test execution timed out after {timeout} seconds")
            return False, f"Execution timed out after {timeout} seconds"

    except Exception as exc:
        if verbose:
            print(f"  Unexpected error: {str(exc)}")
        return False, f"Unexpected error: {str(exc)}"

    finally:
        # Always remove the scratch directory, whatever the outcome.
        try:
            if os.path.exists(temp_dir):
                shutil.rmtree(temp_dir)
                if verbose:
                    print(f"  Cleaned up temporary directory: {temp_dir}")
        except Exception as cleanup_exc:
            print(f"Warning: Could not remove temporary directory {temp_dir}: {str(cleanup_exc)}")

def _detect_gradle_dependencies(combined_code: str) -> List[str]:
    """Return the de-duplicated Gradle dependency lines implied by packages named in the code."""
    dependencies: List[str] = []

    def add(*coordinates: str) -> None:
        # Several libraries share a transitive helper (e.g. the H2 database);
        # declare each coordinate only once while keeping first-seen order.
        for coordinate in coordinates:
            declaration = f"implementation '{coordinate}'"
            if declaration not in dependencies:
                dependencies.append(declaration)

    if 'org.apache.commons' in combined_code:
        add('org.apache.commons:commons-lang3:3.12.0')
        if 'org.apache.commons.io' in combined_code:
            add('commons-io:commons-io:2.11.0')
        if 'org.apache.commons.dbcp2' in combined_code:
            add('org.apache.commons:commons-dbcp2:2.9.0',
                'com.h2database:h2:2.1.214')  # H2 database for testing
        # Legacy DBCP 1.x: the negative lookahead stops "dbcp2" from also
        # matching here, which a plain substring test would do.
        if re.search(r'org\.apache\.commons\.dbcp(?!2)', combined_code):
            add('commons-dbcp:commons-dbcp:1.4',
                'com.h2database:h2:2.1.214')  # H2 database for testing
        if 'org.apache.commons.compress' in combined_code:
            add('org.apache.commons:commons-compress:1.21')
    if 'org.junit' in combined_code:
        add('junit:junit:4.13.2')
    if 'com.fasterxml.jackson' in combined_code:
        add('com.fasterxml.jackson.core:jackson-core:2.15.2',
            'com.fasterxml.jackson.core:jackson-databind:2.15.2',
            'com.fasterxml.jackson.core:jackson-annotations:2.15.2')
    if 'com.google.common' in combined_code:
        add('com.google.guava:guava:31.1-jre')
    if 'org.json' in combined_code:
        add('org.json:json:20210307')
    if 'javax.xml.bind' in combined_code or 'jakarta.xml.bind' in combined_code:
        # Use widely compatible JAXB implementation
        # NOTE(review): these are javax-namespace artifacts; confirm that code
        # importing jakarta.xml.bind actually compiles against them.
        add('javax.xml.bind:jaxb-api:2.3.1',
            'com.sun.xml.bind:jaxb-core:2.3.0.1',
            'com.sun.xml.bind:jaxb-impl:2.3.1',
            'javax.activation:activation:1.1.1')
    if 'org.hibernate' in combined_code:
        add('org.hibernate:hibernate-core:5.6.15.Final',
            'com.h2database:h2:2.1.214')  # H2 database for testing
    if 'org.jdom2' in combined_code:
        add('org.jdom:jdom2:2.0.6')
    if 'org.apache.poi' in combined_code:
        add('org.apache.poi:poi:5.2.3',
            'org.apache.poi:poi-ooxml:5.2.3')
    if 'com.google.gson' in combined_code:
        add('com.google.code.gson:gson:2.10.1')
    if 'org.dom4j' in combined_code:
        add('org.dom4j:dom4j:2.1.4')
    if 'org.apache.logging.log4j' in combined_code:
        add('org.apache.logging.log4j:log4j-core:2.20.0',
            'org.apache.logging.log4j:log4j-api:2.20.0')
    if 'org.springframework' in combined_code:
        add('org.springframework:spring-context:5.3.23',
            'org.springframework:spring-core:5.3.23',
            'org.springframework:spring-beans:5.3.23')

    return dependencies

def run_java_test_case_gradle(prefix: str, golden_completion: str, suffix: str,
                             assertions: str = "", verbose=True, timeout=60) -> Tuple[bool, str]:
    """
    Run a Java test case using Gradle for complex cases with dependencies and packages.
    Uses Gradle to build/compile with dependencies, then runs the main method directly.

    A throw-away Gradle project is generated in a temporary directory: the
    combined source goes under src/main/java (a ``devbench.test`` package
    declaration is added when the code has none), and build.gradle declares
    the libraries whose packages are mentioned in the code. The project is
    built, its runtime classpath is queried through a ``printClasspath`` task,
    and the main class is started with ``java -ea``.

    Args:
        prefix: Code that precedes the completion
        golden_completion: Completion code placed between prefix and suffix
        suffix: Code that follows the completion
        assertions: Assertion code (currently unused by this function)
        verbose: Whether to print progress details
        timeout: Seconds allowed for each external step (Gradle build,
            classpath query, program run)

    Returns:
        Tuple of success flag and error message (empty string on success)
    """
    import uuid
    import tempfile
    import shutil

    unique_id = str(uuid.uuid4())[:8]
    temp_dir = tempfile.mkdtemp(prefix=f"java_gradle_test_{unique_id}_")

    try:
        # Combine code sections
        prefix_clean = prefix.rstrip()
        suffix_clean = suffix.lstrip()

        # Insert a newline between prefix and completion when neither provides one
        if not prefix.endswith('\n') and not golden_completion.startswith('\n'):
            combined_code = f"{prefix_clean}\n{golden_completion}{suffix_clean}"
        else:
            combined_code = f"{prefix}{golden_completion}{suffix}"

        # Extract package and class name
        package_match = re.search(r'^\s*package\s+([\w.]+);', combined_code, re.MULTILINE)
        package_name = package_match.group(1) if package_match else "devbench.test"

        class_match = re.search(r'public\s+class\s+(\w+)', combined_code)
        class_name = class_match.group(1) if class_match else "TestCase"

        # If no package declaration, add one
        if not package_match:
            combined_code = f"package {package_name};\n\n{combined_code}"

        # Create Gradle directory structure - use src/main/java instead of src/test/java
        src_dir = os.path.join(temp_dir, "src", "main", "java", *package_name.split('.'))
        os.makedirs(src_dir, exist_ok=True)

        # Write Java file
        java_file = os.path.join(src_dir, f"{class_name}.java")
        with open(java_file, 'w', encoding='utf-8') as f:
            f.write(combined_code)

        # Libraries are selected from the package names that appear in the code
        dependencies = _detect_gradle_dependencies(combined_code)
        dependency_lines = "\n".join(f"    {dep}" for dep in dependencies)

        # Create build.gradle - simplified without test configuration
        build_gradle_content = f"""
plugins {{
    id 'java'
    id 'application'
}}

repositories {{
    mavenCentral()
}}

dependencies {{
{dependency_lines}
}}

java {{
    sourceCompatibility = JavaVersion.VERSION_11
    targetCompatibility = JavaVersion.VERSION_11
}}

application {{
    mainClass = '{package_name}.{class_name}'
}}
"""

        with open(os.path.join(temp_dir, "build.gradle"), 'w') as f:
            f.write(build_gradle_content)

        if verbose:
            print("  Using Gradle for dependencies")
            print(f"  Package: {package_name}")
            print(f"  Class: {class_name}")
            print(f"  Dependencies: {len(dependencies)}")

        # Step 1: Build with Gradle to compile and resolve dependencies
        build_cmd = ['gradle', 'build', '--no-daemon', '--console=plain']
        if verbose:
            print(f"  Building: {' '.join(build_cmd)}")

        build_result = subprocess.run(
            build_cmd,
            cwd=temp_dir,
            capture_output=True,
            text=True,
            timeout=timeout
        )

        if build_result.returncode != 0:
            error_output = build_result.stdout + build_result.stderr
            # Only the tail of the (potentially long) Gradle log is reported
            if "cannot find symbol" in error_output or "package does not exist" in error_output:
                return False, f"Compilation failed: {error_output[-300:]}"
            else:
                return False, f"Gradle build failed: {error_output[-200:]}"

        # Step 2: Get the classpath from Gradle
        classpath_cmd = ['gradle', 'printClasspath', '--no-daemon', '--console=plain']

        # Add a task to print classpath to build.gradle
        with open(os.path.join(temp_dir, "build.gradle"), 'a') as f:
            f.write("""

task printClasspath {
    doLast {
        println configurations.runtimeClasspath.asPath
    }
}
""")

        # Same limit as the build step, so the timeout message below stays accurate
        classpath_result = subprocess.run(
            classpath_cmd,
            cwd=temp_dir,
            capture_output=True,
            text=True,
            timeout=timeout
        )

        # Extract classpath from output (last line that looks like a classpath)
        classpath = ""
        if classpath_result.returncode == 0:
            lines = classpath_result.stdout.strip().split('\n')
            for line in reversed(lines):
                if '.jar' in line and ('/' in line or '\\' in line):
                    classpath = line.strip()
                    break

        # Add build output directory to classpath
        build_classes_dir = os.path.join(temp_dir, "build", "classes", "java", "main")
        if classpath:
            full_classpath = f"{build_classes_dir}{os.pathsep}{classpath}"
        else:
            full_classpath = build_classes_dir

        # Step 3: Run the Java class directly with assertions enabled
        java_cmd = ['java', '-ea', '-cp', full_classpath, f'{package_name}.{class_name}']

        if verbose:
            print(f"  Running: {' '.join(java_cmd[:4])} [classpath] {java_cmd[-1]}")

        run_result = subprocess.run(
            java_cmd,
            cwd=temp_dir,
            capture_output=True,
            text=True,
            timeout=timeout
        )

        if verbose and run_result.stdout:
            print(f"  Program output: {run_result.stdout.strip()}")

        if run_result.returncode == 0:
            if verbose:
                print("  Gradle test completed successfully")
            return True, ""
        else:
            error_output = run_result.stdout + run_result.stderr

            # Check for common error patterns
            if "AssertionError" in error_output:
                return False, f"Assertion failed: {error_output.strip()}"
            elif "Exception" in error_output:
                return False, f"Runtime exception: {error_output.strip()}"
            else:
                return False, f"Program failed: {error_output.strip()}"

    except subprocess.TimeoutExpired:
        return False, f"Gradle execution timed out after {timeout} seconds"
    except Exception as e:
        return False, f"Gradle error: {str(e)}"
    finally:
        # Clean up
        try:
            if os.path.exists(temp_dir):
                shutil.rmtree(temp_dir)
                if verbose:
                    print("  Cleaned up Gradle temp directory")
        except Exception as e:
            print(f"Warning: Could not remove Gradle temp directory {temp_dir}: {str(e)}")

def run_java_test_case(prefix: str, golden_completion: str, suffix: str,
                       assertions: str = "", verbose=True, timeout=30) -> Tuple[bool, str]:
    """
    Dispatch a Java test case to the javac-based or the Gradle-based runner.

    The Gradle runner is chosen when the combined code mentions one of the
    known third-party packages or contains a package declaration; otherwise
    the plain javac runner is used. All arguments are forwarded unchanged and
    the chosen runner's (success, message) tuple is returned.
    """
    source = f"{prefix}{golden_completion}{suffix}"

    # Package prefixes that are not part of the JDK and therefore need a build tool.
    third_party_packages = (
        'org.apache.commons',
        'com.google.common',
        'org.junit',
        'org.json',
        'com.fasterxml.jackson',
        'org.springframework',
        'org.hibernate',
        'org.jdom2',
        'org.apache.poi',
        'com.google.gson',
        'org.dom4j',
        'org.apache.logging.log4j',  # Apache Log4j2
        'javax.xml.bind',  # JAXB was removed from JDK 11+, needs external dependency
        'jakarta.xml.bind',  # Jakarta JAXB (modern replacement)
    )

    # Only the first matching package is reported.
    first_external = next((pkg for pkg in third_party_packages if pkg in source), None)
    if first_external is not None and verbose:
        print(f"  Detected external dependency: {first_external}")

    declared_package = re.search(r'^\s*package\s+([\w.]+);', source, re.MULTILINE)
    if declared_package and verbose:
        print(f"  Detected package declaration: {declared_package.group(1)}")

    if first_external is None and not declared_package:
        if verbose:
            print("  Using simple javac for basic case...")
        return run_java_test_case_simple(prefix, golden_completion, suffix, assertions, verbose, timeout)

    if verbose:
        print("  Using Gradle for complex case...")
    return run_java_test_case_gradle(prefix, golden_completion, suffix, assertions, verbose, timeout)

def _npm_package_from_specifier(specifier: str) -> str:
    """
    Map a module specifier reported by Node.js to an installable npm package name.

    Args:
        specifier: Module string captured from a "Cannot find module/package" error

    Returns:
        The package name ("pkg" or "@scope/pkg"), or an empty string when the
        specifier is a relative or absolute file path that npm cannot install
    """
    # Relative ('./x', '../x') and absolute paths refer to files, not packages;
    # reducing them to '.' or '..' would make npm try to install the directory itself.
    if not specifier or specifier.startswith(('.', '/')) or os.path.isabs(specifier):
        return ""

    parts = specifier.split('/')
    if specifier.startswith('@'):
        # Scoped package: keep "@scope/package", drop any deeper sub-path
        return '/'.join(parts[:2])
    # Regular package: drop any sub-path ("lodash/fp" -> "lodash")
    return parts[0]


def run_javascript_test_case(prefix: str, golden_completion: str, suffix: str,
                             assertions: str = "", verbose=True, timeout=30) -> Tuple[bool, str]:
    """
    Run a JavaScript test case by creating a temporary file and executing it with Node.js.

    The code sections are concatenated into a single file inside a fresh temporary
    directory. If Node.js reports a missing npm package, the package is installed
    into that directory with `npm install` and execution is retried (up to 5 runs
    in total). The temporary directory is removed before returning.

    Args:
        prefix: Prefix code
        golden_completion: Golden completion code
        suffix: Suffix code
        assertions: Assertion code
        verbose: Whether to print detailed information
        timeout: Maximum execution time in seconds before killing the process

    Returns:
        Tuple containing success flag and error message if any
    """
    import uuid
    import random
    import tempfile
    import shutil

    # Generate unique identifiers to avoid race conditions
    unique_id = str(uuid.uuid4())[:8]
    temp_dir = tempfile.mkdtemp(prefix=f"js_test_{unique_id}_")

    # Ordered by priority; the first pattern that matches the error output wins
    missing_module_patterns = (
        r"Cannot find module '([^']+)'",
        r"Cannot find package '([^']+)' imported from",
        r"MODULE_NOT_FOUND.*'([^']+)'",
    )

    try:
        # Combine all code sections
        # Environment variables are inherited from parent process
        combined_code = f"""{prefix}
{golden_completion}
{suffix}

// Run assertions
{assertions}
"""

        # Create a temporary JavaScript file
        js_file = os.path.join(temp_dir, f"test_{unique_id}.js")

        if verbose:
            print(f"  Creating JavaScript file: {js_file}")

        # Add a small random delay to reduce race conditions on network requests
        time.sleep(random.uniform(0.1, 0.3))

        with open(js_file, 'w', encoding='utf-8') as f:
            f.write(combined_code)

        # Ensure we get the same PATH as your shell by prioritizing NVM paths
        env = os.environ.copy()
        # IMPORTANT: Replace [NODE-BIN-PATH] with your Node.js binary path
        # Example: "/Users/yourname/.nvm/versions/node/v22.17.0/bin" or "/usr/local/bin"
        nvm_bin_path = "[NODE-BIN-PATH]"

        # Prepend Node path to ensure we get the working node/npm
        if nvm_bin_path != "[NODE-BIN-PATH]":  # Only prepend if user has configured it
            inherited_path = env.get("PATH")
            if inherited_path:
                # os.pathsep keeps this correct on platforms where the separator is not ':'
                env["PATH"] = f"{nvm_bin_path}{os.pathsep}{inherited_path}"
            else:
                env["PATH"] = nvm_bin_path

        if verbose:
            # Resolve the binaries against the child's PATH without touching the
            # process-wide os.environ (which other concurrently running test
            # cases may be reading).
            search_path = env.get("PATH")
            print(f"  Node path: {shutil.which('node', path=search_path)}")
            print(f"  npm path: {shutil.which('npm', path=search_path)}")

        # Try execution with dependency installation if needed
        # Allow up to 5 attempts to handle multiple missing dependencies
        for _attempt in range(5):
            try:
                # Run the code with Node.js
                process = subprocess.run(
                    ["node", js_file],
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    text=True,
                    check=False,
                    env=env,
                    timeout=timeout,
                    cwd=temp_dir
                )

                if process.returncode == 0:
                    # Success!
                    return True, ""

                # Prefer stderr; fall back to stdout so a failure never yields an empty message
                error = process.stderr.strip() or process.stdout.strip()

                # Check if it's a module not found error (allow installation on any attempt)
                if "Cannot find module" in error or "MODULE_NOT_FOUND" in error or "Cannot find package" in error:
                    module_name = ""
                    for pattern in missing_module_patterns:
                        match = re.search(pattern, error)
                        if match:
                            module_name = _npm_package_from_specifier(match.group(1))
                            break

                    if module_name:
                        if verbose:
                            print(f"  Missing dependency: {module_name}, attempting to install...")
                            print(f"  Installing with: npm install {module_name}")

                        # Install into the temp directory so node resolves it from there
                        install_process = subprocess.run(
                            ["npm", "install", module_name],
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            text=True,
                            check=False,
                            cwd=temp_dir,
                            env=env
                        )

                        if install_process.returncode != 0:
                            return False, f"Failed to install dependency {module_name}: {install_process.stderr.strip()}"

                        if verbose:
                            print(f"  Successfully installed {module_name}, retrying execution...")
                        # Re-run the script now that the dependency is present
                        continue

                # Not an installable missing dependency: classify the failure
                if "SyntaxError" in error:
                    return False, f"JavaScript syntax error: {error}"
                elif "ReferenceError" in error:
                    return False, f"JavaScript reference error: {error}"
                elif "TypeError" in error:
                    return False, f"JavaScript type error: {error}"
                else:
                    return False, f"JavaScript execution failed: {error}"

            except subprocess.TimeoutExpired:
                if verbose:
                    print(f"  Test case execution timed out after {timeout} seconds")
                return False, f"Execution timed out after {timeout} seconds"

        # Every attempt ended with a successful install followed by another missing module
        return False, "Failed to execute test case after multiple dependency installation attempts"

    except Exception as e:
        # Harness boundary: report any unexpected failure (e.g. node not installed) as a test failure
        return False, f"Error: {str(e)}"
    finally:
        # Clean up temporary directory
        try:
            if os.path.exists(temp_dir):
                shutil.rmtree(temp_dir)
                if verbose:
                    print(f"  Cleaned up temporary directory: {temp_dir}")
        except Exception as e:
            print(f"Warning: Could not remove temporary directory {temp_dir}: {str(e)}")

def run_typescript_test_case(prefix: str, golden_completion: str, suffix: str,
                             assertions: str = "", verbose=True, timeout=30) -> Tuple[bool, str]:
    """
    Run a TypeScript test case by compiling it to JavaScript and executing with Node.js.

    Args:
        prefix: Prefix code
        golden_completion: Golden completion code
        suffix: Suffix code
        assertions: Assertion code
        verbose: Whether to print detailed information
        timeout: Maximum execution time in seconds before killing the process

    Returns:
        Tuple containing success flag and error message if any
    """
    import uuid
    import random
    import tempfile
    import shutil

    # Generate unique identifiers to avoid race conditions
    unique_id = str(uuid.uuid4())[:8]
    temp_dir = tempfile.mkdtemp(prefix=f"ts_test_{unique_id}_")

    try:
        # Combine all code sections for TypeScript
        # Environment variables are inherited from parent process
        combined_code = f"""{prefix}
{golden_completion}
{suffix}

// Run assertions
{assertions}
"""

        # Create a temporary TypeScript file
        ts_file = os.path.join(temp_dir, f"test_{unique_id}.ts")
        js_file = os.path.join(temp_dir, f"test_{unique_id}.js")

        if verbose:
            print(f"  Creating TypeScript file: {ts_file}")

        # Add a small random delay to reduce race conditions on network requests
        time.sleep(random.uniform(0.1, 0.3))

        with open(ts_file, 'w', encoding='utf-8') as f:
            f.write(combined_code)

        # Ensure we get the same PATH as your shell by prioritizing NVM paths
        env = os.environ.copy()
        # IMPORTANT: Replace [NODE-BIN-PATH] with your Node.js binary path
        # Example: "/Users/yourname/.nvm/versions/node/v22.17.0/bin" or "/usr/local/bin"
        nvm_bin_path = "[NODE-BIN-PATH]"

        # Prepend Node path to ensure we get the working node/npm/npx
        if nvm_bin_path != "[NODE-BIN-PATH]":  # Only prepend if user has configured it
            if "PATH" in env:
                env["PATH"] = f"{nvm_bin_path}:{env['PATH']}"
            else:
                env["PATH"] = nvm_bin_path

        # First, install TypeScript and Node types in the temp directory
        if verbose:
            print(f"  Installing TypeScript and Node types in temp directory...")

        install_ts_process = subprocess.run(
            ["npm", "install", "typescript", "@types/node"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            check=False,
            env=env,
            timeout=30,
            cwd=temp_dir
        )

        if install_ts_process.returncode != 0:
            if verbose:
                print(f"  Warning: Could not install TypeScript locally, will try to use global installation")

        # Try compilation with dependency installation if needed
        # Allow up to 5 attempts to handle multiple missing dependencies
        for compile_attempt in range(5):
            if verbose and compile_attempt > 0:
                print(f"  Retrying TypeScript compilation (attempt {compile_attempt + 1})...")
            elif verbose:
                print(f"  Compiling TypeScript to JavaScript...")

            # Compile TypeScript to JavaScript using tsc
            compile_process = subprocess.run(
                ["npx", "tsc", ts_file, "--outDir", temp_dir, "--target", "ES2020", "--module", "commonjs", "--esModuleInterop", "--allowSyntheticDefaultImports", "--skipLibCheck"],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                check=False,
                env=env,
                timeout=30,
                cwd=temp_dir
            )

            if compile_process.returncode == 0:
                # Compilation succeeded!
                break

            # Compilation failed - check if it's due to missing modules
            compile_error = compile_process.stderr.strip()
            compile_output = compile_process.stdout.strip()
            full_error = compile_error if compile_error else compile_output

            # Check for missing module errors
            if "Cannot find module" in full_error or "Cannot resolve" in full_error:
                # Extract module name from TypeScript error
                patterns = [
                    r"Cannot find module '([^']+)'",
                    r"Cannot resolve '([^']+)'",
                    r"Could not find a declaration file for module '([^']+)'"
                ]

                module_name = None
                for pattern in patterns:
                    match = re.search(pattern, full_error)
                    if match:
                        module_name = match.group(1)
                        # Remove any path components or file extensions
                        # For scoped packages (@scope/package), preserve both scope and package
                        if '/' in module_name:
                            if module_name.startswith('@'):
                                # Scoped package: keep @scope/package
                                parts = module_name.split('/')
                                if len(parts) >= 2:
                                    module_name = parts[0] + '/' + parts[1]
                            else:
                                # Regular package: just take first part
                                module_name = module_name.split('/')[0]
                        if module_name.endswith('.js'):
                            module_name = module_name[:-3]
                        break

                if module_name:
                    if verbose:
                        print(f"  Missing dependency: {module_name}, attempting to install...")

                    # Install both the module and its type definitions
                    # Also try to install @types package (may not exist, that's OK)
                    types_package = f"@types/{module_name}"

                    if verbose:
                        print(f"  Installing: npm install {module_name}")

                    install_process = subprocess.run(
                        ["npm", "install", module_name],
                        stdout=subprocess.PIPE,
                        stderr=subprocess.PIPE,
                        text=True,
                        check=False,
                        cwd=temp_dir,
                        env=env
                    )

                    if install_process.returncode == 0:
                        if verbose:
                            print(f"  Successfully installed {module_name}")

                        # Try to install types package (don't fail if it doesn't exist)
                        install_types = subprocess.run(
                            ["npm", "install", types_package],
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            text=True,
                            check=False,
                            cwd=temp_dir,
                            env=env
                        )
                        if install_types.returncode == 0 and verbose:
                            print(f"  Also installed {types_package}")

                        # Continue to retry compilation
                        continue
                    else:
                        # Couldn't install the package
                        if verbose: