Skip to content

Commit efbeaa5

Browse files
committed
[3.14] GH-136895: Update JIT builds to use LLVM 20 (GH-140329)
1 parent 4f5f6ea commit efbeaa5

File tree

9 files changed

+165
-51
lines changed

9 files changed

+165
-51
lines changed

.github/workflows/jit.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ concurrency:
2121

2222
env:
2323
FORCE_COLOR: 1
24-
LLVM_VERSION: 19
24+
LLVM_VERSION: 20
2525

2626
jobs:
2727
interpreter:
@@ -168,7 +168,6 @@ jobs:
168168
fail-fast: false
169169
matrix:
170170
include:
171-
172171
- name: JIT without optimizations (Debug)
173172
configure_flags: --enable-experimental-jit --with-pydebug
174173
test_env: "PYTHON_UOPS_OPTIMIZE=0"
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Update JIT compilation to use LLVM 20 at build time.

PCbuild/get_external.py

Lines changed: 51 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import argparse
44
import os
55
import pathlib
6+
import shutil
67
import sys
78
import time
89
import urllib.error
@@ -22,15 +23,13 @@ def retrieve_with_retries(download_location, output_path, reporthook,
2223
)
2324
except (urllib.error.URLError, ConnectionError) as ex:
2425
if attempt == max_retries:
25-
msg = f"Download from {download_location} failed."
26-
raise OSError(msg) from ex
26+
raise OSError(f'Download from {download_location} failed.') from ex
2727
time.sleep(2.25**attempt)
2828
else:
2929
return resp
3030

31-
3231
def fetch_zip(commit_hash, zip_dir, *, org='python', binary=False, verbose):
33-
repo = f'cpython-{"bin" if binary else "source"}-deps'
32+
repo = 'cpython-bin-deps' if binary else 'cpython-source-deps'
3433
url = f'https://github.com/{org}/{repo}/archive/{commit_hash}.zip'
3534
reporthook = None
3635
if verbose:
@@ -44,6 +43,23 @@ def fetch_zip(commit_hash, zip_dir, *, org='python', binary=False, verbose):
4443
return filename
4544

4645

46+
def fetch_release(tag, tarball_dir, *, org='python', verbose=False):
47+
url = f'https://github.com/{org}/cpython-bin-deps/releases/download/{tag}/{tag}.tar.xz'
48+
reporthook = None
49+
if verbose:
50+
reporthook = print
51+
tarball_dir.mkdir(parents=True, exist_ok=True)
52+
output_path = tarball_dir / f'{tag}.tar.xz'
53+
retrieve_with_retries(url, output_path, reporthook)
54+
return output_path
55+
56+
57+
def extract_tarball(externals_dir, tarball_path, tag):
58+
output_path = externals_dir / tag
59+
shutil.unpack_archive(os.fspath(tarball_path), os.fspath(output_path))
60+
return output_path
61+
62+
4763
def extract_zip(externals_dir, zip_path):
4864
with zipfile.ZipFile(os.fspath(zip_path)) as zf:
4965
zf.extractall(os.fspath(externals_dir))
@@ -55,6 +71,8 @@ def parse_args():
5571
p.add_argument('-v', '--verbose', action='store_true')
5672
p.add_argument('-b', '--binary', action='store_true',
5773
help='Is the dependency in the binary repo?')
74+
p.add_argument('-r', '--release', action='store_true',
75+
help='Download from GitHub release assets instead of branch')
5876
p.add_argument('-O', '--organization',
5977
help='Organization owning the deps repos', default='python')
6078
p.add_argument('-e', '--externals-dir', type=pathlib.Path,
@@ -67,15 +85,36 @@ def parse_args():
6785

6886
def main():
6987
args = parse_args()
70-
zip_path = fetch_zip(
71-
args.tag,
72-
args.externals_dir / 'zips',
73-
org=args.organization,
74-
binary=args.binary,
75-
verbose=args.verbose,
76-
)
7788
final_name = args.externals_dir / args.tag
78-
extracted = extract_zip(args.externals_dir, zip_path)
89+
90+
# Check if the dependency already exists in externals/ directory
91+
# (either already downloaded/extracted, or checked into the git tree)
92+
if final_name.exists():
93+
if args.verbose:
94+
print(f'{args.tag} already exists at {final_name}, skipping download.')
95+
return
96+
97+
# Determine download method: release artifacts for large deps (like LLVM),
98+
# otherwise zip download from GitHub branches
99+
if args.release:
100+
tarball_path = fetch_release(
101+
args.tag,
102+
args.externals_dir / 'tarballs',
103+
org=args.organization,
104+
verbose=args.verbose,
105+
)
106+
extracted = extract_tarball(args.externals_dir, tarball_path, args.tag)
107+
else:
108+
# Use zip download from GitHub branches
109+
# (cpython-bin-deps if --binary, cpython-source-deps otherwise)
110+
zip_path = fetch_zip(
111+
args.tag,
112+
args.externals_dir / 'zips',
113+
org=args.organization,
114+
binary=args.binary,
115+
verbose=args.verbose,
116+
)
117+
extracted = extract_zip(args.externals_dir, zip_path)
79118
for wait in [1, 2, 3, 5, 8, 0]:
80119
try:
81120
extracted.replace(final_name)

PCbuild/get_externals.bat

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ if NOT "%IncludeLibffi%"=="false" set binaries=%binaries% libffi-3.4.4
8282
if NOT "%IncludeSSL%"=="false" set binaries=%binaries% openssl-bin-3.0.19
8383
if NOT "%IncludeTkinter%"=="false" set binaries=%binaries% tcltk-8.6.15.0
8484
if NOT "%IncludeSSLSrc%"=="false" set binaries=%binaries% nasm-2.11.06
85-
if NOT "%IncludeLLVM%"=="false" set binaries=%binaries% llvm-19.1.7.0
85+
if NOT "%IncludeLLVM%"=="false" set binaries=%binaries% llvm-20.1.8.0
8686

8787
for %%b in (%binaries%) do (
8888
if exist "%EXTERNALS_DIR%\%%b" (
@@ -92,7 +92,11 @@ for %%b in (%binaries%) do (
9292
git clone --depth 1 https://github.com/%ORG%/cpython-bin-deps --branch %%b "%EXTERNALS_DIR%\%%b"
9393
) else (
9494
echo.Fetching %%b...
95-
%PYTHON% -E "%PCBUILD%\get_external.py" -b -O %ORG% -e "%EXTERNALS_DIR%" %%b
95+
if "%%b"=="llvm-20.1.8.0" (
96+
%PYTHON% -E "%PCBUILD%\get_external.py" --release --organization %ORG% --externals-dir "%EXTERNALS_DIR%" %%b
97+
) else (
98+
%PYTHON% -E "%PCBUILD%\get_external.py" --binary --organization %ORG% --externals-dir "%EXTERNALS_DIR%" %%b
99+
)
96100
)
97101
)
98102

Python/jit.c

Lines changed: 71 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -419,15 +419,42 @@ patch_x86_64_32rx(unsigned char *location, uint64_t value)
419419
}
420420

421421
void patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state);
422+
void patch_x86_64_trampoline(unsigned char *location, int ordinal, jit_state *state);
422423

423424
#include "jit_stencils.h"
424425

425426
#if defined(__aarch64__) || defined(_M_ARM64)
426427
#define TRAMPOLINE_SIZE 16
428+
#define DATA_ALIGN 8
429+
#elif defined(__x86_64__) && defined(__APPLE__)
430+
// LLVM 20 on macOS x86_64 debug builds: GOT entries may exceed ±2GB PC-relative
431+
// range.
432+
#define TRAMPOLINE_SIZE 16 // 14 bytes + 2 bytes padding for alignment
433+
#define DATA_ALIGN 8
427434
#else
428435
#define TRAMPOLINE_SIZE 0
436+
#define DATA_ALIGN 1
429437
#endif
430438

439+
// Get the trampoline memory location for a given symbol ordinal.
440+
static unsigned char *
441+
get_trampoline_slot(int ordinal, jit_state *state)
442+
{
443+
const uint32_t symbol_mask = 1 << (ordinal % 32);
444+
const uint32_t trampoline_mask = state->trampolines.mask[ordinal / 32];
445+
assert(symbol_mask & trampoline_mask);
446+
447+
// Count the number of set bits in the trampoline mask lower than ordinal.
448+
int index = _Py_popcount32(trampoline_mask & (symbol_mask - 1));
449+
for (int i = 0; i < ordinal / 32; i++) {
450+
index += _Py_popcount32(state->trampolines.mask[i]);
451+
}
452+
453+
unsigned char *trampoline = state->trampolines.mem + index * TRAMPOLINE_SIZE;
454+
assert((size_t)(index + 1) * TRAMPOLINE_SIZE <= state->trampolines.size);
455+
return trampoline;
456+
}
457+
431458
// Generate and patch AArch64 trampolines. The symbols to jump to are stored
432459
// in the jit_stencils.h in the symbols_map.
433460
void
@@ -444,20 +471,8 @@ patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state)
444471
return;
445472
}
446473

447-
// Masking is done modulo 32 as the mask is stored as an array of uint32_t
448-
const uint32_t symbol_mask = 1 << (ordinal % 32);
449-
const uint32_t trampoline_mask = state->trampolines.mask[ordinal / 32];
450-
assert(symbol_mask & trampoline_mask);
451-
452-
// Count the number of set bits in the trampoline mask lower than ordinal,
453-
// this gives the index into the array of trampolines.
454-
int index = _Py_popcount32(trampoline_mask & (symbol_mask - 1));
455-
for (int i = 0; i < ordinal / 32; i++) {
456-
index += _Py_popcount32(state->trampolines.mask[i]);
457-
}
458-
459-
uint32_t *p = (uint32_t*)(state->trampolines.mem + index * TRAMPOLINE_SIZE);
460-
assert((size_t)(index + 1) * TRAMPOLINE_SIZE <= state->trampolines.size);
474+
// Out of range - need a trampoline
475+
uint32_t *p = (uint32_t *)get_trampoline_slot(ordinal, state);
461476

462477

463478
/* Generate the trampoline
@@ -474,6 +489,37 @@ patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state)
474489
patch_aarch64_26r(location, (uintptr_t)p);
475490
}
476491

492+
// Generate and patch x86_64 trampolines.
493+
void
494+
patch_x86_64_trampoline(unsigned char *location, int ordinal, jit_state *state)
495+
{
496+
uint64_t value = (uintptr_t)symbols_map[ordinal];
497+
int64_t range = (int64_t)value - 4 - (int64_t)location;
498+
499+
// If we are in range of 32 signed bits, we can patch directly
500+
if (range >= -(1LL << 31) && range < (1LL << 31)) {
501+
patch_32r(location, value - 4);
502+
return;
503+
}
504+
505+
// Out of range - need a trampoline
506+
unsigned char *trampoline = get_trampoline_slot(ordinal, state);
507+
508+
/* Generate the trampoline (14 bytes, padded to 16):
509+
0: ff 25 00 00 00 00 jmp *(%rip)
510+
6: XX XX XX XX XX XX XX XX (64-bit target address)
511+
512+
Reference: https://wiki.osdev.org/X86-64_Instruction_Encoding#FF (JMP r/m64)
513+
*/
514+
trampoline[0] = 0xFF;
515+
trampoline[1] = 0x25;
516+
memset(trampoline + 2, 0, 4);
517+
memcpy(trampoline + 6, &value, 8);
518+
519+
// Patch the call site to call the trampoline instead
520+
patch_32r(location, (uintptr_t)trampoline - 4);
521+
}
522+
477523
static void
478524
combine_symbol_mask(const symbol_mask src, symbol_mask dest)
479525
{
@@ -515,8 +561,13 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz
515561
// Round up to the nearest page:
516562
size_t page_size = get_page_size();
517563
assert((page_size & (page_size - 1)) == 0);
518-
size_t padding = page_size - ((code_size + state.trampolines.size + data_size) & (page_size - 1));
519-
size_t total_size = code_size + state.trampolines.size + data_size + padding;
564+
size_t code_padding =
565+
DATA_ALIGN - ((code_size + state.trampolines.size) & (DATA_ALIGN - 1));
566+
size_t padding = page_size -
567+
((code_size + state.trampolines.size + code_padding + data_size) &
568+
(page_size - 1));
569+
size_t total_size =
570+
code_size + state.trampolines.size + code_padding + data_size + padding;
520571
unsigned char *memory = jit_alloc(total_size);
521572
if (memory == NULL) {
522573
return -1;
@@ -535,7 +586,7 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz
535586
// Loop again to emit the code:
536587
unsigned char *code = memory;
537588
state.trampolines.mem = memory + code_size;
538-
unsigned char *data = memory + code_size + state.trampolines.size;
589+
unsigned char *data = memory + code_size + state.trampolines.size + code_padding;
539590
// Compile the shim, which handles converting between the native
540591
// calling convention and the calling convention used by jitted code
541592
// (which may be different for efficiency reasons).
@@ -557,7 +608,9 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz
557608
code += group->code_size;
558609
data += group->data_size;
559610
assert(code == memory + code_size);
560-
assert(data == memory + code_size + state.trampolines.size + data_size);
611+
assert(
612+
data ==
613+
memory + code_size + state.trampolines.size + code_padding + data_size);
561614
if (mark_executable(memory, total_size)) {
562615
jit_free(memory, total_size);
563616
return -1;

Tools/jit/README.md

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,32 +9,32 @@ Python 3.11 or newer is required to build the JIT.
99

1010
The JIT compiler does not require end users to install any third-party dependencies, but part of it must be *built* using LLVM[^why-llvm]. You are *not* required to build the rest of CPython using LLVM, or even the same version of LLVM (in fact, this is uncommon).
1111

12-
LLVM version 19 is required. Both `clang` and `llvm-readobj` need to be installed and discoverable (version suffixes, like `clang-19`, are okay). It's highly recommended that you also have `llvm-objdump` available, since this allows the build script to dump human-readable assembly for the generated code.
12+
LLVM version 20 is the officially supported version. If needed, you can select a different version by setting the `LLVM_VERSION` environment variable during configure. Both `clang` and `llvm-readobj` need to be installed and discoverable (version suffixes, like `clang-20`, are okay). It's highly recommended that you also have `llvm-objdump` available, since this allows the build script to dump human-readable assembly for the generated code.
1313

1414
It's easy to install all of the required tools:
1515

1616
### Linux
1717

18-
Install LLVM 19 on Ubuntu/Debian:
18+
Install LLVM 20 on Ubuntu/Debian:
1919

2020
```sh
2121
wget https://apt.llvm.org/llvm.sh
2222
chmod +x llvm.sh
23-
sudo ./llvm.sh 19
23+
sudo ./llvm.sh 20
2424
```
2525

26-
Install LLVM 19 on Fedora Linux 40 or newer:
26+
Install LLVM 20 on Fedora Linux 40 or newer:
2727

2828
```sh
29-
sudo dnf install 'clang(major) = 19' 'llvm(major) = 19'
29+
sudo dnf install 'clang(major) = 20' 'llvm(major) = 20'
3030
```
3131

3232
### macOS
3333

34-
Install LLVM 19 with [Homebrew](https://brew.sh):
34+
Install LLVM 20 with [Homebrew](https://brew.sh):
3535

3636
```sh
37-
brew install llvm@19
37+
brew install llvm@20
3838
```
3939

4040
Homebrew won't add any of the tools to your `$PATH`. That's okay; the build script knows how to find them.
@@ -43,14 +43,18 @@ Homebrew won't add any of the tools to your `$PATH`. That's okay; the build scri
4343

4444
LLVM is downloaded automatically (along with other external binary dependencies) by `PCbuild\build.bat`.
4545

46-
Otherwise, you can install LLVM 19 [by searching for it on LLVM's GitHub releases page](https://github.com/llvm/llvm-project/releases?q=19), clicking on "Assets", downloading the appropriate Windows installer for your platform (likely the file ending with `-win64.exe`), and running it. **When installing, be sure to select the option labeled "Add LLVM to the system PATH".**
46+
Otherwise, you can install LLVM 20 [by searching for it on LLVM's GitHub releases page](https://github.com/llvm/llvm-project/releases?q=20), clicking on "Assets", downloading the appropriate Windows installer for your platform (likely the file ending with `-win64.exe`), and running it. **When installing, be sure to select the option labeled "Add LLVM to the system PATH".**
4747

4848
Alternatively, you can use [chocolatey](https://chocolatey.org):
4949

5050
```sh
51-
choco install llvm --version=19.1.0
51+
choco install llvm --version=20.1.8
5252
```
5353

54+
### Dev Containers
55+
56+
If you are working on CPython in a [Codespaces instance](https://devguide.python.org/getting-started/setup-building/#using-codespaces), there's no
57+
need to install LLVM as the Fedora 42 base image includes LLVM 20 out of the box.
5458

5559
## Building
5660

Tools/jit/_llvm.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@
1010

1111
import _targets
1212

13-
_LLVM_VERSION = 19
13+
_LLVM_VERSION = 20
1414
_LLVM_VERSION_PATTERN = re.compile(rf"version\s+{_LLVM_VERSION}\.\d+\.\d+\S*\s+")
15-
_EXTERNALS_LLVM_TAG = "llvm-19.1.7.0"
15+
_EXTERNALS_LLVM_TAG = "llvm-20.1.8.0"
1616

1717
_P = typing.ParamSpec("_P")
1818
_R = typing.TypeVar("_R")

Tools/jit/_stencils.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,23 @@ def process_relocations(
302302
self._trampolines.add(ordinal)
303303
hole.addend = ordinal
304304
hole.symbol = None
305+
# x86_64 Darwin trampolines for external symbols
306+
elif (
307+
hole.kind == "X86_64_RELOC_BRANCH"
308+
and hole.value is HoleValue.ZERO
309+
and hole.symbol not in self.symbols
310+
):
311+
hole.func = "patch_x86_64_trampoline"
312+
hole.need_state = True
313+
assert hole.symbol is not None
314+
if hole.symbol in known_symbols:
315+
ordinal = known_symbols[hole.symbol]
316+
else:
317+
ordinal = len(known_symbols)
318+
known_symbols[hole.symbol] = ordinal
319+
self._trampolines.add(ordinal)
320+
hole.addend = ordinal
321+
hole.symbol = None
305322
self.code.remove_jump()
306323
self.code.add_nops(nop=nop, alignment=alignment)
307324
self.data.pad(8)

0 commit comments

Comments
 (0)