Skip to content

Commit d5381e1

Browse files
authored
[3.15] gh-149619: Harden _remote_debugging error paths (GH-150349) (#150435)
(cherry picked from commit a5be25d)
1 parent d23b06b commit d5381e1

11 files changed

Lines changed: 597 additions & 196 deletions

File tree

Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -975,7 +975,11 @@ def test_writer_total_samples_after_close_returns_zero(self):
975975
class TestBinaryFormatValidation(BinaryFormatTestBase):
976976
"""Tests for malformed binary files."""
977977

978+
HDR_OFF_SAMPLES = 28
978979
HDR_OFF_THREADS = 32
980+
HDR_OFF_STR_TABLE = 36
981+
HDR_OFF_FRAME_TABLE = 44
982+
FILE_HEADER_PLACEHOLDER_SIZE = 64
979983

980984
def test_replay_rejects_more_threads_than_declared(self):
981985
"""Replay rejects files with more unique threads than the header declares."""
@@ -1000,6 +1004,43 @@ def test_replay_rejects_more_threads_than_declared(self):
10001004
"threads than declared in header (declared 1, found at least 2)",
10011005
)
10021006

1007+
def test_replay_rejects_sample_count_mismatch(self):
1008+
"""Replay rejects files whose decoded samples disagree with the header."""
1009+
samples = [[make_interpreter(0, [
1010+
make_thread(1, [make_frame("sample.py", 10, "sample")])
1011+
])]]
1012+
filename = self.create_binary_file(samples, compression="none")
1013+
1014+
with open(filename, "r+b") as raw:
1015+
raw.seek(self.HDR_OFF_SAMPLES)
1016+
raw.write(struct.pack("=I", 2))
1017+
1018+
with BinaryReader(filename) as reader:
1019+
self.assertEqual(reader.get_info()["sample_count"], 2)
1020+
with self.assertRaises(ValueError) as cm:
1021+
reader.replay_samples(RawCollector())
1022+
self.assertEqual(
1023+
str(cm.exception),
1024+
"Sample count mismatch: header declares 2 samples "
1025+
"but replay decoded 1",
1026+
)
1027+
1028+
def test_replay_rejects_trailing_partial_sample_header(self):
1029+
"""Replay rejects partial sample bytes instead of silently stopping."""
1030+
filename = self.create_binary_file([], compression="none")
1031+
sample_data_end = self.FILE_HEADER_PLACEHOLDER_SIZE + 1
1032+
1033+
with open(filename, "r+b") as raw:
1034+
raw.seek(self.HDR_OFF_STR_TABLE)
1035+
raw.write(struct.pack("=Q", sample_data_end))
1036+
raw.seek(self.HDR_OFF_FRAME_TABLE)
1037+
raw.write(struct.pack("=Q", sample_data_end))
1038+
1039+
with BinaryReader(filename) as reader:
1040+
with self.assertRaises(ValueError) as cm:
1041+
reader.replay_samples(RawCollector())
1042+
self.assertEqual(str(cm.exception), "Truncated sample data: 1 trailing bytes")
1043+
10031044

10041045
class TestBinaryEncodings(BinaryFormatTestBase):
10051046
"""Tests specifically targeting different stack encodings."""

Modules/_remote_debugging/_remote_debugging.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ typedef enum _WIN32_THREADSTATE {
180180
#define set_exception_cause(unwinder, exc_type, message) \
181181
do { \
182182
assert(PyErr_Occurred() && "function returned -1 without setting exception"); \
183-
if (unwinder->debug) { \
183+
if (unwinder->debug && !_Py_RemoteDebug_HasPermissionError()) { \
184184
_set_debug_exception_cause(exc_type, message); \
185185
} \
186186
} while (0)

Modules/_remote_debugging/asyncio.c

Lines changed: 23 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -22,35 +22,38 @@ _Py_RemoteDebug_GetAsyncioDebugAddress(proc_handle_t* handle)
2222
address = search_windows_map_for_section(handle, "AsyncioD", L"_asyncio",
2323
NULL);
2424
if (address == 0) {
25-
// Error out: 'python' substring covers both executable and DLL
26-
PyObject *exc = PyErr_GetRaisedException();
27-
PyErr_SetString(PyExc_RuntimeError, "Failed to find the AsyncioDebug section in the process.");
28-
_PyErr_ChainExceptions1(exc);
25+
if (!_Py_RemoteDebug_HasPermissionError()) {
26+
PyObject *exc = PyErr_GetRaisedException();
27+
PyErr_SetString(PyExc_RuntimeError, "Failed to find the AsyncioDebug section in the process.");
28+
_PyErr_ChainExceptions1(exc);
29+
}
2930
}
3031
#elif defined(__linux__) && HAVE_PROCESS_VM_READV
3132
// On Linux, search for asyncio debug in executable or DLL
3233
address = search_linux_map_for_section(handle, "AsyncioDebug", "python",
3334
NULL);
3435
if (address == 0) {
35-
// Error out: 'python' substring covers both executable and DLL
36-
PyObject *exc = PyErr_GetRaisedException();
37-
PyErr_SetString(PyExc_RuntimeError, "Failed to find the AsyncioDebug section in the process.");
38-
_PyErr_ChainExceptions1(exc);
36+
if (!_Py_RemoteDebug_HasPermissionError()) {
37+
PyObject *exc = PyErr_GetRaisedException();
38+
PyErr_SetString(PyExc_RuntimeError, "Failed to find the AsyncioDebug section in the process.");
39+
_PyErr_ChainExceptions1(exc);
40+
}
3941
}
4042
#elif defined(__APPLE__) && TARGET_OS_OSX
4143
// On macOS, try libpython first, then fall back to python
4244
address = search_map_for_section(handle, "AsyncioDebug", "libpython",
4345
NULL);
44-
if (address == 0) {
46+
if (address == 0 && !_Py_RemoteDebug_HasPermissionError()) {
4547
PyErr_Clear();
4648
address = search_map_for_section(handle, "AsyncioDebug", "python",
4749
NULL);
4850
}
4951
if (address == 0) {
50-
// Error out: 'python' substring covers both executable and DLL
51-
PyObject *exc = PyErr_GetRaisedException();
52-
PyErr_SetString(PyExc_RuntimeError, "Failed to find the AsyncioDebug section in the process.");
53-
_PyErr_ChainExceptions1(exc);
52+
if (!_Py_RemoteDebug_HasPermissionError()) {
53+
PyObject *exc = PyErr_GetRaisedException();
54+
PyErr_SetString(PyExc_RuntimeError, "Failed to find the AsyncioDebug section in the process.");
55+
_PyErr_ChainExceptions1(exc);
56+
}
5457
}
5558
#else
5659
Py_UNREACHABLE();
@@ -96,10 +99,12 @@ ensure_async_debug_offsets(RemoteUnwinderObject *unwinder)
9699
return -1;
97100
}
98101
if (result < 0) {
99-
PyErr_Clear();
100-
PyErr_SetString(PyExc_RuntimeError, "AsyncioDebug section not available");
101-
set_exception_cause(unwinder, PyExc_RuntimeError,
102-
"AsyncioDebug section unavailable - asyncio module may not be loaded in target process");
102+
if (!_Py_RemoteDebug_HasPermissionError()) {
103+
PyErr_Clear();
104+
PyErr_SetString(PyExc_RuntimeError, "AsyncioDebug section not available");
105+
set_exception_cause(unwinder, PyExc_RuntimeError,
106+
"AsyncioDebug section unavailable - asyncio module may not be loaded in target process");
107+
}
103108
return -1;
104109
}
105110

@@ -218,7 +223,7 @@ parse_task_name(
218223

219224
if ((GET_MEMBER(unsigned long, type_obj, unwinder->debug_offsets.type_object.tp_flags) & Py_TPFLAGS_LONG_SUBCLASS)) {
220225
long res = read_py_long(unwinder, task_name_addr);
221-
if (res == -1) {
226+
if (res == -1 && PyErr_Occurred()) {
222227
set_exception_cause(unwinder, PyExc_RuntimeError, "Task name PyLong parsing failed");
223228
return NULL;
224229
}

Modules/_remote_debugging/binary_io_reader.c

Lines changed: 67 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -380,7 +380,22 @@ binary_reader_open(PyObject *path)
380380
Py_fclose(fp);
381381
goto error;
382382
}
383+
if (st.st_size < 0) {
384+
PyErr_SetString(PyExc_IOError, "Invalid negative file size");
385+
Py_fclose(fp);
386+
goto error;
387+
}
388+
if ((uintmax_t)st.st_size > SIZE_MAX) {
389+
PyErr_SetString(PyExc_OverflowError, "File is too large to map");
390+
Py_fclose(fp);
391+
goto error;
392+
}
383393
reader->mapped_size = st.st_size;
394+
if (reader->mapped_size == 0) {
395+
PyErr_SetString(PyExc_ValueError, "File too small for header");
396+
Py_fclose(fp);
397+
goto error;
398+
}
384399

385400
/* Map the file into memory.
386401
* MAP_POPULATE (Linux-only) pre-faults all pages at mmap time, which:
@@ -424,7 +439,10 @@ binary_reader_open(PyObject *path)
424439
}
425440
#endif
426441

427-
(void)Py_fclose(fp);
442+
if (Py_fclose(fp) != 0) {
443+
PyErr_SetFromErrno(PyExc_IOError);
444+
goto error;
445+
}
428446

429447
uint8_t *data = reader->mapped_data;
430448
size_t file_size = reader->mapped_size;
@@ -444,7 +462,15 @@ binary_reader_open(PyObject *path)
444462
PyErr_SetFromErrno(PyExc_IOError);
445463
goto error;
446464
}
465+
if ((uint64_t)file_size_off > SIZE_MAX) {
466+
PyErr_SetString(PyExc_OverflowError, "File is too large to read");
467+
goto error;
468+
}
447469
reader->file_size = (size_t)file_size_off;
470+
if (reader->file_size == 0) {
471+
PyErr_SetString(PyExc_ValueError, "File too small for header");
472+
goto error;
473+
}
448474
if (FSEEK64(reader->fp, 0, SEEK_SET) != 0) {
449475
PyErr_SetFromErrno(PyExc_IOError);
450476
goto error;
@@ -456,8 +482,18 @@ binary_reader_open(PyObject *path)
456482
goto error;
457483
}
458484

459-
if (fread(reader->file_data, 1, reader->file_size, reader->fp) != reader->file_size) {
460-
PyErr_SetFromErrno(PyExc_IOError);
485+
size_t nread = fread(reader->file_data, 1, reader->file_size, reader->fp);
486+
if (nread != reader->file_size) {
487+
int err = errno;
488+
if (ferror(reader->fp) && err != 0) {
489+
errno = err;
490+
PyErr_SetFromErrno(PyExc_IOError);
491+
}
492+
else {
493+
PyErr_Format(PyExc_ValueError,
494+
"Unexpected end of file: read %zu of %zu bytes",
495+
nread, reader->file_size);
496+
}
461497
goto error;
462498
}
463499

@@ -944,10 +980,16 @@ invoke_progress_callback(PyObject *callback, Py_ssize_t current, uint32_t total)
944980
Py_ssize_t
945981
binary_reader_replay(BinaryReader *reader, PyObject *collector, PyObject *progress_callback)
946982
{
947-
if (!PyObject_HasAttrString(collector, "collect")) {
983+
PyObject *collect_method;
984+
int has_collect = PyObject_GetOptionalAttrString(collector, "collect", &collect_method);
985+
if (has_collect < 0) {
986+
return -1;
987+
}
988+
if (has_collect == 0) {
948989
PyErr_SetString(PyExc_TypeError, "Collector must have a collect() method");
949990
return -1;
950991
}
992+
Py_DECREF(collect_method);
951993

952994
/* Get module state for struct sequence types */
953995
PyObject *module = PyImport_ImportModule("_remote_debugging");
@@ -973,7 +1015,10 @@ binary_reader_replay(BinaryReader *reader, PyObject *collector, PyObject *progre
9731015
while (offset < reader->sample_data_size) {
9741016
/* Read thread_id (8 bytes) + interpreter_id (4 bytes) + encoding byte */
9751017
if (reader->sample_data_size - offset < SAMPLE_HEADER_FIXED_SIZE) {
976-
break; /* End of data */
1018+
PyErr_Format(PyExc_ValueError,
1019+
"Truncated sample data: %zu trailing bytes",
1020+
reader->sample_data_size - offset);
1021+
return -1;
9771022
}
9781023

9791024
/* Use memcpy to avoid strict aliasing violations, then byte-swap if needed */
@@ -1019,6 +1064,11 @@ binary_reader_replay(BinaryReader *reader, PyObject *collector, PyObject *progre
10191064
count, max_possible_samples);
10201065
return -1;
10211066
}
1067+
if ((uint64_t)count > (uint64_t)PY_SSIZE_T_MAX - (uint64_t)replayed) {
1068+
PyErr_SetString(PyExc_OverflowError,
1069+
"Sample count exceeds Py_ssize_t maximum");
1070+
return -1;
1071+
}
10221072

10231073
reader->stats.repeat_records++;
10241074
reader->stats.repeat_samples += count;
@@ -1149,6 +1199,11 @@ binary_reader_replay(BinaryReader *reader, PyObject *collector, PyObject *progre
11491199
return -1;
11501200
}
11511201
Py_DECREF(timestamps_list);
1202+
if (replayed == PY_SSIZE_T_MAX) {
1203+
PyErr_SetString(PyExc_OverflowError,
1204+
"Sample count exceeds Py_ssize_t maximum");
1205+
return -1;
1206+
}
11521207
replayed++;
11531208
reader->stats.total_samples++;
11541209
break;
@@ -1167,6 +1222,13 @@ binary_reader_replay(BinaryReader *reader, PyObject *collector, PyObject *progre
11671222
}
11681223
}
11691224

1225+
if ((uint64_t)replayed != reader->sample_count) {
1226+
PyErr_Format(PyExc_ValueError,
1227+
"Sample count mismatch: header declares %u samples but replay decoded %zd",
1228+
reader->sample_count, replayed);
1229+
return -1;
1230+
}
1231+
11701232
/* Final progress callback at 100% */
11711233
if (invoke_progress_callback(progress_callback, replayed, reader->sample_count) < 0) {
11721234
return -1;

Modules/_remote_debugging/binary_io_writer.c

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,15 @@ fwrite_checked_allow_threads(const void *data, size_t size, FILE *fp)
108108
written = fwrite(data, 1, size, fp);
109109
Py_END_ALLOW_THREADS
110110
if (written != size) {
111-
PyErr_SetFromErrno(PyExc_IOError);
111+
int err = errno;
112+
if (ferror(fp) && err != 0) {
113+
errno = err;
114+
PyErr_SetFromErrno(PyExc_IOError);
115+
}
116+
else {
117+
PyErr_Format(PyExc_IOError,
118+
"short write: wrote %zu of %zu bytes", written, size);
119+
}
112120
return -1;
113121
}
114122
return 0;
@@ -366,6 +374,11 @@ writer_intern_string(BinaryWriter *writer, PyObject *string, uint32_t *index)
366374
return 0;
367375
}
368376

377+
if (writer->string_count >= UINT32_MAX) {
378+
PyErr_SetString(PyExc_OverflowError,
379+
"too many strings for binary format");
380+
return -1;
381+
}
369382
if (writer->string_count >= writer->string_capacity) {
370383
if (grow_parallel_arrays((void **)&writer->strings,
371384
(void **)&writer->string_lengths,
@@ -380,6 +393,12 @@ writer_intern_string(BinaryWriter *writer, PyObject *string, uint32_t *index)
380393
if (!str_data) {
381394
return -1;
382395
}
396+
if ((uintmax_t)str_len > UINT32_MAX) {
397+
PyErr_Format(PyExc_OverflowError,
398+
"string length %zd exceeds binary format maximum %u",
399+
str_len, UINT32_MAX);
400+
return -1;
401+
}
383402

384403
char *str_copy = PyMem_Malloc(str_len + 1);
385404
if (!str_copy) {
@@ -422,6 +441,11 @@ writer_intern_frame(BinaryWriter *writer, const FrameEntry *entry, uint32_t *ind
422441
return 0;
423442
}
424443

444+
if (writer->frame_count >= UINT32_MAX) {
445+
PyErr_SetString(PyExc_OverflowError,
446+
"too many frames for binary format");
447+
return -1;
448+
}
425449
if (GROW_ARRAY(writer->frame_entries, writer->frame_count,
426450
writer->frame_capacity, FrameEntry) < 0) {
427451
return -1;
@@ -466,6 +490,11 @@ writer_get_or_create_thread_entry(BinaryWriter *writer, uint64_t thread_id,
466490
}
467491
}
468492

493+
if (writer->thread_count >= UINT32_MAX) {
494+
PyErr_SetString(PyExc_OverflowError,
495+
"too many threads for binary format");
496+
return NULL;
497+
}
469498
if (writer->thread_count >= writer->thread_capacity) {
470499
ThreadEntry *new_entries = grow_array(writer->thread_entries,
471500
&writer->thread_capacity,
@@ -600,6 +629,11 @@ flush_pending_rle(BinaryWriter *writer, ThreadEntry *entry)
600629
if (!entry->has_pending_rle || entry->pending_rle_count == 0) {
601630
return 0;
602631
}
632+
if (entry->pending_rle_count > UINT32_MAX - writer->total_samples) {
633+
PyErr_SetString(PyExc_OverflowError,
634+
"too many samples for binary format");
635+
return -1;
636+
}
603637

604638
/* Write RLE record:
605639
* [thread_id: 8] [interpreter_id: 4] [STACK_REPEAT: 1] [count: varint]
@@ -644,6 +678,12 @@ write_sample_with_encoding(BinaryWriter *writer, ThreadEntry *entry,
644678
const uint32_t *frame_indices, size_t stack_depth,
645679
size_t shared_count, size_t pop_count, size_t push_count)
646680
{
681+
if (writer->total_samples == UINT32_MAX) {
682+
PyErr_SetString(PyExc_OverflowError,
683+
"too many samples for binary format");
684+
return -1;
685+
}
686+
647687
/* Header: thread_id(8) + interpreter_id(4) + encoding(1) + delta(varint) + status(1) */
648688
uint8_t header_buf[SAMPLE_HEADER_MAX_SIZE];
649689
memcpy(header_buf + SMP_OFF_THREAD_ID, &entry->thread_id, SMP_SIZE_THREAD_ID);

Modules/_remote_debugging/code_objects.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,6 @@ cache_tlbc_array(RemoteUnwinderObject *unwinder, uintptr_t code_addr, uintptr_t
4747

4848
// Read the TLBC array pointer
4949
if (read_ptr(unwinder, tlbc_array_addr, &tlbc_array_ptr) != 0) {
50-
PyErr_SetString(PyExc_RuntimeError, "Failed to read TLBC array pointer");
5150
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read TLBC array pointer");
5251
return 0; // Read error
5352
}
@@ -61,7 +60,6 @@ cache_tlbc_array(RemoteUnwinderObject *unwinder, uintptr_t code_addr, uintptr_t
6160
// Read the TLBC array size
6261
Py_ssize_t tlbc_size;
6362
if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, tlbc_array_ptr, sizeof(tlbc_size), &tlbc_size) != 0) {
64-
PyErr_SetString(PyExc_RuntimeError, "Failed to read TLBC array size");
6563
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read TLBC array size");
6664
return 0; // Read error
6765
}

Modules/_remote_debugging/module.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -411,6 +411,9 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self,
411411
return -1;
412412
}
413413
if (async_debug_result < 0) {
414+
if (_Py_RemoteDebug_HasPermissionError()) {
415+
return -1;
416+
}
414417
PyErr_Clear();
415418
memset(&self->async_debug_offsets, 0, sizeof(self->async_debug_offsets));
416419
self->async_debug_offsets_available = 0;

0 commit comments

Comments
 (0)