python
diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py b/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py
Lines changed: 41 additions & 0 deletions
diff --git a/Modules/_remote_debugging/_remote_debugging.h b/Modules/_remote_debugging/_remote_debugging.h
Lines changed: 1 addition & 1 deletion
diff --git a/Modules/_remote_debugging/asyncio.c b/Modules/_remote_debugging/asyncio.c
Lines changed: 23 additions & 18 deletions
diff --git a/Modules/_remote_debugging/binary_io_reader.c b/Modules/_remote_debugging/binary_io_reader.c
Lines changed: 67 additions & 5 deletions
diff --git a/Modules/_remote_debugging/binary_io_writer.c b/Modules/_remote_debugging/binary_io_writer.c
Lines changed: 41 additions & 1 deletion
diff --git a/Modules/_remote_debugging/code_objects.c b/Modules/_remote_debugging/code_objects.c
Lines changed: 0 additions & 2 deletions
diff --git a/Modules/_remote_debugging/module.c b/Modules/_remote_debugging/module.c
Lines changed: 3 additions & 0 deletions
@@ -975,7 +975,11 @@ def test_writer_total_samples_after_close_returns_zero(self):
 class TestBinaryFormatValidation(BinaryFormatTestBase):
     """Tests for malformed binary files."""
 
+    HDR_OFF_SAMPLES = 28
     HDR_OFF_THREADS = 32
+    HDR_OFF_STR_TABLE = 36
+    HDR_OFF_FRAME_TABLE = 44
+    FILE_HEADER_PLACEHOLDER_SIZE = 64
 
     def test_replay_rejects_more_threads_than_declared(self):
         """Replay rejects files with more unique threads than the header declares."""
@@ -1000,6 +1004,43 @@ def test_replay_rejects_more_threads_than_declared(self):
                 "threads than declared in header (declared 1, found at least 2)",
             )
 
+    def test_replay_rejects_sample_count_mismatch(self):
+        """Replay rejects files whose decoded samples disagree with the header."""
+        samples = [[make_interpreter(0, [  # a single entry; the expected error below reports "decoded 1"
+            make_thread(1, [make_frame("sample.py", 10, "sample")])
+        ])]]
+        filename = self.create_binary_file(samples, compression="none")
+
+        with open(filename, "r+b") as raw:  # reopen to patch the header in place
+            raw.seek(self.HDR_OFF_SAMPLES)
+            raw.write(struct.pack("=I", 2))  # native-order uint32: header now declares 2 samples
+
+        with BinaryReader(filename) as reader:
+            self.assertEqual(reader.get_info()["sample_count"], 2)  # the patched field is what get_info() reports
+            with self.assertRaises(ValueError) as cm:
+                reader.replay_samples(RawCollector())
+            self.assertEqual(  # pin the exact message, not just the exception type
+                str(cm.exception),
+                "Sample count mismatch: header declares 2 samples "
+                "but replay decoded 1",
+            )
+
+    def test_replay_rejects_trailing_partial_sample_header(self):
+        """Replay rejects partial sample bytes instead of silently stopping."""
+        filename = self.create_binary_file([], compression="none")  # file written with an empty sample list
+        sample_data_end = self.FILE_HEADER_PLACEHOLDER_SIZE + 1  # 64 + 1 = 65
+
+        with open(filename, "r+b") as raw:  # NOTE(review): presumably these offsets bound the sample-data region, leaving 1 stray byte - confirm against the reader
+            raw.seek(self.HDR_OFF_STR_TABLE)
+            raw.write(struct.pack("=Q", sample_data_end))  # native-order uint64
+            raw.seek(self.HDR_OFF_FRAME_TABLE)
+            raw.write(struct.pack("=Q", sample_data_end))  # same value written to the frame-table field
+
+        with BinaryReader(filename) as reader:
+            with self.assertRaises(ValueError) as cm:
+                reader.replay_samples(RawCollector())
+            self.assertEqual(str(cm.exception), "Truncated sample data: 1 trailing bytes")  # exact message is part of the contract under test
+
 
 class TestBinaryEncodings(BinaryFormatTestBase):
     """Tests specifically targeting different stack encodings."""
 
@@ -180,7 +180,7 @@ typedef enum _WIN32_THREADSTATE {
 #define set_exception_cause(unwinder, exc_type, message)                              \
     do {                                                                              \
         assert(PyErr_Occurred() && "function returned -1 without setting exception"); \
-        if (unwinder->debug) {                                                        \
+        if (unwinder->debug && !_Py_RemoteDebug_HasPermissionError()) {               \
             _set_debug_exception_cause(exc_type, message);                            \
         }                                                                             \
     } while (0)
 
@@ -22,35 +22,38 @@ _Py_RemoteDebug_GetAsyncioDebugAddress(proc_handle_t* handle)
     address = search_windows_map_for_section(handle, "AsyncioD", L"_asyncio",
                                              NULL);
     if (address == 0) {
-        // Error out: 'python' substring covers both executable and DLL
-        PyObject *exc = PyErr_GetRaisedException();
-        PyErr_SetString(PyExc_RuntimeError, "Failed to find the AsyncioDebug section in the process.");
-        _PyErr_ChainExceptions1(exc);
+        if (!_Py_RemoteDebug_HasPermissionError()) {
+            PyObject *exc = PyErr_GetRaisedException();
+            PyErr_SetString(PyExc_RuntimeError, "Failed to find the AsyncioDebug section in the process.");
+            _PyErr_ChainExceptions1(exc);
+        }
     }
 #elif defined(__linux__) && HAVE_PROCESS_VM_READV
     // On Linux, search for asyncio debug in executable or DLL
     address = search_linux_map_for_section(handle, "AsyncioDebug", "python",
                                            NULL);
     if (address == 0) {
-        // Error out: 'python' substring covers both executable and DLL
-        PyObject *exc = PyErr_GetRaisedException();
-        PyErr_SetString(PyExc_RuntimeError, "Failed to find the AsyncioDebug section in the process.");
-        _PyErr_ChainExceptions1(exc);
+        if (!_Py_RemoteDebug_HasPermissionError()) {
+            PyObject *exc = PyErr_GetRaisedException();
+            PyErr_SetString(PyExc_RuntimeError, "Failed to find the AsyncioDebug section in the process.");
+            _PyErr_ChainExceptions1(exc);
+        }
     }
 #elif defined(__APPLE__) && TARGET_OS_OSX
     // On macOS, try libpython first, then fall back to python
     address = search_map_for_section(handle, "AsyncioDebug", "libpython",
                                      NULL);
-    if (address == 0) {
+    if (address == 0 && !_Py_RemoteDebug_HasPermissionError()) {
         PyErr_Clear();
         address = search_map_for_section(handle, "AsyncioDebug", "python",
                                          NULL);
     }
     if (address == 0) {
-        // Error out: 'python' substring covers both executable and DLL
-        PyObject *exc = PyErr_GetRaisedException();
-        PyErr_SetString(PyExc_RuntimeError, "Failed to find the AsyncioDebug section in the process.");
-        _PyErr_ChainExceptions1(exc);
+        if (!_Py_RemoteDebug_HasPermissionError()) {
+            PyObject *exc = PyErr_GetRaisedException();
+            PyErr_SetString(PyExc_RuntimeError, "Failed to find the AsyncioDebug section in the process.");
+            _PyErr_ChainExceptions1(exc);
+        }
     }
 #else
     Py_UNREACHABLE();
@@ -96,10 +99,12 @@ ensure_async_debug_offsets(RemoteUnwinderObject *unwinder)
         return -1;
     }
     if (result < 0) {
-        PyErr_Clear();
-        PyErr_SetString(PyExc_RuntimeError, "AsyncioDebug section not available");
-        set_exception_cause(unwinder, PyExc_RuntimeError,
-            "AsyncioDebug section unavailable - asyncio module may not be loaded in target process");
+        if (!_Py_RemoteDebug_HasPermissionError()) {
+            PyErr_Clear();
+            PyErr_SetString(PyExc_RuntimeError, "AsyncioDebug section not available");
+            set_exception_cause(unwinder, PyExc_RuntimeError,
+                "AsyncioDebug section unavailable - asyncio module may not be loaded in target process");
+        }
         return -1;
     }
 
@@ -218,7 +223,7 @@ parse_task_name(
 
     if ((GET_MEMBER(unsigned long, type_obj, unwinder->debug_offsets.type_object.tp_flags) & Py_TPFLAGS_LONG_SUBCLASS)) {
         long res = read_py_long(unwinder, task_name_addr);
-        if (res == -1) {
+        if (res == -1 && PyErr_Occurred()) {
             set_exception_cause(unwinder, PyExc_RuntimeError, "Task name PyLong parsing failed");
             return NULL;
         }
 
@@ -380,7 +380,22 @@ binary_reader_open(PyObject *path)
         Py_fclose(fp);
         goto error;
     }
+    if (st.st_size < 0) {
+        PyErr_SetString(PyExc_IOError, "Invalid negative file size");
+        Py_fclose(fp);
+        goto error;
+    }
+    if ((uintmax_t)st.st_size > SIZE_MAX) {
+        PyErr_SetString(PyExc_OverflowError, "File is too large to map");
+        Py_fclose(fp);
+        goto error;
+    }
     reader->mapped_size = st.st_size;
+    if (reader->mapped_size == 0) {
+        PyErr_SetString(PyExc_ValueError, "File too small for header");
+        Py_fclose(fp);
+        goto error;
+    }
 
     /* Map the file into memory.
      * MAP_POPULATE (Linux-only) pre-faults all pages at mmap time, which:
@@ -424,7 +439,10 @@ binary_reader_open(PyObject *path)
     }
 #endif
 
-    (void)Py_fclose(fp);
+    if (Py_fclose(fp) != 0) {
+        PyErr_SetFromErrno(PyExc_IOError);
+        goto error;
+    }
 
     uint8_t *data = reader->mapped_data;
     size_t file_size = reader->mapped_size;
@@ -444,7 +462,15 @@ binary_reader_open(PyObject *path)
         PyErr_SetFromErrno(PyExc_IOError);
         goto error;
     }
+    if ((uint64_t)file_size_off > SIZE_MAX) {
+        PyErr_SetString(PyExc_OverflowError, "File is too large to read");
+        goto error;
+    }
     reader->file_size = (size_t)file_size_off;
+    if (reader->file_size == 0) {
+        PyErr_SetString(PyExc_ValueError, "File too small for header");
+        goto error;
+    }
     if (FSEEK64(reader->fp, 0, SEEK_SET) != 0) {
         PyErr_SetFromErrno(PyExc_IOError);
         goto error;
@@ -456,8 +482,18 @@ binary_reader_open(PyObject *path)
         goto error;
     }
 
-    if (fread(reader->file_data, 1, reader->file_size, reader->fp) != reader->file_size) {
-        PyErr_SetFromErrno(PyExc_IOError);
+    size_t nread = fread(reader->file_data, 1, reader->file_size, reader->fp);
+    if (nread != reader->file_size) {
+        int err = errno;
+        if (ferror(reader->fp) && err != 0) {
+            errno = err;
+            PyErr_SetFromErrno(PyExc_IOError);
+        }
+        else {
+            PyErr_Format(PyExc_ValueError,
+                "Unexpected end of file: read %zu of %zu bytes",
+                nread, reader->file_size);
+        }
         goto error;
     }
 
@@ -944,10 +980,16 @@ invoke_progress_callback(PyObject *callback, Py_ssize_t current, uint32_t total)
 Py_ssize_t
 binary_reader_replay(BinaryReader *reader, PyObject *collector, PyObject *progress_callback)
 {
-    if (!PyObject_HasAttrString(collector, "collect")) {
+    PyObject *collect_method;
+    int has_collect = PyObject_GetOptionalAttrString(collector, "collect", &collect_method);
+    if (has_collect < 0) {
+        return -1;
+    }
+    if (has_collect == 0) {
         PyErr_SetString(PyExc_TypeError, "Collector must have a collect() method");
         return -1;
     }
+    Py_DECREF(collect_method);
 
     /* Get module state for struct sequence types */
     PyObject *module = PyImport_ImportModule("_remote_debugging");
@@ -973,7 +1015,10 @@ binary_reader_replay(BinaryReader *reader, PyObject *collector, PyObject *progre
     while (offset < reader->sample_data_size) {
         /* Read thread_id (8 bytes) + interpreter_id (4 bytes) + encoding byte */
         if (reader->sample_data_size - offset < SAMPLE_HEADER_FIXED_SIZE) {
-            break;  /* End of data */
+            PyErr_Format(PyExc_ValueError,
+                "Truncated sample data: %zu trailing bytes",
+                reader->sample_data_size - offset);
+            return -1;
         }
 
         /* Use memcpy to avoid strict aliasing violations, then byte-swap if needed */
@@ -1019,6 +1064,11 @@ binary_reader_replay(BinaryReader *reader, PyObject *collector, PyObject *progre
                     count, max_possible_samples);
                 return -1;
             }
+            if ((uint64_t)count > (uint64_t)PY_SSIZE_T_MAX - (uint64_t)replayed) {
+                PyErr_SetString(PyExc_OverflowError,
+                    "Sample count exceeds Py_ssize_t maximum");
+                return -1;
+            }
 
             reader->stats.repeat_records++;
             reader->stats.repeat_samples += count;
@@ -1149,6 +1199,11 @@ binary_reader_replay(BinaryReader *reader, PyObject *collector, PyObject *progre
                 return -1;
             }
             Py_DECREF(timestamps_list);
+            if (replayed == PY_SSIZE_T_MAX) {
+                PyErr_SetString(PyExc_OverflowError,
+                    "Sample count exceeds Py_ssize_t maximum");
+                return -1;
+            }
             replayed++;
             reader->stats.total_samples++;
             break;
@@ -1167,6 +1222,13 @@ binary_reader_replay(BinaryReader *reader, PyObject *collector, PyObject *progre
         }
     }
 
+    if ((uint64_t)replayed != reader->sample_count) {
+        PyErr_Format(PyExc_ValueError,
+            "Sample count mismatch: header declares %u samples but replay decoded %zd",
+            reader->sample_count, replayed);
+        return -1;
+    }
+
     /* Final progress callback at 100% */
     if (invoke_progress_callback(progress_callback, replayed, reader->sample_count) < 0) {
         return -1;
 
@@ -108,7 +108,15 @@ fwrite_checked_allow_threads(const void *data, size_t size, FILE *fp)
     written = fwrite(data, 1, size, fp);
     Py_END_ALLOW_THREADS
     if (written != size) {
-        PyErr_SetFromErrno(PyExc_IOError);
+        int err = errno;
+        if (ferror(fp) && err != 0) {
+            errno = err;
+            PyErr_SetFromErrno(PyExc_IOError);
+        }
+        else {
+            PyErr_Format(PyExc_IOError,
+                "short write: wrote %zu of %zu bytes", written, size);
+        }
         return -1;
     }
     return 0;
@@ -366,6 +374,11 @@ writer_intern_string(BinaryWriter *writer, PyObject *string, uint32_t *index)
         return 0;
     }
 
+    if (writer->string_count >= UINT32_MAX) {
+        PyErr_SetString(PyExc_OverflowError,
+            "too many strings for binary format");
+        return -1;
+    }
     if (writer->string_count >= writer->string_capacity) {
         if (grow_parallel_arrays((void **)&writer->strings,
                                   (void **)&writer->string_lengths,
@@ -380,6 +393,12 @@ writer_intern_string(BinaryWriter *writer, PyObject *string, uint32_t *index)
     if (!str_data) {
         return -1;
     }
+    if ((uintmax_t)str_len > UINT32_MAX) {
+        PyErr_Format(PyExc_OverflowError,
+            "string length %zd exceeds binary format maximum %u",
+            str_len, UINT32_MAX);
+        return -1;
+    }
 
     char *str_copy = PyMem_Malloc(str_len + 1);
     if (!str_copy) {
@@ -422,6 +441,11 @@ writer_intern_frame(BinaryWriter *writer, const FrameEntry *entry, uint32_t *ind
         return 0;
     }
 
+    if (writer->frame_count >= UINT32_MAX) {
+        PyErr_SetString(PyExc_OverflowError,
+            "too many frames for binary format");
+        return -1;
+    }
     if (GROW_ARRAY(writer->frame_entries, writer->frame_count,
                    writer->frame_capacity, FrameEntry) < 0) {
         return -1;
@@ -466,6 +490,11 @@ writer_get_or_create_thread_entry(BinaryWriter *writer, uint64_t thread_id,
         }
     }
 
+    if (writer->thread_count >= UINT32_MAX) {
+        PyErr_SetString(PyExc_OverflowError,
+            "too many threads for binary format");
+        return NULL;
+    }
     if (writer->thread_count >= writer->thread_capacity) {
         ThreadEntry *new_entries = grow_array(writer->thread_entries,
                                               &writer->thread_capacity,
@@ -600,6 +629,11 @@ flush_pending_rle(BinaryWriter *writer, ThreadEntry *entry)
     if (!entry->has_pending_rle || entry->pending_rle_count == 0) {
         return 0;
     }
+    if (entry->pending_rle_count > UINT32_MAX - writer->total_samples) {
+        PyErr_SetString(PyExc_OverflowError,
+            "too many samples for binary format");
+        return -1;
+    }
 
     /* Write RLE record:
      * [thread_id: 8] [interpreter_id: 4] [STACK_REPEAT: 1] [count: varint]
@@ -644,6 +678,12 @@ write_sample_with_encoding(BinaryWriter *writer, ThreadEntry *entry,
                            const uint32_t *frame_indices, size_t stack_depth,
                            size_t shared_count, size_t pop_count, size_t push_count)
 {
+    if (writer->total_samples == UINT32_MAX) {
+        PyErr_SetString(PyExc_OverflowError,
+            "too many samples for binary format");
+        return -1;
+    }
+
     /* Header: thread_id(8) + interpreter_id(4) + encoding(1) + delta(varint) + status(1) */
     uint8_t header_buf[SAMPLE_HEADER_MAX_SIZE];
     memcpy(header_buf + SMP_OFF_THREAD_ID, &entry->thread_id, SMP_SIZE_THREAD_ID);
 
@@ -47,7 +47,6 @@ cache_tlbc_array(RemoteUnwinderObject *unwinder, uintptr_t code_addr, uintptr_t
 
     // Read the TLBC array pointer
     if (read_ptr(unwinder, tlbc_array_addr, &tlbc_array_ptr) != 0) {
-        PyErr_SetString(PyExc_RuntimeError, "Failed to read TLBC array pointer");
         set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read TLBC array pointer");
         return 0; // Read error
     }
@@ -61,7 +60,6 @@ cache_tlbc_array(RemoteUnwinderObject *unwinder, uintptr_t code_addr, uintptr_t
     // Read the TLBC array size
     Py_ssize_t tlbc_size;
     if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, tlbc_array_ptr, sizeof(tlbc_size), &tlbc_size) != 0) {
-        PyErr_SetString(PyExc_RuntimeError, "Failed to read TLBC array size");
         set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read TLBC array size");
         return 0; // Read error
     }
 
@@ -411,6 +411,9 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self,
         return -1;
     }
     if (async_debug_result < 0) {
+        if (_Py_RemoteDebug_HasPermissionError()) {
+            return -1;
+        }
         PyErr_Clear();
         memset(&self->async_debug_offsets, 0, sizeof(self->async_debug_offsets));
         self->async_debug_offsets_available = 0;
Original file line number	Diff line number	Diff line change
`@@ -47,7 +47,6 @@ cache_tlbc_array(RemoteUnwinderObject *unwinder, uintptr_t code_addr, uintptr_t`
`47`	`47`
`48`	`48`	`// Read the TLBC array pointer`
`49`	`49`	`if (read_ptr(unwinder, tlbc_array_addr, &tlbc_array_ptr) != 0) {`
`50`		`- PyErr_SetString(PyExc_RuntimeError, "Failed to read TLBC array pointer");`
`51`	`50`	`set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read TLBC array pointer");`
`52`	`51`	`return 0; // Read error`
`53`	`52`	`}`
`@@ -61,7 +60,6 @@ cache_tlbc_array(RemoteUnwinderObject *unwinder, uintptr_t code_addr, uintptr_t`
`61`	`60`	`// Read the TLBC array size`
`62`	`61`	`Py_ssize_t tlbc_size;`
`63`	`62`	`if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, tlbc_array_ptr, sizeof(tlbc_size), &tlbc_size) != 0) {`
`64`		`- PyErr_SetString(PyExc_RuntimeError, "Failed to read TLBC array size");`
`65`	`63`	`set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read TLBC array size");`
`66`	`64`	`return 0; // Read error`
`67`	`65`	`}`