@@ -380,7 +380,22 @@ binary_reader_open(PyObject *path)
380380 Py_fclose (fp );
381381 goto error ;
382382 }
383+ if (st .st_size < 0 ) {
384+ PyErr_SetString (PyExc_IOError , "Invalid negative file size" );
385+ Py_fclose (fp );
386+ goto error ;
387+ }
388+ if ((uintmax_t )st .st_size > SIZE_MAX ) {
389+ PyErr_SetString (PyExc_OverflowError , "File is too large to map" );
390+ Py_fclose (fp );
391+ goto error ;
392+ }
383393 reader -> mapped_size = st .st_size ;
394+ if (reader -> mapped_size == 0 ) {
395+ PyErr_SetString (PyExc_ValueError , "File too small for header" );
396+ Py_fclose (fp );
397+ goto error ;
398+ }
384399
385400 /* Map the file into memory.
386401 * MAP_POPULATE (Linux-only) pre-faults all pages at mmap time, which:
@@ -424,7 +439,10 @@ binary_reader_open(PyObject *path)
424439 }
425440#endif
426441
427- (void )Py_fclose (fp );
442+ if (Py_fclose (fp ) != 0 ) {
443+ PyErr_SetFromErrno (PyExc_IOError );
444+ goto error ;
445+ }
428446
429447 uint8_t * data = reader -> mapped_data ;
430448 size_t file_size = reader -> mapped_size ;
@@ -444,7 +462,15 @@ binary_reader_open(PyObject *path)
444462 PyErr_SetFromErrno (PyExc_IOError );
445463 goto error ;
446464 }
465+ if ((uint64_t )file_size_off > SIZE_MAX ) {
466+ PyErr_SetString (PyExc_OverflowError , "File is too large to read" );
467+ goto error ;
468+ }
447469 reader -> file_size = (size_t )file_size_off ;
470+ if (reader -> file_size == 0 ) {
471+ PyErr_SetString (PyExc_ValueError , "File too small for header" );
472+ goto error ;
473+ }
448474 if (FSEEK64 (reader -> fp , 0 , SEEK_SET ) != 0 ) {
449475 PyErr_SetFromErrno (PyExc_IOError );
450476 goto error ;
@@ -456,8 +482,18 @@ binary_reader_open(PyObject *path)
456482 goto error ;
457483 }
458484
459- if (fread (reader -> file_data , 1 , reader -> file_size , reader -> fp ) != reader -> file_size ) {
460- PyErr_SetFromErrno (PyExc_IOError );
485+ size_t nread = fread (reader -> file_data , 1 , reader -> file_size , reader -> fp );
486+ if (nread != reader -> file_size ) {
487+ int err = errno ;
488+ if (ferror (reader -> fp ) && err != 0 ) {
489+ errno = err ;
490+ PyErr_SetFromErrno (PyExc_IOError );
491+ }
492+ else {
493+ PyErr_Format (PyExc_ValueError ,
494+ "Unexpected end of file: read %zu of %zu bytes" ,
495+ nread , reader -> file_size );
496+ }
461497 goto error ;
462498 }
463499
@@ -944,10 +980,16 @@ invoke_progress_callback(PyObject *callback, Py_ssize_t current, uint32_t total)
944980Py_ssize_t
945981binary_reader_replay (BinaryReader * reader , PyObject * collector , PyObject * progress_callback )
946982{
947- if (!PyObject_HasAttrString (collector , "collect" )) {
983+ PyObject * collect_method ;
984+ int has_collect = PyObject_GetOptionalAttrString (collector , "collect" , & collect_method );
985+ if (has_collect < 0 ) {
986+ return -1 ;
987+ }
988+ if (has_collect == 0 ) {
948989 PyErr_SetString (PyExc_TypeError , "Collector must have a collect() method" );
949990 return -1 ;
950991 }
992+ Py_DECREF (collect_method );
951993
952994 /* Get module state for struct sequence types */
953995 PyObject * module = PyImport_ImportModule ("_remote_debugging" );
@@ -973,7 +1015,10 @@ binary_reader_replay(BinaryReader *reader, PyObject *collector, PyObject *progre
9731015 while (offset < reader -> sample_data_size ) {
9741016 /* Read thread_id (8 bytes) + interpreter_id (4 bytes) + encoding byte */
9751017 if (reader -> sample_data_size - offset < SAMPLE_HEADER_FIXED_SIZE ) {
976- break ; /* End of data */
1018+ PyErr_Format (PyExc_ValueError ,
1019+ "Truncated sample data: %zu trailing bytes" ,
1020+ reader -> sample_data_size - offset );
1021+ return -1 ;
9771022 }
9781023
9791024 /* Use memcpy to avoid strict aliasing violations, then byte-swap if needed */
@@ -1019,6 +1064,11 @@ binary_reader_replay(BinaryReader *reader, PyObject *collector, PyObject *progre
10191064 count , max_possible_samples );
10201065 return -1 ;
10211066 }
1067+ if ((uint64_t )count > (uint64_t )PY_SSIZE_T_MAX - (uint64_t )replayed ) {
1068+ PyErr_SetString (PyExc_OverflowError ,
1069+ "Sample count exceeds Py_ssize_t maximum" );
1070+ return -1 ;
1071+ }
10221072
10231073 reader -> stats .repeat_records ++ ;
10241074 reader -> stats .repeat_samples += count ;
@@ -1149,6 +1199,11 @@ binary_reader_replay(BinaryReader *reader, PyObject *collector, PyObject *progre
11491199 return -1 ;
11501200 }
11511201 Py_DECREF (timestamps_list );
1202+ if (replayed == PY_SSIZE_T_MAX ) {
1203+ PyErr_SetString (PyExc_OverflowError ,
1204+ "Sample count exceeds Py_ssize_t maximum" );
1205+ return -1 ;
1206+ }
11521207 replayed ++ ;
11531208 reader -> stats .total_samples ++ ;
11541209 break ;
@@ -1167,6 +1222,13 @@ binary_reader_replay(BinaryReader *reader, PyObject *collector, PyObject *progre
11671222 }
11681223 }
11691224
1225+ if ((uint64_t )replayed != reader -> sample_count ) {
1226+ PyErr_Format (PyExc_ValueError ,
1227+ "Sample count mismatch: header declares %u samples but replay decoded %zd" ,
1228+ reader -> sample_count , replayed );
1229+ return -1 ;
1230+ }
1231+
11701232 /* Final progress callback at 100% */
11711233 if (invoke_progress_callback (progress_callback , replayed , reader -> sample_count ) < 0 ) {
11721234 return -1 ;
0 commit comments