3939#ifdef Py_DEBUG
4040 extern const char * _PyUOpName (int index );
4141 extern void _PyUOpPrint (const _PyUOpInstruction * uop );
42+ extern void _PyUOpSymPrint (JitOptRef ref );
4243 static const char * const DEBUG_ENV = "PYTHON_OPT_DEBUG" ;
4344 static inline int get_lltrace (void ) {
4445 char * uop_debug = Py_GETENV (DEBUG_ENV );
5051 }
5152 #define DPRINTF (level , ...) \
5253 if (get_lltrace() >= (level)) { printf(__VA_ARGS__); }
54+
55+
56+
57+ static void
58+ dump_abstract_stack (_Py_UOpsAbstractFrame * frame , JitOptRef * stack_pointer )
59+ {
60+ JitOptRef * stack_base = frame -> stack ;
61+ JitOptRef * locals_base = frame -> locals ;
62+ printf (" locals=[" );
63+ for (JitOptRef * ptr = locals_base ; ptr < stack_base ; ptr ++ ) {
64+ if (ptr != locals_base ) {
65+ printf (", " );
66+ }
67+ _PyUOpSymPrint (* ptr );
68+ }
69+ printf ("]\n" );
70+ if (stack_pointer < stack_base ) {
71+ printf (" stack=%d\n" , (int )(stack_pointer - stack_base ));
72+ }
73+ else {
74+ printf (" stack=[" );
75+ for (JitOptRef * ptr = stack_base ; ptr < stack_pointer ; ptr ++ ) {
76+ if (ptr != stack_base ) {
77+ printf (", " );
78+ }
79+ _PyUOpSymPrint (* ptr );
80+ }
81+ printf ("]\n" );
82+ }
83+ fflush (stdout );
84+ }
85+
5386#else
5487 #define DPRINTF (level , ...)
5588#endif
@@ -143,6 +176,18 @@ incorrect_keys(PyObject *obj, uint32_t version)
143176#define STACK_LEVEL () ((int)(stack_pointer - ctx->frame->stack))
144177#define STACK_SIZE () ((int)(ctx->frame->stack_len))
145178
179+ static inline int
180+ is_terminator_uop (const _PyUOpInstruction * uop )
181+ {
182+ int opcode = uop -> opcode ;
183+ return (
184+ opcode == _EXIT_TRACE ||
185+ opcode == _JUMP_TO_TOP ||
186+ opcode == _DYNAMIC_EXIT ||
187+ opcode == _DEOPT
188+ );
189+ }
190+
146191#define CURRENT_FRAME_IS_INIT_SHIM () (ctx->frame->code == ((PyCodeObject *)&_Py_InitCleanup))
147192
148193#define GETLOCAL (idx ) ((ctx->frame->locals[idx]))
@@ -152,6 +197,22 @@ incorrect_keys(PyObject *obj, uint32_t version)
152197 (INST)->oparg = ARG; \
153198 (INST)->operand0 = OPERAND;
154199
200+ #define ADD_OP (OP , ARG , OPERAND ) add_op(ctx, this_instr, (OP), (ARG), (OPERAND))
201+
202+ static inline void
203+ add_op (JitOptContext * ctx , _PyUOpInstruction * this_instr ,
204+ uint16_t opcode , uint16_t oparg , uintptr_t operand0 )
205+ {
206+ _PyUOpInstruction * out = & ctx -> out_buffer [ctx -> out_len ];
207+ out -> opcode = (opcode );
208+ out -> format = this_instr -> format ;
209+ out -> oparg = (oparg );
210+ out -> target = this_instr -> target ;
211+ out -> operand0 = (operand0 );
212+ out -> operand1 = this_instr -> operand1 ;
213+ ctx -> out_len ++ ;
214+ }
215+
155216/* Shortened forms for convenience, used in optimizer_bytecodes.c */
156217#define sym_is_not_null _Py_uop_sym_is_not_null
157218#define sym_is_const _Py_uop_sym_is_const
@@ -219,7 +280,7 @@ optimize_to_bool(
219280 bool insert_mode )
220281{
221282 if (sym_matches_type (value , & PyBool_Type )) {
222- REPLACE_OP ( this_instr , _NOP , 0 , 0 );
283+ ADD_OP ( _NOP , 0 , 0 );
223284 * result_ptr = value ;
224285 return 1 ;
225286 }
@@ -229,17 +290,17 @@ optimize_to_bool(
229290 int opcode = insert_mode ?
230291 _INSERT_1_LOAD_CONST_INLINE_BORROW :
231292 _POP_TOP_LOAD_CONST_INLINE_BORROW ;
232- REPLACE_OP ( this_instr , opcode , 0 , (uintptr_t )load );
293+ ADD_OP ( opcode , 0 , (uintptr_t )load );
233294 * result_ptr = sym_new_const (ctx , load );
234295 return 1 ;
235296 }
236297 return 0 ;
237298}
238299
239300static void
240- eliminate_pop_guard (_PyUOpInstruction * this_instr , bool exit )
301+ eliminate_pop_guard (_PyUOpInstruction * this_instr , JitOptContext * ctx , bool exit )
241302{
242- REPLACE_OP ( this_instr , _POP_TOP , 0 , 0 );
303+ ADD_OP ( _POP_TOP , 0 , 0 );
243304 if (exit ) {
244305 REPLACE_OP ((this_instr + 1 ), _EXIT_TRACE , 0 , 0 );
245306 this_instr [1 ].target = this_instr -> target ;
@@ -256,7 +317,7 @@ lookup_attr(JitOptContext *ctx, _PyBloomFilter *dependencies, _PyUOpInstruction
256317 PyObject * lookup = _PyType_Lookup (type , name );
257318 if (lookup ) {
258319 int opcode = _Py_IsImmortal (lookup ) ? immortal : mortal ;
259- REPLACE_OP ( this_instr , opcode , 0 , (uintptr_t )lookup );
320+ ADD_OP ( opcode , 0 , (uintptr_t )lookup );
260321 PyType_Watch (TYPE_WATCHER_ID , (PyObject * )type );
261322 _Py_BloomFilter_Add (dependencies , type );
262323 return sym_new_const (ctx , lookup );
@@ -349,6 +410,8 @@ optimize_uops(
349410 JitOptContext * ctx = & tstate -> jit_tracer_state -> opt_context ;
350411 uint32_t opcode = UINT16_MAX ;
351412
413+ ctx -> out_buffer = tstate -> jit_tracer_state -> out_buffer ;
414+
352415 // Make sure that watchers are set up
353416 PyInterpreterState * interp = _PyInterpreterState_GET ();
354417 if (interp -> dict_state .watchers [GLOBALS_WATCHER_ID ] == NULL ) {
@@ -365,6 +428,8 @@ optimize_uops(
365428 ctx -> curr_frame_depth ++ ;
366429 ctx -> frame = frame ;
367430
431+ ctx -> out_len = 0 ;
432+
368433 _PyUOpInstruction * this_instr = NULL ;
369434 JitOptRef * stack_pointer = ctx -> frame -> stack_pointer ;
370435
@@ -383,7 +448,10 @@ optimize_uops(
383448 if (get_lltrace () >= 3 ) {
384449 printf ("%4d abs: " , (int )(this_instr - trace ));
385450 _PyUOpPrint (this_instr );
386- printf (" " );
451+ printf (" \n" );
452+ if (get_lltrace () >= 5 && !CURRENT_FRAME_IS_INIT_SHIM ()) {
453+ dump_abstract_stack (ctx -> frame , stack_pointer );
454+ }
387455 }
388456#endif
389457
@@ -395,6 +463,10 @@ optimize_uops(
395463 DPRINTF (1 , "\nUnknown opcode in abstract interpreter\n" );
396464 Py_UNREACHABLE ();
397465 }
466+ // If no ADD_OP was called during this iteration, copy the original instruction
467+ if (ctx -> out_len == i ) {
468+ ctx -> out_buffer [ctx -> out_len ++ ] = * this_instr ;
469+ }
398470 assert (ctx -> frame != NULL );
399471 if (!CURRENT_FRAME_IS_INIT_SHIM ()) {
400472 DPRINTF (3 , " stack_level %d\n" , STACK_LEVEL ());
@@ -423,7 +495,21 @@ optimize_uops(
423495 /* Either reached the end or cannot optimize further, but there
424496 * would be no benefit in retrying later */
425497 _Py_uop_abstractcontext_fini (ctx );
426- return trace_len ;
498+ // Check that the trace ends with a proper terminator
499+ if (ctx -> out_len > 0 ) {
500+ _PyUOpInstruction * last_uop = & ctx -> out_buffer [ctx -> out_len - 1 ];
501+ if (!is_terminator_uop (last_uop )) {
502+ // Copy remaining uops from original trace until we find a terminator
503+ for (int i = ctx -> out_len ; i < trace_len ; i ++ ) {
504+ ctx -> out_buffer [ctx -> out_len ++ ] = trace [i ];
505+ if (is_terminator_uop (& trace [i ])) {
506+ break ;
507+ }
508+ }
509+ }
510+ }
511+
512+ return ctx -> out_len ;
427513
428514error :
429515 DPRINTF (3 , "\n" );
@@ -595,7 +681,7 @@ _Py_uop_analyze_and_optimize(
595681
596682 assert (length > 0 );
597683
598- length = remove_unneeded_uops (buffer , length );
684+ length = remove_unneeded_uops (tstate -> jit_tracer_state -> out_buffer , length );
599685 assert (length > 0 );
600686
601687 OPT_STAT_INC (optimizer_successes );
0 commit comments