From 6d0b47de86eef3a396ff2bf6eea731d2fb778ded Mon Sep 17 00:00:00 2001
From: Aaron Patterson
Date: Wed, 21 Jan 2026 11:00:15 -0800
Subject: [PATCH 1/6] ZJIT: Add a CFG to the LIR backend (#15879)

We want to use [linear scan register allocation](https://bernsteinbear.com/blog/linear-scan/),
but a prerequisite is having a CFG available. Previously, LIR had only a linear
sequence of instructions; this PR introduces a CFG to the LIR backend.

I've done my best to ensure that the "hot path" machine code we generate is the
same (while testing I noticed that side-exit machine code was being dumped in a
different order). This PR doesn't make any changes to the existing register
allocator; it simply introduces a CFG to LIR.

The basic blocks in the LIR CFG always start with a label (the first
instruction is a label), and the last 0, 1, or 2 instructions are jump
instructions. No other jump instruction should appear mid-block.
---
 zjit/src/backend/arm64/mod.rs  | 114 ++++--
 zjit/src/backend/lir.rs        | 623 +++++++++++++++++++++++++++------
 zjit/src/backend/tests.rs      |  10 +-
 zjit/src/backend/x86_64/mod.rs |  47 ++-
 zjit/src/codegen.rs            | 190 ++++++----
 zjit/src/hir.rs                |   4 +
 zjit/src/invariants.rs         |   1 +
 zjit/src/options.rs            |   3 +
 8 files changed, 773 insertions(+), 219 deletions(-)

diff --git a/zjit/src/backend/arm64/mod.rs b/zjit/src/backend/arm64/mod.rs
index 574249dabda81c..d06e84536f6345 100644
--- a/zjit/src/backend/arm64/mod.rs
+++ b/zjit/src/backend/arm64/mod.rs
@@ -694,7 +694,8 @@ impl Assembler {
     /// VRegs, most splits should happen in [`Self::arm64_split`]. However, some instructions
     /// need to be split with registers after `alloc_regs`, e.g. for `compile_exits`, so this
     /// splits them and uses scratch registers for it.
-    fn arm64_scratch_split(mut self) -> Assembler {
+    /// Linearizes all blocks into a single giant block.
+    fn arm64_scratch_split(self) -> Assembler {
        /// If opnd is Opnd::Mem with a too large disp, make the disp smaller using lea.
        fn split_large_disp(asm: &mut Assembler, opnd: Opnd, scratch_opnd: Opnd) -> Opnd {
            match opnd {
@@ -750,12 +751,23 @@ impl Assembler {
         // Prepare StackState to lower MemBase::Stack
         let stack_state = StackState::new(self.stack_base_idx);
 
-        let mut asm_local = Assembler::new_with_asm(&self);
+        let mut asm_local = Assembler::new();
+        asm_local.accept_scratch_reg = true;
+        asm_local.stack_base_idx = self.stack_base_idx;
+        asm_local.label_names = self.label_names.clone();
+        asm_local.live_ranges.resize(self.live_ranges.len(), LiveRange { start: None, end: None });
+
+        // Create one giant block to linearize everything into
+        asm_local.new_block_without_id();
+
         let asm = &mut asm_local;
-        asm.accept_scratch_reg = true;
 
-        let iterator = &mut self.instruction_iterator();
-        while let Some((_, mut insn)) = iterator.next(asm) {
+        // Get linearized instructions with branch parameters expanded into ParallelMov
+        let linearized_insns = self.linearize_instructions();
+
+        // Process each linearized instruction
+        for (idx, insn) in linearized_insns.iter().enumerate() {
+            let mut insn = insn.clone();
             match &mut insn {
                 Insn::Add { left, right, out } |
                 Insn::Sub { left, right, out } |
@@ -795,7 +807,7 @@
                     };
 
                     // If the next instruction is JoMul
-                    if matches!(iterator.peek(), Some((_, Insn::JoMul(_)))) {
+                    if idx + 1 < linearized_insns.len() && matches!(linearized_insns[idx + 1], Insn::JoMul(_)) {
                         // Produce a register that is all zeros or all ones
                         // Based on the sign bit of the 64-bit mul result
                         asm.push_insn(Insn::RShift { out: SCRATCH0_OPND, opnd: reg_out, shift: Opnd::UImm(63) });
@@ -940,7 +952,7 @@
     /// Emit a conditional jump instruction to a specific target. This is
     /// called when lowering any of the conditional jump instructions.
-    fn emit_conditional_jump<const CONDITION: u8>(cb: &mut CodeBlock, target: Target) {
+    fn emit_conditional_jump<const CONDITION: u8>(asm: &Assembler, cb: &mut CodeBlock, target: Target) {
        fn generate_branch<const CONDITION: u8>(cb: &mut CodeBlock, src_addr: i64, dst_addr: i64) {
            let num_insns = if bcond_offset_fits_bits((dst_addr - src_addr) / 4) {
                // If the jump offset fits into the conditional jump as
@@ -991,30 +1003,31 @@
            (num_insns..cb.conditional_jump_insns()).for_each(|_| nop(cb));
        }
 
-        match target {
+        let label = match target {
            Target::CodePtr(dst_ptr) => {
                let dst_addr = dst_ptr.as_offset();
                let src_addr = cb.get_write_ptr().as_offset();
                generate_branch::<CONDITION>(cb, src_addr, dst_addr);
+                return;
            },
-            Target::Label(label_idx) => {
-                // Try to use a single B.cond instruction
-                cb.label_ref(label_idx, 4, |cb, src_addr, dst_addr| {
-                    // +1 since src_addr is after the instruction while A64
-                    // counts the offset relative to the start.
-                    let offset = (dst_addr - src_addr) / 4 + 1;
-                    if bcond_offset_fits_bits(offset) {
-                        bcond(cb, CONDITION, InstructionOffset::from_insns(offset as i32));
-                        Ok(())
-                    } else {
-                        Err(())
-                    }
-                });
-            },
+            Target::Label(l) => l,
+            Target::Block(ref edge) => asm.block_label(edge.target),
            Target::SideExit { .. } => {
                unreachable!("Target::SideExit should have been compiled by compile_exits")
            },
        };
+        // Try to use a single B.cond instruction
+        cb.label_ref(label, 4, |cb, src_addr, dst_addr| {
+            // +1 since src_addr is after the instruction while A64
+            // counts the offset relative to the start.
+ let offset = (dst_addr - src_addr) / 4 + 1; + if bcond_offset_fits_bits(offset) { + bcond(cb, CONDITION, InstructionOffset::from_insns(offset as i32)); + Ok(()) + } else { + Err(()) + } + }); } /// Emit a CBZ or CBNZ which branches when a register is zero or non-zero @@ -1117,8 +1130,13 @@ impl Assembler { let (_hook, mut hook_insn_idx) = AssemblerPanicHook::new(self, 0); // For each instruction + // NOTE: At this point, the assembler should have been linearized into a single giant block + // by either resolve_parallel_mov_pass() or arm64_scratch_split(). let mut insn_idx: usize = 0; - while let Some(insn) = self.insns.get(insn_idx) { + assert_eq!(self.basic_blocks.len(), 1, "Assembler should be linearized into a single block before arm64_emit"); + let insns = &self.basic_blocks[0].insns; + + while let Some(insn) = insns.get(insn_idx) { // Update insn_idx that is shown on panic hook_insn_idx.as_mut().map(|idx| idx.lock().map(|mut idx| *idx = insn_idx).unwrap()); @@ -1222,7 +1240,7 @@ impl Assembler { }, Insn::Mul { left, right, out } => { // If the next instruction is JoMul with RShift created by arm64_scratch_split - match (self.insns.get(insn_idx + 1), self.insns.get(insn_idx + 2)) { + match (insns.get(insn_idx + 1), insns.get(insn_idx + 2)) { (Some(Insn::RShift { out: out_sign, opnd: out_opnd, shift: out_shift }), Some(Insn::JoMul(_))) => { // Compute the high 64 bits smulh(cb, Self::EMIT_OPND, left.into(), right.into()); @@ -1487,34 +1505,48 @@ impl Assembler { } }); }, + Target::Block(ref edge) => { + let label = self.block_label(edge.target); + cb.label_ref(label, 4, |cb, src_addr, dst_addr| { + // +1 since src_addr is after the instruction while A64 + // counts the offset relative to the start. + let offset = (dst_addr - src_addr) / 4 + 1; + if b_offset_fits_bits(offset) { + b(cb, InstructionOffset::from_insns(offset as i32)); + Ok(()) + } else { + Err(()) + } + }); + }, Target::SideExit { .. 
} => {
                        unreachable!("Target::SideExit should have been compiled by compile_exits")
                    },
                };
            },
            Insn::Je(target) |
            Insn::Jz(target) => {
-                emit_conditional_jump::<{Condition::EQ}>(cb, target.clone());
+                emit_conditional_jump::<{Condition::EQ}>(self, cb, target.clone());
            },
            Insn::Jne(target) |
            Insn::Jnz(target) |
            Insn::JoMul(target) => {
-                emit_conditional_jump::<{Condition::NE}>(cb, target.clone());
+                emit_conditional_jump::<{Condition::NE}>(self, cb, target.clone());
            },
            Insn::Jl(target) => {
-                emit_conditional_jump::<{Condition::LT}>(cb, target.clone());
+                emit_conditional_jump::<{Condition::LT}>(self, cb, target.clone());
            },
            Insn::Jg(target) => {
-                emit_conditional_jump::<{Condition::GT}>(cb, target.clone());
+                emit_conditional_jump::<{Condition::GT}>(self, cb, target.clone());
            },
            Insn::Jge(target) => {
-                emit_conditional_jump::<{Condition::GE}>(cb, target.clone());
+                emit_conditional_jump::<{Condition::GE}>(self, cb, target.clone());
            },
            Insn::Jbe(target) => {
-                emit_conditional_jump::<{Condition::LS}>(cb, target.clone());
+                emit_conditional_jump::<{Condition::LS}>(self, cb, target.clone());
            },
            Insn::Jb(target) => {
-                emit_conditional_jump::<{Condition::CC}>(cb, target.clone());
+                emit_conditional_jump::<{Condition::CC}>(self, cb, target.clone());
            },
            Insn::Jo(target) => {
-                emit_conditional_jump::<{Condition::VS}>(cb, target.clone());
+                emit_conditional_jump::<{Condition::VS}>(self, cb, target.clone());
            },
            Insn::Joz(opnd, target) => {
                emit_cmp_zero_jump(cb, opnd.into(), true, target.clone());
@@ -1537,8 +1569,8 @@
                let Some(Insn::Cmp {
                    left: status_reg @ Opnd::Reg(_),
                    right: Opnd::UImm(_) | Opnd::Imm(_),
-                }) = self.insns.get(insn_idx + 1) else {
-                    panic!("arm64_scratch_split should add Cmp after IncrCounter: {:?}", self.insns.get(insn_idx + 1));
+                }) = insns.get(insn_idx + 1) else {
+                    panic!("arm64_scratch_split should add Cmp after IncrCounter: {:?}", insns.get(insn_idx + 1));
                };
 
                // Attempt to increment a counter
@@ -1587,7 +1619,7 @@
        } else {
            // No bytes dropped, so the pos markers point to valid code
            for (insn_idx, pos) in pos_markers {
-                if let Insn::PosMarker(callback) = self.insns.get(insn_idx).unwrap() {
+                if let Insn::PosMarker(callback) = insns.get(insn_idx).unwrap() {
                    callback(pos, cb);
                } else {
                    panic!("non-PosMarker in pos_markers insn_idx={insn_idx} {self:?}");
                }
@@ -1617,6 +1649,10 @@
        if use_scratch_reg {
            asm = asm.arm64_scratch_split();
            asm_dump!(asm, scratch_split);
+        } else {
+            // For trampolines that don't use scratch registers, resolve ParallelMov without scratch_reg.
+            asm = asm.resolve_parallel_mov_pass();
+            asm_dump!(asm, resolve_parallel_mov);
        }
 
        // Create label instances in the code block
@@ -1681,12 +1717,15 @@ mod tests {
    use super::*;
    use insta::assert_snapshot;
+    use crate::hir;
 
    static TEMP_REGS: [Reg; 5] = [X1_REG, X9_REG, X10_REG, X14_REG, X15_REG];
 
    fn setup_asm() -> (Assembler, CodeBlock) {
        crate::options::rb_zjit_prepare_options(); // Allow `get_option!` in Assembler
-        (Assembler::new(), CodeBlock::new_dummy())
+        let mut asm = Assembler::new();
+        asm.new_block_without_id();
+        (asm, CodeBlock::new_dummy())
    }
 
    #[test]
@@ -1694,6 +1733,7 @@
        use crate::hir::SideExitReason;
 
        let mut asm = Assembler::new();
+        asm.new_block_without_id();
        asm.stack_base_idx = 1;
 
        let label = asm.new_label("bb0");
@@ -2107,6 +2147,7 @@
    #[test]
    fn test_store_with_valid_scratch_reg() {
        let (mut asm, scratch_reg) = Assembler::new_with_scratch_reg();
+        asm.new_block_without_id();
        let mut cb = CodeBlock::new_dummy();
 
        asm.store(Opnd::mem(64, scratch_reg, 0), 0x83902.into());
@@ -2560,6 +2601,7 @@
        crate::options::rb_zjit_prepare_options(); // Allow `get_option!` in Assembler
 
        let mut asm = Assembler::new();
+        asm.new_block_without_id();
        let mut cb = CodeBlock::new_dummy_sized(memory_required);
 
        let far_label = asm.new_label("far");
diff --git a/zjit/src/backend/lir.rs b/zjit/src/backend/lir.rs
index 06127b5c1a40e4..f2f7bc61659d04 100644
--- a/zjit/src/backend/lir.rs
+++ b/zjit/src/backend/lir.rs
@@ -7,6 +7,7 @@ use std::sync::{Arc, Mutex};
 use crate::codegen::local_size_and_idx_to_ep_offset;
 use crate::cruby::{Qundef, RUBY_OFFSET_CFP_PC, RUBY_OFFSET_CFP_SP, SIZEOF_VALUE_I32, vm_stack_canary};
 use crate::hir::{Invariant, SideExitReason};
+use crate::hir;
 use crate::options::{TraceExits, debug, get_option};
 use crate::cruby::VALUE;
 use crate::payload::IseqVersionRef;
@@ -15,6 +16,104 @@ use crate::virtualmem::CodePtr;
 use crate::asm::{CodeBlock, Label};
 use crate::state::rb_zjit_record_exit_stack;
 
+/// LIR Block ID. Unique ID for each block, and also defined in LIR so
+/// we can differentiate it from HIR block ids.
+#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug, PartialOrd, Ord)]
+pub struct BlockId(pub usize);
+
+impl From<BlockId> for usize {
+    fn from(val: BlockId) -> Self {
+        val.0
+    }
+}
+
+impl std::fmt::Display for BlockId {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        write!(f, "l{}", self.0)
+    }
+}
+
+/// Dummy HIR block ID used when creating test or invalid LIR blocks
+const DUMMY_HIR_BLOCK_ID: usize = usize::MAX;
+/// Dummy RPO index used when creating test or invalid LIR blocks
+const DUMMY_RPO_INDEX: usize = usize::MAX;
+
+#[derive(Debug, PartialEq, Clone)]
+pub struct BranchEdge {
+    pub target: BlockId,
+    pub args: Vec<Opnd>,
+}
+
+#[derive(Clone, Debug)]
+pub struct BasicBlock {
+    // Unique id for this block
+    pub id: BlockId,
+
+    // HIR block this LIR block was lowered from. Not injective: multiple LIR blocks may share
+    // the same hir_block_id because we split HIR blocks into multiple LIR blocks during lowering.
+    pub hir_block_id: hir::BlockId,
+
+    pub is_entry: bool,
+
+    // Instructions in this basic block
+    pub insns: Vec<Insn>,
+
+    // Input parameters for this block
+    pub parameters: Vec<Opnd>,
+
+    // RPO position of the source HIR block
+    pub rpo_index: usize,
+}
+
+pub struct EdgePair(Option<BranchEdge>, Option<BranchEdge>);
+
+impl BasicBlock {
+    fn new(id: BlockId, hir_block_id: hir::BlockId, is_entry: bool, rpo_index: usize) -> Self {
+        Self {
+            id,
+            hir_block_id,
+            is_entry,
+            insns: vec![],
+            parameters: vec![],
+            rpo_index,
+        }
+    }
+
+    pub fn add_parameter(&mut self, param: Opnd) {
+        self.parameters.push(param);
+    }
+
+    pub fn push_insn(&mut self, insn: Insn) {
+        self.insns.push(insn);
+    }
+
+    pub fn edges(&self) -> EdgePair {
+        assert!(self.insns.last().unwrap().is_terminator());
+        let extract_edge = |insn: &Insn| -> Option<BranchEdge> {
+            if let Some(Target::Block(edge)) = insn.target() {
+                Some(edge.clone())
+            } else {
+                None
+            }
+        };
+
+        match self.insns.as_slice() {
+            [] => panic!("empty block"),
+            [.., second_last, last] => {
+                EdgePair(extract_edge(second_last), extract_edge(last))
+            },
+            [.., last] => {
+                EdgePair(extract_edge(last), None)
+            }
+        }
+    }
+
+    /// Sort key for scheduling blocks in code layout order
+    pub fn sort_key(&self) -> (usize, usize) {
+        (self.rpo_index, self.id.0)
+    }
+}
+
 pub use crate::backend::current::{
    mem_base_reg,
    Reg,
@@ -309,13 +408,15 @@ pub struct SideExit {
 
 /// Branch target (something that we can jump to)
 /// for branch instructions
-#[derive(Clone, Debug)]
+#[derive(Clone)]
 pub enum Target
 {
    /// Pointer to a piece of ZJIT-generated code
    CodePtr(CodePtr),
    /// A label within the generated code
    Label(Label),
+    /// An LIR branch edge
+    Block(BranchEdge),
    /// Side exit to the interpreter
    SideExit {
        /// Context used for compiling the side exit
@@ -325,6 +426,32 @@ pub enum Target
 }
 
+impl fmt::Debug for Target {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Target::CodePtr(ptr) => write!(f, "CodePtr({:?})", ptr),
+            Target::Label(label) => write!(f, "Label({:?})", label),
+            Target::Block(edge) => {
+                if edge.args.is_empty() {
+                    write!(f, "Block({:?})", edge.target)
+                } else {
+                    write!(f, "Block({:?}(", edge.target)?;
+                    for (i, arg) in edge.args.iter().enumerate() {
+                        if i > 0 {
+                            write!(f, ", ")?;
+                        }
+                        write!(f, "{:?}", arg)?;
+                    }
+                    write!(f, "))")
+                }
+            }
+            Target::SideExit { exit, reason } => {
+                write!(f, "SideExit {{ exit: {:?}, reason: {:?} }}", exit, reason)
+            }
+        }
+    }
+}
+
 impl Target
 {
    pub fn unwrap_label(&self) -> Label {
@@ -771,6 +898,29 @@ impl Insn {
            _ => None
        }
    }
+
+    /// Returns true if this instruction is a terminator (ends a basic block).
+    pub fn is_terminator(&self) -> bool {
+        match self {
+            Insn::Jbe(_) |
+            Insn::Jb(_) |
+            Insn::Je(_) |
+            Insn::Jl(_) |
+            Insn::Jg(_) |
+            Insn::Jge(_) |
+            Insn::Jmp(_) |
+            Insn::JmpOpnd(_) |
+            Insn::Jne(_) |
+            Insn::Jnz(_) |
+            Insn::Jo(_) |
+            Insn::JoMul(_) |
+            Insn::Jz(_) |
+            Insn::Joz(..) |
+            Insn::Jonz(..) |
+            Insn::CRet(_) => true,
+            _ => false
+        }
+    }
 }
 
 /// An iterator that will yield a non-mutable reference to each operand in turn
@@ -806,22 +956,33 @@ impl<'a> Iterator for InsnOpndIterator<'a> {
            Insn::Label(target) |
            Insn::LeaJumpTarget { target, .. } |
            Insn::PatchPoint { target, .. } => {
-                if let Target::SideExit { exit: SideExit { stack, locals, .. }, .. } = target {
-                    let stack_idx = self.idx;
-                    if stack_idx < stack.len() {
-                        let opnd = &stack[stack_idx];
-                        self.idx += 1;
-                        return Some(opnd);
-                    }
+                match target {
+                    Target::SideExit { exit: SideExit { stack, locals, .. }, ..
} => { + let stack_idx = self.idx; + if stack_idx < stack.len() { + let opnd = &stack[stack_idx]; + self.idx += 1; + return Some(opnd); + } - let local_idx = self.idx - stack.len(); - if local_idx < locals.len() { - let opnd = &locals[local_idx]; - self.idx += 1; - return Some(opnd); + let local_idx = self.idx - stack.len(); + if local_idx < locals.len() { + let opnd = &locals[local_idx]; + self.idx += 1; + return Some(opnd); + } + None + } + Target::Block(edge) => { + if self.idx < edge.args.len() { + let opnd = &edge.args[self.idx]; + self.idx += 1; + return Some(opnd); + } + None } + _ => None } - None } Insn::Joz(opnd, target) | @@ -831,22 +992,34 @@ impl<'a> Iterator for InsnOpndIterator<'a> { return Some(opnd); } - if let Target::SideExit { exit: SideExit { stack, locals, .. }, .. } = target { - let stack_idx = self.idx - 1; - if stack_idx < stack.len() { - let opnd = &stack[stack_idx]; - self.idx += 1; - return Some(opnd); - } + match target { + Target::SideExit { exit: SideExit { stack, locals, .. }, .. } => { + let stack_idx = self.idx - 1; + if stack_idx < stack.len() { + let opnd = &stack[stack_idx]; + self.idx += 1; + return Some(opnd); + } - let local_idx = stack_idx - stack.len(); - if local_idx < locals.len() { - let opnd = &locals[local_idx]; - self.idx += 1; - return Some(opnd); + let local_idx = stack_idx - stack.len(); + if local_idx < locals.len() { + let opnd = &locals[local_idx]; + self.idx += 1; + return Some(opnd); + } + None + } + Target::Block(edge) => { + let arg_idx = self.idx - 1; + if arg_idx < edge.args.len() { + let opnd = &edge.args[arg_idx]; + self.idx += 1; + return Some(opnd); + } + None } + _ => None } - None } Insn::BakeString(_) | @@ -975,22 +1148,33 @@ impl<'a> InsnOpndMutIterator<'a> { Insn::Label(target) | Insn::LeaJumpTarget { target, .. } | Insn::PatchPoint { target, .. } => { - if let Target::SideExit { exit: SideExit { stack, locals, .. }, .. } = target { - let stack_idx = self.idx; - if stack_idx < stack.len() { - let opnd = &mut stack[stack_idx]; - self.idx += 1; - return Some(opnd); - } + match target { + Target::SideExit { exit: SideExit { stack, locals, .. }, .. } => { + let stack_idx = self.idx; + if stack_idx < stack.len() { + let opnd = &mut stack[stack_idx]; + self.idx += 1; + return Some(opnd); + } - let local_idx = self.idx - stack.len(); - if local_idx < locals.len() { - let opnd = &mut locals[local_idx]; - self.idx += 1; - return Some(opnd); + let local_idx = self.idx - stack.len(); + if local_idx < locals.len() { + let opnd = &mut locals[local_idx]; + self.idx += 1; + return Some(opnd); + } + None + } + Target::Block(edge) => { + if self.idx < edge.args.len() { + let opnd = &mut edge.args[self.idx]; + self.idx += 1; + return Some(opnd); + } + None } + _ => None } - None } Insn::Joz(opnd, target) | @@ -1000,22 +1184,34 @@ impl<'a> InsnOpndMutIterator<'a> { return Some(opnd); } - if let Target::SideExit { exit: SideExit { stack, locals, .. }, .. } = target { - let stack_idx = self.idx - 1; - if stack_idx < stack.len() { - let opnd = &mut stack[stack_idx]; - self.idx += 1; - return Some(opnd); - } + match target { + Target::SideExit { exit: SideExit { stack, locals, .. }, .. 
}, .. } => {
+                        let stack_idx = self.idx - 1;
+                        if stack_idx < stack.len() {
+                            let opnd = &mut stack[stack_idx];
+                            self.idx += 1;
+                            return Some(opnd);
+                        }
 
-                let local_idx = stack_idx - stack.len();
-                if local_idx < locals.len() {
-                    let opnd = &mut locals[local_idx];
-                    self.idx += 1;
-                    return Some(opnd);
+                        let local_idx = stack_idx - stack.len();
+                        if local_idx < locals.len() {
+                            let opnd = &mut locals[local_idx];
+                            self.idx += 1;
+                            return Some(opnd);
+                        }
+                        None
                }
+                    Target::Block(edge) => {
+                        let arg_idx = self.idx - 1;
+                        if arg_idx < edge.args.len() {
+                            let opnd = &mut edge.args[arg_idx];
+                            self.idx += 1;
+                            return Some(opnd);
+                        }
+                        None
+                    }
+                    _ => None
                }
-                None
            }
 
            Insn::BakeString(_) |
@@ -1332,7 +1528,12 @@
 const ASSEMBLER_INSNS_CAPACITY: usize = 256;
 
 /// optimized and lowered
 #[derive(Clone)]
 pub struct Assembler {
-    pub(super) insns: Vec<Insn>,
+    pub basic_blocks: Vec<BasicBlock>,
+
+    /// The block to which new instructions are added. Used during HIR to LIR lowering to
+    /// determine which LIR block we should add instructions to. Set by `set_current_block()`
+    /// and automatically set to new entry blocks created by `new_block()`.
+    current_block_id: BlockId,
 
    /// Live range for each VReg indexed by its `idx``
    pub(super) live_ranges: Vec<LiveRange>,
@@ -1350,7 +1551,10 @@ pub struct Assembler {
    pub(super) stack_base_idx: usize,
 
    /// If Some, the next ccall should verify its leafness
-    leaf_ccall_stack_size: Option<usize>
+    leaf_ccall_stack_size: Option<usize>,
+
+    /// Current instruction index, incremented for each instruction pushed
+    idx: usize,
 }
 
 impl Assembler
 {
    /// Create an Assembler with defaults
    pub fn new() -> Self {
        Self {
-            insns: Vec::with_capacity(ASSEMBLER_INSNS_CAPACITY),
-            live_ranges: Vec::with_capacity(ASSEMBLER_INSNS_CAPACITY),
            label_names: Vec::default(),
            accept_scratch_reg: false,
            stack_base_idx: 0,
            leaf_ccall_stack_size: None,
+            basic_blocks: Vec::default(),
+            current_block_id: BlockId(0),
+            live_ranges: Vec::default(),
+            idx: 0,
        }
    }
 
@@ -1387,11 +1593,62 @@
            stack_base_idx: old_asm.stack_base_idx,
            ..Self::new()
        };
-        // Bump the initial VReg index to allow the use of the VRegs for the old Assembler
+
+        // Initialize basic blocks from the old assembler, preserving hir_block_id and entry flag
+        // but with empty instruction lists
+        for old_block in &old_asm.basic_blocks {
+            asm.new_block_from_old_block(&old_block);
+        }
+
+        // Initialize live_ranges to match the old assembler's size
+        // This allows reusing VRegs from the old assembler
        asm.live_ranges.resize(old_asm.live_ranges.len(), LiveRange { start: None, end: None });
+
        asm
    }
 
+    // Create a new LIR basic block. Returns the newly created block ID
+    pub fn new_block(&mut self, hir_block_id: hir::BlockId, is_entry: bool, rpo_index: usize) -> BlockId {
+        let bb_id = BlockId(self.basic_blocks.len());
+        let lir_bb = BasicBlock::new(bb_id, hir_block_id, is_entry, rpo_index);
+        self.basic_blocks.push(lir_bb);
+        if is_entry {
+            self.set_current_block(bb_id);
+        }
+        bb_id
+    }
+
+    // Create a new LIR basic block from an old one. This should only be used
+    // when creating new assemblers during passes when we want to translate
+    // one assembler to a new one.
+    pub fn new_block_from_old_block(&mut self, old_block: &BasicBlock) -> BlockId {
+        let bb_id = BlockId(self.basic_blocks.len());
+        let lir_bb = BasicBlock::new(bb_id, old_block.hir_block_id, old_block.is_entry, old_block.rpo_index);
+        self.basic_blocks.push(lir_bb);
+        bb_id
+    }
+
+    // Create a LIR basic block without a valid HIR block ID (for testing or internal use).
+    pub fn new_block_without_id(&mut self) -> BlockId {
+        self.new_block(hir::BlockId(DUMMY_HIR_BLOCK_ID), true, DUMMY_RPO_INDEX)
+    }
+
+    pub fn set_current_block(&mut self, block_id: BlockId) {
+        self.current_block_id = block_id;
+    }
+
+    pub fn current_block(&mut self) -> &mut BasicBlock {
+        &mut self.basic_blocks[self.current_block_id.0]
+    }
+
+    /// Return basic blocks sorted by RPO index, then by block ID.
+    /// TODO: Use a more advanced scheduling algorithm
+    pub fn sorted_blocks(&self) -> Vec<&BasicBlock> {
+        let mut sorted: Vec<&BasicBlock> = self.basic_blocks.iter().collect();
+        sorted.sort_by_key(|block| block.sort_key());
+        sorted
+    }
+
    /// Return true if `opnd` is or depends on `reg`
    pub fn has_reg(opnd: Opnd, reg: Reg) -> bool {
        match opnd {
@@ -1402,11 +1659,100 @@
    }
 
    pub fn instruction_iterator(&mut self) -> InsnIter {
-        let insns = take(&mut self.insns);
-        InsnIter {
-            old_insns_iter: insns.into_iter(),
+        let mut blocks = take(&mut self.basic_blocks);
+        blocks.sort_by_key(|block| block.sort_key());
+
+        let mut iter = InsnIter {
+            blocks,
+            current_block_idx: 0,
+            current_insn_iter: vec![].into_iter(), // Will be replaced immediately
            peeked: None,
            index: 0,
+        };
+
+        // Set up first block's iterator
+        if !iter.blocks.is_empty() {
+            iter.current_insn_iter = take(&mut iter.blocks[0].insns).into_iter();
+        }
+
+        iter
+    }
+
+    /// Return an operand for a basic block argument at a given index.
+    /// To simplify the implementation, we allocate a fixed register or a stack slot
+    /// for each basic block argument.
+    pub fn param_opnd(idx: usize) -> Opnd {
+        use crate::backend::current::ALLOC_REGS;
+        use crate::cruby::SIZEOF_VALUE_I32;
+
+        if idx < ALLOC_REGS.len() {
+            Opnd::Reg(ALLOC_REGS[idx])
+        } else {
+            // With FrameSetup, the address that NATIVE_BASE_PTR points to stores an old value in the register.
+            // To avoid clobbering it, we need to start from the next slot, hence `+ 1` for the index.
+            Opnd::mem(64, NATIVE_BASE_PTR, (idx - ALLOC_REGS.len() + 1) as i32 * -SIZEOF_VALUE_I32)
+        }
+    }
+
+    pub fn linearize_instructions(&self) -> Vec<Insn> {
+        // Emit instructions with labels, expanding branch parameters
+        let mut insns = Vec::with_capacity(ASSEMBLER_INSNS_CAPACITY);
+
+        for block in self.sorted_blocks() {
+            // Process each instruction, expanding branch params if needed
+            for insn in &block.insns {
+                self.expand_branch_insn(insn, &mut insns);
+            }
+        }
+        insns
+    }
+
+    /// Expand and linearize a branch instruction:
+    /// 1. If the branch has Target::Block with arguments, insert a ParallelMov first
+    /// 2. Convert Target::Block to Target::Label
+    /// 3. Push the converted instruction
+    fn expand_branch_insn(&self, insn: &Insn, insns: &mut Vec<Insn>) {
+        // Helper to process branch arguments and return the label target
+        let mut process_edge = |edge: &BranchEdge| -> Label {
+            if !edge.args.is_empty() {
+                insns.push(Insn::ParallelMov {
+                    moves: edge.args.iter().enumerate()
+                        .map(|(idx, &arg)| (Assembler::param_opnd(idx), arg))
+                        .collect()
+                });
+            }
+            self.block_label(edge.target)
+        };
+
+        // Convert Target::Block to Target::Label, processing args if needed
+        let stripped_insn = match insn {
+            Insn::Jmp(Target::Block(edge)) => Insn::Jmp(Target::Label(process_edge(edge))),
+            Insn::Jz(Target::Block(edge)) => Insn::Jz(Target::Label(process_edge(edge))),
+            Insn::Jnz(Target::Block(edge)) => Insn::Jnz(Target::Label(process_edge(edge))),
+            Insn::Je(Target::Block(edge)) => Insn::Je(Target::Label(process_edge(edge))),
+            Insn::Jne(Target::Block(edge)) => Insn::Jne(Target::Label(process_edge(edge))),
+            Insn::Jl(Target::Block(edge)) => Insn::Jl(Target::Label(process_edge(edge))),
+            Insn::Jg(Target::Block(edge)) => Insn::Jg(Target::Label(process_edge(edge))),
+            Insn::Jge(Target::Block(edge)) => Insn::Jge(Target::Label(process_edge(edge))),
+            Insn::Jbe(Target::Block(edge)) => Insn::Jbe(Target::Label(process_edge(edge))),
+            Insn::Jb(Target::Block(edge)) => Insn::Jb(Target::Label(process_edge(edge))),
+            Insn::Jo(Target::Block(edge)) => Insn::Jo(Target::Label(process_edge(edge))),
+            Insn::JoMul(Target::Block(edge)) => Insn::JoMul(Target::Label(process_edge(edge))),
+            Insn::Joz(opnd, Target::Block(edge)) => Insn::Joz(*opnd, Target::Label(process_edge(edge))),
+            Insn::Jonz(opnd, Target::Block(edge)) => Insn::Jonz(*opnd, Target::Label(process_edge(edge))),
+            _ => insn.clone()
+        };
+
+        // Push the stripped instruction
+        insns.push(stripped_insn);
+    }
+
+    // Get the label for a given block by extracting it from the first instruction.
+    pub(super) fn block_label(&self, block_id: BlockId) -> Label {
+        let block = &self.basic_blocks[block_id.0];
+        match block.insns.first() {
+            Some(Insn::Label(Target::Label(label))) => *label,
+            other => panic!("Expected first instruction of block {:?} to be a Label, but found: {:?}", block_id, other),
        }
    }
 
@@ -1444,7 +1790,7 @@
    /// operands to this instruction.
    pub fn push_insn(&mut self, insn: Insn) {
        // Index of this instruction
-        let insn_idx = self.insns.len();
+        let insn_idx = self.idx;
 
        // Initialize the live range of the output VReg to insn_idx..=insn_idx
        if let Some(Opnd::VReg { idx, .. }) = insn.out_opnd() {
@@ -1475,7 +1821,9 @@
            }
        }
 
-        self.insns.push(insn);
+        self.idx += 1;
+
+        self.current_block().push_insn(insn);
    }
 
    /// Create a new label instance that we can jump to
@@ -1533,6 +1881,7 @@
        Some(new_moves)
    }
 
+
    /// Sets the out field on the various instructions that require allocated
    /// registers because their output is used as the operand on a subsequent
    /// instruction. This is our implementation of the linear scan algorithm.
@@ -1548,17 +1897,22 @@
        let mut saved_regs: Vec<(Reg, usize)> = vec![];
 
        // Remember the indexes of Insn::FrameSetup to update the stack size later
-        let mut frame_setup_idxs: Vec<usize> = vec![];
+        let mut frame_setup_idxs: Vec<(BlockId, usize)> = vec![];
 
        // live_ranges is indexed by original `index` given by the iterator.
- let mut asm = Assembler::new_with_asm(&self); + let mut asm_local = Assembler::new_with_asm(&self); + + let iterator = &mut self.instruction_iterator(); + + let asm = &mut asm_local; + let live_ranges: Vec = take(&mut self.live_ranges); - let mut iterator = self.insns.into_iter().enumerate().peekable(); - while let Some((index, mut insn)) = iterator.next() { + while let Some((index, mut insn)) = iterator.next(asm) { // Remember the index of FrameSetup to bump slot_count when we know the max number of spilled VRegs. if let Insn::FrameSetup { .. } = insn { - frame_setup_idxs.push(asm.insns.len()); + assert!(asm.current_block().is_entry); + frame_setup_idxs.push((asm.current_block().id, asm.current_block().insns.len())); } let before_ccall = match (&insn, iterator.peek().map(|(_, insn)| insn)) { @@ -1715,17 +2069,6 @@ impl Assembler // Push instruction(s) let is_ccall = matches!(insn, Insn::CCall { .. }); match insn { - Insn::ParallelMov { moves } => { - // For trampolines that use scratch registers, attempt to lower ParallelMov without scratch_reg. - if let Some(moves) = Self::resolve_parallel_moves(&moves, None) { - for (dst, src) in moves { - asm.mov(dst, src); - } - } else { - // If it needs a scratch_reg, leave it to *_split_with_scratch_regs to handle it. - asm.push_insn(Insn::ParallelMov { moves }); - } - } Insn::CCall { opnds, fptr, start_marker, end_marker, out } => { // Split start_marker and end_marker here to avoid inserting push/pop between them. if let Some(start_marker) = start_marker { @@ -1768,8 +2111,8 @@ impl Assembler } // Extend the stack space for spilled operands - for frame_setup_idx in frame_setup_idxs { - match &mut asm.insns[frame_setup_idx] { + for (block_id, frame_setup_idx) in frame_setup_idxs { + match &mut asm.basic_blocks[block_id.0].insns[frame_setup_idx] { Insn::FrameSetup { slot_count, .. } => { *slot_count += pool.stack_state.stack_size; } @@ -1778,7 +2121,7 @@ impl Assembler } assert!(pool.is_empty(), "Expected all registers to be returned to the pool"); - Ok(asm) + Ok(asm_local) } /// Compile the instructions down to machine code. @@ -1852,16 +2195,19 @@ impl Assembler // Extract targets first so that we can update instructions while referencing part of them. let mut targets = HashMap::new(); - for (idx, insn) in self.insns.iter().enumerate() { - if let Some(target @ Target::SideExit { .. }) = insn.target() { - targets.insert(idx, target.clone()); + + for block in self.sorted_blocks().iter() { + for (idx, insn) in block.insns.iter().enumerate() { + if let Some(target @ Target::SideExit { .. }) = insn.target() { + targets.insert((block.id.0, idx), target.clone()); + } } } // Map from SideExit to compiled Label. This table is used to deduplicate side exit code. let mut compiled_exits: HashMap = HashMap::new(); - for (idx, target) in targets { + for ((block_id, idx), target) in targets { // Compile a side exit. Note that this is past the split pass and alloc_regs(), // so you can't use an instruction that returns a VReg. if let Target::SideExit { exit: exit @ SideExit { pc, .. 
}, reason } = target { @@ -1914,7 +2260,7 @@ impl Assembler new_exit }; - *self.insns[idx].target_mut().unwrap() = counted_exit.unwrap_or(compiled_exit); + *self.basic_blocks[block_id].insns[idx].target_mut().unwrap() = counted_exit.unwrap_or(compiled_exit); } } } @@ -1949,7 +2295,7 @@ impl fmt::Display for Assembler { } } - for insn in self.insns.iter() { + for insn in self.linearize_instructions().iter() { match insn { Insn::Comment(comment) => { writeln!(f, " {bold_begin}# {comment}{bold_end}")?; @@ -1985,6 +2331,20 @@ impl fmt::Display for Assembler { Target::CodePtr(code_ptr) => write!(f, " {code_ptr:?}")?, Target::Label(Label(label_idx)) => write!(f, " {}", label_name(self, *label_idx, &label_counts))?, Target::SideExit { reason, .. } => write!(f, " Exit({reason})")?, + Target::Block(edge) => { + if edge.args.is_empty() { + write!(f, " bb{}", edge.target.0)?; + } else { + write!(f, " bb{}(", edge.target.0)?; + for (i, arg) in edge.args.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{}", arg)?; + } + write!(f, ")")?; + } + } } } @@ -2000,6 +2360,17 @@ impl fmt::Display for Assembler { } _ => {} } + } else if let Some(Target::Block(_)) = insn.target() { + // If the instruction has a Block target, avoid using opnd_iter() for branch args + // since they're already printed inline with the target. Only print non-target operands. + match insn { + Insn::Joz(opnd, _) | + Insn::Jonz(opnd, _) | + Insn::LeaJumpTarget { out: opnd, target: _ } => { + write!(f, ", {opnd}")?; + } + _ => {} + } } else if let Insn::ParallelMov { moves } = insn { // Print operands with a special syntax for ParallelMov moves.iter().try_fold(" ", |prefix, (dst, src)| write!(f, "{prefix}{dst} <- {src}").and(Ok(", ")))?; @@ -2019,7 +2390,7 @@ impl fmt::Debug for Assembler { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { writeln!(fmt, "Assembler")?; - for (idx, insn) in self.insns.iter().enumerate() { + for (idx, insn) in self.linearize_instructions().iter().enumerate() { writeln!(fmt, " {idx:03} {insn:?}")?; } @@ -2028,7 +2399,9 @@ impl fmt::Debug for Assembler { } pub struct InsnIter { - old_insns_iter: std::vec::IntoIter, + blocks: Vec, + current_block_idx: usize, + current_insn_iter: std::vec::IntoIter, peeked: Option<(usize, Insn)>, index: usize, } @@ -2039,7 +2412,7 @@ impl InsnIter { pub fn peek(&mut self) -> Option<&(usize, Insn)> { // If we don't have a peeked value, get one if self.peeked.is_none() { - let insn = self.old_insns_iter.next()?; + let insn = self.current_insn_iter.next()?; let idx = self.index; self.index += 1; self.peeked = Some((idx, insn)); @@ -2048,17 +2421,34 @@ impl InsnIter { self.peeked.as_ref() } - // Get the next instruction. Right now we're passing the "new" assembler - // (the assembler we're copying in to) as a parameter. Once we've - // introduced basic blocks to LIR, we'll use the to set the correct BB - // on the new assembler, but for now it is unused. - pub fn next(&mut self, _new_asm: &mut Assembler) -> Option<(usize, Insn)> { + // Get the next instruction, advancing to the next block when current block is exhausted. + // Sets the current block on new_asm when moving to a new block. 
+ pub fn next(&mut self, new_asm: &mut Assembler) -> Option<(usize, Insn)> { // If we have a peeked value, return it if let Some(item) = self.peeked.take() { return Some(item); } - // Otherwise get the next from underlying iterator - let insn = self.old_insns_iter.next()?; + + // Try to get the next instruction from current block + if let Some(insn) = self.current_insn_iter.next() { + let idx = self.index; + self.index += 1; + return Some((idx, insn)); + } + + // Current block is exhausted, move to next block + self.current_block_idx += 1; + if self.current_block_idx >= self.blocks.len() { + return None; + } + + // Set up the next block + let next_block = &mut self.blocks[self.current_block_idx]; + new_asm.set_current_block(next_block.id); + self.current_insn_iter = take(&mut next_block.insns).into_iter(); + + // Get first instruction from the new block + let insn = self.current_insn_iter.next()?; let idx = self.index; self.index += 1; Some((idx, insn)) @@ -2451,6 +2841,43 @@ impl Assembler { self.push_insn(Insn::Xor { left, right, out }); out } + + /// This is used for trampolines that don't allow scratch registers. + /// Linearizes all blocks into a single giant block. + pub fn resolve_parallel_mov_pass(self) -> Assembler { + let mut asm_local = Assembler::new(); + asm_local.accept_scratch_reg = self.accept_scratch_reg; + asm_local.stack_base_idx = self.stack_base_idx; + asm_local.label_names = self.label_names.clone(); + asm_local.live_ranges.resize(self.live_ranges.len(), LiveRange { start: None, end: None }); + + // Create one giant block to linearize everything into + asm_local.new_block_without_id(); + + // Get linearized instructions with branch parameters expanded into ParallelMov + let linearized_insns = self.linearize_instructions(); + + // Process each linearized instruction + for insn in linearized_insns { + match insn { + Insn::ParallelMov { moves } => { + // Resolve parallel moves without scratch register + if let Some(resolved_moves) = Assembler::resolve_parallel_moves(&moves, None) { + for (dst, src) in resolved_moves { + asm_local.mov(dst, src); + } + } else { + unreachable!("ParallelMov requires scratch register but scratch_reg is not allowed"); + } + } + _ => { + asm_local.push_insn(insn); + } + } + } + + asm_local + } } /// Macro to use format! for Insn::Comment, which skips a format! call diff --git a/zjit/src/backend/tests.rs b/zjit/src/backend/tests.rs index ece6f8605f1540..701029b8ec0c2c 100644 --- a/zjit/src/backend/tests.rs +++ b/zjit/src/backend/tests.rs @@ -3,10 +3,12 @@ use crate::backend::lir::*; use crate::cruby::*; use crate::codegen::c_callable; use crate::options::rb_zjit_prepare_options; +use crate::hir; #[test] fn test_add() { let mut asm = Assembler::new(); + asm.new_block_without_id(); let out = asm.add(SP, Opnd::UImm(1)); let _ = asm.add(out, Opnd::UImm(2)); } @@ -15,6 +17,7 @@ fn test_add() { fn test_alloc_regs() { rb_zjit_prepare_options(); // for asm.alloc_regs let mut asm = Assembler::new(); + asm.new_block_without_id(); // Get the first output that we're going to reuse later. let out1 = asm.add(EC, Opnd::UImm(1)); @@ -37,7 +40,7 @@ fn test_alloc_regs() { let _ = asm.add(out3, Opnd::UImm(6)); // Here we're going to allocate the registers. - let result = asm.alloc_regs(Assembler::get_alloc_regs()).unwrap(); + let result = &asm.alloc_regs(Assembler::get_alloc_regs()).unwrap().basic_blocks[0]; // Now we're going to verify that the out field has been appropriately // updated for each of the instructions that needs it. 
@@ -63,7 +66,9 @@ fn test_alloc_regs() { fn setup_asm() -> (Assembler, CodeBlock) { rb_zjit_prepare_options(); // for get_option! on asm.compile - (Assembler::new(), CodeBlock::new_dummy()) + let mut asm = Assembler::new(); + asm.new_block_without_id(); + (asm, CodeBlock::new_dummy()) } // Test full codegen pipeline @@ -293,6 +298,7 @@ fn test_no_pos_marker_callback_when_compile_fails() { // We don't want to invoke the pos_marker callbacks with positions of malformed code. let mut asm = Assembler::new(); rb_zjit_prepare_options(); // for asm.compile + asm.new_block_without_id(); // Markers around code to exhaust memory limit let fail_if_called = |_code_ptr, _cb: &_| panic!("pos_marker callback should not be called"); diff --git a/zjit/src/backend/x86_64/mod.rs b/zjit/src/backend/x86_64/mod.rs index 38b9f2791b44f1..a4cf8dfcc5e892 100644 --- a/zjit/src/backend/x86_64/mod.rs +++ b/zjit/src/backend/x86_64/mod.rs @@ -392,7 +392,7 @@ impl Assembler { /// for VRegs, most splits should happen in [`Self::x86_split`]. However, some instructions /// need to be split with registers after `alloc_regs`, e.g. for `compile_exits`, so /// this splits them and uses scratch registers for it. - pub fn x86_scratch_split(mut self) -> Assembler { + pub fn x86_scratch_split(self) -> Assembler { /// For some instructions, we want to be able to lower a 64-bit operand /// without requiring more registers to be available in the register /// allocator. So we just use the SCRATCH0_OPND register temporarily to hold @@ -468,12 +468,22 @@ impl Assembler { // Prepare StackState to lower MemBase::Stack let stack_state = StackState::new(self.stack_base_idx); - let mut asm_local = Assembler::new_with_asm(&self); + let mut asm_local = Assembler::new(); + asm_local.accept_scratch_reg = true; + asm_local.stack_base_idx = self.stack_base_idx; + asm_local.label_names = self.label_names.clone(); + asm_local.live_ranges.resize(self.live_ranges.len(), LiveRange { start: None, end: None }); + + // Create one giant block to linearize everything into + asm_local.new_block_without_id(); + let asm = &mut asm_local; - asm.accept_scratch_reg = true; - let mut iterator = self.instruction_iterator(); - while let Some((_, mut insn)) = iterator.next(asm) { + // Get linearized instructions with branch parameters expanded into ParallelMov + let linearized_insns = self.linearize_instructions(); + + for insn in linearized_insns.iter() { + let mut insn = insn.clone(); match &mut insn { Insn::Add { left, right, out } | Insn::Sub { left, right, out } | @@ -703,7 +713,10 @@ impl Assembler { // For each instruction let mut insn_idx: usize = 0; - while let Some(insn) = self.insns.get(insn_idx) { + assert_eq!(self.basic_blocks.len(), 1, "Assembler should be linearized into a single block before arm64_emit"); + let insns = &self.basic_blocks[0].insns; + + while let Some(insn) = insns.get(insn_idx) { // Update insn_idx that is shown on panic hook_insn_idx.as_mut().map(|idx| idx.lock().map(|mut idx| *idx = insn_idx).unwrap()); @@ -907,6 +920,7 @@ impl Assembler { match *target { Target::CodePtr(code_ptr) => jmp_ptr(cb, code_ptr), Target::Label(label) => jmp_label(cb, label), + Target::Block(ref edge) => jmp_label(cb, self.block_label(edge.target)), Target::SideExit { .. 
} => unreachable!("Target::SideExit should have been compiled by compile_exits"), } } @@ -915,6 +929,7 @@ impl Assembler { match *target { Target::CodePtr(code_ptr) => je_ptr(cb, code_ptr), Target::Label(label) => je_label(cb, label), + Target::Block(ref edge) => je_label(cb, self.block_label(edge.target)), Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_exits"), } } @@ -923,6 +938,7 @@ impl Assembler { match *target { Target::CodePtr(code_ptr) => jne_ptr(cb, code_ptr), Target::Label(label) => jne_label(cb, label), + Target::Block(ref edge) => jne_label(cb, self.block_label(edge.target)), Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_exits"), } } @@ -931,6 +947,7 @@ impl Assembler { match *target { Target::CodePtr(code_ptr) => jl_ptr(cb, code_ptr), Target::Label(label) => jl_label(cb, label), + Target::Block(ref edge) => jl_label(cb, self.block_label(edge.target)), Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_exits"), } }, @@ -939,6 +956,7 @@ impl Assembler { match *target { Target::CodePtr(code_ptr) => jg_ptr(cb, code_ptr), Target::Label(label) => jg_label(cb, label), + Target::Block(ref edge) => jg_label(cb, self.block_label(edge.target)), Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_exits"), } }, @@ -947,6 +965,7 @@ impl Assembler { match *target { Target::CodePtr(code_ptr) => jge_ptr(cb, code_ptr), Target::Label(label) => jge_label(cb, label), + Target::Block(ref edge) => jge_label(cb, self.block_label(edge.target)), Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_exits"), } }, @@ -955,6 +974,7 @@ impl Assembler { match *target { Target::CodePtr(code_ptr) => jbe_ptr(cb, code_ptr), Target::Label(label) => jbe_label(cb, label), + Target::Block(ref edge) => jbe_label(cb, self.block_label(edge.target)), Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_exits"), } }, @@ -963,6 +983,7 @@ impl Assembler { match *target { Target::CodePtr(code_ptr) => jb_ptr(cb, code_ptr), Target::Label(label) => jb_label(cb, label), + Target::Block(ref edge) => jb_label(cb, self.block_label(edge.target)), Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_exits"), } }, @@ -971,6 +992,7 @@ impl Assembler { match *target { Target::CodePtr(code_ptr) => jz_ptr(cb, code_ptr), Target::Label(label) => jz_label(cb, label), + Target::Block(ref edge) => jz_label(cb, self.block_label(edge.target)), Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_exits"), } } @@ -979,6 +1001,7 @@ impl Assembler { match *target { Target::CodePtr(code_ptr) => jnz_ptr(cb, code_ptr), Target::Label(label) => jnz_label(cb, label), + Target::Block(ref edge) => jnz_label(cb, self.block_label(edge.target)), Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_exits"), } } @@ -988,6 +1011,7 @@ impl Assembler { match *target { Target::CodePtr(code_ptr) => jo_ptr(cb, code_ptr), Target::Label(label) => jo_label(cb, label), + Target::Block(ref edge) => jo_label(cb, self.block_label(edge.target)), Target::SideExit { .. 
} => unreachable!("Target::SideExit should have been compiled by compile_exits"),
                }
            }
@@ -1052,7 +1076,7 @@
        } else {
            // No bytes dropped, so the pos markers point to valid code
            for (insn_idx, pos) in pos_markers {
-                if let Insn::PosMarker(callback) = self.insns.get(insn_idx).unwrap() {
+                if let Insn::PosMarker(callback) = insns.get(insn_idx).unwrap() {
                    callback(pos, cb);
                } else {
                    panic!("non-PosMarker in pos_markers insn_idx={insn_idx} {self:?}");
                }
@@ -1082,6 +1106,10 @@
        if use_scratch_regs {
            asm = asm.x86_scratch_split();
            asm_dump!(asm, scratch_split);
+        } else {
+            // For trampolines that don't use scratch registers, resolve ParallelMov without scratch_reg.
+            asm = asm.resolve_parallel_mov_pass();
+            asm_dump!(asm, resolve_parallel_mov);
        }
 
        // Create label instances in the code block
@@ -1115,7 +1143,9 @@ mod tests {
    fn setup_asm() -> (Assembler, CodeBlock) {
        rb_zjit_prepare_options(); // for get_option! on asm.compile
-        (Assembler::new(), CodeBlock::new_dummy())
+        let mut asm = Assembler::new();
+        asm.new_block_without_id();
+        (asm, CodeBlock::new_dummy())
    }
 
    #[test]
@@ -1123,6 +1153,7 @@
        use crate::hir::SideExitReason;
 
        let mut asm = Assembler::new();
+        asm.new_block_without_id();
        asm.stack_base_idx = 1;
 
        let label = asm.new_label("bb0");
diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs
index a3cf09d7c41372..7c867cd6b4ab03 100644
--- a/zjit/src/codegen.rs
+++ b/zjit/src/codegen.rs
@@ -18,8 +18,8 @@ use crate::state::ZJITState;
 use crate::stats::{CompileError, exit_counter_for_compile_error, exit_counter_for_unhandled_hir_insn, incr_counter, incr_counter_by, send_fallback_counter, send_fallback_counter_for_method_type, send_fallback_counter_for_super_method_type, send_fallback_counter_ptr_for_opcode, send_without_block_fallback_counter_for_method_type, send_without_block_fallback_counter_for_optimized_method_type};
 use crate::stats::{counter_ptr, with_time_stat, Counter, Counter::{compile_time_ns, exit_compile_error}};
 use crate::{asm::CodeBlock, cruby::*, options::debug, virtualmem::CodePtr};
-use crate::backend::lir::{self, Assembler, C_ARG_OPNDS, C_RET_OPND, CFP, EC, NATIVE_BASE_PTR, NATIVE_STACK_PTR, Opnd, SP, SideExit, Target, asm_ccall, asm_comment};
+use crate::backend::lir::{self, Assembler, C_ARG_OPNDS, C_RET_OPND, CFP, EC, NATIVE_STACK_PTR, Opnd, SP, SideExit, Target, asm_ccall, asm_comment};
-use crate::hir::{iseq_to_hir, BlockId, BranchEdge, Invariant, RangeType, SideExitReason::{self, *}, SpecialBackrefSymbol, SpecialObjectType};
+use crate::hir::{iseq_to_hir, BlockId, Invariant, RangeType, SideExitReason::{self, *}, SpecialBackrefSymbol, SpecialObjectType};
 use crate::hir::{Const, FrameState, Function, Insn, InsnId, SendFallbackReason};
 use crate::hir_type::{types, Type};
 use crate::options::get_option;
@@ -75,12 +75,17 @@ impl JITState {
    }
 
    /// Find or create a label for a given BlockId
-    fn get_label(&mut self, asm: &mut Assembler, block_id: BlockId) -> Target {
-        match &self.labels[block_id.0] {
+    fn get_label(&mut self, asm: &mut Assembler, lir_block_id: lir::BlockId, hir_block_id: BlockId) -> Target {
+        // Extend labels vector if the requested index is out of bounds
+        if lir_block_id.0 >= self.labels.len() {
+            self.labels.resize(lir_block_id.0 + 1, None);
+        }
+
+        match &self.labels[lir_block_id.0] {
            Some(label) => label.clone(),
            None => {
-                let label = asm.new_label(&format!("{block_id}"));
-                self.labels[block_id.0] = Some(label.clone());
+                let label = asm.new_label(&format!("{hir_block_id}_{lir_block_id}"));
+                self.labels[lir_block_id.0] = Some(label.clone());
                label
            }
        }
    }
@@ -176,6 +181,7 @@ fn register_with_perf(iseq_name: String, start_ptr: usize, code_size: usize) {
 pub fn gen_entry_trampoline(cb: &mut CodeBlock) -> Result<CodePtr, CompileError> {
    // Set up registers for CFP, EC, SP, and basic block arguments
    let mut asm = Assembler::new();
+    asm.new_block_without_id();
    gen_entry_prologue(&mut asm);
 
    // Jump to the first block using a call instruction. This trampoline is used
@@ -264,11 +270,28 @@ fn gen_function(cb: &mut CodeBlock, iseq: IseqPtr, version: IseqVersionRef, func
    let mut jit = JITState::new(iseq, version, function.num_insns(), function.num_blocks());
    let mut asm = Assembler::new_with_stack_slots(num_spilled_params);
 
-    // Compile each basic block
+    // Mapping from HIR block IDs to LIR block IDs.
+    // This is a one-to-one mapping from HIR to LIR blocks used for finding
+    // jump targets in LIR (LIR should always jump to the head of an HIR block)
+    let mut hir_to_lir: Vec<Option<lir::BlockId>> = vec![None; function.num_blocks()];
+
    let reverse_post_order = function.rpo();
-    for &block_id in reverse_post_order.iter() {
+
+    // Create all LIR basic blocks corresponding to HIR basic blocks
+    for (rpo_idx, &block_id) in reverse_post_order.iter().enumerate() {
+        let lir_block_id = asm.new_block(block_id, function.is_entry_block(block_id), rpo_idx);
+        hir_to_lir[block_id.0] = Some(lir_block_id);
+    }
+
+    // Compile each basic block
+    for (rpo_idx, &block_id) in reverse_post_order.iter().enumerate() {
+        // Set the current block to the LIR block that corresponds to this
+        // HIR block.
+        let lir_block_id = hir_to_lir[block_id.0].unwrap();
+        asm.set_current_block(lir_block_id);
+
        // Write a label to jump to the basic block
-        let label = jit.get_label(&mut asm, block_id);
+        let label = jit.get_label(&mut asm, lir_block_id, block_id);
        asm.write_label(label);
 
        let block = function.block(block_id);
@@ -291,15 +314,73 @@
        // Compile all instructions
        for &insn_id in block.insns() {
            let insn = function.find(insn_id);
-            if let Err(last_snapshot) = gen_insn(cb, &mut jit, &mut asm, function, insn_id, &insn) {
-                debug!("ZJIT: gen_function: Failed to compile insn: {insn_id} {insn}. Generating side-exit.");
-                gen_incr_counter(&mut asm, exit_counter_for_unhandled_hir_insn(&insn));
-                gen_side_exit(&mut jit, &mut asm, &SideExitReason::UnhandledHIRInsn(insn_id), &function.frame_state(last_snapshot));
-                // Don't bother generating code after a side-exit. We won't run it.
-                // TODO(max): Generate ud2 or equivalent.
- break; - }; - // It's fine; we generated the instruction + match insn { + Insn::IfFalse { val, target } => { + let val_opnd = jit.get_opnd(val); + + let lir_target = hir_to_lir[target.target.0].unwrap(); + + let fall_through_target = asm.new_block(block_id, false, rpo_idx); + + let branch_edge = lir::BranchEdge { + target: lir_target, + args: target.args.iter().map(|insn_id| jit.get_opnd(*insn_id)).collect() + }; + + let fall_through_edge = lir::BranchEdge { + target: fall_through_target, + args: vec![] + }; + + gen_if_false(&mut asm, val_opnd, branch_edge, fall_through_edge); + asm.set_current_block(fall_through_target); + + let label = jit.get_label(&mut asm, fall_through_target, block_id); + asm.write_label(label); + }, + Insn::IfTrue { val, target } => { + let val_opnd = jit.get_opnd(val); + + let lir_target = hir_to_lir[target.target.0].unwrap(); + + let fall_through_target = asm.new_block(block_id, false, rpo_idx); + + let branch_edge = lir::BranchEdge { + target: lir_target, + args: target.args.iter().map(|insn_id| jit.get_opnd(*insn_id)).collect() + }; + + let fall_through_edge = lir::BranchEdge { + target: fall_through_target, + args: vec![] + }; + + gen_if_true(&mut asm, val_opnd, branch_edge, fall_through_edge); + asm.set_current_block(fall_through_target); + + let label = jit.get_label(&mut asm, fall_through_target, block_id); + asm.write_label(label); + } + Insn::Jump(target) => { + let lir_target = hir_to_lir[target.target.0].unwrap(); + let branch_edge = lir::BranchEdge { + target: lir_target, + args: target.args.iter().map(|insn_id| jit.get_opnd(*insn_id)).collect() + }; + gen_jump(&mut asm, branch_edge); + }, + _ => { + if let Err(last_snapshot) = gen_insn(cb, &mut jit, &mut asm, function, insn_id, &insn) { + debug!("ZJIT: gen_function: Failed to compile insn: {insn_id} {insn}. Generating side-exit."); + gen_incr_counter(&mut asm, exit_counter_for_unhandled_hir_insn(&insn)); + gen_side_exit(&mut jit, &mut asm, &SideExitReason::UnhandledHIRInsn(insn_id), &function.frame_state(last_snapshot)); + // Don't bother generating code after a side-exit. We won't run it. + // TODO(max): Generate ud2 or equivalent. + break; + }; + // It's fine; we generated the instruction + } + } } // Make sure the last patch point has enough space to insert a jump asm.pad_patch_point(); @@ -395,9 +476,6 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::ToRegexp { opt, values, state } => gen_toregexp(jit, asm, *opt, opnds!(values), &function.frame_state(*state)), Insn::Param => unreachable!("block.insns should not have Insn::Param"), Insn::Snapshot { .. } => return Ok(()), // we don't need to do anything for this instruction at the moment - Insn::Jump(branch) => no_output!(gen_jump(jit, asm, branch)), - Insn::IfTrue { val, target } => no_output!(gen_if_true(jit, asm, opnd!(val), target)), - Insn::IfFalse { val, target } => no_output!(gen_if_false(jit, asm, opnd!(val), target)), &Insn::Send { cd, blockiseq, state, reason, .. } => gen_send(jit, asm, cd, blockiseq, &function.frame_state(state), reason), &Insn::SendForward { cd, blockiseq, state, reason, .. } => gen_send_forward(jit, asm, cd, blockiseq, &function.frame_state(state), reason), &Insn::SendWithoutBlock { cd, state, reason, .. } => gen_send_without_block(jit, asm, cd, &function.frame_state(state), reason), @@ -511,6 +589,8 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio &Insn::ArrayMax { state, .. } | &Insn::Throw { state, .. 
} => return Err(state), + &Insn::IfFalse { .. } | Insn::IfTrue { .. } + | &Insn::Jump { .. } => unreachable!(), }; assert!(insn.has_output(), "Cannot write LIR output of HIR instruction with no output: {insn}"); @@ -1190,18 +1270,6 @@ fn gen_entry_prologue(asm: &mut Assembler) { asm.mov(SP, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP)); } -/// Set branch params to basic block arguments -fn gen_branch_params(jit: &mut JITState, asm: &mut Assembler, branch: &BranchEdge) { - if branch.args.is_empty() { - return; - } - - asm_comment!(asm, "set branch params: {}", branch.args.len()); - asm.parallel_mov(branch.args.iter().enumerate().map(|(idx, &arg)| - (param_opnd(idx), jit.get_opnd(arg)) - ).collect()); -} - /// Compile a constant fn gen_const_value(val: VALUE) -> lir::Opnd { // Just propagate the constant value and generate nothing @@ -1228,7 +1296,7 @@ fn gen_const_uint32(val: u32) -> lir::Opnd { /// Compile a basic block argument fn gen_param(asm: &mut Assembler, idx: usize) -> lir::Opnd { // Allocate a register or a stack slot - match param_opnd(idx) { + match Assembler::param_opnd(idx) { // If it's a register, insert LiveReg instruction to reserve the register // in the register pool for register allocation. param @ Opnd::Reg(_) => asm.live_reg_opnd(param), @@ -1237,45 +1305,25 @@ fn gen_param(asm: &mut Assembler, idx: usize) -> lir::Opnd { } /// Compile a jump to a basic block -fn gen_jump(jit: &mut JITState, asm: &mut Assembler, branch: &BranchEdge) { - // Set basic block arguments - gen_branch_params(jit, asm, branch); - +fn gen_jump(asm: &mut Assembler, branch: lir::BranchEdge) { // Jump to the basic block - let target = jit.get_label(asm, branch.target); - asm.jmp(target); + asm.jmp(Target::Block(branch)); } /// Compile a conditional branch to a basic block -fn gen_if_true(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, branch: &BranchEdge) { +fn gen_if_true(asm: &mut Assembler, val: lir::Opnd, branch: lir::BranchEdge, fall_through: lir::BranchEdge) { // If val is zero, move on to the next instruction. - let if_false = asm.new_label("if_false"); asm.test(val, val); - asm.jz(if_false.clone()); - - // If val is not zero, set basic block arguments and jump to the branch target. - // TODO: Consider generating the loads out-of-line - let if_true = jit.get_label(asm, branch.target); - gen_branch_params(jit, asm, branch); - asm.jmp(if_true); - - asm.write_label(if_false); + asm.jz(Target::Block(fall_through)); + asm.jmp(Target::Block(branch)); } /// Compile a conditional branch to a basic block -fn gen_if_false(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, branch: &BranchEdge) { +fn gen_if_false(asm: &mut Assembler, val: lir::Opnd, branch: lir::BranchEdge, fall_through: lir::BranchEdge) { // If val is not zero, move on to the next instruction. - let if_true = asm.new_label("if_true"); asm.test(val, val); - asm.jnz(if_true.clone()); - - // If val is zero, set basic block arguments and jump to the branch target. 
- // TODO: Consider generating the loads out-of-line - let if_false = jit.get_label(asm, branch.target); - gen_branch_params(jit, asm, branch); - asm.jmp(if_false); - - asm.write_label(if_true); + asm.jnz(Target::Block(fall_through)); + asm.jmp(Target::Block(branch)); } /// Compile a dynamic dispatch with block @@ -2411,19 +2459,6 @@ fn gen_stack_overflow_check(jit: &mut JITState, asm: &mut Assembler, state: &Fra asm.jbe(side_exit(jit, state, StackOverflow)); } -/// Return an operand we use for the basic block argument at a given index -fn param_opnd(idx: usize) -> Opnd { - // To simplify the implementation, allocate a fixed register or a stack slot for each basic block argument for now. - // Note that this is implemented here as opposed to automatically inside LIR machineries. - // TODO: Allow allocating arbitrary registers for basic block arguments - if idx < ALLOC_REGS.len() { - Opnd::Reg(ALLOC_REGS[idx]) - } else { - // With FrameSetup, the address that NATIVE_BASE_PTR points to stores an old value in the register. - // To avoid clobbering it, we need to start from the next slot, hence `+ 1` for the index. - Opnd::mem(64, NATIVE_BASE_PTR, (idx - ALLOC_REGS.len() + 1) as i32 * -SIZEOF_VALUE_I32) - } -} /// Inverse of ep_offset_to_local_idx(). See ep_offset_to_local_idx() for details. pub fn local_idx_to_ep_offset(iseq: IseqPtr, local_idx: usize) -> i32 { @@ -2618,6 +2653,7 @@ fn function_stub_hit_body(cb: &mut CodeBlock, iseq_call: &IseqCallRef) -> Result /// Compile a stub for an ISEQ called by SendWithoutBlockDirect fn gen_function_stub(cb: &mut CodeBlock, iseq_call: IseqCallRef) -> Result { let (mut asm, scratch_reg) = Assembler::new_with_scratch_reg(); + asm.new_block_without_id(); asm_comment!(asm, "Stub: {}", iseq_get_location(iseq_call.iseq.get(), 0)); // Call function_stub_hit using the shared trampoline. See `gen_function_stub_hit_trampoline`. @@ -2635,6 +2671,7 @@ fn gen_function_stub(cb: &mut CodeBlock, iseq_call: IseqCallRef) -> Result Result { let (mut asm, scratch_reg) = Assembler::new_with_scratch_reg(); + asm.new_block_without_id(); asm_comment!(asm, "function_stub_hit trampoline"); // Maintain alignment for x86_64, and set up a frame for arm64 properly @@ -2693,6 +2730,7 @@ pub fn gen_function_stub_hit_trampoline(cb: &mut CodeBlock) -> Result Result { let mut asm = Assembler::new(); + asm.new_block_without_id(); asm_comment!(asm, "side-exit trampoline"); asm.frame_teardown(&[]); // matching the setup in gen_entry_point() @@ -2707,6 +2745,7 @@ pub fn gen_exit_trampoline(cb: &mut CodeBlock) -> Result /// Generate a trampoline that increments exit_compilation_failure and jumps to exit_trampoline. 
pub fn gen_exit_trampoline_with_counter(cb: &mut CodeBlock, exit_trampoline: CodePtr) -> Result { let mut asm = Assembler::new(); + asm.new_block_without_id(); asm_comment!(asm, "function stub exit trampoline"); gen_incr_counter(&mut asm, exit_compile_error); @@ -2915,6 +2954,7 @@ impl IseqCall { fn regenerate(&self, cb: &mut CodeBlock, callback: impl Fn(&mut Assembler)) { cb.with_write_ptr(self.start_addr.get().unwrap(), |cb| { let mut asm = Assembler::new(); + asm.new_block_without_id(); callback(&mut asm); asm.compile(cb).unwrap(); assert_eq!(self.end_addr.get().unwrap(), cb.get_write_ptr()); diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index fc49dc0c3c6c9e..6bbf477c943196 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -4702,6 +4702,10 @@ impl Function { entry_blocks } + pub fn is_entry_block(&self, block_id: BlockId) -> bool { + self.entry_block == block_id || self.jit_entry_blocks.contains(&block_id) + } + /// Return a traversal of the `Function`'s `BlockId`s in reverse post-order. pub fn rpo(&self) -> Vec { let mut result = self.po_from(self.entry_blocks()); diff --git a/zjit/src/invariants.rs b/zjit/src/invariants.rs index d183eb18abccb8..f1180acf2a2689 100644 --- a/zjit/src/invariants.rs +++ b/zjit/src/invariants.rs @@ -16,6 +16,7 @@ macro_rules! compile_patch_points { for patch_point in $patch_points { let written_range = $cb.with_write_ptr(patch_point.patch_point_ptr, |cb| { let mut asm = Assembler::new(); + asm.new_block_without_id(); asm_comment!(asm, $($comment_args)*); asm.jmp(patch_point.side_exit_ptr.into()); asm.compile(cb).expect("can write existing code"); diff --git a/zjit/src/options.rs b/zjit/src/options.rs index 40b49146b726a3..9121e49bff5e1f 100644 --- a/zjit/src/options.rs +++ b/zjit/src/options.rs @@ -180,6 +180,8 @@ pub enum DumpLIR { alloc_regs, /// Dump LIR after compile_exits compile_exits, + /// Dump LIR after resolve_parallel_mov + resolve_parallel_mov, /// Dump LIR after {arch}_scratch_split scratch_split, } @@ -190,6 +192,7 @@ const DUMP_LIR_ALL: &[DumpLIR] = &[ DumpLIR::split, DumpLIR::alloc_regs, DumpLIR::compile_exits, + DumpLIR::resolve_parallel_mov, DumpLIR::scratch_split, ]; From cfa97af7e1426c76b769495ef4b1689be3b0a685 Mon Sep 17 00:00:00 2001 From: Kevin Menard Date: Wed, 21 Jan 2026 14:07:20 -0500 Subject: [PATCH 2/6] ZJIT: Introduce `GetLEP` HIR instruction (#15917) This PR is a follow-up to #15816. There, I introduced the `GuardSuperMethodEntry` HIR instruction and that needed the LEP. The LEP was also used by `GetBlockHandler`. Consequently, the codegen for `invokesuper` ended up loading the LEP twice. By introducing a new HIR instruction, we can load the LEP once and use it in both `GetBlockHandler` and `GuardSuperMethodEntry`. I also updated `IsBlockGiven`, which conditionally loaded the LEP. To ensure we only use `GetLEP` in the cases we need it, I lifted most of the `IsBlockGiven` handler to HIR. As an added benefit, this addressed a TODO that @tekknolagi had written: when `block_given?` is called outside of a method we can rewrite to a constant `false`. We could use `GetLEP` in the handling of `Defined`, but that looked a bit more involved and I wanted to keep this PR focused, so I'm suggesting we handle that as future work. 
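To illustrate, the `super` guard sequence goes from two hidden LEP loads to a single shared one. The HIR shape below is taken from the updated optimizer test expectations in this PR:

```
# Before: GuardSuperMethodEntry and GetBlockHandler each load the LEP internally
GuardSuperMethodEntry 0x1038
v18:RubyValue = GetBlockHandler

# After: the LEP is materialized once and passed to both users
v17:CPtr = GetLEP
GuardSuperMethodEntry v17, 0x1038
v19:RubyValue = GetBlockHandler v17
```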
--- zjit/src/codegen.rs | 26 +++++++---------- zjit/src/cruby_methods.rs | 10 +++++-- zjit/src/hir.rs | 60 +++++++++++++++++++++++++++------------ zjit/src/hir/opt_tests.rs | 33 +++++++++++---------- 4 files changed, 78 insertions(+), 51 deletions(-) diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 7c867cd6b4ab03..d777002e311a88 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -532,8 +532,8 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::GuardNotShared { recv, state } => gen_guard_not_shared(jit, asm, opnd!(recv), &function.frame_state(*state)), &Insn::GuardLess { left, right, state } => gen_guard_less(jit, asm, opnd!(left), opnd!(right), &function.frame_state(state)), &Insn::GuardGreaterEq { left, right, state } => gen_guard_greater_eq(jit, asm, opnd!(left), opnd!(right), &function.frame_state(state)), - &Insn::GuardSuperMethodEntry { cme, state } => no_output!(gen_guard_super_method_entry(jit, asm, cme, &function.frame_state(state))), - Insn::GetBlockHandler => gen_get_block_handler(jit, asm), + &Insn::GuardSuperMethodEntry { lep, cme, state } => no_output!(gen_guard_super_method_entry(jit, asm, opnd!(lep), cme, &function.frame_state(state))), + Insn::GetBlockHandler { lep } => gen_get_block_handler(asm, opnd!(lep)), Insn::PatchPoint { invariant, state } => no_output!(gen_patch_point(jit, asm, invariant, &function.frame_state(*state))), Insn::CCall { cfunc, recv, args, name, return_type: _, elidable: _ } => gen_ccall(asm, *cfunc, *name, opnd!(recv), opnds!(args)), // Give up CCallWithFrame for 7+ args since asm.ccall() supports at most 6 args (recv + args). @@ -576,11 +576,12 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio &Insn::GuardShape { val, shape, state } => gen_guard_shape(jit, asm, opnd!(val), shape, &function.frame_state(state)), Insn::LoadPC => gen_load_pc(asm), Insn::LoadEC => gen_load_ec(), + Insn::GetLEP => gen_get_lep(jit, asm), Insn::LoadSelf => gen_load_self(), &Insn::LoadField { recv, id, offset, return_type } => gen_load_field(asm, opnd!(recv), id, offset, return_type), &Insn::StoreField { recv, id, offset, val } => no_output!(gen_store_field(asm, opnd!(recv), id, offset, opnd!(val), function.type_of(val))), &Insn::WriteBarrier { recv, val } => no_output!(gen_write_barrier(asm, opnd!(recv), opnd!(val), function.type_of(val))), - &Insn::IsBlockGiven => gen_is_block_given(jit, asm), + &Insn::IsBlockGiven { lep } => gen_is_block_given(asm, opnd!(lep)), Insn::ArrayInclude { elements, target, state } => gen_array_include(jit, asm, opnds!(elements), opnd!(target), &function.frame_state(*state)), Insn::ArrayPackBuffer { elements, fmt, buffer, state } => gen_array_pack_buffer(jit, asm, opnds!(elements), opnd!(fmt), opnd!(buffer), &function.frame_state(*state)), &Insn::DupArrayInclude { ary, target, state } => gen_dup_array_include(jit, asm, ary, opnd!(target), &function.frame_state(state)), @@ -688,16 +689,10 @@ fn gen_defined(jit: &JITState, asm: &mut Assembler, op_type: usize, obj: VALUE, } /// Similar to gen_defined for DEFINED_YIELD -fn gen_is_block_given(jit: &JITState, asm: &mut Assembler) -> Opnd { - let local_iseq = unsafe { rb_get_iseq_body_local_iseq(jit.iseq) }; - if unsafe { rb_get_iseq_body_type(local_iseq) } == ISEQ_TYPE_METHOD { - let lep = gen_get_lep(jit, asm); - let block_handler = asm.load(Opnd::mem(64, lep, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL)); - asm.cmp(block_handler, VM_BLOCK_HANDLER_NONE.into()); - asm.csel_e(Qfalse.into(), Qtrue.into()) - } 
else { - Qfalse.into() - } +fn gen_is_block_given(asm: &mut Assembler, lep: Opnd) -> Opnd { + let block_handler = asm.load(Opnd::mem(64, lep, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL)); + asm.cmp(block_handler, VM_BLOCK_HANDLER_NONE.into()); + asm.csel_e(Qfalse.into(), Qtrue.into()) } fn gen_unbox_fixnum(asm: &mut Assembler, val: Opnd) -> Opnd { @@ -803,11 +798,11 @@ fn gen_guard_greater_eq(jit: &JITState, asm: &mut Assembler, left: Opnd, right: fn gen_guard_super_method_entry( jit: &JITState, asm: &mut Assembler, + lep: Opnd, cme: *const rb_callable_method_entry_t, state: &FrameState, ) { asm_comment!(asm, "guard super method entry"); - let lep = gen_get_lep(jit, asm); let ep_me_opnd = Opnd::mem(64, lep, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_ME_CREF); let ep_me = asm.load(ep_me_opnd); asm.cmp(ep_me, Opnd::UImm(cme as u64)); @@ -815,9 +810,8 @@ fn gen_guard_super_method_entry( } /// Get the block handler from ep[VM_ENV_DATA_INDEX_SPECVAL] at the local EP (LEP). -fn gen_get_block_handler(jit: &JITState, asm: &mut Assembler) -> Opnd { +fn gen_get_block_handler(asm: &mut Assembler, lep: Opnd) -> Opnd { asm_comment!(asm, "get block handler from LEP"); - let lep = gen_get_lep(jit, asm); asm.load(Opnd::mem(64, lep, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL)) } diff --git a/zjit/src/cruby_methods.rs b/zjit/src/cruby_methods.rs index 357c8b0c122b8d..8121b0065f593a 100644 --- a/zjit/src/cruby_methods.rs +++ b/zjit/src/cruby_methods.rs @@ -318,8 +318,14 @@ fn inline_kernel_itself(_fun: &mut hir::Function, _block: hir::BlockId, recv: hi fn inline_kernel_block_given_p(fun: &mut hir::Function, block: hir::BlockId, _recv: hir::InsnId, args: &[hir::InsnId], _state: hir::InsnId) -> Option { let &[] = args else { return None; }; - // TODO(max): In local iseq types that are not ISEQ_TYPE_METHOD, rewrite to Constant false. - Some(fun.push_insn(block, hir::Insn::IsBlockGiven)) + + let local_iseq = unsafe { rb_get_iseq_body_local_iseq(fun.iseq()) }; + if unsafe { rb_get_iseq_body_type(local_iseq) } == ISEQ_TYPE_METHOD { + let lep = fun.push_insn(block, hir::Insn::GetLEP); + Some(fun.push_insn(block, hir::Insn::IsBlockGiven { lep })) + } else { + Some(fun.push_insn(block, hir::Insn::Const { val: hir::Const::Value(Qfalse) })) + } } fn inline_array_aref(fun: &mut hir::Function, block: hir::BlockId, recv: hir::InsnId, args: &[hir::InsnId], state: hir::InsnId) -> Option { diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 6bbf477c943196..fc071e3d67a96c 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -802,7 +802,7 @@ pub enum Insn { GetConstantPath { ic: *const iseq_inline_constant_cache, state: InsnId }, /// Kernel#block_given? but without pushing a frame. Similar to [`Insn::Defined`] with /// `DEFINED_YIELD` - IsBlockGiven, + IsBlockGiven { lep: InsnId }, /// Test the bit at index of val, a Fixnum. /// Return Qtrue if the bit is set, else Qfalse. FixnumBitCheck { val: InsnId, index: u8 }, @@ -849,6 +849,10 @@ pub enum Insn { /// Set a class variable `id` to `val` SetClassVar { id: ID, val: InsnId, ic: *const iseq_inline_cvar_cache_entry, state: InsnId }, + /// Get the EP of the ISeq of the containing method, or "local level", skipping over block-level EPs. + /// Equivalent of GET_LEP() macro. + GetLEP, + /// Own a FrameState so that instructions can look up their dominating FrameState when /// generating deopt side-exits and frame reconstruction metadata. Does not directly generate /// any code. 
@@ -1012,9 +1016,9 @@ pub enum Insn { GuardLess { left: InsnId, right: InsnId, state: InsnId }, /// Side-exit if the method entry at ep[VM_ENV_DATA_INDEX_ME_CREF] doesn't match the expected CME. /// Used to ensure super calls are made from the expected method context. - GuardSuperMethodEntry { cme: *const rb_callable_method_entry_t, state: InsnId }, + GuardSuperMethodEntry { lep: InsnId, cme: *const rb_callable_method_entry_t, state: InsnId }, /// Get the block handler from ep[VM_ENV_DATA_INDEX_SPECVAL] at the local EP (LEP). - GetBlockHandler, + GetBlockHandler { lep: InsnId }, /// Generate no code (or padding if necessary) and insert a patch point /// that can be rewritten to a side exit when the Invariant is broken. @@ -1131,6 +1135,7 @@ impl Insn { Insn::DefinedIvar { .. } => effects::Any, Insn::LoadPC { .. } => Effect::read_write(abstract_heaps::PC, abstract_heaps::Empty), Insn::LoadEC { .. } => effects::Empty, + Insn::GetLEP { .. } => effects::Empty, Insn::LoadSelf { .. } => Effect::read_write(abstract_heaps::Frame, abstract_heaps::Empty), Insn::LoadField { .. } => Effect::read_write(abstract_heaps::Other, abstract_heaps::Empty), Insn::StoreField { .. } => effects::Any, @@ -1510,11 +1515,11 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Insn::GuardNotShared { recv, .. } => write!(f, "GuardNotShared {recv}"), Insn::GuardLess { left, right, .. } => write!(f, "GuardLess {left}, {right}"), Insn::GuardGreaterEq { left, right, .. } => write!(f, "GuardGreaterEq {left}, {right}"), - Insn::GuardSuperMethodEntry { cme, .. } => write!(f, "GuardSuperMethodEntry {:p}", self.ptr_map.map_ptr(cme)), - Insn::GetBlockHandler => write!(f, "GetBlockHandler"), + Insn::GuardSuperMethodEntry { lep, cme, .. } => write!(f, "GuardSuperMethodEntry {lep}, {:p}", self.ptr_map.map_ptr(cme)), + Insn::GetBlockHandler { lep } => write!(f, "GetBlockHandler {lep}"), Insn::PatchPoint { invariant, .. } => { write!(f, "PatchPoint {}", invariant.print(self.ptr_map)) }, Insn::GetConstantPath { ic, .. } => { write!(f, "GetConstantPath {:p}", self.ptr_map.map_ptr(ic)) }, - Insn::IsBlockGiven => { write!(f, "IsBlockGiven") }, + Insn::IsBlockGiven { lep } => { write!(f, "IsBlockGiven {lep}") }, Insn::FixnumBitCheck {val, index} => { write!(f, "FixnumBitCheck {val}, {index}") }, Insn::CCall { cfunc, recv, args, name, return_type: _, elidable: _ } => { write!(f, "CCall {recv}, :{}@{:p}", name.contents_lossy(), self.ptr_map.map_ptr(cfunc))?; @@ -1562,6 +1567,7 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Insn::GetIvar { self_val, id, .. 
} => write!(f, "GetIvar {self_val}, :{}", id.contents_lossy()), Insn::LoadPC => write!(f, "LoadPC"), Insn::LoadEC => write!(f, "LoadEC"), + Insn::GetLEP => write!(f, "GetLEP"), Insn::LoadSelf => write!(f, "LoadSelf"), &Insn::LoadField { recv, id, offset, return_type: _ } => write!(f, "LoadField {recv}, :{}@{:p}", id.contents_lossy(), self.ptr_map.map_offset(offset)), &Insn::StoreField { recv, id, offset, val } => write!(f, "StoreField {recv}, :{}@{:p}, {val}", id.contents_lossy(), self.ptr_map.map_offset(offset)), @@ -1969,6 +1975,10 @@ impl Function { } } + pub fn iseq(&self) -> *const rb_iseq_t { + self.iseq + } + // Add an instruction to the function without adding it to any block fn new_insn(&mut self, insn: Insn) -> InsnId { let id = InsnId(self.insns.len()); @@ -2119,7 +2129,6 @@ impl Function { result@(Const {..} | Param | GetConstantPath {..} - | IsBlockGiven | PatchPoint {..} | PutSpecialObject {..} | GetGlobal {..} @@ -2128,6 +2137,7 @@ impl Function { | EntryPoint {..} | LoadPC | LoadEC + | GetLEP | LoadSelf | IncrCounterPtr {..} | IncrCounter(_)) => result.clone(), @@ -2173,8 +2183,9 @@ impl Function { &GuardNotShared { recv, state } => GuardNotShared { recv: find!(recv), state }, &GuardGreaterEq { left, right, state } => GuardGreaterEq { left: find!(left), right: find!(right), state }, &GuardLess { left, right, state } => GuardLess { left: find!(left), right: find!(right), state }, - &GuardSuperMethodEntry { cme, state } => GuardSuperMethodEntry { cme, state }, - &GetBlockHandler => GetBlockHandler, + &GuardSuperMethodEntry { lep, cme, state } => GuardSuperMethodEntry { lep: find!(lep), cme, state }, + &GetBlockHandler { lep } => GetBlockHandler { lep: find!(lep) }, + &IsBlockGiven { lep } => IsBlockGiven { lep: find!(lep) }, &FixnumAdd { left, right, state } => FixnumAdd { left: find!(left), right: find!(right), state }, &FixnumSub { left, right, state } => FixnumSub { left: find!(left), right: find!(right), state }, &FixnumMult { left, right, state } => FixnumMult { left: find!(left), right: find!(right), state }, @@ -2446,7 +2457,7 @@ impl Function { Insn::Defined { pushval, .. } => Type::from_value(*pushval).union(types::NilClass), Insn::DefinedIvar { pushval, .. } => Type::from_value(*pushval).union(types::NilClass), Insn::GetConstantPath { .. } => types::BasicObject, - Insn::IsBlockGiven => types::BoolExact, + Insn::IsBlockGiven { .. } => types::BoolExact, Insn::FixnumBitCheck { .. } => types::BoolExact, Insn::ArrayMax { .. } => types::BasicObject, Insn::ArrayInclude { .. } => types::BoolExact, @@ -2457,6 +2468,7 @@ impl Function { Insn::GetIvar { .. } => types::BasicObject, Insn::LoadPC => types::CPtr, Insn::LoadEC => types::CPtr, + Insn::GetLEP => types::CPtr, Insn::LoadSelf => types::BasicObject, &Insn::LoadField { return_type, .. } => return_type, Insn::GetSpecialSymbol { .. } => types::BasicObject, @@ -2468,7 +2480,7 @@ impl Function { Insn::AnyToString { .. } => types::String, Insn::GetLocal { rest_param: true, .. } => types::ArrayExact, Insn::GetLocal { .. } => types::BasicObject, - Insn::GetBlockHandler => types::RubyValue, + Insn::GetBlockHandler { .. } => types::RubyValue, // The type of Snapshot doesn't really matter; it's never materialized. It's used only // as a reference for FrameState, which we use to generate side-exit code. Insn::Snapshot { .. } => types::Any, @@ -3413,10 +3425,15 @@ impl Function { }); // Guard that we're calling `super` from the expected method context. 
- self.push_insn(block, Insn::GuardSuperMethodEntry { cme: current_cme, state }); + let lep = self.push_insn(block, Insn::GetLEP); + self.push_insn(block, Insn::GuardSuperMethodEntry { + lep, + cme: current_cme, + state + }); // Guard that no block is being passed (implicit or explicit). - let block_handler = self.push_insn(block, Insn::GetBlockHandler); + let block_handler = self.push_insn(block, Insn::GetBlockHandler { lep }); self.push_insn(block, Insn::GuardBitEquals { val: block_handler, expected: Const::Value(VALUE(VM_BLOCK_HANDLER_NONE as usize)), @@ -4357,14 +4374,17 @@ impl Function { | &Insn::EntryPoint { .. } | &Insn::LoadPC | &Insn::LoadEC + | &Insn::GetLEP | &Insn::LoadSelf | &Insn::GetLocal { .. } - | &Insn::GetBlockHandler | &Insn::PutSpecialObject { .. } - | &Insn::IsBlockGiven | &Insn::IncrCounter(_) | &Insn::IncrCounterPtr { .. } => {} + &Insn::GetBlockHandler { lep } + | &Insn::IsBlockGiven { lep } => { + worklist.push_back(lep); + } &Insn::PatchPoint { state, .. } | &Insn::CheckInterrupts { state } | &Insn::GetConstantPath { ic: _, state } => { @@ -4589,12 +4609,15 @@ impl Function { worklist.push_back(val); } &Insn::GuardBlockParamProxy { state, .. } | - &Insn::GuardSuperMethodEntry { state, .. } | &Insn::GetGlobal { state, .. } | &Insn::GetSpecialSymbol { state, .. } | &Insn::GetSpecialNumber { state, .. } | &Insn::ObjectAllocClass { state, .. } | &Insn::SideExit { state, .. } => worklist.push_back(state), + &Insn::GuardSuperMethodEntry { lep, state, .. } => { + worklist.push_back(lep); + worklist.push_back(state); + } &Insn::UnboxFixnum { val } => worklist.push_back(val), &Insn::FixnumAref { recv, index } => { worklist.push_back(recv); @@ -5099,17 +5122,18 @@ impl Function { | Insn::PutSpecialObject { .. } | Insn::LoadField { .. } | Insn::GetConstantPath { .. } - | Insn::IsBlockGiven + | Insn::IsBlockGiven { .. } | Insn::GetGlobal { .. } | Insn::LoadPC | Insn::LoadEC + | Insn::GetLEP | Insn::LoadSelf | Insn::Snapshot { .. } | Insn::Jump { .. } | Insn::EntryPoint { .. } | Insn::GuardBlockParamProxy { .. } | Insn::GuardSuperMethodEntry { .. } - | Insn::GetBlockHandler + | Insn::GetBlockHandler { .. } | Insn::PatchPoint { .. } | Insn::SideExit { .. } | Insn::IncrCounter { .. 
} diff --git a/zjit/src/hir/opt_tests.rs b/zjit/src/hir/opt_tests.rs index 9fa622dd2c0d78..b7595f1b27221c 100644 --- a/zjit/src/hir/opt_tests.rs +++ b/zjit/src/hir/opt_tests.rs @@ -2653,10 +2653,11 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, block_given?@0x1008, cme:0x1010) v19:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v20:BoolExact = IsBlockGiven + v20:CPtr = GetLEP + v21:BoolExact = IsBlockGiven v20 IncrCounter inline_cfunc_optimized_send_count CheckInterrupts - Return v20 + Return v21 "); } @@ -2679,7 +2680,7 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, block_given?@0x1008, cme:0x1010) v19:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v20:BoolExact = IsBlockGiven + v20:FalseClass = Const Value(false) IncrCounter inline_cfunc_optimized_send_count CheckInterrupts Return v20 @@ -10943,12 +10944,13 @@ mod hir_opt_tests { Jump bb2(v4) bb2(v6:BasicObject): PatchPoint MethodRedefined(A@0x1000, foo@0x1008, cme:0x1010) - GuardSuperMethodEntry 0x1038 - v18:RubyValue = GetBlockHandler - v19:FalseClass = GuardBitEquals v18, Value(false) - v20:BasicObject = SendWithoutBlockDirect v6, :foo (0x1040) + v17:CPtr = GetLEP + GuardSuperMethodEntry v17, 0x1038 + v19:RubyValue = GetBlockHandler v17 + v20:FalseClass = GuardBitEquals v19, Value(false) + v21:BasicObject = SendWithoutBlockDirect v6, :foo (0x1040) CheckInterrupts - Return v20 + Return v21 "); } @@ -10986,17 +10988,18 @@ mod hir_opt_tests { Jump bb2(v5, v6) bb2(v8:BasicObject, v9:BasicObject): PatchPoint MethodRedefined(A@0x1000, foo@0x1008, cme:0x1010) - GuardSuperMethodEntry 0x1038 - v27:RubyValue = GetBlockHandler - v28:FalseClass = GuardBitEquals v27, Value(false) - v29:BasicObject = SendWithoutBlockDirect v8, :foo (0x1040), v9 + v26:CPtr = GetLEP + GuardSuperMethodEntry v26, 0x1038 + v28:RubyValue = GetBlockHandler v26 + v29:FalseClass = GuardBitEquals v28, Value(false) + v30:BasicObject = SendWithoutBlockDirect v8, :foo (0x1040), v9 v17:Fixnum[1] = Const Value(1) PatchPoint MethodRedefined(Integer@0x1048, +@0x1050, cme:0x1058) - v32:Fixnum = GuardType v29, Fixnum - v33:Fixnum = FixnumAdd v32, v17 + v33:Fixnum = GuardType v30, Fixnum + v34:Fixnum = FixnumAdd v33, v17 IncrCounter inline_cfunc_optimized_send_count CheckInterrupts - Return v33 + Return v34 "); } From 0cc4e212c47f15f9f5384fb9871a2da8a6276ed4 Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Wed, 21 Jan 2026 14:23:29 -0500 Subject: [PATCH 3/6] ZJIT: Get type information from branchif, branchunless, branchnil instructions (#15915) Do a sort of "partial static single information (SSI)" form that learns types of operands from branch instructions. A branchif, for example, tells us that in the truthy path, we know the operand is not nil, and not false. Similarly, in the falsy path, we know the operand is either nil or false. Add a RefineType instruction to attach this information. This PR does this in SSA construction because it's pretty straightforward, but we can also do a more aggressive version of this that can learn information about e.g. int ranges from other checks later in the optimization pipeline. 
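Concretely, lowering a `branchunless` now refines the operand on both edges. The shape below is taken from the updated snapshot tests in this PR (`Falsy` is the new `NilClass|FalseClass` union type, and `Truthy` its complement, both added in `gen_hir_type.rb`):

```
v15:CBool = Test v9
v16:Falsy = RefineType v9, Falsy     # taken edge: v9 is nil or false
IfFalse v15, bb3(v8, v16)
v18:Truthy = RefineType v9, Truthy   # fall-through: v9 is neither nil nor false
```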
--- zjit/src/codegen.rs | 1 + zjit/src/hir.rs | 78 +++++++++++--- zjit/src/hir/opt_tests.rs | 132 ++++++++++++----------- zjit/src/hir/tests.rs | 170 ++++++++++++++++++++---------- zjit/src/hir_type/gen_hir_type.rb | 5 + zjit/src/hir_type/hir_type.inc.rs | 11 +- zjit/src/hir_type/mod.rs | 60 +++++++++++ 7 files changed, 324 insertions(+), 133 deletions(-) diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index d777002e311a88..0030493ddfa3d3 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -524,6 +524,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio &Insn::BoxFixnum { val, state } => gen_box_fixnum(jit, asm, opnd!(val), &function.frame_state(state)), &Insn::UnboxFixnum { val } => gen_unbox_fixnum(asm, opnd!(val)), Insn::Test { val } => gen_test(asm, opnd!(val)), + Insn::RefineType { val, .. } => opnd!(val), Insn::GuardType { val, guard_type, state } => gen_guard_type(jit, asm, opnd!(val), *guard_type, &function.frame_state(*state)), Insn::GuardTypeNot { val, guard_type, state } => gen_guard_type_not(jit, asm, opnd!(val), *guard_type, &function.frame_state(*state)), &Insn::GuardBitEquals { val, expected, reason, state } => gen_guard_bit_equals(jit, asm, opnd!(val), expected, reason, &function.frame_state(state)), diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index fc071e3d67a96c..4326d37b344980 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -994,6 +994,10 @@ pub enum Insn { ObjToString { val: InsnId, cd: *const rb_call_data, state: InsnId }, AnyToString { val: InsnId, str: InsnId, state: InsnId }, + /// Refine the known type information of `val` with additional type information. + /// Computes the intersection of the existing type and the new type. + RefineType { val: InsnId, new_type: Type }, + /// Side-exit if val doesn't have the expected type. GuardType { val: InsnId, guard_type: Type, state: InsnId }, GuardTypeNot { val: InsnId, guard_type: Type, state: InsnId }, @@ -1212,6 +1216,7 @@ impl Insn { Insn::IncrCounterPtr { .. } => effects::Any, Insn::CheckInterrupts { .. } => effects::Any, Insn::InvokeProc { .. } => effects::Any, + Insn::RefineType { .. } => effects::Empty, } } @@ -1507,6 +1512,7 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Insn::FixnumLShift { left, right, .. } => { write!(f, "FixnumLShift {left}, {right}") }, Insn::FixnumRShift { left, right, .. } => { write!(f, "FixnumRShift {left}, {right}") }, Insn::GuardType { val, guard_type, .. } => { write!(f, "GuardType {val}, {}", guard_type.print(self.ptr_map)) }, Insn::RefineType { val, new_type, .. } => { write!(f, "RefineType {val}, {}", new_type.print(self.ptr_map)) }, Insn::GuardTypeNot { val, guard_type, .. } => { write!(f, "GuardTypeNot {val}, {}", guard_type.print(self.ptr_map)) }, Insn::GuardBitEquals { val, expected, .. } => { write!(f, "GuardBitEquals {val}, {}", expected.print(self.ptr_map)) }, &Insn::GuardShape { val, shape, ..
} => { write!(f, "GuardShape {val}, {:p}", self.ptr_map.map_shape(shape)) }, @@ -2174,6 +2180,7 @@ impl Function { Jump(target) => Jump(find_branch_edge!(target)), &IfTrue { val, ref target } => IfTrue { val: find!(val), target: find_branch_edge!(target) }, &IfFalse { val, ref target } => IfFalse { val: find!(val), target: find_branch_edge!(target) }, + &RefineType { val, new_type } => RefineType { val: find!(val), new_type }, &GuardType { val, guard_type, state } => GuardType { val: find!(val), guard_type, state }, &GuardTypeNot { val, guard_type, state } => GuardTypeNot { val: find!(val), guard_type, state }, &GuardBitEquals { val, expected, reason, state } => GuardBitEquals { val: find!(val), expected, reason, state }, @@ -2423,6 +2430,7 @@ impl Function { Insn::CCall { return_type, .. } => *return_type, &Insn::CCallVariadic { return_type, .. } => return_type, Insn::GuardType { val, guard_type, .. } => self.type_of(*val).intersection(*guard_type), + Insn::RefineType { val, new_type, .. } => self.type_of(*val).intersection(*new_type), Insn::GuardTypeNot { .. } => types::BasicObject, Insn::GuardBitEquals { val, expected, .. } => self.type_of(*val).intersection(Type::from_const(*expected)), Insn::GuardShape { val, .. } => self.type_of(*val), @@ -2594,6 +2602,7 @@ impl Function { | Insn::GuardTypeNot { val, .. } | Insn::GuardShape { val, .. } | Insn::GuardBitEquals { val, .. } => self.chase_insn(val), + | Insn::RefineType { val, .. } => self.chase_insn(val), _ => id, } } @@ -4445,6 +4454,7 @@ impl Function { worklist.extend(values); worklist.push_back(state); } + | &Insn::RefineType { val, .. } | &Insn::Return { val } | &Insn::Test { val } | &Insn::SetLocal { val, .. } @@ -5370,6 +5380,7 @@ impl Function { self.assert_subtype(insn_id, val, types::BasicObject)?; self.assert_subtype(insn_id, class, types::Class) } + Insn::RefineType { .. } => Ok(()), } } @@ -5562,6 +5573,19 @@ impl FrameState { state.stack.extend_from_slice(new_args); state } + + fn replace(&mut self, old: InsnId, new: InsnId) { + for slot in &mut self.stack { + if *slot == old { + *slot = new; + } + } + for slot in &mut self.locals { + if *slot == old { + *slot = new; + } + } + } } /// Print adaptor for [`FrameState`]. See [`PtrPrintMap`]. 
@@ -6245,10 +6269,17 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { let test_id = fun.push_insn(block, Insn::Test { val }); let target_idx = insn_idx_at_offset(insn_idx, offset); let target = insn_idx_to_block[&target_idx]; + let nil_false_type = types::NilClass.union(types::FalseClass); + let nil_false = fun.push_insn(block, Insn::RefineType { val, new_type: nil_false_type }); + let mut iffalse_state = state.clone(); + iffalse_state.replace(val, nil_false); let _branch_id = fun.push_insn(block, Insn::IfFalse { val: test_id, - target: BranchEdge { target, args: state.as_args(self_param) } + target: BranchEdge { target, args: iffalse_state.as_args(self_param) } }); + let not_nil_false_type = types::BasicObject.subtract(types::NilClass).subtract(types::FalseClass); + let not_nil_false = fun.push_insn(block, Insn::RefineType { val, new_type: not_nil_false_type }); + state.replace(val, not_nil_false); queue.push_back((state.clone(), target, target_idx, local_inval)); } YARVINSN_branchif => { @@ -6258,10 +6289,17 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { let test_id = fun.push_insn(block, Insn::Test { val }); let target_idx = insn_idx_at_offset(insn_idx, offset); let target = insn_idx_to_block[&target_idx]; + let not_nil_false_type = types::BasicObject.subtract(types::NilClass).subtract(types::FalseClass); + let not_nil_false = fun.push_insn(block, Insn::RefineType { val, new_type: not_nil_false_type }); + let mut iftrue_state = state.clone(); + iftrue_state.replace(val, not_nil_false); let _branch_id = fun.push_insn(block, Insn::IfTrue { val: test_id, - target: BranchEdge { target, args: state.as_args(self_param) } + target: BranchEdge { target, args: iftrue_state.as_args(self_param) } }); + let nil_false_type = types::NilClass.union(types::FalseClass); + let nil_false = fun.push_insn(block, Insn::RefineType { val, new_type: nil_false_type }); + state.replace(val, nil_false); queue.push_back((state.clone(), target, target_idx, local_inval)); } YARVINSN_branchnil => { @@ -6271,10 +6309,16 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { let test_id = fun.push_insn(block, Insn::IsNil { val }); let target_idx = insn_idx_at_offset(insn_idx, offset); let target = insn_idx_to_block[&target_idx]; + let nil = fun.push_insn(block, Insn::Const { val: Const::Value(Qnil) }); + let mut iftrue_state = state.clone(); + iftrue_state.replace(val, nil); let _branch_id = fun.push_insn(block, Insn::IfTrue { val: test_id, - target: BranchEdge { target, args: state.as_args(self_param) } + target: BranchEdge { target, args: iftrue_state.as_args(self_param) } }); + let new_type = types::BasicObject.subtract(types::NilClass); + let not_nil = fun.push_insn(block, Insn::RefineType { val, new_type }); + state.replace(val, not_nil); queue.push_back((state.clone(), target, target_idx, local_inval)); } YARVINSN_opt_case_dispatch => { @@ -7693,21 +7737,23 @@ mod graphviz_tests { PatchPoint NoTracePoint  CheckInterrupts  v15:CBool = Test v9  - IfFalse v15, bb3(v8, v9)  - PatchPoint NoTracePoint  - v19:Fixnum[3] = Const Value(3)  - PatchPoint NoTracePoint  - CheckInterrupts  - Return v19  + v16:Falsy = RefineType v9, Falsy  + IfFalse v15, bb3(v8, v16)  + v18:Truthy = RefineType v9, Truthy  + PatchPoint NoTracePoint  + v21:Fixnum[3] = Const Value(3)  + PatchPoint NoTracePoint  + CheckInterrupts  + Return v21  >]; - bb2:v16 -> bb3:params:n; + bb2:v17 -> bb3:params:n; bb3 [label=< - - - - - - + + + + + +
- bb3(v24:BasicObject, v25:BasicObject)
- PatchPoint NoTracePoint
- v29:Fixnum[4] = Const Value(4)
- PatchPoint NoTracePoint
- CheckInterrupts
- Return v29
+ bb3(v26:BasicObject, v27:Falsy)
+ PatchPoint NoTracePoint
+ v31:Fixnum[4] = Const Value(4)
+ PatchPoint NoTracePoint
+ CheckInterrupts
+ Return v31
>]; } "#); diff --git a/zjit/src/hir/opt_tests.rs b/zjit/src/hir/opt_tests.rs index b7595f1b27221c..da240250101687 100644 --- a/zjit/src/hir/opt_tests.rs +++ b/zjit/src/hir/opt_tests.rs @@ -52,9 +52,10 @@ mod hir_opt_tests { bb2(v8:BasicObject, v9:NilClass): v13:TrueClass = Const Value(true) CheckInterrupts - v23:Fixnum[3] = Const Value(3) + v22:TrueClass = RefineType v13, Truthy + v25:Fixnum[3] = Const Value(3) CheckInterrupts - Return v23 + Return v25 "); } @@ -84,9 +85,10 @@ mod hir_opt_tests { bb2(v8:BasicObject, v9:NilClass): v13:FalseClass = Const Value(false) CheckInterrupts - v33:Fixnum[4] = Const Value(4) + v20:FalseClass = RefineType v13, Falsy + v35:Fixnum[4] = Const Value(4) CheckInterrupts - Return v33 + Return v35 "); } @@ -267,12 +269,12 @@ mod hir_opt_tests { v10:Fixnum[1] = Const Value(1) v12:Fixnum[2] = Const Value(2) PatchPoint MethodRedefined(Integer@0x1000, <@0x1008, cme:0x1010) - v40:TrueClass = Const Value(true) + v42:TrueClass = Const Value(true) IncrCounter inline_cfunc_optimized_send_count CheckInterrupts - v22:Fixnum[3] = Const Value(3) + v24:Fixnum[3] = Const Value(3) CheckInterrupts - Return v22 + Return v24 "); } @@ -300,18 +302,18 @@ mod hir_opt_tests { v10:Fixnum[1] = Const Value(1) v12:Fixnum[2] = Const Value(2) PatchPoint MethodRedefined(Integer@0x1000, <=@0x1008, cme:0x1010) - v55:TrueClass = Const Value(true) + v59:TrueClass = Const Value(true) IncrCounter inline_cfunc_optimized_send_count CheckInterrupts - v21:Fixnum[2] = Const Value(2) v23:Fixnum[2] = Const Value(2) + v25:Fixnum[2] = Const Value(2) PatchPoint MethodRedefined(Integer@0x1000, <=@0x1008, cme:0x1010) - v57:TrueClass = Const Value(true) + v61:TrueClass = Const Value(true) IncrCounter inline_cfunc_optimized_send_count CheckInterrupts - v33:Fixnum[3] = Const Value(3) + v37:Fixnum[3] = Const Value(3) CheckInterrupts - Return v33 + Return v37 "); } @@ -339,12 +341,12 @@ mod hir_opt_tests { v10:Fixnum[2] = Const Value(2) v12:Fixnum[1] = Const Value(1) PatchPoint MethodRedefined(Integer@0x1000, >@0x1008, cme:0x1010) - v40:TrueClass = Const Value(true) + v42:TrueClass = Const Value(true) IncrCounter inline_cfunc_optimized_send_count CheckInterrupts - v22:Fixnum[3] = Const Value(3) + v24:Fixnum[3] = Const Value(3) CheckInterrupts - Return v22 + Return v24 "); } @@ -372,18 +374,18 @@ mod hir_opt_tests { v10:Fixnum[2] = Const Value(2) v12:Fixnum[1] = Const Value(1) PatchPoint MethodRedefined(Integer@0x1000, >=@0x1008, cme:0x1010) - v55:TrueClass = Const Value(true) + v59:TrueClass = Const Value(true) IncrCounter inline_cfunc_optimized_send_count CheckInterrupts - v21:Fixnum[2] = Const Value(2) v23:Fixnum[2] = Const Value(2) + v25:Fixnum[2] = Const Value(2) PatchPoint MethodRedefined(Integer@0x1000, >=@0x1008, cme:0x1010) - v57:TrueClass = Const Value(true) + v61:TrueClass = Const Value(true) IncrCounter inline_cfunc_optimized_send_count CheckInterrupts - v33:Fixnum[3] = Const Value(3) + v37:Fixnum[3] = Const Value(3) CheckInterrupts - Return v33 + Return v37 "); } @@ -411,12 +413,12 @@ mod hir_opt_tests { v10:Fixnum[1] = Const Value(1) v12:Fixnum[2] = Const Value(2) PatchPoint MethodRedefined(Integer@0x1000, ==@0x1008, cme:0x1010) - v40:FalseClass = Const Value(false) + v42:FalseClass = Const Value(false) IncrCounter inline_cfunc_optimized_send_count CheckInterrupts - v31:Fixnum[4] = Const Value(4) + v33:Fixnum[4] = Const Value(4) CheckInterrupts - Return v31 + Return v33 "); } @@ -444,12 +446,12 @@ mod hir_opt_tests { v10:Fixnum[2] = Const Value(2) v12:Fixnum[2] = Const Value(2) PatchPoint 
MethodRedefined(Integer@0x1000, ==@0x1008, cme:0x1010) - v40:TrueClass = Const Value(true) + v42:TrueClass = Const Value(true) IncrCounter inline_cfunc_optimized_send_count CheckInterrupts - v22:Fixnum[3] = Const Value(3) + v24:Fixnum[3] = Const Value(3) CheckInterrupts - Return v22 + Return v24 "); } @@ -478,12 +480,12 @@ mod hir_opt_tests { v12:Fixnum[2] = Const Value(2) PatchPoint MethodRedefined(Integer@0x1000, !=@0x1008, cme:0x1010) PatchPoint BOPRedefined(INTEGER_REDEFINED_OP_FLAG, BOP_EQ) - v41:TrueClass = Const Value(true) + v43:TrueClass = Const Value(true) IncrCounter inline_cfunc_optimized_send_count CheckInterrupts - v22:Fixnum[3] = Const Value(3) + v24:Fixnum[3] = Const Value(3) CheckInterrupts - Return v22 + Return v24 "); } @@ -512,12 +514,12 @@ mod hir_opt_tests { v12:Fixnum[2] = Const Value(2) PatchPoint MethodRedefined(Integer@0x1000, !=@0x1008, cme:0x1010) PatchPoint BOPRedefined(INTEGER_REDEFINED_OP_FLAG, BOP_EQ) - v41:FalseClass = Const Value(false) + v43:FalseClass = Const Value(false) IncrCounter inline_cfunc_optimized_send_count CheckInterrupts - v31:Fixnum[4] = Const Value(4) + v33:Fixnum[4] = Const Value(4) CheckInterrupts - Return v31 + Return v33 "); } @@ -4992,8 +4994,9 @@ mod hir_opt_tests { bb2(v8:BasicObject, v9:NilClass): v13:NilClass = Const Value(nil) CheckInterrupts + v21:NilClass = Const Value(nil) CheckInterrupts - Return v13 + Return v21 "); } @@ -5020,10 +5023,11 @@ mod hir_opt_tests { bb2(v8:BasicObject, v9:NilClass): v13:Fixnum[1] = Const Value(1) CheckInterrupts + v23:Fixnum[1] = RefineType v13, NotNil PatchPoint MethodRedefined(Integer@0x1000, itself@0x1008, cme:0x1010) IncrCounter inline_cfunc_optimized_send_count CheckInterrupts - Return v13 + Return v23 "); } @@ -5840,20 +5844,22 @@ mod hir_opt_tests { bb2(v8:BasicObject, v9:BasicObject): CheckInterrupts v15:CBool = Test v9 - IfFalse v15, bb3(v8, v9) - v18:FalseClass = Const Value(false) + v16:Falsy = RefineType v9, Falsy + IfFalse v15, bb3(v8, v16) + v18:Truthy = RefineType v9, Truthy + v20:FalseClass = Const Value(false) CheckInterrupts - Jump bb4(v8, v9, v18) - bb3(v22:BasicObject, v23:BasicObject): - v26:NilClass = Const Value(nil) - Jump bb4(v22, v23, v26) - bb4(v28:BasicObject, v29:BasicObject, v30:NilClass|FalseClass): + Jump bb4(v8, v18, v20) + bb3(v24:BasicObject, v25:Falsy): + v28:NilClass = Const Value(nil) + Jump bb4(v24, v25, v28) + bb4(v30:BasicObject, v31:BasicObject, v32:Falsy): PatchPoint MethodRedefined(NilClass@0x1000, !@0x1008, cme:0x1010) - v41:NilClass = GuardType v30, NilClass - v42:TrueClass = Const Value(true) + v43:NilClass = GuardType v32, NilClass + v44:TrueClass = Const Value(true) IncrCounter inline_cfunc_optimized_send_count CheckInterrupts - Return v42 + Return v44 "); } @@ -10059,9 +10065,9 @@ mod hir_opt_tests { bb2(v6:BasicObject): PatchPoint NoSingletonClass(C@0x1000) PatchPoint MethodRedefined(C@0x1000, class@0x1008, cme:0x1010) - v40:HeapObject[class_exact:C] = GuardType v6, HeapObject[class_exact:C] + v42:HeapObject[class_exact:C] = GuardType v6, HeapObject[class_exact:C] IncrCounter inline_iseq_optimized_send_count - v44:Class[C@0x1000] = Const Value(VALUE(0x1000)) + v46:Class[C@0x1000] = Const Value(VALUE(0x1000)) IncrCounter inline_cfunc_optimized_send_count v13:StaticSymbol[:_lex_actions] = Const Value(VALUE(0x1038)) v15:TrueClass = Const Value(true) @@ -10069,12 +10075,12 @@ mod hir_opt_tests { PatchPoint MethodRedefined(Class@0x1040, respond_to?@0x1048, cme:0x1050) PatchPoint NoSingletonClass(Class@0x1040) PatchPoint MethodRedefined(Class@0x1040, 
_lex_actions@0x1078, cme:0x1080) - v52:TrueClass = Const Value(true) + v54:TrueClass = Const Value(true) IncrCounter inline_cfunc_optimized_send_count CheckInterrupts - v24:StaticSymbol[:CORRECT] = Const Value(VALUE(0x10a8)) + v26:StaticSymbol[:CORRECT] = Const Value(VALUE(0x10a8)) CheckInterrupts - Return v24 + Return v26 "); } @@ -10230,23 +10236,23 @@ mod hir_opt_tests { CheckInterrupts SetLocal :formatted, l0, EP@3, v15 PatchPoint SingleRactorMode - v54:HeapBasicObject = GuardType v14, HeapBasicObject - v55:CShape = LoadField v54, :_shape_id@0x1000 - v56:CShape[0x1001] = GuardBitEquals v55, CShape(0x1001) - StoreField v54, :@formatted@0x1002, v15 - WriteBarrier v54, v15 - v59:CShape[0x1003] = Const CShape(0x1003) - StoreField v54, :_shape_id@0x1000, v59 - v43:Class[VMFrozenCore] = Const Value(VALUE(0x1008)) + v56:HeapBasicObject = GuardType v14, HeapBasicObject + v57:CShape = LoadField v56, :_shape_id@0x1000 + v58:CShape[0x1001] = GuardBitEquals v57, CShape(0x1001) + StoreField v56, :@formatted@0x1002, v15 + WriteBarrier v56, v15 + v61:CShape[0x1003] = Const CShape(0x1003) + StoreField v56, :_shape_id@0x1000, v61 + v45:Class[VMFrozenCore] = Const Value(VALUE(0x1008)) PatchPoint NoSingletonClass(Class@0x1010) PatchPoint MethodRedefined(Class@0x1010, lambda@0x1018, cme:0x1020) - v64:BasicObject = CCallWithFrame v43, :RubyVM::FrozenCore.lambda@0x1048, block=0x1050 - v46:BasicObject = GetLocal :a, l0, EP@6 - v47:BasicObject = GetLocal :_b, l0, EP@5 - v48:BasicObject = GetLocal :_c, l0, EP@4 - v49:BasicObject = GetLocal :formatted, l0, EP@3 + v66:BasicObject = CCallWithFrame v45, :RubyVM::FrozenCore.lambda@0x1048, block=0x1050 + v48:BasicObject = GetLocal :a, l0, EP@6 + v49:BasicObject = GetLocal :_b, l0, EP@5 + v50:BasicObject = GetLocal :_c, l0, EP@4 + v51:BasicObject = GetLocal :formatted, l0, EP@3 CheckInterrupts - Return v64 + Return v66 "); } diff --git a/zjit/src/hir/tests.rs b/zjit/src/hir/tests.rs index 3b0f5915997ba1..3e281782738d42 100644 --- a/zjit/src/hir/tests.rs +++ b/zjit/src/hir/tests.rs @@ -1083,14 +1083,16 @@ pub mod hir_build_tests { v10:TrueClass|NilClass = DefinedIvar v6, :@foo CheckInterrupts v13:CBool = Test v10 + v14:NilClass = RefineType v10, Falsy IfFalse v13, bb3(v6) - v17:Fixnum[3] = Const Value(3) + v16:TrueClass = RefineType v10, Truthy + v19:Fixnum[3] = Const Value(3) CheckInterrupts - Return v17 - bb3(v22:BasicObject): - v26:Fixnum[4] = Const Value(4) + Return v19 + bb3(v24:BasicObject): + v28:Fixnum[4] = Const Value(4) CheckInterrupts - Return v26 + Return v28 "); } @@ -1146,14 +1148,16 @@ pub mod hir_build_tests { bb2(v8:BasicObject, v9:BasicObject): CheckInterrupts v15:CBool = Test v9 - IfFalse v15, bb3(v8, v9) - v19:Fixnum[3] = Const Value(3) + v16:Falsy = RefineType v9, Falsy + IfFalse v15, bb3(v8, v16) + v18:Truthy = RefineType v9, Truthy + v21:Fixnum[3] = Const Value(3) CheckInterrupts - Return v19 - bb3(v24:BasicObject, v25:BasicObject): - v29:Fixnum[4] = Const Value(4) + Return v21 + bb3(v26:BasicObject, v27:Falsy): + v31:Fixnum[4] = Const Value(4) CheckInterrupts - Return v29 + Return v31 "); } @@ -1184,16 +1188,18 @@ pub mod hir_build_tests { bb2(v10:BasicObject, v11:BasicObject, v12:NilClass): CheckInterrupts v18:CBool = Test v11 - IfFalse v18, bb3(v10, v11, v12) - v22:Fixnum[3] = Const Value(3) + v19:Falsy = RefineType v11, Falsy + IfFalse v18, bb3(v10, v19, v12) + v21:Truthy = RefineType v11, Truthy + v24:Fixnum[3] = Const Value(3) CheckInterrupts - Jump bb4(v10, v11, v22) - bb3(v27:BasicObject, v28:BasicObject, v29:NilClass): - v33:Fixnum[4] = 
Const Value(4) - Jump bb4(v27, v28, v33) - bb4(v36:BasicObject, v37:BasicObject, v38:Fixnum): + Jump bb4(v10, v21, v24) + bb3(v29:BasicObject, v30:Falsy, v31:NilClass): + v35:Fixnum[4] = Const Value(4) + Jump bb4(v29, v30, v35) + bb4(v38:BasicObject, v39:BasicObject, v40:Fixnum): CheckInterrupts - Return v38 + Return v40 "); } @@ -1484,16 +1490,18 @@ pub mod hir_build_tests { v35:BasicObject = SendWithoutBlock v28, :>, v32 # SendFallbackReason: Uncategorized(opt_gt) CheckInterrupts v38:CBool = Test v35 + v39:Truthy = RefineType v35, Truthy IfTrue v38, bb3(v26, v27, v28) - v41:NilClass = Const Value(nil) + v41:Falsy = RefineType v35, Falsy + v43:NilClass = Const Value(nil) CheckInterrupts Return v27 - bb3(v49:BasicObject, v50:BasicObject, v51:BasicObject): - v56:Fixnum[1] = Const Value(1) - v59:BasicObject = SendWithoutBlock v50, :+, v56 # SendFallbackReason: Uncategorized(opt_plus) - v64:Fixnum[1] = Const Value(1) - v67:BasicObject = SendWithoutBlock v51, :-, v64 # SendFallbackReason: Uncategorized(opt_minus) - Jump bb4(v49, v59, v67) + bb3(v51:BasicObject, v52:BasicObject, v53:BasicObject): + v58:Fixnum[1] = Const Value(1) + v61:BasicObject = SendWithoutBlock v52, :+, v58 # SendFallbackReason: Uncategorized(opt_plus) + v66:Fixnum[1] = Const Value(1) + v69:BasicObject = SendWithoutBlock v53, :-, v66 # SendFallbackReason: Uncategorized(opt_minus) + Jump bb4(v51, v61, v69) "); } @@ -1549,14 +1557,16 @@ pub mod hir_build_tests { v13:TrueClass = Const Value(true) CheckInterrupts v19:CBool[true] = Test v13 - IfFalse v19, bb3(v8, v13) - v23:Fixnum[3] = Const Value(3) + v20 = RefineType v13, Falsy + IfFalse v19, bb3(v8, v20) + v22:TrueClass = RefineType v13, Truthy + v25:Fixnum[3] = Const Value(3) CheckInterrupts - Return v23 - bb3(v28, v29): - v33 = Const Value(4) + Return v25 + bb3(v30, v31): + v35 = Const Value(4) CheckInterrupts - Return v33 + Return v35 "); } @@ -3090,12 +3100,60 @@ pub mod hir_build_tests { bb2(v8:BasicObject, v9:BasicObject): CheckInterrupts v16:CBool = IsNil v9 - IfTrue v16, bb3(v8, v9, v9) - v19:BasicObject = SendWithoutBlock v9, :itself # SendFallbackReason: Uncategorized(opt_send_without_block) - Jump bb3(v8, v9, v19) - bb3(v21:BasicObject, v22:BasicObject, v23:BasicObject): + v17:NilClass = Const Value(nil) + IfTrue v16, bb3(v8, v17, v17) + v19:NotNil = RefineType v9, NotNil + v21:BasicObject = SendWithoutBlock v19, :itself # SendFallbackReason: Uncategorized(opt_send_without_block) + Jump bb3(v8, v19, v21) + bb3(v23:BasicObject, v24:BasicObject, v25:BasicObject): CheckInterrupts - Return v23 + Return v25 + "); + } + + #[test] + fn test_infer_nilability_from_branchif() { + eval(" + def test(x) + if x + x&.itself + else + 4 + end + end + "); + assert_contains_opcode("test", YARVINSN_branchnil); + // Note that IsNil has as its operand a value that we know statically *cannot* be nil + assert_snapshot!(hir_string("test"), @r" + fn test@:3: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal :x, l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + CheckInterrupts + v15:CBool = Test v9 + v16:Falsy = RefineType v9, Falsy + IfFalse v15, bb3(v8, v16) + v18:Truthy = RefineType v9, Truthy + CheckInterrupts + v24:CBool[false] = IsNil v18 + v25:NilClass = Const Value(nil) + IfTrue v24, bb4(v8, v25, v25) + v27:Truthy = RefineType v18, NotNil + v29:BasicObject = SendWithoutBlock v27, :itself # SendFallbackReason: Uncategorized(opt_send_without_block) + 
CheckInterrupts + Return v29 + bb3(v34:BasicObject, v35:Falsy): + v39:Fixnum[4] = Const Value(4) + Jump bb4(v34, v35, v39) + bb4(v41:BasicObject, v42:Falsy, v43:Fixnum[4]): + CheckInterrupts + Return v43 "); } @@ -3174,14 +3232,16 @@ pub mod hir_build_tests { v32:HeapObject[BlockParamProxy] = Const Value(VALUE(0x1000)) CheckInterrupts v35:CBool[true] = Test v32 + v36 = RefineType v32, Falsy IfFalse v35, bb3(v16, v17, v18, v19, v20, v25) - v40:BasicObject = InvokeBlock, v25 # SendFallbackReason: Uncategorized(invokeblock) - v43:BasicObject = InvokeBuiltin dir_s_close, v16, v25 + v38:HeapObject[BlockParamProxy] = RefineType v32, Truthy + v42:BasicObject = InvokeBlock, v25 # SendFallbackReason: Uncategorized(invokeblock) + v45:BasicObject = InvokeBuiltin dir_s_close, v16, v25 CheckInterrupts - Return v40 - bb3(v49, v50, v51, v52, v53, v54): + Return v42 + bb3(v51, v52, v53, v54, v55, v56): CheckInterrupts - Return v54 + Return v56 "); } @@ -3302,14 +3362,16 @@ pub mod hir_build_tests { v21:BasicObject = SendWithoutBlock v9, :[], v16, v18 # SendFallbackReason: Uncategorized(opt_send_without_block) CheckInterrupts v25:CBool = Test v21 - IfTrue v25, bb3(v8, v9, v13, v9, v16, v18, v21) - v29:Fixnum[2] = Const Value(2) - v32:BasicObject = SendWithoutBlock v9, :[]=, v16, v18, v29 # SendFallbackReason: Uncategorized(opt_send_without_block) + v26:Truthy = RefineType v21, Truthy + IfTrue v25, bb3(v8, v9, v13, v9, v16, v18, v26) + v28:Falsy = RefineType v21, Falsy + v31:Fixnum[2] = Const Value(2) + v34:BasicObject = SendWithoutBlock v9, :[]=, v16, v18, v31 # SendFallbackReason: Uncategorized(opt_send_without_block) CheckInterrupts - Return v29 - bb3(v38:BasicObject, v39:BasicObject, v40:NilClass, v41:BasicObject, v42:Fixnum[0], v43:Fixnum[1], v44:BasicObject): + Return v31 + bb3(v40:BasicObject, v41:BasicObject, v42:NilClass, v43:BasicObject, v44:Fixnum[0], v45:Fixnum[1], v46:Truthy): CheckInterrupts - Return v44 + Return v46 "); } @@ -3652,14 +3714,16 @@ pub mod hir_build_tests { v15:BoolExact = FixnumBitCheck v12, 0 CheckInterrupts v18:CBool = Test v15 + v19:TrueClass = RefineType v15, Truthy IfTrue v18, bb3(v10, v11, v12) - v21:Fixnum[1] = Const Value(1) + v21:FalseClass = RefineType v15, Falsy v23:Fixnum[1] = Const Value(1) - v26:BasicObject = SendWithoutBlock v21, :+, v23 # SendFallbackReason: Uncategorized(opt_plus) - Jump bb3(v10, v26, v12) - bb3(v29:BasicObject, v30:BasicObject, v31:BasicObject): + v25:Fixnum[1] = Const Value(1) + v28:BasicObject = SendWithoutBlock v23, :+, v25 # SendFallbackReason: Uncategorized(opt_plus) + Jump bb3(v10, v28, v12) + bb3(v31:BasicObject, v32:BasicObject, v33:BasicObject): CheckInterrupts - Return v30 + Return v32 "); } diff --git a/zjit/src/hir_type/gen_hir_type.rb b/zjit/src/hir_type/gen_hir_type.rb index 9576d2b1c06f19..f952a8b71561fe 100644 --- a/zjit/src/hir_type/gen_hir_type.rb +++ b/zjit/src/hir_type/gen_hir_type.rb @@ -178,10 +178,15 @@ def add_union name, type_names add_union "Subclass", $subclass add_union "BoolExact", [true_exact.name, false_exact.name] add_union "Immediate", [fixnum.name, flonum.name, static_sym.name, nil_exact.name, true_exact.name, false_exact.name, undef_.name] +add_union "Falsy", [nil_exact.name, false_exact.name] $bits["HeapBasicObject"] = ["BasicObject & !Immediate"] $numeric_bits["HeapBasicObject"] = $numeric_bits["BasicObject"] & ~$numeric_bits["Immediate"] $bits["HeapObject"] = ["Object & !Immediate"] $numeric_bits["HeapObject"] = $numeric_bits["Object"] & ~$numeric_bits["Immediate"] +$bits["Truthy"] = ["BasicObject & 
!Falsy"] +$numeric_bits["Truthy"] = $numeric_bits["BasicObject"] & ~$numeric_bits["Falsy"] +$bits["NotNil"] = ["BasicObject & !NilClass"] +$numeric_bits["NotNil"] = $numeric_bits["BasicObject"] & ~$numeric_bits["NilClass"] # ===== Finished generating the DAG; write Rust code ===== diff --git a/zjit/src/hir_type/hir_type.inc.rs b/zjit/src/hir_type/hir_type.inc.rs index b388b3a0d10780..886b4b54dd2811 100644 --- a/zjit/src/hir_type/hir_type.inc.rs +++ b/zjit/src/hir_type/hir_type.inc.rs @@ -32,6 +32,7 @@ mod bits { pub const DynamicSymbol: u64 = 1u64 << 20; pub const Empty: u64 = 0u64; pub const FalseClass: u64 = 1u64 << 21; + pub const Falsy: u64 = FalseClass | NilClass; pub const Fixnum: u64 = 1u64 << 22; pub const Float: u64 = Flonum | HeapFloat; pub const Flonum: u64 = 1u64 << 23; @@ -47,6 +48,7 @@ mod bits { pub const ModuleExact: u64 = 1u64 << 27; pub const ModuleSubclass: u64 = 1u64 << 28; pub const NilClass: u64 = 1u64 << 29; + pub const NotNil: u64 = BasicObject & !NilClass; pub const Numeric: u64 = Float | Integer | NumericExact | NumericSubclass; pub const NumericExact: u64 = 1u64 << 30; pub const NumericSubclass: u64 = 1u64 << 31; @@ -70,14 +72,17 @@ mod bits { pub const Subclass: u64 = ArraySubclass | BasicObjectSubclass | HashSubclass | ModuleSubclass | NumericSubclass | ObjectSubclass | RangeSubclass | RegexpSubclass | SetSubclass | StringSubclass; pub const Symbol: u64 = DynamicSymbol | StaticSymbol; pub const TrueClass: u64 = 1u64 << 43; + pub const Truthy: u64 = BasicObject & !Falsy; pub const Undef: u64 = 1u64 << 44; - pub const AllBitPatterns: [(&str, u64); 71] = [ + pub const AllBitPatterns: [(&str, u64); 74] = [ ("Any", Any), ("RubyValue", RubyValue), ("Immediate", Immediate), ("Undef", Undef), ("BasicObject", BasicObject), ("Object", Object), + ("NotNil", NotNil), + ("Truthy", Truthy), ("BuiltinExact", BuiltinExact), ("BoolExact", BoolExact), ("TrueClass", TrueClass), @@ -103,6 +108,7 @@ mod bits { ("Numeric", Numeric), ("NumericSubclass", NumericSubclass), ("NumericExact", NumericExact), + ("Falsy", Falsy), ("NilClass", NilClass), ("Module", Module), ("ModuleSubclass", ModuleSubclass), @@ -180,6 +186,7 @@ pub mod types { pub const DynamicSymbol: Type = Type::from_bits(bits::DynamicSymbol); pub const Empty: Type = Type::from_bits(bits::Empty); pub const FalseClass: Type = Type::from_bits(bits::FalseClass); + pub const Falsy: Type = Type::from_bits(bits::Falsy); pub const Fixnum: Type = Type::from_bits(bits::Fixnum); pub const Float: Type = Type::from_bits(bits::Float); pub const Flonum: Type = Type::from_bits(bits::Flonum); @@ -195,6 +202,7 @@ pub mod types { pub const ModuleExact: Type = Type::from_bits(bits::ModuleExact); pub const ModuleSubclass: Type = Type::from_bits(bits::ModuleSubclass); pub const NilClass: Type = Type::from_bits(bits::NilClass); + pub const NotNil: Type = Type::from_bits(bits::NotNil); pub const Numeric: Type = Type::from_bits(bits::Numeric); pub const NumericExact: Type = Type::from_bits(bits::NumericExact); pub const NumericSubclass: Type = Type::from_bits(bits::NumericSubclass); @@ -218,6 +226,7 @@ pub mod types { pub const Subclass: Type = Type::from_bits(bits::Subclass); pub const Symbol: Type = Type::from_bits(bits::Symbol); pub const TrueClass: Type = Type::from_bits(bits::TrueClass); + pub const Truthy: Type = Type::from_bits(bits::Truthy); pub const Undef: Type = Type::from_bits(bits::Undef); pub const ExactBitsAndClass: [(u64, *const VALUE); 17] = [ (bits::ObjectExact, &raw const crate::cruby::rb_cObject), diff --git 
a/zjit/src/hir_type/mod.rs b/zjit/src/hir_type/mod.rs index cc6a208bcd413e..1f7526915c2f16 100644 --- a/zjit/src/hir_type/mod.rs +++ b/zjit/src/hir_type/mod.rs @@ -453,6 +453,25 @@ impl Type { types::Empty } + /// Subtract `other` from `self`, preserving specialization if possible. + pub fn subtract(&self, other: Type) -> Type { + // If self is a subtype of other, the result is empty (no negative types). + if self.is_subtype(other) { return types::Empty; } + // Self is not a subtype of other. That means either: + // * Their type bits do not overlap at all (eg Int vs String) + // * Their type bits overlap but self's specialization is not a subtype of other's (eg + // Fixnum[5] vs Fixnum[4]) + // Check for the latter case, returning self unchanged if so. + if !self.spec_is_subtype_of(other) { + return *self; + } + // Now self is either a supertype of other (eg Object vs String or Fixnum vs Fixnum[5]) or + // their type bits do not overlap at all (eg Int vs String). + // Just subtract the bits and keep self's specialization. + let bits = self.bits & !other.bits; + Type { bits, spec: self.spec } + } + pub fn could_be(&self, other: Type) -> bool { !self.intersection(other).bit_equal(types::Empty) } @@ -1060,4 +1079,45 @@ mod tests { assert!(!types::CBool.has_value(Const::CBool(true))); assert!(!types::CShape.has_value(Const::CShape(crate::cruby::ShapeId(0x1234)))); } + + #[test] + fn test_subtract_with_superset_returns_empty() { + let left = types::NilClass; + let right = types::BasicObject; + let result = left.subtract(right); + assert_bit_equal(result, types::Empty); + } + + #[test] + fn test_subtract_with_subset_removes_bits() { + let left = types::BasicObject; + let right = types::NilClass; + let result = left.subtract(right); + assert_subtype(result, types::BasicObject); + assert_not_subtype(types::NilClass, result); + } + + #[test] + fn test_subtract_with_no_overlap_returns_self() { + let left = types::Fixnum; + let right = types::StringExact; + let result = left.subtract(right); + assert_bit_equal(result, left); + } + + #[test] + fn test_subtract_with_no_specialization_overlap_returns_self() { + let left = Type::fixnum(4); + let right = Type::fixnum(5); + let result = left.subtract(right); + assert_bit_equal(result, left); + } + + #[test] + fn test_subtract_with_specialization_subset_removes_specialization() { + let left = types::Fixnum; + let right = Type::fixnum(42); + let result = left.subtract(right); + assert_bit_equal(result, types::Fixnum); + } } From 965b16d766df66e296a8d8254263da3c1cf45717 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Tue, 20 Jan 2026 19:39:14 -0500 Subject: [PATCH 4/6] [DOC] Add doc about eval coverage --- ext/coverage/coverage.c | 56 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/ext/coverage/coverage.c b/ext/coverage/coverage.c index 1fda8191ccde75..6b7e96f622b18b 100644 --- a/ext/coverage/coverage.c +++ b/ext/coverage/coverage.c @@ -600,6 +600,62 @@ rb_coverage_running(VALUE klass) * 5. The ending line number the method appears on in the file. * 6. The ending column number the method appears on in the file. * + * == Eval \Coverage + * + * Eval coverage can be combined with the coverage types above to track + * coverage for eval. 
+ * + * require "coverage" + * Coverage.start(eval: true, lines: true) + * + * eval(<<~RUBY, nil, "eval 1") + * ary = [] + * 10.times do |i| + * ary << "hello" * i + * end + * RUBY + * + * Coverage.result # => {"eval 1" => {lines: [1, 1, 10, nil]}} + * + * Note that the eval must have a filename assigned; otherwise, coverage + * will not be measured. + * + * require "coverage" + * Coverage.start(eval: true, lines: true) + * + * eval(<<~RUBY) + * ary = [] + * 10.times do |i| + * ary << "hello" * i + * end + * RUBY + * + * Coverage.result # => {"(eval)" => {lines: [nil, nil, nil, nil]}} + * + * Also note that if the eval is given a starting line number other than 1, + * the resulting coverage will be padded with leading +nil+ entries when the + * line number is greater than 1, and truncated when it is less than 1. + * + * require "coverage" + * Coverage.start(eval: true, lines: true) + * + * eval(<<~RUBY, nil, "eval 1", 3) + * ary = [] + * 10.times do |i| + * ary << "hello" * i + * end + * RUBY + * + * eval(<<~RUBY, nil, "eval 2", -1) + * ary = [] + * 10.times do |i| + * ary << "hello" * i + * end + * RUBY + * + * Coverage.result + * # => {"eval 1" => {lines: [nil, nil, 1, 1, 10, nil]}, "eval 2" => {lines: [10, nil]}} + * * == All \Coverage Modes + * + * You can also run all modes of coverage simultaneously with this shortcut. From 436ec3a9d68ae9282fbc79e9400382d93d05e800 Mon Sep 17 00:00:00 2001 From: Nozomi Hijikata <121233810+nozomemein@users.noreply.github.com> Date: Thu, 22 Jan 2026 08:54:10 +0900 Subject: [PATCH 5/6] ZJIT: Compile getblockparam (#15896) Closes: https://github.com/Shopify/ruby/issues/863 Compile `getblockparam` insn to `GetBlockParam` HIR so that we can handle it in ZJIT. ## Benchmark ### lobsters
before patch ``` Average of last 10, non-warmup iters: 778ms ***ZJIT: Printing ZJIT statistics on exit*** Top-20 not inlined C methods (58.4% of total 16,091,748): Hash#fetch: 3,237,974 (20.1%) Regexp#match?: 708,838 ( 4.4%) Hash#key?: 702,565 ( 4.4%) String#sub!: 489,843 ( 3.0%) Set#include?: 402,395 ( 2.5%) String#<<: 396,364 ( 2.5%) String#start_with?: 379,338 ( 2.4%) Hash#delete: 331,679 ( 2.1%) String.new: 308,268 ( 1.9%) Integer#===: 279,074 ( 1.7%) Symbol#end_with?: 255,538 ( 1.6%) Kernel#is_a?: 250,000 ( 1.6%) Process.clock_gettime: 221,598 ( 1.4%) Integer#>: 219,718 ( 1.4%) String#match?: 218,057 ( 1.4%) String#downcase: 213,127 ( 1.3%) Integer#<=: 202,617 ( 1.3%) Time#to_i: 195,248 ( 1.2%) Time#subsec: 192,277 ( 1.2%) Time#utc?: 188,500 ( 1.2%) Top-20 calls to C functions from JIT code (83.4% of total 126,501,142): rb_vm_opt_send_without_block: 35,338,443 (27.9%) rb_vm_send: 10,126,272 ( 8.0%) rb_hash_aref: 9,221,146 ( 7.3%) rb_vm_env_write: 8,615,394 ( 6.8%) rb_zjit_writebarrier_check_immediate: 7,666,061 ( 6.1%) rb_vm_getinstancevariable: 5,902,473 ( 4.7%) rb_ivar_get_at_no_ractor_check: 4,775,750 ( 3.8%) rb_obj_is_kind_of: 3,718,303 ( 2.9%) rb_vm_invokesuper: 2,705,394 ( 2.1%) rb_hash_aset: 2,422,892 ( 1.9%) rb_vm_setinstancevariable: 2,385,262 ( 1.9%) rb_vm_opt_getconstant_path: 2,321,875 ( 1.8%) Hash#fetch: 1,819,675 ( 1.4%) fetch: 1,418,299 ( 1.1%) rb_vm_invokeblock: 1,387,466 ( 1.1%) rb_str_buf_append: 1,378,634 ( 1.1%) rb_ec_ary_new_from_values: 1,338,599 ( 1.1%) rb_class_allocate_instance: 1,300,827 ( 1.0%) rb_hash_new_with_size: 906,352 ( 0.7%) rb_vm_sendforward: 799,626 ( 0.6%) Top-2 not optimized method types for send (100.0% of total 5,166,211): iseq: 5,163,389 (99.9%) null: 2,822 ( 0.1%) Top-3 not optimized method types for send_without_block (100.0% of total 526,119): optimized_send: 479,643 (91.2%) null: 42,176 ( 8.0%) optimized_block_call: 4,300 ( 0.8%) Top-3 not optimized method types for super (100.0% of total 2,365,999): cfunc: 2,251,438 (95.2%) alias: 111,257 ( 4.7%) attrset: 3,304 ( 0.1%) Top-3 instructions with uncategorized fallback reason (100.0% of total 2,214,821): invokeblock: 1,387,466 (62.6%) sendforward: 799,626 (36.1%) opt_send_without_block: 27,729 ( 1.3%) Top-20 send fallback reasons (100.0% of total 50,357,201): send_without_block_polymorphic: 18,307,466 (36.4%) singleton_class_seen: 9,310,336 (18.5%) send_not_optimized_method_type: 5,166,211 (10.3%) send_without_block_no_profiles: 4,756,165 ( 9.4%) one_or_more_complex_arg_pass: 2,906,412 ( 5.8%) send_no_profiles: 2,864,323 ( 5.7%) super_not_optimized_method_type: 2,365,999 ( 4.7%) uncategorized: 2,214,821 ( 4.4%) send_without_block_megamorphic: 581,552 ( 1.2%) send_without_block_not_optimized_method_type_optimized: 483,943 ( 1.0%) send_without_block_not_optimized_need_permission: 390,364 ( 0.8%) send_polymorphic: 329,064 ( 0.7%) too_many_args_for_lir: 173,570 ( 0.3%) super_target_complex_args_pass: 131,841 ( 0.3%) super_complex_args_pass: 111,056 ( 0.2%) super_polymorphic: 86,986 ( 0.2%) argc_param_mismatch: 48,546 ( 0.1%) send_without_block_not_optimized_method_type: 42,176 ( 0.1%) send_without_block_direct_keyword_mismatch: 37,484 ( 0.1%) obj_to_string_not_string: 34,865 ( 0.1%) Top-4 setivar fallback reasons (100.0% of total 2,385,262): not_monomorphic: 2,162,525 (90.7%) not_t_object: 125,178 ( 5.2%) too_complex: 97,538 ( 4.1%) new_shape_needs_extension: 21 ( 0.0%) Top-2 getivar fallback reasons (100.0% of total 6,027,586): not_monomorphic: 5,776,418 (95.8%) too_complex: 251,168 ( 4.2%) 
Top-3 definedivar fallback reasons (100.0% of total 406,027): not_monomorphic: 397,876 (98.0%) too_complex: 5,122 ( 1.3%) not_t_object: 3,029 ( 0.7%) Top-6 invokeblock handler (100.0% of total 1,387,466): monomorphic_iseq: 700,051 (50.5%) polymorphic: 513,455 (37.0%) monomorphic_other: 106,268 ( 7.7%) monomorphic_ifunc: 55,505 ( 4.0%) megamorphic: 6,762 ( 0.5%) no_profiles: 5,425 ( 0.4%) Top-9 popular complex argument-parameter features not optimized (100.0% of total 3,353,961): param_kw_opt: 1,408,663 (42.0%) param_forwardable: 697,209 (20.8%) param_block: 632,488 (18.9%) param_rest: 346,363 (10.3%) param_kwrest: 139,856 ( 4.2%) caller_kw_splat: 79,861 ( 2.4%) caller_splat: 43,585 ( 1.3%) caller_blockarg: 5,826 ( 0.2%) caller_kwarg: 110 ( 0.0%) Top-1 compile error reasons (100.0% of total 188,362): exception_handler: 188,362 (100.0%) Top-7 unhandled YARV insns (100.0% of total 184,408): getblockparam: 95,129 (51.6%) invokesuperforward: 81,668 (44.3%) getconstant: 3,318 ( 1.8%) setblockparam: 2,837 ( 1.5%) checkmatch: 929 ( 0.5%) expandarray: 360 ( 0.2%) once: 167 ( 0.1%) Top-3 unhandled HIR insns (100.0% of total 237,876): throw: 199,380 (83.8%) invokebuiltin: 35,775 (15.0%) array_max: 2,721 ( 1.1%) Top-20 side exit reasons (100.0% of total 15,592,861): guard_type_failure: 6,993,070 (44.8%) guard_shape_failure: 6,862,785 (44.0%) block_param_proxy_not_iseq_or_ifunc: 1,006,781 ( 6.5%) unhandled_hir_insn: 237,876 ( 1.5%) compile_error: 188,362 ( 1.2%) unhandled_yarv_insn: 184,408 ( 1.2%) block_param_proxy_modified: 29,130 ( 0.2%) patchpoint_stable_constant_names: 22,145 ( 0.1%) unhandled_newarray_send_pack: 14,481 ( 0.1%) unhandled_block_arg: 13,788 ( 0.1%) fixnum_mult_overflow: 10,866 ( 0.1%) fixnum_lshift_overflow: 10,085 ( 0.1%) patchpoint_no_ep_escape: 7,815 ( 0.1%) expandarray_failure: 4,533 ( 0.0%) guard_super_method_entry: 4,475 ( 0.0%) patchpoint_method_redefined: 1,212 ( 0.0%) patchpoint_no_singleton_class: 423 ( 0.0%) obj_to_string_fallback: 330 ( 0.0%) guard_less_failure: 163 ( 0.0%) interrupt: 114 ( 0.0%) send_count: 152,442,683 dynamic_send_count: 50,357,201 (33.0%) optimized_send_count: 102,085,482 (67.0%) dynamic_setivar_count: 2,385,262 ( 1.6%) dynamic_getivar_count: 6,027,586 ( 4.0%) dynamic_definedivar_count: 406,027 ( 0.3%) iseq_optimized_send_count: 39,671,621 (26.0%) inline_cfunc_optimized_send_count: 42,053,762 (27.6%) inline_iseq_optimized_send_count: 3,462,562 ( 2.3%) non_variadic_cfunc_optimized_send_count: 9,195,248 ( 6.0%) variadic_cfunc_optimized_send_count: 7,702,289 ( 5.1%) compiled_iseq_count: 5,552 failed_iseq_count: 0 compile_time: 1,926ms profile_time: 20ms gc_time: 27ms invalidation_time: 531ms vm_write_pc_count: 132,750,117 vm_write_sp_count: 132,750,117 vm_write_locals_count: 128,780,465 vm_write_stack_count: 128,780,465 vm_write_to_parent_iseq_local_count: 694,799 vm_read_from_parent_iseq_local_count: 14,812,747 guard_type_count: 159,813,452 guard_type_exit_ratio: 4.4% guard_shape_count: 0 code_region_bytes: 29,425,664 zjit_alloc_bytes: 44,592,776 total_mem_bytes: 74,018,440 side_exit_count: 15,592,861 total_insn_count: 938,453,078 vm_insn_count: 167,693,539 zjit_insn_count: 770,759,539 ratio_in_zjit: 82.1% ```
after patch ``` Average of last 10, non-warmup iters: 725ms ***ZJIT: Printing ZJIT statistics on exit*** Top-20 not inlined C methods (58.2% of total 16,004,664): Hash#fetch: 3,185,115 (19.9%) Regexp#match?: 708,806 ( 4.4%) Hash#key?: 702,551 ( 4.4%) String#sub!: 489,841 ( 3.1%) Set#include?: 396,625 ( 2.5%) String#<<: 396,279 ( 2.5%) String#start_with?: 379,337 ( 2.4%) Hash#delete: 331,667 ( 2.1%) String.new: 307,248 ( 1.9%) Integer#===: 279,054 ( 1.7%) Symbol#end_with?: 255,538 ( 1.6%) Kernel#is_a?: 246,961 ( 1.5%) Process.clock_gettime: 221,588 ( 1.4%) Integer#>: 219,718 ( 1.4%) String#match?: 218,059 ( 1.4%) String#downcase: 213,109 ( 1.3%) Integer#<=: 202,617 ( 1.3%) Time#to_i: 192,211 ( 1.2%) Time#subsec: 189,240 ( 1.2%) String#to_sym: 185,947 ( 1.2%) Top-20 calls to C functions from JIT code (83.4% of total 126,772,007): rb_vm_opt_send_without_block: 35,829,863 (28.3%) rb_vm_send: 10,108,894 ( 8.0%) rb_hash_aref: 9,009,231 ( 7.1%) rb_vm_env_write: 8,571,665 ( 6.8%) rb_zjit_writebarrier_check_immediate: 7,702,599 ( 6.1%) rb_vm_getinstancevariable: 5,930,325 ( 4.7%) rb_ivar_get_at_no_ractor_check: 4,764,439 ( 3.8%) rb_obj_is_kind_of: 3,722,865 ( 2.9%) rb_vm_invokesuper: 2,687,484 ( 2.1%) rb_hash_aset: 2,421,186 ( 1.9%) rb_vm_setinstancevariable: 2,355,461 ( 1.9%) rb_vm_opt_getconstant_path: 2,295,528 ( 1.8%) Hash#fetch: 1,779,524 ( 1.4%) fetch: 1,405,591 ( 1.1%) rb_vm_invokeblock: 1,385,989 ( 1.1%) rb_str_buf_append: 1,369,177 ( 1.1%) rb_ec_ary_new_from_values: 1,337,865 ( 1.1%) rb_class_allocate_instance: 1,295,755 ( 1.0%) rb_hash_new_with_size: 902,684 ( 0.7%) rb_vm_sendforward: 798,572 ( 0.6%) Top-2 not optimized method types for send (100.0% of total 4,902,716): iseq: 4,899,894 (99.9%) null: 2,822 ( 0.1%) Top-3 not optimized method types for send_without_block (100.0% of total 526,064): optimized_send: 479,589 (91.2%) null: 42,176 ( 8.0%) optimized_block_call: 4,299 ( 0.8%) Top-3 not optimized method types for super (100.0% of total 2,350,245): cfunc: 2,239,567 (95.3%) alias: 107,374 ( 4.6%) attrset: 3,304 ( 0.1%) Top-3 instructions with uncategorized fallback reason (100.0% of total 2,216,683): invokeblock: 1,385,989 (62.5%) sendforward: 798,572 (36.0%) opt_send_without_block: 32,122 ( 1.4%) Top-20 send fallback reasons (99.9% of total 50,810,802): send_without_block_polymorphic: 18,668,686 (36.7%) singleton_class_seen: 9,323,039 (18.3%) send_not_optimized_method_type: 4,902,716 ( 9.6%) send_without_block_no_profiles: 4,824,297 ( 9.5%) send_no_profiles: 2,853,944 ( 5.6%) one_or_more_complex_arg_pass: 2,829,717 ( 5.6%) super_not_optimized_method_type: 2,350,245 ( 4.6%) uncategorized: 2,216,683 ( 4.4%) send_without_block_megamorphic: 723,037 ( 1.4%) send_polymorphic: 544,026 ( 1.1%) send_without_block_not_optimized_method_type_optimized: 483,888 ( 1.0%) send_without_block_not_optimized_need_permission: 390,364 ( 0.8%) too_many_args_for_lir: 172,809 ( 0.3%) super_target_complex_args_pass: 128,824 ( 0.3%) super_complex_args_pass: 111,053 ( 0.2%) super_polymorphic: 87,851 ( 0.2%) argc_param_mismatch: 50,382 ( 0.1%) send_without_block_not_optimized_method_type: 42,176 ( 0.1%) obj_to_string_not_string: 34,861 ( 0.1%) send_without_block_direct_keyword_mismatch: 32,436 ( 0.1%) Top-4 setivar fallback reasons (100.0% of total 2,355,461): not_monomorphic: 2,132,746 (90.5%) not_t_object: 125,163 ( 5.3%) too_complex: 97,531 ( 4.1%) new_shape_needs_extension: 21 ( 0.0%) Top-2 getivar fallback reasons (100.0% of total 6,055,438): not_monomorphic: 5,806,179 (95.9%) too_complex: 249,259 ( 4.1%) 
Top-3 definedivar fallback reasons (100.0% of total 405,302): not_monomorphic: 397,150 (98.0%) too_complex: 5,122 ( 1.3%) not_t_object: 3,030 ( 0.7%) Top-6 invokeblock handler (100.0% of total 1,385,989): monomorphic_iseq: 688,167 (49.7%) polymorphic: 523,864 (37.8%) monomorphic_other: 106,268 ( 7.7%) monomorphic_ifunc: 55,505 ( 4.0%) megamorphic: 6,761 ( 0.5%) no_profiles: 5,424 ( 0.4%) Top-9 popular complex argument-parameter features not optimized (100.0% of total 3,234,958): param_kw_opt: 1,381,881 (42.7%) param_forwardable: 685,939 (21.2%) param_block: 640,948 (19.8%) param_rest: 327,046 (10.1%) param_kwrest: 120,209 ( 3.7%) caller_kw_splat: 38,970 ( 1.2%) caller_splat: 34,029 ( 1.1%) caller_blockarg: 5,826 ( 0.2%) caller_kwarg: 110 ( 0.0%) Top-1 compile error reasons (100.0% of total 187,347): exception_handler: 187,347 (100.0%) Top-6 unhandled YARV insns (100.0% of total 89,278): invokesuperforward: 81,667 (91.5%) getconstant: 3,318 ( 3.7%) setblockparam: 2,837 ( 3.2%) checkmatch: 929 ( 1.0%) expandarray: 360 ( 0.4%) once: 167 ( 0.2%) Top-3 unhandled HIR insns (100.0% of total 236,977): throw: 198,481 (83.8%) invokebuiltin: 35,775 (15.1%) array_max: 2,721 ( 1.1%) Top-20 side exit reasons (100.0% of total 15,458,443): guard_type_failure: 6,918,397 (44.8%) guard_shape_failure: 6,859,686 (44.4%) block_param_proxy_not_iseq_or_ifunc: 1,008,346 ( 6.5%) unhandled_hir_insn: 236,977 ( 1.5%) compile_error: 187,347 ( 1.2%) unhandled_yarv_insn: 89,278 ( 0.6%) fixnum_mult_overflow: 50,739 ( 0.3%) block_param_proxy_modified: 28,119 ( 0.2%) patchpoint_stable_constant_names: 22,145 ( 0.1%) unhandled_newarray_send_pack: 14,481 ( 0.1%) unhandled_block_arg: 13,787 ( 0.1%) fixnum_lshift_overflow: 10,085 ( 0.1%) patchpoint_no_ep_escape: 7,815 ( 0.1%) expandarray_failure: 4,533 ( 0.0%) guard_super_method_entry: 4,475 ( 0.0%) patchpoint_method_redefined: 1,212 ( 0.0%) patchpoint_no_singleton_class: 423 ( 0.0%) obj_to_string_fallback: 330 ( 0.0%) guard_less_failure: 163 ( 0.0%) interrupt: 86 ( 0.0%) send_count: 151,889,096 dynamic_send_count: 50,810,802 (33.5%) optimized_send_count: 101,078,294 (66.5%) dynamic_setivar_count: 2,355,461 ( 1.6%) dynamic_getivar_count: 6,055,438 ( 4.0%) dynamic_definedivar_count: 405,302 ( 0.3%) iseq_optimized_send_count: 39,470,508 (26.0%) inline_cfunc_optimized_send_count: 41,381,565 (27.2%) inline_iseq_optimized_send_count: 3,370,961 ( 2.2%) non_variadic_cfunc_optimized_send_count: 9,210,651 ( 6.1%) variadic_cfunc_optimized_send_count: 7,644,609 ( 5.0%) compiled_iseq_count: 5,552 failed_iseq_count: 0 compile_time: 1,809ms profile_time: 15ms gc_time: 21ms invalidation_time: 526ms vm_write_pc_count: 132,774,559 vm_write_sp_count: 132,774,559 vm_write_locals_count: 128,748,998 vm_write_stack_count: 128,748,998 vm_write_to_parent_iseq_local_count: 693,262 vm_read_from_parent_iseq_local_count: 14,737,431 guard_type_count: 158,811,089 guard_type_exit_ratio: 4.4% guard_shape_count: 0 code_region_bytes: 29,458,432 zjit_alloc_bytes: 44,650,569 total_mem_bytes: 74,109,001 side_exit_count: 15,458,443 total_insn_count: 934,491,306 vm_insn_count: 166,025,364 zjit_insn_count: 768,465,942 ratio_in_zjit: 82.2% ```
### rails-bench
before patch ``` Average of last 10, non-warmup iters: 1254ms ***ZJIT: Printing ZJIT statistics on exit*** Top-20 not inlined C methods (52.8% of total 39,182,033): Hash#key?: 3,141,634 ( 8.0%) Regexp#match?: 2,420,227 ( 6.2%) Hash#fetch: 2,245,557 ( 5.7%) Array#any?: 1,157,418 ( 3.0%) Hash#delete: 1,114,346 ( 2.8%) Integer#===: 1,098,163 ( 2.8%) String.new: 1,004,713 ( 2.6%) MatchData#[]: 831,442 ( 2.1%) String#b: 797,913 ( 2.0%) String#to_sym: 680,943 ( 1.7%) Kernel#dup: 680,022 ( 1.7%) Array#all?: 650,132 ( 1.7%) Fiber.current: 649,003 ( 1.7%) Array#join: 641,038 ( 1.6%) Array#include?: 613,837 ( 1.6%) Kernel#Array: 610,311 ( 1.6%) String#<<: 606,240 ( 1.5%) Symbol#end_with?: 598,807 ( 1.5%) String#force_encoding: 593,535 ( 1.5%) Kernel#respond_to?: 550,441 ( 1.4%) Top-20 calls to C functions from JIT code (75.2% of total 260,204,372): rb_vm_opt_send_without_block: 52,620,850 (20.2%) rb_hash_aref: 22,920,184 ( 8.8%) rb_vm_env_write: 19,484,445 ( 7.5%) rb_vm_send: 16,570,926 ( 6.4%) rb_zjit_writebarrier_check_immediate: 13,628,686 ( 5.2%) rb_vm_getinstancevariable: 12,378,112 ( 4.8%) rb_ivar_get_at_no_ractor_check: 12,208,856 ( 4.7%) rb_vm_invokesuper: 8,086,664 ( 3.1%) rb_hash_aset: 5,043,532 ( 1.9%) rb_obj_is_kind_of: 4,431,294 ( 1.7%) rb_vm_invokeblock: 4,036,483 ( 1.6%) Hash#key?: 3,141,634 ( 1.2%) rb_vm_opt_getconstant_path: 3,051,909 ( 1.2%) rb_class_allocate_instance: 2,878,743 ( 1.1%) rb_hash_new_with_size: 2,873,398 ( 1.1%) rb_ec_ary_new_from_values: 2,584,790 ( 1.0%) rb_str_concat_literals: 2,450,752 ( 0.9%) Regexp#match?: 2,420,227 ( 0.9%) rb_obj_alloc: 2,419,180 ( 0.9%) rb_vm_setinstancevariable: 2,357,067 ( 0.9%) Top-2 not optimized method types for send (100.0% of total 8,550,761): iseq: 8,518,290 (99.6%) optimized: 32,471 ( 0.4%) Top-2 not optimized method types for send_without_block (100.0% of total 790,792): optimized_send: 608,036 (76.9%) null: 182,756 (23.1%) Top-2 not optimized method types for super (100.0% of total 6,689,860): cfunc: 6,640,181 (99.3%) attrset: 49,679 ( 0.7%) Top-3 instructions with uncategorized fallback reason (100.0% of total 5,911,882): invokeblock: 4,036,483 (68.3%) sendforward: 1,871,601 (31.7%) opt_send_without_block: 3,798 ( 0.1%) Top-20 send fallback reasons (100.0% of total 83,186,524): send_without_block_polymorphic: 33,814,235 (40.6%) send_not_optimized_method_type: 8,550,761 (10.3%) send_without_block_no_profiles: 8,405,471 (10.1%) super_not_optimized_method_type: 6,689,860 ( 8.0%) uncategorized: 5,911,882 ( 7.1%) one_or_more_complex_arg_pass: 5,502,146 ( 6.6%) send_no_profiles: 4,700,820 ( 5.7%) send_polymorphic: 3,318,564 ( 4.0%) send_without_block_not_optimized_need_permission: 1,274,177 ( 1.5%) singleton_class_seen: 1,101,973 ( 1.3%) too_many_args_for_lir: 905,412 ( 1.1%) super_complex_args_pass: 829,842 ( 1.0%) send_without_block_not_optimized_method_type_optimized: 608,036 ( 0.7%) send_without_block_megamorphic: 565,874 ( 0.7%) super_target_complex_args_pass: 414,600 ( 0.5%) send_without_block_not_optimized_method_type: 182,756 ( 0.2%) obj_to_string_not_string: 158,141 ( 0.2%) super_call_with_block: 100,004 ( 0.1%) send_without_block_direct_keyword_mismatch: 99,588 ( 0.1%) super_polymorphic: 52,358 ( 0.1%) Top-2 setivar fallback reasons (100.0% of total 2,357,067): not_monomorphic: 2,255,283 (95.7%) not_t_object: 101,784 ( 4.3%) Top-1 getivar fallback reasons (100.0% of total 12,378,137): not_monomorphic: 12,378,137 (100.0%) Top-2 definedivar fallback reasons (100.0% of total 350,548): not_monomorphic: 350,461 (100.0%) 
not_t_object: 87 ( 0.0%) Top-6 invokeblock handler (100.0% of total 4,036,483): monomorphic_iseq: 2,189,057 (54.2%) polymorphic: 1,207,002 (29.9%) monomorphic_other: 334,248 ( 8.3%) monomorphic_ifunc: 221,225 ( 5.5%) megamorphic: 84,439 ( 2.1%) no_profiles: 512 ( 0.0%) Top-9 popular complex argument-parameter features not optimized (100.0% of total 7,096,505): param_kw_opt: 1,834,705 (25.9%) param_forwardable: 1,824,953 (25.7%) param_block: 1,792,214 (25.3%) param_rest: 861,894 (12.1%) caller_kw_splat: 297,937 ( 4.2%) caller_splat: 283,669 ( 4.0%) param_kwrest: 200,208 ( 2.8%) caller_blockarg: 752 ( 0.0%) caller_kwarg: 173 ( 0.0%) Top-1 compile error reasons (100.0% of total 391,562): exception_handler: 391,562 (100.0%) Top-7 unhandled YARV insns (100.0% of total 1,899,393): getblockparam: 898,862 (47.3%) invokesuperforward: 498,993 (26.3%) getconstant: 400,945 (21.1%) expandarray: 49,985 ( 2.6%) setblockparam: 49,972 ( 2.6%) checkmatch: 480 ( 0.0%) once: 156 ( 0.0%) Top-2 unhandled HIR insns (100.0% of total 268,151): throw: 232,560 (86.7%) invokebuiltin: 35,591 (13.3%) Top-19 side exit reasons (100.0% of total 9,609,677): guard_shape_failure: 2,498,160 (26.0%) block_param_proxy_not_iseq_or_ifunc: 1,988,408 (20.7%) unhandled_yarv_insn: 1,899,393 (19.8%) guard_type_failure: 1,722,167 (17.9%) compile_error: 391,562 ( 4.1%) unhandled_newarray_send_pack: 298,017 ( 3.1%) unhandled_hir_insn: 268,151 ( 2.8%) patchpoint_method_redefined: 200,632 ( 2.1%) unhandled_block_arg: 151,295 ( 1.6%) block_param_proxy_modified: 124,245 ( 1.3%) guard_less_failure: 50,126 ( 0.5%) fixnum_lshift_overflow: 9,985 ( 0.1%) patchpoint_stable_constant_names: 6,366 ( 0.1%) fixnum_mult_overflow: 570 ( 0.0%) obj_to_string_fallback: 429 ( 0.0%) patchpoint_no_ep_escape: 109 ( 0.0%) interrupt: 48 ( 0.0%) guard_super_method_entry: 8 ( 0.0%) guard_greater_eq_failure: 6 ( 0.0%) send_count: 328,547,991 dynamic_send_count: 83,186,524 (25.3%) optimized_send_count: 245,361,467 (74.7%) dynamic_setivar_count: 2,357,067 ( 0.7%) dynamic_getivar_count: 12,378,137 ( 3.8%) dynamic_definedivar_count: 350,548 ( 0.1%) iseq_optimized_send_count: 93,424,465 (28.4%) inline_cfunc_optimized_send_count: 98,338,280 (29.9%) inline_iseq_optimized_send_count: 9,338,763 ( 2.8%) non_variadic_cfunc_optimized_send_count: 26,452,910 ( 8.1%) variadic_cfunc_optimized_send_count: 17,807,049 ( 5.4%) compiled_iseq_count: 2,887 failed_iseq_count: 0 compile_time: 877ms profile_time: 32ms gc_time: 11ms invalidation_time: 15ms vm_write_pc_count: 284,341,923 vm_write_sp_count: 284,341,923 vm_write_locals_count: 272,137,494 vm_write_stack_count: 272,137,494 vm_write_to_parent_iseq_local_count: 1,079,867 vm_read_from_parent_iseq_local_count: 30,816,135 guard_type_count: 313,667,907 guard_type_exit_ratio: 0.5% guard_shape_count: 0 code_region_bytes: 14,417,920 zjit_alloc_bytes: 19,075,183 total_mem_bytes: 33,493,103 side_exit_count: 9,609,677 total_insn_count: 1,706,360,231 vm_insn_count: 124,793,155 zjit_insn_count: 1,581,567,076 ratio_in_zjit: 92.7% ```
after patch ``` Average of last 10, non-warmup iters: 1136ms ***ZJIT: Printing ZJIT statistics on exit*** Top-20 not inlined C methods (52.8% of total 39,182,033): Hash#key?: 3,141,634 ( 8.0%) Regexp#match?: 2,420,227 ( 6.2%) Hash#fetch: 2,245,557 ( 5.7%) Array#any?: 1,157,418 ( 3.0%) Hash#delete: 1,114,346 ( 2.8%) Integer#===: 1,098,163 ( 2.8%) String.new: 1,004,713 ( 2.6%) MatchData#[]: 831,442 ( 2.1%) String#b: 797,913 ( 2.0%) String#to_sym: 680,943 ( 1.7%) Kernel#dup: 680,022 ( 1.7%) Array#all?: 650,132 ( 1.7%) Fiber.current: 649,003 ( 1.7%) Array#join: 641,038 ( 1.6%) Array#include?: 613,837 ( 1.6%) Kernel#Array: 610,311 ( 1.6%) String#<<: 606,240 ( 1.5%) Symbol#end_with?: 598,807 ( 1.5%) String#force_encoding: 593,535 ( 1.5%) Kernel#respond_to?: 550,441 ( 1.4%) Top-20 calls to C functions from JIT code (74.8% of total 261,805,313): rb_vm_opt_send_without_block: 52,621,173 (20.1%) rb_hash_aref: 22,920,184 ( 8.8%) rb_vm_env_write: 19,484,925 ( 7.4%) rb_vm_send: 16,571,020 ( 6.3%) rb_zjit_writebarrier_check_immediate: 13,780,332 ( 5.3%) rb_vm_getinstancevariable: 12,378,114 ( 4.7%) rb_ivar_get_at_no_ractor_check: 12,208,856 ( 4.7%) rb_vm_invokesuper: 8,086,666 ( 3.1%) rb_hash_aset: 5,043,537 ( 1.9%) rb_obj_is_kind_of: 4,431,299 ( 1.7%) rb_vm_invokeblock: 4,036,481 ( 1.5%) Hash#key?: 3,141,634 ( 1.2%) rb_vm_opt_getconstant_path: 3,051,909 ( 1.2%) rb_class_allocate_instance: 2,878,746 ( 1.1%) rb_hash_new_with_size: 2,873,398 ( 1.1%) rb_ec_ary_new_from_values: 2,585,224 ( 1.0%) rb_str_concat_literals: 2,450,752 ( 0.9%) Regexp#match?: 2,420,227 ( 0.9%) rb_obj_alloc: 2,419,182 ( 0.9%) rb_vm_setinstancevariable: 2,357,067 ( 0.9%) Top-2 not optimized method types for send (100.0% of total 8,550,761): iseq: 8,518,290 (99.6%) optimized: 32,471 ( 0.4%) Top-2 not optimized method types for send_without_block (100.0% of total 790,792): optimized_send: 608,036 (76.9%) null: 182,756 (23.1%) Top-2 not optimized method types for super (100.0% of total 6,689,860): cfunc: 6,640,181 (99.3%) attrset: 49,679 ( 0.7%) Top-3 instructions with uncategorized fallback reason (100.0% of total 5,911,883): invokeblock: 4,036,481 (68.3%) sendforward: 1,871,601 (31.7%) opt_send_without_block: 3,801 ( 0.1%) Top-20 send fallback reasons (100.0% of total 83,186,941): send_without_block_polymorphic: 33,814,528 (40.6%) send_not_optimized_method_type: 8,550,761 (10.3%) send_without_block_no_profiles: 8,405,497 (10.1%) super_not_optimized_method_type: 6,689,860 ( 8.0%) uncategorized: 5,911,883 ( 7.1%) one_or_more_complex_arg_pass: 5,502,147 ( 6.6%) send_no_profiles: 4,700,820 ( 5.7%) send_polymorphic: 3,318,658 ( 4.0%) send_without_block_not_optimized_need_permission: 1,274,177 ( 1.5%) singleton_class_seen: 1,101,973 ( 1.3%) too_many_args_for_lir: 905,412 ( 1.1%) super_complex_args_pass: 829,842 ( 1.0%) send_without_block_not_optimized_method_type_optimized: 608,036 ( 0.7%) send_without_block_megamorphic: 565,874 ( 0.7%) super_target_complex_args_pass: 414,600 ( 0.5%) send_without_block_not_optimized_method_type: 182,756 ( 0.2%) obj_to_string_not_string: 158,141 ( 0.2%) super_call_with_block: 100,004 ( 0.1%) send_without_block_direct_keyword_mismatch: 99,588 ( 0.1%) super_polymorphic: 52,360 ( 0.1%) Top-2 setivar fallback reasons (100.0% of total 2,357,067): not_monomorphic: 2,255,283 (95.7%) not_t_object: 101,784 ( 4.3%) Top-1 getivar fallback reasons (100.0% of total 12,378,139): not_monomorphic: 12,378,139 (100.0%) Top-2 definedivar fallback reasons (100.0% of total 350,548): not_monomorphic: 350,461 (100.0%) 
not_t_object: 87 ( 0.0%) Top-6 invokeblock handler (100.0% of total 4,036,481): monomorphic_iseq: 2,189,057 (54.2%) polymorphic: 1,207,002 (29.9%) monomorphic_other: 334,248 ( 8.3%) monomorphic_ifunc: 221,223 ( 5.5%) megamorphic: 84,439 ( 2.1%) no_profiles: 512 ( 0.0%) Top-9 popular complex argument-parameter features not optimized (100.0% of total 7,096,506): param_kw_opt: 1,834,706 (25.9%) param_forwardable: 1,824,953 (25.7%) param_block: 1,792,214 (25.3%) param_rest: 861,894 (12.1%) caller_kw_splat: 297,937 ( 4.2%) caller_splat: 283,669 ( 4.0%) param_kwrest: 200,208 ( 2.8%) caller_blockarg: 752 ( 0.0%) caller_kwarg: 173 ( 0.0%) Top-1 compile error reasons (100.0% of total 391,562): exception_handler: 391,562 (100.0%) Top-6 unhandled YARV insns (100.0% of total 1,000,531): invokesuperforward: 498,993 (49.9%) getconstant: 400,945 (40.1%) expandarray: 49,985 ( 5.0%) setblockparam: 49,972 ( 5.0%) checkmatch: 480 ( 0.0%) once: 156 ( 0.0%) Top-2 unhandled HIR insns (100.0% of total 268,154): throw: 232,560 (86.7%) invokebuiltin: 35,594 (13.3%) Top-19 side exit reasons (100.0% of total 8,710,811): guard_shape_failure: 2,498,161 (28.7%) block_param_proxy_not_iseq_or_ifunc: 1,988,408 (22.8%) guard_type_failure: 1,722,168 (19.8%) unhandled_yarv_insn: 1,000,531 (11.5%) compile_error: 391,562 ( 4.5%) unhandled_newarray_send_pack: 298,017 ( 3.4%) unhandled_hir_insn: 268,154 ( 3.1%) patchpoint_method_redefined: 200,632 ( 2.3%) unhandled_block_arg: 151,295 ( 1.7%) block_param_proxy_modified: 124,245 ( 1.4%) guard_less_failure: 50,126 ( 0.6%) fixnum_lshift_overflow: 9,985 ( 0.1%) patchpoint_stable_constant_names: 6,366 ( 0.1%) fixnum_mult_overflow: 570 ( 0.0%) obj_to_string_fallback: 429 ( 0.0%) patchpoint_no_ep_escape: 109 ( 0.0%) interrupt: 39 ( 0.0%) guard_super_method_entry: 8 ( 0.0%) guard_greater_eq_failure: 6 ( 0.0%) send_count: 328,747,903 dynamic_send_count: 83,186,941 (25.3%) optimized_send_count: 245,560,962 (74.7%) dynamic_setivar_count: 2,357,067 ( 0.7%) dynamic_getivar_count: 12,378,139 ( 3.8%) dynamic_definedivar_count: 350,548 ( 0.1%) iseq_optimized_send_count: 93,623,831 (28.5%) inline_cfunc_optimized_send_count: 98,338,311 (29.9%) inline_iseq_optimized_send_count: 9,338,766 ( 2.8%) non_variadic_cfunc_optimized_send_count: 26,453,005 ( 8.0%) variadic_cfunc_optimized_send_count: 17,807,049 ( 5.4%) compiled_iseq_count: 2,888 failed_iseq_count: 0 compile_time: 858ms profile_time: 29ms gc_time: 59ms invalidation_time: 15ms vm_write_pc_count: 285,990,091 vm_write_sp_count: 285,990,091 vm_write_locals_count: 272,886,376 vm_write_stack_count: 272,886,376 vm_write_to_parent_iseq_local_count: 1,079,877 vm_read_from_parent_iseq_local_count: 30,816,135 guard_type_count: 314,169,071 guard_type_exit_ratio: 0.5% guard_shape_count: 0 code_region_bytes: 14,401,536 zjit_alloc_bytes: 19,128,598 total_mem_bytes: 33,530,134 side_exit_count: 8,710,811 total_insn_count: 1,705,461,649 vm_insn_count: 121,244,824 zjit_insn_count: 1,584,216,825 ratio_in_zjit: 92.9% ```
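For context, a minimal Ruby snippet of the kind of code that compiles to `getblockparam` (it mirrors the new tests in test/ruby/test_zjit.rb: reading a `&block` parameter as a local forces the VM to materialize the block handler as a Proc):

```ruby
def test(&blk)
  blk            # reading the &block parameter compiles to getblockparam
end

test { 2 }.call  # => 2
```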
--- test/ruby/test_zjit.rb | 36 +++++++++++ zjit/src/codegen.rs | 42 ++++++++++++ zjit/src/hir.rs | 130 ++++++++++++++++++++++++++++++++++++++ zjit/src/hir/opt_tests.rs | 61 ++++++++++++++++++ zjit/src/hir/tests.rs | 65 +++++++++++++++++++ zjit/src/stats.rs | 2 + 6 files changed, 336 insertions(+) diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb index e347986abc471b..2066610cb27be2 100644 --- a/test/ruby/test_zjit.rb +++ b/test/ruby/test_zjit.rb @@ -470,6 +470,42 @@ def test(&block) }, insns: [:getblockparamproxy] end + def test_getblockparam + assert_compiles '2', %q{ + def test(&blk) + blk + end + test { 2 }.call + test { 2 }.call + }, insns: [:getblockparam] + end + + def test_getblockparam_proxy_side_exit_restores_block_local + assert_compiles '2', %q{ + def test(&block) + b = block + # sideexits here + raise "test" unless block + b ? 2 : 3 + end + test {} + test {} + }, insns: [:getblockparam, :getblockparamproxy] + end + + def test_getblockparam_used_twice_in_args + assert_compiles '1', %q{ + def f(*args) = args + def test(&blk) + b = blk + f(*[1], blk) + blk + end + test {1}.call + test {1}.call + }, insns: [:getblockparam] + end + def test_optimized_method_call_proc_call assert_compiles '2', %q{ p = proc { |x| x * 2 } diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 0030493ddfa3d3..870fe7584a1fee 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -550,6 +550,8 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::SetGlobal { id, val, state } => no_output!(gen_setglobal(jit, asm, *id, opnd!(val), &function.frame_state(*state))), Insn::GetGlobal { id, state } => gen_getglobal(jit, asm, *id, &function.frame_state(*state)), &Insn::GetLocal { ep_offset, level, use_sp, .. } => gen_getlocal(asm, ep_offset, level, use_sp), + &Insn::IsBlockParamModified { level } => gen_is_block_param_modified(asm, level), + &Insn::GetBlockParam { ep_offset, level, state } => gen_getblockparam(jit, asm, ep_offset, level, &function.frame_state(state)), &Insn::SetLocal { val, ep_offset, level } => no_output!(gen_setlocal(asm, opnd!(val), function.type_of(val), ep_offset, level)), Insn::GetConstantPath { ic, state } => gen_get_constant_path(jit, asm, *ic, &function.frame_state(*state)), Insn::GetClassVar { id, ic, state } => gen_getclassvar(jit, asm, *id, *ic, &function.frame_state(*state)), @@ -743,6 +745,46 @@ fn gen_setlocal(asm: &mut Assembler, val: Opnd, val_type: Type, local_ep_offset: } } +/// Returns 1 (as CBool) when VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM is set; returns 0 otherwise. +fn gen_is_block_param_modified(asm: &mut Assembler, level: u32) -> Opnd { + let ep = gen_get_ep(asm, level); + let flags = asm.load(Opnd::mem(VALUE_BITS, ep, SIZEOF_VALUE_I32 * (VM_ENV_DATA_INDEX_FLAGS as i32))); + asm.test(flags, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM.into()); + asm.csel_nz(Opnd::Imm(1), Opnd::Imm(0)) +} + +/// Get the block parameter as a Proc, write it to the environment, +/// and mark the flag as modified. +fn gen_getblockparam(jit: &mut JITState, asm: &mut Assembler, ep_offset: u32, level: u32, state: &FrameState) -> Opnd { + gen_prepare_leaf_call_with_gc(asm, state); + // Bail out if write barrier is required. + let ep = gen_get_ep(asm, level); + let flags = Opnd::mem(VALUE_BITS, ep, SIZEOF_VALUE_I32 * (VM_ENV_DATA_INDEX_FLAGS as i32)); + asm.test(flags, VM_ENV_FLAG_WB_REQUIRED.into()); + asm.jnz(side_exit(jit, state, SideExitReason::BlockParamWbRequired)); + + // Convert block handler to Proc. 
+ let block_handler = asm.load(Opnd::mem(VALUE_BITS, ep, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL)); + let proc = asm_ccall!(asm, rb_vm_bh_to_procval, EC, block_handler); + + // Write Proc to EP and mark modified. + let ep = gen_get_ep(asm, level); + let local_ep_offset = c_int::try_from(ep_offset).unwrap_or_else(|_| { + panic!("Could not convert local_ep_offset {ep_offset} to i32") + }); + let offset = -(SIZEOF_VALUE_I32 * local_ep_offset); + asm.mov(Opnd::mem(VALUE_BITS, ep, offset), proc); + + let flags = Opnd::mem(VALUE_BITS, ep, SIZEOF_VALUE_I32 * (VM_ENV_DATA_INDEX_FLAGS as i32)); + let flags_val = asm.load(flags); + let modified = asm.or(flags_val, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM.into()); + asm.store(flags, modified); + + // Read the Proc from EP. + let ep = gen_get_ep(asm, level); + asm.load(Opnd::mem(VALUE_BITS, ep, offset)) +} + fn gen_guard_block_param_proxy(jit: &JITState, asm: &mut Assembler, level: u32, state: &FrameState) { // Bail out if the `&block` local variable has been modified let ep = gen_get_ep(asm, level); diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 4326d37b344980..b4f78c025d3aa9 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -506,6 +506,7 @@ pub enum SideExitReason { Interrupt, BlockParamProxyModified, BlockParamProxyNotIseqOrIfunc, + BlockParamWbRequired, StackOverflow, FixnumModByZero, FixnumDivByZero, @@ -839,6 +840,11 @@ pub enum Insn { /// If `use_sp` is true, it uses the SP register to optimize the read. /// `rest_param` is used by infer_types to infer the ArrayExact type. GetLocal { level: u32, ep_offset: u32, use_sp: bool, rest_param: bool }, + /// Check whether VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM is set in the environment flags. + /// Returns CBool (0/1). + IsBlockParamModified { level: u32 }, + /// Get the block parameter as a Proc. + GetBlockParam { level: u32, ep_offset: u32, state: InsnId }, /// Set a local variable in a higher scope or the heap SetLocal { level: u32, ep_offset: u32, val: InsnId }, GetSpecialSymbol { symbol_type: SpecialBackrefSymbol, state: InsnId }, @@ -1150,6 +1156,8 @@ impl Insn { Insn::GetSpecialNumber { .. } => effects::Any, Insn::GetClassVar { .. } => effects::Any, Insn::SetClassVar { .. } => effects::Any, + Insn::IsBlockParamModified { .. } => effects::Any, + Insn::GetBlockParam { .. } => effects::Any, Insn::Snapshot { .. } => effects::Empty, Insn::Jump(_) => effects::Any, Insn::IfTrue { .. } => effects::Any, @@ -1523,6 +1531,11 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Insn::GuardGreaterEq { left, right, .. } => write!(f, "GuardGreaterEq {left}, {right}"), Insn::GuardSuperMethodEntry { lep, cme, .. } => write!(f, "GuardSuperMethodEntry {lep}, {:p}", self.ptr_map.map_ptr(cme)), Insn::GetBlockHandler { lep } => write!(f, "GetBlockHandler {lep}"), + &Insn::GetBlockParam { level, ep_offset, .. } => { + let name = get_local_var_name_for_printer(self.iseq, level, ep_offset) + .map_or(String::new(), |x| format!("{x}, ")); + write!(f, "GetBlockParam {name}l{level}, EP@{ep_offset}") + }, Insn::PatchPoint { invariant, .. } => { write!(f, "PatchPoint {}", invariant.print(self.ptr_map)) }, Insn::GetConstantPath { ic, .. 
} => { write!(f, "GetConstantPath {:p}", self.ptr_map.map_ptr(ic)) }, Insn::IsBlockGiven { lep } => { write!(f, "IsBlockGiven {lep}") }, @@ -1589,6 +1602,9 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { let name = get_local_var_name_for_printer(self.iseq, level, ep_offset).map_or(String::new(), |x| format!("{x}, ")); write!(f, "GetLocal {name}l{level}, EP@{ep_offset}{}", if rest_param { ", *" } else { "" }) }, + &Insn::IsBlockParamModified { level } => { + write!(f, "IsBlockParamModified l{level}") + }, &Insn::SetLocal { val, level, ep_offset } => { let name = get_local_var_name_for_printer(self.iseq, level, ep_offset).map_or(String::new(), |x| format!("{x}, ")); write!(f, "SetLocal {name}l{level}, EP@{ep_offset}, {val}") @@ -2139,6 +2155,7 @@ impl Function { | PutSpecialObject {..} | GetGlobal {..} | GetLocal {..} + | IsBlockParamModified {..} | SideExit {..} | EntryPoint {..} | LoadPC @@ -2193,6 +2210,7 @@ impl Function { &GuardSuperMethodEntry { lep, cme, state } => GuardSuperMethodEntry { lep: find!(lep), cme, state }, &GetBlockHandler { lep } => GetBlockHandler { lep: find!(lep) }, &IsBlockGiven { lep } => IsBlockGiven { lep: find!(lep) }, + &GetBlockParam { level, ep_offset, state } => GetBlockParam { level, ep_offset, state: find!(state) }, &FixnumAdd { left, right, state } => FixnumAdd { left: find!(left), right: find!(right), state }, &FixnumSub { left, right, state } => FixnumSub { left: find!(left), right: find!(right), state }, &FixnumMult { left, right, state } => FixnumMult { left: find!(left), right: find!(right), state }, @@ -2488,6 +2506,8 @@ impl Function { Insn::AnyToString { .. } => types::String, Insn::GetLocal { rest_param: true, .. } => types::ArrayExact, Insn::GetLocal { .. } => types::BasicObject, + Insn::IsBlockParamModified { .. } => types::CBool, + Insn::GetBlockParam { .. } => types::BasicObject, Insn::GetBlockHandler { .. } => types::RubyValue, // The type of Snapshot doesn't really matter; it's never materialized. It's used only // as a reference for FrameState, which we use to generate side-exit code. @@ -4386,6 +4406,7 @@ impl Function { | &Insn::GetLEP | &Insn::LoadSelf | &Insn::GetLocal { .. } + | &Insn::IsBlockParamModified { .. } | &Insn::PutSpecialObject { .. } | &Insn::IncrCounter(_) | &Insn::IncrCounterPtr { .. } => @@ -4396,6 +4417,7 @@ impl Function { } &Insn::PatchPoint { state, .. } | &Insn::CheckInterrupts { state } + | &Insn::GetBlockParam { state, .. } | &Insn::GetConstantPath { ic: _, state } => { worklist.push_back(state); } @@ -5153,6 +5175,8 @@ impl Function { | Insn::GetSpecialNumber { .. } | Insn::GetSpecialSymbol { .. } | Insn::GetLocal { .. } + | Insn::GetBlockParam { .. } + | Insn::IsBlockParamModified { .. } | Insn::StoreField { .. 
} => { Ok(()) } @@ -6428,6 +6452,112 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result<Function, ParseError> { // TODO(Shopify/ruby#753): GC root, so we should be able to avoid unnecessary GC tracing state.stack_push(fun.push_insn(block, Insn::Const { val: Const::Value(unsafe { rb_block_param_proxy }) })); } + YARVINSN_getblockparam => { + fn new_branch_block( + fun: &mut Function, + insn_idx: u32, + exit_state: &FrameState, + locals_count: usize, + stack_count: usize, + ) -> (BlockId, InsnId, FrameState, InsnId) { + let block = fun.new_block(insn_idx); + let self_param = fun.push_insn(block, Insn::Param); + let mut state = exit_state.clone(); + state.locals.clear(); + state.stack.clear(); + state.locals.extend((0..locals_count).map(|_| fun.push_insn(block, Insn::Param))); + state.stack.extend((0..stack_count).map(|_| fun.push_insn(block, Insn::Param))); + let snapshot = fun.push_insn(block, Insn::Snapshot { state: state.clone() }); + (block, self_param, state, snapshot) + } + + fn finish_getblockparam_branch( + fun: &mut Function, + block: BlockId, + self_param: InsnId, + state: &mut FrameState, + join_block: BlockId, + ep_offset: u32, + level: u32, + val: InsnId, + ) { + if level == 0 { + state.setlocal(ep_offset, val); + } + state.stack_push(val); + fun.push_insn(block, Insn::Jump(BranchEdge { + target: join_block, + args: state.as_args(self_param), + })); + } + + let ep_offset = get_arg(pc, 0).as_u32(); + let level = get_arg(pc, 1).as_u32(); + let branch_insn_idx = exit_state.insn_idx as u32; + + // If the block param is already a Proc (modified), read it from EP. + // Otherwise, convert it to a Proc and store it to EP. + let is_modified = fun.push_insn(block, Insn::IsBlockParamModified { level }); + + let locals_count = state.locals.len(); + let stack_count = state.stack.len(); + let entry_args = state.as_args(self_param); + + // Set up branch and join blocks. + let (modified_block, modified_self_param, mut modified_state, ..) = + new_branch_block(&mut fun, branch_insn_idx, &exit_state, locals_count, stack_count); + let (unmodified_block, unmodified_self_param, mut unmodified_state, unmodified_exit_id) = + new_branch_block(&mut fun, branch_insn_idx, &exit_state, locals_count, stack_count); + let join_block = insn_idx_to_block.get(&insn_idx).copied().unwrap_or_else(|| fun.new_block(insn_idx)); + + fun.push_insn(block, Insn::IfTrue { + val: is_modified, + target: BranchEdge { target: modified_block, args: entry_args.clone() }, + }); + fun.push_insn(block, Insn::Jump(BranchEdge { + target: unmodified_block, + args: entry_args, + })); + + // Push modified block: read Proc from EP. + let modified_val = fun.push_insn(modified_block, Insn::GetLocal { + ep_offset, + level, + use_sp: false, + rest_param: false, + }); + finish_getblockparam_branch( + &mut fun, + modified_block, + modified_self_param, + &mut modified_state, + join_block, + ep_offset, + level, + modified_val, + ); + + // Push unmodified block: convert block handler to Proc. + let unmodified_val = fun.push_insn(unmodified_block, Insn::GetBlockParam { + ep_offset, + level, + state: unmodified_exit_id, + }); + finish_getblockparam_branch( + &mut fun, + unmodified_block, + unmodified_self_param, + &mut unmodified_state, + join_block, + ep_offset, + level, + unmodified_val, + ); + + // Continue compilation from the join block at the next instruction.
+ queue.push_back((unmodified_state, join_block, insn_idx, local_inval)); + break; + } YARVINSN_pop => { state.stack_pop()?; } YARVINSN_dup => { state.stack_push(state.stack_top()?); } YARVINSN_dupn => { diff --git a/zjit/src/hir/opt_tests.rs b/zjit/src/hir/opt_tests.rs index da240250101687..0a42652993894f 100644 --- a/zjit/src/hir/opt_tests.rs +++ b/zjit/src/hir/opt_tests.rs @@ -3852,6 +3852,67 @@ mod hir_opt_tests { "); } + #[test] + fn test_getblockparam() { + eval(" + def test(&block) = block + "); + assert_snapshot!(hir_string("test"), @r" + fn test@:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal :block, l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + v13:CBool = IsBlockParamModified l0 + IfTrue v13, bb3(v8, v9) + v24:BasicObject = GetBlockParam :block, l0, EP@3 + Jump bb5(v8, v24, v24) + bb3(v14:BasicObject, v15:BasicObject): + v22:BasicObject = GetLocal :block, l0, EP@3 + Jump bb5(v14, v22, v22) + bb5(v26:BasicObject, v27:BasicObject, v28:BasicObject): + CheckInterrupts + Return v28 + "); + } + + #[test] + fn test_getblockparam_nested_block() { + eval(" + def test(&block) + proc do + block + end + end + "); + assert_snapshot!(hir_string_proc("test"), @r" + fn block in test@:4: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + v10:CBool = IsBlockParamModified l1 + IfTrue v10, bb3(v6) + v19:BasicObject = GetBlockParam :block, l1, EP@3 + Jump bb5(v6, v19) + bb3(v11:BasicObject): + v17:BasicObject = GetLocal :block, l1, EP@3 + Jump bb5(v11, v17) + bb5(v21:BasicObject, v22:BasicObject): + CheckInterrupts + Return v22 + "); + } + #[test] fn test_getinstancevariable() { eval(" diff --git a/zjit/src/hir/tests.rs b/zjit/src/hir/tests.rs index 3e281782738d42..44082ce908757d 100644 --- a/zjit/src/hir/tests.rs +++ b/zjit/src/hir/tests.rs @@ -2683,6 +2683,71 @@ pub mod hir_build_tests { "); } + #[test] + fn test_getblockparam() { + eval(" + def test(&block) = block + "); + assert_snapshot!(hir_string("test"), @r" + fn test@:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal :block, l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + v13:CBool = IsBlockParamModified l0 + IfTrue v13, bb3(v8, v9) + Jump bb4(v8, v9) + bb3(v14:BasicObject, v15:BasicObject): + v22:BasicObject = GetLocal :block, l0, EP@3 + Jump bb5(v14, v22, v22) + bb4(v17:BasicObject, v18:BasicObject): + v24:BasicObject = GetBlockParam :block, l0, EP@3 + Jump bb5(v17, v24, v24) + bb5(v26:BasicObject, v27:BasicObject, v28:BasicObject): + CheckInterrupts + Return v28 + "); + } + + #[test] + fn test_getblockparam_nested_block() { + eval(" + def test(&block) + proc do + block + end + end + "); + assert_snapshot!(hir_string_proc("test"), @r" + fn block in test@:4: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + v10:CBool = IsBlockParamModified l1 + IfTrue v10, bb3(v6) + Jump bb4(v6) + bb3(v11:BasicObject): + v17:BasicObject = GetLocal :block, l1, EP@3 + Jump bb5(v11, v17) + bb4(v13:BasicObject): + v19:BasicObject = GetBlockParam :block, l1, EP@3 + Jump bb5(v13, v19) + bb5(v21:BasicObject, v22:BasicObject): + CheckInterrupts + Return v22 + "); + 
} + #[test] fn test_splatarray_mut() { eval(" diff --git a/zjit/src/stats.rs b/zjit/src/stats.rs index cf100dcda2a393..556a1417a42338 100644 --- a/zjit/src/stats.rs +++ b/zjit/src/stats.rs @@ -210,6 +210,7 @@ make_counters! { exit_stackoverflow, exit_block_param_proxy_modified, exit_block_param_proxy_not_iseq_or_ifunc, + exit_block_param_wb_required, exit_too_many_keyword_parameters, } @@ -557,6 +558,7 @@ pub fn side_exit_counter(reason: crate::hir::SideExitReason) -> Counter { StackOverflow => exit_stackoverflow, BlockParamProxyModified => exit_block_param_proxy_modified, BlockParamProxyNotIseqOrIfunc => exit_block_param_proxy_not_iseq_or_ifunc, + BlockParamWbRequired => exit_block_param_wb_required, TooManyKeywordParameters => exit_too_many_keyword_parameters, PatchPoint(Invariant::BOPRedefined { .. }) => exit_patchpoint_bop_redefined, From 6c2ecb231a90eee2a6d2a50164e44a185b52dfe9 Mon Sep 17 00:00:00 2001 From: Kevin Menard Date: Wed, 21 Jan 2026 19:19:55 -0500 Subject: [PATCH 6/6] ZJIT: Use `TypeDistribution` to track stats about the `super` CME (#15928) This is a follow-up to #15816. Since I was only optimizing `invokesuper` for monomorphic cases, I could track that with a boolean value (actually, `Option<VALUE>` in this case). But `TypeDistribution` is a better way to track this information and will put us on better footing if we end up handling polymorphic cases. --- zjit/src/profile.rs | 43 +++++++++++++++++-------------------------- 1 file changed, 17 insertions(+), 26 deletions(-) diff --git a/zjit/src/profile.rs b/zjit/src/profile.rs index 7a584afd6fd1f6..c1feb759529e15 100644 --- a/zjit/src/profile.rs +++ b/zjit/src/profile.rs @@ -159,23 +159,8 @@ fn profile_invokesuper(profiler: &mut Profiler, profile: &mut IseqProfile) { let cme = unsafe { rb_vm_frame_method_entry(profiler.cfp) }; let cme_value = VALUE(cme as usize); // CME is a T_IMEMO, which is a VALUE - match profile.super_cme.get(&profiler.insn_idx) { - None => { - // If `None`, then this is our first time looking at `super` for this instruction. - profile.super_cme.insert(profiler.insn_idx, Some(cme_value)); - }, - Some(Some(existing_cme)) => { - // Check if the stored method entry is the same as the current one. If it isn't, then - // mark the call site as polymorphic. - if *existing_cme != cme_value { - profile.super_cme.insert(profiler.insn_idx, None); - } - } - Some(None) => { - // We've visited this instruction and explicitly stored `None` to mark the call site - // as polymorphic.
- } - } + profile.super_cme.entry(profiler.insn_idx) + .or_insert_with(|| TypeDistribution::new()).observe(ProfiledType::object(cme_value)); unsafe { rb_gc_writebarrier(profiler.iseq.into(), cme_value) }; @@ -359,7 +344,7 @@ pub struct IseqProfile { num_profiles: Vec<u8>, /// Method entries for `super` calls (stored as VALUE to be GC-safe) - super_cme: HashMap<usize, Option<VALUE>> + super_cme: HashMap<usize, TypeDistribution> } impl IseqProfile { @@ -377,8 +362,14 @@ impl IseqProfile { } pub fn get_super_method_entry(&self, insn_idx: usize) -> Option<*const rb_callable_method_entry_t> { - self.super_cme.get(&insn_idx) - .and_then(|opt| opt.map(|v| v.0 as *const rb_callable_method_entry_t)) + let Some(entry) = self.super_cme.get(&insn_idx) else { return None }; + let summary = TypeDistributionSummary::new(entry); + + if summary.is_monomorphic() { + Some(summary.bucket(0).class.0 as *const rb_callable_method_entry_t) + } else { + None + } } /// Run a given callback with every object in IseqProfile @@ -392,9 +383,9 @@ impl IseqProfile { } } - for cme_value in self.super_cme.values() { - if let Some(cme) = cme_value { - callback(*cme); + for super_cme_values in self.super_cme.values() { + for profiled_type in super_cme_values.each_item() { + callback(profiled_type.class) } } } @@ -411,9 +402,9 @@ impl IseqProfile { } // Update CME references if they move during compaction. - for cme_value in self.super_cme.values_mut() { - if let Some(cme) = cme_value { - callback(cme); + for super_cme_values in self.super_cme.values_mut() { + for ref mut profiled_type in super_cme_values.each_item_mut() { + callback(&mut profiled_type.class) } } }
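For illustration, here is a rough sketch of the distribution-based profiling pattern this patch adopts, written as standalone Ruby rather than ZJIT's actual Rust `TypeDistribution`/`TypeDistributionSummary` API (the `super_cme` hash and the `:cme_a`/`:cme_b` names are purely illustrative): each `super` call site counts every CME it observes, and only a site with exactly one distinct entry is treated as monomorphic at compile time.

```ruby
# Per-call-site CME counts: insn_idx => { cme => observation count }
super_cme = Hash.new { |h, k| h[k] = Hash.new(0) }

# Profiling pass: insn 7 always sees the same CME; insn 9 sees two.
[:cme_a, :cme_a, :cme_a].each { |cme| super_cme[7][cme] += 1 }
[:cme_a, :cme_b].each { |cme| super_cme[9][cme] += 1 }

# Compilation pass: a site is monomorphic only if one distinct CME was seen.
monomorphic = ->(dist) { dist.size == 1 ? dist.keys.first : nil }
monomorphic.call(super_cme[7]) # => :cme_a  (guard on this CME and optimize)
monomorphic.call(super_cme[9]) # => nil     (polymorphic; fall back)
```

Unlike the boolean/`Option<VALUE>` scheme, the counts per entry are preserved, so a future change could still pick the dominant target at a polymorphic site instead of always falling back.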