diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index 703a93e18..9582054f6 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -135,8 +135,8 @@ offset :: non_neg_integer(), branches :: [{non_neg_integer(), non_neg_integer(), non_neg_integer()}], jump_table_start :: non_neg_integer(), - available_regs :: [aarch64_register()], - used_regs :: [aarch64_register()], + available_regs :: non_neg_integer(), + used_regs :: non_neg_integer(), labels :: [{integer() | reference(), integer()}], variant :: non_neg_integer() }). @@ -198,9 +198,37 @@ -define(IS_UINT8_T(X), is_integer(X) andalso X >= 0 andalso X =< 255). -define(IS_UINT32_T(X), is_integer(X) andalso X >= 0 andalso X < 16#100000000). --define(AVAILABLE_REGS, [r7, r8, r9, r10, r11, r12, r13, r14, r15, r3, r4, r5, r6]). -define(PARAMETER_REGS, [r0, r1, r2, r3, r4, r5]). --define(SCRATCH_REGS, [r7, r8, r9, r10, r11, r12, r13, r14, r15, r3, r4, r5, r6, r17]). + +-define(REG_BIT_R0, (1 bsl 0)). +-define(REG_BIT_R1, (1 bsl 1)). +-define(REG_BIT_R2, (1 bsl 2)). +-define(REG_BIT_R3, (1 bsl 3)). +-define(REG_BIT_R4, (1 bsl 4)). +-define(REG_BIT_R5, (1 bsl 5)). +-define(REG_BIT_R6, (1 bsl 6)). +-define(REG_BIT_R7, (1 bsl 7)). +-define(REG_BIT_R8, (1 bsl 8)). +-define(REG_BIT_R9, (1 bsl 9)). +-define(REG_BIT_R10, (1 bsl 10)). +-define(REG_BIT_R11, (1 bsl 11)). +-define(REG_BIT_R12, (1 bsl 12)). +-define(REG_BIT_R13, (1 bsl 13)). +-define(REG_BIT_R14, (1 bsl 14)). +-define(REG_BIT_R15, (1 bsl 15)). +-define(REG_BIT_R16, (1 bsl 16)). +-define(REG_BIT_R17, (1 bsl 17)). + +-define(AVAILABLE_REGS_MASK, + (?REG_BIT_R7 bor ?REG_BIT_R8 bor ?REG_BIT_R9 bor ?REG_BIT_R10 bor ?REG_BIT_R11 bor + ?REG_BIT_R12 bor ?REG_BIT_R13 bor ?REG_BIT_R14 bor ?REG_BIT_R15 bor + ?REG_BIT_R3 bor ?REG_BIT_R4 bor ?REG_BIT_R5 bor ?REG_BIT_R6) +). 
+-define(SCRATCH_REGS_MASK, + (?REG_BIT_R7 bor ?REG_BIT_R8 bor ?REG_BIT_R9 bor ?REG_BIT_R10 bor ?REG_BIT_R11 bor + ?REG_BIT_R12 bor ?REG_BIT_R13 bor ?REG_BIT_R14 bor ?REG_BIT_R15 bor + ?REG_BIT_R3 bor ?REG_BIT_R4 bor ?REG_BIT_R5 bor ?REG_BIT_R6 bor ?REG_BIT_R17) +). %%----------------------------------------------------------------------------- %% @doc Return the word size in bytes, i.e. the sizeof(term) i.e. @@ -237,8 +265,8 @@ new(Variant, StreamModule, Stream) -> branches = [], jump_table_start = 0, offset = StreamModule:offset(Stream), - available_regs = ?AVAILABLE_REGS, - used_regs = [], + available_regs = ?AVAILABLE_REGS_MASK, + used_regs = 0, labels = [], variant = Variant }. @@ -293,7 +321,7 @@ debugger(#state{stream_module = StreamModule, stream = Stream0} = State) -> %% @return The list of used registers %%----------------------------------------------------------------------------- -spec used_regs(state()) -> [aarch64_register()]. -used_regs(#state{used_regs = Used}) -> Used. +used_regs(#state{used_regs = Used}) -> mask_to_list(Used). %%----------------------------------------------------------------------------- %% @doc Return the list of currently available native scratch registers. This @@ -303,7 +331,7 @@ used_regs(#state{used_regs = Used}) -> Used. %% @return The list of available registers %%----------------------------------------------------------------------------- -spec available_regs(state()) -> [aarch64_register()]. -available_regs(#state{available_regs = Available}) -> Available. +available_regs(#state{available_regs = Available}) -> mask_to_list(Available). %%----------------------------------------------------------------------------- %% @doc Free native registers. 
The passed list of registers can contain @@ -327,8 +355,11 @@ free_native_register( ) when is_atom(Reg) -> - {Available1, Used1} = free_reg(Available0, Used0, Reg), - State#state{available_regs = Available1, used_regs = Used1}; + Bit = reg_bit(Reg), + State#state{ + available_regs = Available0 bor Bit, + used_regs = Used0 band (bnot Bit) + }; free_native_register(State, {ptr, Reg}) -> free_native_register(State, Reg); free_native_register(State, _Other) -> @@ -342,9 +373,9 @@ free_native_register(State, _Other) -> %% @return ok %%----------------------------------------------------------------------------- -spec assert_all_native_free(state()) -> ok. -assert_all_native_free(#state{ - available_regs = ?AVAILABLE_REGS, used_regs = [] -}) -> +assert_all_native_free(State) -> + 0 = State#state.used_regs, + ?AVAILABLE_REGS_MASK = State#state.available_regs, ok. %%----------------------------------------------------------------------------- @@ -506,9 +537,13 @@ call_primitive_last( % registers used for parameters ParamRegs = lists:sublist(?PARAMETER_REGS, length(Args)), ArgsRegs = args_regs(Args), - ScratchRegs = ?AVAILABLE_REGS -- ArgsRegs -- ParamRegs, - [Temp | AvailableRegs1] = ScratchRegs, - UsedRegs = ?AVAILABLE_REGS -- AvailableRegs1, + ArgsMask = regs_to_mask(ArgsRegs), + ParamMask = regs_to_mask(ParamRegs), + ScratchMask = ?AVAILABLE_REGS_MASK band (bnot (ArgsMask bor ParamMask)), + Temp = first_avail(ScratchMask), + TempBit = reg_bit(Temp), + AvailableRegs1 = ScratchMask band (bnot TempBit), + UsedRegs = ?AVAILABLE_REGS_MASK band (bnot AvailableRegs1), PrepCall = case Primitive of 0 -> @@ -526,7 +561,11 @@ call_primitive_last( #state{stream = Stream2} = State1, Call = jit_aarch64_asm:br(Temp), Stream3 = StreamModule:append(Stream2, Call), - State1#state{stream = Stream3, available_regs = ?AVAILABLE_REGS, used_regs = []}. + State1#state{ + stream = Stream3, + available_regs = ?AVAILABLE_REGS_MASK, + used_regs = 0 + }. 
%%----------------------------------------------------------------------------- %% @doc Emit a return of a value if it's not equal to ctx. @@ -558,7 +597,9 @@ return_if_not_equal_to_ctx( I4 = jit_aarch64_asm:ret(), I2 = jit_aarch64_asm:bcc(eq, 4 + byte_size(I3) + byte_size(I4)), Stream1 = StreamModule:append(Stream0, <>), - {AvailableRegs1, UsedRegs1} = free_reg(AvailableRegs0, UsedRegs0, Reg), + Bit = reg_bit(Reg), + AvailableRegs1 = AvailableRegs0 bor Bit, + UsedRegs1 = UsedRegs0 band (bnot Bit), State#state{ stream = Stream1, available_regs = AvailableRegs1, @@ -615,10 +656,11 @@ jump_to_continuation( stream_module = StreamModule, stream = Stream0, offset = BaseOffset, - available_regs = [TempReg | _] + available_regs = Available } = State, {free, OffsetReg} ) -> + TempReg = first_avail(Available), % Calculate absolute address: native_code_base + target_offset % where native_code_base = current_pc + (BaseOffset - CurrentStreamOffset) CurrentStreamOffset = StreamModule:offset(Stream0), @@ -634,7 +676,11 @@ jump_to_continuation( Code = <>, Stream1 = StreamModule:append(Stream0, Code), % Free all registers since this is a tail jump - State#state{stream = Stream1, available_regs = ?AVAILABLE_REGS, used_regs = []}. + State#state{ + stream = Stream1, + available_regs = ?AVAILABLE_REGS_MASK, + used_regs = 0 + }. 
%% @private -spec rewrite_branch_instruction( @@ -982,7 +1028,7 @@ if_block_cond( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Temp | _] + available_regs = Available } = State0, {RegOrTuple, '&', Val, '!=', 0} ) -> @@ -991,6 +1037,7 @@ if_block_cond( {free, Reg0} -> Reg0; RegOrTuple -> RegOrTuple end, + Temp = first_avail(Available), % Test bits TestCode = try @@ -1014,10 +1061,11 @@ if_block_cond( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Temp | _] + available_regs = Available } = State0, {Reg, '&', Mask, '!=', Val} ) when ?IS_GPR(Reg) -> + Temp = first_avail(Available), % AND with mask OffsetBefore = StreamModule:offset(Stream0), State1 = op_imm(State0, and_, Temp, Reg, Mask), @@ -1055,7 +1103,9 @@ if_block_cond( -spec if_block_free_reg(aarch64_register() | {free, aarch64_register()}, state()) -> state(). if_block_free_reg({free, Reg}, State0) -> #state{available_regs = AvR0, used_regs = UR0} = State0, - {AvR1, UR1} = free_reg(AvR0, UR0, Reg), + Bit = reg_bit(Reg), + AvR1 = AvR0 bor Bit, + UR1 = UR0 band (bnot Bit), State0#state{ available_regs = AvR1, used_regs = UR1 @@ -1064,22 +1114,11 @@ if_block_free_reg(Reg, State0) when ?IS_GPR(Reg) -> State0. %% @private --spec merge_used_regs(state(), [aarch64_register()]) -> state(). -merge_used_regs(#state{used_regs = UR0, available_regs = AvR0} = State, [ - Reg | T -]) -> - case lists:member(Reg, UR0) of - true -> - merge_used_regs(State, T); - false -> - AvR1 = lists:delete(Reg, AvR0), - UR1 = [Reg | UR0], - merge_used_regs( - State#state{used_regs = UR1, available_regs = AvR1}, T - ) - end; -merge_used_regs(State, []) -> - State. +-spec merge_used_regs(state(), non_neg_integer()) -> state(). +merge_used_regs(#state{used_regs = UR} = State, OtherUR) -> + MergedUR = UR bor OtherUR, + MergedAvail = ?AVAILABLE_REGS_MASK band (bnot MergedUR), + State#state{used_regs = MergedUR, available_regs = MergedAvail}. 
%%----------------------------------------------------------------------------- %% @doc Emit a shift register right by a fixed number of bits, effectively @@ -1101,17 +1140,24 @@ shift_right( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [ResultReg | T], - used_regs = UR + available_regs = Available, + used_regs = Used } = State, Reg, Shift ) when ?IS_GPR(Reg) andalso is_integer(Shift) -> + ResultReg = first_avail(Available), + Bit = reg_bit(ResultReg), I = jit_aarch64_asm:lsr(ResultReg, Reg, Shift), Stream1 = StreamModule:append(Stream0, I), - {State#state{stream = Stream1, available_regs = T, used_regs = [ResultReg | UR]}, ResultReg}. + { + State#state{ + stream = Stream1, available_regs = Available band (bnot Bit), used_regs = Used bor Bit + }, + ResultReg + }. %%----------------------------------------------------------------------------- %% @doc Emit a shift register left by a fixed number of bits, effectively @@ -1159,8 +1205,9 @@ call_func_ptr( end, [FuncPtrTuple | Args] ), - UsedRegs1 = UsedRegs0 -- FreeRegs, - SavedRegs = [?LR_REG, ?CTX_REG, ?JITSTATE_REG, ?NATIVE_INTERFACE_REG | UsedRegs1], + FreeMask = regs_to_mask(FreeRegs), + UsedRegs1 = UsedRegs0 band (bnot FreeMask), + SavedRegs = [?LR_REG, ?CTX_REG, ?JITSTATE_REG, ?NATIVE_INTERFACE_REG | mask_to_list(UsedRegs1)], {SavedRegsOdd, Stream1} = push_registers(SavedRegs, StreamModule, Stream0), % Set up arguments following AArch64 calling convention @@ -1188,12 +1235,12 @@ call_func_ptr( Stream4 = StreamModule:append(Stream3, Call), % If r0 is in used regs, save it to another temporary register - FreeGPRegs = FreeRegs -- (FreeRegs -- ?AVAILABLE_REGS), - AvailableRegs1 = FreeGPRegs ++ AvailableRegs0, + FreeGPMask = FreeMask band ?AVAILABLE_REGS_MASK, + AvailableRegs1 = AvailableRegs0 bor FreeGPMask, {Stream5, ResultReg} = case lists:member(r0, SavedRegs) of true -> - [Temp | _] = AvailableRegs1, + Temp = first_avail(AvailableRegs1), {StreamModule:append(Stream4, 
jit_aarch64_asm:mov(Temp, r0)), Temp}; false -> {Stream4, r0} @@ -1201,9 +1248,10 @@ call_func_ptr( Stream6 = pop_registers(SavedRegsOdd, lists:reverse(SavedRegs), StreamModule, Stream5), - AvailableRegs2 = lists:delete(ResultReg, AvailableRegs1), - AvailableRegs3 = ?AVAILABLE_REGS -- (?AVAILABLE_REGS -- AvailableRegs2), - UsedRegs2 = [ResultReg | UsedRegs1], + ResultBit = reg_bit(ResultReg), + AvailableRegs2 = AvailableRegs1 band (bnot ResultBit), + AvailableRegs3 = AvailableRegs2 band ?AVAILABLE_REGS_MASK, + UsedRegs2 = UsedRegs1 bor ResultBit, { State1#state{ stream = Stream6, @@ -1243,8 +1291,11 @@ set_args( ) -> ParamRegs = parameter_regs(Args), ArgsRegs = args_regs(Args), - AvailableScratchGP = - ?SCRATCH_REGS -- ParamRegs -- ArgsRegs -- UsedRegs, + ParamMask = regs_to_mask(ParamRegs), + ArgsMask = regs_to_mask(ArgsRegs), + AvailableScratchMask = + ?SCRATCH_REGS_MASK band (bnot (ParamMask bor ArgsMask bor UsedRegs)), + AvailableScratchGP = mask_to_list(AvailableScratchMask), Offset = StreamModule:offset(Stream0), Args1 = [ case Arg of @@ -1255,10 +1306,10 @@ set_args( ], SetArgsCode = set_args0(Args1, ArgsRegs, ParamRegs, AvailableScratchGP, []), Stream1 = StreamModule:append(Stream0, SetArgsCode), - NewUsedRegs = lists:foldl( + NewUsedMask = lists:foldl( fun - ({free, {ptr, Reg}}, AccUsed) -> lists:delete(Reg, AccUsed); - ({free, Reg}, AccUsed) -> lists:delete(Reg, AccUsed); + ({free, {ptr, Reg}}, AccUsed) -> AccUsed band (bnot reg_bit(Reg)); + ({free, Reg}, AccUsed) when is_atom(Reg) -> AccUsed band (bnot reg_bit(Reg)); (_, AccUsed) -> AccUsed end, UsedRegs, @@ -1266,8 +1317,8 @@ set_args( ), State0#state{ stream = Stream1, - available_regs = ?AVAILABLE_REGS -- ParamRegs -- NewUsedRegs, - used_regs = ParamRegs ++ (NewUsedRegs -- ParamRegs) + available_regs = ?AVAILABLE_REGS_MASK band (bnot (ParamMask bor NewUsedMask)), + used_regs = ParamMask bor NewUsedMask }. 
%% @private @@ -1313,9 +1364,9 @@ replace_reg0([Other | T], Reg, Replacement, Acc) -> replace_reg0(T, Reg, Replacement, [Other | Acc]). %% @private --spec set_args0([arg()], [aarch64_register() | imm], [aarch64_register()], [aarch64_register()], [ - binary() -]) -> binary(). +-spec set_args0( + [arg()], [aarch64_register() | imm], [aarch64_register()], [aarch64_register()], [binary()] +) -> binary(). set_args0([], [], [], _AvailGP, Acc) -> list_to_binary(lists:reverse(Acc)); set_args0([{free, FreeVal} | ArgsT], ArgsRegs, ParamRegs, AvailGP, Acc) -> @@ -1394,64 +1445,94 @@ set_args1({avm_int64_t, Value}, Reg) when is_integer(Value) -> (state(), Src :: value() | vm_register(), Dest :: vm_register()) -> state(); (state(), Src :: {free, {ptr, aarch64_register(), 1}}, Dest :: {fp_reg, non_neg_integer()}) -> state(). +move_to_vm_register(State, Src, Dest) -> + move_to_vm_register_emit(State, Src, Dest). + % Native register to VM register -move_to_vm_register(State0, Src, {x_reg, extra}) when is_atom(Src) -> +move_to_vm_register_emit(State0, Src, {x_reg, extra}) when is_atom(Src) -> I1 = jit_aarch64_asm:str(Src, ?X_REG(?MAX_REG)), Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), State0#state{stream = Stream1}; -move_to_vm_register(State0, Src, {x_reg, X}) when is_atom(Src) -> +move_to_vm_register_emit(State0, Src, {x_reg, X}) when is_atom(Src) -> I1 = jit_aarch64_asm:str(Src, ?X_REG(X)), Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), State0#state{stream = Stream1}; -move_to_vm_register(State0, Src, {ptr, Reg}) when is_atom(Src) -> +move_to_vm_register_emit(State0, Src, {ptr, Reg}) when is_atom(Src) -> I1 = jit_aarch64_asm:str(Src, {Reg, 0}), Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), State0#state{stream = Stream1}; -move_to_vm_register(#state{available_regs = [Temp | _]} = State0, Src, {y_reg, Y}) when +move_to_vm_register_emit(#state{available_regs = Available} = State0, Src, {y_reg, Y}) when 
is_atom(Src) -> + Temp = first_avail(Available), I1 = jit_aarch64_asm:ldr(Temp, ?Y_REGS), I2 = jit_aarch64_asm:str(Src, {Temp, Y * ?WORD_SIZE}), Stream1 = (State0#state.stream_module):append(State0#state.stream, <>), State0#state{stream = Stream1}; % Source is an integer -move_to_vm_register(State, 0, Dest) -> - move_to_vm_register(State, xzr, Dest); -move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, N, Dest) when +move_to_vm_register_emit(State, 0, Dest) -> + move_to_vm_register_emit(State, xzr, Dest); +move_to_vm_register_emit(#state{available_regs = AR0} = State0, N, Dest) when is_integer(N) -> + Temp = first_avail(AR0), + TempBit = reg_bit(Temp), + AT = AR0 band (bnot TempBit), I1 = jit_aarch64_asm:mov(Temp, N), Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), - State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), + State1 = move_to_vm_register_emit( + State0#state{stream = Stream1, available_regs = AT}, Temp, Dest + ), State1#state{available_regs = AR0}; % Source is a VM register -move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {x_reg, extra}, Dest) -> +move_to_vm_register_emit(#state{available_regs = AR0} = State0, {x_reg, extra}, Dest) -> + Temp = first_avail(AR0), + TempBit = reg_bit(Temp), + AT = AR0 band (bnot TempBit), I1 = jit_aarch64_asm:ldr(Temp, ?X_REG(?MAX_REG)), Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), - State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), + State1 = move_to_vm_register_emit( + State0#state{stream = Stream1, available_regs = AT}, Temp, Dest + ), State1#state{available_regs = AR0}; -move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {x_reg, X}, Dest) -> +move_to_vm_register_emit(#state{available_regs = AR0} = State0, {x_reg, X}, Dest) -> + Temp = first_avail(AR0), + TempBit = reg_bit(Temp), + AT = AR0 band (bnot TempBit), I1 = 
jit_aarch64_asm:ldr(Temp, ?X_REG(X)), Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), - State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), + State1 = move_to_vm_register_emit( + State0#state{stream = Stream1, available_regs = AT}, Temp, Dest + ), State1#state{available_regs = AR0}; -move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {ptr, Reg}, Dest) -> +move_to_vm_register_emit(#state{available_regs = AR0} = State0, {ptr, Reg}, Dest) -> + Temp = first_avail(AR0), + TempBit = reg_bit(Temp), + AT = AR0 band (bnot TempBit), I1 = jit_aarch64_asm:ldr(Temp, {Reg, 0}), Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), - State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), + State1 = move_to_vm_register_emit( + State0#state{stream = Stream1, available_regs = AT}, Temp, Dest + ), State1#state{available_regs = AR0}; -move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {y_reg, Y}, Dest) -> +move_to_vm_register_emit(#state{available_regs = AR0} = State0, {y_reg, Y}, Dest) -> + Temp = first_avail(AR0), + TempBit = reg_bit(Temp), + AT = AR0 band (bnot TempBit), I1 = jit_aarch64_asm:ldr(Temp, ?Y_REGS), I2 = jit_aarch64_asm:ldr(Temp, {Temp, Y * ?WORD_SIZE}), Stream1 = (State0#state.stream_module):append(State0#state.stream, <>), - State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), + State1 = move_to_vm_register_emit( + State0#state{stream = Stream1, available_regs = AT}, Temp, Dest + ), State1#state{available_regs = AR0}; % term_to_float -move_to_vm_register( - #state{stream_module = StreamModule, available_regs = [Temp | _], stream = Stream0} = State0, +move_to_vm_register_emit( + #state{stream_module = StreamModule, available_regs = Available, stream = Stream0} = State0, {free, {ptr, Reg, 1}}, {fp_reg, F} ) -> + Temp = first_avail(Available), I1 = 
jit_aarch64_asm:ldr(Reg, {Reg, ?WORD_SIZE}), I2 = jit_aarch64_asm:ldr(Temp, ?FP_REGS), I3 = jit_aarch64_asm:str(Reg, {Temp, ?FP_REG_OFFSET(State0, F)}), @@ -1476,32 +1557,38 @@ move_to_vm_register( vm_register() | aarch64_register() ) -> state(). move_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, + #state{stream_module = StreamModule, stream = Stream0, available_regs = Available} = State, Reg, Index, {x_reg, X} ) when X < ?MAX_REG andalso is_atom(Reg) andalso is_integer(Index) -> + Temp = first_avail(Available), I1 = jit_aarch64_asm:ldr(Temp, {Reg, Index * ?WORD_SIZE}), I2 = jit_aarch64_asm:str(Temp, ?X_REG(X)), Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}; move_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, + #state{stream_module = StreamModule, stream = Stream0, available_regs = Available} = State, Reg, Index, {ptr, Dest} ) when is_atom(Reg) andalso is_integer(Index) -> + Temp = first_avail(Available), I1 = jit_aarch64_asm:ldr(Temp, {Reg, Index * ?WORD_SIZE}), I2 = jit_aarch64_asm:str(Temp, {Dest, 0}), Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}; move_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp1, Temp2 | _]} = + #state{stream_module = StreamModule, stream = Stream0, available_regs = Available} = State, Reg, Index, {y_reg, Y} ) when is_atom(Reg) andalso is_integer(Index) -> + Temp1 = first_avail(Available), + Bit1 = reg_bit(Temp1), + Avail1 = Available band (bnot Bit1), + Temp2 = first_avail(Avail1), I1 = jit_aarch64_asm:ldr(Temp1, ?Y_REGS), I2 = jit_aarch64_asm:ldr(Temp2, {Reg, Index * ?WORD_SIZE}), I3 = jit_aarch64_asm:str(Temp2, {Temp1, Y * ?WORD_SIZE}), @@ -1509,12 +1596,13 @@ move_array_element( Stream1 = StreamModule:append(Stream0, Code), State#state{stream = Stream1}; move_array_element( - #state{stream_module = 
StreamModule, stream = Stream0, available_regs = [Temp | _]} = + #state{stream_module = StreamModule, stream = Stream0, available_regs = Available} = State, {free, Reg}, Index, {y_reg, Y} ) when is_integer(Index) -> + Temp = first_avail(Available), I1 = jit_aarch64_asm:ldr(Temp, ?Y_REGS), I2 = jit_aarch64_asm:ldr(Reg, {Reg, Index * ?WORD_SIZE}), I3 = jit_aarch64_asm:str(Reg, {Temp, Y * ?WORD_SIZE}), @@ -1540,7 +1628,9 @@ move_array_element( ) when X < ?MAX_REG andalso is_atom(IndexReg) -> I1 = jit_aarch64_asm:ldr(IndexReg, {Reg, IndexReg, lsl, 3}), I2 = jit_aarch64_asm:str(IndexReg, ?X_REG(X)), - {AvailableRegs1, UsedRegs1} = free_reg(AvailableRegs0, UsedRegs0, IndexReg), + Bit = reg_bit(IndexReg), + AvailableRegs1 = AvailableRegs0 bor Bit, + UsedRegs1 = UsedRegs0 band (bnot Bit), Stream1 = StreamModule:append(Stream0, <>), State#state{ available_regs = AvailableRegs1, @@ -1560,7 +1650,9 @@ move_array_element( ) when is_atom(IndexReg) -> I1 = jit_aarch64_asm:ldr(IndexReg, {Reg, IndexReg, lsl, 3}), I2 = jit_aarch64_asm:str(IndexReg, {PtrReg, 0}), - {AvailableRegs1, UsedRegs1} = free_reg(AvailableRegs0, UsedRegs0, IndexReg), + Bit = reg_bit(IndexReg), + AvailableRegs1 = AvailableRegs0 bor Bit, + UsedRegs1 = UsedRegs0 band (bnot Bit), Stream1 = StreamModule:append(Stream0, <>), State#state{ available_regs = AvailableRegs1, @@ -1571,17 +1663,20 @@ move_array_element( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Temp | _] = AvailableRegs0, + available_regs = AvailableRegs0, used_regs = UsedRegs0 } = State, Reg, {free, IndexReg}, {y_reg, Y} ) when ?IS_GPR(IndexReg) -> + Temp = first_avail(AvailableRegs0), I1 = jit_aarch64_asm:ldr(Temp, ?Y_REGS), I2 = jit_aarch64_asm:ldr(IndexReg, {Reg, IndexReg, lsl, 3}), I3 = jit_aarch64_asm:str(IndexReg, {Temp, Y * ?WORD_SIZE}), - {AvailableRegs1, UsedRegs1} = free_reg(AvailableRegs0, UsedRegs0, IndexReg), + Bit = reg_bit(IndexReg), + AvailableRegs1 = AvailableRegs0 bor Bit, + UsedRegs1 = UsedRegs0 band 
(bnot Bit), Stream1 = StreamModule:append( Stream0, <> ), @@ -1618,17 +1713,21 @@ get_array_element( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [ElemReg | AvailableT], + available_regs = Available, used_regs = UsedRegs0 } = State, Reg, Index ) -> + ElemReg = first_avail(Available), + Bit = reg_bit(ElemReg), I1 = jit_aarch64_asm:ldr(ElemReg, {Reg, Index * ?WORD_SIZE}), Stream1 = StreamModule:append(Stream0, <>), { State#state{ - stream = Stream1, available_regs = AvailableT, used_regs = [ElemReg | UsedRegs0] + stream = Stream1, + available_regs = Available band (bnot Bit), + used_regs = UsedRegs0 bor Bit }, ElemReg }. @@ -1697,12 +1796,13 @@ move_to_array_element( ) when is_integer(IndexVal) andalso is_integer(Offset) -> move_to_array_element(State, Value, BaseReg, IndexVal + Offset); move_to_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, + #state{stream_module = StreamModule, stream = Stream0, available_regs = Available} = State, ValueReg, BaseReg, IndexReg, Offset ) when ?IS_GPR(ValueReg) andalso ?IS_GPR(IndexReg) andalso is_integer(Offset) -> + Temp = first_avail(Available), I1 = jit_aarch64_asm:add(Temp, IndexReg, Offset), I2 = jit_aarch64_asm:str(ValueReg, {BaseReg, Temp, lsl, 3}), Stream1 = StreamModule:append(Stream0, <>), @@ -1715,7 +1815,7 @@ move_to_array_element( Offset ) -> {State1, ValueReg} = copy_to_native_register(State0, Value), - [Temp | _] = State1#state.available_regs, + Temp = first_avail(State1#state.available_regs), I1 = jit_aarch64_asm:add(Temp, IndexReg, Offset), I2 = jit_aarch64_asm:str(ValueReg, {BaseReg, Temp, lsl, 3}), Stream1 = (State1#state.stream_module):append(State1#state.stream, <>), @@ -1732,80 +1832,129 @@ move_to_array_element( %% @return Tuple of {Updated backend state, Native register containing the value} %%----------------------------------------------------------------------------- -spec move_to_native_register(state(), value() | 
cp) -> {state(), aarch64_register()}. -move_to_native_register( +move_to_native_register(State, Reg) when ?IS_GPR(Reg) -> + {State, Reg}; +move_to_native_register(State, Value) -> + move_to_native_register_emit(State, Value). + +move_to_native_register_emit( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Reg | AvailT], + available_regs = Available, used_regs = Used } = State, cp ) -> + Reg = first_avail(Available), + Bit = reg_bit(Reg), I1 = jit_aarch64_asm:ldr(Reg, ?CP), Stream1 = StreamModule:append(Stream0, I1), - {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg}; -move_to_native_register(State, Reg) when is_atom(Reg) -> - {State, Reg}; -move_to_native_register( - #state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Reg} + { + State#state{ + stream = Stream1, + used_regs = Used bor Bit, + available_regs = Available band (bnot Bit) + }, + Reg + }; +move_to_native_register_emit( + #state{stream_module = StreamModule, stream = Stream0} = State, + {ptr, Reg} ) when is_atom(Reg) -> I1 = jit_aarch64_asm:ldr(Reg, {Reg, 0}), Stream1 = StreamModule:append(Stream0, I1), {State#state{stream = Stream1}, Reg}; -move_to_native_register( +move_to_native_register_emit( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Reg | AvailT], + available_regs = Available, used_regs = Used } = State, Imm ) when is_integer(Imm) -> + Reg = first_avail(Available), + Bit = reg_bit(Reg), I1 = jit_aarch64_asm:mov(Reg, Imm), Stream1 = StreamModule:append(Stream0, I1), - {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg}; -move_to_native_register( + { + State#state{ + stream = Stream1, + used_regs = Used bor Bit, + available_regs = Available band (bnot Bit) + }, + Reg + }; +move_to_native_register_emit( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Reg | AvailT], + available_regs = Available, used_regs = Used } = State, 
{x_reg, extra} ) -> + Reg = first_avail(Available), + Bit = reg_bit(Reg), I1 = jit_aarch64_asm:ldr(Reg, ?X_REG(?MAX_REG)), Stream1 = StreamModule:append(Stream0, I1), - {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg}; -move_to_native_register( + { + State#state{ + stream = Stream1, + used_regs = Used bor Bit, + available_regs = Available band (bnot Bit) + }, + Reg + }; +move_to_native_register_emit( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Reg | AvailT], + available_regs = Available, used_regs = Used } = State, {x_reg, X} ) when X < ?MAX_REG -> + Reg = first_avail(Available), + Bit = reg_bit(Reg), I1 = jit_aarch64_asm:ldr(Reg, ?X_REG(X)), Stream1 = StreamModule:append(Stream0, I1), - {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg}; -move_to_native_register( + { + State#state{ + stream = Stream1, + used_regs = Used bor Bit, + available_regs = Available band (bnot Bit) + }, + Reg + }; +move_to_native_register_emit( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Reg | AvailT], + available_regs = Available, used_regs = Used } = State, {y_reg, Y} ) -> + Reg = first_avail(Available), + Bit = reg_bit(Reg), I1 = jit_aarch64_asm:ldr(Reg, ?Y_REGS), I2 = jit_aarch64_asm:ldr(Reg, {Reg, Y * ?WORD_SIZE}), Code = <>, Stream1 = StreamModule:append(Stream0, Code), - {State#state{stream = Stream1, available_regs = AvailT, used_regs = [Reg | Used]}, Reg}. + { + State#state{ + stream = Stream1, + available_regs = Available band (bnot Bit), + used_regs = Used bor Bit + }, + Reg + }. %%----------------------------------------------------------------------------- %% @doc Move a value (integer, vm register, pointer or native register) to a @@ -1819,7 +1968,13 @@ move_to_native_register( -spec move_to_native_register(state(), value(), aarch64_register()) -> state(). 
move_to_native_register( #state{stream_module = StreamModule, stream = Stream0} = State, RegSrc, RegDst -) when is_atom(RegSrc) orelse is_integer(RegSrc) -> +) when is_atom(RegSrc) -> + I = jit_aarch64_asm:mov(RegDst, RegSrc), + Stream1 = StreamModule:append(Stream0, I), + State#state{stream = Stream1}; +move_to_native_register( + #state{stream_module = StreamModule, stream = Stream0} = State, RegSrc, RegDst +) when is_integer(RegSrc) -> I = jit_aarch64_asm:mov(RegDst, RegSrc), Stream1 = StreamModule:append(Stream0, I), State#state{stream = Stream1}; @@ -1866,26 +2021,40 @@ copy_to_native_register( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [SaveReg | AvailT], + available_regs = Available, used_regs = Used } = State, Reg ) when is_atom(Reg) -> + SaveReg = first_avail(Available), + Bit = reg_bit(SaveReg), I1 = jit_aarch64_asm:mov(SaveReg, Reg), Stream1 = StreamModule:append(Stream0, I1), - {State#state{stream = Stream1, available_regs = AvailT, used_regs = [SaveReg | Used]}, SaveReg}; + { + State#state{ + stream = Stream1, available_regs = Available band (bnot Bit), used_regs = Used bor Bit + }, + SaveReg + }; copy_to_native_register( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [SaveReg | AvailT], + available_regs = Available, used_regs = Used } = State, {ptr, Reg} ) when is_atom(Reg) -> + SaveReg = first_avail(Available), + Bit = reg_bit(SaveReg), I1 = jit_aarch64_asm:ldr(SaveReg, {Reg, 0}), Stream1 = StreamModule:append(Stream0, I1), - {State#state{stream = Stream1, available_regs = AvailT, used_regs = [SaveReg | Used]}, SaveReg}; + { + State#state{ + stream = Stream1, available_regs = Available band (bnot Bit), used_regs = Used bor Bit + }, + SaveReg + }; copy_to_native_register(State, Reg) -> move_to_native_register(State, Reg). 
@@ -1898,9 +2067,10 @@ copy_to_native_register(State, Reg) -> %%----------------------------------------------------------------------------- -spec move_to_cp(state(), vm_register()) -> state(). move_to_cp( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Reg | _]} = State, + #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State, {y_reg, Y} ) -> + Reg = first_avail(Avail), I1 = jit_aarch64_asm:ldr(Reg, ?Y_REGS), I2 = jit_aarch64_asm:ldr(Reg, {Reg, Y * ?WORD_SIZE}), I3 = jit_aarch64_asm:str(Reg, ?CP), @@ -1917,9 +2087,10 @@ move_to_cp( %%----------------------------------------------------------------------------- -spec increment_sp(state(), integer()) -> state(). increment_sp( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Reg | _]} = State, + #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State, Offset ) -> + Reg = first_avail(Avail), I1 = jit_aarch64_asm:ldr(Reg, ?Y_REGS), I2 = jit_aarch64_asm:add(Reg, Reg, Offset * ?WORD_SIZE), I3 = jit_aarch64_asm:str(Reg, ?Y_REGS), @@ -1940,12 +2111,13 @@ set_continuation_to_label( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Temp | _], + available_regs = Avail, branches = Branches, labels = Labels } = State, Label ) -> + Temp = first_avail(Avail), Offset = StreamModule:offset(Stream0), case lists:keyfind(Label, 1, Labels) of {Label, LabelOffset} -> @@ -1979,10 +2151,11 @@ set_continuation_to_offset( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Temp | _], + available_regs = Avail, branches = Branches } = State ) -> + Temp = first_avail(Avail), OffsetRef = make_ref(), Offset = StreamModule:offset(Stream0), I1 = jit_aarch64_asm:adr(Temp, 0), @@ -2015,16 +2188,20 @@ get_module_index( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Reg | AvailableT], + available_regs = Avail, used_regs = UsedRegs0 } = State ) -> + Reg = 
first_avail(Avail), + Bit = reg_bit(Reg), I1 = jit_aarch64_asm:ldr(Reg, ?JITSTATE_MODULE), I2 = jit_aarch64_asm:ldr_w(Reg, ?MODULE_INDEX(Reg)), Code = <>, Stream1 = StreamModule:append(Stream0, Code), { - State#state{stream = Stream1, available_regs = AvailableT, used_regs = [Reg | UsedRegs0]}, + State#state{ + stream = Stream1, available_regs = Avail band (bnot Bit), used_regs = UsedRegs0 bor Bit + }, Reg }. @@ -2037,7 +2214,7 @@ op_imm(#state{stream_module = StreamModule, stream = Stream0} = State, Op, Reg, StreamModule:append(Stream0, I) catch error:{unencodable_immediate, Val} -> - [Temp | _] = State#state.available_regs, + Temp = first_avail(State#state.available_regs), I1 = jit_aarch64_asm:mov(Temp, Val), I2 = jit_aarch64_asm:Op(Reg, Reg, Temp), StreamModule:append(Stream0, <>) @@ -2064,16 +2241,28 @@ op_imm(#state{stream_module = StreamModule, stream = Stream0} = State, Op, RegA, %% @param Val immediate value to AND %% @return Updated backend state %%----------------------------------------------------------------------------- +and_(#state{stream_module = StreamModule, stream = Stream0} = State, {free, Reg}, SrcReg) when + is_atom(SrcReg) +-> + I1 = jit_aarch64_asm:and_(Reg, Reg, SrcReg), + Stream1 = StreamModule:append(Stream0, I1), + {State#state{stream = Stream1}, Reg}; and_(State, {free, Reg}, Val) -> NewState = op_imm(State, and_, Reg, Reg, Val), {NewState, Reg}; and_( - #state{available_regs = [ResultReg | T], used_regs = UR} = State, + #state{available_regs = Avail, used_regs = UR} = State, Reg, Val ) -> + ResultReg = first_avail(Avail), + Bit = reg_bit(ResultReg), NewState = op_imm( - State#state{available_regs = T, used_regs = [ResultReg | UR]}, and_, ResultReg, Reg, Val + State#state{available_regs = Avail band (bnot Bit), used_regs = UR bor Bit}, + and_, + ResultReg, + Reg, + Val ), {NewState, ResultReg}. 
@@ -2085,7 +2274,12 @@ and_( %% @param Val immediate value to OR %% @return Updated backend state %%----------------------------------------------------------------------------- --spec or_(state(), aarch64_register(), integer()) -> state(). +or_(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, SrcReg) when + is_atom(SrcReg) +-> + I1 = jit_aarch64_asm:orr(Reg, Reg, SrcReg), + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1}; or_(State, Reg, Val) -> op_imm(State, orr, Reg, Reg, Val). @@ -2131,14 +2325,16 @@ mul(State, _Reg, 1) -> State; mul(State, Reg, 2) -> shift_left(State, Reg, 1); -mul(#state{available_regs = [Temp | _]} = State, Reg, 3) -> +mul(#state{available_regs = Avail} = State, Reg, 3) -> + Temp = first_avail(Avail), I1 = jit_aarch64_asm:lsl(Temp, Reg, 1), I2 = jit_aarch64_asm:add(Reg, Temp, Reg), Stream1 = (State#state.stream_module):append(State#state.stream, <>), State#state{stream = Stream1}; mul(State, Reg, 4) -> shift_left(State, Reg, 2); -mul(#state{available_regs = [Temp | _]} = State, Reg, 5) -> +mul(#state{available_regs = Avail} = State, Reg, 5) -> + Temp = first_avail(Avail), I1 = jit_aarch64_asm:lsl(Temp, Reg, 2), I2 = jit_aarch64_asm:add(Reg, Temp, Reg), Stream1 = (State#state.stream_module):append(State#state.stream, <>), @@ -2146,14 +2342,16 @@ mul(#state{available_regs = [Temp | _]} = State, Reg, 5) -> mul(State0, Reg, 6) -> State1 = mul(State0, Reg, 3), mul(State1, Reg, 2); -mul(#state{available_regs = [Temp | _]} = State, Reg, 7) -> +mul(#state{available_regs = Avail} = State, Reg, 7) -> + Temp = first_avail(Avail), I1 = jit_aarch64_asm:lsl(Temp, Reg, 3), I2 = jit_aarch64_asm:sub(Reg, Temp, Reg), Stream1 = (State#state.stream_module):append(State#state.stream, <>), State#state{stream = Stream1}; mul(State, Reg, 8) -> shift_left(State, Reg, 3); -mul(#state{available_regs = [Temp | _]} = State, Reg, 9) -> +mul(#state{available_regs = Avail} = State, Reg, 9) -> + Temp = first_avail(Avail), I1 = 
jit_aarch64_asm:lsl(Temp, Reg, 3), I2 = jit_aarch64_asm:add(Reg, Temp, Reg), Stream1 = (State#state.stream_module):append(State#state.stream, <>), @@ -2161,7 +2359,8 @@ mul(#state{available_regs = [Temp | _]} = State, Reg, 9) -> mul(State0, Reg, 10) -> State1 = mul(State0, Reg, 5), mul(State1, Reg, 2); -mul(#state{available_regs = [Temp | _]} = State, Reg, 15) -> +mul(#state{available_regs = Avail} = State, Reg, 15) -> + Temp = first_avail(Avail), I1 = jit_aarch64_asm:lsl(Temp, Reg, 4), I2 = jit_aarch64_asm:sub(Reg, Temp, Reg), Stream1 = (State#state.stream_module):append(State#state.stream, <>), @@ -2173,10 +2372,11 @@ mul(State, Reg, 32) -> mul(State, Reg, 64) -> shift_left(State, Reg, 6); mul( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, + #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State, Reg, Val ) -> + Temp = first_avail(Avail), % multiply by decomposing by power of 2 I1 = jit_aarch64_asm:mov(Temp, Val), I2 = jit_aarch64_asm:mul(Reg, Reg, Temp), @@ -2193,8 +2393,9 @@ mul( %%----------------------------------------------------------------------------- -spec decrement_reductions_and_maybe_schedule_next(state()) -> state(). 
decrement_reductions_and_maybe_schedule_next( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0 + #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State0 ) -> + Temp = first_avail(Avail), % Load reduction count I1 = jit_aarch64_asm:ldr_w(Temp, ?JITSTATE_REDUCTIONCOUNT), % Decrement reduction count @@ -2254,10 +2455,11 @@ call_only_or_schedule_next( stream = Stream0, branches = Branches, labels = Labels, - available_regs = [Temp | _] + available_regs = Avail } = State0, Label ) -> + Temp = first_avail(Avail), % Load reduction count I1 = jit_aarch64_asm:ldr_w(Temp, ?JITSTATE_REDUCTIONCOUNT), % Decrement reduction count @@ -2392,24 +2594,75 @@ return_labels_and_lines( ), State#state{stream = Stream1}. -%% @private --spec free_reg([aarch64_register()], [aarch64_register()], aarch64_register()) -> - {[aarch64_register()], [aarch64_register()]}. -free_reg(AvailableRegs0, UsedRegs0, Reg) when ?IS_GPR(Reg) -> - AvailableRegs1 = free_reg0(?AVAILABLE_REGS, AvailableRegs0, Reg, []), - true = lists:member(Reg, UsedRegs0), - UsedRegs1 = lists:delete(Reg, UsedRegs0), - {AvailableRegs1, UsedRegs1}. - -%% @private --spec free_reg0([aarch64_register()], [aarch64_register()], aarch64_register(), [aarch64_register()]) -> - [aarch64_register()]. -free_reg0([Reg | _SortedT], PrevRegs0, Reg, Acc) -> - lists:reverse(Acc, [Reg | PrevRegs0]); -free_reg0([PrevReg | SortedT], [PrevReg | PrevT], Reg, Acc) -> - free_reg0(SortedT, PrevT, Reg, [PrevReg | Acc]); -free_reg0([_Other | SortedT], PrevRegs, Reg, Acc) -> - free_reg0(SortedT, PrevRegs, Reg, Acc). 
%% @private
%% @doc Map a native register name to its single-bit mask. Bit N stands for
%% register rN (see the ?REG_BIT_Rn macros). Only r0..r17 are tracked; any
%% other term fails with function_clause.
-spec reg_bit(aarch64_register()) -> pos_integer().
reg_bit(r0) -> ?REG_BIT_R0;
reg_bit(r1) -> ?REG_BIT_R1;
reg_bit(r2) -> ?REG_BIT_R2;
reg_bit(r3) -> ?REG_BIT_R3;
reg_bit(r4) -> ?REG_BIT_R4;
reg_bit(r5) -> ?REG_BIT_R5;
reg_bit(r6) -> ?REG_BIT_R6;
reg_bit(r7) -> ?REG_BIT_R7;
reg_bit(r8) -> ?REG_BIT_R8;
reg_bit(r9) -> ?REG_BIT_R9;
reg_bit(r10) -> ?REG_BIT_R10;
reg_bit(r11) -> ?REG_BIT_R11;
reg_bit(r12) -> ?REG_BIT_R12;
reg_bit(r13) -> ?REG_BIT_R13;
reg_bit(r14) -> ?REG_BIT_R14;
reg_bit(r15) -> ?REG_BIT_R15;
reg_bit(r16) -> ?REG_BIT_R16;
reg_bit(r17) -> ?REG_BIT_R17.

%% @private
%% @doc Combine a list of registers into a bitmask. `imm' entries (allowed by
%% the return type of args_regs/1) name no register and are skipped.
-spec regs_to_mask([aarch64_register() | imm]) -> non_neg_integer().
regs_to_mask(Regs) ->
    regs_to_mask(Regs, 0).

%% @private
%% Tail-recursive worker for regs_to_mask/1; Mask accumulates the bits so far.
-spec regs_to_mask([aarch64_register() | imm], non_neg_integer()) -> non_neg_integer().
regs_to_mask([], Mask) -> Mask;
regs_to_mask([imm | T], Mask) -> regs_to_mask(T, Mask);
regs_to_mask([Reg | T], Mask) -> regs_to_mask(T, Mask bor reg_bit(Reg)).

%% @private
%% @doc Return the first available register from a bitmask.
%% Allocation order: [r7, r8, r9, r10, r11, r12, r13, r14, r15, r3, r4, r5, r6]
%% (r3..r6 come last, presumably because r3..r5 are also parameter registers
%% -- TODO confirm). Bits for other registers are never selected.
%% There is deliberately no fallback clause: if none of the bits above is set
%% (including Mask =:= 0) the call fails with function_clause.
%% Kept as unrolled guard clauses since this runs on every register allocation.
-spec first_avail(non_neg_integer()) -> aarch64_register().
first_avail(Mask) when Mask band ?REG_BIT_R7 =/= 0 -> r7;
first_avail(Mask) when Mask band ?REG_BIT_R8 =/= 0 -> r8;
first_avail(Mask) when Mask band ?REG_BIT_R9 =/= 0 -> r9;
first_avail(Mask) when Mask band ?REG_BIT_R10 =/= 0 -> r10;
first_avail(Mask) when Mask band ?REG_BIT_R11 =/= 0 -> r11;
first_avail(Mask) when Mask band ?REG_BIT_R12 =/= 0 -> r12;
first_avail(Mask) when Mask band ?REG_BIT_R13 =/= 0 -> r13;
first_avail(Mask) when Mask band ?REG_BIT_R14 =/= 0 -> r14;
first_avail(Mask) when Mask band ?REG_BIT_R15 =/= 0 -> r15;
first_avail(Mask) when Mask band ?REG_BIT_R3 =/= 0 -> r3;
first_avail(Mask) when Mask band ?REG_BIT_R4 =/= 0 -> r4;
first_avail(Mask) when Mask band ?REG_BIT_R5 =/= 0 -> r5;
first_avail(Mask) when Mask band ?REG_BIT_R6 =/= 0 -> r6.

%% @private
%% @doc Convert a bitmask to a register list, in the same order first_avail/1
%% uses. Bits for registers outside that order (r0..r2, r16, r17) are ignored,
%% so the result only ever contains r3..r15.
-spec mask_to_list(non_neg_integer()) -> [aarch64_register()].
mask_to_list(0) ->
    [];
mask_to_list(Mask) ->
    [
        Reg
     || Reg <- [r7, r8, r9, r10, r11, r12, r13, r14, r15, r3, r4, r5, r6],
        Mask band reg_bit(Reg) =/= 0
    ].

%% @private
-spec args_regs([arg()]) -> [aarch64_register() | imm].
diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index 80f76e1e7..2fa44c553 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -135,8 +135,8 @@ offset :: non_neg_integer(), branches :: [{non_neg_integer(), non_neg_integer(), non_neg_integer()}], jump_table_start :: non_neg_integer(), - available_regs :: [armv6m_register()], - used_regs :: [armv6m_register()], + available_regs :: non_neg_integer(), + used_regs :: non_neg_integer(), labels :: [{integer() | reference(), integer()}], variant :: non_neg_integer(), literal_pool :: [{non_neg_integer(), armv6m_register(), non_neg_integer()}] @@ -212,9 +212,32 @@ %% - r12: intra-procedure call scratch %% - r13 (SP), r14 (LR), r15 (PC): special purpose %% Reorder to match AArch64 test expectations (r7 first) --define(AVAILABLE_REGS, [r7, r6, r5, r4, r3, r1]). -define(PARAMETER_REGS, [r0, r1, r2, r3]). --define(SCRATCH_REGS, [r7, r6, r5, r4, r3, r2, r1, r0, r12]). + +-define(REG_BIT_R0, (1 bsl 0)). +-define(REG_BIT_R1, (1 bsl 1)). +-define(REG_BIT_R2, (1 bsl 2)). +-define(REG_BIT_R3, (1 bsl 3)). +-define(REG_BIT_R4, (1 bsl 4)). +-define(REG_BIT_R5, (1 bsl 5)). +-define(REG_BIT_R6, (1 bsl 6)). +-define(REG_BIT_R7, (1 bsl 7)). +-define(REG_BIT_R8, (1 bsl 8)). +-define(REG_BIT_R9, (1 bsl 9)). +-define(REG_BIT_R10, (1 bsl 10)). +-define(REG_BIT_R11, (1 bsl 11)). +-define(REG_BIT_R12, (1 bsl 12)). +-define(REG_BIT_R13, (1 bsl 13)). +-define(REG_BIT_R14, (1 bsl 14)). +-define(REG_BIT_R15, (1 bsl 15)). + +-define(AVAILABLE_REGS_MASK, + (?REG_BIT_R7 bor ?REG_BIT_R6 bor ?REG_BIT_R5 bor ?REG_BIT_R4 bor ?REG_BIT_R3 bor ?REG_BIT_R1) +). +-define(SCRATCH_REGS_MASK, + (?REG_BIT_R7 bor ?REG_BIT_R6 bor ?REG_BIT_R5 bor ?REG_BIT_R4 bor ?REG_BIT_R3 bor + ?REG_BIT_R2 bor ?REG_BIT_R1 bor ?REG_BIT_R0 bor ?REG_BIT_R12) +). %%----------------------------------------------------------------------------- %% @doc Return the word size in bytes, i.e. the sizeof(term) i.e. 
@@ -251,8 +274,8 @@ new(Variant, StreamModule, Stream) -> branches = [], jump_table_start = 0, offset = StreamModule:offset(Stream), - available_regs = ?AVAILABLE_REGS, - used_regs = [], + available_regs = ?AVAILABLE_REGS_MASK, + used_regs = 0, labels = [], variant = Variant, literal_pool = [] @@ -308,7 +331,7 @@ debugger(#state{stream_module = StreamModule, stream = Stream0} = State) -> %% @return The list of used registers %%----------------------------------------------------------------------------- -spec used_regs(state()) -> [armv6m_register()]. -used_regs(#state{used_regs = Used}) -> Used. +used_regs(#state{used_regs = Used}) -> mask_to_list(Used). %%----------------------------------------------------------------------------- %% @doc Return the list of currently available native scratch registers. This @@ -318,7 +341,7 @@ used_regs(#state{used_regs = Used}) -> Used. %% @return The list of available registers %%----------------------------------------------------------------------------- -spec available_regs(state()) -> [armv6m_register()]. -available_regs(#state{available_regs = Available}) -> Available. +available_regs(#state{available_regs = Available}) -> mask_to_list(Available). %%----------------------------------------------------------------------------- %% @doc Free native registers. The passed list of registers can contain @@ -342,8 +365,10 @@ free_native_register( ) when is_atom(Reg) -> - {Available1, Used1} = free_reg(Available0, Used0, Reg), - State#state{available_regs = Available1, used_regs = Used1}; + Bit = reg_bit(Reg), + State#state{ + available_regs = Available0 bor Bit, used_regs = Used0 band (bnot Bit) + }; free_native_register(State, {ptr, Reg}) -> free_native_register(State, Reg); free_native_register(State, _Other) -> @@ -357,9 +382,9 @@ free_native_register(State, _Other) -> %% @return ok %%----------------------------------------------------------------------------- -spec assert_all_native_free(state()) -> ok. 
-assert_all_native_free(#state{ - available_regs = ?AVAILABLE_REGS, used_regs = [] -}) -> +assert_all_native_free(State) -> + 0 = State#state.used_regs, + ?AVAILABLE_REGS_MASK = State#state.available_regs, ok. %%----------------------------------------------------------------------------- @@ -591,23 +616,25 @@ call_primitive( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [TempReg | RestRegs], - used_regs = UsedRegs + available_regs = Available, + used_regs = Used } = State, Primitive, Args -) -> +) when Available =/= 0 -> % Use a low register for LDR since ARM Thumb LDR only works with low registers + TempReg = first_avail(Available), + TempBit = reg_bit(TempReg), PrepCall = load_primitive_ptr(Primitive, TempReg), Stream1 = StreamModule:append(Stream0, PrepCall), StateCall = State#state{ stream = Stream1, - available_regs = RestRegs, - used_regs = [TempReg | UsedRegs] + available_regs = Available band (bnot TempBit), + used_regs = Used bor TempBit }, call_func_ptr(StateCall, {free, TempReg}, Args); call_primitive( - #state{available_regs = []} = State, + #state{available_regs = 0} = State, Primitive, Args ) -> @@ -636,9 +663,13 @@ call_primitive_last( % registers used for parameters ParamRegs = lists:sublist(?PARAMETER_REGS, length(Args)), ArgsRegs = args_regs(Args), - ScratchRegs = ?AVAILABLE_REGS -- ArgsRegs -- ParamRegs, - [Temp | AvailableRegs1] = ScratchRegs, - UsedRegs = ?AVAILABLE_REGS -- AvailableRegs1, + ParamMask = regs_to_mask(ParamRegs), + ArgsMask = regs_to_mask(ArgsRegs), + ScratchMask = ?AVAILABLE_REGS_MASK band (bnot (ArgsMask bor ParamMask)), + Temp = first_avail(ScratchMask), + TempBit = reg_bit(Temp), + AvailableRegs1 = ScratchMask band (bnot TempBit), + UsedRegs = ?AVAILABLE_REGS_MASK band (bnot AvailableRegs1), PrepCall = load_primitive_ptr(Primitive, Temp), Stream1 = StreamModule:append(Stream0, PrepCall), @@ -686,7 +717,10 @@ call_primitive_last( State2 = set_registers_args(State1, ArgsForTailCall, 0), 
tail_call_with_jit_state_registers_only(State2, Temp) end, - State5 = State4#state{available_regs = ?AVAILABLE_REGS, used_regs = []}, + State5 = State4#state{ + available_regs = ?AVAILABLE_REGS_MASK, + used_regs = 0 + }, flush_literal_pool(State5). %%----------------------------------------------------------------------------- @@ -756,13 +790,11 @@ return_if_not_equal_to_ctx( I4 = jit_armv6m_asm:pop([r1, r4, r5, r6, r7, pc]), I2 = jit_armv6m_asm:bcc(eq, 2 + byte_size(I3) + byte_size(I4)), Stream1 = StreamModule:append(Stream0, <>), - {AvailableRegs1, UsedRegs1} = free_reg( - AvailableRegs0, UsedRegs0, Reg - ), + RegBit = reg_bit(Reg), State#state{ stream = Stream1, - available_regs = AvailableRegs1, - used_regs = UsedRegs1 + available_regs = AvailableRegs0 bor RegBit, + used_regs = UsedRegs0 band (bnot RegBit) }. %%----------------------------------------------------------------------------- @@ -803,11 +835,12 @@ jump_to_continuation( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Temp | _], + available_regs = Available, offset = BaseOffset } = State0, {free, OffsetReg} ) -> + Temp = first_avail(Available), % ARM v6-M PIC implementation using one temp register: % 1. Use ADR to get PC into temp register % 2. Add PC to OffsetReg to get intermediate value @@ -850,7 +883,7 @@ jump_to_continuation( Code = <>, Stream2 = StreamModule:append(State1#state.stream, Code), % Free all registers as this is a terminal instruction - State2 = State1#state{stream = Stream2, available_regs = ?AVAILABLE_REGS, used_regs = []}, + State2 = State1#state{stream = Stream2, available_regs = ?AVAILABLE_REGS_MASK, used_regs = 0}, flush_literal_pool(State2). 
branch_to_offset_code(_State, Offset, TargetOffset) when @@ -860,8 +893,9 @@ branch_to_offset_code(_State, Offset, TargetOffset) when Rel = TargetOffset - Offset, jit_armv6m_asm:b(Rel); branch_to_offset_code( - #state{available_regs = [TempReg | _]}, Offset, TargetOffset -) -> + #state{available_regs = Available}, Offset, TargetOffset +) when Available =/= 0 -> + TempReg = first_avail(Available), % Far branch: use register-based sequence, need temporary register if Offset rem 4 =:= 0 -> @@ -888,8 +922,9 @@ branch_to_label_code(State, Offset, Label, {Label, LabelOffset}) -> CodeBlock = branch_to_offset_code(State, Offset, LabelOffset), {State, CodeBlock}; branch_to_label_code( - #state{available_regs = [TempReg | _], branches = Branches} = State0, Offset, Label, false -) -> + #state{available_regs = Available, branches = Branches} = State0, Offset, Label, false +) when Available =/= 0 -> + TempReg = first_avail(Available), SequenceSize = if Offset rem 4 =:= 0 -> @@ -919,7 +954,7 @@ branch_to_label_code( State1 = State0#state{branches = [Reloc | Branches]}, {State1, CodeBlock}; branch_to_label_code( - #state{available_regs = [], branches = Branches} = State0, Offset, Label, false + #state{available_regs = 0, branches = Branches} = State0, Offset, Label, false ) -> SequenceSize = if @@ -960,7 +995,7 @@ branch_to_label_code( Reloc = {Label, Offset, {far_branch, SequenceSize, ?IP_REG}}, State1 = State0#state{branches = [Reloc | Branches]}, {State1, CodeBlock}; -branch_to_label_code(#state{available_regs = []}, _Offset, _Label, _LabelLookup) -> +branch_to_label_code(#state{available_regs = 0}, _Offset, _Label, _LabelLookup) -> error({no_available_registers, _LabelLookup}). 
%%----------------------------------------------------------------------------- @@ -1091,9 +1126,10 @@ if_block_cond( {State2, le, byte_size(I1)}; %% Handle {Val, '<', Reg} for values > 255, need to load into temp register if_block_cond( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0, + #state{stream_module = StreamModule, stream = Stream0, available_regs = Available} = State0, {Val, '<', RegOrTuple} -) when is_integer(Val) -> +) when is_integer(Val), Available =/= 0 -> + Temp = first_avail(Available), Reg = case RegOrTuple of {free, Reg0} -> Reg0; @@ -1146,9 +1182,10 @@ if_block_cond( State2 = State1#state{stream = Stream1}, {State2, CC, byte_size(I1)}; if_block_cond( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0, + #state{stream_module = StreamModule, stream = Stream0, available_regs = Available} = State0, {RegOrTuple, '<', Val} -) when is_integer(Val) -> +) when is_integer(Val), Available =/= 0 -> + Temp = first_avail(Available), Reg = case RegOrTuple of {free, Reg0} -> Reg0; @@ -1167,9 +1204,10 @@ if_block_cond( State3 = State2#state{stream = Stream2}, {State3, CC, Offset1 - Offset0 + byte_size(I1)}; if_block_cond( - #state{stream_module = StreamModule, available_regs = [Temp | _]} = State0, + #state{stream_module = StreamModule, available_regs = Available} = State0, {Val, '<', RegOrTuple} -) when is_integer(Val) -> +) when is_integer(Val), Available =/= 0 -> + Temp = first_avail(Available), Reg = case RegOrTuple of {free, Reg0} -> Reg0; @@ -1272,9 +1310,10 @@ if_block_cond( State3 = if_block_free_reg({free, RegB}, State2), {State3, CC, byte_size(I1)}; if_block_cond( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0, + #state{stream_module = StreamModule, stream = Stream0, available_regs = Available} = State0, {RegOrTuple, '==', Val} -) when is_integer(Val) -> +) when is_integer(Val), Available =/= 0 -> + Temp = 
first_avail(Available), Offset0 = StreamModule:offset(Stream0), Reg = case RegOrTuple of @@ -1292,9 +1331,10 @@ if_block_cond( State3 = State2#state{stream = Stream2}, {State3, CC, Offset1 - Offset0 + byte_size(I1)}; if_block_cond( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0, + #state{stream_module = StreamModule, stream = Stream0, available_regs = Available} = State0, {RegOrTuple, '!=', Val} -) when is_integer(Val) -> +) when is_integer(Val), Available =/= 0 -> + Temp = first_avail(Available), Offset0 = StreamModule:offset(Stream0), Reg = case RegOrTuple of @@ -1315,10 +1355,11 @@ if_block_cond( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Temp | _] + available_regs = Available } = State0, {'(bool)', RegOrTuple, '==', false} -) -> +) when Available =/= 0 -> + Temp = first_avail(Available), Reg = case RegOrTuple of {free, Reg0} -> Reg0; @@ -1337,10 +1378,11 @@ if_block_cond( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Temp | _] + available_regs = Available } = State0, {'(bool)', RegOrTuple, '!=', false} -) -> +) when Available =/= 0 -> + Temp = first_avail(Available), Reg = case RegOrTuple of {free, Reg0} -> Reg0; @@ -1359,10 +1401,11 @@ if_block_cond( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Temp | _] + available_regs = Available } = State0, {RegOrTuple, '&', Val, '!=', 0} -) -> +) when Available =/= 0 -> + Temp = first_avail(Available), Reg = case RegOrTuple of {free, Reg0} -> Reg0; @@ -1393,10 +1436,11 @@ if_block_cond( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Temp | _] + available_regs = Available } = State0, {Reg, '&', 16#F, '!=', 16#F} -) when ?IS_GPR(Reg) -> +) when ?IS_GPR(Reg), Available =/= 0 -> + Temp = first_avail(Available), % Special case Reg & ?TERM_IMMED_TAG_MASK != ?TERM_INTEGER_TAG I1 = jit_armv6m_asm:mvns(Temp, Reg), % 32 - 4 @@ -1427,10 +1471,13 @@ if_block_cond( 
#state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Temp | AT] + available_regs = Available } = State0, {Reg, '&', Mask, '!=', Val} -) when ?IS_GPR(Reg) -> +) when ?IS_GPR(Reg), Available =/= 0 -> + Temp = first_avail(Available), + TempBit = reg_bit(Temp), + AT = Available band (bnot TempBit), % AND with mask OffsetBefore = StreamModule:offset(Stream0), I1 = jit_armv6m_asm:mov(Temp, Reg), @@ -1445,7 +1492,9 @@ if_block_cond( CC = eq, ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2), Stream4 = StreamModule:append(Stream3, <<16#FFFF:16>>), - State3 = State2#state{stream = Stream4, available_regs = [Temp | State2#state.available_regs]}, + State3 = State2#state{ + stream = Stream4, available_regs = State2#state.available_regs bor TempBit + }, {State3, CC, OffsetAfter - OffsetBefore}; if_block_cond( #state{ @@ -1472,7 +1521,9 @@ if_block_cond( -spec if_block_free_reg(armv6m_register() | {free, armv6m_register()}, state()) -> state(). if_block_free_reg({free, Reg}, State0) -> #state{available_regs = AvR0, used_regs = UR0} = State0, - {AvR1, UR1} = free_reg(AvR0, UR0, Reg), + Bit = reg_bit(Reg), + AvR1 = AvR0 bor Bit, + UR1 = UR0 band (bnot Bit), State0#state{ available_regs = AvR1, used_regs = UR1 @@ -1493,22 +1544,11 @@ bit_test_optimization(16#F) -> {low_bits_mask, 4}; bit_test_optimization(16#3F) -> {low_bits_mask, 6}; bit_test_optimization(_) -> no_optimization. --spec merge_used_regs(state(), [armv6m_register()]) -> state(). -merge_used_regs(#state{used_regs = UR0, available_regs = AvR0} = State, [ - Reg | T -]) -> - case lists:member(Reg, UR0) of - true -> - merge_used_regs(State, T); - false -> - AvR1 = lists:delete(Reg, AvR0), - UR1 = [Reg | UR0], - merge_used_regs( - State#state{used_regs = UR1, available_regs = AvR1}, T - ) - end; -merge_used_regs(State, []) -> - State. +-spec merge_used_regs(state(), non_neg_integer()) -> state(). 
+merge_used_regs(#state{used_regs = UR} = State, OtherUR) -> + MergedUR = UR bor OtherUR, + MergedAvail = ?AVAILABLE_REGS_MASK band (bnot MergedUR), + State#state{used_regs = MergedUR, available_regs = MergedAvail}. %%----------------------------------------------------------------------------- %% @doc Emit a shift register right by a fixed number of bits, effectively @@ -1530,7 +1570,7 @@ shift_right( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [ResultReg | T], + available_regs = Avail, used_regs = UR } = State, Reg, @@ -1538,9 +1578,16 @@ shift_right( ) when ?IS_GPR(Reg) andalso is_integer(Shift) -> + ResultReg = first_avail(Avail), + Bit = reg_bit(ResultReg), I = jit_armv6m_asm:lsrs(ResultReg, Reg, Shift), Stream1 = StreamModule:append(Stream0, I), - {State#state{stream = Stream1, available_regs = T, used_regs = [ResultReg | UR]}, ResultReg}. + { + State#state{ + stream = Stream1, available_regs = Avail band (bnot Bit), used_regs = UR bor Bit + }, + ResultReg + }. 
%%----------------------------------------------------------------------------- %% @doc Emit a shift register left by a fixed number of bits, effectively @@ -1572,8 +1619,8 @@ call_func_ptr( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = AvailableRegs0, - used_regs = UsedRegs0 + available_regs = AvailableRegs0Mask, + used_regs = UsedRegs0Mask } = State0, FuncPtrTuple, Args @@ -1586,22 +1633,35 @@ call_func_ptr( end, [FuncPtrTuple | Args] ), - UsedRegs1 = UsedRegs0 -- FreeRegs, - SavedRegsBase = [?CTX_REG, ?NATIVE_INTERFACE_REG | UsedRegs1], + FreeMask = regs_to_mask(FreeRegs), + UsedRegs1Mask = UsedRegs0Mask band (bnot FreeMask), + SavedRegsBase = [?CTX_REG, ?NATIVE_INTERFACE_REG | mask_to_list(UsedRegs1Mask)], % Calculate available registers for potential padding - FreeGPRegs = FreeRegs -- (FreeRegs -- ?AVAILABLE_REGS), - AvailableRegs1 = FreeGPRegs ++ AvailableRegs0, + FreeGPMask = FreeMask band ?AVAILABLE_REGS_MASK, + AvailableRegs1Mask = FreeGPMask bor AvailableRegs0Mask, % Add padding register if odd number to maintain 8-byte stack alignment per ARM AAPCS - SavedRegs = + % Prefer the func ptr register for padding since it's being freed anyway + {SavedRegs, PaddingReg} = case (length(SavedRegsBase) rem 2) =:= 1 of - true when AvailableRegs1 /= [] -> - [PaddingReg | _] = AvailableRegs1, - SavedRegsBase ++ [PaddingReg]; + true when AvailableRegs1Mask =/= 0 -> + PaddingReg0 = + case FuncPtrTuple of + {free, FPR} -> + case AvailableRegs1Mask band reg_bit(FPR) =/= 0 of + true -> FPR; + false when FreeGPMask =/= 0 -> first_avail(FreeGPMask); + false -> first_avail(AvailableRegs0Mask) + end; + _ when FreeGPMask =/= 0 -> + first_avail(FreeGPMask); + _ -> + first_avail(AvailableRegs0Mask) + end, + {SavedRegsBase ++ [PaddingReg0], PaddingReg0}; _ -> - PaddingReg = undefined, - SavedRegsBase + {SavedRegsBase, undefined} end, Stream1 = push_registers(SavedRegs, StreamModule, Stream0), @@ -1625,13 +1685,14 @@ call_func_ptr( end, RegArgsRegs = 
lists:flatmap(fun arg_to_reg_list/1, RegArgs0), StackArgsRegs = lists:flatmap(fun arg_to_reg_list/1, StackArgs), + AllArgsRegsMask = regs_to_mask(RegArgsRegs ++ StackArgsRegs), % We pushed registers to stack, so we can use these registers we saved % and the currently available registers to push values to the stack. - SetArgsPushStackAvailableArgs = (UsedRegs1 -- (RegArgsRegs ++ StackArgsRegs)) ++ AvailableRegs0, + SetArgsPushStackMask = (UsedRegs1Mask band (bnot AllArgsRegsMask)) bor AvailableRegs0Mask, State1 = State0#state{ - available_regs = SetArgsPushStackAvailableArgs, - used_regs = ?AVAILABLE_REGS -- SetArgsPushStackAvailableArgs, + available_regs = SetArgsPushStackMask, + used_regs = ?AVAILABLE_REGS_MASK band (bnot SetArgsPushStackMask), stream = Stream1 }, State2 = @@ -1641,61 +1702,67 @@ call_func_ptr( [Arg5, Args6] -> set_stack_args(State1, Arg5, Args6) end, - SetArgsRegsOnlyAvailableArgs = State2#state.available_regs, + SetArgsMask = State2#state.available_regs, ParameterRegs = parameter_regs(RegArgs0), - {Stream3, SetArgsAvailableRegs, FuncPtrReg, RegArgs} = + ParamMask = regs_to_mask(ParameterRegs), + RegArgsRegsMask = regs_to_mask(RegArgsRegs), + {Stream3, SetArgsAvailMask, FuncPtrReg, RegArgs} = case FuncPtrTuple of {free, FuncPtrReg0} -> + FuncPtrReg0Bit = reg_bit(FuncPtrReg0), % If FuncPtrReg is in parameter regs, we must swap it with a free reg. 
- case lists:member(FuncPtrReg0, ParameterRegs) of + case ParamMask band FuncPtrReg0Bit =/= 0 of true -> - case SetArgsRegsOnlyAvailableArgs -- ParameterRegs of - [] -> - % Swap SetArgsRegsOnlyAvailableArgs with a reg used in RegArgs0 + AvailNotParam = SetArgsMask band (bnot ParamMask), + case AvailNotParam of + 0 -> + % Swap with a reg used in RegArgs0 % that is not in ParameterRegs - [NewArgReg | _] = SetArgsRegsOnlyAvailableArgs, - [FuncPtrReg1 | _] = RegArgsRegs -- ParameterRegs, + NewArgReg = first_avail(SetArgsMask), + FuncPtrReg1 = first_avail(RegArgsRegsMask band (bnot ParamMask)), + FuncPtrReg1Bit = reg_bit(FuncPtrReg1), MovInstr1 = jit_armv6m_asm:mov(NewArgReg, FuncPtrReg1), MovInstr2 = jit_armv6m_asm:mov(FuncPtrReg1, FuncPtrReg0), - SetArgsAvailableArgs1 = - (SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg1]) ++ - [FuncPtrReg0], + SetArgsAvailMask1 = + (SetArgsMask band (bnot FuncPtrReg1Bit)) bor FuncPtrReg0Bit, RegArgs1 = replace_reg(RegArgs0, FuncPtrReg1, NewArgReg), { StreamModule:append( State2#state.stream, <> ), - SetArgsAvailableArgs1, + SetArgsAvailMask1, FuncPtrReg1, RegArgs1 }; - [FuncPtrReg1 | _] -> + _ -> + FuncPtrReg1 = first_avail(AvailNotParam), + FuncPtrReg1Bit = reg_bit(FuncPtrReg1), MovInstr = jit_armv6m_asm:mov(FuncPtrReg1, FuncPtrReg0), - SetArgsAvailableArgs1 = - (SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg1]) ++ - [FuncPtrReg0], + SetArgsAvailMask1 = + (SetArgsMask band (bnot FuncPtrReg1Bit)) bor FuncPtrReg0Bit, { StreamModule:append(State2#state.stream, MovInstr), - SetArgsAvailableArgs1, + SetArgsAvailMask1, FuncPtrReg1, RegArgs0 } end; false -> - SetArgsAvailableArgs1 = SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg0], - {State2#state.stream, SetArgsAvailableArgs1, FuncPtrReg0, RegArgs0} + SetArgsAvailMask1 = SetArgsMask band (bnot FuncPtrReg0Bit), + {State2#state.stream, SetArgsAvailMask1, FuncPtrReg0, RegArgs0} end; {primitive, Primitive} -> - [FuncPtrReg0 | _] = SetArgsRegsOnlyAvailableArgs -- ParameterRegs, - SetArgsAvailableRegs1 = 
SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg0], + FuncPtrReg0 = first_avail(SetArgsMask band (bnot ParamMask)), + FuncPtrReg0Bit = reg_bit(FuncPtrReg0), + SetArgsAvailMask1 = SetArgsMask band (bnot FuncPtrReg0Bit), PrepCall = load_primitive_ptr(Primitive, FuncPtrReg0), Stream2 = StreamModule:append(State2#state.stream, PrepCall), - {Stream2, SetArgsAvailableRegs1, FuncPtrReg0, RegArgs0} + {Stream2, SetArgsAvailMask1, FuncPtrReg0, RegArgs0} end, State3 = State2#state{ - available_regs = SetArgsAvailableRegs, - used_regs = ?AVAILABLE_REGS -- SetArgsAvailableRegs, + available_regs = SetArgsAvailMask, + used_regs = ?AVAILABLE_REGS_MASK band (bnot SetArgsAvailMask), stream = Stream3 }, @@ -1715,27 +1782,49 @@ call_func_ptr( % not the one used for padding. If none are available (all 8 registers % were pushed to the stack), we write the result to the stack position % of FuncPtrReg - {Stream6, UsedRegs2} = - case length(SavedRegs) of - 8 when element(1, FuncPtrTuple) =:= free -> - % We use original FuncPtrReg then as we know it's available. - % Calculate stack offset: register number * 4 bytes - ResultReg = element(2, FuncPtrTuple), - StoreResultStackOffset = jit_armv6m_asm:reg_to_num(ResultReg) * 4, - StoreResult = jit_armv6m_asm:str(r0, {sp, StoreResultStackOffset}), - {StreamModule:append(Stream5, StoreResult), [ResultReg | UsedRegs1]}; - 8 when PaddingReg =/= undefined -> - % We use PaddingReg then as we know it's available. 
- % Calculate stack offset: register number * 4 bytes - ResultReg = PaddingReg, - StoreResultStackOffset = jit_armv6m_asm:reg_to_num(ResultReg) * 4, - StoreResult = jit_armv6m_asm:str(r0, {sp, StoreResultStackOffset}), - {StreamModule:append(Stream5, StoreResult), [PaddingReg | UsedRegs1]}; + {Stream6, UsedRegs2Mask, ResultReg} = + case {length(SavedRegs), FuncPtrTuple} of + {8, {free, ResultFPReg0}} -> + % Registers exhausted: use FuncPtrReg which is free after the call + StoreResultStackOffset0 = jit_armv6m_asm:reg_to_num(ResultFPReg0) * 4, + StoreResult0 = jit_armv6m_asm:str(r0, {sp, StoreResultStackOffset0}), + { + StreamModule:append(Stream5, StoreResult0), + UsedRegs1Mask bor reg_bit(ResultFPReg0), + ResultFPReg0 + }; + {8, _} when PaddingReg =/= undefined -> + % Use PaddingReg which was saved just for alignment + StoreResultStackOffset1 = jit_armv6m_asm:reg_to_num(PaddingReg) * 4, + StoreResult1 = jit_armv6m_asm:str(r0, {sp, StoreResultStackOffset1}), + { + StreamModule:append(Stream5, StoreResult1), + UsedRegs1Mask bor reg_bit(PaddingReg), + PaddingReg + }; + {_, {free, ResultFPReg1}} when ResultFPReg1 =/= PaddingReg -> + % FuncPtrReg is free after the call and not saved, use it for result + MoveResult0 = jit_armv6m_asm:mov(ResultFPReg1, r0), + { + StreamModule:append(Stream5, MoveResult0), + UsedRegs1Mask bor reg_bit(ResultFPReg1), + ResultFPReg1 + }; _ -> - % Use any free that is not in SavedRegs - [ResultReg | _] = AvailableRegs1 -- SavedRegs, - MoveResult = jit_armv6m_asm:mov(ResultReg, r0), - {StreamModule:append(Stream5, MoveResult), [ResultReg | UsedRegs1]} + PaddingRegBit = + case PaddingReg of + undefined -> 0; + _ -> reg_bit(PaddingReg) + end, + AvailForResultMask = + AvailableRegs1Mask band (bnot (UsedRegs1Mask bor PaddingRegBit)), + ResultReg0 = first_avail(AvailForResultMask), + MoveResult0 = jit_armv6m_asm:mov(ResultReg0, r0), + { + StreamModule:append(Stream5, MoveResult0), + UsedRegs1Mask bor reg_bit(ResultReg0), + ResultReg0 + } end, % 
Deallocate stack space if we allocated it for 5+ arguments @@ -1750,13 +1839,13 @@ call_func_ptr( Stream8 = pop_registers(lists:reverse(SavedRegs), StreamModule, Stream7), - AvailableRegs2 = lists:delete(ResultReg, AvailableRegs1), - AvailableRegs3 = ?AVAILABLE_REGS -- (?AVAILABLE_REGS -- AvailableRegs2), + ResultRegBit = reg_bit(ResultReg), + AvailableRegs3Mask = (AvailableRegs1Mask band (bnot ResultRegBit)) band ?AVAILABLE_REGS_MASK, { State4#state{ stream = Stream8, - available_regs = AvailableRegs3, - used_regs = UsedRegs2 + available_regs = AvailableRegs3Mask, + used_regs = UsedRegs2Mask }, ResultReg }. @@ -1844,30 +1933,34 @@ set_registers_args(State0, Args, StackOffset) -> set_registers_args(State0, Args, ParamRegs, StackOffset). set_registers_args( - #state{used_regs = UsedRegs} = State0, + #state{used_regs = UsedRegsMask} = State0, Args, ParamRegs, StackOffset ) -> ArgsRegs = args_regs(Args), - AvailableScratchGP = ((?SCRATCH_REGS -- ParamRegs) -- ArgsRegs) -- UsedRegs, + ParamMask = regs_to_mask(ParamRegs), + ArgsMask = regs_to_mask(ArgsRegs), + AvailableScratchMask = + ?SCRATCH_REGS_MASK band (bnot (ParamMask bor ArgsMask bor UsedRegsMask)), + AvailableScratchGP = mask_to_list(AvailableScratchMask), State1 = set_registers_args0( State0, Args, ArgsRegs, ParamRegs, AvailableScratchGP, StackOffset ), Stream1 = State1#state.stream, - NewUsedRegs = lists:foldl( + NewUsedMask = lists:foldl( fun - ({free, {ptr, Reg}}, AccUsed) -> lists:delete(Reg, AccUsed); - ({free, Reg}, AccUsed) -> lists:delete(Reg, AccUsed); + ({free, {ptr, Reg}}, AccUsed) -> AccUsed band (bnot reg_bit(Reg)); + ({free, Reg}, AccUsed) when is_atom(Reg) -> AccUsed band (bnot reg_bit(Reg)); (_, AccUsed) -> AccUsed end, - UsedRegs, + UsedRegsMask, Args ), State1#state{ stream = Stream1, - available_regs = ?AVAILABLE_REGS -- ParamRegs -- NewUsedRegs, - used_regs = ParamRegs ++ (NewUsedRegs -- ParamRegs) + available_regs = ?AVAILABLE_REGS_MASK band (bnot (ParamMask bor NewUsedMask)), + used_regs 
= ParamMask bor NewUsedMask }. parameter_regs(Args) -> @@ -2027,52 +2120,70 @@ move_to_vm_register(State0, Src, {ptr, Reg}) when is_atom(Src) -> I1 = jit_armv6m_asm:str(Src, {Reg, 0}), Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), State0#state{stream = Stream1}; -move_to_vm_register(#state{available_regs = [Temp1 | AT]} = State0, Src, {y_reg, Y}) when +move_to_vm_register(#state{available_regs = Avail} = State0, Src, {y_reg, Y}) when is_atom(Src) -> + Temp1 = first_avail(Avail), + AT = Avail band (bnot reg_bit(Temp1)), Code = str_y_reg(Src, Y, Temp1, AT), Stream1 = (State0#state.stream_module):append(State0#state.stream, Code), State0#state{stream = Stream1}; % Source is an integer to y_reg (optimized: ldr first, then movs) -move_to_vm_register(#state{available_regs = [Temp1, Temp2 | AT]} = State0, N, {y_reg, Y}) when +move_to_vm_register(#state{available_regs = Avail} = State0, N, {y_reg, Y}) when is_integer(N), N >= 0, N =< 255 -> + Temp1 = first_avail(Avail), + Avail1 = Avail band (bnot reg_bit(Temp1)), + Temp2 = first_avail(Avail1), + AT = Avail1 band (bnot reg_bit(Temp2)), I1 = jit_armv6m_asm:movs(Temp2, N), YCode = str_y_reg(Temp2, Y, Temp1, AT), Stream1 = (State0#state.stream_module):append(State0#state.stream, <>), State0#state{stream = Stream1}; % Source is an integer (0-255 for movs, negative values need different handling) -move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, N, Dest) when +move_to_vm_register(#state{available_regs = AR0} = State0, N, Dest) when is_integer(N), N >= 0, N =< 255 -> + Temp = first_avail(AR0), + AT = AR0 band (bnot reg_bit(Temp)), I1 = jit_armv6m_asm:movs(Temp, N), Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), State1#state{available_regs = AR0}; %% Handle large values using simple literal pool (branch-over pattern) -move_to_vm_register(#state{available_regs = 
[Temp | AT] = AR0} = State0, N, Dest) when +move_to_vm_register(#state{available_regs = AR0} = State0, N, Dest) when is_integer(N) -> + Temp = first_avail(AR0), + AT = AR0 band (bnot reg_bit(Temp)), State1 = mov_immediate(State0#state{available_regs = AT}, Temp, N), State2 = move_to_vm_register(State1, Temp, Dest), State2#state{available_regs = AR0}; % Source is a VM register -move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {x_reg, extra}, Dest) -> +move_to_vm_register(#state{available_regs = AR0} = State0, {x_reg, extra}, Dest) -> + Temp = first_avail(AR0), + AT = AR0 band (bnot reg_bit(Temp)), I1 = jit_armv6m_asm:ldr(Temp, ?X_REG(?MAX_REG)), Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), State1#state{available_regs = AR0}; -move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {x_reg, X}, Dest) -> +move_to_vm_register(#state{available_regs = AR0} = State0, {x_reg, X}, Dest) -> + Temp = first_avail(AR0), + AT = AR0 band (bnot reg_bit(Temp)), I1 = jit_armv6m_asm:ldr(Temp, ?X_REG(X)), Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), State1#state{available_regs = AR0}; -move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {ptr, Reg}, Dest) -> +move_to_vm_register(#state{available_regs = AR0} = State0, {ptr, Reg}, Dest) -> + Temp = first_avail(AR0), + AT = AR0 band (bnot reg_bit(Temp)), I1 = jit_armv6m_asm:ldr(Temp, {Reg, 0}), Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), State1#state{available_regs = AR0}; -move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {y_reg, Y}, Dest) -> +move_to_vm_register(#state{available_regs = AR0} = 
State0, {y_reg, Y}, Dest) -> + Temp = first_avail(AR0), + AT = AR0 band (bnot reg_bit(Temp)), Code = ldr_y_reg(Temp, Y, AT), Stream1 = (State0#state.stream_module):append(State0#state.stream, Code), State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), @@ -2081,7 +2192,7 @@ move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {y_reg, move_to_vm_register( #state{ stream_module = StreamModule, - available_regs = [Temp1, Temp2 | _], + available_regs = Avail, stream = Stream0, variant = Variant } = @@ -2089,6 +2200,8 @@ move_to_vm_register( {free, {ptr, Reg, 1}}, {fp_reg, F} ) -> + Temp1 = first_avail(Avail), + Temp2 = first_avail(Avail band (bnot reg_bit(Temp1))), I1 = jit_armv6m_asm:ldr(Temp1, ?FP_REGS), I2 = jit_armv6m_asm:ldr(Temp2, {Reg, 4}), case Variant band ?JIT_VARIANT_FLOAT32 of @@ -2123,22 +2236,25 @@ move_to_vm_register( vm_register() | armv6m_register() ) -> state(). move_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, + #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State, Reg, Index, {x_reg, X} ) when X < ?MAX_REG andalso is_atom(Reg) andalso is_integer(Index) andalso Index * 4 =< 124 -> + Temp = first_avail(Avail), I1 = jit_armv6m_asm:ldr(Temp, {Reg, Index * 4}), I2 = jit_armv6m_asm:str(Temp, ?X_REG(X)), Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}; move_array_element( - #state{stream_module = StreamModule, available_regs = [Temp1, Temp2 | _]} = + #state{stream_module = StreamModule, available_regs = Avail} = State, Reg, Index, {x_reg, X} ) when X < ?MAX_REG andalso is_atom(Reg) andalso is_integer(Index) -> + Temp1 = first_avail(Avail), + Temp2 = first_avail(Avail band (bnot reg_bit(Temp1))), % For large offsets, use max offset (124) in ldr + remainder in temp register Offset = Index * 4, LdrOffset = 124, @@ -2155,22 +2271,24 @@ move_array_element( Stream2 = 
StreamModule:append(Stream1, <>), State1#state{stream = Stream2}; move_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, + #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State, Reg, Index, {ptr, Dest} ) when is_atom(Reg) andalso is_integer(Index) andalso Index * 4 =< 124 -> + Temp = first_avail(Avail), I1 = jit_armv6m_asm:ldr(Temp, {Reg, Index * 4}), I2 = jit_armv6m_asm:str(Temp, {Dest, 0}), Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}; move_array_element( - #state{stream_module = StreamModule, available_regs = [Temp | _]} = + #state{stream_module = StreamModule, available_regs = Avail} = State, Reg, Index, {ptr, Dest} ) when is_atom(Reg) andalso is_integer(Index) -> + Temp = first_avail(Avail), % For large offsets, use max offset (124) in ldr + remainder in temp register Offset = Index * 4, LdrOffset = 124, @@ -2184,12 +2302,16 @@ move_array_element( Stream2 = StreamModule:append(Stream1, <>), State1#state{stream = Stream2}; move_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp1, Temp2 | AT]} = + #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State, Reg, Index, {y_reg, Y} ) when is_atom(Reg) andalso is_integer(Index) andalso Index * 4 =< 124 -> + Temp1 = first_avail(Avail), + Avail1 = Avail band (bnot reg_bit(Temp1)), + Temp2 = first_avail(Avail1), + AT = Avail1 band (bnot reg_bit(Temp2)), I1 = jit_armv6m_asm:ldr(Temp2, {Reg, Index * 4}), YCode = str_y_reg(Temp2, Y, Temp1, AT), Code = <>, @@ -2197,13 +2319,17 @@ move_array_element( State#state{stream = Stream1}; move_array_element( #state{ - stream_module = StreamModule, available_regs = [Temp1, Temp2 | AT] + stream_module = StreamModule, available_regs = Avail } = State, Reg, Index, {y_reg, Y} ) when is_atom(Reg) andalso is_integer(Index) -> + Temp1 = first_avail(Avail), + Avail1 = Avail band (bnot 
reg_bit(Temp1)), + Temp2 = first_avail(Avail1), + AT = Avail1 band (bnot reg_bit(Temp2)), % For large offsets, use max offset (124) in ldr + remainder in temp register Offset = Index * 4, LdrOffset = 124, @@ -2217,12 +2343,14 @@ move_array_element( Stream2 = StreamModule:append(Stream1, Code), State1#state{stream = Stream2}; move_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | AT]} = + #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State, {free, Reg}, Index, {y_reg, Y} ) when is_integer(Index) -> + Temp = first_avail(Avail), + AT = Avail band (bnot reg_bit(Temp)), I1 = jit_armv6m_asm:ldr(Reg, {Reg, Index * 4}), YCode = str_y_reg(Reg, Y, Temp, AT), Code = <>, @@ -2248,7 +2376,9 @@ move_array_element( I1 = jit_armv6m_asm:lsls(IndexReg, IndexReg, 2), I2 = jit_armv6m_asm:ldr(IndexReg, {Reg, IndexReg}), I3 = jit_armv6m_asm:str(IndexReg, ?X_REG(X)), - {AvailableRegs1, UsedRegs1} = free_reg(AvailableRegs0, UsedRegs0, IndexReg), + Bit = reg_bit(IndexReg), + AvailableRegs1 = AvailableRegs0 bor Bit, + UsedRegs1 = UsedRegs0 band (bnot Bit), Stream1 = StreamModule:append(Stream0, <>), State#state{ available_regs = AvailableRegs1, @@ -2269,9 +2399,9 @@ move_array_element( I1 = jit_armv6m_asm:lsls(IndexReg, IndexReg, 2), I2 = jit_armv6m_asm:ldr(IndexReg, {Reg, IndexReg}), I3 = jit_armv6m_asm:str(IndexReg, {PtrReg, 0}), - {AvailableRegs1, UsedRegs1} = free_reg( - AvailableRegs0, UsedRegs0, IndexReg - ), + Bit = reg_bit(IndexReg), + AvailableRegs1 = AvailableRegs0 bor Bit, + UsedRegs1 = UsedRegs0 band (bnot Bit), Stream1 = StreamModule:append(Stream0, <>), State#state{ available_regs = AvailableRegs1, @@ -2282,7 +2412,7 @@ move_array_element( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Temp | AT] = AvailableRegs0, + available_regs = AvailableRegs0, used_regs = UsedRegs0 } = State, Reg, @@ -2291,11 +2421,14 @@ move_array_element( ) when is_atom(IndexReg) -> I1 = 
jit_armv6m_asm:lsls(IndexReg, IndexReg, 2), I2 = jit_armv6m_asm:ldr(IndexReg, {Reg, IndexReg}), + Temp = first_avail(AvailableRegs0), + TempBit = reg_bit(Temp), + AT = AvailableRegs0 band (bnot TempBit), Code = str_y_reg(IndexReg, Y, Temp, AT), I3 = Code, - {AvailableRegs1, UsedRegs1} = free_reg( - AvailableRegs0, UsedRegs0, IndexReg - ), + Bit = reg_bit(IndexReg), + AvailableRegs1 = AvailableRegs0 bor Bit, + UsedRegs1 = UsedRegs0 band (bnot Bit), Stream1 = StreamModule:append( Stream0, <> ), @@ -2322,11 +2455,12 @@ get_array_element( get_array_element( #state{ stream_module = StreamModule, - available_regs = [Temp | _] + available_regs = Avail } = State, {free, Reg}, Index ) -> + Temp = first_avail(Avail), % For large offsets, split into ldr immediate (max 124) + remainder in temp register Offset = Index * 4, Remainder = Offset - 124, @@ -2340,29 +2474,37 @@ get_array_element( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [ElemReg | AvailableT], + available_regs = Avail, used_regs = UsedRegs0 } = State, Reg, Index ) when Index * 4 =< 124 -> + ElemReg = first_avail(Avail), + ElemBit = reg_bit(ElemReg), I1 = jit_armv6m_asm:ldr(ElemReg, {Reg, Index * 4}), Stream1 = StreamModule:append(Stream0, <>), { State#state{ - stream = Stream1, available_regs = AvailableT, used_regs = [ElemReg | UsedRegs0] + stream = Stream1, + available_regs = Avail band (bnot ElemBit), + used_regs = UsedRegs0 bor ElemBit }, ElemReg }; get_array_element( #state{ stream_module = StreamModule, - available_regs = [ElemReg, Temp | AvailableT], + available_regs = Avail, used_regs = UsedRegs0 } = State, Reg, Index ) -> + ElemReg = first_avail(Avail), + ElemBit = reg_bit(ElemReg), + Avail1 = Avail band (bnot ElemBit), + Temp = first_avail(Avail1), % For large offsets, split into ldr immediate (max 124) + remainder in temp register Offset = Index * 4, Remainder = Offset - 124, @@ -2375,8 +2517,8 @@ get_array_element( { State1#state{ stream = Stream2, - available_regs = 
[Temp | AvailableT], - used_regs = [ElemReg | UsedRegs0] + available_regs = Avail1, + used_regs = UsedRegs0 bor ElemBit }, ElemReg }. @@ -2395,11 +2537,12 @@ move_to_array_element( Stream1 = StreamModule:append(Stream0, I1), State0#state{stream = Stream1}; move_to_array_element( - #state{stream_module = StreamModule, available_regs = [Temp | _]} = State0, + #state{stream_module = StreamModule, available_regs = Avail} = State0, ValueReg, Reg, Index ) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso is_integer(Index) -> + Temp = first_avail(Avail), % For large offsets, split into str immediate (max 124) + remainder in temp register Offset = Index * 4, Remainder = Offset - 124, @@ -2411,11 +2554,12 @@ move_to_array_element( Stream2 = StreamModule:append(Stream1, <>), State1#state{stream = Stream2}; move_to_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0, + #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State0, ValueReg, Reg, IndexReg ) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso ?IS_GPR(IndexReg) -> + Temp = first_avail(Avail), I1 = jit_armv6m_asm:mov(Temp, IndexReg), I2 = jit_armv6m_asm:lsls(Temp, Temp, 2), I3 = jit_armv6m_asm:str(ValueReg, {Reg, Temp}), @@ -2440,12 +2584,13 @@ move_to_array_element( ) when is_integer(IndexReg) andalso is_integer(Offset) andalso Offset div 8 =:= 0 -> move_to_array_element(State, Value, BaseReg, IndexReg + (Offset div 8)); move_to_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, + #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State, ValueReg, BaseReg, IndexReg, Offset ) when ?IS_GPR(ValueReg) andalso ?IS_GPR(IndexReg) andalso is_integer(Offset) -> + Temp = first_avail(Avail), I1 = jit_armv6m_asm:adds(Temp, IndexReg, Offset), I2 = jit_armv6m_asm:lsls(Temp, Temp, 2), I3 = jit_armv6m_asm:str(ValueReg, {BaseReg, Temp}), @@ -2459,7 +2604,7 
@@ move_to_array_element( Offset ) -> {State1, ValueReg} = copy_to_native_register(State0, Value), - [Temp | _] = State1#state.available_regs, + Temp = first_avail(State1#state.available_regs), I1 = jit_armv6m_asm:adds(Temp, IndexReg, Offset), I2 = jit_armv6m_asm:lsls(Temp, Temp, 2), I3 = jit_armv6m_asm:str(ValueReg, {BaseReg, Temp}), @@ -2474,14 +2619,21 @@ move_to_native_register( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Reg | AvailT], + available_regs = Avail, used_regs = Used } = State, cp ) -> + Reg = first_avail(Avail), + Bit = reg_bit(Reg), I1 = jit_armv6m_asm:ldr(Reg, ?CP), Stream1 = StreamModule:append(Stream0, I1), - {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg}; + { + State#state{ + stream = Stream1, used_regs = Used bor Bit, available_regs = Avail band (bnot Bit) + }, + Reg + }; move_to_native_register(State, Reg) when is_atom(Reg) -> {State, Reg}; move_to_native_register( @@ -2492,69 +2644,94 @@ move_to_native_register( {State#state{stream = Stream1}, Reg}; move_to_native_register( #state{ - available_regs = [Reg | AvailT], + available_regs = Avail, used_regs = Used } = State0, Imm ) when is_integer(Imm) -> - State1 = State0#state{used_regs = [Reg | Used], available_regs = AvailT}, + Reg = first_avail(Avail), + Bit = reg_bit(Reg), + State1 = State0#state{used_regs = Used bor Bit, available_regs = Avail band (bnot Bit)}, {move_to_native_register(State1, Imm, Reg), Reg}; move_to_native_register( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Reg | AvailT], + available_regs = Avail, used_regs = Used } = State, {x_reg, extra} ) -> + Reg = first_avail(Avail), + Bit = reg_bit(Reg), I1 = jit_armv6m_asm:ldr(Reg, ?X_REG(?MAX_REG)), Stream1 = StreamModule:append(Stream0, I1), - {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg}; + { + State#state{ + stream = Stream1, used_regs = Used bor Bit, available_regs = Avail band 
(bnot Bit) + }, + Reg + }; move_to_native_register( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Reg | AvailT], + available_regs = Avail, used_regs = Used } = State, {x_reg, X} ) when X < ?MAX_REG -> + Reg = first_avail(Avail), + Bit = reg_bit(Reg), I1 = jit_armv6m_asm:ldr(Reg, ?X_REG(X)), Stream1 = StreamModule:append(Stream0, I1), - {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg}; + { + State#state{ + stream = Stream1, used_regs = Used bor Bit, available_regs = Avail band (bnot Bit) + }, + Reg + }; move_to_native_register( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Reg | AvailT], + available_regs = Avail, used_regs = Used } = State, {y_reg, Y} ) -> + Reg = first_avail(Avail), + Bit = reg_bit(Reg), + AvailT = Avail band (bnot Bit), Code = ldr_y_reg(Reg, Y, AvailT), Stream1 = StreamModule:append(Stream0, Code), - {State#state{stream = Stream1, available_regs = AvailT, used_regs = [Reg | Used]}, Reg}; + {State#state{stream = Stream1, available_regs = AvailT, used_regs = Used bor Bit}, Reg}; move_to_native_register( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [RegA, RegB | AvailT], + available_regs = Avail, used_regs = Used } = State, {fp_reg, F} ) -> + RegA = first_avail(Avail), + BitA = reg_bit(RegA), + Avail1 = Avail band (bnot BitA), + RegB = first_avail(Avail1), + BitB = reg_bit(RegB), + AvailT = Avail1 band (bnot BitB), I1 = jit_armv6m_asm:ldr(RegB, ?FP_REGS), I2 = jit_armv6m_asm:ldr(RegA, {RegB, F * 8}), I3 = jit_armv6m_asm:ldr(RegB, {RegB, F * 8 + 4}), Code = <>, Stream1 = StreamModule:append(Stream0, Code), { - State#state{stream = Stream1, available_regs = AvailT, used_regs = [RegB, RegA | Used]}, + State#state{stream = Stream1, available_regs = AvailT, used_regs = Used bor BitA bor BitB}, {fp, RegA, RegB} }. 
@@ -2615,33 +2792,53 @@ copy_to_native_register( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [SaveReg | AvailT], + available_regs = Avail, used_regs = Used } = State, Reg ) when is_atom(Reg) -> + SaveReg = first_avail(Avail), + SaveBit = reg_bit(SaveReg), I1 = jit_armv6m_asm:mov(SaveReg, Reg), Stream1 = StreamModule:append(Stream0, I1), - {State#state{stream = Stream1, available_regs = AvailT, used_regs = [SaveReg | Used]}, SaveReg}; + { + State#state{ + stream = Stream1, + available_regs = Avail band (bnot SaveBit), + used_regs = Used bor SaveBit + }, + SaveReg + }; copy_to_native_register( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [SaveReg | AvailT], + available_regs = Avail, used_regs = Used } = State, {ptr, Reg} ) when is_atom(Reg) -> + SaveReg = first_avail(Avail), + SaveBit = reg_bit(SaveReg), I1 = jit_armv6m_asm:ldr(SaveReg, {Reg, 0}), Stream1 = StreamModule:append(Stream0, I1), - {State#state{stream = Stream1, available_regs = AvailT, used_regs = [SaveReg | Used]}, SaveReg}; + { + State#state{ + stream = Stream1, + available_regs = Avail band (bnot SaveBit), + used_regs = Used bor SaveBit + }, + SaveReg + }; copy_to_native_register(State, Reg) -> move_to_native_register(State, Reg). move_to_cp( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Reg | AvailT]} = State, + #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State, {y_reg, Y} ) -> + Reg = first_avail(Avail), + AvailT = Avail band (bnot reg_bit(Reg)), I1 = ldr_y_reg(Reg, Y, AvailT), I2 = jit_armv6m_asm:str(Reg, ?CP), Code = <>, @@ -2649,9 +2846,10 @@ move_to_cp( State#state{stream = Stream1}. 
increment_sp( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Reg | _]} = State, + #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State, Offset ) -> + Reg = first_avail(Avail), I1 = jit_armv6m_asm:ldr(Reg, ?Y_REGS), I2 = jit_armv6m_asm:adds(Reg, Offset * 4), I3 = jit_armv6m_asm:str(Reg, ?Y_REGS), @@ -2664,10 +2862,12 @@ set_continuation_to_label( stream_module = StreamModule, stream = Stream0, offset = JumpTableOffset, - available_regs = [Temp1, Temp2 | _] + available_regs = Avail } = State, Label ) -> + Temp1 = first_avail(Avail), + Temp2 = first_avail(Avail band (bnot reg_bit(Temp1))), % Calculate jump table entry offset JumpTableEntryOffset = (Label * ?JUMP_TABLE_ENTRY_SIZE) + JumpTableOffset, @@ -2702,10 +2902,12 @@ set_continuation_to_offset( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Temp, TempJitState | _], + available_regs = Avail, branches = Branches } = State ) -> + Temp = first_avail(Avail), + TempJitState = first_avail(Avail band (bnot reg_bit(Temp))), OffsetRef = make_ref(), Offset = StreamModule:offset(Stream0), ?ASSERT(byte_size(jit_armv6m_asm:adr(Temp, 4)) =:= 2), @@ -2744,10 +2946,14 @@ get_module_index( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Reg, TempJitState | AvailableT], + available_regs = Avail, used_regs = UsedRegs0 } = State ) -> + Reg = first_avail(Avail), + RegBit = reg_bit(Reg), + Avail1 = Avail band (bnot RegBit), + TempJitState = first_avail(Avail1), % Load jit_state pointer from stack, then load module I1a = jit_armv6m_asm:ldr(TempJitState, {sp, ?STACK_OFFSET_JITSTATE}), I1b = jit_armv6m_asm:ldr(Reg, ?JITSTATE_MODULE(TempJitState)), @@ -2757,8 +2963,8 @@ get_module_index( { State#state{ stream = Stream1, - available_regs = [TempJitState | AvailableT], - used_regs = [Reg | UsedRegs0] + available_regs = Avail1, + used_regs = UsedRegs0 bor RegBit }, Reg }. 
@@ -2767,33 +2973,45 @@ get_module_index( %% JIT currentl calls this with two values: ?TERM_PRIMARY_CLEAR_MASK (-4) to %% clear bits and ?TERM_BOXED_TAG_MASK (0x3F). We can avoid any literal pool %% by using BICS for -4. +and_(#state{stream_module = StreamModule, stream = Stream0} = State0, {free, Reg}, SrcReg) when + is_atom(SrcReg) +-> + I = jit_armv6m_asm:ands(Reg, SrcReg), + Stream1 = StreamModule:append(Stream0, I), + {State0#state{stream = Stream1}, Reg}; and_(#state{stream_module = StreamModule, stream = Stream0} = State0, {free, Reg}, 16#FFFFFF) -> I1 = jit_armv6m_asm:lsls(Reg, Reg, 8), I2 = jit_armv6m_asm:lsrs(Reg, Reg, 8), Stream1 = StreamModule:append(Stream0, <>), {State0#state{stream = Stream1}, Reg}; and_( - #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, + #state{stream_module = StreamModule, available_regs = Avail} = State0, {free, Reg}, Val -) when Val < 0 andalso Val >= -256 -> +) when Avail =/= 0 andalso Val < 0 andalso Val >= -256 -> + Temp = first_avail(Avail), + TempBit = reg_bit(Temp), + AT = Avail band (bnot TempBit), State1 = mov_immediate(State0#state{available_regs = AT}, Temp, bnot (Val)), Stream1 = State1#state.stream, I = jit_armv6m_asm:bics(Reg, Temp), Stream2 = StreamModule:append(Stream1, I), - {State1#state{available_regs = [Temp | AT], stream = Stream2}, Reg}; + {State1#state{available_regs = AT bor TempBit, stream = Stream2}, Reg}; and_( - #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, + #state{stream_module = StreamModule, available_regs = Avail} = State0, {free, Reg}, Val -) -> +) when Avail =/= 0 -> + Temp = first_avail(Avail), + TempBit = reg_bit(Temp), + AT = Avail band (bnot TempBit), State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), Stream1 = State1#state.stream, I = jit_armv6m_asm:ands(Reg, Temp), Stream2 = StreamModule:append(Stream1, I), - {State1#state{available_regs = [Temp | AT], stream = Stream2}, Reg}; + {State1#state{available_regs = AT 
bor TempBit, stream = Stream2}, Reg}; and_( - #state{stream_module = StreamModule, available_regs = []} = State0, + #state{stream_module = StreamModule, available_regs = 0} = State0, {free, Reg}, Val ) when Val < 0 andalso Val >= -256 -> @@ -2813,7 +3031,7 @@ and_( Stream4 = StreamModule:append(Stream3, Restore), {State0#state{stream = Stream4}, Reg}; and_( - #state{stream_module = StreamModule, available_regs = []} = State0, + #state{stream_module = StreamModule, available_regs = 0} = State0, {free, Reg}, Val ) -> @@ -2833,26 +3051,43 @@ and_( Stream4 = StreamModule:append(Stream3, Restore), {State0#state{stream = Stream4}, Reg}; and_( - #state{stream_module = StreamModule, available_regs = [ResultReg | AT], used_regs = UR} = + #state{stream_module = StreamModule, available_regs = Avail, used_regs = UR} = State0, Reg, ?TERM_PRIMARY_CLEAR_MASK ) -> + ResultReg = first_avail(Avail), + ResultBit = reg_bit(ResultReg), I1 = jit_armv6m_asm:lsrs(ResultReg, Reg, 2), I2 = jit_armv6m_asm:lsls(ResultReg, ResultReg, 2), Stream1 = StreamModule:append(State0#state.stream, <>), - {State0#state{stream = Stream1, available_regs = AT, used_regs = [ResultReg | UR]}, ResultReg}. + { + State0#state{ + stream = Stream1, + available_regs = Avail band (bnot ResultBit), + used_regs = UR bor ResultBit + }, + ResultReg + }. 
+or_(#state{stream_module = StreamModule, stream = Stream0} = State0, Reg, SrcReg) when + is_atom(SrcReg) +-> + I = jit_armv6m_asm:orrs(Reg, SrcReg), + Stream1 = StreamModule:append(Stream0, I), + State0#state{stream = Stream1}; or_( - #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, + #state{stream_module = StreamModule, available_regs = Avail} = State0, Reg, Val ) -> + Temp = first_avail(Avail), + AT = Avail band (bnot reg_bit(Temp)), State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), Stream1 = State1#state.stream, I = jit_armv6m_asm:orrs(Reg, Temp), Stream2 = StreamModule:append(Stream1, I), - State1#state{available_regs = [Temp | AT], stream = Stream2}. + State1#state{available_regs = Avail, stream = Stream2}. add(#state{stream_module = StreamModule, stream = Stream0} = State0, Reg, Val) when (Val >= 0 andalso Val =< 255) orelse is_atom(Val) @@ -2860,12 +3095,14 @@ add(#state{stream_module = StreamModule, stream = Stream0} = State0, Reg, Val) w I = jit_armv6m_asm:adds(Reg, Reg, Val), Stream1 = StreamModule:append(Stream0, I), State0#state{stream = Stream1}; -add(#state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, Reg, Val) -> +add(#state{stream_module = StreamModule, available_regs = Avail} = State0, Reg, Val) -> + Temp = first_avail(Avail), + AT = Avail band (bnot reg_bit(Temp)), State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), Stream1 = State1#state.stream, I = jit_armv6m_asm:adds(Reg, Reg, Temp), Stream2 = StreamModule:append(Stream1, I), - State1#state{available_regs = [Temp | AT], stream = Stream2}. + State1#state{available_regs = Avail, stream = Stream2}. 
mov_immediate(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) when Val >= 0 andalso Val =< 255 @@ -3014,25 +3251,29 @@ sub(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) wh I1 = jit_armv6m_asm:subs(Reg, Reg, Val), Stream1 = StreamModule:append(Stream0, I1), State#state{stream = Stream1}; -sub(#state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, Reg, Val) -> +sub(#state{stream_module = StreamModule, available_regs = Avail} = State0, Reg, Val) -> + Temp = first_avail(Avail), + AT = Avail band (bnot reg_bit(Temp)), State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), Stream1 = State1#state.stream, I = jit_armv6m_asm:subs(Reg, Reg, Temp), Stream2 = StreamModule:append(Stream1, I), - State1#state{available_regs = [Temp | AT], stream = Stream2}. + State1#state{available_regs = Avail, stream = Stream2}. mul(State, _Reg, 1) -> State; mul(State, Reg, 2) -> shift_left(State, Reg, 1); -mul(#state{available_regs = [Temp | _]} = State, Reg, 3) -> +mul(#state{available_regs = Avail} = State, Reg, 3) -> + Temp = first_avail(Avail), I1 = jit_armv6m_asm:lsls(Temp, Reg, 1), I2 = jit_armv6m_asm:adds(Reg, Temp, Reg), Stream1 = (State#state.stream_module):append(State#state.stream, <>), State#state{stream = Stream1}; mul(State, Reg, 4) -> shift_left(State, Reg, 2); -mul(#state{available_regs = [Temp | _]} = State, Reg, 5) -> +mul(#state{available_regs = Avail} = State, Reg, 5) -> + Temp = first_avail(Avail), I1 = jit_armv6m_asm:lsls(Temp, Reg, 2), I2 = jit_armv6m_asm:adds(Reg, Temp, Reg), Stream1 = (State#state.stream_module):append(State#state.stream, <>), @@ -3040,14 +3281,16 @@ mul(#state{available_regs = [Temp | _]} = State, Reg, 5) -> mul(State0, Reg, 6) -> State1 = mul(State0, Reg, 3), mul(State1, Reg, 2); -mul(#state{available_regs = [Temp | _]} = State, Reg, 7) -> +mul(#state{available_regs = Avail} = State, Reg, 7) -> + Temp = first_avail(Avail), I1 = jit_armv6m_asm:lsls(Temp, Reg, 
3), I2 = jit_armv6m_asm:subs(Reg, Temp, Reg), Stream1 = (State#state.stream_module):append(State#state.stream, <>), State#state{stream = Stream1}; mul(State, Reg, 8) -> shift_left(State, Reg, 3); -mul(#state{available_regs = [Temp | _]} = State, Reg, 9) -> +mul(#state{available_regs = Avail} = State, Reg, 9) -> + Temp = first_avail(Avail), I1 = jit_armv6m_asm:lsls(Temp, Reg, 3), I2 = jit_armv6m_asm:adds(Reg, Temp, Reg), Stream1 = (State#state.stream_module):append(State#state.stream, <>), @@ -3055,7 +3298,8 @@ mul(#state{available_regs = [Temp | _]} = State, Reg, 9) -> mul(State0, Reg, 10) -> State1 = mul(State0, Reg, 5), mul(State1, Reg, 2); -mul(#state{available_regs = [Temp | _]} = State, Reg, 15) -> +mul(#state{available_regs = Avail} = State, Reg, 15) -> + Temp = first_avail(Avail), I1 = jit_armv6m_asm:lsls(Temp, Reg, 4), I2 = jit_armv6m_asm:subs(Reg, Temp, Reg), Stream1 = (State#state.stream_module):append(State#state.stream, <>), @@ -3067,16 +3311,19 @@ mul(State, Reg, 32) -> mul(State, Reg, 64) -> shift_left(State, Reg, 6); mul( - #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, + #state{stream_module = StreamModule, available_regs = Avail} = State0, Reg, Val ) -> + Temp = first_avail(Avail), + TempBit = reg_bit(Temp), + AT = Avail band (bnot TempBit), % multiply by decomposing by power of 2 State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), Stream1 = State1#state.stream, I = jit_armv6m_asm:muls(Reg, Temp), Stream2 = StreamModule:append(Stream1, I), - State1#state{stream = Stream2, available_regs = [Temp | State1#state.available_regs]}. + State1#state{stream = Stream2, available_regs = State1#state.available_regs bor TempBit}. %% %% Analysis of AArch64 pattern and ARM Thumb mapping: @@ -3101,9 +3348,11 @@ mul( -spec decrement_reductions_and_maybe_schedule_next(state()) -> state(). 
decrement_reductions_and_maybe_schedule_next( #state{ - stream_module = StreamModule, stream = Stream0, available_regs = [Temp, TempJitState | _] + stream_module = StreamModule, stream = Stream0, available_regs = Avail } = State0 ) -> + Temp = first_avail(Avail), + TempJitState = first_avail(Avail band (bnot reg_bit(Temp))), % Load jit_state pointer from stack I0 = jit_armv6m_asm:ldr(TempJitState, {sp, ?STACK_OFFSET_JITSTATE}), % Load reduction count @@ -3169,10 +3418,12 @@ call_only_or_schedule_next( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Temp, TempJitState | _] + available_regs = Avail } = State0, Label ) -> + Temp = first_avail(Avail), + TempJitState = first_avail(Avail band (bnot reg_bit(Temp))), % Load jit_state pointer from stack I0 = jit_armv6m_asm:ldr(TempJitState, {sp, ?STACK_OFFSET_JITSTATE}), % Load reduction count @@ -3248,7 +3499,7 @@ set_cp(State0) -> State0 ), % Get a temporary register from available registers - [TempReg | _] = AvailRegs, + TempReg = first_avail(AvailRegs), Offset = StreamModule:offset(Stream0), % build cp with module_index << 24 @@ -3313,9 +3564,10 @@ rewrite_cp_offset( State0#state{stream = Stream3}. set_bs( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0, + #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State0, TermReg ) -> + Temp = first_avail(Avail), I1 = jit_armv6m_asm:str(TermReg, ?BS), I2 = jit_armv6m_asm:movs(Temp, 0), I3 = jit_armv6m_asm:str(Temp, ?BS_OFFSET), @@ -3367,20 +3619,21 @@ return_labels_and_lines( State#state{stream = Stream1}. 
%% Helper function to generate str instruction with y_reg offset, handling large offsets -str_y_reg(SrcReg, Y, TempReg, _AvailableRegs) when Y * 4 =< 124 -> +str_y_reg(SrcReg, Y, TempReg, _AvailMask) when Y * 4 =< 124 -> % Small offset - use immediate addressing I1 = jit_armv6m_asm:ldr(TempReg, ?Y_REGS), I2 = jit_armv6m_asm:str(SrcReg, {TempReg, Y * 4}), <>; -str_y_reg(SrcReg, Y, TempReg1, [TempReg2 | _]) -> +str_y_reg(SrcReg, Y, TempReg1, AvailMask) when AvailMask =/= 0 -> % Large offset - use register arithmetic with second available register + TempReg2 = first_avail(AvailMask), Offset = Y * 4, I1 = jit_armv6m_asm:ldr(TempReg1, ?Y_REGS), I2 = jit_armv6m_asm:movs(TempReg2, Offset), I3 = jit_armv6m_asm:add(TempReg2, TempReg1), I4 = jit_armv6m_asm:str(SrcReg, {TempReg2, 0}), <>; -str_y_reg(SrcReg, Y, TempReg1, []) -> +str_y_reg(SrcReg, Y, TempReg1, 0) -> % Large offset - no additional registers available, use IP_REG as second temp Offset = Y * 4, I1 = jit_armv6m_asm:ldr(TempReg1, ?Y_REGS), @@ -3391,25 +3644,27 @@ str_y_reg(SrcReg, Y, TempReg1, []) -> <>. 
%% Helper function to generate ldr instruction with y_reg offset, handling large offsets -ldr_y_reg(DstReg, Y, [TempReg | _]) when Y * 4 =< 124 -> +ldr_y_reg(DstReg, Y, AvailMask) when AvailMask =/= 0, Y * 4 =< 124 -> % Small offset - use immediate addressing + TempReg = first_avail(AvailMask), I1 = jit_armv6m_asm:ldr(TempReg, ?Y_REGS), I2 = jit_armv6m_asm:ldr(DstReg, {TempReg, Y * 4}), <>; -ldr_y_reg(DstReg, Y, [TempReg | _]) -> +ldr_y_reg(DstReg, Y, AvailMask) when AvailMask =/= 0 -> % Large offset - use DstReg as second temp register for arithmetic + TempReg = first_avail(AvailMask), Offset = Y * 4, I1 = jit_armv6m_asm:ldr(TempReg, ?Y_REGS), I2 = jit_armv6m_asm:movs(DstReg, Offset), I3 = jit_armv6m_asm:add(DstReg, TempReg), I4 = jit_armv6m_asm:ldr(DstReg, {DstReg, 0}), <>; -ldr_y_reg(DstReg, Y, []) when Y * 4 =< 124 -> +ldr_y_reg(DstReg, Y, 0) when Y * 4 =< 124 -> % Small offset, no registers available - use DstReg as temp I1 = jit_armv6m_asm:ldr(DstReg, ?Y_REGS), I2 = jit_armv6m_asm:ldr(DstReg, {DstReg, Y * 4}), <>; -ldr_y_reg(DstReg, Y, []) -> +ldr_y_reg(DstReg, Y, 0) -> % Large offset, no registers available - use IP_REG as temp register % Note: IP_REG (r12) can only be used with mov, not ldr directly Offset = Y * 4, @@ -3420,18 +3675,56 @@ ldr_y_reg(DstReg, Y, []) -> I5 = jit_armv6m_asm:ldr(DstReg, {DstReg, 0}), <>. -free_reg(AvailableRegs0, UsedRegs0, Reg) when ?IS_GPR(Reg) -> - AvailableRegs1 = free_reg0(?AVAILABLE_REGS, AvailableRegs0, Reg, []), - true = lists:member(Reg, UsedRegs0), - UsedRegs1 = lists:delete(Reg, UsedRegs0), - {AvailableRegs1, UsedRegs1}. - -free_reg0([Reg | _SortedT], PrevRegs0, Reg, Acc) -> - lists:reverse(Acc, [Reg | PrevRegs0]); -free_reg0([PrevReg | SortedT], [PrevReg | PrevT], Reg, Acc) -> - free_reg0(SortedT, PrevT, Reg, [PrevReg | Acc]); -free_reg0([_Other | SortedT], PrevRegs, Reg, Acc) -> - free_reg0(SortedT, PrevRegs, Reg, Acc). 
+reg_bit(r0) -> ?REG_BIT_R0; +reg_bit(r1) -> ?REG_BIT_R1; +reg_bit(r2) -> ?REG_BIT_R2; +reg_bit(r3) -> ?REG_BIT_R3; +reg_bit(r4) -> ?REG_BIT_R4; +reg_bit(r5) -> ?REG_BIT_R5; +reg_bit(r6) -> ?REG_BIT_R6; +reg_bit(r7) -> ?REG_BIT_R7; +reg_bit(r8) -> ?REG_BIT_R8; +reg_bit(r9) -> ?REG_BIT_R9; +reg_bit(r10) -> ?REG_BIT_R10; +reg_bit(r11) -> ?REG_BIT_R11; +reg_bit(r12) -> ?REG_BIT_R12; +reg_bit(r13) -> ?REG_BIT_R13; +reg_bit(r14) -> ?REG_BIT_R14; +reg_bit(r15) -> ?REG_BIT_R15. + +regs_to_mask([]) -> 0; +regs_to_mask([ctx | T]) -> regs_to_mask(T); +regs_to_mask([imm | T]) -> regs_to_mask(T); +regs_to_mask([jit_state | T]) -> regs_to_mask(T); +regs_to_mask([offset | T]) -> regs_to_mask(T); +regs_to_mask([stack | T]) -> regs_to_mask(T); +regs_to_mask([Reg | T]) -> reg_bit(Reg) bor regs_to_mask(T). + +%% first_avail returns the first available register from a bitmask. +%% Order matches AVAILABLE_REGS = [r7, r6, r5, r4, r3, r1] +first_avail(Mask) when Mask band ?REG_BIT_R7 =/= 0 -> r7; +first_avail(Mask) when Mask band ?REG_BIT_R6 =/= 0 -> r6; +first_avail(Mask) when Mask band ?REG_BIT_R5 =/= 0 -> r5; +first_avail(Mask) when Mask band ?REG_BIT_R4 =/= 0 -> r4; +first_avail(Mask) when Mask band ?REG_BIT_R3 =/= 0 -> r3; +first_avail(Mask) when Mask band ?REG_BIT_R1 =/= 0 -> r1. + +%% Convert bitmask to list, matching the order of AVAILABLE_REGS. +mask_to_list(0) -> []; +mask_to_list(Mask) -> mask_to_list_r7(Mask). + +mask_to_list_r7(Mask) when Mask band ?REG_BIT_R7 =/= 0 -> [r7 | mask_to_list_r6(Mask)]; +mask_to_list_r7(Mask) -> mask_to_list_r6(Mask). +mask_to_list_r6(Mask) when Mask band ?REG_BIT_R6 =/= 0 -> [r6 | mask_to_list_r5(Mask)]; +mask_to_list_r6(Mask) -> mask_to_list_r5(Mask). +mask_to_list_r5(Mask) when Mask band ?REG_BIT_R5 =/= 0 -> [r5 | mask_to_list_r4(Mask)]; +mask_to_list_r5(Mask) -> mask_to_list_r4(Mask). +mask_to_list_r4(Mask) when Mask band ?REG_BIT_R4 =/= 0 -> [r4 | mask_to_list_r3(Mask)]; +mask_to_list_r4(Mask) -> mask_to_list_r3(Mask). 
+mask_to_list_r3(Mask) when Mask band ?REG_BIT_R3 =/= 0 -> [r3 | mask_to_list_r1(Mask)]; +mask_to_list_r3(Mask) -> mask_to_list_r1(Mask). +mask_to_list_r1(Mask) when Mask band ?REG_BIT_R1 =/= 0 -> [r1]; +mask_to_list_r1(_Mask) -> []. args_regs(Args) -> lists:map( diff --git a/libs/jit/src/jit_riscv32.erl b/libs/jit/src/jit_riscv32.erl index 4a57d91d3..2af1b472a 100644 --- a/libs/jit/src/jit_riscv32.erl +++ b/libs/jit/src/jit_riscv32.erl @@ -165,8 +165,8 @@ offset :: non_neg_integer(), branches :: [{non_neg_integer(), non_neg_integer(), non_neg_integer()}], jump_table_start :: non_neg_integer(), - available_regs :: [riscv32_register()], - used_regs :: [riscv32_register()], + available_regs :: non_neg_integer(), + used_regs :: non_neg_integer(), labels :: [{integer() | reference(), integer()}], variant :: non_neg_integer() }). @@ -236,9 +236,34 @@ %% - a2: native interface pointer (reserved) %% - t0-t6: temporaries, caller-saved, available for JIT use %% - s0-s11: callee-saved (would need to be saved/restored) --define(AVAILABLE_REGS, [t6, t5, t4, t3, t2, t1, t0]). -define(PARAMETER_REGS, [a0, a1, a2, a3, a4, a5, a6, a7]). --define(SCRATCH_REGS, [t6, t5, t4, t2, t1, t0]). + +-define(REG_BIT_A0, (1 bsl 0)). +-define(REG_BIT_A1, (1 bsl 1)). +-define(REG_BIT_A2, (1 bsl 2)). +-define(REG_BIT_A3, (1 bsl 3)). +-define(REG_BIT_A4, (1 bsl 4)). +-define(REG_BIT_A5, (1 bsl 5)). +-define(REG_BIT_A6, (1 bsl 6)). +-define(REG_BIT_A7, (1 bsl 7)). +-define(REG_BIT_T0, (1 bsl 8)). +-define(REG_BIT_T1, (1 bsl 9)). +-define(REG_BIT_T2, (1 bsl 10)). +-define(REG_BIT_T3, (1 bsl 11)). +-define(REG_BIT_T4, (1 bsl 12)). +-define(REG_BIT_T5, (1 bsl 13)). +-define(REG_BIT_T6, (1 bsl 14)). + +%% AVAILABLE_REGS = [t6, t5, t4, t3, t2, t1, t0] +-define(AVAILABLE_REGS_MASK, + (?REG_BIT_T6 bor ?REG_BIT_T5 bor ?REG_BIT_T4 bor ?REG_BIT_T3 bor + ?REG_BIT_T2 bor ?REG_BIT_T1 bor ?REG_BIT_T0) +). 
+%% SCRATCH_REGS = [t6, t5, t4, t2, t1, t0] +-define(SCRATCH_REGS_MASK, + (?REG_BIT_T6 bor ?REG_BIT_T5 bor ?REG_BIT_T4 bor + ?REG_BIT_T2 bor ?REG_BIT_T1 bor ?REG_BIT_T0) +). %%----------------------------------------------------------------------------- %% @doc Return the word size in bytes, i.e. the sizeof(term) i.e. @@ -275,8 +300,8 @@ new(Variant, StreamModule, Stream) -> branches = [], jump_table_start = 0, offset = StreamModule:offset(Stream), - available_regs = ?AVAILABLE_REGS, - used_regs = [], + available_regs = ?AVAILABLE_REGS_MASK, + used_regs = 0, labels = [], variant = Variant }. @@ -332,7 +357,7 @@ debugger(#state{stream_module = StreamModule, stream = Stream0} = State) -> %% @return The list of used registers %%----------------------------------------------------------------------------- -spec used_regs(state()) -> [riscv32_register()]. -used_regs(#state{used_regs = Used}) -> Used. +used_regs(#state{used_regs = Used}) -> mask_to_list(Used). %%----------------------------------------------------------------------------- %% @doc Return the list of currently available native scratch registers. This @@ -342,7 +367,7 @@ used_regs(#state{used_regs = Used}) -> Used. %% @return The list of available registers %%----------------------------------------------------------------------------- -spec available_regs(state()) -> [riscv32_register()]. -available_regs(#state{available_regs = Available}) -> Available. +available_regs(#state{available_regs = Available}) -> mask_to_list(Available). %%----------------------------------------------------------------------------- %% @doc Free native registers. 
The passed list of registers can contain @@ -363,11 +388,11 @@ free_native_registers(State, [Reg | Rest]) -> free_native_register( #state{available_regs = Available0, used_regs = Used0} = State, Reg -) when - is_atom(Reg) --> - {Available1, Used1} = free_reg(Available0, Used0, Reg), - State#state{available_regs = Available1, used_regs = Used1}; +) when is_atom(Reg) -> + Bit = reg_bit(Reg), + State#state{ + available_regs = Available0 bor Bit, used_regs = Used0 band (bnot Bit) + }; free_native_register(State, {ptr, Reg}) -> free_native_register(State, Reg); free_native_register(State, _Other) -> @@ -381,9 +406,9 @@ free_native_register(State, _Other) -> %% @return ok %%----------------------------------------------------------------------------- -spec assert_all_native_free(state()) -> ok. -assert_all_native_free(#state{ - available_regs = ?AVAILABLE_REGS, used_regs = [] -}) -> +assert_all_native_free(State) -> + 0 = State#state.used_regs, + ?AVAILABLE_REGS_MASK = State#state.available_regs, ok. 
%%----------------------------------------------------------------------------- @@ -585,23 +610,25 @@ call_primitive( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [TempReg | RestRegs], - used_regs = UsedRegs + available_regs = Available, + used_regs = Used } = State, Primitive, Args -) -> +) when Available =/= 0 -> + TempReg = first_avail(Available), + TempBit = reg_bit(TempReg), % Use a low register for LDR since ARM Thumb LDR only works with low registers PrepCall = load_primitive_ptr(Primitive, TempReg), Stream1 = StreamModule:append(Stream0, PrepCall), StateCall = State#state{ stream = Stream1, - available_regs = RestRegs, - used_regs = [TempReg | UsedRegs] + available_regs = Available band (bnot TempBit), + used_regs = Used bor TempBit }, call_func_ptr(StateCall, {free, TempReg}, Args); call_primitive( - #state{available_regs = []} = State, + #state{available_regs = 0} = State, Primitive, Args ) -> @@ -630,14 +657,18 @@ call_primitive_last( % registers used for parameters ParamRegs = lists:sublist(?PARAMETER_REGS, length(Args)), ArgsRegs = args_regs(Args), - ScratchRegs = ?AVAILABLE_REGS -- ArgsRegs -- ParamRegs, - [Temp | AvailableRegs1] = ScratchRegs, - UsedRegs = ?AVAILABLE_REGS -- AvailableRegs1, + ArgsRegsMask = regs_to_mask(ArgsRegs), + ParamMask = regs_to_mask(ParamRegs), + ScratchMask = ?AVAILABLE_REGS_MASK band (bnot (ArgsRegsMask bor ParamMask)), + Temp = first_avail(ScratchMask), + TempBit = reg_bit(Temp), + AvailableRegs1 = ScratchMask band (bnot TempBit), + UsedMask = ?AVAILABLE_REGS_MASK band (bnot AvailableRegs1), PrepCall = load_primitive_ptr(Primitive, Temp), Stream1 = StreamModule:append(Stream0, PrepCall), State1 = State0#state{ - stream = Stream1, available_regs = AvailableRegs1, used_regs = UsedRegs + stream = Stream1, available_regs = AvailableRegs1, used_regs = UsedMask }, % Preprocess offset special arg @@ -661,7 +692,10 @@ call_primitive_last( State2 = set_registers_args(State1, ArgsForTailCall, 0), 
tail_call_with_jit_state_registers_only(State2, Temp) end, - State4#state{available_regs = ?AVAILABLE_REGS, used_regs = []}. + State4#state{ + available_regs = ?AVAILABLE_REGS_MASK, + used_regs = 0 + }. %%----------------------------------------------------------------------------- %% @doc Tail call to address in register. @@ -715,13 +749,11 @@ return_if_not_equal_to_ctx( % Offset must account for the beq instruction itself (4 bytes) plus I2 and I3 I1 = jit_riscv32_asm:beq(Reg, ?CTX_REG, 4 + byte_size(I2) + byte_size(I3)), Stream1 = StreamModule:append(Stream0, <>), - {AvailableRegs1, UsedRegs1} = free_reg( - AvailableRegs0, UsedRegs0, Reg - ), + RegBit = reg_bit(Reg), State#state{ stream = Stream1, - available_regs = AvailableRegs1, - used_regs = UsedRegs1 + available_regs = AvailableRegs0 bor RegBit, + used_regs = UsedRegs0 band (bnot RegBit) }. %%----------------------------------------------------------------------------- @@ -759,11 +791,12 @@ jump_to_continuation( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Temp | _], + available_regs = Available, offset = BaseOffset } = State0, {free, OffsetReg} ) -> + Temp = first_avail(Available), % Calculate absolute address: native_code_base + target_offset % where native_code_base = current_pc + (BaseOffset - CurrentStreamOffset) CurrentStreamOffset = StreamModule:offset(Stream0), @@ -779,7 +812,7 @@ jump_to_continuation( Code = <>, Stream1 = StreamModule:append(Stream0, Code), % Free all registers since this is a tail jump - State0#state{stream = Stream1, available_regs = ?AVAILABLE_REGS, used_regs = []}. + State0#state{stream = Stream1, available_regs = ?AVAILABLE_REGS_MASK, used_regs = 0}. 
branch_to_offset_code(_State, Offset, TargetOffset) when TargetOffset - Offset =< 2050, TargetOffset - Offset >= -2044 @@ -788,8 +821,9 @@ branch_to_offset_code(_State, Offset, TargetOffset) when Rel = TargetOffset - Offset, jit_riscv32_asm:j(Rel); branch_to_offset_code( - #state{available_regs = [TempReg | _]}, Offset, TargetOffset -) -> + #state{available_regs = Available}, Offset, TargetOffset +) when Available =/= 0 -> + TempReg = first_avail(Available), % Far branch: use auipc + jalr sequence for PC-relative addressing % This computes: PC + Immediate and jumps to it @@ -817,8 +851,9 @@ branch_to_label_code(State, Offset, Label, {Label, LabelOffset}) -> CodeBlock = branch_to_offset_code(State, Offset, LabelOffset), {State, CodeBlock}; branch_to_label_code( - #state{available_regs = [TempReg | _], branches = Branches} = State0, Offset, Label, false -) -> + #state{available_regs = Available, branches = Branches} = State0, Offset, Label, false +) when Available =/= 0 -> + TempReg = first_avail(Available), % RISC-V: Far branch sequence using PC-relative auipc + jalr (8 bytes) % Placeholder: auipc TempReg, 0 @@ -829,7 +864,7 @@ branch_to_label_code( State1 = State0#state{branches = [Reloc | Branches]}, {State1, CodeBlock}; branch_to_label_code( - #state{available_regs = [], branches = Branches} = State0, Offset, Label, false + #state{available_regs = 0, branches = Branches} = State0, Offset, Label, false ) -> % RISC-V: Use t6 as scratch (caller-saved, safe to clobber) % Far branch sequence using PC-relative auipc + jalr (8 bytes) @@ -841,7 +876,7 @@ branch_to_label_code( Reloc = {Label, Offset, {far_branch, t6}}, State1 = State0#state{branches = [Reloc | Branches]}, {State1, CodeBlock}; -branch_to_label_code(#state{available_regs = []}, _Offset, _Label, _LabelLookup) -> +branch_to_label_code(#state{available_regs = 0}, _Offset, _Label, _LabelLookup) -> error({no_available_registers, _LabelLookup}). 
%%----------------------------------------------------------------------------- @@ -981,7 +1016,7 @@ if_block_cond( end, % RISC-V: bge Reg, Val, offset (branch if Reg >= Val, i.e., NOT less than) % Load immediate into a temp register for comparison - [Temp | _] = State0#state.available_regs, + Temp = first_avail(State0#state.available_regs), OffsetBefore = StreamModule:offset(Stream0), State1 = mov_immediate(State0, Temp, Val), Stream1 = State1#state.stream, @@ -992,9 +1027,10 @@ if_block_cond( State3 = State2#state{stream = Stream2}, {State3, {bge, Reg, Temp}, BranchDelta}; if_block_cond( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0, + #state{stream_module = StreamModule, stream = Stream0, available_regs = Available} = State0, {RegOrTuple, '<', Val} ) when is_integer(Val) -> + Temp = first_avail(Available), Reg = case RegOrTuple of {free, Reg0} -> Reg0; @@ -1011,9 +1047,10 @@ if_block_cond( State3 = State2#state{stream = Stream2}, {State3, {bge, Reg, Temp}, BranchDelta}; if_block_cond( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0, + #state{stream_module = StreamModule, stream = Stream0, available_regs = Available} = State0, {Val, '<', RegOrTuple} ) when is_integer(Val), Val >= 0, Val =< 255 -> + Temp = first_avail(Available), Reg = case RegOrTuple of {free, Reg0} -> Reg0; @@ -1030,9 +1067,10 @@ if_block_cond( State3 = State2#state{stream = Stream2}, {State3, {bge, Temp, Reg}, BranchDelta}; if_block_cond( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0, + #state{stream_module = StreamModule, stream = Stream0, available_regs = Available} = State0, {Val, '<', RegOrTuple} ) when is_integer(Val) -> + Temp = first_avail(Available), Reg = case RegOrTuple of {free, Reg0} -> Reg0; @@ -1098,9 +1136,10 @@ if_block_cond(State, {'(int)', RegOrTuple, '==', 0}) -> if_block_cond(State, {'(int)', RegOrTuple, '==', Val}) when 
is_integer(Val) -> if_block_cond(State, {RegOrTuple, '==', Val}); if_block_cond( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0, + #state{stream_module = StreamModule, stream = Stream0, available_regs = Available} = State0, {RegOrTuple, '!=', Val} ) when is_integer(Val) andalso Val >= 0 andalso Val =< 255 -> + Temp = first_avail(Available), Reg = case RegOrTuple of {free, Reg0} -> Reg0; @@ -1134,9 +1173,10 @@ if_block_cond( if_block_cond(State, {'(int)', RegOrTuple, '!=', Val}) when is_integer(Val) -> if_block_cond(State, {RegOrTuple, '!=', Val}); if_block_cond( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0, + #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State0, {RegOrTuple, '==', Val} ) when is_integer(Val) andalso Val >= 0 andalso Val =< 255 -> + Temp = first_avail(Avail), Reg = case RegOrTuple of {free, Reg0} -> Reg0; @@ -1164,9 +1204,10 @@ if_block_cond( State3 = if_block_free_reg({free, RegB}, State2), {State3, {bne, RegA, RegB}, 0}; if_block_cond( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0, + #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State0, {RegOrTuple, '==', Val} ) when is_integer(Val) -> + Temp = first_avail(Avail), Reg = case RegOrTuple of {free, Reg0} -> Reg0; @@ -1183,9 +1224,10 @@ if_block_cond( State3 = State2#state{stream = Stream2}, {State3, {bne, Reg, Temp}, BranchDelta}; if_block_cond( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0, + #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State0, {RegOrTuple, '!=', Val} ) when is_integer(Val) -> + Temp = first_avail(Avail), Reg = case RegOrTuple of {free, Reg0} -> Reg0; @@ -1205,10 +1247,11 @@ if_block_cond( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Temp | _] + 
available_regs = Avail } = State0, {'(bool)', RegOrTuple, '==', false} ) -> + Temp = first_avail(Avail), Reg = case RegOrTuple of {free, Reg0} -> Reg0; @@ -1226,10 +1269,11 @@ if_block_cond( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Temp | _] + available_regs = Avail } = State0, {'(bool)', RegOrTuple, '!=', false} ) -> + Temp = first_avail(Avail), Reg = case RegOrTuple of {free, Reg0} -> Reg0; @@ -1247,10 +1291,11 @@ if_block_cond( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Temp | _] + available_regs = Avail } = State0, {RegOrTuple, '&', Val, '!=', 0} ) -> + Temp = first_avail(Avail), Reg = case RegOrTuple of {free, Reg0} -> Reg0; @@ -1281,10 +1326,11 @@ if_block_cond( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Temp | _] + available_regs = Avail } = State0, {Reg, '&', 16#F, '!=', 16#F} ) when ?IS_GPR(Reg) -> + Temp = first_avail(Avail), %% RISC-V: Special case Reg & ?TERM_IMMED_TAG_MASK != ?TERM_INTEGER_TAG I1 = jit_riscv32_asm:not_(Temp, Reg), I2 = jit_riscv32_asm:slli(Temp, Temp, 28), @@ -1313,10 +1359,12 @@ if_block_cond( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Temp | AT] + available_regs = Avail } = State0, {Reg, '&', Mask, '!=', Val} ) when ?IS_GPR(Reg) -> + Temp = first_avail(Avail), + AT = Avail band (bnot reg_bit(Temp)), %% RISC-V: AND with mask, then compare with value OffsetBefore = StreamModule:offset(Stream0), I1 = jit_riscv32_asm:mv(Temp, Reg), @@ -1332,7 +1380,7 @@ if_block_cond( BranchInstr = <<16#FFFFFFFF:32/little>>, Stream3 = StreamModule:append(Stream2, BranchInstr), State3 = State2#state{ - stream = Stream3, available_regs = [Temp | State2#state.available_regs] + stream = Stream3, available_regs = State2#state.available_regs bor reg_bit(Temp) }, {State3, {beq, Temp, zero}, BranchDelta}; _ when ?IS_GPR(Val) -> @@ -1341,20 +1389,22 @@ if_block_cond( BranchInstr = <<16#FFFFFFFF:32/little>>, Stream3 = 
StreamModule:append(Stream2, BranchInstr), State3 = State2#state{ - stream = Stream3, available_regs = [Temp | State2#state.available_regs] + stream = Stream3, available_regs = State2#state.available_regs bor reg_bit(Temp) }, {State3, {beq, Temp, Val}, BranchDelta}; _ -> %% Val is an immediate - need second temp register %% Reuse the mask register for the comparison value - [MaskReg | AT2] = AT, + MaskReg = first_avail(AT), + AT2 = AT band (bnot reg_bit(MaskReg)), State3 = mov_immediate(State2#state{available_regs = AT2}, MaskReg, Val), Stream3 = State3#state.stream, BranchDelta = StreamModule:offset(Stream3) - OffsetBefore, BranchInstr = <<16#FFFFFFFF:32/little>>, Stream4 = StreamModule:append(Stream3, BranchInstr), State4 = State3#state{ - stream = Stream4, available_regs = [Temp, MaskReg | State3#state.available_regs] + stream = Stream4, + available_regs = State3#state.available_regs bor reg_bit(Temp) bor reg_bit(MaskReg) }, {State4, {beq, Temp, MaskReg}, BranchDelta} end; @@ -1391,7 +1441,8 @@ if_block_cond( _ -> %% Val is an immediate - need temp register %% Reuse the mask register for the comparison value - [MaskReg | AT] = State1#state.available_regs, + MaskReg = first_avail(State1#state.available_regs), + AT = State1#state.available_regs band (bnot reg_bit(MaskReg)), State2 = mov_immediate(State1#state{available_regs = AT}, MaskReg, Val), Stream2 = State2#state.stream, BranchDelta = StreamModule:offset(Stream2) - OffsetBefore, @@ -1405,7 +1456,9 @@ if_block_cond( -spec if_block_free_reg(riscv32_register() | {free, riscv32_register()}, state()) -> state(). if_block_free_reg({free, Reg}, State0) -> #state{available_regs = AvR0, used_regs = UR0} = State0, - {AvR1, UR1} = free_reg(AvR0, UR0, Reg), + Bit = reg_bit(Reg), + AvR1 = AvR0 bor Bit, + UR1 = UR0 band (bnot Bit), State0#state{ available_regs = AvR1, used_regs = UR1 @@ -1413,22 +1466,11 @@ if_block_free_reg({free, Reg}, State0) -> if_block_free_reg(Reg, State0) when ?IS_GPR(Reg) -> State0. 
--spec merge_used_regs(state(), [riscv32_register()]) -> state(). -merge_used_regs(#state{used_regs = UR0, available_regs = AvR0} = State, [ - Reg | T -]) -> - case lists:member(Reg, UR0) of - true -> - merge_used_regs(State, T); - false -> - AvR1 = lists:delete(Reg, AvR0), - UR1 = [Reg | UR0], - merge_used_regs( - State#state{used_regs = UR1, available_regs = AvR1}, T - ) - end; -merge_used_regs(State, []) -> - State. +-spec merge_used_regs(state(), non_neg_integer()) -> state(). +merge_used_regs(#state{used_regs = UR} = State, OtherUR) -> + MergedUR = UR bor OtherUR, + MergedAvail = ?AVAILABLE_REGS_MASK band (bnot MergedUR), + State#state{used_regs = MergedUR, available_regs = MergedAvail}. %%----------------------------------------------------------------------------- %% @doc Emit a shift register right by a fixed number of bits, effectively @@ -1450,7 +1492,7 @@ shift_right( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [ResultReg | T], + available_regs = Avail, used_regs = UR } = State, Reg, @@ -1458,9 +1500,18 @@ shift_right( ) when ?IS_GPR(Reg) andalso is_integer(Shift) -> + ResultReg = first_avail(Avail), + ResultBit = reg_bit(ResultReg), I = jit_riscv32_asm:srli(ResultReg, Reg, Shift), Stream1 = StreamModule:append(Stream0, I), - {State#state{stream = Stream1, available_regs = T, used_regs = [ResultReg | UR]}, ResultReg}. + { + State#state{ + stream = Stream1, + available_regs = Avail band (bnot ResultBit), + used_regs = UR bor ResultBit + }, + ResultReg + }. 
%%----------------------------------------------------------------------------- %% @doc Emit a shift register left by a fixed number of bits, effectively @@ -1492,8 +1543,8 @@ call_func_ptr( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = AvailableRegs0, - used_regs = UsedRegs0 + available_regs = AvailableRegs0Mask, + used_regs = UsedRegs0Mask } = State0, FuncPtrTuple, Args @@ -1506,13 +1557,16 @@ call_func_ptr( end, [FuncPtrTuple | Args] ), - UsedRegs1 = UsedRegs0 -- FreeRegs, + FreeMask = regs_to_mask(FreeRegs), + UsedRegs1Mask = UsedRegs0Mask band (bnot FreeMask), % Save RA (like AArch64 saves LR) so it's preserved across jalr calls - SavedRegs = [?RA_REG, ?CTX_REG, ?JITSTATE_REG, ?NATIVE_INTERFACE_REG | UsedRegs1], + SavedRegs = [ + ?RA_REG, ?CTX_REG, ?JITSTATE_REG, ?NATIVE_INTERFACE_REG | mask_to_list(UsedRegs1Mask) + ], % Calculate available registers - FreeGPRegs = FreeRegs -- (FreeRegs -- ?AVAILABLE_REGS), - AvailableRegs1 = FreeGPRegs ++ AvailableRegs0, + FreeGPMask = FreeMask band ?AVAILABLE_REGS_MASK, + AvailableRegs1Mask = FreeGPMask bor AvailableRegs0Mask, % Calculate stack space: round up to 16-byte boundary for RISC-V ABI NumRegs = length(SavedRegs), @@ -1535,70 +1589,76 @@ call_func_ptr( RegArgs0 = Args1, RegArgsRegs = lists:flatmap(fun arg_to_reg_list/1, RegArgs0), + RegArgsRegsMask = regs_to_mask(RegArgsRegs), % We pushed registers to stack, so we can use these registers we saved % and the currently available registers - SetArgsRegsOnlyAvailableArgs = (UsedRegs1 -- RegArgsRegs) ++ AvailableRegs0, + SetArgsMask = (UsedRegs1Mask band (bnot RegArgsRegsMask)) bor AvailableRegs0Mask, State1 = State0#state{ - available_regs = SetArgsRegsOnlyAvailableArgs, - used_regs = ?AVAILABLE_REGS -- SetArgsRegsOnlyAvailableArgs, + available_regs = SetArgsMask, + used_regs = ?AVAILABLE_REGS_MASK band (bnot SetArgsMask), stream = Stream1 }, ParameterRegs = parameter_regs(RegArgs0), - {Stream3, SetArgsAvailableRegs, FuncPtrReg, RegArgs} = + 
ParamMask = regs_to_mask(ParameterRegs), + {Stream3, SetArgsAvailMask, FuncPtrReg, RegArgs} = case FuncPtrTuple of {free, FuncPtrReg0} -> + FuncPtrReg0Bit = reg_bit(FuncPtrReg0), % If FuncPtrReg is in parameter regs, we must swap it with a free reg. - case lists:member(FuncPtrReg0, ParameterRegs) of + case ParamMask band FuncPtrReg0Bit =/= 0 of true -> - case SetArgsRegsOnlyAvailableArgs -- ParameterRegs of - [] -> - % Swap SetArgsRegsOnlyAvailableArgs with a reg used in RegArgs0 + AvailNotParam = SetArgsMask band (bnot ParamMask), + case AvailNotParam of + 0 -> + % Swap with a reg used in RegArgs0 % that is not in ParameterRegs - [NewArgReg | _] = SetArgsRegsOnlyAvailableArgs, - [FuncPtrReg1 | _] = RegArgsRegs -- ParameterRegs, + NewArgReg = first_avail(SetArgsMask), + FuncPtrReg1 = first_avail(RegArgsRegsMask band (bnot ParamMask)), + FuncPtrReg1Bit = reg_bit(FuncPtrReg1), MovInstr1 = jit_riscv32_asm:mv(NewArgReg, FuncPtrReg1), MovInstr2 = jit_riscv32_asm:mv(FuncPtrReg1, FuncPtrReg0), - SetArgsAvailableArgs1 = - (SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg1]) ++ - [FuncPtrReg0], + SetArgsAvailMask1 = + (SetArgsMask band (bnot FuncPtrReg1Bit)) bor FuncPtrReg0Bit, RegArgs1 = replace_reg(RegArgs0, FuncPtrReg1, NewArgReg), { StreamModule:append( State1#state.stream, <> ), - SetArgsAvailableArgs1, + SetArgsAvailMask1, FuncPtrReg1, RegArgs1 }; - [FuncPtrReg1 | _] -> + _ -> + FuncPtrReg1 = first_avail(AvailNotParam), + FuncPtrReg1Bit = reg_bit(FuncPtrReg1), MovInstr = jit_riscv32_asm:mv(FuncPtrReg1, FuncPtrReg0), - SetArgsAvailableArgs1 = - (SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg1]) ++ - [FuncPtrReg0], + SetArgsAvailMask1 = + (SetArgsMask band (bnot FuncPtrReg1Bit)) bor FuncPtrReg0Bit, { StreamModule:append(State1#state.stream, MovInstr), - SetArgsAvailableArgs1, + SetArgsAvailMask1, FuncPtrReg1, RegArgs0 } end; false -> - SetArgsAvailableArgs1 = SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg0], - {State1#state.stream, SetArgsAvailableArgs1, FuncPtrReg0, RegArgs0} + 
SetArgsAvailMask1 = SetArgsMask band (bnot FuncPtrReg0Bit), + {State1#state.stream, SetArgsAvailMask1, FuncPtrReg0, RegArgs0} end; {primitive, Primitive} -> - [FuncPtrReg0 | _] = SetArgsRegsOnlyAvailableArgs -- ParameterRegs, - SetArgsAvailableRegs1 = SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg0], + FuncPtrReg0 = first_avail(SetArgsMask band (bnot ParamMask)), + FuncPtrReg0Bit = reg_bit(FuncPtrReg0), + SetArgsAvailMask1 = SetArgsMask band (bnot FuncPtrReg0Bit), PrepCall = load_primitive_ptr(Primitive, FuncPtrReg0), Stream2 = StreamModule:append(State1#state.stream, PrepCall), - {Stream2, SetArgsAvailableRegs1, FuncPtrReg0, RegArgs0} + {Stream2, SetArgsAvailMask1, FuncPtrReg0, RegArgs0} end, State3 = State1#state{ - available_regs = SetArgsAvailableRegs, - used_regs = ?AVAILABLE_REGS -- SetArgsAvailableRegs, + available_regs = SetArgsAvailMask, + used_regs = ?AVAILABLE_REGS_MASK band (bnot SetArgsAvailMask), stream = Stream3 }, @@ -1613,40 +1673,55 @@ call_func_ptr( % For result, we need a free register (including FuncPtrReg). % If none are available (all registers were pushed to the stack), % we write the result to the stack position of FuncPtrReg - {Stream6, UsedRegs2} = - case length(SavedRegs) of - N when N >= 7 andalso element(1, FuncPtrTuple) =:= free -> - % We use original FuncPtrReg then as we know it's available. 
- % Calculate stack offset: find register index in SavedRegs * 4 bytes - ResultReg = element(2, FuncPtrTuple), - RegIndex = index_of(ResultReg, SavedRegs), + {Stream6, UsedRegs2Mask, ResultReg} = + case {length(SavedRegs), FuncPtrTuple} of + {N, {free, ResultFPReg0}} when N >= 7 -> + % Registers exhausted: use FuncPtrReg which is free after the call + RegIndex = index_of(ResultFPReg0, SavedRegs), case RegIndex >= 0 of true -> StoreResultStackOffset = RegIndex * 4, StoreResult = jit_riscv32_asm:sw(sp, a0, StoreResultStackOffset), - {StreamModule:append(Stream5, StoreResult), [ResultReg | UsedRegs1]}; + { + StreamModule:append(Stream5, StoreResult), + UsedRegs1Mask bor reg_bit(ResultFPReg0), + ResultFPReg0 + }; false -> - % FuncPtrReg was not in SavedRegs, use an available register - [ResultReg1 | _] = AvailableRegs1 -- SavedRegs, - MoveResult = jit_riscv32_asm:mv(ResultReg1, a0), - {StreamModule:append(Stream5, MoveResult), [ResultReg1 | UsedRegs1]} + MoveResult = jit_riscv32_asm:mv(ResultFPReg0, a0), + { + StreamModule:append(Stream5, MoveResult), + UsedRegs1Mask bor reg_bit(ResultFPReg0), + ResultFPReg0 + } end; + {_, {free, ResultFPReg1}} -> + % FuncPtrReg is free after the call, use it for result + MoveResult = jit_riscv32_asm:mv(ResultFPReg1, a0), + { + StreamModule:append(Stream5, MoveResult), + UsedRegs1Mask bor reg_bit(ResultFPReg1), + ResultFPReg1 + }; _ -> - % Use any free that is not in SavedRegs - [ResultReg | _] = AvailableRegs1 -- SavedRegs, - MoveResult = jit_riscv32_asm:mv(ResultReg, a0), - {StreamModule:append(Stream5, MoveResult), [ResultReg | UsedRegs1]} + ResultReg0 = first_avail(AvailableRegs1Mask), + MoveResult = jit_riscv32_asm:mv(ResultReg0, a0), + { + StreamModule:append(Stream5, MoveResult), + UsedRegs1Mask bor reg_bit(ResultReg0), + ResultReg0 + } end, Stream8 = pop_registers(SavedRegs, AlignedStackBytes, StreamModule, Stream6), - AvailableRegs2 = lists:delete(ResultReg, AvailableRegs1), - AvailableRegs3 = ?AVAILABLE_REGS -- 
(?AVAILABLE_REGS -- AvailableRegs2), + ResultRegBit = reg_bit(ResultReg), + AvailableRegs3Mask = (AvailableRegs1Mask band (bnot ResultRegBit)) band ?AVAILABLE_REGS_MASK, { State4#state{ stream = Stream8, - available_regs = AvailableRegs3, - used_regs = UsedRegs2 + available_regs = AvailableRegs3Mask, + used_regs = UsedRegs2Mask }, ResultReg }. @@ -1698,30 +1773,34 @@ set_registers_args(State0, Args, StackOffset) -> set_registers_args(State0, Args, ParamRegs, StackOffset). set_registers_args( - #state{used_regs = UsedRegs} = State0, + #state{used_regs = UsedRegsMask} = State0, Args, ParamRegs, StackOffset ) -> ArgsRegs = args_regs(Args), - AvailableScratchGP = ((?SCRATCH_REGS -- ParamRegs) -- ArgsRegs) -- UsedRegs, + ParamMask = regs_to_mask(ParamRegs), + ArgsMask = regs_to_mask(ArgsRegs), + AvailableScratchMask = + ?SCRATCH_REGS_MASK band (bnot (ParamMask bor ArgsMask bor UsedRegsMask)), + AvailableScratchGP = mask_to_list(AvailableScratchMask), State1 = set_registers_args0( State0, Args, ArgsRegs, ParamRegs, AvailableScratchGP, StackOffset ), Stream1 = State1#state.stream, - NewUsedRegs = lists:foldl( + NewUsedMask = lists:foldl( fun - ({free, {ptr, Reg}}, AccUsed) -> lists:delete(Reg, AccUsed); - ({free, Reg}, AccUsed) -> lists:delete(Reg, AccUsed); + ({free, {ptr, Reg}}, AccUsed) -> AccUsed band (bnot reg_bit(Reg)); + ({free, Reg}, AccUsed) when is_atom(Reg) -> AccUsed band (bnot reg_bit(Reg)); (_, AccUsed) -> AccUsed end, - UsedRegs, + UsedRegsMask, Args ), State1#state{ stream = Stream1, - available_regs = ?AVAILABLE_REGS -- ParamRegs -- NewUsedRegs, - used_regs = ParamRegs ++ (NewUsedRegs -- ParamRegs) + available_regs = ?AVAILABLE_REGS_MASK band (bnot (ParamMask bor NewUsedMask)), + used_regs = ParamMask bor NewUsedMask }. 
parameter_regs(Args) -> @@ -1893,54 +1972,72 @@ move_to_vm_register(State0, Src, {ptr, Reg}) when is_atom(Src) -> I1 = jit_riscv32_asm:sw(Reg, Src, 0), Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), State0#state{stream = Stream1}; -move_to_vm_register(#state{available_regs = [Temp1 | AT]} = State0, Src, {y_reg, Y}) when +move_to_vm_register(#state{available_regs = Avail} = State0, Src, {y_reg, Y}) when is_atom(Src) -> + Temp1 = first_avail(Avail), + AT = Avail band (bnot reg_bit(Temp1)), Code = str_y_reg(Src, Y, Temp1, AT), Stream1 = (State0#state.stream_module):append(State0#state.stream, Code), State0#state{stream = Stream1}; % Source is an integer to y_reg (optimized: ldr first, then movs) -move_to_vm_register(#state{available_regs = [Temp1, Temp2 | AT]} = State0, N, {y_reg, Y}) when +move_to_vm_register(#state{available_regs = Avail} = State0, N, {y_reg, Y}) when is_integer(N), N >= 0, N =< 255 -> + Temp1 = first_avail(Avail), + Avail2 = Avail band (bnot reg_bit(Temp1)), + Temp2 = first_avail(Avail2), + AT = Avail2 band (bnot reg_bit(Temp2)), I1 = jit_riscv32_asm:li(Temp2, N), YCode = str_y_reg(Temp2, Y, Temp1, AT), Stream1 = (State0#state.stream_module):append(State0#state.stream, <>), State0#state{stream = Stream1}; % Source is an integer (0-255 for movs, negative values need different handling) -move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, N, Dest) when +move_to_vm_register(#state{available_regs = AR0} = State0, N, Dest) when is_integer(N), N >= 0, N =< 255 -> + Temp = first_avail(AR0), + AT = AR0 band (bnot reg_bit(Temp)), I1 = jit_riscv32_asm:li(Temp, N), Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), State1#state{available_regs = AR0}; %% Handle large values using simple literal pool (branch-over pattern) -move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, N, 
Dest) when +move_to_vm_register(#state{available_regs = AR0} = State0, N, Dest) when is_integer(N) -> + Temp = first_avail(AR0), + AT = AR0 band (bnot reg_bit(Temp)), State1 = mov_immediate(State0#state{available_regs = AT}, Temp, N), State2 = move_to_vm_register(State1, Temp, Dest), State2#state{available_regs = AR0}; % Source is a VM register -move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {x_reg, extra}, Dest) -> +move_to_vm_register(#state{available_regs = AR0} = State0, {x_reg, extra}, Dest) -> + Temp = first_avail(AR0), + AT = AR0 band (bnot reg_bit(Temp)), {BaseReg, Off} = ?X_REG(?MAX_REG), I1 = jit_riscv32_asm:lw(Temp, BaseReg, Off), Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), State1#state{available_regs = AR0}; -move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {x_reg, X}, Dest) -> +move_to_vm_register(#state{available_regs = AR0} = State0, {x_reg, X}, Dest) -> + Temp = first_avail(AR0), + AT = AR0 band (bnot reg_bit(Temp)), {XReg, X_REGOffset} = ?X_REG(X), I1 = jit_riscv32_asm:lw(Temp, XReg, X_REGOffset), Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), State1#state{available_regs = AR0}; -move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {ptr, Reg}, Dest) -> +move_to_vm_register(#state{available_regs = AR0} = State0, {ptr, Reg}, Dest) -> + Temp = first_avail(AR0), + AT = AR0 band (bnot reg_bit(Temp)), I1 = jit_riscv32_asm:lw(Temp, Reg, 0), Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), State1#state{available_regs = AR0}; -move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {y_reg, Y}, Dest) -> 
+move_to_vm_register(#state{available_regs = AR0} = State0, {y_reg, Y}, Dest) -> + Temp = first_avail(AR0), + AT = AR0 band (bnot reg_bit(Temp)), Code = ldr_y_reg(Temp, Y, AT), Stream1 = (State0#state.stream_module):append(State0#state.stream, Code), State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), @@ -1949,7 +2046,7 @@ move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {y_reg, move_to_vm_register( #state{ stream_module = StreamModule, - available_regs = [Temp1, Temp2 | _], + available_regs = Avail, stream = Stream0, variant = Variant } = @@ -1957,6 +2054,8 @@ move_to_vm_register( {free, {ptr, Reg, 1}}, {fp_reg, F} ) -> + Temp1 = first_avail(Avail), + Temp2 = first_avail(Avail band (bnot reg_bit(Temp1))), {BaseReg, Off} = ?FP_REGS, I1 = jit_riscv32_asm:lw(Temp1, BaseReg, Off), I2 = jit_riscv32_asm:lw(Temp2, Reg, 4), @@ -1992,45 +2091,53 @@ move_to_vm_register( vm_register() | riscv32_register() ) -> state(). move_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, + #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State, Reg, Index, {x_reg, X} ) when X < ?MAX_REG andalso is_atom(Reg) andalso is_integer(Index) -> + Temp = first_avail(Avail), I1 = jit_riscv32_asm:lw(Temp, Reg, Index * 4), {BaseReg, Off} = ?X_REG(X), I2 = jit_riscv32_asm:sw(BaseReg, Temp, Off), Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}; move_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, + #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State, Reg, Index, {ptr, Dest} ) when is_atom(Reg) andalso is_integer(Index) -> + Temp = first_avail(Avail), I1 = jit_riscv32_asm:lw(Temp, Reg, Index * 4), I2 = jit_riscv32_asm:sw(Dest, Temp, 0), Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}; 
move_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp1, Temp2 | AT]} = + #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State, Reg, Index, {y_reg, Y} ) when is_atom(Reg) andalso is_integer(Index) -> + Temp1 = first_avail(Avail), + Avail2 = Avail band (bnot reg_bit(Temp1)), + Temp2 = first_avail(Avail2), + AT = Avail2 band (bnot reg_bit(Temp2)), I1 = jit_riscv32_asm:lw(Temp2, Reg, Index * 4), YCode = str_y_reg(Temp2, Y, Temp1, AT), Code = <>, Stream1 = StreamModule:append(Stream0, Code), State#state{stream = Stream1}; move_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | AT]} = + #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State, {free, Reg}, Index, {y_reg, Y} ) when is_integer(Index) -> + Temp = first_avail(Avail), + AT = Avail band (bnot reg_bit(Temp)), I1 = jit_riscv32_asm:lw(Reg, Reg, Index * 4), YCode = str_y_reg(Reg, Y, Temp, AT), Code = <>, @@ -2058,7 +2165,9 @@ move_array_element( I3 = jit_riscv32_asm:lw(IndexReg, IndexReg, 0), {BaseReg, Off} = ?X_REG(X), I4 = jit_riscv32_asm:sw(BaseReg, IndexReg, Off), - {AvailableRegs1, UsedRegs1} = free_reg(AvailableRegs0, UsedRegs0, IndexReg), + Bit = reg_bit(IndexReg), + AvailableRegs1 = AvailableRegs0 bor Bit, + UsedRegs1 = UsedRegs0 band (bnot Bit), Stream1 = StreamModule:append(Stream0, <>), State#state{ available_regs = AvailableRegs1, @@ -2080,9 +2189,9 @@ move_array_element( I2 = jit_riscv32_asm:add(IndexReg, Reg, IndexReg), I3 = jit_riscv32_asm:lw(IndexReg, IndexReg, 0), I4 = jit_riscv32_asm:sw(PtrReg, IndexReg, 0), - {AvailableRegs1, UsedRegs1} = free_reg( - AvailableRegs0, UsedRegs0, IndexReg - ), + Bit = reg_bit(IndexReg), + AvailableRegs1 = AvailableRegs0 bor Bit, + UsedRegs1 = UsedRegs0 band (bnot Bit), Stream1 = StreamModule:append(Stream0, <>), State#state{ available_regs = AvailableRegs1, @@ -2093,21 +2202,23 @@ move_array_element( #state{ 
stream_module = StreamModule, stream = Stream0, - available_regs = [Temp | AT] = AvailableRegs0, + available_regs = AvailableRegs0, used_regs = UsedRegs0 } = State, Reg, {free, IndexReg}, {y_reg, Y} ) when is_atom(IndexReg) -> + Temp = first_avail(AvailableRegs0), + AT = AvailableRegs0 band (bnot reg_bit(Temp)), I1 = jit_riscv32_asm:slli(IndexReg, IndexReg, 2), I2 = jit_riscv32_asm:add(IndexReg, Reg, IndexReg), I3 = jit_riscv32_asm:lw(IndexReg, IndexReg, 0), Code = str_y_reg(IndexReg, Y, Temp, AT), I4 = Code, - {AvailableRegs1, UsedRegs1} = free_reg( - AvailableRegs0, UsedRegs0, IndexReg - ), + Bit = reg_bit(IndexReg), + AvailableRegs1 = AvailableRegs0 bor Bit, + UsedRegs1 = UsedRegs0 band (bnot Bit), Stream1 = StreamModule:append( Stream0, <> ), @@ -2137,17 +2248,21 @@ get_array_element( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [ElemReg | AvailableT], + available_regs = Avail, used_regs = UsedRegs0 } = State, Reg, Index ) -> + ElemReg = first_avail(Avail), + ElemBit = reg_bit(ElemReg), I1 = jit_riscv32_asm:lw(ElemReg, Reg, Index * 4), Stream1 = StreamModule:append(Stream0, <>), { State#state{ - stream = Stream1, available_regs = AvailableT, used_regs = [ElemReg | UsedRegs0] + stream = Stream1, + available_regs = Avail band (bnot ElemBit), + used_regs = UsedRegs0 bor ElemBit }, ElemReg }. 
@@ -2166,11 +2281,12 @@ move_to_array_element( Stream1 = StreamModule:append(Stream0, I1), State0#state{stream = Stream1}; move_to_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0, + #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State0, ValueReg, Reg, IndexReg ) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso ?IS_GPR(IndexReg) -> + Temp = first_avail(Avail), I1 = jit_riscv32_asm:mv(Temp, IndexReg), I2 = jit_riscv32_asm:slli(Temp, Temp, 2), I3 = jit_riscv32_asm:add(Temp, Reg, Temp), @@ -2196,12 +2312,13 @@ move_to_array_element( ) when is_integer(IndexReg) andalso is_integer(Offset) andalso Offset div 8 =:= 0 -> move_to_array_element(State, Value, BaseReg, IndexReg + (Offset div 8)); move_to_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, + #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State, ValueReg, BaseReg, IndexReg, Offset ) when ?IS_GPR(ValueReg) andalso ?IS_GPR(IndexReg) andalso is_integer(Offset) -> + Temp = first_avail(Avail), I1 = jit_riscv32_asm:addi(Temp, IndexReg, Offset), I2 = jit_riscv32_asm:slli(Temp, Temp, 2), I3 = jit_riscv32_asm:add(Temp, BaseReg, Temp), @@ -2216,7 +2333,7 @@ move_to_array_element( Offset ) -> {State1, ValueReg} = copy_to_native_register(State0, Value), - [Temp | _] = State1#state.available_regs, + Temp = first_avail(State1#state.available_regs), I1 = jit_riscv32_asm:addi(Temp, IndexReg, Offset), I2 = jit_riscv32_asm:slli(Temp, Temp, 2), I3 = jit_riscv32_asm:add(Temp, BaseReg, Temp), @@ -2232,15 +2349,22 @@ move_to_native_register( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Reg | AvailT], + available_regs = Avail, used_regs = Used } = State, cp ) -> + Reg = first_avail(Avail), + RegBit = reg_bit(Reg), {BaseReg, Off} = ?CP, I1 = jit_riscv32_asm:lw(Reg, BaseReg, Off), Stream1 = StreamModule:append(Stream0, 
I1), - {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg}; + { + State#state{ + stream = Stream1, used_regs = Used bor RegBit, available_regs = Avail band (bnot RegBit) + }, + Reg + }; move_to_native_register(State, Reg) when is_atom(Reg) -> {State, Reg}; move_to_native_register( @@ -2251,64 +2375,89 @@ move_to_native_register( {State#state{stream = Stream1}, Reg}; move_to_native_register( #state{ - available_regs = [Reg | AvailT], + available_regs = Avail, used_regs = Used } = State0, Imm ) when is_integer(Imm) -> - State1 = State0#state{used_regs = [Reg | Used], available_regs = AvailT}, + Reg = first_avail(Avail), + RegBit = reg_bit(Reg), + State1 = State0#state{used_regs = Used bor RegBit, available_regs = Avail band (bnot RegBit)}, {move_to_native_register(State1, Imm, Reg), Reg}; move_to_native_register( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Reg | AvailT], + available_regs = Avail, used_regs = Used } = State, {x_reg, extra} ) -> + Reg = first_avail(Avail), + RegBit = reg_bit(Reg), {BaseReg, Off} = ?X_REG(?MAX_REG), I1 = jit_riscv32_asm:lw(Reg, BaseReg, Off), Stream1 = StreamModule:append(Stream0, I1), - {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg}; + { + State#state{ + stream = Stream1, used_regs = Used bor RegBit, available_regs = Avail band (bnot RegBit) + }, + Reg + }; move_to_native_register( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Reg | AvailT], + available_regs = Avail, used_regs = Used } = State, {x_reg, X} ) when X < ?MAX_REG -> + Reg = first_avail(Avail), + RegBit = reg_bit(Reg), {BaseReg, Offset} = ?X_REG(X), I1 = jit_riscv32_asm:lw(Reg, BaseReg, Offset), Stream1 = StreamModule:append(Stream0, I1), - {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg}; + { + State#state{ + stream = Stream1, used_regs = Used bor RegBit, available_regs = Avail band (bnot RegBit) + 
}, + Reg + }; move_to_native_register( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Reg | AvailT], + available_regs = Avail, used_regs = Used } = State, {y_reg, Y} ) -> + Reg = first_avail(Avail), + RegBit = reg_bit(Reg), + AvailT = Avail band (bnot RegBit), Code = ldr_y_reg(Reg, Y, AvailT), Stream1 = StreamModule:append(Stream0, Code), - {State#state{stream = Stream1, available_regs = AvailT, used_regs = [Reg | Used]}, Reg}; + {State#state{stream = Stream1, available_regs = AvailT, used_regs = Used bor RegBit}, Reg}; move_to_native_register( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [RegA, RegB | AvailT], + available_regs = Avail, used_regs = Used } = State, {fp_reg, F} ) -> + RegA = first_avail(Avail), + RegABit = reg_bit(RegA), + Avail2 = Avail band (bnot RegABit), + RegB = first_avail(Avail2), + RegBBit = reg_bit(RegB), + AvailT = Avail2 band (bnot RegBBit), {BaseReg, Off} = ?FP_REGS, I1 = jit_riscv32_asm:lw(RegB, BaseReg, Off), I2 = jit_riscv32_asm:lw(RegA, RegB, F * 8), @@ -2316,7 +2465,9 @@ move_to_native_register( Code = <>, Stream1 = StreamModule:append(Stream0, Code), { - State#state{stream = Stream1, available_regs = AvailT, used_regs = [RegB, RegA | Used]}, + State#state{ + stream = Stream1, available_regs = AvailT, used_regs = Used bor RegABit bor RegBBit + }, {fp, RegA, RegB} }. 
@@ -2380,33 +2531,53 @@ copy_to_native_register( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [SaveReg | AvailT], + available_regs = Avail, used_regs = Used } = State, Reg ) when is_atom(Reg) -> + SaveReg = first_avail(Avail), + SaveBit = reg_bit(SaveReg), I1 = jit_riscv32_asm:mv(SaveReg, Reg), Stream1 = StreamModule:append(Stream0, I1), - {State#state{stream = Stream1, available_regs = AvailT, used_regs = [SaveReg | Used]}, SaveReg}; + { + State#state{ + stream = Stream1, + available_regs = Avail band (bnot SaveBit), + used_regs = Used bor SaveBit + }, + SaveReg + }; copy_to_native_register( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [SaveReg | AvailT], + available_regs = Avail, used_regs = Used } = State, {ptr, Reg} ) when is_atom(Reg) -> + SaveReg = first_avail(Avail), + SaveBit = reg_bit(SaveReg), I1 = jit_riscv32_asm:lw(SaveReg, Reg, 0), Stream1 = StreamModule:append(Stream0, I1), - {State#state{stream = Stream1, available_regs = AvailT, used_regs = [SaveReg | Used]}, SaveReg}; + { + State#state{ + stream = Stream1, + available_regs = Avail band (bnot SaveBit), + used_regs = Used bor SaveBit + }, + SaveReg + }; copy_to_native_register(State, Reg) -> move_to_native_register(State, Reg). move_to_cp( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Reg | AvailT]} = State, + #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State, {y_reg, Y} ) -> + Reg = first_avail(Avail), + AvailT = Avail band (bnot reg_bit(Reg)), I1 = ldr_y_reg(Reg, Y, AvailT), {BaseReg, Off} = ?CP, I2 = jit_riscv32_asm:sw(BaseReg, Reg, Off), @@ -2415,9 +2586,10 @@ move_to_cp( State#state{stream = Stream1}. 
increment_sp( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Reg | _]} = State, + #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State, Offset ) -> + Reg = first_avail(Avail), {BaseReg1, Off1} = ?Y_REGS, I1 = jit_riscv32_asm:lw(Reg, BaseReg1, Off1), I2 = jit_riscv32_asm:addi(Reg, Reg, Offset * 4), @@ -2431,12 +2603,13 @@ set_continuation_to_label( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Temp | _], + available_regs = Avail, branches = Branches, labels = Labels } = State, Label ) -> + Temp = first_avail(Avail), Offset = StreamModule:offset(Stream0), case lists:keyfind(Label, 1, Labels) of {Label, LabelOffset} -> @@ -2467,10 +2640,11 @@ set_continuation_to_offset( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Temp | _], + available_regs = Avail, branches = Branches } = State ) -> + Temp = first_avail(Avail), OffsetRef = make_ref(), Offset = StreamModule:offset(Stream0), % Reserve 8 bytes with all-1s placeholder for flash programming @@ -2491,10 +2665,12 @@ get_module_index( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Reg | AvailableT], + available_regs = Avail, used_regs = UsedRegs0 } = State ) -> + Reg = first_avail(Avail), + RegBit = reg_bit(Reg), % Load module from jit_state (which is in a1) I1 = jit_riscv32_asm:lw(Reg, ?JITSTATE_REG, ?JITSTATE_MODULE_OFFSET), I2 = jit_riscv32_asm:lw(Reg, Reg, 0), @@ -2503,8 +2679,8 @@ get_module_index( { State#state{ stream = Stream1, - available_regs = AvailableT, - used_regs = [Reg | UsedRegs0] + available_regs = Avail band (bnot RegBit), + used_regs = UsedRegs0 bor RegBit }, Reg }. @@ -2513,35 +2689,45 @@ get_module_index( %% JIT currentl calls this with two values: ?TERM_PRIMARY_CLEAR_MASK (-4) to %% clear bits and ?TERM_BOXED_TAG_MASK (0x3F). We can avoid any literal pool %% by using BICS for -4. 
+and_(#state{stream_module = StreamModule, stream = Stream0} = State0, {free, Reg}, SrcReg) when + is_atom(SrcReg) +-> + I = jit_riscv32_asm:and_(Reg, Reg, SrcReg), + Stream1 = StreamModule:append(Stream0, I), + {State0#state{stream = Stream1}, Reg}; and_(#state{stream_module = StreamModule, stream = Stream0} = State0, {free, Reg}, 16#FFFFFF) -> I1 = jit_riscv32_asm:slli(Reg, Reg, 8), I2 = jit_riscv32_asm:srli(Reg, Reg, 8), Stream1 = StreamModule:append(Stream0, <>), {State0#state{stream = Stream1}, Reg}; and_( - #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, + #state{stream_module = StreamModule, available_regs = Avail} = State0, {free, Reg}, Val ) when Val < 0 andalso Val >= -256 -> + Temp = first_avail(Avail), + AT = Avail band (bnot reg_bit(Temp)), State1 = mov_immediate(State0#state{available_regs = AT}, Temp, bnot (Val)), Stream1 = State1#state.stream, % RISC-V doesn't have bics, use not + and I1 = jit_riscv32_asm:not_(Temp, Temp), I2 = jit_riscv32_asm:and_(Reg, Reg, Temp), Stream2 = StreamModule:append(Stream1, <>), - {State1#state{available_regs = [Temp | AT], stream = Stream2}, Reg}; + {State1#state{available_regs = Avail, stream = Stream2}, Reg}; and_( - #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, + #state{stream_module = StreamModule, available_regs = Avail} = State0, {free, Reg}, Val -) -> +) when Avail =/= 0 -> + Temp = first_avail(Avail), + AT = Avail band (bnot reg_bit(Temp)), State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), Stream1 = State1#state.stream, I = jit_riscv32_asm:and_(Reg, Reg, Temp), Stream2 = StreamModule:append(Stream1, I), - {State1#state{available_regs = [Temp | AT], stream = Stream2}, Reg}; + {State1#state{available_regs = Avail, stream = Stream2}, Reg}; and_( - #state{stream_module = StreamModule, available_regs = []} = State0, + #state{stream_module = StreamModule, available_regs = 0} = State0, {free, Reg}, Val ) when Val < 0 andalso Val >= -256 -> 
@@ -2562,7 +2748,7 @@ and_( Stream4 = StreamModule:append(Stream3, Restore), {State0#state{stream = Stream4}, Reg}; and_( - #state{stream_module = StreamModule, available_regs = []} = State0, + #state{stream_module = StreamModule, available_regs = 0} = State0, {free, Reg}, Val ) -> @@ -2582,25 +2768,42 @@ and_( Stream4 = StreamModule:append(Stream3, Restore), {State0#state{stream = Stream4}, Reg}; and_( - #state{stream_module = StreamModule, available_regs = [ResultReg | AT], used_regs = UR} = + #state{stream_module = StreamModule, available_regs = Avail, used_regs = UR} = State0, Reg, ?TERM_PRIMARY_CLEAR_MASK ) -> + ResultReg = first_avail(Avail), + ResultBit = reg_bit(ResultReg), I = jit_riscv32_asm:andi(ResultReg, Reg, -4), Stream1 = StreamModule:append(State0#state.stream, I), - {State0#state{stream = Stream1, available_regs = AT, used_regs = [ResultReg | UR]}, ResultReg}. + { + State0#state{ + stream = Stream1, + available_regs = Avail band (bnot ResultBit), + used_regs = UR bor ResultBit + }, + ResultReg + }. +or_(#state{stream_module = StreamModule, stream = Stream0} = State0, Reg, SrcReg) when + is_atom(SrcReg) +-> + I = jit_riscv32_asm:or_(Reg, Reg, SrcReg), + Stream1 = StreamModule:append(Stream0, I), + State0#state{stream = Stream1}; or_( - #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, + #state{stream_module = StreamModule, available_regs = Avail} = State0, Reg, Val ) -> + Temp = first_avail(Avail), + AT = Avail band (bnot reg_bit(Temp)), State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), Stream1 = State1#state.stream, I = jit_riscv32_asm:or_(Reg, Temp), Stream2 = StreamModule:append(Stream1, I), - State1#state{available_regs = [Temp | AT], stream = Stream2}. + State1#state{available_regs = Avail, stream = Stream2}. 
add(#state{stream_module = StreamModule, stream = Stream0} = State0, Reg, Val) when Val >= 0 andalso Val =< 255 @@ -2614,12 +2817,14 @@ add(#state{stream_module = StreamModule, stream = Stream0} = State0, Reg, Val) w I = jit_riscv32_asm:add(Reg, Reg, Val), Stream1 = StreamModule:append(Stream0, I), State0#state{stream = Stream1}; -add(#state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, Reg, Val) -> +add(#state{stream_module = StreamModule, available_regs = Avail} = State0, Reg, Val) -> + Temp = first_avail(Avail), + AT = Avail band (bnot reg_bit(Temp)), State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), Stream1 = State1#state.stream, I = jit_riscv32_asm:add(Reg, Reg, Temp), Stream2 = StreamModule:append(Stream1, I), - State1#state{available_regs = [Temp | AT], stream = Stream2}. + State1#state{available_regs = Avail, stream = Stream2}. mov_immediate(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) when Val >= -16#800, Val =< 16#7FF @@ -2647,25 +2852,29 @@ sub(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) wh I = jit_riscv32_asm:sub(Reg, Reg, Val), Stream1 = StreamModule:append(Stream0, I), State#state{stream = Stream1}; -sub(#state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, Reg, Val) -> +sub(#state{stream_module = StreamModule, available_regs = Avail} = State0, Reg, Val) -> + Temp = first_avail(Avail), + AT = Avail band (bnot reg_bit(Temp)), State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), Stream1 = State1#state.stream, I = jit_riscv32_asm:sub(Reg, Reg, Temp), Stream2 = StreamModule:append(Stream1, I), - State1#state{available_regs = [Temp | AT], stream = Stream2}. + State1#state{available_regs = Avail, stream = Stream2}. 
mul(State, _Reg, 1) -> State; mul(State, Reg, 2) -> shift_left(State, Reg, 1); -mul(#state{available_regs = [Temp | _]} = State, Reg, 3) -> +mul(#state{available_regs = Avail} = State, Reg, 3) -> + Temp = first_avail(Avail), I1 = jit_riscv32_asm:slli(Temp, Reg, 1), I2 = jit_riscv32_asm:add(Reg, Temp, Reg), Stream1 = (State#state.stream_module):append(State#state.stream, <>), State#state{stream = Stream1}; mul(State, Reg, 4) -> shift_left(State, Reg, 2); -mul(#state{available_regs = [Temp | _]} = State, Reg, 5) -> +mul(#state{available_regs = Avail} = State, Reg, 5) -> + Temp = first_avail(Avail), I1 = jit_riscv32_asm:slli(Temp, Reg, 2), I2 = jit_riscv32_asm:add(Reg, Temp, Reg), Stream1 = (State#state.stream_module):append(State#state.stream, <>), @@ -2673,14 +2882,16 @@ mul(#state{available_regs = [Temp | _]} = State, Reg, 5) -> mul(State0, Reg, 6) -> State1 = mul(State0, Reg, 3), mul(State1, Reg, 2); -mul(#state{available_regs = [Temp | _]} = State, Reg, 7) -> +mul(#state{available_regs = Avail} = State, Reg, 7) -> + Temp = first_avail(Avail), I1 = jit_riscv32_asm:slli(Temp, Reg, 3), I2 = jit_riscv32_asm:sub(Reg, Temp, Reg), Stream1 = (State#state.stream_module):append(State#state.stream, <>), State#state{stream = Stream1}; mul(State, Reg, 8) -> shift_left(State, Reg, 3); -mul(#state{available_regs = [Temp | _]} = State, Reg, 9) -> +mul(#state{available_regs = Avail} = State, Reg, 9) -> + Temp = first_avail(Avail), I1 = jit_riscv32_asm:slli(Temp, Reg, 3), I2 = jit_riscv32_asm:add(Reg, Temp, Reg), Stream1 = (State#state.stream_module):append(State#state.stream, <>), @@ -2688,7 +2899,8 @@ mul(#state{available_regs = [Temp | _]} = State, Reg, 9) -> mul(State0, Reg, 10) -> State1 = mul(State0, Reg, 5), mul(State1, Reg, 2); -mul(#state{available_regs = [Temp | _]} = State, Reg, 15) -> +mul(#state{available_regs = Avail} = State, Reg, 15) -> + Temp = first_avail(Avail), I1 = jit_riscv32_asm:slli(Temp, Reg, 4), I2 = jit_riscv32_asm:sub(Reg, Temp, Reg), Stream1 = 
(State#state.stream_module):append(State#state.stream, <>), @@ -2700,16 +2912,18 @@ mul(State, Reg, 32) -> mul(State, Reg, 64) -> shift_left(State, Reg, 6); mul( - #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, + #state{stream_module = StreamModule, available_regs = Avail} = State0, Reg, Val ) -> + Temp = first_avail(Avail), + AT = Avail band (bnot reg_bit(Temp)), % multiply by decomposing by power of 2 State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), Stream1 = State1#state.stream, I = jit_riscv32_asm:mul(Reg, Reg, Temp), Stream2 = StreamModule:append(Stream1, I), - State1#state{stream = Stream2, available_regs = [Temp | State1#state.available_regs]}. + State1#state{stream = Stream2, available_regs = State1#state.available_regs bor reg_bit(Temp)}. %% %% Analysis of AArch64 pattern and RISC-V32 implementation: @@ -2732,8 +2946,9 @@ mul( %% -spec decrement_reductions_and_maybe_schedule_next(state()) -> state(). decrement_reductions_and_maybe_schedule_next( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0 + #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State0 ) -> + Temp = first_avail(Avail), % Load reduction count I1 = jit_riscv32_asm:lw(Temp, ?JITSTATE_REG, ?JITSTATE_REDUCTIONCOUNT_OFFSET), % Decrement reduction count @@ -2775,7 +2990,8 @@ decrement_reductions_and_maybe_schedule_next( Stream4 = StreamModule:replace( Stream3, BNEOffset, <> ), - merge_used_regs(State2#state{stream = Stream4}, State1#state.used_regs). + StreamN = Stream4, + merge_used_regs(State2#state{stream = StreamN}, State1#state.used_regs). -spec call_or_schedule_next(state(), non_neg_integer()) -> state(). 
call_or_schedule_next(State0, Label) -> @@ -2787,10 +3003,11 @@ call_only_or_schedule_next( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Temp | _] + available_regs = Avail } = State0, Label ) -> + Temp = first_avail(Avail), % Load reduction count (jit_state is in a1) I1 = jit_riscv32_asm:lw(Temp, ?JITSTATE_REG, ?JITSTATE_REDUCTIONCOUNT_OFFSET), % Decrement reduction count @@ -2859,10 +3076,14 @@ call_primitive_with_cp(State0, Primitive, Args) -> rewrite_cp_offset(State2, RewriteOffset, TempReg). -spec set_cp(state()) -> {state(), non_neg_integer(), riscv32_register()}. -set_cp(#state{available_regs = [TempReg | AvailT], used_regs = UsedRegs} = State0) -> +set_cp(#state{available_regs = Avail, used_regs = UsedRegs} = State0) -> + TempReg = first_avail(Avail), + TempBit = reg_bit(TempReg), % Reserve a temporary register for the offset BEFORE calling get_module_index % to avoid running out of available registers - State0b = State0#state{available_regs = AvailT, used_regs = [TempReg | UsedRegs]}, + State0b = State0#state{ + available_regs = Avail band (bnot TempBit), used_regs = UsedRegs bor TempBit + }, % get module index (dynamically) { #state{stream_module = StreamModule, stream = Stream0} = State1, @@ -2922,9 +3143,10 @@ rewrite_cp_offset( State0#state{stream = Stream1}. set_bs( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0, + #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State0, TermReg ) -> + Temp = first_avail(Avail), {BaseReg1, Off1} = ?BS, I1 = jit_riscv32_asm:sw(BaseReg1, TermReg, Off1), I2 = jit_riscv32_asm:li(Temp, 0), @@ -3029,14 +3251,15 @@ pc_relative_address(Rd, Offset) -> end. 
%% Helper function to generate str instruction with y_reg offset, handling large offsets -str_y_reg(SrcReg, Y, TempReg, _AvailableRegs) when Y * 4 =< 124 -> +str_y_reg(SrcReg, Y, TempReg, _AvailableMask) when Y * 4 =< 124 -> % Small offset - use immediate addressing {BaseReg, Off} = ?Y_REGS, I1 = jit_riscv32_asm:lw(TempReg, BaseReg, Off), I2 = jit_riscv32_asm:sw(TempReg, SrcReg, Y * 4), <>; -str_y_reg(SrcReg, Y, TempReg1, [TempReg2 | _]) -> +str_y_reg(SrcReg, Y, TempReg1, AvailableMask) when AvailableMask =/= 0 -> % Large offset - use register arithmetic with second available register + TempReg2 = first_avail(AvailableMask), Offset = Y * 4, {BaseReg, Off} = ?Y_REGS, I1 = jit_riscv32_asm:lw(TempReg1, BaseReg, Off), @@ -3044,7 +3267,7 @@ str_y_reg(SrcReg, Y, TempReg1, [TempReg2 | _]) -> I3 = jit_riscv32_asm:add(TempReg2, TempReg2, TempReg1), I4 = jit_riscv32_asm:sw(TempReg2, SrcReg, 0), <>; -str_y_reg(SrcReg, Y, TempReg1, []) -> +str_y_reg(SrcReg, Y, TempReg1, 0) -> % Large offset - no additional registers available, use IP_REG as second temp Offset = Y * 4, {BaseReg, Off} = ?Y_REGS, @@ -3056,14 +3279,16 @@ str_y_reg(SrcReg, Y, TempReg1, []) -> <>. 
%% Helper function to generate ldr instruction with y_reg offset, handling large offsets -ldr_y_reg(DstReg, Y, [TempReg | _]) when Y * 4 =< 124 -> +ldr_y_reg(DstReg, Y, AvailableMask) when AvailableMask =/= 0 andalso Y * 4 =< 124 -> % Small offset - use immediate addressing + TempReg = first_avail(AvailableMask), {BaseReg, Off} = ?Y_REGS, I1 = jit_riscv32_asm:lw(TempReg, BaseReg, Off), I2 = jit_riscv32_asm:lw(DstReg, TempReg, Y * 4), <>; -ldr_y_reg(DstReg, Y, [TempReg | _]) -> +ldr_y_reg(DstReg, Y, AvailableMask) when AvailableMask =/= 0 -> % Large offset - use DstReg as second temp register for arithmetic + TempReg = first_avail(AvailableMask), Offset = Y * 4, {BaseReg, Off} = ?Y_REGS, I1 = jit_riscv32_asm:lw(TempReg, BaseReg, Off), @@ -3071,13 +3296,13 @@ ldr_y_reg(DstReg, Y, [TempReg | _]) -> I3 = jit_riscv32_asm:add(DstReg, DstReg, TempReg), I4 = jit_riscv32_asm:lw(DstReg, DstReg, 0), <>; -ldr_y_reg(DstReg, Y, []) when Y * 4 =< 124 -> +ldr_y_reg(DstReg, Y, 0) when Y * 4 =< 124 -> % Small offset, no registers available - use DstReg as temp {BaseReg, Off} = ?Y_REGS, I1 = jit_riscv32_asm:lw(DstReg, BaseReg, Off), I2 = jit_riscv32_asm:lw(DstReg, DstReg, Y * 4), <>; -ldr_y_reg(DstReg, Y, []) -> +ldr_y_reg(DstReg, Y, 0) -> % Large offset, no registers available - use IP_REG as temp register % Note: IP_REG (t3) can only be used with mov, not ldr directly Offset = Y * 4, @@ -3089,18 +3314,74 @@ ldr_y_reg(DstReg, Y, []) -> I5 = jit_riscv32_asm:lw(DstReg, DstReg, 0), <>. -free_reg(AvailableRegs0, UsedRegs0, Reg) when ?IS_GPR(Reg) -> - AvailableRegs1 = free_reg0(?AVAILABLE_REGS, AvailableRegs0, Reg, []), - true = lists:member(Reg, UsedRegs0), - UsedRegs1 = lists:delete(Reg, UsedRegs0), - {AvailableRegs1, UsedRegs1}. 
- -free_reg0([Reg | _SortedT], PrevRegs0, Reg, Acc) -> - lists:reverse(Acc, [Reg | PrevRegs0]); -free_reg0([PrevReg | SortedT], [PrevReg | PrevT], Reg, Acc) -> - free_reg0(SortedT, PrevT, Reg, [PrevReg | Acc]); -free_reg0([_Other | SortedT], PrevRegs, Reg, Acc) -> - free_reg0(SortedT, PrevRegs, Reg, Acc). +reg_bit(a0) -> ?REG_BIT_A0; +reg_bit(a1) -> ?REG_BIT_A1; +reg_bit(a2) -> ?REG_BIT_A2; +reg_bit(a3) -> ?REG_BIT_A3; +reg_bit(a4) -> ?REG_BIT_A4; +reg_bit(a5) -> ?REG_BIT_A5; +reg_bit(a6) -> ?REG_BIT_A6; +reg_bit(a7) -> ?REG_BIT_A7; +reg_bit(t0) -> ?REG_BIT_T0; +reg_bit(t1) -> ?REG_BIT_T1; +reg_bit(t2) -> ?REG_BIT_T2; +reg_bit(t3) -> ?REG_BIT_T3; +reg_bit(t4) -> ?REG_BIT_T4; +reg_bit(t5) -> ?REG_BIT_T5; +reg_bit(t6) -> ?REG_BIT_T6. + +regs_to_mask([]) -> 0; +regs_to_mask([ctx | T]) -> regs_to_mask(T); +regs_to_mask([imm | T]) -> regs_to_mask(T); +regs_to_mask([jit_state | T]) -> regs_to_mask(T); +regs_to_mask([offset | T]) -> regs_to_mask(T); +regs_to_mask([stack | T]) -> regs_to_mask(T); +regs_to_mask([Reg | T]) -> reg_bit(Reg) bor regs_to_mask(T). + +%% first_avail returns the first available register from a bitmask. +%% Order matches AVAILABLE_REGS = [t6, t5, t4, t3, t2, t1, t0] +first_avail(Mask) when Mask band ?REG_BIT_T6 =/= 0 -> t6; +first_avail(Mask) when Mask band ?REG_BIT_T5 =/= 0 -> t5; +first_avail(Mask) when Mask band ?REG_BIT_T4 =/= 0 -> t4; +first_avail(Mask) when Mask band ?REG_BIT_T3 =/= 0 -> t3; +first_avail(Mask) when Mask band ?REG_BIT_T2 =/= 0 -> t2; +first_avail(Mask) when Mask band ?REG_BIT_T1 =/= 0 -> t1; +first_avail(Mask) when Mask band ?REG_BIT_T0 =/= 0 -> t0. + +%% Convert bitmask to list, covering all register bits. +mask_to_list(0) -> []; +mask_to_list(Mask) -> mask_to_list_t6(Mask). + +mask_to_list_t6(Mask) when Mask band ?REG_BIT_T6 =/= 0 -> [t6 | mask_to_list_t5(Mask)]; +mask_to_list_t6(Mask) -> mask_to_list_t5(Mask). 
+mask_to_list_t5(Mask) when Mask band ?REG_BIT_T5 =/= 0 -> [t5 | mask_to_list_t4(Mask)]; +mask_to_list_t5(Mask) -> mask_to_list_t4(Mask). +mask_to_list_t4(Mask) when Mask band ?REG_BIT_T4 =/= 0 -> [t4 | mask_to_list_t3(Mask)]; +mask_to_list_t4(Mask) -> mask_to_list_t3(Mask). +mask_to_list_t3(Mask) when Mask band ?REG_BIT_T3 =/= 0 -> [t3 | mask_to_list_t2(Mask)]; +mask_to_list_t3(Mask) -> mask_to_list_t2(Mask). +mask_to_list_t2(Mask) when Mask band ?REG_BIT_T2 =/= 0 -> [t2 | mask_to_list_t1(Mask)]; +mask_to_list_t2(Mask) -> mask_to_list_t1(Mask). +mask_to_list_t1(Mask) when Mask band ?REG_BIT_T1 =/= 0 -> [t1 | mask_to_list_t0(Mask)]; +mask_to_list_t1(Mask) -> mask_to_list_t0(Mask). +mask_to_list_t0(Mask) when Mask band ?REG_BIT_T0 =/= 0 -> [t0 | mask_to_list_a7(Mask)]; +mask_to_list_t0(Mask) -> mask_to_list_a7(Mask). +mask_to_list_a7(Mask) when Mask band ?REG_BIT_A7 =/= 0 -> [a7 | mask_to_list_a6(Mask)]; +mask_to_list_a7(Mask) -> mask_to_list_a6(Mask). +mask_to_list_a6(Mask) when Mask band ?REG_BIT_A6 =/= 0 -> [a6 | mask_to_list_a5(Mask)]; +mask_to_list_a6(Mask) -> mask_to_list_a5(Mask). +mask_to_list_a5(Mask) when Mask band ?REG_BIT_A5 =/= 0 -> [a5 | mask_to_list_a4(Mask)]; +mask_to_list_a5(Mask) -> mask_to_list_a4(Mask). +mask_to_list_a4(Mask) when Mask band ?REG_BIT_A4 =/= 0 -> [a4 | mask_to_list_a3(Mask)]; +mask_to_list_a4(Mask) -> mask_to_list_a3(Mask). +mask_to_list_a3(Mask) when Mask band ?REG_BIT_A3 =/= 0 -> [a3 | mask_to_list_a2(Mask)]; +mask_to_list_a3(Mask) -> mask_to_list_a2(Mask). +mask_to_list_a2(Mask) when Mask band ?REG_BIT_A2 =/= 0 -> [a2 | mask_to_list_a1(Mask)]; +mask_to_list_a2(Mask) -> mask_to_list_a1(Mask). +mask_to_list_a1(Mask) when Mask band ?REG_BIT_A1 =/= 0 -> [a1 | mask_to_list_a0(Mask)]; +mask_to_list_a1(Mask) -> mask_to_list_a0(Mask). +mask_to_list_a0(Mask) when Mask band ?REG_BIT_A0 =/= 0 -> [a0]; +mask_to_list_a0(_Mask) -> []. 
args_regs(Args) -> lists:map( diff --git a/libs/jit/src/jit_x86_64.erl b/libs/jit/src/jit_x86_64.erl index e5d36d536..af4318f67 100644 --- a/libs/jit/src/jit_x86_64.erl +++ b/libs/jit/src/jit_x86_64.erl @@ -116,8 +116,8 @@ offset :: non_neg_integer(), branches :: [{non_neg_integer(), non_neg_integer(), non_neg_integer()}], jump_table_start :: non_neg_integer(), - available_regs :: [x86_64_register()], - used_regs :: [x86_64_register()], + available_regs :: non_neg_integer(), + used_regs :: non_neg_integer(), labels :: [{integer() | reference(), integer()}], variant :: non_neg_integer() }). @@ -183,9 +183,26 @@ -define(IS_UINT8_T(X), is_integer(X) andalso X >= 0 andalso X =< 255). -define(IS_UINT32_T(X), is_integer(X) andalso X >= 0 andalso X < 16#100000000). --define(AVAILABLE_REGS, [rax, r11, r10, r9, r8, rcx]). -define(PARAMETER_REGS, [rdi, rsi, rdx, rcx, r8, r9]). --define(SCRATCH_REGS, [rdi, rsi, rdx, rcx, r8, r9, r10, r11]). + +-define(REG_BIT_RAX, (1 bsl 0)). +-define(REG_BIT_RCX, (1 bsl 1)). +-define(REG_BIT_RDX, (1 bsl 2)). +-define(REG_BIT_RSI, (1 bsl 3)). +-define(REG_BIT_RDI, (1 bsl 4)). +-define(REG_BIT_R8, (1 bsl 5)). +-define(REG_BIT_R9, (1 bsl 6)). +-define(REG_BIT_R10, (1 bsl 7)). +-define(REG_BIT_R11, (1 bsl 8)). + +-define(AVAILABLE_REGS_MASK, + (?REG_BIT_RAX bor ?REG_BIT_R11 bor ?REG_BIT_R10 bor ?REG_BIT_R9 bor ?REG_BIT_R8 bor + ?REG_BIT_RCX) +). +-define(SCRATCH_REGS_MASK, + (?REG_BIT_RDI bor ?REG_BIT_RSI bor ?REG_BIT_RDX bor ?REG_BIT_RCX bor ?REG_BIT_R8 bor + ?REG_BIT_R9 bor ?REG_BIT_R10 bor ?REG_BIT_R11) +). %%----------------------------------------------------------------------------- %% @doc Return the word size in bytes, i.e. the sizeof(term) i.e. @@ -222,8 +239,8 @@ new(Variant, StreamModule, Stream) -> branches = [], jump_table_start = 0, offset = StreamModule:offset(Stream), - available_regs = ?AVAILABLE_REGS, - used_regs = [], + available_regs = ?AVAILABLE_REGS_MASK, + used_regs = 0, labels = [], variant = Variant }. 
@@ -278,7 +295,7 @@ debugger(#state{stream_module = StreamModule, stream = Stream0} = State) -> %% @return The list of used registers %%----------------------------------------------------------------------------- -spec used_regs(state()) -> [x86_64_register()]. -used_regs(#state{used_regs = Used}) -> Used. +used_regs(#state{used_regs = Used}) -> mask_to_list(Used). %%----------------------------------------------------------------------------- %% @doc Return the list of currently available native scratch registers. This @@ -288,7 +305,7 @@ used_regs(#state{used_regs = Used}) -> Used. %% @return The list of available registers %%----------------------------------------------------------------------------- -spec available_regs(state()) -> [x86_64_register()]. -available_regs(#state{available_regs = Available}) -> Available. +available_regs(#state{available_regs = Available}) -> mask_to_list(Available). %%----------------------------------------------------------------------------- %% @doc Free native registers. The passed list of registers can contain @@ -312,8 +329,11 @@ free_native_register( ) when is_atom(Reg) -> - {Available1, Used1} = free_reg(Available0, Used0, Reg), - State#state{available_regs = Available1, used_regs = Used1}; + Bit = reg_bit(Reg), + State#state{ + available_regs = Available0 bor Bit, + used_regs = Used0 band (bnot Bit) + }; free_native_register(State, {ptr, Reg}) -> free_native_register(State, Reg); free_native_register(State, _Other) -> @@ -328,8 +348,8 @@ free_native_register(State, _Other) -> %%----------------------------------------------------------------------------- -spec assert_all_native_free(state()) -> ok. assert_all_native_free(State) -> - [] = State#state.used_regs, - ?AVAILABLE_REGS = State#state.available_regs, + 0 = State#state.used_regs, + ?AVAILABLE_REGS_MASK = State#state.available_regs, ok. 
%%----------------------------------------------------------------------------- @@ -455,9 +475,16 @@ call_primitive( ) -> % We need a register for the function pointer that should not be used as a parameter ParamRegs = lists:sublist(?PARAMETER_REGS, length(Args)), - case AvailableRegs0 -- ParamRegs of - [Temp | _] -> - AvailableRegs1 = AvailableRegs0 -- [Temp], + ParamMask = regs_to_mask(ParamRegs), + FreeFromParams = AvailableRegs0 band (bnot ParamMask), + case FreeFromParams of + 0 -> + % No register left, we'll use the stack to save NATIVE_INTERFACE_REG + % and rax when calling function. + call_func_ptr(State, {primitive, Primitive}, Args); + _ -> + Temp = first_avail(FreeFromParams), + TempBit = reg_bit(Temp), PrepCall = case Primitive of 0 -> @@ -468,15 +495,13 @@ call_primitive( Stream1 = StreamModule:append(Stream0, PrepCall), call_func_ptr( State#state{ - stream = Stream1, available_regs = AvailableRegs1, used_regs = [Temp | UsedRegs] + stream = Stream1, + available_regs = AvailableRegs0 band (bnot TempBit), + used_regs = UsedRegs bor TempBit }, {free, Temp}, Args - ); - [] -> - % No register left, we'll use the stack to save NATIVE_INTERFACE_REG - % and rax when calling function. - call_func_ptr(State, {primitive, Primitive}, Args) + ) end. 
%%----------------------------------------------------------------------------- @@ -502,9 +527,14 @@ call_primitive_last( % registers used for parameters ParamRegs = lists:sublist(?PARAMETER_REGS, length(Args)), ArgsRegs = args_regs(Args), - ScratchRegs = ?AVAILABLE_REGS -- ArgsRegs -- ParamRegs, - [Temp | AvailableRegs1] = ScratchRegs, - UsedRegs = ?AVAILABLE_REGS -- AvailableRegs1, + ParamMask = regs_to_mask(ParamRegs), + ArgsMask = regs_to_mask(ArgsRegs), + ScratchMask = + ?AVAILABLE_REGS_MASK band (bnot (ArgsMask bor ParamMask)), + Temp = first_avail(ScratchMask), + TempBit = reg_bit(Temp), + AvailableRegs1 = ScratchMask band (bnot TempBit), + UsedRegs = ?AVAILABLE_REGS_MASK band (bnot AvailableRegs1), PrepCall = case Primitive of 0 -> @@ -522,7 +552,11 @@ call_primitive_last( #state{stream = Stream2} = State1, Call = jit_x86_64_asm:jmpq({Temp}), Stream3 = StreamModule:append(Stream2, Call), - State1#state{stream = Stream3, available_regs = ?AVAILABLE_REGS, used_regs = []}. + State1#state{ + stream = Stream3, + available_regs = ?AVAILABLE_REGS_MASK, + used_regs = 0 + }. %%----------------------------------------------------------------------------- %% @doc Emit a return of a value if it's not equal to ctx. @@ -551,11 +585,11 @@ return_if_not_equal_to_ctx( I4 = jit_x86_64_asm:retq(), I2 = jit_x86_64_asm:jz(byte_size(I3) + byte_size(I4) + 2), Stream1 = StreamModule:append(Stream0, <>), - {AvailableRegs1, UsedRegs1} = free_reg(AvailableRegs0, UsedRegs0, Reg), + RegBit = reg_bit(Reg), State#state{ stream = Stream1, - available_regs = AvailableRegs1, - used_regs = UsedRegs1 + available_regs = AvailableRegs0 bor RegBit, + used_regs = UsedRegs0 band (bnot RegBit) }. 
%%----------------------------------------------------------------------------- @@ -586,7 +620,10 @@ jump_to_label( {RelocOffset, I1} = jit_x86_64_asm:jmp_rel32(1), Reloc = {Label, Offset + RelocOffset, 32}, Stream1 = StreamModule:append(Stream0, I1), - State#state{stream = Stream1, branches = [Reloc | AccBranches]} + State#state{ + stream = Stream1, + branches = [Reloc | AccBranches] + } end. jump_to_offset(#state{stream_module = StreamModule, stream = Stream0} = State, TargetOffset) -> @@ -609,10 +646,11 @@ jump_to_continuation( stream_module = StreamModule, stream = Stream0, offset = BaseOffset, - available_regs = [TempReg | _] + available_regs = Avail } = State, {free, OffsetReg} ) -> + TempReg = first_avail(Avail), % Calculate absolute address: native_code_base + target_offset % where native_code_base = current_pc + (BaseOffset - CurrentStreamOffset) % Similar to aarch64 approach but using leaq for PC-relative addressing @@ -630,7 +668,11 @@ jump_to_continuation( Code = <>, Stream1 = StreamModule:append(Stream0, Code), % Free all registers since this is a tail jump - State#state{stream = Stream1, available_regs = ?AVAILABLE_REGS, used_regs = []}. + State#state{ + stream = Stream1, + available_regs = ?AVAILABLE_REGS_MASK, + used_regs = 0 + }. %%----------------------------------------------------------------------------- %% @doc Emit an if block, i.e. 
emit a test of a condition and conditionnally @@ -763,8 +805,9 @@ if_block_cond0(State0, {Value, '<', RegOrTuple}) when ?IS_SINT32_T(Value) -> {State1, <>, byte_size(I1) + RelocJLEOffset}; % Catch-all for large values outside SINT32_T range if_block_cond0( - #state{available_regs = [Temp | _]} = State0, {Value, '<', RegOrTuple} + #state{available_regs = Avail} = State0, {Value, '<', RegOrTuple} ) when is_integer(Value) -> + Temp = first_avail(Avail), Reg = case RegOrTuple of {free, Reg0} -> Reg0; @@ -797,8 +840,9 @@ if_block_cond0(State0, {RegOrTuple, '<', RegB}) when is_atom(RegB) -> {State1, <>, byte_size(I1) + RelocJGEOffset}; % Catch-all for large values outside SINT32_T range if_block_cond0( - #state{available_regs = [Temp | _]} = State0, {RegOrTuple, '<', Value} + #state{available_regs = Avail} = State0, {RegOrTuple, '<', Value} ) when is_integer(Value) -> + Temp = first_avail(Avail), Reg = case RegOrTuple of {free, Reg0} -> Reg0; @@ -843,9 +887,10 @@ if_block_cond0( State1 = if_block_free_reg(RegOrTuple, State0), {State1, <>, byte_size(I1) + RelocJZOffset}; if_block_cond0( - #state{available_regs = [Temp | _]} = State0, + #state{available_regs = Avail} = State0, {RegOrTuple, '!=', Val} ) when is_integer(Val) orelse ?IS_GPR(Val) -> + Temp = first_avail(Avail), Reg = case RegOrTuple of {free, Reg0} -> Reg0; @@ -883,9 +928,10 @@ if_block_cond0( State1 = if_block_free_reg(RegOrTuple, State0), {State1, <>, byte_size(I1) + RelocJZOffset}; if_block_cond0( - #state{available_regs = [Temp | _]} = State0, + #state{available_regs = Avail} = State0, {RegOrTuple, '==', Val} ) when is_integer(Val) orelse ?IS_GPR(Val) -> + Temp = first_avail(Avail), Reg = case RegOrTuple of {free, Reg0} -> Reg0; @@ -960,7 +1006,7 @@ if_block_cond0(State0, {{free, Reg} = RegTuple, '&', Mask, '!=', Val}) when ?IS_ State1 = if_block_free_reg(RegTuple, State0), {State1, <>, byte_size(I1) + byte_size(I2) + RelocJZOffset}; if_block_cond0(State0, {Reg, '&', Mask, '!=', Val}) when 
?IS_UINT8_T(Mask) -> - Temp = hd(State0#state.available_regs), + Temp = first_avail(State0#state.available_regs), I1 = jit_x86_64_asm:movq(Reg, Temp), I2 = jit_x86_64_asm:andb(Mask, Temp), I3 = jit_x86_64_asm:cmpb(Val, Temp), @@ -969,32 +1015,20 @@ if_block_cond0(State0, {Reg, '&', Mask, '!=', Val}) when ?IS_UINT8_T(Mask) -> byte_size(I1) + byte_size(I2) + byte_size(I3) + RelocJZOffset}. -spec if_block_free_reg(x86_64_register() | {free, x86_64_register()}, state()) -> state(). -if_block_free_reg({free, Reg}, State0) -> - #state{available_regs = AvR0, used_regs = UR0} = State0, - {AvR1, UR1} = free_reg(AvR0, UR0, Reg), +if_block_free_reg({free, Reg}, #state{available_regs = AvR0, used_regs = UR0} = State0) -> + Bit = reg_bit(Reg), State0#state{ - available_regs = AvR1, - used_regs = UR1 + available_regs = AvR0 bor Bit, + used_regs = UR0 band (bnot Bit) }; if_block_free_reg(Reg, State0) when ?IS_GPR(Reg) -> State0. --spec merge_used_regs(state(), [x86_64_register()]) -> state(). -merge_used_regs(#state{used_regs = UR0, available_regs = AvR0} = State, [ - Reg | T -]) -> - case lists:member(Reg, UR0) of - true -> - merge_used_regs(State, T); - false -> - AvR1 = lists:delete(Reg, AvR0), - UR1 = [Reg | UR0], - merge_used_regs( - State#state{used_regs = UR1, available_regs = AvR1}, T - ) - end; -merge_used_regs(State, []) -> - State. +-spec merge_used_regs(state(), non_neg_integer()) -> state(). +merge_used_regs(#state{used_regs = UR} = State, OtherUR) -> + MergedUR = UR bor OtherUR, + MergedAvail = ?AVAILABLE_REGS_MASK band (bnot MergedUR), + State#state{used_regs = MergedUR, available_regs = MergedAvail}. 
%%----------------------------------------------------------------------------- %% @doc Emit a shift register right by a fixed number of bits, effectively @@ -1006,7 +1040,9 @@ merge_used_regs(State, []) -> %%----------------------------------------------------------------------------- -spec shift_right(#state{}, maybe_free_x86_64_register(), non_neg_integer()) -> {#state{}, x86_64_register()}. -shift_right(#state{stream_module = StreamModule, stream = Stream0} = State, {free, Reg}, Shift) when +shift_right( + #state{stream_module = StreamModule, stream = Stream0} = State, {free, Reg}, Shift +) when ?IS_GPR(Reg) andalso is_integer(Shift) -> I = jit_x86_64_asm:shrq(Shift, Reg), @@ -1015,7 +1051,7 @@ shift_right(#state{stream_module = StreamModule, stream = Stream0} = State, {fre shift_right( #state{ stream_module = StreamModule, - available_regs = [ResultReg | T], + available_regs = Avail, used_regs = UR, stream = Stream0 } = State, @@ -1024,10 +1060,19 @@ shift_right( ) when ?IS_GPR(Reg) andalso is_integer(Shift) -> + ResultReg = first_avail(Avail), + Bit = reg_bit(ResultReg), I1 = jit_x86_64_asm:movq(Reg, ResultReg), I2 = jit_x86_64_asm:shrq(Shift, ResultReg), Stream1 = StreamModule:append(Stream0, <>), - {State#state{stream = Stream1, available_regs = T, used_regs = [ResultReg | UR]}, ResultReg}. + { + State#state{ + stream = Stream1, + available_regs = Avail band (bnot Bit), + used_regs = UR bor Bit + }, + ResultReg + }. 
%%----------------------------------------------------------------------------- %% @doc Emit a shift register left by a fixed number of bits, effectively @@ -1037,7 +1082,9 @@ shift_right( %% @param Shift number of bits to shift %% @return new state %%----------------------------------------------------------------------------- -shift_left(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Shift) when +shift_left( + #state{stream_module = StreamModule, stream = Stream0} = State, Reg, Shift +) when is_atom(Reg) -> I = jit_x86_64_asm:shlq(Shift, Reg), @@ -1065,16 +1112,17 @@ call_func_ptr( FuncPtrTuple, Args ) -> - FreeRegs = lists:flatmap( + FreeMask = lists:foldl( fun - ({free, {ptr, Reg}}) -> [Reg]; - ({free, Reg}) when is_atom(Reg) -> [Reg]; - (_) -> [] + ({free, {ptr, Reg}}, Acc) -> Acc bor reg_bit(Reg); + ({free, Reg}, Acc) when is_atom(Reg) -> Acc bor reg_bit(Reg); + (_, Acc) -> Acc end, + 0, [FuncPtrTuple | Args] ), - UsedRegs1 = UsedRegs0 -- FreeRegs, - SavedRegs = [?CTX_REG, ?JITSTATE_REG, ?NATIVE_INTERFACE_REG | UsedRegs1], + UsedRegs1 = UsedRegs0 band (bnot FreeMask), + SavedRegs = [?CTX_REG, ?JITSTATE_REG, ?NATIVE_INTERFACE_REG | mask_to_list(UsedRegs1)], Stream1 = lists:foldl( fun(Reg, AccStream) -> StreamModule:append(AccStream, jit_x86_64_asm:pushq(Reg)) @@ -1133,14 +1181,14 @@ call_func_ptr( StreamModule:append(Stream6, PostCall1) end, % If rax is in used regs, save it to another temporary register - AvailableRegs1 = FreeRegs ++ AvailableRegs0, + AvailableRegs1 = AvailableRegs0 bor FreeMask, {Stream8, ResultReg} = - case lists:member(rax, SavedRegs) of - true -> - [Temp | _] = AvailableRegs1, - {StreamModule:append(Stream7, jit_x86_64_asm:movq(rax, Temp)), Temp}; - false -> - {Stream7, rax} + case UsedRegs1 band ?REG_BIT_RAX of + 0 -> + {Stream7, rax}; + _ -> + Temp = first_avail(AvailableRegs1), + {StreamModule:append(Stream7, jit_x86_64_asm:movq(rax, Temp)), Temp} end, Stream9 = lists:foldl( fun(Reg, AccStream) -> @@ -1149,13 
+1197,13 @@ call_func_ptr( Stream8, lists:reverse(SavedRegs) ), - AvailableRegs2 = lists:delete(ResultReg, AvailableRegs1), - AvailableRegs3 = ?AVAILABLE_REGS -- (?AVAILABLE_REGS -- AvailableRegs2), - UsedRegs2 = [ResultReg | UsedRegs1], + ResultBit = reg_bit(ResultReg), + AvailableRegs2 = (AvailableRegs1 band (bnot ResultBit)) band ?AVAILABLE_REGS_MASK, + UsedRegs2 = UsedRegs1 bor ResultBit, { State1#state{ stream = Stream9, - available_regs = AvailableRegs3, + available_regs = AvailableRegs2, used_regs = UsedRegs2 }, ResultReg @@ -1167,8 +1215,10 @@ set_args( ) -> ParamRegs = parameter_regs(Args), ArgsRegs = args_regs(Args), + ParamMask = regs_to_mask(ParamRegs), + ArgsMask = regs_to_mask(ArgsRegs), AvailableScratchGP = - ?SCRATCH_REGS -- ParamRegs -- ArgsRegs -- UsedRegs, + ?SCRATCH_REGS_MASK band (bnot (ParamMask bor ArgsMask bor UsedRegs)), Offset = StreamModule:offset(Stream0), Args1 = [ case Arg of @@ -1179,19 +1229,19 @@ set_args( ], SetArgsCode = set_args0(Args1, ArgsRegs, ParamRegs, AvailableScratchGP, []), Stream1 = StreamModule:append(Stream0, SetArgsCode), - NewUsedRegs = lists:foldl( + NewUsedMask = lists:foldl( fun - ({free, {ptr, Reg}}, AccUsed) -> lists:delete(Reg, AccUsed); - ({free, Reg}, AccUsed) -> lists:delete(Reg, AccUsed); - (_, AccUsed) -> AccUsed + ({free, {ptr, Reg}}, AccMask) -> AccMask band (bnot reg_bit(Reg)); + ({free, Reg}, AccMask) when is_atom(Reg) -> AccMask band (bnot reg_bit(Reg)); + (_, AccMask) -> AccMask end, UsedRegs, Args ), State0#state{ stream = Stream1, - available_regs = ?AVAILABLE_REGS -- ParamRegs -- NewUsedRegs, - used_regs = ParamRegs ++ (NewUsedRegs -- ParamRegs) + available_regs = ?AVAILABLE_REGS_MASK band (bnot (ParamMask bor NewUsedMask)), + used_regs = ParamMask bor NewUsedMask }. 
parameter_regs(Args) -> @@ -1275,13 +1325,9 @@ set_args0( ) -> case lists:member(ParamReg, ArgsRegs) of false -> - % Normal case: ParamReg is free, just move Arg to ParamReg J = set_args1(Arg, ParamReg), set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, [J | Acc]); true -> - % ParamReg is occupied by another argument that will go elsewhere - % Use xchg to swap ArgReg and ParamReg - % After xchg, the value from Arg (which was in ArgReg) is now in ParamReg I = jit_x86_64_asm:xchgq(ArgReg, ParamReg), {NewArgsT, NewArgsRegs} = exchange_reg(ArgsT, ArgsRegs, ParamReg, ArgReg), set_args0(NewArgsT, NewArgsRegs, ParamRegs, AvailGP, [I | Acc]) @@ -1322,72 +1368,82 @@ set_args1({avm_int64_t, Value}, Reg) when is_integer(Value) -> (state(), Src :: value() | vm_register(), Dest :: vm_register()) -> state(); (state(), Src :: {free, {ptr, x86_64_register(), 1}}, Dest :: {fp_reg, non_neg_integer()}) -> state(). +move_to_vm_register(State, Src, Dest) -> + move_to_vm_register_emit(State, Src, Dest). + % Src = 0, we can andq as an optimization -move_to_vm_register(State, 0, {x_reg, X}) when X < ?MAX_REG -> +move_to_vm_register_emit(State, 0, {x_reg, X}) when X < ?MAX_REG -> I1 = jit_x86_64_asm:andq(0, ?X_REG(X)), Stream1 = (State#state.stream_module):append(State#state.stream, I1), State#state{stream = Stream1}; -move_to_vm_register(State, 0, {x_reg, extra}) -> +move_to_vm_register_emit(State, 0, {x_reg, extra}) -> I1 = jit_x86_64_asm:andq(0, ?X_REG(?MAX_REG)), Stream1 = (State#state.stream_module):append(State#state.stream, I1), State#state{stream = Stream1}; -move_to_vm_register(State, 0, {ptr, Reg}) -> +move_to_vm_register_emit(State, 0, {ptr, Reg}) -> I1 = jit_x86_64_asm:andq(0, {0, Reg}), Stream1 = (State#state.stream_module):append(State#state.stream, I1), State#state{stream = Stream1}; -move_to_vm_register(#state{available_regs = [Temp | _]} = State, 0, {y_reg, Y}) -> +move_to_vm_register_emit(#state{available_regs = Avail} = State, 0, {y_reg, Y}) -> + Temp = first_avail(Avail), 
I1 = jit_x86_64_asm:movq(?Y_REGS, Temp), I2 = jit_x86_64_asm:andq(0, {Y * 8, Temp}), Stream1 = (State#state.stream_module):append(State#state.stream, <>), State#state{stream = Stream1}; % ?IS_SINT32_T(Src), we can use movq to set the value -move_to_vm_register(State, N, {x_reg, X}) when X < ?MAX_REG andalso ?IS_SINT32_T(N) -> +move_to_vm_register_emit(State, N, {x_reg, X}) when X < ?MAX_REG andalso ?IS_SINT32_T(N) -> Stream1 = (State#state.stream_module):append( State#state.stream, jit_x86_64_asm:movq(N, ?X_REG(X)) ), State#state{stream = Stream1}; -move_to_vm_register(State, N, {x_reg, extra}) when ?IS_SINT32_T(N) -> +move_to_vm_register_emit(State, N, {x_reg, extra}) when ?IS_SINT32_T(N) -> Stream1 = (State#state.stream_module):append( State#state.stream, jit_x86_64_asm:movq(N, ?X_REG(?MAX_REG)) ), State#state{stream = Stream1}; -move_to_vm_register(State, N, {ptr, Reg}) when ?IS_SINT32_T(N) -> +move_to_vm_register_emit(State, N, {ptr, Reg}) when ?IS_SINT32_T(N) -> Stream1 = (State#state.stream_module):append( State#state.stream, jit_x86_64_asm:movq(N, {0, Reg}) ), State#state{stream = Stream1}; -move_to_vm_register(#state{available_regs = [Temp | _]} = State, N, {y_reg, Y}) when +move_to_vm_register_emit(#state{available_regs = Avail} = State, N, {y_reg, Y}) when ?IS_SINT32_T(N) -> + Temp = first_avail(Avail), I1 = jit_x86_64_asm:movq(?Y_REGS, Temp), I2 = jit_x86_64_asm:movq(N, {Y * 8, Temp}), Stream1 = (State#state.stream_module):append(State#state.stream, <>), State#state{stream = Stream1}; % ?is_integer(Src), we need to use movabsq -move_to_vm_register(#state{available_regs = [Temp | _]} = State, N, {x_reg, X}) when +move_to_vm_register_emit(#state{available_regs = Avail} = State, N, {x_reg, X}) when X < ?MAX_REG andalso is_integer(N) -> + Temp = first_avail(Avail), I1 = jit_x86_64_asm:movabsq(N, Temp), I2 = jit_x86_64_asm:movq(Temp, ?X_REG(X)), Stream1 = (State#state.stream_module):append(State#state.stream, <>), State#state{stream = Stream1}; 
-move_to_vm_register(#state{available_regs = [Temp | _]} = State, N, {x_reg, extra}) when +move_to_vm_register_emit(#state{available_regs = Avail} = State, N, {x_reg, extra}) when is_integer(N) -> + Temp = first_avail(Avail), I1 = jit_x86_64_asm:movabsq(N, Temp), I2 = jit_x86_64_asm:movq(Temp, ?X_REG(?MAX_REG)), Stream1 = (State#state.stream_module):append(State#state.stream, <>), State#state{stream = Stream1}; -move_to_vm_register(#state{available_regs = [Temp | _]} = State, N, {ptr, Reg}) when +move_to_vm_register_emit(#state{available_regs = Avail} = State, N, {ptr, Reg}) when is_integer(N) -> + Temp = first_avail(Avail), I1 = jit_x86_64_asm:movabsq(N, Temp), I2 = jit_x86_64_asm:movq(Temp, {0, Reg}), Stream1 = (State#state.stream_module):append(State#state.stream, <>), State#state{stream = Stream1}; -move_to_vm_register(#state{available_regs = [Temp1, Temp2 | _]} = State, N, {y_reg, Y}) when +move_to_vm_register_emit(#state{available_regs = Avail} = State, N, {y_reg, Y}) when is_integer(N) -> + Temp1 = first_avail(Avail), + Temp2 = first_avail(Avail band (bnot reg_bit(Temp1))), I1 = jit_x86_64_asm:movq(?Y_REGS, Temp1), I2 = jit_x86_64_asm:movabsq(N, Temp2), I3 = jit_x86_64_asm:movq(Temp2, {Y * 8, Temp1}), @@ -1396,56 +1452,74 @@ move_to_vm_register(#state{available_regs = [Temp1, Temp2 | _]} = State, N, {y_r ), State#state{stream = Stream1}; % is_atom(Src) (native register) -move_to_vm_register(State, Reg, {x_reg, X}) when is_atom(Reg) andalso X < ?MAX_REG -> +move_to_vm_register_emit(State, Reg, {x_reg, X}) when is_atom(Reg) andalso X < ?MAX_REG -> I1 = jit_x86_64_asm:movq(Reg, ?X_REG(X)), Stream1 = (State#state.stream_module):append(State#state.stream, I1), State#state{stream = Stream1}; -move_to_vm_register(State, Reg, {x_reg, extra}) when is_atom(Reg) -> +move_to_vm_register_emit(State, Reg, {x_reg, extra}) when is_atom(Reg) -> I1 = jit_x86_64_asm:movq(Reg, ?X_REG(?MAX_REG)), Stream1 = (State#state.stream_module):append(State#state.stream, I1), 
State#state{stream = Stream1}; -move_to_vm_register(State, Reg, {ptr, Dest}) when is_atom(Reg) -> +move_to_vm_register_emit(State, Reg, {ptr, Dest}) when is_atom(Reg) -> I1 = jit_x86_64_asm:movq(Reg, {0, Dest}), Stream1 = (State#state.stream_module):append(State#state.stream, I1), State#state{stream = Stream1}; -move_to_vm_register(#state{available_regs = [Temp | _]} = State, Reg, {y_reg, Y}) when +move_to_vm_register_emit(#state{available_regs = Avail} = State, Reg, {y_reg, Y}) when is_atom(Reg) -> + Temp = first_avail(Avail), I1 = jit_x86_64_asm:movq(?Y_REGS, Temp), I2 = jit_x86_64_asm:movq(Reg, {Y * 8, Temp}), Code = <>, Stream1 = (State#state.stream_module):append(State#state.stream, Code), State#state{stream = Stream1}; -% Src is x_reg, store in temporary register and call move_to_vm_register for the four cases -move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {x_reg, X}, Dest) when +% Src is x_reg, store in temporary register and call move_to_vm_register_emit for the four cases +move_to_vm_register_emit(#state{available_regs = Avail} = State0, {x_reg, X}, Dest) when X < ?MAX_REG -> + Temp = first_avail(Avail), + TempBit = reg_bit(Temp), I1 = jit_x86_64_asm:movq(?X_REG(X), Temp), Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), - State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), - State1#state{available_regs = AR0}; -move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {x_reg, extra}, Dest) -> + State1 = move_to_vm_register_emit( + State0#state{stream = Stream1, available_regs = Avail band (bnot TempBit)}, Temp, Dest + ), + State1#state{available_regs = Avail}; +move_to_vm_register_emit(#state{available_regs = Avail} = State0, {x_reg, extra}, Dest) -> + Temp = first_avail(Avail), + TempBit = reg_bit(Temp), I1 = jit_x86_64_asm:movq(?X_REG(?MAX_REG), Temp), Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), - State1 = 
move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), - State1#state{available_regs = AR0}; -move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {ptr, Reg}, Dest) -> + State1 = move_to_vm_register_emit( + State0#state{stream = Stream1, available_regs = Avail band (bnot TempBit)}, Temp, Dest + ), + State1#state{available_regs = Avail}; +move_to_vm_register_emit(#state{available_regs = Avail} = State0, {ptr, Reg}, Dest) -> + Temp = first_avail(Avail), + TempBit = reg_bit(Temp), I1 = jit_x86_64_asm:movq({0, Reg}, Temp), Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), - State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), - State1#state{available_regs = AR0}; -move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {y_reg, Y}, Dest) -> + State1 = move_to_vm_register_emit( + State0#state{stream = Stream1, available_regs = Avail band (bnot TempBit)}, Temp, Dest + ), + State1#state{available_regs = Avail}; +move_to_vm_register_emit(#state{available_regs = Avail} = State0, {y_reg, Y}, Dest) -> + Temp = first_avail(Avail), + TempBit = reg_bit(Temp), I1 = jit_x86_64_asm:movq(?Y_REGS, Temp), I2 = jit_x86_64_asm:movq({Y * 8, Temp}, Temp), Stream1 = (State0#state.stream_module):append(State0#state.stream, <>), - State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), - State1#state{available_regs = AR0}; + State1 = move_to_vm_register_emit( + State0#state{stream = Stream1, available_regs = Avail band (bnot TempBit)}, Temp, Dest + ), + State1#state{available_regs = Avail}; % term_to_float -move_to_vm_register( - #state{stream_module = StreamModule, available_regs = [Temp | _], stream = Stream0} = State0, +move_to_vm_register_emit( + #state{stream_module = StreamModule, available_regs = Avail, stream = Stream0} = State0, {free, {ptr, Reg, 1}}, {fp_reg, F} ) when is_atom(Reg) -> + Temp = first_avail(Avail), 
I1 = jit_x86_64_asm:movq({8, Reg}, Reg), I2 = jit_x86_64_asm:movq(?FP_REGS, Temp), I3 = jit_x86_64_asm:movq(Reg, {?FP_REG_OFFSET(State0, F), Temp}), @@ -1470,32 +1544,36 @@ move_to_vm_register( Dest :: vm_register() | x86_64_register() ) -> state(). move_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, + #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State, Reg, Index, {x_reg, X} ) when X < ?MAX_REG andalso is_integer(Index) -> + Temp = first_avail(Avail), I1 = jit_x86_64_asm:movq({Index * 8, Reg}, Temp), I2 = jit_x86_64_asm:movq(Temp, ?X_REG(X)), Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}; move_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, + #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State, Reg, Index, {ptr, Dest} ) when is_integer(Index) -> + Temp = first_avail(Avail), I1 = jit_x86_64_asm:movq({Index * 8, Reg}, Temp), I2 = jit_x86_64_asm:movq(Temp, {0, Dest}), Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}; move_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp1, Temp2 | _]} = + #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State, Reg, Index, {y_reg, Y} ) when is_integer(Index) -> + Temp1 = first_avail(Avail), + Temp2 = first_avail(Avail band (bnot reg_bit(Temp1))), I1 = jit_x86_64_asm:movq(?Y_REGS, Temp1), I2 = jit_x86_64_asm:movq({Index * 8, Reg}, Temp2), I3 = jit_x86_64_asm:movq(Temp2, {Y * 8, Temp1}), @@ -1523,11 +1601,11 @@ move_array_element( I2 = jit_x86_64_asm:addq(Reg, IndexReg), I3 = jit_x86_64_asm:movq({0, IndexReg}, IndexReg), I4 = jit_x86_64_asm:movq(IndexReg, ?X_REG(X)), - {AvailableRegs1, UsedRegs1} = free_reg(AvailableRegs0, UsedRegs0, IndexReg), + IndexBit = reg_bit(IndexReg), Stream1 = StreamModule:append(Stream0, 
<>), State#state{ - available_regs = AvailableRegs1, - used_regs = UsedRegs1, + available_regs = AvailableRegs0 bor IndexBit, + used_regs = UsedRegs0 band (bnot IndexBit), stream = Stream1 }; move_array_element( @@ -1545,36 +1623,37 @@ move_array_element( I2 = jit_x86_64_asm:addq(Reg, IndexReg), I3 = jit_x86_64_asm:movq({0, IndexReg}, IndexReg), I4 = jit_x86_64_asm:movq(IndexReg, {0, PtrReg}), - {AvailableRegs1, UsedRegs1} = free_reg(AvailableRegs0, UsedRegs0, IndexReg), + IndexBit = reg_bit(IndexReg), Stream1 = StreamModule:append(Stream0, <>), State#state{ - available_regs = AvailableRegs1, - used_regs = UsedRegs1, + available_regs = AvailableRegs0 bor IndexBit, + used_regs = UsedRegs0 band (bnot IndexBit), stream = Stream1 }; move_array_element( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Temp | _] = AvailableRegs0, + available_regs = AvailableRegs0, used_regs = UsedRegs0 } = State, Reg, {free, IndexReg}, {y_reg, Y} ) when ?IS_GPR(IndexReg) -> + Temp = first_avail(AvailableRegs0), I1 = jit_x86_64_asm:movq(?Y_REGS, Temp), I2 = jit_x86_64_asm:shlq(3, IndexReg), I3 = jit_x86_64_asm:addq(Reg, IndexReg), I4 = jit_x86_64_asm:movq({0, IndexReg}, IndexReg), I5 = jit_x86_64_asm:movq(IndexReg, {Y * 8, Temp}), - {AvailableRegs1, UsedRegs1} = free_reg(AvailableRegs0, UsedRegs0, IndexReg), + IndexBit = reg_bit(IndexReg), Stream1 = StreamModule:append( Stream0, <> ), State#state{ - available_regs = AvailableRegs1, - used_regs = UsedRegs1, + available_regs = AvailableRegs0 bor IndexBit, + used_regs = UsedRegs0 band (bnot IndexBit), stream = Stream1 }. 
@@ -1607,17 +1686,19 @@ get_array_element( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [ElemReg | AvailableT], + available_regs = Avail, used_regs = UsedRegs0 } = State, Reg, Index ) -> + ElemReg = first_avail(Avail), + Bit = reg_bit(ElemReg), I1 = jit_x86_64_asm:movq({Index * 8, Reg}, ElemReg), Stream1 = StreamModule:append(Stream0, <>), { State#state{ - stream = Stream1, available_regs = AvailableT, used_regs = [ElemReg | UsedRegs0] + stream = Stream1, available_regs = Avail band (bnot Bit), used_regs = UsedRegs0 bor Bit }, ElemReg }. @@ -1639,32 +1720,35 @@ get_array_element( Index :: non_neg_integer() ) -> state(). move_to_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, + #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State, {x_reg, X}, Reg, Index ) when X < ?MAX_REG andalso ?IS_GPR(Reg) andalso is_integer(Index) -> + Temp = first_avail(Avail), I1 = jit_x86_64_asm:movq(?X_REG(X), Temp), I2 = jit_x86_64_asm:movq(Temp, {Index * 8, Reg}), Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}; move_to_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, + #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State, {ptr, Source}, Reg, Index ) -> + Temp = first_avail(Avail), I1 = jit_x86_64_asm:movq({0, Source}, Temp), I2 = jit_x86_64_asm:movq(Temp, {Index * 8, Reg}), Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}; move_to_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = + #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State, {y_reg, Y}, Reg, Index ) when ?IS_GPR(Reg) andalso is_integer(Index) -> + Temp = first_avail(Avail), I1 = jit_x86_64_asm:movq(?Y_REGS, Temp), I2 = jit_x86_64_asm:movq({Y * 8, Temp}, Temp), I3 = 
jit_x86_64_asm:movq(Temp, {Index * 8, Reg}), @@ -1684,11 +1768,12 @@ move_to_array_element( Stream1 = StreamModule:append(Stream0, I1), State#state{stream = Stream1}; move_to_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, + #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State, Source, Reg, Index ) when is_integer(Source) andalso is_integer(Index) -> + Temp = first_avail(Avail), I1 = jit_x86_64_asm:movabsq(Source, Temp), I2 = jit_x86_64_asm:movq(Temp, {Index * 8, Reg}), Stream1 = StreamModule:append(Stream0, <>), @@ -1714,23 +1799,25 @@ move_to_array_element( ) when is_integer(Index) andalso is_integer(Offset) -> move_to_array_element(State, Source, BaseReg, Index + Offset); move_to_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, + #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State, {x_reg, X}, BaseReg, IndexReg, Offset ) when X < ?MAX_REG andalso ?IS_GPR(BaseReg) andalso ?IS_GPR(IndexReg) andalso is_integer(Offset) -> + Temp = first_avail(Avail), I1 = jit_x86_64_asm:movq(?X_REG(X), Temp), I2 = jit_x86_64_asm:movq(Temp, {Offset * ?WORD_SIZE, BaseReg, IndexReg, 8}), Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}; move_to_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, + #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State, {y_reg, Y}, BaseReg, IndexReg, Offset ) when ?IS_GPR(BaseReg) andalso ?IS_GPR(IndexReg) andalso is_integer(Offset) -> + Temp = first_avail(Avail), I1 = jit_x86_64_asm:movq(?Y_REGS, Temp), I2 = jit_x86_64_asm:movq({Y * 8, Temp}, Temp), I3 = jit_x86_64_asm:movq(Temp, {Offset * ?WORD_SIZE, BaseReg, IndexReg, 8}), @@ -1763,80 +1850,129 @@ move_to_array_element( State#state{stream = Stream1}. 
-spec move_to_native_register(state(), value() | cp) -> {state(), x86_64_register()}. -move_to_native_register( +move_to_native_register(State, Reg) when ?IS_GPR(Reg) -> + {State, Reg}; +move_to_native_register(State, Value) -> + move_to_native_register_emit(State, Value). + +move_to_native_register_emit( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Reg | AvailT], + available_regs = Avail, used_regs = Used } = State, cp ) -> + Reg = first_avail(Avail), + Bit = reg_bit(Reg), I1 = jit_x86_64_asm:movq(?CP, Reg), Stream1 = StreamModule:append(Stream0, I1), - {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg}; -move_to_native_register(State, Reg) when is_atom(Reg) -> - {State, Reg}; -move_to_native_register( - #state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Reg} + { + State#state{ + stream = Stream1, + used_regs = Used bor Bit, + available_regs = Avail band (bnot Bit) + }, + Reg + }; +move_to_native_register_emit( + #state{stream_module = StreamModule, stream = Stream0} = State, + {ptr, Reg} ) when is_atom(Reg) -> I1 = jit_x86_64_asm:movq({0, Reg}, Reg), Stream1 = StreamModule:append(Stream0, I1), {State#state{stream = Stream1}, Reg}; -move_to_native_register( +move_to_native_register_emit( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Reg | AvailT], + available_regs = Avail, used_regs = Used } = State, Imm ) when is_integer(Imm) -> + Reg = first_avail(Avail), + Bit = reg_bit(Reg), I1 = jit_x86_64_asm:movq(Imm, Reg), Stream1 = StreamModule:append(Stream0, I1), - {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg}; -move_to_native_register( + { + State#state{ + stream = Stream1, + used_regs = Used bor Bit, + available_regs = Avail band (bnot Bit) + }, + Reg + }; +move_to_native_register_emit( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Reg | AvailT], + available_regs = Avail, used_regs 
= Used } = State, {x_reg, extra} ) -> + Reg = first_avail(Avail), + Bit = reg_bit(Reg), I1 = jit_x86_64_asm:movq(?X_REG(?MAX_REG), Reg), Stream1 = StreamModule:append(Stream0, I1), - {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg}; -move_to_native_register( + { + State#state{ + stream = Stream1, + used_regs = Used bor Bit, + available_regs = Avail band (bnot Bit) + }, + Reg + }; +move_to_native_register_emit( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Reg | AvailT], + available_regs = Avail, used_regs = Used } = State, {x_reg, X} ) when X < ?MAX_REG -> + Reg = first_avail(Avail), + Bit = reg_bit(Reg), I1 = jit_x86_64_asm:movq(?X_REG(X), Reg), Stream1 = StreamModule:append(Stream0, I1), - {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg}; -move_to_native_register( + { + State#state{ + stream = Stream1, + used_regs = Used bor Bit, + available_regs = Avail band (bnot Bit) + }, + Reg + }; +move_to_native_register_emit( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Reg | AvailT], + available_regs = Avail, used_regs = Used } = State, {y_reg, Y} ) -> + Reg = first_avail(Avail), + Bit = reg_bit(Reg), I1 = jit_x86_64_asm:movq(?Y_REGS, Reg), I2 = jit_x86_64_asm:movq({Y * 8, Reg}, Reg), Code = <>, Stream1 = StreamModule:append(Stream0, Code), - {State#state{stream = Stream1, available_regs = AvailT, used_regs = [Reg | Used]}, Reg}. + { + State#state{ + stream = Stream1, + available_regs = Avail band (bnot Bit), + used_regs = Used bor Bit + }, + Reg + }. -spec move_to_native_register(state(), integer() | x86_64_register(), x86_64_register()) -> state(). 
move_to_native_register( @@ -1851,33 +1987,52 @@ copy_to_native_register( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [SaveReg | AvailT], + available_regs = Avail, used_regs = Used } = State, Reg ) when is_atom(Reg) -> + SaveReg = first_avail(Avail), + Bit = reg_bit(SaveReg), I1 = jit_x86_64_asm:movq(Reg, SaveReg), Stream1 = StreamModule:append(Stream0, I1), - {State#state{stream = Stream1, available_regs = AvailT, used_regs = [SaveReg | Used]}, SaveReg}; + { + State#state{ + stream = Stream1, + available_regs = Avail band (bnot Bit), + used_regs = Used bor Bit + }, + SaveReg + }; copy_to_native_register( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [SaveReg | AvailT], + available_regs = Avail, used_regs = Used } = State, {ptr, Reg} ) when is_atom(Reg) -> + SaveReg = first_avail(Avail), + Bit = reg_bit(SaveReg), I1 = jit_x86_64_asm:movq({0, Reg}, SaveReg), Stream1 = StreamModule:append(Stream0, I1), - {State#state{stream = Stream1, available_regs = AvailT, used_regs = [SaveReg | Used]}, SaveReg}; + { + State#state{ + stream = Stream1, + available_regs = Avail band (bnot Bit), + used_regs = Used bor Bit + }, + SaveReg + }; copy_to_native_register(State, Reg) -> move_to_native_register(State, Reg). move_to_cp( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Reg | _]} = State, + #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State, {y_reg, Y} ) -> + Reg = first_avail(Avail), I1 = jit_x86_64_asm:movq(?Y_REGS, Reg), I2 = jit_x86_64_asm:movq({Y * 8, Reg}, Reg), I3 = jit_x86_64_asm:movq(Reg, ?CP), @@ -1886,9 +2041,10 @@ move_to_cp( State#state{stream = Stream1}. 
increment_sp( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Reg | _]} = State, + #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State, Offset ) -> + Reg = first_avail(Avail), I1 = jit_x86_64_asm:movq(?Y_REGS, Reg), I2 = jit_x86_64_asm:addq(Offset * 8, Reg), I3 = jit_x86_64_asm:movq(Reg, ?Y_REGS), @@ -1900,12 +2056,13 @@ set_continuation_to_label( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Temp | _], + available_regs = Avail, branches = Branches, labels = Labels } = State, Label ) -> + Temp = first_avail(Avail), Offset = StreamModule:offset(Stream0), case lists:keyfind(Label, 1, Labels) of {Label, LabelOffset} -> @@ -1931,10 +2088,11 @@ set_continuation_to_offset( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Temp | _], + available_regs = Avail, branches = Branches } = State ) -> + Temp = first_avail(Avail), OffsetRef = make_ref(), Offset = StreamModule:offset(Stream0), {RewriteLEAOffset, I1} = jit_x86_64_asm:leaq_rel32({-4, rip}, Temp), @@ -1954,20 +2112,38 @@ get_module_index( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Reg | AvailableT], + available_regs = Avail, used_regs = UsedRegs0 } = State ) -> + Reg = first_avail(Avail), + Bit = reg_bit(Reg), I1 = jit_x86_64_asm:movq(?JITSTATE_MODULE, Reg), I2 = jit_x86_64_asm:movl(?MODULE_INDEX(Reg), Reg), Code = <>, Stream1 = StreamModule:append(Stream0, Code), { - State#state{stream = Stream1, available_regs = AvailableT, used_regs = [Reg | UsedRegs0]}, + State#state{ + stream = Stream1, + available_regs = Avail band (bnot Bit), + used_regs = UsedRegs0 bor Bit + }, Reg }. 
-and_(#state{stream_module = StreamModule, stream = Stream0} = State, {free, Reg}, Val) when +and_( + #state{stream_module = StreamModule, stream = Stream0} = State, + {free, Reg}, + SrcReg +) when + ?IS_GPR(Reg), is_atom(SrcReg) +-> + I1 = jit_x86_64_asm:andq(SrcReg, Reg), + Stream1 = StreamModule:append(Stream0, I1), + {State#state{stream = Stream1}, Reg}; +and_( + #state{stream_module = StreamModule, stream = Stream0} = State, {free, Reg}, Val +) when ?IS_GPR(Reg) -> % 32 bits instructions on x86-64 zero the high 32 bits @@ -1981,7 +2157,7 @@ and_(#state{stream_module = StreamModule, stream = Stream0} = State, {free, Reg} and_( #state{ stream_module = StreamModule, - available_regs = [ResultReg | T], + available_regs = Avail, used_regs = UR, stream = Stream0 } = State, @@ -1990,6 +2166,8 @@ and_( ) when ?IS_GPR(Reg) -> + ResultReg = first_avail(Avail), + Bit = reg_bit(ResultReg), I1 = jit_x86_64_asm:movq(Reg, ResultReg), I2 = if @@ -1998,8 +2176,21 @@ and_( end, Stream1 = StreamModule:append(Stream0, I1), Stream2 = StreamModule:append(Stream1, I2), - {State#state{stream = Stream2, available_regs = T, used_regs = [ResultReg | UR]}, ResultReg}. + { + State#state{ + stream = Stream2, + available_regs = Avail band (bnot Bit), + used_regs = UR bor Bit + }, + ResultReg + }. 
+or_(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, SrcReg) when + is_atom(SrcReg) +-> + I1 = jit_x86_64_asm:orq(SrcReg, Reg), + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1}; or_(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> I1 = jit_x86_64_asm:orq(Val, Reg), Stream1 = StreamModule:append(Stream0, I1), @@ -2009,11 +2200,12 @@ add( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [TempReg | _] + available_regs = Avail } = State, Reg, Val ) when is_integer(Val), Val < -16#80000000 orelse Val > 16#7FFFFFFF -> + TempReg = first_avail(Avail), I1 = jit_x86_64_asm:movabsq(Val, TempReg), I2 = jit_x86_64_asm:addq(TempReg, Reg), Stream1 = StreamModule:append(Stream0, I1), @@ -2028,17 +2220,16 @@ sub( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [TempReg | _] + available_regs = Avail } = State, Reg, Val ) when is_integer(Val), Val < -16#80000000 orelse Val > 16#7FFFFFFF -> - % Immediate too large for 32-bit, load into temporary register + TempReg = first_avail(Avail), I1 = jit_x86_64_asm:movabsq(Val, TempReg), I2 = jit_x86_64_asm:subq(TempReg, Reg), Stream1 = StreamModule:append(Stream0, I1), Stream2 = StreamModule:append(Stream1, I2), - % Free temporary register immediately State#state{stream = Stream2}; sub(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> I1 = jit_x86_64_asm:subq(Val, Reg), @@ -2066,8 +2257,9 @@ mul(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> -spec decrement_reductions_and_maybe_schedule_next(state()) -> state(). 
decrement_reductions_and_maybe_schedule_next( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0 + #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State0 ) -> + Temp = first_avail(Avail), Offset = StreamModule:offset(Stream0), I1 = jit_x86_64_asm:decl(?JITSTATE_REMAINING_REDUCTIONS), {RewriteJNZOffset, I2} = jit_x86_64_asm:jnz_rel8(0), @@ -2208,18 +2400,49 @@ return_labels_and_lines( ), State#state{stream = Stream1}. -free_reg(AvailableRegs0, UsedRegs0, Reg) when ?IS_GPR(Reg) -> - AvailableRegs1 = free_reg0(?AVAILABLE_REGS, AvailableRegs0, Reg, []), - true = lists:member(Reg, UsedRegs0), - UsedRegs1 = lists:delete(Reg, UsedRegs0), - {AvailableRegs1, UsedRegs1}. +reg_bit(rax) -> ?REG_BIT_RAX; +reg_bit(rcx) -> ?REG_BIT_RCX; +reg_bit(rdx) -> ?REG_BIT_RDX; +reg_bit(rsi) -> ?REG_BIT_RSI; +reg_bit(rdi) -> ?REG_BIT_RDI; +reg_bit(r8) -> ?REG_BIT_R8; +reg_bit(r9) -> ?REG_BIT_R9; +reg_bit(r10) -> ?REG_BIT_R10; +reg_bit(r11) -> ?REG_BIT_R11. + +regs_to_mask([]) -> 0; +regs_to_mask([imm | T]) -> regs_to_mask(T); +regs_to_mask([Reg | T]) -> reg_bit(Reg) bor regs_to_mask(T). + +first_avail(Mask) when Mask band ?REG_BIT_RAX =/= 0 -> rax; +first_avail(Mask) when Mask band ?REG_BIT_R11 =/= 0 -> r11; +first_avail(Mask) when Mask band ?REG_BIT_R10 =/= 0 -> r10; +first_avail(Mask) when Mask band ?REG_BIT_R9 =/= 0 -> r9; +first_avail(Mask) when Mask band ?REG_BIT_R8 =/= 0 -> r8; +first_avail(Mask) when Mask band ?REG_BIT_RCX =/= 0 -> rcx. + +%% Convert bitmask to list in reverse allocation order +%% Iteration order: rcx, r8, r9, r10, r11, rax +mask_to_list(0) -> []; +mask_to_list(Mask) -> mask_to_list_rcx(Mask). + +mask_to_list_rcx(Mask) when Mask band ?REG_BIT_RCX =/= 0 -> [rcx | mask_to_list_r8(Mask)]; +mask_to_list_rcx(Mask) -> mask_to_list_r8(Mask). + +mask_to_list_r8(Mask) when Mask band ?REG_BIT_R8 =/= 0 -> [r8 | mask_to_list_r9(Mask)]; +mask_to_list_r8(Mask) -> mask_to_list_r9(Mask). 
+ +mask_to_list_r9(Mask) when Mask band ?REG_BIT_R9 =/= 0 -> [r9 | mask_to_list_r10(Mask)]; +mask_to_list_r9(Mask) -> mask_to_list_r10(Mask). + +mask_to_list_r10(Mask) when Mask band ?REG_BIT_R10 =/= 0 -> [r10 | mask_to_list_r11(Mask)]; +mask_to_list_r10(Mask) -> mask_to_list_r11(Mask). + +mask_to_list_r11(Mask) when Mask band ?REG_BIT_R11 =/= 0 -> [r11 | mask_to_list_rax(Mask)]; +mask_to_list_r11(Mask) -> mask_to_list_rax(Mask). -free_reg0([Reg | _SortedT], PrevRegs0, Reg, Acc) -> - lists:reverse(Acc, [Reg | PrevRegs0]); -free_reg0([PrevReg | SortedT], [PrevReg | PrevT], Reg, Acc) -> - free_reg0(SortedT, PrevT, Reg, [PrevReg | Acc]); -free_reg0([_Other | SortedT], PrevRegs, Reg, Acc) -> - free_reg0(SortedT, PrevRegs, Reg, Acc). +mask_to_list_rax(Mask) when Mask band ?REG_BIT_RAX =/= 0 -> [rax]; +mask_to_list_rax(_Mask) -> []. args_regs(Args) -> lists:map( @@ -2285,7 +2508,9 @@ add_label( ), State#state{ - stream = Stream2, branches = RemainingBranches, labels = [{Label, LabelOffset} | Labels] + stream = Stream2, + branches = RemainingBranches, + labels = [{Label, LabelOffset} | Labels] }; add_label(#state{labels = Labels} = State, Label, Offset) -> State#state{labels = [{Label, Offset} | Labels]}. 
diff --git a/tests/libs/jit/jit_aarch64_tests.erl b/tests/libs/jit/jit_aarch64_tests.erl index 6e287ac1c..6bb0b3113 100644 --- a/tests/libs/jit/jit_aarch64_tests.erl +++ b/tests/libs/jit/jit_aarch64_tests.erl @@ -169,11 +169,11 @@ call_primitive_extended_regs_test() -> " 48: f9404850 ldr x16, [x2, #144]\n" " 4c: a9bf03fe stp x30, x0, [sp, #-16]!\n" " 50: a9bf0be1 stp x1, x2, [sp, #-16]!\n" - " 54: a9bf1fe8 stp x8, x7, [sp, #-16]!\n" + " 54: a9bf23e7 stp x7, x8, [sp, #-16]!\n" " 58: d2800261 mov x1, #0x13 // #19\n" " 5c: d63f0200 blr x16\n" " 60: aa0003e9 mov x9, x0\n" - " 64: a8c11fe8 ldp x8, x7, [sp], #16\n" + " 64: a8c123e7 ldp x7, x8, [sp], #16\n" " 68: a8c10be1 ldp x1, x2, [sp], #16\n" " 6c: a8c103fe ldp x30, x0, [sp], #16\n" " 70: f9403450 ldr x16, [x2, #104]\n" @@ -213,8 +213,8 @@ call_primitive_few_free_regs_test() -> " 14: f940e450 ldr x16, [x2, #456]\n" " 18: a9bf03fe stp x30, x0, [sp, #-16]!\n" " 1c: a9bf0be1 stp x1, x2, [sp, #-16]!\n" - " 20: a9bf23e9 stp x9, x8, [sp, #-16]!\n" - " 24: f81f0fe7 str x7, [sp, #-16]!\n" + " 20: a9bf23e7 stp x7, x8, [sp, #-16]!\n" + " 24: f81f0fe9 str x9, [sp, #-16]!\n" " 28: aa0803e0 mov x0, x8\n" " 2c: aa0703e1 mov x1, x7\n" " 30: aa0a03e2 mov x2, x10\n" @@ -222,8 +222,8 @@ call_primitive_few_free_regs_test() -> " 38: aa0b03e4 mov x4, x11\n" " 3c: d63f0200 blr x16\n" " 40: aa0003ea mov x10, x0\n" - " 44: f84107e7 ldr x7, [sp], #16\n" - " 48: a8c123e9 ldp x9, x8, [sp], #16\n" + " 44: f84107e9 ldr x9, [sp], #16\n" + " 48: a8c123e7 ldp x7, x8, [sp], #16\n" " 4c: a8c10be1 ldp x1, x2, [sp], #16\n" " 50: a8c103fe ldp x30, x0, [sp], #16" >>, @@ -415,7 +415,7 @@ if_block_test_() -> " c: 91000908 add x8, x8, #0x2" >>, jit_tests_common:assert_stream(aarch64, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -434,7 +434,7 @@ if_block_test_() -> " 10: 91000908 add x8, x8, #0x2" >>, 
jit_tests_common:assert_stream(aarch64, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -452,7 +452,7 @@ if_block_test_() -> " c: 91000908 add x8, x8, #0x2" >>, jit_tests_common:assert_stream(aarch64, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -488,7 +488,7 @@ if_block_test_() -> " c: 91000908 add x8, x8, #0x2" >>, jit_tests_common:assert_stream(aarch64, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -525,7 +525,7 @@ if_block_test_() -> " 10: 91000908 add x8, x8, #0x2" >>, jit_tests_common:assert_stream(aarch64, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -563,7 +563,7 @@ if_block_test_() -> " 10: 91000908 add x8, x8, #0x2" >>, jit_tests_common:assert_stream(aarch64, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -601,7 +601,7 @@ if_block_test_() -> " 10: 91000908 add x8, x8, #0x2" >>, jit_tests_common:assert_stream(aarch64, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -639,7 +639,7 @@ if_block_test_() -> " 10: 91000908 add x8, x8, #0x2" >>, jit_tests_common:assert_stream(aarch64, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -676,7 +676,7 @@ 
if_block_test_() -> " c: 91000908 add x8, x8, #0x2" >>, jit_tests_common:assert_stream(aarch64, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -712,7 +712,7 @@ if_block_test_() -> " c: 91000908 add x8, x8, #0x2" >>, jit_tests_common:assert_stream(aarch64, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -749,7 +749,7 @@ if_block_test_() -> " 10: 91000908 add x8, x8, #0x2" >>, jit_tests_common:assert_stream(aarch64, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -769,7 +769,7 @@ if_block_test_() -> " 14: 91000908 add x8, x8, #0x2" >>, jit_tests_common:assert_stream(aarch64, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -808,7 +808,7 @@ if_block_test_() -> " 14: 91000908 add x8, x8, #0x2" >>, jit_tests_common:assert_stream(aarch64, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -847,7 +847,7 @@ if_block_test_() -> " 10: 91000908 add x8, x8, #0x2" >>, jit_tests_common:assert_stream(aarch64, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -885,7 +885,7 @@ if_block_test_() -> " 10: 91000908 add x8, x8, #0x2" >>, jit_tests_common:assert_stream(aarch64, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), 
?_test(begin State1 = ?BACKEND:if_block( @@ -1962,8 +1962,10 @@ move_to_array_element_test_() -> end), %% move_to_array_element/5: x_reg to reg[x+offset] ?_test(begin - State1 = setelement(7, State0, ?BACKEND:available_regs(State0) -- [r8, r9]), - State2 = setelement(8, State1, [r8, r9]), + State1 = setelement( + 7, State0, element(7, State0) band (bnot ((1 bsl 8) bor (1 bsl 9))) + ), + State2 = setelement(8, State1, (1 bsl 8) bor (1 bsl 9)), [r8, r9] = ?BACKEND:used_regs(State2), State3 = ?BACKEND:move_to_array_element(State2, {x_reg, 0}, r8, r9, 1), Stream = ?BACKEND:stream(State3), @@ -1976,8 +1978,10 @@ move_to_array_element_test_() -> end), %% move_to_array_element/5: imm to reg[x+offset] ?_test(begin - State1 = setelement(7, State0, ?BACKEND:available_regs(State0) -- [r8, r9]), - State2 = setelement(8, State1, [r8, r9]), + State1 = setelement( + 7, State0, element(7, State0) band (bnot ((1 bsl 8) bor (1 bsl 9))) + ), + State2 = setelement(8, State1, (1 bsl 8) bor (1 bsl 9)), [r8, r9] = ?BACKEND:used_regs(State2), State3 = ?BACKEND:move_to_array_element(State2, 42, r8, r9, 1), Stream = ?BACKEND:stream(State3), diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 244f77f2c..427e16874 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -204,13 +204,13 @@ call_primitive_few_free_regs_test() -> " e: b4e7 push {r0, r1, r2, r5, r6, r7}\n" " 10: b082 sub sp, #8\n" " 12: 9300 str r3, [sp, #0]\n" - " 14: 4633 mov r3, r6\n" - " 16: 460e mov r6, r1\n" - " 18: 4618 mov r0, r3\n" - " 1a: 4639 mov r1, r7\n" + " 14: 463b mov r3, r7\n" + " 16: 460f mov r7, r1\n" + " 18: 4630 mov r0, r6\n" + " 1a: 4619 mov r1, r3\n" " 1c: 4622 mov r2, r4\n" " 1e: 462b mov r3, r5\n" - " 20: 47b0 blx r6\n" + " 20: 47b8 blx r7\n" " 22: 4604 mov r4, r0\n" " 24: b002 add sp, #8\n" " 26: bce7 pop {r0, r1, r2, r5, r6, r7}" @@ -487,7 +487,7 @@ if_block_test_() -> " 8: 3602 adds r6, #2" >>, 
jit_tests_common:assert_stream(arm, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -506,7 +506,7 @@ if_block_test_() -> " 8: 3602 adds r6, #2" >>, jit_tests_common:assert_stream(arm, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -525,7 +525,7 @@ if_block_test_() -> " 8: 3602 adds r6, #2" >>, jit_tests_common:assert_stream(arm, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -548,7 +548,7 @@ if_block_test_() -> " e: e077 b.n 0x100" >>, jit_tests_common:assert_stream(arm, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -567,7 +567,7 @@ if_block_test_() -> " 8: 3602 adds r6, #2" >>, jit_tests_common:assert_stream(arm, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -607,7 +607,7 @@ if_block_test_() -> " c: 3602 adds r6, #2" >>, jit_tests_common:assert_stream(arm, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -626,7 +626,7 @@ if_block_test_() -> " 8: 3602 adds r6, #2" >>, jit_tests_common:assert_stream(arm, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -664,7 +664,7 @@ if_block_test_() -> " 8: 3602 adds r6, #2" >>, jit_tests_common:assert_stream(arm, Dump, 
Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -702,7 +702,7 @@ if_block_test_() -> " 8: 3602 adds r6, #2" >>, jit_tests_common:assert_stream(arm, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin % Test large immediate (1995) that requires temporary register @@ -765,7 +765,7 @@ if_block_test_() -> " 8: 3602 adds r6, #2" >>, jit_tests_common:assert_stream(arm, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -803,7 +803,7 @@ if_block_test_() -> " 8: 3602 adds r6, #2" >>, jit_tests_common:assert_stream(arm, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -841,7 +841,7 @@ if_block_test_() -> " 8: 3602 adds r6, #2" >>, jit_tests_common:assert_stream(arm, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -879,7 +879,7 @@ if_block_test_() -> " 8: 3602 adds r6, #2" >>, jit_tests_common:assert_stream(arm, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -917,7 +917,7 @@ if_block_test_() -> " 8: 3602 adds r6, #2" >>, jit_tests_common:assert_stream(arm, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -937,7 +937,7 @@ if_block_test_() -> " a: 3602 adds r6, #2" >>, jit_tests_common:assert_stream(arm, Dump, Stream), - 
?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -976,7 +976,7 @@ if_block_test_() -> " a: 3602 adds r6, #2" >>, jit_tests_common:assert_stream(arm, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -1018,7 +1018,7 @@ if_block_test_() -> " e: 3602 adds r6, #2" >>, jit_tests_common:assert_stream(arm, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -1083,7 +1083,7 @@ if_block_test_() -> " 8: 3602 adds r6, #2" >>, jit_tests_common:assert_stream(arm, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -1125,7 +1125,7 @@ if_block_test_() -> " e: e077 b.n 0x100" >>, jit_tests_common:assert_stream(arm, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -1173,7 +1173,7 @@ if_block_test_() -> " 12: 0000 movs r0, r0" >>, jit_tests_common:assert_stream(arm, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -1227,7 +1227,7 @@ bitwise_and_optimization_test_() -> " 8: 3602 adds r6, #2" >>, jit_tests_common:assert_stream(arm, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State3)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State3)) end), %% Test optimized case: 16#F (low bits mask, 4 bits) - lsls r5, r7, #28 ?_test(begin @@ -1247,7 +1247,7 @@ bitwise_and_optimization_test_() -> " 8: 3602 adds r6, #2" >>, 
jit_tests_common:assert_stream(arm, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State3)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State3)) end), %% Test optimized case: 16#3F (low bits mask, 6 bits) - lsls r5, r7, #26 ?_test(begin @@ -1267,7 +1267,7 @@ bitwise_and_optimization_test_() -> " 8: 3602 adds r6, #2" >>, jit_tests_common:assert_stream(arm, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State3)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State3)) end), %% Test non-optimized case: 5 (neither single bit nor low bits mask) - mov+tst ?_test(begin @@ -1288,7 +1288,7 @@ bitwise_and_optimization_test_() -> " a: 3602 adds r6, #2" >>, jit_tests_common:assert_stream(arm, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State3)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State3)) end) ]. @@ -2844,6 +2844,19 @@ move_array_element_test_() -> " 8: 67f7 str r7, [r6, #124] ; 0x7c" >>) end), + %% move_array_element: reg_x[reg_y] to y_reg (large y offset) + ?_test(begin + {State1, Reg} = ?BACKEND:get_array_element(State0, r3, 4), + move_array_element_test0(State1, r3, {free, Reg}, {y_reg, 32}, << + " 0: 691f ldr r7, [r3, #16]\n" + " 2: 00bf lsls r7, r7, #2\n" + " 4: 59df ldr r7, [r3, r7]\n" + " 6: 6946 ldr r6, [r0, #20]\n" + " 8: 2580 movs r5, #128\t; 0x80\n" + " a: 4435 add r5, r6\n" + " c: 602f str r7, [r5, #0]" + >>) + end), %% move_array_element with integer index and x_reg destination ?_test(begin {State1, BaseReg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), @@ -2995,9 +3008,11 @@ move_to_array_element_test_() -> end), %% move_to_array_element/5: x_reg to reg[x+offset] ?_test(begin - State1 = setelement(7, State0, ?BACKEND:available_regs(State0) -- [r3, r4]), - State2 = setelement(8, State1, [r3, r4]), - [r3, r4] = ?BACKEND:used_regs(State2), + State1 = setelement( + 7, State0, element(7, State0) band (bnot ((1 bsl 3) bor (1 bsl 4))) + ), + State2 = setelement(8, State1, (1 bsl 3) bor (1 bsl 
4)), + [r4, r3] = ?BACKEND:used_regs(State2), State3 = ?BACKEND:move_to_array_element(State2, {x_reg, 0}, r3, r4, 1), Stream = ?BACKEND:stream(State3), Dump = << @@ -3010,9 +3025,11 @@ move_to_array_element_test_() -> end), %% move_to_array_element/5: imm to reg[x+offset] ?_test(begin - State1 = setelement(7, State0, ?BACKEND:available_regs(State0) -- [r3, r4]), - State2 = setelement(8, State1, [r3, r4]), - [r3, r4] = ?BACKEND:used_regs(State2), + State1 = setelement( + 7, State0, element(7, State0) band (bnot ((1 bsl 3) bor (1 bsl 4))) + ), + State2 = setelement(8, State1, (1 bsl 3) bor (1 bsl 4)), + [r4, r3] = ?BACKEND:used_regs(State2), State3 = ?BACKEND:move_to_array_element(State2, 42, r3, r4, 1), Stream = ?BACKEND:stream(State3), Dump = << @@ -3739,8 +3756,8 @@ call_func_ptr_register_exhaustion_test_() -> " a: 6ac1 ldr r1, [r0, #44] ; 0x2c\n" " c: b4b7 push {r0, r1, r2, r4, r5, r7}\n" " e: b082 sub sp, #8\n" - " 10: 2101 movs r1, #1\n" - " 12: 9100 str r1, [sp, #0]\n" + " 10: 2701 movs r7, #1\n" + " 12: 9700 str r7, [sp, #0]\n" " 14: 9908 ldr r1, [sp, #32]\n" " 16: 461a mov r2, r3\n" " 18: 2303 movs r3, #3\n" @@ -3796,8 +3813,8 @@ call_func_ptr_register_exhaustion_test_() -> " a: 6ac1 ldr r1, [r0, #44] ; 0x2c\n" " c: b4b7 push {r0, r1, r2, r4, r5, r7}\n" " e: b082 sub sp, #8\n" - " 10: 2401 movs r4, #1\n" - " 12: 9400 str r4, [sp, #0]\n" + " 10: 2701 movs r7, #1\n" + " 12: 9700 str r7, [sp, #0]\n" " 14: 460f mov r7, r1\n" " 16: 9908 ldr r1, [sp, #32]\n" " 18: 461a mov r2, r3\n" @@ -3826,10 +3843,10 @@ call_func_ptr_register_exhaustion_test_() -> " 8: 6a83 ldr r3, [r0, #40] ; 0x28\n" " a: 6ac1 ldr r1, [r0, #44] ; 0x2c\n" " c: b4ff push {r0, r1, r2, r3, r4, r5, r6, r7}\n" - " e: 460c mov r4, r1\n" + " e: 460f mov r7, r1\n" " 10: 4630 mov r0, r6\n" " 12: 4619 mov r1, r3\n" - " 14: 47a0 blx r4\n" + " 14: 47b8 blx r7\n" " 16: 9001 str r0, [sp, #4]\n" " 18: bcff pop {r0, r1, r2, r3, r4, r5, r6, r7}" >>, @@ -3852,10 +3869,10 @@ 
call_func_ptr_register_exhaustion_test_() -> " 8: 6a83 ldr r3, [r0, #40] ; 0x28\n" " a: 6ac1 ldr r1, [r0, #44] ; 0x2c\n" " c: b4ff push {r0, r1, r2, r3, r4, r5, r6, r7}\n" - " e: 6894 ldr r4, [r2, #8]\n" + " e: 6897 ldr r7, [r2, #8]\n" " 10: 4630 mov r0, r6\n" " 12: 4619 mov r1, r3\n" - " 14: 47a0 blx r4\n" + " 14: 47b8 blx r7\n" " 16: 9006 str r0, [sp, #24]\n" " 18: bcff pop {r0, r1, r2, r3, r4, r5, r6, r7}" >>, diff --git a/tests/libs/jit/jit_riscv32_tests.erl b/tests/libs/jit/jit_riscv32_tests.erl index 6cb8dea8b..c464c01ea 100644 --- a/tests/libs/jit/jit_riscv32_tests.erl +++ b/tests/libs/jit/jit_riscv32_tests.erl @@ -210,8 +210,8 @@ call_primitive_extended_regs_test() -> " 4a: c22a sw a0,4(sp)\n" " 4c: c42e sw a1,8(sp)\n" " 4e: c632 sw a2,12(sp)\n" - " 50: c87a sw t5,16(sp)\n" - " 52: ca7e sw t6,20(sp)\n" + " 50: c87e sw t6,16(sp)\n" + " 52: ca7a sw t5,20(sp)\n" " 54: 45cd li a1,19\n" " 56: 9e82 jalr t4\n" " 58: 8eaa mv t4,a0\n" @@ -219,8 +219,8 @@ call_primitive_extended_regs_test() -> " 5c: 4512 lw a0,4(sp)\n" " 5e: 45a2 lw a1,8(sp)\n" " 60: 4632 lw a2,12(sp)\n" - " 62: 4f42 lw t5,16(sp)\n" - " 64: 4fd2 lw t6,20(sp)\n" + " 62: 4fc2 lw t6,16(sp)\n" + " 64: 4f52 lw t5,20(sp)\n" " 66: 02010113 addi sp,sp,32\n" " 6a: 03462e03 lw t3,52(a2)\n" " 6e: 1101 addi sp,sp,-32\n" @@ -270,9 +270,9 @@ call_primitive_few_free_regs_test() -> " 18: c22a sw a0,4(sp)\n" " 1a: c42e sw a1,8(sp)\n" " 1c: c632 sw a2,12(sp)\n" - " 1e: c876 sw t4,16(sp)\n" + " 1e: c87e sw t6,16(sp)\n" " 20: ca7a sw t5,20(sp)\n" - " 22: cc7e sw t6,24(sp)\n" + " 22: cc76 sw t4,24(sp)\n" " 24: 857a mv a0,t5\n" " 26: 85fe mv a1,t6\n" " 28: 8672 mv a2,t3\n" @@ -284,9 +284,9 @@ call_primitive_few_free_regs_test() -> " 34: 4512 lw a0,4(sp)\n" " 36: 45a2 lw a1,8(sp)\n" " 38: 4632 lw a2,12(sp)\n" - " 3a: 4ec2 lw t4,16(sp)\n" + " 3a: 4fc2 lw t6,16(sp)\n" " 3c: 4f52 lw t5,20(sp)\n" - " 3e: 4fe2 lw t6,24(sp)\n" + " 3e: 4ee2 lw t4,24(sp)\n" " 40: 02010113 addi sp,sp,32" >>, jit_tests_common:assert_stream(riscv32, 
Dump, Stream). @@ -493,7 +493,7 @@ if_block_test_() -> " c: 0f09 addi t5,t5,2" >>, jit_tests_common:assert_stream(riscv32, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -511,7 +511,7 @@ if_block_test_() -> " c: 0f09 addi t5,t5,2" >>, jit_tests_common:assert_stream(riscv32, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -530,7 +530,7 @@ if_block_test_() -> " 10: 0f09 addi t5,t5,2" >>, jit_tests_common:assert_stream(riscv32, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -551,7 +551,7 @@ if_block_test_() -> " 12: a0fd j 0x100" >>, jit_tests_common:assert_stream(riscv32, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -569,7 +569,7 @@ if_block_test_() -> " c: 0f09 addi t5,t5,2" >>, jit_tests_common:assert_stream(riscv32, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -606,7 +606,7 @@ if_block_test_() -> " e: 0f09 addi t5,t5,2" >>, jit_tests_common:assert_stream(riscv32, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -624,7 +624,7 @@ if_block_test_() -> " c: 0f09 addi t5,t5,2" >>, jit_tests_common:assert_stream(riscv32, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( 
@@ -661,7 +661,7 @@ if_block_test_() -> " 10: 0f09 addi t5,t5,2" >>, jit_tests_common:assert_stream(riscv32, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -699,7 +699,7 @@ if_block_test_() -> " 10: 0f09 addi t5,t5,2" >>, jit_tests_common:assert_stream(riscv32, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin % Test large immediate (1995) that requires temporary register @@ -758,7 +758,7 @@ if_block_test_() -> " 10: 0f09 addi t5,t5,2" >>, jit_tests_common:assert_stream(riscv32, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -796,7 +796,7 @@ if_block_test_() -> " 10: 0f09 addi t5,t5,2" >>, jit_tests_common:assert_stream(riscv32, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -834,7 +834,7 @@ if_block_test_() -> " 10: 0f09 addi t5,t5,2" >>, jit_tests_common:assert_stream(riscv32, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -872,7 +872,7 @@ if_block_test_() -> " 10: 0f09 addi t5,t5,2" >>, jit_tests_common:assert_stream(riscv32, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -910,7 +910,7 @@ if_block_test_() -> " 10: 0f09 addi t5,t5,2" >>, jit_tests_common:assert_stream(riscv32, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), 
?_test(begin State1 = ?BACKEND:if_block( @@ -929,7 +929,7 @@ if_block_test_() -> " 10: 0f09 addi t5,t5,2" >>, jit_tests_common:assert_stream(riscv32, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -968,7 +968,7 @@ if_block_test_() -> " 12: 0f09 addi t5,t5,2" >>, jit_tests_common:assert_stream(riscv32, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -1010,7 +1010,7 @@ if_block_test_() -> " 18: 0f09 addi t5,t5,2" >>, jit_tests_common:assert_stream(riscv32, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -1075,7 +1075,7 @@ if_block_test_() -> " 10: 0f09 addi t5,t5,2" >>, jit_tests_common:assert_stream(riscv32, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -1094,7 +1094,7 @@ if_block_test_() -> " 10: 0f09 addi t5,t5,2" >>, jit_tests_common:assert_stream(riscv32, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -1132,7 +1132,7 @@ if_block_test_() -> " 10: 0f09 addi t5,t5,2" >>, jit_tests_common:assert_stream(riscv32, Dump, Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegA, RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( @@ -2542,9 +2542,11 @@ move_to_array_element_test_() -> end), %% move_to_array_element/5: x_reg to reg[x+offset] ?_test(begin - State1 = setelement(7, State0, ?BACKEND:available_regs(State0) -- [a3, t3]), - State2 = setelement(8, State1, 
[a3, t3]), - [a3, t3] = ?BACKEND:used_regs(State2), + %% a3 = bit 3 = 8, t3 = bit 11 = 2048 + %% AVAILABLE_REGS_MASK = 16#7F00, remove t3 => 16#7700 + State1 = setelement(7, State0, 16#7700), + State2 = setelement(8, State1, 8 bor 2048), + [t3, a3] = ?BACKEND:used_regs(State2), State3 = ?BACKEND:move_to_array_element(State2, {x_reg, 0}, a3, t3, 1), Stream = ?BACKEND:stream(State3), Dump = << @@ -2558,9 +2560,11 @@ move_to_array_element_test_() -> end), %% move_to_array_element/5: imm to reg[x+offset] ?_test(begin - State1 = setelement(7, State0, ?BACKEND:available_regs(State0) -- [a3, t3]), - State2 = setelement(8, State1, [a3, t3]), - [a3, t3] = ?BACKEND:used_regs(State2), + %% a3 = bit 3 = 8, t3 = bit 11 = 2048 + %% AVAILABLE_REGS_MASK = 16#7F00, remove t3 => 16#7700 + State1 = setelement(7, State0, 16#7700), + State2 = setelement(8, State1, 8 bor 2048), + [t3, a3] = ?BACKEND:used_regs(State2), State3 = ?BACKEND:move_to_array_element(State2, 42, a3, t3, 1), Stream = ?BACKEND:stream(State3), Dump = << @@ -3153,9 +3157,9 @@ call_func_ptr_stack_alignment_test() -> " 14: c22a sw a0,4(sp)\n" " 16: c42e sw a1,8(sp)\n" " 18: c632 sw a2,12(sp)\n" - " 1a: c876 sw t4,16(sp)\n" + " 1a: c87e sw t6,16(sp)\n" " 1c: ca7a sw t5,20(sp)\n" - " 1e: cc7e sw t6,24(sp)\n" + " 1e: cc76 sw t4,24(sp)\n" " 20: 02a00513 li a0,42\n" " 24: 9e02 jalr t3\n" " 26: 8e2a mv t3,a0\n" @@ -3163,9 +3167,9 @@ call_func_ptr_stack_alignment_test() -> " 2a: 4512 lw a0,4(sp)\n" " 2c: 45a2 lw a1,8(sp)\n" " 2e: 4632 lw a2,12(sp)\n" - " 30: 4ec2 lw t4,16(sp)\n" + " 30: 4fc2 lw t6,16(sp)\n" " 32: 4f52 lw t5,20(sp)\n" - " 34: 4fe2 lw t6,24(sp)\n" + " 34: 4ee2 lw t4,24(sp)\n" " 36: 02010113 addi sp,sp,32" >>, jit_tests_common:assert_stream(riscv32, Dump, Stream). 
@@ -3209,10 +3213,10 @@ call_func_ptr_register_exhaustion_test_() -> " 1c: c22a sw a0,4(sp)\n" " 1e: c42e sw a1,8(sp)\n" " 20: c632 sw a2,12(sp)\n" - " 22: c81a sw t1,16(sp)\n" - " 24: ca72 sw t3,20(sp)\n" - " 26: cc76 sw t4,24(sp)\n" - " 28: ce7e sw t6,28(sp)\n" + " 22: c87e sw t6,16(sp)\n" + " 24: ca76 sw t4,20(sp)\n" + " 26: cc72 sw t3,24(sp)\n" + " 28: ce1a sw t1,28(sp)\n" " 2a: 861e mv a2,t2\n" " 2c: 468d li a3,3\n" " 2e: 4705 li a4,1\n" @@ -3222,10 +3226,10 @@ call_func_ptr_register_exhaustion_test_() -> " 36: 4512 lw a0,4(sp)\n" " 38: 45a2 lw a1,8(sp)\n" " 3a: 4632 lw a2,12(sp)\n" - " 3c: 4342 lw t1,16(sp)\n" - " 3e: 4e52 lw t3,20(sp)\n" - " 40: 4ee2 lw t4,24(sp)\n" - " 42: 4ff2 lw t6,28(sp)\n" + " 3c: 4fc2 lw t6,16(sp)\n" + " 3e: 4ed2 lw t4,20(sp)\n" + " 40: 4e62 lw t3,24(sp)\n" + " 42: 4372 lw t1,28(sp)\n" " 44: 02010113 addi sp,sp,32" >>, jit_tests_common:assert_stream(riscv32, Dump, Stream) @@ -3250,10 +3254,10 @@ call_func_ptr_register_exhaustion_test_() -> " 1c: c22a sw a0,4(sp)\n" " 1e: c42e sw a1,8(sp)\n" " 20: c632 sw a2,12(sp)\n" - " 22: c81a sw t1,16(sp)\n" - " 24: ca72 sw t3,20(sp)\n" - " 26: cc76 sw t4,24(sp)\n" - " 28: ce7e sw t6,28(sp)\n" + " 22: c87e sw t6,16(sp)\n" + " 24: ca76 sw t4,20(sp)\n" + " 26: cc72 sw t3,24(sp)\n" + " 28: ce1a sw t1,28(sp)\n" " 2a: 861e mv a2,t2\n" " 2c: 4685 li a3,1\n" " 2e: 871a mv a4,t1\n" @@ -3263,10 +3267,10 @@ call_func_ptr_register_exhaustion_test_() -> " 36: 4512 lw a0,4(sp)\n" " 38: 45a2 lw a1,8(sp)\n" " 3a: 4632 lw a2,12(sp)\n" - " 3c: 4342 lw t1,16(sp)\n" - " 3e: 4e52 lw t3,20(sp)\n" - " 40: 4ee2 lw t4,24(sp)\n" - " 42: 4ff2 lw t6,28(sp)\n" + " 3c: 4fc2 lw t6,16(sp)\n" + " 3e: 4ed2 lw t4,20(sp)\n" + " 40: 4e62 lw t3,24(sp)\n" + " 42: 4372 lw t1,28(sp)\n" " 44: 02010113 addi sp,sp,32" >>, jit_tests_common:assert_stream(riscv32, Dump, Stream) @@ -3291,10 +3295,10 @@ call_func_ptr_register_exhaustion_test_() -> " 1c: c22a sw a0,4(sp)\n" " 1e: c42e sw a1,8(sp)\n" " 20: c632 sw a2,12(sp)\n" - " 22: c81a sw 
t1,16(sp)\n" - " 24: ca72 sw t3,20(sp)\n" - " 26: cc76 sw t4,24(sp)\n" - " 28: ce7e sw t6,28(sp)\n" + " 22: c87e sw t6,16(sp)\n" + " 24: ca76 sw t4,20(sp)\n" + " 26: cc72 sw t3,24(sp)\n" + " 28: ce1a sw t1,28(sp)\n" " 2a: 861e mv a2,t2\n" " 2c: 869a mv a3,t1\n" " 2e: 4705 li a4,1\n" @@ -3304,10 +3308,10 @@ call_func_ptr_register_exhaustion_test_() -> " 36: 4512 lw a0,4(sp)\n" " 38: 45a2 lw a1,8(sp)\n" " 3a: 4632 lw a2,12(sp)\n" - " 3c: 4342 lw t1,16(sp)\n" - " 3e: 4e52 lw t3,20(sp)\n" - " 40: 4ee2 lw t4,24(sp)\n" - " 42: 4ff2 lw t6,28(sp)\n" + " 3c: 4fc2 lw t6,16(sp)\n" + " 3e: 4ed2 lw t4,20(sp)\n" + " 40: 4e62 lw t3,24(sp)\n" + " 42: 4372 lw t1,28(sp)\n" " 44: 02010113 addi sp,sp,32" >>, jit_tests_common:assert_stream(riscv32, Dump, Stream), @@ -3333,27 +3337,27 @@ call_func_ptr_register_exhaustion_test_() -> " 1e: c22a sw a0,4(sp)\n" " 20: c42e sw a1,8(sp)\n" " 22: c632 sw a2,12(sp)\n" - " 24: c81a sw t1,16(sp)\n" - " 26: ca1e sw t2,20(sp)\n" - " 28: cc72 sw t3,24(sp)\n" - " 2a: ce76 sw t4,28(sp)\n" - " 2c: d07a sw t5,32(sp)\n" - " 2e: d27e sw t6,36(sp)\n" - " 30: 832e mv t1,a1\n" + " 24: c87e sw t6,16(sp)\n" + " 26: ca7a sw t5,20(sp)\n" + " 28: cc76 sw t4,24(sp)\n" + " 2a: ce72 sw t3,28(sp)\n" + " 2c: d01e sw t2,32(sp)\n" + " 2e: d21a sw t1,36(sp)\n" + " 30: 8fae mv t6,a1\n" " 32: 857a mv a0,t5\n" " 34: 85b6 mv a1,a3\n" - " 36: 9302 jalr t1\n" + " 36: 9f82 jalr t6\n" " 38: c42a sw a0,8(sp)\n" " 3a: 4082 lw ra,0(sp)\n" " 3c: 4512 lw a0,4(sp)\n" " 3e: 45a2 lw a1,8(sp)\n" " 40: 4632 lw a2,12(sp)\n" - " 42: 4342 lw t1,16(sp)\n" - " 44: 43d2 lw t2,20(sp)\n" - " 46: 4e62 lw t3,24(sp)\n" - " 48: 4ef2 lw t4,28(sp)\n" - " 4a: 5f02 lw t5,32(sp)\n" - " 4c: 5f92 lw t6,36(sp)\n" + " 42: 4fc2 lw t6,16(sp)\n" + " 44: 4f52 lw t5,20(sp)\n" + " 46: 4ee2 lw t4,24(sp)\n" + " 48: 4e72 lw t3,28(sp)\n" + " 4a: 5382 lw t2,32(sp)\n" + " 4c: 5312 lw t1,36(sp)\n" " 4e: 03010113 addi sp,sp,48" >>, jit_tests_common:assert_stream(riscv32, Dump, Stream) @@ -3379,25 +3383,25 @@ 
call_func_ptr_register_exhaustion_test_() -> " 1e: c22a sw a0,4(sp)\n" " 20: c42e sw a1,8(sp)\n" " 22: c632 sw a2,12(sp)\n" - " 24: c81a sw t1,16(sp)\n" - " 26: ca1e sw t2,20(sp)\n" + " 24: c87e sw t6,16(sp)\n" + " 26: ca76 sw t4,20(sp)\n" " 28: cc72 sw t3,24(sp)\n" - " 2a: ce76 sw t4,28(sp)\n" - " 2c: d07e sw t6,32(sp)\n" - " 2e: 00862303 lw t1,8(a2)\n" + " 2a: ce1e sw t2,28(sp)\n" + " 2c: d01a sw t1,32(sp)\n" + " 2e: 00862f83 lw t6,8(a2)\n" " 32: 857a mv a0,t5\n" " 34: 85b6 mv a1,a3\n" - " 36: 9302 jalr t1\n" + " 36: 9f82 jalr t6\n" " 38: 8f2a mv t5,a0\n" " 3a: 4082 lw ra,0(sp)\n" " 3c: 4512 lw a0,4(sp)\n" " 3e: 45a2 lw a1,8(sp)\n" " 40: 4632 lw a2,12(sp)\n" - " 42: 4342 lw t1,16(sp)\n" - " 44: 43d2 lw t2,20(sp)\n" + " 42: 4fc2 lw t6,16(sp)\n" + " 44: 4ed2 lw t4,20(sp)\n" " 46: 4e62 lw t3,24(sp)\n" - " 48: 4ef2 lw t4,28(sp)\n" - " 4a: 5f82 lw t6,32(sp)\n" + " 48: 43f2 lw t2,28(sp)\n" + " 4a: 5302 lw t1,32(sp)\n" " 4c: 03010113 addi sp,sp,48" >>, jit_tests_common:assert_stream(riscv32, Dump, Stream)