diff --git a/lang/lua/luajit2/Makefile b/lang/lua/luajit2/Makefile index 313051df3e6d2..060558fb9393a 100644 --- a/lang/lua/luajit2/Makefile +++ b/lang/lua/luajit2/Makefile @@ -1,14 +1,14 @@ include $(TOPDIR)/rules.mk PKG_NAME:=luajit2 -PKG_SOURCE_DATE:=2026-02-27 +PKG_SOURCE_DATE:=2026-06-06 PKG_VERSION:=2.1.$(subst -,.,$(PKG_SOURCE_DATE)) PKG_RELEASE:=1 PKG_SOURCE_PROTO:=git PKG_SOURCE_URL:=https://github.com/openresty/luajit2 -PKG_MIRROR_HASH:=057f6c02cdfac3f7cacc21fcac45ff74084cabb4a4db2cc589ed332bb1aeddbb -PKG_SOURCE_VERSION:=928290a470d35eb01324d5275b22fb86b13602a2 +PKG_MIRROR_HASH:=44a57a8bab1851e76fc345e2b99c67b9937e38aefb7941f1313539cd15219fc1 +PKG_SOURCE_VERSION:=a08100e7598451d4fd3a89a9826980f7c64117e7 PKG_MAINTAINER:=Javier Marcet PKG_LICENSE:=MIT diff --git a/lang/lua/luajit2/patches/020-riscv64-support.patch b/lang/lua/luajit2/patches/020-riscv64-support.patch index ee4d560e9621e..fda18f81032f7 100644 --- a/lang/lua/luajit2/patches/020-riscv64-support.patch +++ b/lang/lua/luajit2/patches/020-riscv64-support.patch @@ -1,40 +1,71 @@ -From dff0c180819449c742ed77b2cb542b5a95c1f0d5 Mon Sep 17 00:00:00 2001 +From cab5d57d8249daa01bb4f98928dbdfb270153c66 Mon Sep 17 00:00:00 2001 From: gns Date: Thu, 27 Nov 2025 20:48:19 +0800 -Subject: [PATCH] Add RISC-V 64 support +Subject: [PATCH 01/23] riscv(support): add RISC-V 64 arch base definition -This backports RISC-V 64 support from openresty/luajit2 PR #236. - -The patch adds comprehensive RISC-V64 JIT implementation including: -- Architecture detection and definitions -- DynASM RISC-V support -- JIT compiler, assembler, and emitter implementations -- FFI calling conventions and Linux-specific support - -Original work by gns from PLCT Lab, ISCAS. 
-See: https://github.com/openresty/luajit2/pull/236 - -Co-authored-by: Heinrich Schuchardt --- ---- a/Makefile -+++ b/Makefile -@@ -101,6 +101,7 @@ FILES_JITLIB= bc.lua bcsave.lua dump.lua - dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \ - dis_mips64.lua dis_mips64el.lua \ - dis_mips64r6.lua dis_mips64r6el.lua \ -+ dis_riscv.lua dis_riscv64.lua \ - vmdef.lua + src/lj_arch.h | 22 ++++++++++++++++++++++ + 1 file changed, 22 insertions(+) + +--- a/src/lj_arch.h ++++ b/src/lj_arch.h +@@ -33,6 +33,8 @@ + #define LUAJIT_ARCH_mips64 7 + #define LUAJIT_ARCH_S390X 8 + #define LUAJIT_ARCH_s390x 8 ++#define LUAJIT_ARCH_RISCV64 9 ++#define LUAJIT_ARCH_riscv64 9 + + /* Target OS. */ + #define LUAJIT_OS_OTHER 0 +@@ -69,6 +71,8 @@ + #define LUAJIT_TARGET LUAJIT_ARCH_MIPS64 + #elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS) + #define LUAJIT_TARGET LUAJIT_ARCH_MIPS32 ++#elif (defined(__riscv) || defined(__riscv__)) && __riscv_xlen == 64 ++#define LUAJIT_TARGET LUAJIT_ARCH_RISCV64 + #else + #error "Architecture not supported (in this version), see: https://luajit.org/status.html#architectures" + #endif +@@ -506,6 +510,20 @@ + #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL + #define LJ_TARGET_GC64 1 + #define LJ_ARCH_NOJIT 1 /* NYI */ ++#elif LUAJIT_TARGET == LUAJIT_ARCH_RISCV64 ++ ++#define LJ_ARCH_NAME "riscv64" ++#define LJ_ARCH_BITS 64 ++#define LJ_ARCH_ENDIAN LUAJIT_LE /* Forget about BE for now */ ++#define LJ_TARGET_RISCV64 1 ++#define LJ_TARGET_GC64 1 ++#define LJ_TARGET_EHRETREG 10 ++#define LJ_TARGET_EHRAREG 1 ++#define LJ_TARGET_JUMPRANGE 30 /* JAL +-2^20 = +-1MB,\ ++ AUIPC+JALR +-2^31 = +-2GB, leave 1 bit to avoid AUIPC corner case */ ++#define LJ_TARGET_MASKSHIFT 1 ++#define LJ_TARGET_MASKROT 1 ++#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL + + #else + #error "No target architecture defined" +@@ -590,6 +608,10 @@ + #error "Only n64 ABI supported for MIPS64" + #undef LJ_TARGET_MIPS + #endif ++#elif LJ_TARGET_RISCV64 ++#if 
!defined(__riscv_float_abi_double) ++#error "Only RISC-V 64 double float supported for now" ++#endif + #endif + #endif - ifeq (,$(findstring Windows,$(OS))) --- /dev/null +++ b/dynasm/dasm_riscv.h -@@ -0,0 +1,435 @@ +@@ -0,0 +1,433 @@ +/* +** DynASM RISC-V encoding engine. -+** Copyright (C) 2005-2025 Mike Pall. All rights reserved. ++** Copyright (C) 2022-2026 ISRC, ISCAS. All rights reserved. +** Released under the MIT license. See dynasm.lua for full copyright notice. -+** -+** Contributed by gns from PLCT Lab, ISCAS. +*/ + +#include @@ -470,10 +501,10 @@ Co-authored-by: Heinrich Schuchardt +------------------------------------------------------------------------------ +-- DynASM RISC-V module. +-- -+-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. ++-- Copyright (C) 2022-2026 ISRC, ISCAS. +-- See dynasm.lua for full copyright notice. +-- -+-- Contributed by gns from PLCT Lab, ISCAS. ++-- Contributed by gns from PLCT Lab, ISRC, ISCAS. +------------------------------------------------------------------------------ + +local riscv32 = riscv32 @@ -486,7 +517,7 @@ Co-authored-by: Heinrich Schuchardt + version = "1.5.0", + vernum = 10500, + release = "2022-07-12", -+ author = "Mike Pall", ++ author = "gns", + license = "MIT", +} + @@ -895,7 +926,7 @@ Co-authored-by: Heinrich Schuchardt + -- RV32D + ["fld_2"] = "00003007FL", + ["fsd_2"] = "00003027gS", -+ ++ + ["fmadd.d_4"] = "02000043FGgH", + ["fmsub.d_4"] = "02000047FGgH", + ["fnmsub.d_4"] = "0200004bFGgH", @@ -1452,7 +1483,7 @@ Co-authored-by: Heinrich Schuchardt +------------------------------------------------------------------------------ +-- DynASM RISC-V 32 module. +-- -+-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. ++-- Copyright (C) 2022-2026 ISRC, ISCAS. All rights reserved. +-- See dynasm.lua for full copyright notice. +------------------------------------------------------------------------------ +-- This module just sets 32 bit mode for the combined RISC-V module. 
@@ -1467,7 +1498,7 @@ Co-authored-by: Heinrich Schuchardt +------------------------------------------------------------------------------ +-- DynASM RISC-V 64 module. +-- -+-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. ++-- Copyright (C) 2022-2026 ISRC, ISCAS. All rights reserved. +-- See dynasm.lua for full copyright notice. +------------------------------------------------------------------------------ +-- This module just sets 64 bit mode for the combined RISC-V module. @@ -1476,5282 +1507,14 @@ Co-authored-by: Heinrich Schuchardt + +riscv64 = true -- Using a global is an ugly, but effective solution. +return require("dasm_riscv") ---- a/src/Makefile -+++ b/src/Makefile -@@ -52,6 +52,7 @@ CCOPT_arm= - CCOPT_arm64= - CCOPT_ppc= - CCOPT_mips= -+CCOPT_riscv64= - # - #CCDEBUG= - # Uncomment the next line to generate debug information: -@@ -270,6 +271,9 @@ ifneq (,$(findstring LJ_TARGET_MIPS ,$(T - TARGET_LJARCH= mips - endif - else -+ifneq (,$(findstring LJ_TARGET_RISCV64 ,$(TARGET_TESTARCH))) -+ TARGET_LJARCH= riscv64 -+else - $(error Unsupported target architecture) - endif - endif -@@ -278,6 +282,7 @@ endif - endif - endif - endif -+endif - - ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH))) - TARGET_SYS= PS3 -@@ -495,6 +500,9 @@ ifeq (ppc,$(TARGET_LJARCH)) - DASM_AFLAGS+= -D ELFV2 - endif - endif -+ifneq (,$(findstring LJ_TARGET_RISCV64 ,$(TARGET_TESTARCH))) -+ DASM_AFLAGS+= -D RISCV64 -+endif - endif - endif - ---- a/src/host/buildvm.c -+++ b/src/host/buildvm.c -@@ -69,6 +69,8 @@ static int collect_reloc(BuildCtx *ctx, - #include "../dynasm/dasm_mips.h" - #elif LJ_TARGET_S390X - #include "../dynasm/dasm_s390x.h" -+#elif LJ_TARGET_RISCV64 -+#include "../dynasm/dasm_riscv.h" - #else - #error "No support for this architecture (yet)" - #endif ---- a/src/host/buildvm_asm.c -+++ b/src/host/buildvm_asm.c -@@ -208,6 +208,34 @@ static void emit_asm_wordreloc(BuildCtx - "Error: unsupported opcode %08x for %s symbol relocation.\n", - ins, sym); - 
exit(1); -+#elif LJ_TARGET_RISCV64 -+ if ((ins & 0x7f) == 0x17u) { -+ fprintf(ctx->fp, "\tauipc x%d, %s\n", (ins >> 7) & 31, sym); -+ } else if ((ins & 0x7f) == 0x67u) { -+ fprintf(ctx->fp, "\tjalr x%d, x%d, %s\n", (ins >> 7) & 31, (ins >> 15) & 31, sym); -+ } else if ((ins & 0x7f) == 0x6fu) { -+ fprintf(ctx->fp, "\tjal x%d, %s\n", (ins >> 7) & 31, sym); -+ } else if ((ins & 0x7f) == 0x03u) { -+ uint8_t funct3 = (ins >> 12) & 7; -+ uint8_t rd = (ins >> 7) & 31, rs1 = (ins >> 15) & 31; -+ switch (funct3) { -+ case 0: fprintf(ctx->fp, "\tlb"); break; -+ case 1: fprintf(ctx->fp, "\tlh"); break; -+ case 2: fprintf(ctx->fp, "\tlw"); break; -+ case 3: fprintf(ctx->fp, "\tld"); break; -+ case 4: fprintf(ctx->fp, "\tlbu"); break; -+ case 5: fprintf(ctx->fp, "\tlhu"); break; -+ case 6: fprintf(ctx->fp, "\tlwu"); break; -+ default: goto rv_reloc_err; -+ } -+ fprintf(ctx->fp, " x%d, %s(x%d)\n", rd, sym, rs1); -+ } else { -+rv_reloc_err: -+ fprintf(stderr, -+ "Error: unsupported opcode %08x for %s symbol relocation.\n", -+ ins, sym); -+ exit(1); -+ } - #else - #error "missing relocation support for this architecture" - #endif -@@ -304,6 +332,9 @@ void emit_asm(BuildCtx *ctx) - #if LJ_TARGET_MIPS - fprintf(ctx->fp, "\t.set nomips16\n\t.abicalls\n\t.set noreorder\n\t.set nomacro\n"); - #endif -+#if LJ_TARGET_RISCV64 -+ fprintf(ctx->fp, ".option norvc\n.option norelax\n"); -+#endif - emit_asm_align(ctx, 4); - - #if LJ_TARGET_PS3 ---- a/src/jit/bcsave.lua -+++ b/src/jit/bcsave.lua -@@ -105,6 +105,7 @@ local map_arch = { - mips64r6 = { e = "be", b = 64, m = 8, f = 0xa0000407, }, - mips64r6el = { e = "le", b = 64, m = 8, f = 0xa0000407, }, - s390x = { e = "be", b = 64, m = 22, }, -+ riscv64 = { e = "le", b = 64, m = 243, f = 0x00000004, }, - } - - local map_os = { ---- /dev/null -+++ b/src/jit/dis_riscv.lua -@@ -0,0 +1,979 @@ -+------------------------------------------------------------------------------ -+-- LuaJIT RISC-V disassembler module. 
-+-- -+-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -+-- Released under the MIT license. See Copyright Notice in luajit.h -+-- -+-- Contributed by Milos Poletanovic from Syrmia.com. -+-- Contributed by gns from PLCT Lab, ISCAS. -+------------------------------------------------------------------------------ -+-- This is a helper module used by the LuaJIT machine code dumper module. -+-- -+-- It disassembles most standard RISC-V instructions. -+-- Mode is little-endian -+------------------------------------------------------------------------------ -+ -+local type = type -+local byte, format = string.byte, string.format -+local match, gmatch = string.match, string.gmatch -+local concat = table.concat -+local bit = require("bit") -+local band, bor, tohex = bit.band, bit.bor, bit.tohex -+local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift -+local jit = require("jit") -+ -+local jstat = { jit.status() } -+local function is_opt_enabled(opt) -+ for _, v in ipairs(jstat) do -+ if v == opt then -+ return true -+ end -+ end -+ return false -+end -+local xthead = is_opt_enabled("XThead") -+ -+------------------------------------------------------------------------------ -+-- Opcode maps -+------------------------------------------------------------------------------ -+ -+--RVC32 extension -+ -+local map_quad0 = { -+ shift = 13, mask = 7, -+ [0] = "c.addi4spnZW", "c.fldNMh", "c.lwZMn", "c.flwNMn", -+ false, "c.fsdNMh", "c.swZMn", "c.fswNMn" -+} -+ -+local map_sub2quad1 = { -+ shift = 5, mask = 3, -+ [0] = "c.subMZ", "c.xorMZ", "c.orMZ", "c.andMZ" -+} -+ -+local map_sub1quad1 = { -+ shift = 10, mask = 3, -+ [0] = "c.srliM1", "c.sraiM1", "c.andiMx", map_sub2quad1 -+} -+ -+local map_quad1 = { -+ shift = 13, mask = 7, -+ [0] = { -+ shift = 7, mask = 31, -+ [0] = "c.nop", _ = "c.addiDx" -+ }, -+ [1] = "c.jalT", [2] = "c.liDx", -+ [3] = { -+ shift = 7, mask = 31, -+ [0] = "c.luiDK", [1] = "c.luiDK", [2] = "c.addi16spX", -+ _ = "c.luiDK" -+ }, -+ [4] = 
map_sub1quad1, [5] = "c.jT", [6] = "c.beqzMq", [7] = "c.bnezMq" -+} -+ -+local map_sub1quad2 = { -+ shift = 12, mask = 1, -+ [0] = { -+ shift = 2, mask = 31, -+ [0] = "c.jrD", _ = "c.mvDE" -+ }, -+ [1] = { -+ shift = 2, mask = 31, -+ [0] = { -+ shift = 7, mask = 31, -+ [0] = "c.ebreak", _ = "c.jalrD" -+ }, -+ _ = "c.addDE" -+ } -+} -+ -+local map_quad2 = { -+ shift = 13, mask = 7, -+ [0] = "c.slliD1", [1] = "c.fldspFQ",[2] = "c.lwspDY", [3] = "c.flwspFY", -+ [4] = map_sub1quad2, [5] = "c.fsdspVt", [6] = "c.swspEu", [7] = "c.fswspVu" -+} -+ -+local map_compr = { -+ [0] = map_quad0, map_quad1, map_quad2 -+} -+ -+--RV32M -+local map_mext = { -+ shift = 12, mask = 7, -+ [0] = "mulDRr", "mulhDRr", "mulhsuDRr", "mulhuDRr", -+ "divDRr", "divuDRr", "remDRr", "remuDRr" -+} -+ -+--RV64M -+local map_mext64 = { -+ shift = 12, mask = 7, -+ [0] = "mulwDRr", [4] = "divwDRr", [5] = "divuwDRr", [6] = "remwDRr", -+ [7] = "remuwDRr" -+} -+ -+--RV32F, RV64F, RV32D, RV64D -+local map_fload = { -+ shift = 12, mask = 7, -+ [2] = "flwFL", [3] = "fldFL" -+} -+ -+local map_fstore = { -+ shift = 12, mask = 7, -+ [2] = "fswSg", [3] = "fsdSg" -+} -+ -+local map_fmadd = { -+ shift = 25, mask = 3, -+ [0] = "fmadd.sFGgHo", "fmadd.dFGgHo" -+} -+ -+local map_fmsub = { -+ shift = 25, mask = 3, -+ [0] = "fmsub.sFGgHo", "fmsub.dFGgHo" -+} -+ -+local map_fnmsub = { -+ shift = 25, mask = 3, -+ [0] = "fnmsub.sFGgHo", "fnmsub.dFGgHo" -+} -+ -+local map_fnmadd = { -+ shift = 25, mask = 3, -+ [0] = "fnmadd.sFGgHo", "fnmadd.dFGgHo" -+} -+ -+local map_fsgnjs = { -+ shift = 12, mask = 7, -+ [0] = "fsgnj.s|fmv.sFGg6", "fsgnjn.s|fneg.sFGg6", "fsgnjx.s|fabs.sFGg6" -+} -+ -+local map_fsgnjd = { -+ shift = 12, mask = 7, -+ [0] = "fsgnj.d|fmv.dFGg6", "fsgnjn.d|fneg.dFGg6", "fsgnjx.d|fabs.dFGg6" -+} -+ -+local map_fms = { -+ shift = 12, mask = 7, -+ [0] = "fmin.sFGg", "fmax.sFGg", "fminm.sFGg", "fmaxm.sFGg" -+} -+ -+local map_fmd = { -+ shift = 12, mask = 7, -+ [0] = "fmin.dFGg", "fmax.dFGg", "fminm.dFGg", 
"fmaxm.dFGg" -+} -+ -+local map_fcomps = { -+ shift = 12, mask = 7, -+ [0] = "fle.sDGg", "flt.sDGg", "feq.sDGg", -+ [4] = "fleq.sDGg", "fltq.sDGg" -+} -+ -+local map_fcompd = { -+ shift = 12, mask = 7, -+ [0] = "fle.dDGg", "flt.dDGg", "feq.dDGg", -+ [4] = "fleq.dDGg", "fltq.dDGg" -+} -+ -+local map_fcvtwls = { -+ shift = 20, mask = 31, -+ [0] = "fcvt.w.sDGo", "fcvt.wu.sDGo", "fcvt.l.sDGo", "fcvt.lu.sDGo" -+} -+ -+local map_fcvtwld = { -+ shift = 20, mask = 31, -+ [0] = "fcvt.w.dDGo", "fcvt.wu.dDGo", "fcvt.l.dDGo", "fcvt.lu.dDGo", -+ [8] = { -+ shift = 12, mask = 7, -+ [1] = "fcvtmodw.dDG" -+ } -+} -+ -+local map_fcvts = { -+ shift = 20, mask = 31, -+ [0] = "fcvt.s.wFRo", "fcvt.s.wuFRo", "fcvt.s.lFRo", "fcvt.s.luFRo" -+} -+ -+local map_fcvtd = { -+ shift = 20, mask = 31, -+ [0] = "fcvt.d.wFRo", "fcvt.d.wuFRo", "fcvt.d.lFRo", "fcvt.d.luFRo" -+} -+ -+local map_fcvtsd = { -+ shift = 20, mask = 31, -+ [0] = "fcvt.s.dFGo", -+ [4] = "fround.sFGo", [5] = "froundnx.sFGo" -+} -+ -+local map_fcvtds = { -+ shift = 20, mask = 31, -+ [0] = "fcvt.d.sFGo", -+ [4] = "fround.dFGo", [5] = "froundnx.dFGo" -+} -+ -+local map_fmvwx = { -+ shift = 20, mask = 31, -+ [0] = "fmv.w.xFR", [1] = "fli.sFy" -+} -+ -+local map_fmvdx = { -+ shift = 20, mask = 31, -+ [0] = "fmv.d.xFR", [1] = "fli.dFy" -+} -+ -+local map_fext = { -+ shift = 25, mask = 127, -+ [0] = "fadd.sFGgo", [1] = "fadd.dFGgo", [4] = "fsub.sFGgo", [5] = "fsub.dFGgo", -+ [8] = "fmul.sFGgo", [9] = "fmul.dFGgo", [12] = "fdiv.sFGgo", [13] = "fdiv.dFGgo", -+ [16] = map_fsgnjs, [17] = map_fsgnjd, [20] = map_fms, [21] = map_fmd, -+ [32] = map_fcvtsd, [33] = map_fcvtds,[44] = "fsqrt.sFGo", [45] = "fsqrt.dFGo", -+ [80] = map_fcomps, [81] = map_fcompd, [96] = map_fcvtwls, [97] = map_fcvtwld, -+ [104] = map_fcvts, [105] = map_fcvtd, -+ [112] = { -+ shift = 12, mask = 7, -+ [0] = "fmv.x.wDG", "fclass.sDG" -+ }, -+ [113] = { -+ shift = 12, mask = 7, -+ [0] = "fmv.x.dDG", "fclass.dDG" -+ }, -+ [120] = map_fmvwx, [121] = map_fmvdx -+} -+ 
-+--RV32A, RV64A -+local map_aext = { -+ shift = 27, mask = 31, -+ [0] = { -+ shift = 12, mask = 7, -+ [2] = "amoadd.wDrO", [3] = "amoadd.dDrO" -+ }, -+ { -+ shift = 12, mask = 7, -+ [2] = "amoswap.wDrO", [3] = "amoswap.dDrO" -+ }, -+ { -+ shift = 12, mask = 7, -+ [2] = "lr.wDO", [3] = "lr.dDO" -+ }, -+ { -+ shift = 12, mask = 7, -+ [2] = "sc.wDrO", [3] = "sc.dDrO" -+ }, -+ { -+ shift = 12, mask = 7, -+ [2] = "amoxor.wDrO", [3] = "amoxor.dDrO" -+ }, -+ [8] = { -+ shift = 12, mask = 7, -+ [2] = "amoor.wDrO", [3] = "amoor.dDrO" -+ }, -+ [12] = { -+ shift = 12, mask = 7, -+ [2] = "amoand.wDrO", [3] = "amoand.dDrO" -+ }, -+ [16] = { -+ shift = 12, mask = 7, -+ [2] = "amomin.wDrO", [3] = "amomin.dDrO" -+ }, -+ [20] = { -+ shift = 12, mask = 7, -+ [2] = "amomax.wDrO", [3] = "amomax.dDrO" -+ }, -+ [24] = { -+ shift = 12, mask = 7, -+ [2] = "amominu.wDrO", [3] = "amominu.dDrO" -+ }, -+ [28] = { -+ shift = 12, mask = 7, -+ [2] = "amomaxu.wDrO", [3] = "amomaxu.dDrO" -+ }, -+} -+ -+-- RV32I, RV64I -+local map_load = { -+ shift = 12, mask = 7, -+ [0] = "lbDL", "lhDL", "lwDL", "ldDL", -+ "lbuDL", "lhuDL", "lwuDL" -+} -+ -+local map_opimm = { -+ shift = 12, mask = 7, -+ [0] = { -+ shift = 7, mask = 0x1ffffff, -+ [0] = "nop", _ = "addi|li|mvDR0I2" -+ }, -+ { -+ shift = 25, mask = 127, -+ [48] = { -+ shift = 20, mask = 31, -+ [4] = "sext.bDR", [5] = "sext.hDR" -+ }, -+ _ = "slliDRi", -+ }, "sltiDRI", "sltiu|seqzDRI5", -+ "xori|notDRI4", -+ { -+ shift = 26, mask = 63, -+ [0] = "srliDRi", [16] = "sraiDRi", [24] = "roriDRi", -+ [26] = { -+ shift = 20, mask = 63, -+ [56] = "rev8DR" -+ } -+ }, -+ "oriDRI", "andiDRI" -+} -+ -+local map_branch = { -+ shift = 12, mask = 7, -+ [0] = "beq|beqzRr0B", "bne|bnezRr0B" , false, false, -+ "blt|bgtz|bltzR0r2B", "bge|blez|bgezR0r2B", "bltuRrB", "bgeuRrB" -+} -+ -+local map_store = { -+ shift = 12, mask = 7, -+ [0] = "sbSr", "shSr", "swSr", "sdSr" -+} -+ -+local map_op = { -+ shift = 25, mask = 127, -+ [0] = { -+ shift = 12, mask = 7, -+ [0] = 
"addDRr", "sllDRr", "slt|sgtz|sltzDR0r2", "sltu|snezDR0r", -+ "xorDRr", "srlDRr", "orDRr", "andDRr" -+ }, -+ [1] = map_mext, -+ [4] = { -+ -+ }, -+ [5] = { -- Zbb -+ shift = 12, mask = 7, -+ [4] = "minDRr", [5] = "minuDRr", [6] = "maxDRr", [7] = "maxuDRr" -+ }, -+ [7] = { -- Zicond -+ shift = 12, mask = 7, -+ [5] = "czero.eqzDRr", [7] = "czero.nezDRr" -+ }, -+ [16] = { -- Zba -+ shift = 12, mask = 7, -+ [2] = "sh1addDRr", [4] = "sh2addDRr", [6] = "sh3addDRr" -+ }, -+ [32] = { -- Zbb -+ shift = 12, mask = 7, -+ [0] = "sub|negDR0r", [4] = "xnorDRr", [5] = "sraDRr", [6] = "ornDRr", [7] = "andnDRr" -+ }, -+ [48] = { -- Zbb -+ shift = 12, mask = 7, -+ [1] = "rolDRr", [5] = "rorDRr" -+ } -+} -+ -+--- 64I -+local map_opimm32 = { -+ shift = 12, mask = 7, -+ [0] = "addiw|sext.wDRI0", "slliwDRi", -+ [2] = { -- Zba -+ shift = 25, mask = 127, -+ [1] = "slli.uwDRi" -+ }, -+ [5] = { -- 64I -+ shift = 25, mask = 127, -+ [0] = "srliwDRi", [32] = "sraiwDRi", [48] = "roriwDRi" -+ }, -+ [48] = { -- Zbb -+ shift = 25, mask = 127, -+ [5] = "roriwDRi" -+ } -+} -+ -+local map_op32 = { -+ shift = 25, mask = 127, -+ [0] = { -- 64I -+ shift = 12, mask = 7, -+ [0] = "addwDRr", [1] = "sllwDRr", [5] = "srlwDRr" -+ }, -+ [1] = map_mext64, -+ [4] = { -- Zba & Zbb -+ shift = 12, mask = 7, -+ [0] = "add.uw|zext.w|DRr0", [4] = "zext.hDRr" -+ }, -+ [16] = { -- Zba -+ shift = 12, mask = 7, -+ [2] = "sh1add.uw", [4] = "sh2add.uw", [6] = "sh3add.uw" -+ }, -+ [32] = { -- 64I -+ shift = 12, mask = 7, -+ [0] = "subw|negwDR0r", [5] = "srawDRr" -+ }, -+ [48] = { -- Zbb -+ shift = 12, mask = 7, -+ [1] = "rolwDRr", [5] = "rorwDRr" -+ } -+} -+ -+local map_ecabre = { -+ shift = 12, mask = 7, -+ [0] = { -+ shift = 20, mask = 4095, -+ [0] = "ecall", "ebreak" -+ } -+} -+ -+local map_fence = { -+ shift = 12, mask = 1, -+ [0] = "fence", --"fence.i" ZIFENCEI EXTENSION -+} -+ -+local map_jalr = { -+ shift = 7, mask = 0x1ffffff, -+ _ = "jalr|jrDRI7", [256] = "ret" -+} -+ -+local map_xthead_custom0 = { -+ shift = 12, 
mask = 7, -+ [1] = { -- Arithmetic -+ shift = 27, mask = 31, -+ [0] = "th.addslDRrv", -+ [2] = { -+ shift = 26, mask = 63, -+ [4] = "th.srriDRi", -+ [5] = { -+ shift = 25, mask = 127, -+ [10] = "th.srriwDRi" -+ } -+ }, -+ [4] = { -- XTheadMac -+ shift = 25, mask = 3, -+ [0] = "th.mulaDRr", "th.mulsDRr", "th.mulawDRr", "th.mulswDRr" -+ }, -+ [5] = { -- XTheadMac -+ shift = 25, mask = 3, -+ [0] = "th.mulahDRr", "th.mulshDRr" -+ }, -+ [8] = { -- XTheadCondMov -+ shift = 25, mask = 3, -+ [0] = "th.mveqzDRr", "th.mvnezDRr" -+ }, -+ [16] = { -- XTheadBb -+ shift = 20, mask = 31, -+ [0] = { -+ shift = 25, mask = 3, -+ [0] = "th.tstnbzDRi", "th.revDR", "th.ff0DR", "th.ff1DR" -+ } -+ }, -+ [17] = { -- XTheadBb -+ shift = 26, mask = 1, -+ [0] = "th.tstDRi" -+ }, -+ [18] = { -- XTheadBb -+ shift = 20, mask = 31, -+ [0] = { -+ shift = 25, mask = 3, -+ [0] = "th.revwDR" -+ } -+ } -+ }, -+ [2] = "th.extDRji", [3] = "th.extuDRji", -+ { -- MemLoad -+ shift = 29, mask = 7, -+ [7] = { -- XTheadMemPair -+ shift = 25, mask = 3, -+ [0] = "th.lwdDrP", [2] = "th.lwudDrP", "th.lddDrP" -+ } -+ }, -+ { -- MemStore -+ shift = 29, mask = 7, -+ [7] = { -- XTheadMemPair -+ shift = 25, mask = 3, -+ [0] = "th.swdDrP", [3] = "th.sddDrP" -+ } -+ } -+} -+ -+local map_custom0 = xthead and map_xthead_custom0 or nil -+ -+local map_pri = { -+ [3] = map_load, [7] = map_fload, [11] = map_custom0, [15] = map_fence, [19] = map_opimm, -+ [23] = "auipcDA", [27] = map_opimm32, -+ [35] = map_store, [39] = map_fstore, [47] = map_aext, [51] = map_op, -+ [55] = "luiDU", [59] = map_op32, [67] = map_fmadd, [71] = map_fmsub, -+ [75] = map_fnmsub, [99] = map_branch, [79] = map_fnmadd, [83] = map_fext, -+ [103] = map_jalr, [111] = "jal|j|D0J", [115] = map_ecabre -+} -+ -+------------------------------------------------------------------------------ -+ -+local map_gpr = { -+ [0] = "zero", "ra", "sp", "gp", "tp", "x5", "x6", "x7", -+ "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", -+ "x16", "x17", "x18", "x19", 
"x20", "x21", "x22", "x23", -+ "x24", "x25", "x26", "x27", "x28", "x29", "x30", "x31", -+} -+ -+local map_fgpr = { -+ [0] = "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", -+ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", -+ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", -+ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", -+} -+ -+local map_rm = { -+ [0] = "rne", "rtz", "rdn", "rup", "rmm", [7] = "dyn" -+} -+ -+local map_fli = { -+ [0] = "-1.0", -+ "min", -+ "0x1p-16", "0x1p-15", "0x1p-8", "0x1p-7", -+ "0.0625", "0.125", -+ "0.25", "0.3125", "0.375", "0.4375", -+ "0.5", "0.625", "0.75", "0.875", -+ "1.0", "1.25", "1.5", "1.75", -+ "2.0", "2.5", "3.0", -+ "4.0", "8.0", "16.0", "128.0", "256.0", -+ "32768.0", "65536.0", "inf", "nan" -+} -+ -+------------------------------------------------------------------------------ -+ -+-- Output a nicely formatted line with an opcode and operands. -+local function putop(ctx, text, operands) -+ local pos = ctx.pos -+ local extra = "" -+ if ctx.rel then -+ local sym = ctx.symtab[ctx.rel] -+ if sym then extra = "\t->"..sym end -+ end -+ if ctx.hexdump > 0 then -+ ctx.out:write((format("%08x %s %-7s %s%s\n", -+ ctx.addr+pos, tohex(ctx.op), text, concat(operands, ","), extra))) -+ else -+ ctx.out(format("%08x %-7s %s%s\n", -+ ctx.addr+pos, text, concat(operands, ", "), extra)) -+ end -+ local pos = ctx.pos -+ local first_byte = byte(ctx.code, ctx.pos+1) -+ --Examine if the next instruction is 16-bits or 32-bits -+ if(band(first_byte, 3) < 3) then -+ ctx.pos = pos + 2 -+ else -+ ctx.pos = pos + 4 -+ end -+end -+ -+-- Fallback for unknown opcodes. 
-+local function unknown(ctx) -+ return putop(ctx, ".long", { "0x"..tohex(ctx.op) }) -+end -+ -+local function get_le(ctx) -+ local pos = ctx.pos -+ --Examine if the next instruction is 16-bits or 32-bits -+ local first_byte = byte(ctx.code, pos+1) -+ if(band(first_byte, 3) < 3) then --checking first two bits of opcode -+ local b0, b1 = byte(ctx.code, pos+1, pos+2) -+ return bor(lshift(b1, 8), b0) -+ else -+ local b0, b1, b2, b3 = byte(ctx.code, pos+1, pos+4) -+ return bor(lshift(b3, 24), lshift(b2, 16), lshift(b1, 8), b0) -+ end -+end -+ -+local function parse_W(opcode) -+ local part1 = band(rshift(opcode, 7), 15) --9:6 -+ local part2 = band(rshift(opcode, 11), 3) --5:4 -+ local part3 = band(rshift(opcode, 5), 1)--3 -+ local part4 = band(rshift(opcode, 6), 1)--2 -+ return bor(lshift(0, 31), lshift(part1, 6) , lshift(part2, 4), -+ lshift(part3, 3), lshift(part4, 2)) -+end -+ -+local function parse_x(opcode) -+ local part1 = band(rshift(opcode, 12), 1) --5 -+ local part2 = band(rshift(opcode, 2), 31) --4:0 -+ if(part1 == 1) then -+ return bor(lshift(1, 31), lshift(0x1ffffff, 6), lshift(part1, 5), part2) -+ else -+ return bor(lshift(0, 31), lshift(part1, 5), part2) -+ end -+end -+ -+local function parse_X(opcode) -+ local part1 = band(rshift(opcode, 12), 1) --12 -+ local part2 = band(rshift(opcode, 3), 3) --8:7 -+ local part3 = band(rshift(opcode, 5), 1) --6 -+ local part4 = band(rshift(opcode, 2), 1) --5 -+ local part5 = band(rshift(opcode, 6), 1) --4 -+ if(part1 == 1) then -+ return bor(lshift(1, 31), lshift(0x3fffff, 9), lshift(part2, 7), -+ lshift(part3, 6), lshift(part4, 5), lshift(part5, 4)) -+ else -+ return bor(lshift(0, 31), lshift(part2, 7), lshift(part3, 6), -+ lshift(part4, 5), lshift(part5, 4)) -+ end -+end -+ -+local function parse_S(opcode) -+ local part1 = band(rshift(opcode, 25), 127) --11:5 -+ local sign = band(rshift(part1, 6), 1) -+ local part2 = band(rshift(opcode, 7), 31) --4:0 -+ if (sign == 1) then -+ return bor(lshift(1, 31), lshift(0x7ffff, 
12), lshift(part1, 5), part2) -+ else -+ return bor(lshift(0, 31), lshift(part1, 5), part2) -+ end -+end -+ -+local function parse_B(opcode) -+ local part1 = band(rshift(opcode, 7), 1) --11 -+ local part2 = band(rshift(opcode, 25), 63) --10:5 -+ local part3 = band(rshift(opcode, 8), 15) -- 4 : 1 -+ if (part1 == 1) then -+ return bor(lshift(1, 31), lshift(0x7ffff, 12), lshift(part1, 11), -+ lshift(part2, 5), lshift(part3, 1), 0) -+ else -+ return bor(lshift(0, 31), lshift(part1, 11), lshift(part2, 5), -+ lshift(part3, 1), 0) -+ end -+end -+ -+local function parse_q(opcode) -+ local part1 = band(rshift(opcode, 12), 1) --8 -+ local part2 = band(rshift(opcode, 5), 3) --7:6 -+ local part3 = band(rshift(opcode, 2), 1) --5 -+ local part4 = band(rshift(opcode, 10), 3) --4:3 -+ local part5 = band(rshift(opcode, 3), 3) --2:1 -+ if(part1 == 1) then -+ return bor(lshift(1, 31), lshift(0x7fffff, 8), lshift(part2, 6), -+ lshift(part3, 5), lshift(part4, 3), lshift(part5, 1)) -+ else -+ return bor(lshift(0, 31), lshift(part2, 6), lshift(part3, 5), -+ lshift(part4, 3), lshift(part5, 1)) -+ end -+end -+ -+local function parse_J(opcode) -+ local part1 = band(rshift(opcode, 31), 1) --20 -+ local part2 = band(rshift(opcode, 12), 255) -- 19:12 -+ local part3 = band(rshift(opcode, 20), 1) --11 -+ local part4 = band(rshift(opcode, 21), 1023) --10:1 -+ if(part1 == 1) then -+ return bor(lshift(1, 31), lshift(0x7ff, 20), lshift(part2, 12), -+ lshift(part3, 11), lshift(part4, 1)) -+ else -+ return bor(lshift(0, 31), lshift(0, 20), lshift(part2, 12), -+ lshift(part3, 11), lshift(part4, 1)) -+ end -+end -+ -+local function parse_T(opcode) -+ local part1 = band(rshift(opcode, 12), 1) --11 -+ local part2 = band(rshift(opcode, 8), 1) --10 -+ local part3 = band(rshift(opcode, 9), 3)--9:8 -+ local part4 = band(rshift(opcode, 6), 1) --7 -+ local part5 = band(rshift(opcode, 7), 1) -- 6 -+ local part6 = band(rshift(opcode, 2), 1) --5 -+ local part7 = band(rshift(opcode, 11), 1) --4 -+ local part8 = 
band(rshift(opcode, 3), 7) --3:1 -+ if(part1 == 1) then -+ return bor(lshift(1, 31), lshift(0x7ffff, 12), lshift(part1, 11), -+ lshift(part2, 10), lshift(part3, 8), lshift(part4, 7), -+ lshift(part5, 6), lshift(part6, 5), lshift(part7, 4), -+ lshift(part8, 1)) -+ else -+ return bor(lshift(0, 31), lshift(part1, 11), lshift(part2, 10), -+ lshift(part3, 8), lshift(part4, 7), lshift(part5, 6), -+ lshift(part6, 5), lshift(part7, 4), lshift(part8, 1)) -+ end -+end -+ -+local function parse_K(opcode) -+ local part1 = band(rshift(opcode, 12), 1) --5 17 -+ local part2 = band(rshift(opcode, 2), 31) --4:0 16:12 -+ if(part1 == 1) then -+ return bor(lshift(0, 31), lshift(0x7fff, 5), part2) -+ else -+ return bor(lshift(0, 31), lshift(part1, 5), part2) -+ end -+end -+ -+-- Disassemble a single instruction. -+local function disass_ins(ctx) -+ local op = ctx:get() -+ local operands = {} -+ local last = nil -+ ctx.op = op -+ ctx.rel =nil -+ -+ local opat = 0 -+ --for compressed instructions -+ if(band(op, 3) < 3) then -+ opat = ctx.map_compr[band(op, 3)] -+ while type(opat) ~= "string" do -+ if not opat then return unknown(ctx) end -+ local test = band(rshift(op, opat.shift), opat.mask) -+ opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._ -+ end -+ else -+ opat = ctx.map_pri[band(op,127)] -+ while type(opat) ~= "string" do -+ if not opat then return unknown(ctx) end -+ opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._ -+ end -+ end -+ local name, pat = match(opat, "^([a-z0-9_.]*)(.*)") -+ local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)") -+ local a1, a2 = 0 -+ if altname then -+ pat = pat2 -+ end -+ -+ local alias_done = false --variable for the case of 2 pseudoinstructions, if both parameters are x0, 0 -+ -+ for p in gmatch(pat, ".") do -+ local x = nil -+ if p == "D" then -+ x = map_gpr[band(rshift(op, 7), 31)] -+ elseif p == "F" then -+ x = map_fgpr[band(rshift(op, 7), 31)] -+ elseif p == "R" then -+ x = map_gpr[band(rshift(op, 15), 31)] -+ elseif 
p == "G" then -+ x = map_fgpr[band(rshift(op, 15), 31)] -+ elseif p == "r" then -+ x = map_gpr[band(rshift(op, 20), 31)] -+ if(name == "sb" or name == "sh" or name == "sw" or name == "sd") then -+ local temp = last --because of the diffrent order of the characters -+ operands[#operands] = x -+ x = temp -+ end -+ elseif p == "g" then -+ x = map_fgpr[band(rshift(op, 20), 31)] -+ if(name == "fsw" or name == "fsd") then -+ local temp = last -+ operands[#operands] = x -+ x = temp -+ end -+ elseif p == "Z" then -+ x = map_gpr[8 + band(rshift(op, 2), 7)] -+ elseif p == "N" then -+ x = map_fgpr[8 + band(rshift(op, 2), 7)] -+ elseif p == "M" then -+ x = map_gpr[8 + band(rshift(op, 7), 7)] -+ elseif p == "E" then -+ x = map_gpr[band(rshift(op, 2), 31)] -+ elseif p == "W" then -+ local uimm = parse_W(op) -+ x = format("%s,%d", "sp", uimm) -+ elseif p == "x" then -+ x = parse_x(op) -+ elseif p == "h" then -+ local part1 = band(rshift(op, 5), 3) --7:6 -+ local part2 = band(rshift(op, 10), 7) --5:3 -+ local uimm = bor(lshift(0, 31), lshift(part1, 6) , lshift(part2, 3)) -+ operands[#operands] = format("%d(%s)", uimm, last) -+ elseif p == "X" then -+ local imm = parse_X(op) -+ x = format("%s,%d", "sp", imm) -+ elseif p == "O" then -+ x = format("(%s)", map_gpr[band(rshift(op, 15), 31)]) -+ elseif p == "H" then -+ x = map_fgpr[band(rshift(op, 27), 31)] -+ elseif p == "L" then -+ local register = map_gpr[band(rshift(op, 15), 31)] -+ local disp = arshift(op, 20) -+ x = format("%d(%s)", disp, register) -+ elseif p == "P" then -- XTheadMemPair -+ local register = map_gpr[band(rshift(op, 15), 31)] -+ local disp = band(arshift(op, 25), 3) -+ local isword = bxor(band(arshift(op, 26), 1), 1) -+ x = format("(%s), %d, %d", register, disp, isword and 3 or 4) -+ elseif p == "I" then -+ x = arshift(op, 20) -+ --different for jalr -+ if(name == "jalr") then -+ local reg = map_gpr[band(rshift(op, 15), 31)] -+ if(ctx.reltab[reg] == nil) then -+ operands[#operands] = format("%d(%s)", x, last) -+ 
else -+ local target = ctx.reltab[reg] + x -+ operands[#operands] = format("%d(%s) #0x%08x", x, last, target) -+ ctx.rel = target -+ ctx.reltab[reg] = nil --assume no reuses of the register -+ end -+ x = nil --not to add additional operand -+ end -+ elseif p == "i" then -+ --both for RV32I AND RV64I -+ local value = band(arshift(op, 20), 63) -+ x = string.format("%d", value) -+ elseif p == "j" then -- XThead imm1[31..26] -+ local value = band(rshift(op, 26), 63) -+ x = string.format("%d", value) -+ elseif p == "v" then --XThead imm[2][26..25] -+ local value = band(rshift(op, 25), 3) -+ x = string.format("%d", value) -+ elseif p == "S" then -+ local register = map_gpr[band(rshift(op, 15), 31)] --register -+ local imm = parse_S(op) -+ x = format("%d(%s)", imm, register) -+ elseif p == "n" then -+ local part1 = band(rshift(op, 5), 1) --6 -+ local part2 = band(rshift(op, 10), 7) --5:3 -+ local part3 = band(rshift(op, 6), 1) --2 -+ local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 3), -+ lshift(part3, 2)) -+ operands[#operands] = format("%d(%s)", uimm, last) -+ elseif p == "A" then -+ local value, dest = band(rshift(op, 12), 0xfffff), map_gpr[band(rshift(op, 7), 31)] -+ ctx.reltab[dest] = ctx.addr + ctx.pos + lshift(value, 12) -+ x = format("0x%x", value) -+ elseif p == "B" then -+ x = ctx.addr + ctx.pos + parse_B(op) -+ ctx.rel = x -+ x = format("0x%08x", x) -+ elseif p == "U" then -+ local value = band(rshift(op, 12), 0xfffff) -+ x = string.format("0x%x", value) -+ elseif p == "Q" then -+ local part1 = band(rshift(op, 2), 7) --8:6 -+ local part2 = band(rshift(op, 12), 1) --5 -+ local part3 = band(rshift(op, 5), 3) --4:3 -+ local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 5), -+ lshift(part3, 3)) -+ x = format("%d(%s)", uimm, "sp") -+ elseif p == "q" then -+ x = ctx.addr + ctx.pos + parse_q(op) -+ ctx.rel = x -+ x = format("0x%08x", x) -+ elseif p == "J" then -+ x = ctx.addr + ctx.pos + parse_J(op) -+ ctx.rel = x -+ x = format("0x%08x", x) 
-+ elseif p == "K" then -+ local value = parse_K(op) -+ x = string.format("0x%x", value) -+ elseif p == "Y" then -+ local part1 = band(rshift(op, 2), 3) --7:6 -+ local part2 = band(rshift(op, 12), 1) --5 -+ local part3 = band(rshift(op, 4), 7) --4:2 -+ local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 5), -+ lshift(part3, 2)) -+ x = format("%d(%s)", uimm, "sp") -+ elseif p == "o" then -- rounding mode -+ x = map_rm[band(rshift(op, 12), 7)] -+ elseif p == "y" then -- fli lut -+ x = map_fli[band(rshift(op, 15), 31)] -+ elseif p == "1" then -+ local part1 = band(rshift(op, 12), 1) --5 -+ local part2 = band(rshift(op, 2), 31) --4:0 -+ local uimm = bor(lshift(0, 31), lshift(part1, 5), part2) -+ x = string.format("0x%x", uimm) -+ elseif p == "T" then -+ x = ctx.addr + ctx.pos + parse_T(op) -+ ctx.rel = x -+ x = format("0x%08x", x) -+ elseif p == "t" then -+ local part1 = band(rshift(op, 7), 7) --8:6 -+ local part2 = band(rshift(op, 10), 7) --5:3 -+ local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 3)) -+ x = format("%d(%s)", uimm, "sp") -+ elseif p == "u" then -+ local part1 = band(rshift(op, 7), 3) --7:6 -+ local part2 = band(rshift(op, 9), 15) --5:2 -+ local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 2)) -+ x = format("%d(%s)", uimm, "sp") -+ elseif p == "V" then -+ x = map_fgpr[band(rshift(op, 2), 31)] -+ elseif p == "0" then --PSEUDOINSTRUCTIONS -+ if (last == "zero" or last == 0) then -+ local n = #operands -+ operands[n] = nil -+ last = operands[n-1] -+ local a1, a2 = match(altname, "([^|]*)|(.*)") -+ if a1 then name, altname = a1, a2 -+ else name = altname end -+ alias_done = true -+ end -+ elseif (p == "4") then -+ if(last == -1) then -+ name = altname -+ operands[#operands] = nil -+ end -+ elseif (p == "5") then -+ if(last == 1) then -+ name = altname -+ operands[#operands] = nil -+ end -+ elseif (p == "6") then -+ if(last == operands[#operands - 1]) then -+ name = altname -+ operands[#operands] = nil -+ end -+ elseif 
(p == "7") then --jalr rs -+ local value = string.sub(operands[#operands], 1, 1) -+ local reg = string.sub(operands[#operands], 3, #(operands[#operands]) - 1) -+ if(value == "0" and -+ (operands[#operands - 1] == "ra" or operands[#operands - 1] == "zero")) then -+ if(operands[#operands - 1] == "zero") then -+ name = altname -+ end -+ operands[#operands] = nil -+ operands[#operands] = reg -+ end -+ elseif (p == "2" and alias_done == false) then -+ if (last == "zero" or last == 0) then -+ local a1, a2 = match(altname, "([^|]*)|(.*)") -+ name = a2 -+ operands[#operands] = nil -+ end -+ end -+ if x then operands[#operands+1] = x; last = x end -+ end -+ return putop(ctx, name, operands) -+end -+ -+------------------------------------------------------------------------------ -+ -+-- Disassemble a block of code. -+local function disass_block(ctx, ofs, len) -+ if not ofs then -+ ofs = 0 -+ end -+ local stop = len and ofs+len or #ctx.code -+ --instructions can be both 32 and 16 bits -+ stop = stop - stop % 2 -+ ctx.pos = ofs - ofs % 2 -+ ctx.rel = nil -+ while ctx.pos < stop do disass_ins(ctx) end -+end -+ -+-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). -+local function create(code, addr, out) -+ local ctx = {} -+ ctx.code = code -+ ctx.addr = addr or 0 -+ ctx.out = out or io.write -+ ctx.symtab = {} -+ ctx.disass = disass_block -+ ctx.hexdump = 8 -+ ctx.get = get_le -+ ctx.map_pri = map_pri -+ ctx.map_compr = map_compr -+ ctx.reltab = {} -+ return ctx -+end -+ -+-- Simple API: disassemble code (a string) at address and output via out. -+local function disass(code, addr, out) -+ create(code, addr, out):disass(addr) -+end -+ -+-- Return register name for RID. -+local function regname(r) -+ if r < 32 then return map_gpr[r] end -+ return "f"..(r-32) -+end -+ -+-- Public module functions. 
-+return { -+ create = create, -+ disass = disass, -+ regname = regname -+} ---- /dev/null -+++ b/src/jit/dis_riscv64.lua -@@ -0,0 +1,16 @@ -+---------------------------------------------------------------------------- -+-- LuaJIT RISC-V 64 disassembler wrapper module. -+-- -+-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -+-- Released under the MIT license. See Copyright Notice in luajit.h -+---------------------------------------------------------------------------- -+-- This module just exports the default riscv little-endian functions from the -+-- RISC-V disassembler module. All the interesting stuff is there. -+------------------------------------------------------------------------------ -+ -+local dis_riscv = require((string.match(..., ".*%.") or "").."dis_riscv") -+return { -+ create = dis_riscv.create, -+ disass = dis_riscv.disass, -+ regname = dis_riscv.regname -+} -\ No newline at end of file ---- a/src/lib_jit.c -+++ b/src/lib_jit.c -@@ -706,6 +706,104 @@ JIT_PARAMDEF(JIT_PARAMINIT) - #include - #endif - -+#if LJ_TARGET_RISCV64 && LJ_TARGET_POSIX -+ -+#if LJ_TARGET_LINUX -+#include -+ -+struct riscv_hwprobe hwprobe_requests[] = { -+ {RISCV_HWPROBE_KEY_IMA_EXT_0} -+}; -+ -+const uint64_t *hwprobe_ext = &hwprobe_requests[0].value; -+ -+int hwprobe_ret = 0; -+#endif -+ -+static int riscv_compressed() -+{ -+#if defined(__riscv_c) || defined(__riscv_compressed) -+ /* Don't bother checking for RVC -- would crash before getting here. */ -+ return 1; -+#elif LJ_TARGET_LINUX -+ return (hwprobe_ret == 0 && ((*hwprobe_ext) & RISCV_HWPROBE_IMA_C)) ? 1 : 0; -+#else -+ return 0; -+#endif -+} -+ -+static int riscv_zba() -+{ -+#if defined(__riscv_b) || defined(__riscv_zba) -+ /* Don't bother checking for Zba -- would crash before getting here. */ -+ return 1; -+#elif LJ_TARGET_LINUX -+ return (hwprobe_ret == 0 && ((*hwprobe_ext) & RISCV_HWPROBE_EXT_ZBA)) ? 
1 : 0; -+#else -+ return 0; -+#endif -+} -+ -+static int riscv_zbb() -+{ -+#if defined(__riscv_b) || defined(__riscv_zbb) -+ /* Don't bother checking for Zbb -- would crash before getting here. */ -+ return 1; -+#elif LJ_TARGET_LINUX -+ return (hwprobe_ret == 0 && ((*hwprobe_ext) & RISCV_HWPROBE_EXT_ZBB)) ? 1 : 0; -+#else -+ return 0; -+#endif -+} -+ -+static int riscv_zicond() -+{ -+#if defined(__riscv_zicond) -+ /* Don't bother checking for Zicond -- would crash before getting here. */ -+ return 1; -+#elif LJ_TARGET_LINUX -+ return (hwprobe_ret == 0 && ((*hwprobe_ext) & RISCV_HWPROBE_EXT_ZICOND)) ? 1 : 0; -+#else -+ return 0; -+#endif -+} -+ -+static int riscv_zfa() -+{ -+#if defined(__riscv_zfa) -+ /* Don't bother checking for Zfa -- would crash before getting here. */ -+ return 1; -+#elif LJ_TARGET_LINUX -+ return (hwprobe_ret == 0 && ((*hwprobe_ext) & RISCV_HWPROBE_EXT_ZFA)) ? 1 : 0; -+#else -+ return 0; -+#endif -+} -+ -+static int riscv_xthead() -+{ -+#if (defined(__riscv_xtheadba) \ -+ && defined(__riscv_xtheadbb) \ -+ && defined(__riscv_xtheadcondmov) \ -+ && defined(__riscv_xtheadmac)) -+ /* Don't bother checking for XThead -- would crash before getting here. */ -+ return 1; -+#else -+/* -+** Hardcoded as there's no easy way of detection: -+** - SIGILL have some trouble with libluajit as we speak -+** - Checking mvendorid looks good, but might not be reliable. -+*/ -+ return 0; -+#endif -+} -+ -+static uint32_t riscv_probe(int (*func)(void), uint32_t flag) -+{ -+ return func() ? flag : 0; -+} -+#endif -+ - /* Arch-dependent CPU feature detection. */ - static uint32_t jit_cpudetect(void) - { -@@ -778,6 +876,29 @@ static uint32_t jit_cpudetect(void) - #endif - #elif LJ_TARGET_S390X - /* No optional CPU features to detect (for now). */ -+ -+#elif LJ_TARGET_RISCV64 -+#if LJ_HASJIT -+ -+#if LJ_TARGET_LINUX -+ /* HWPROBE-based detection of RVC, Zba, Zbb and Zicond. 
*/ -+ hwprobe_ret = syscall(__NR_riscv_hwprobe, &hwprobe_requests, -+ sizeof(hwprobe_requests) / sizeof(struct riscv_hwprobe), 0, -+ NULL, 0); -+ -+ flags |= riscv_probe(riscv_compressed, JIT_F_RVC); -+ flags |= riscv_probe(riscv_zba, JIT_F_RVZba); -+ flags |= riscv_probe(riscv_zbb, JIT_F_RVZbb); -+ flags |= riscv_probe(riscv_zicond, JIT_F_RVZicond); -+ flags |= riscv_probe(riscv_zfa, JIT_F_RVZfa); -+ flags |= riscv_probe(riscv_xthead, JIT_F_RVXThead); -+ -+#endif -+ -+ /* Detect V/P? */ -+ /* V have no hardware available, P not ratified yet. */ -+#endif -+ - #else - #error "Missing CPU detection for this architecture" - #endif ---- a/src/lj_alloc.c -+++ b/src/lj_alloc.c -@@ -365,7 +365,7 @@ static void *CALL_MREMAP_(void *ptr, siz - #define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv)) - #define CALL_MREMAP_NOMOVE 0 - #define CALL_MREMAP_MAYMOVE 1 --#if LJ_64 && (!LJ_GC64 || LJ_TARGET_ARM64) -+#if LJ_64 && (!LJ_GC64 || LJ_TARGET_ARM64 || LJ_TARGET_RISCV64) - #define CALL_MREMAP_MV CALL_MREMAP_NOMOVE - #else - #define CALL_MREMAP_MV CALL_MREMAP_MAYMOVE ---- a/src/lj_arch.h -+++ b/src/lj_arch.h -@@ -33,6 +33,8 @@ - #define LUAJIT_ARCH_mips64 7 - #define LUAJIT_ARCH_S390X 8 - #define LUAJIT_ARCH_s390x 8 -+#define LUAJIT_ARCH_RISCV64 9 -+#define LUAJIT_ARCH_riscv64 9 - - /* Target OS. 
*/ - #define LUAJIT_OS_OTHER 0 -@@ -69,6 +71,8 @@ - #define LUAJIT_TARGET LUAJIT_ARCH_MIPS64 - #elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS) - #define LUAJIT_TARGET LUAJIT_ARCH_MIPS32 -+#elif (defined(__riscv) || defined(__riscv__)) && __riscv_xlen == 64 -+#define LUAJIT_TARGET LUAJIT_ARCH_RISCV64 - #else - #error "Architecture not supported (in this version), see: https://luajit.org/status.html#architectures" - #endif -@@ -506,6 +510,20 @@ - #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL - #define LJ_TARGET_GC64 1 - #define LJ_ARCH_NOJIT 1 /* NYI */ -+#elif LUAJIT_TARGET == LUAJIT_ARCH_RISCV64 -+ -+#define LJ_ARCH_NAME "riscv64" -+#define LJ_ARCH_BITS 64 -+#define LJ_ARCH_ENDIAN LUAJIT_LE /* Forget about BE for now */ -+#define LJ_TARGET_RISCV64 1 -+#define LJ_TARGET_GC64 1 -+#define LJ_TARGET_EHRETREG 10 -+#define LJ_TARGET_EHRAREG 1 -+#define LJ_TARGET_JUMPRANGE 30 /* JAL +-2^20 = +-1MB,\ -+ AUIPC+JALR +-2^31 = +-2GB, leave 1 bit to avoid AUIPC corner case */ -+#define LJ_TARGET_MASKSHIFT 1 -+#define LJ_TARGET_MASKROT 1 -+#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL - - #else - #error "No target architecture defined" -@@ -590,6 +608,10 @@ - #error "Only n64 ABI supported for MIPS64" - #undef LJ_TARGET_MIPS - #endif -+#elif LJ_TARGET_RISCV64 -+#if !defined(__riscv_float_abi_double) -+#error "Only RISC-V 64 double float supported for now" -+#endif - #endif - #endif - ---- a/src/lj_asm.c -+++ b/src/lj_asm.c -@@ -231,6 +231,8 @@ static Reg rset_pickrandom(ASMState *as, - #include "lj_emit_ppc.h" - #elif LJ_TARGET_MIPS - #include "lj_emit_mips.h" -+#elif LJ_TARGET_RISCV64 -+#include "lj_emit_riscv.h" - #else - #error "Missing instruction emitter for target CPU" - #endif -@@ -1719,6 +1721,8 @@ static void asm_loop(ASMState *as) - #include "lj_asm_mips.h" - #elif LJ_TARGET_S390X - #include "lj_asm_s390x.h" -+#elif LJ_TARGET_RISCV64 -+#include "lj_asm_riscv64.h" - #else - #error "Missing assembler for target CPU" - #endif ---- /dev/null -+++ 
b/src/lj_asm_riscv64.h -@@ -0,0 +1,2037 @@ -+/* -+** RISC-V IR assembler (SSA IR -> machine code). -+** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h -+** -+** Contributed by gns from PLCT Lab, ISCAS. -+*/ -+ -+/* -- Register allocator extensions --------------------------------------- */ -+ -+/* Allocate a register with a hint. */ -+static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow) -+{ -+ Reg r = IR(ref)->r; -+ if (ra_noreg(r)) { -+ if (!ra_hashint(r) && !iscrossref(as, ref)) -+ ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */ -+ r = ra_allocref(as, ref, allow); -+ } -+ ra_noweak(as, r); -+ return r; -+} -+ -+/* Allocate a register or RID_ZERO. */ -+static Reg ra_alloc1z(ASMState *as, IRRef ref, RegSet allow) -+{ -+ Reg r = IR(ref)->r; -+ if (ra_noreg(r)) { -+ if (!(allow & RSET_FPR) && irref_isk(ref) && get_kval(as, ref) == 0) -+ return RID_ZERO; -+ r = ra_allocref(as, ref, allow); -+ } else { -+ ra_noweak(as, r); -+ } -+ return r; -+} -+ -+/* Allocate two source registers for three-operand instructions. 
*/ -+static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) -+{ -+ IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); -+ Reg left = irl->r, right = irr->r; -+ if (ra_hasreg(left)) { -+ ra_noweak(as, left); -+ if (ra_noreg(right)) -+ right = ra_alloc1z(as, ir->op2, rset_exclude(allow, left)); -+ else -+ ra_noweak(as, right); -+ } else if (ra_hasreg(right)) { -+ ra_noweak(as, right); -+ left = ra_alloc1z(as, ir->op1, rset_exclude(allow, right)); -+ } else if (ra_hashint(right)) { -+ right = ra_alloc1z(as, ir->op2, allow); -+ left = ra_alloc1z(as, ir->op1, rset_exclude(allow, right)); -+ } else { -+ left = ra_alloc1z(as, ir->op1, allow); -+ right = ra_alloc1z(as, ir->op2, rset_exclude(allow, left)); -+ } -+ return left | (right << 8); -+} -+ -+/* -- Guard handling ------------------------------------------------------ */ -+ -+/* Copied from MIPS, AUIPC+JALR is expensive to setup in-place */ -+#define RISCV_SPAREJUMP 4 -+ -+/* Setup spare long-range jump (trampoline?) slots per mcarea. */ -+ -+static void asm_sparejump_setup(ASMState *as) -+{ -+ MCode *mxp = as->mctop; -+ if ((char *)mxp == (char *)as->J->mcarea + as->J->szmcarea) { -+ for (int i = RISCV_SPAREJUMP*2; i--; ) -+ *--mxp = RISCVI_EBREAK; -+ as->mctop = mxp; -+ } -+} -+ -+static MCode *asm_sparejump_use(MCode *mcarea, MCode *target) -+{ -+ MCode *mxp = (MCode *)((char *)mcarea + ((MCLink *)mcarea)->size); -+ int slot = RISCV_SPAREJUMP; -+ RISCVIns tslot = RISCVI_EBREAK, tauipc, tjalr; -+ while (slot--) { -+ mxp -= 2; -+ ptrdiff_t delta = (char *)target - (char *)mxp; -+ tauipc = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta)), -+ tjalr = RISCVI_JALR | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta)); -+ if (mxp[0] == tauipc && mxp[1] == tjalr) { -+ return mxp; -+ } else if (mxp[0] == tslot) { -+ mxp[0] = tauipc, mxp[1] = tjalr; -+ return mxp; -+ } -+ } -+ return NULL; -+} -+ -+/* Setup exit stub after the end of each trace. 
*/ -+static void asm_exitstub_setup(ASMState *as, ExitNo nexits) -+{ -+ ExitNo i; -+ MCode *mxp = as->mctop; -+ if (mxp - (nexits + 4 + MCLIM_REDZONE) < as->mclim) -+ asm_mclimit(as); -+ for (i = nexits-1; (int32_t)i >= 0; i--) -+ *--mxp = RISCVI_JAL | RISCVF_D(RID_RA) | RISCVF_IMMJ((uintptr_t)(4*(-4-i))); -+ ptrdiff_t delta = (char *)lj_vm_exit_handler - (char *)(mxp-3); -+ /* 1: sw ra, 0(sp); auipc+jalr ->vm_exit_handler; lui x0, traceno; jal <1; jal <1; ... */ -+ *--mxp = RISCVI_LUI | RISCVF_IMMU(as->T->traceno); -+ *--mxp = RISCVI_JALR | RISCVF_D(RID_RA) | RISCVF_S1(RID_TMP) -+ | RISCVF_IMMI(RISCVF_LO((uintptr_t)(void *)delta)); -+ *--mxp = RISCVI_AUIPC | RISCVF_D(RID_TMP) -+ | RISCVF_IMMU(RISCVF_HI((uintptr_t)(void *)delta)); -+ *--mxp = RISCVI_SD | RISCVF_S2(RID_RA) | RISCVF_S1(RID_SP); -+ as->mctop = mxp; -+} -+ -+static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno) -+{ -+ /* Keep this in-sync with exitstub_trace_addr(). */ -+ return as->mctop + exitno + 4; -+} -+ -+/* Emit conditional branch to exit for guard. */ -+static void asm_guard(ASMState *as, RISCVIns riscvi, Reg rs1, Reg rs2) -+{ -+ MCode *target = asm_exitstub_addr(as, as->snapno); -+ MCode *p = as->mcp; -+ if (LJ_UNLIKELY(p == as->invmcp)) { -+ as->loopinv = 1; -+ as->mcp = ++p; -+ *p = RISCVI_JAL | RISCVF_IMMJ((char *)target - (char *)p); -+ riscvi = riscvi^RISCVF_FUNCT3(1); /* Invert cond. */ -+ target = p - 1; /* Patch target later in asm_loop_fixup. */ -+ } -+ ptrdiff_t delta = (char *)target - (char *)(p - 1); -+ *--p = RISCVI_JAL | RISCVF_IMMJ(delta); -+ *--p = (riscvi^RISCVF_FUNCT3(1)) | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB(8); -+ as->mcp = p; -+} -+ -+/* -- Operand fusion ------------------------------------------------------ */ -+ -+/* Limit linear search to this distance. Avoids O(n^2) behavior. */ -+#define CONFLICT_SEARCH_LIM 31 -+ -+/* Check if there's no conflicting instruction between curins and ref. 
*/ -+static int noconflict(ASMState *as, IRRef ref, IROp conflict) -+{ -+ IRIns *ir = as->ir; -+ IRRef i = as->curins; -+ if (i > ref + CONFLICT_SEARCH_LIM) -+ return 0; /* Give up, ref is too far away. */ -+ while (--i > ref) -+ if (ir[i].o == conflict) -+ return 0; /* Conflict found. */ -+ return 1; /* Ok, no conflict. */ -+} -+ -+/* Fuse the array base of colocated arrays. */ -+static int32_t asm_fuseabase(ASMState *as, IRRef ref) -+{ -+ IRIns *ir = IR(ref); -+ if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE && -+ !neverfuse(as) && noconflict(as, ref, IR_NEWREF)) -+ return (int32_t)sizeof(GCtab); -+ return 0; -+} -+ -+/* Fuse array/hash/upvalue reference into register+offset operand. */ -+static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow) -+{ -+ IRIns *ir = IR(ref); -+ if (ra_noreg(ir->r)) { -+ if (ir->o == IR_AREF) { -+ if (mayfuse(as, ref)) { -+ if (irref_isk(ir->op2)) { -+ IRRef tab = IR(ir->op1)->op1; -+ int32_t ofs = asm_fuseabase(as, tab); -+ IRRef refa = ofs ? tab : ir->op1; -+ ofs += 8*IR(ir->op2)->i; -+ if (checki12(ofs)) { -+ *ofsp = ofs; -+ return ra_alloc1(as, refa, allow); -+ } -+ } -+ } -+ } else if (ir->o == IR_HREFK) { -+ if (mayfuse(as, ref)) { -+ int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node)); -+ if (checki12(ofs)) { -+ *ofsp = ofs; -+ return ra_alloc1(as, ir->op1, allow); -+ } -+ } -+ } else if (ir->o == IR_UREFC) { -+ if (irref_isk(ir->op1)) { -+ GCfunc *fn = ir_kfunc(IR(ir->op1)); -+ GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv; -+ intptr_t ofs = ((intptr_t)((uintptr_t)(&uv->tv) - (uintptr_t)&J2GG(as->J)->g)); -+ if (checki12(ofs)) { -+ *ofsp = (int32_t)ofs; -+ return RID_GL; -+ } -+ } -+ } else if (ir->o == IR_TMPREF) { -+ *ofsp = (int32_t)offsetof(global_State, tmptv); -+ return RID_GL; -+ } -+ } -+ *ofsp = 0; -+ return ra_alloc1(as, ref, allow); -+} -+ -+/* Fuse XLOAD/XSTORE reference into load/store operand. 
*/ -+static void asm_fusexref(ASMState *as, RISCVIns riscvi, Reg rd, IRRef ref, -+ RegSet allow, int32_t ofs) -+{ -+ IRIns *ir = IR(ref); -+ Reg base; -+ if (ra_noreg(ir->r) && canfuse(as, ir)) { -+ intptr_t ofs2; -+ if (ir->o == IR_ADD) { -+ if (irref_isk(ir->op2) && (ofs2 = ofs + get_kval(as, ir->op2), -+ checki12(ofs2))) { -+ ref = ir->op1; -+ ofs = (int32_t)ofs2; -+ } -+ } else if (ir->o == IR_STRREF) { -+ ofs2 = 4096; -+ lj_assertA(ofs == 0, "bad usage"); -+ ofs = (int32_t)sizeof(GCstr); -+ if (irref_isk(ir->op2)) { -+ ofs2 = ofs + get_kval(as, ir->op2); -+ ref = ir->op1; -+ } else if (irref_isk(ir->op1)) { -+ ofs2 = ofs + get_kval(as, ir->op1); -+ ref = ir->op2; -+ } -+ if (!checki12(ofs2)) { -+ /* NYI: Fuse ADD with constant. */ -+ Reg right, left = ra_alloc2(as, ir, allow); -+ right = (left >> 8); left &= 255; -+ emit_lso(as, riscvi, rd, RID_TMP, ofs); -+ emit_ds1s2(as, RISCVI_ADD, RID_TMP, left, right); -+ return; -+ } -+ ofs = ofs2; -+ } -+ } -+ base = ra_alloc1(as, ref, allow); -+ emit_lso(as, riscvi, rd, base, ofs); -+} -+ -+/* Fuse Integer multiply-accumulate. */ -+ -+static int asm_fusemac(ASMState *as, IRIns *ir, RISCVIns riscvi) -+{ -+ IRRef lref = ir->op1, rref = ir->op2; -+ IRIns *irm; -+ if (lref != rref && -+ ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && -+ ra_noreg(irm->r)) || -+ (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && -+ (rref = lref, ra_noreg(irm->r))))) { -+ Reg dest = ra_dest(as, ir, RSET_GPR); -+ Reg add = ra_hintalloc(as, rref, dest, RSET_GPR); -+ Reg left = ra_alloc2(as, irm, -+ rset_exclude(rset_exclude(RSET_GPR, dest), add)); -+ Reg right = (left >> 8); left &= 255; -+ emit_ds1s2(as, riscvi, dest, left, right); -+ if (dest != add) emit_mv(as, dest, add); -+ return 1; -+ } -+ return 0; -+} -+ -+/* Fuse FP multiply-add/sub. 
*/ -+ -+static int asm_fusemadd(ASMState *as, IRIns *ir, RISCVIns riscvi, RISCVIns riscvir) -+{ -+ IRRef lref = ir->op1, rref = ir->op2; -+ IRIns *irm; -+ if ((as->flags & JIT_F_OPT_FMA) && -+ lref != rref && -+ ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && -+ ra_noreg(irm->r)) || -+ (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && -+ (rref = lref, riscvi = riscvir, ra_noreg(irm->r))))) { -+ Reg dest = ra_dest(as, ir, RSET_FPR); -+ Reg add = ra_hintalloc(as, rref, dest, RSET_FPR); -+ Reg left = ra_alloc2(as, irm, -+ rset_exclude(rset_exclude(RSET_FPR, dest), add)); -+ Reg right = (left >> 8); left &= 255; -+ emit_ds1s2s3(as, riscvi, dest, left, right, add); -+ return 1; -+ } -+ return 0; -+} -+/* -- Calls --------------------------------------------------------------- */ -+ -+/* Generate a call to a C function. */ -+static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) -+{ -+ uint32_t n, nargs = CCI_XNARGS(ci); -+ int32_t ofs = 0; -+ Reg gpr, fpr = REGARG_FIRSTFPR; -+ if ((void *)ci->func) -+ emit_call(as, (void *)ci->func, 1); -+ for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) -+ as->cost[gpr] = REGCOST(~0u, ASMREF_L); -+ gpr = REGARG_FIRSTGPR; -+ for (n = 0; n < nargs; n++) { /* Setup args. */ -+ IRRef ref = args[n]; -+ IRIns *ir = IR(ref); -+ if (ref) { -+ if (irt_isfp(ir->t)) { -+ if (fpr <= REGARG_LASTFPR) { -+ lj_assertA(rset_test(as->freeset, fpr), -+ "reg %d not free", fpr); /* Must have been evicted. */ -+ ra_leftov(as, fpr, ref); -+ fpr++; if(ci->flags & CCI_VARARG) gpr++; -+ } else if (!(ci->flags & CCI_VARARG) && gpr <= REGARG_LASTGPR) { -+ lj_assertA(rset_test(as->freeset, gpr), -+ "reg %d not free", gpr); /* Must have been evicted. 
*/ -+ ra_leftov(as, gpr, ref); -+ gpr++; -+ } else { -+ Reg r = ra_alloc1(as, ref, RSET_FPR); -+ emit_spstore(as, ir, r, ofs); -+ ofs += 8; -+ } -+ } else { -+ if (gpr <= REGARG_LASTGPR) { -+ lj_assertA(rset_test(as->freeset, gpr), -+ "reg %d not free", gpr); /* Must have been evicted. */ -+ ra_leftov(as, gpr, ref); -+ gpr++; if(ci->flags & CCI_VARARG) fpr++; -+ } else { -+ Reg r = ra_alloc1z(as, ref, RSET_GPR); -+ emit_spstore(as, ir, r, ofs); -+ ofs += 8; -+ } -+ } -+ } -+ } -+} -+ -+/* Setup result reg/sp for call. Evict scratch regs. */ -+static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) -+{ -+ RegSet drop = RSET_SCRATCH; -+ int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); -+ if (ra_hasreg(ir->r)) -+ rset_clear(drop, ir->r); /* Dest reg handled below. */ -+ if (hiop && ra_hasreg((ir+1)->r)) -+ rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ -+ ra_evictset(as, drop); /* Evictions must be performed first. */ -+ if (ra_used(ir)) { -+ lj_assertA(!irt_ispri(ir->t), "PRI dest"); -+ if (irt_isfp(ir->t)) { -+ if ((ci->flags & CCI_CASTU64)) { -+ Reg dest = ra_dest(as, ir, RSET_FPR); -+ emit_ds(as, irt_isnum(ir->t) ? RISCVI_FMV_D_X : RISCVI_FMV_W_X, -+ dest, RID_RET); -+ } else { -+ ra_destreg(as, ir, RID_FPRET); -+ } -+ } else if (hiop) { -+ ra_destpair(as, ir); -+ } else { -+ ra_destreg(as, ir, RID_RET); -+ } -+ } -+} -+ -+static void asm_callx(ASMState *as, IRIns *ir) -+{ -+ IRRef args[CCI_NARGS_MAX*2]; -+ CCallInfo ci; -+ IRRef func; -+ IRIns *irf; -+ ci.flags = asm_callx_flags(as, ir); -+ asm_collectargs(as, ir, &ci, args); -+ asm_setupresult(as, ir, &ci); -+ func = ir->op2; irf = IR(func); -+ if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } -+ if (irref_isk(func)) { /* Call to constant address. */ -+ ci.func = (ASMFunction)(void *)get_kval(as, func); -+ } else { /* Need specific register for indirect calls. 
*/ -+ Reg r = ra_alloc1(as, func, RID2RSET(RID_CFUNCADDR)); -+ MCode *p = as->mcp; -+ *--p = RISCVI_JALR | RISCVF_D(RID_RA) | RISCVF_S1(r); -+ if (r == RID_CFUNCADDR) -+ *--p = RISCVI_ADDI | RISCVF_D(RID_CFUNCADDR) | RISCVF_S1(r); -+ else -+ *--p = RISCVI_MV | RISCVF_D(RID_CFUNCADDR) | RISCVF_S1(r); -+ as->mcp = p; -+ ci.func = (ASMFunction)(void *)0; -+ } -+ asm_gencall(as, &ci, args); -+} -+ -+/* -- Returns ------------------------------------------------------------- */ -+ -+/* Return to lower frame. Guard that it goes to the right spot. */ -+static void asm_retf(ASMState *as, IRIns *ir) -+{ -+ Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); -+ void *pc = ir_kptr(IR(ir->op2)); -+ int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); -+ as->topslot -= (BCReg)delta; -+ if ((int32_t)as->topslot < 0) as->topslot = 0; -+ irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ -+ emit_setgl(as, base, jit_base); -+ emit_addptr(as, base, -8*delta); -+ asm_guard(as, RISCVI_BNE, RID_TMP, -+ ra_allock(as, igcptr(pc), rset_exclude(RSET_GPR, base))); -+ emit_lso(as, RISCVI_LD, RID_TMP, base, -8); -+} -+ -+/* -- Buffer operations --------------------------------------------------- */ -+ -+#if LJ_HASBUFFER -+static void asm_bufhdr_write(ASMState *as, Reg sb) -+{ -+ Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); -+ IRIns irgc; -+ irgc.ot = IRT(0, IRT_PGC); /* GC type. 
*/ -+ emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L)); -+ emit_ds1s2(as, RISCVI_OR, RID_TMP, RID_TMP, tmp); -+ emit_dsi(as, RISCVI_ANDI, tmp, tmp, SBUF_MASK_FLAG); -+ emit_getgl(as, RID_TMP, cur_L); -+ emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L)); -+} -+#endif -+ -+/* -- Type conversions ---------------------------------------------------- */ -+ -+static void asm_tointg(ASMState *as, IRIns *ir, Reg left) -+{ -+ Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); -+ Reg dest = ra_dest(as, ir, RSET_GPR), cmp = ra_scratch(as, rset_exclude(RSET_GPR, dest)); -+ asm_guard(as, RISCVI_BEQ, cmp, RID_ZERO); -+ emit_ds1s2(as, RISCVI_FEQ_D, cmp, tmp, left); -+ emit_ds(as, RISCVI_FCVT_D_W, tmp, dest); -+ emit_ds(as, RISCVI_FCVT_W_D, dest, left); -+} -+ -+static void asm_tobit(ASMState *as, IRIns *ir) -+{ -+ RegSet allow = RSET_FPR; -+ Reg dest = ra_dest(as, ir, RSET_GPR); -+ Reg left = ra_alloc1(as, ir->op1, allow); -+ Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left)); -+ Reg tmp = ra_scratch(as, rset_clear(allow, right)); -+ emit_ds(as, RISCVI_FMV_X_W, dest, tmp); -+ emit_ds1s2(as, RISCVI_FADD_D, tmp, left, right); -+} -+ -+static void asm_conv(ASMState *as, IRIns *ir) -+{ -+ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); -+ int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64); -+ int stfp = (st == IRT_NUM || st == IRT_FLOAT); -+ IRRef lref = ir->op1; -+ lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV"); -+ /* Use GPR to pass floating-point arguments */ -+ if (irt_isfp(ir->t) && ir->r >= RID_X10 && ir->r <= RID_X17) { -+ Reg dest = ra_dest(as, ir, RSET_GPR); -+ Reg ftmp = ra_scratch(as, RSET_FPR); -+ if (stfp) { /* FP to FP conversion. */ -+ emit_ds(as, st == IRT_NUM ? RISCVI_FMV_X_W : RISCVI_FMV_X_D, dest, ftmp); -+ emit_ds(as, st == IRT_NUM ? RISCVI_FCVT_S_D : RISCVI_FCVT_D_S, -+ ftmp, ra_alloc1(as, lref, RSET_FPR)); -+ } else { /* Integer to FP conversion. 
*/ -+ Reg left = ra_alloc1(as, lref, RSET_GPR); -+ RISCVIns riscvi = irt_isfloat(ir->t) ? -+ (((IRT_IS64 >> st) & 1) ? -+ (st == IRT_I64 ? RISCVI_FCVT_S_L : RISCVI_FCVT_S_LU) : -+ (st == IRT_INT ? RISCVI_FCVT_S_W : RISCVI_FCVT_S_WU)) : -+ (((IRT_IS64 >> st) & 1) ? -+ (st == IRT_I64 ? RISCVI_FCVT_D_L : RISCVI_FCVT_D_LU) : -+ (st == IRT_INT ? RISCVI_FCVT_D_W : RISCVI_FCVT_D_WU)); -+ emit_ds(as, st64 ? RISCVI_FMV_X_D : RISCVI_FMV_X_W, dest, ftmp); -+ emit_ds(as, riscvi, ftmp, left); -+ } -+ } else if (irt_isfp(ir->t)) { -+ Reg dest = ra_dest(as, ir, RSET_FPR); -+ if (stfp) { /* FP to FP conversion. */ -+ emit_ds(as, st == IRT_NUM ? RISCVI_FCVT_S_D : RISCVI_FCVT_D_S, -+ dest, ra_alloc1(as, lref, RSET_FPR)); -+ } else { /* Integer to FP conversion. */ -+ Reg left = ra_alloc1(as, lref, RSET_GPR); -+ RISCVIns riscvi = irt_isfloat(ir->t) ? -+ (((IRT_IS64 >> st) & 1) ? -+ (st == IRT_I64 ? RISCVI_FCVT_S_L : RISCVI_FCVT_S_LU) : -+ (st == IRT_INT ? RISCVI_FCVT_S_W : RISCVI_FCVT_S_WU)) : -+ (((IRT_IS64 >> st) & 1) ? -+ (st == IRT_I64 ? RISCVI_FCVT_D_L : RISCVI_FCVT_D_LU) : -+ (st == IRT_INT ? RISCVI_FCVT_D_W : RISCVI_FCVT_D_WU)); -+ emit_ds(as, riscvi, dest, left); -+ } -+ } else if (stfp) { /* FP to integer conversion. */ -+ if (irt_isguard(ir->t)) { -+ /* Checked conversions are only supported from number to int. */ -+ lj_assertA(irt_isint(ir->t) && st == IRT_NUM, -+ "bad type for checked CONV"); -+ asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); -+ } else { -+ Reg left = ra_alloc1(as, lref, RSET_FPR); -+ Reg dest = ra_dest(as, ir, RSET_GPR); -+ RISCVIns riscvi = irt_is64(ir->t) ? -+ (st == IRT_NUM ? -+ (irt_isi64(ir->t) ? RISCVI_FCVT_L_D : RISCVI_FCVT_LU_D) : -+ (irt_isi64(ir->t) ? RISCVI_FCVT_L_S : RISCVI_FCVT_LU_S)) : -+ (st == IRT_NUM ? -+ (irt_isint(ir->t) ? RISCVI_FCVT_W_D : RISCVI_FCVT_WU_D) : -+ (irt_isint(ir->t) ? 
RISCVI_FCVT_W_S : RISCVI_FCVT_WU_S)); -+ emit_ds(as, riscvi|RISCVF_RM(RISCVRM_RTZ), dest, left); -+ } -+ } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ -+ Reg dest = ra_dest(as, ir, RSET_GPR); -+ Reg left = ra_alloc1(as, lref, RSET_GPR); -+ RISCVIns riscvi = st == IRT_I8 ? RISCVI_SEXT_B : -+ st == IRT_U8 ? RISCVI_ZEXT_B : -+ st == IRT_I16 ? RISCVI_SEXT_H : RISCVI_ZEXT_H; -+ lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT"); -+ emit_ext(as, riscvi, dest, left); -+ } else { /* 32/64 bit integer conversions. */ -+ Reg dest = ra_dest(as, ir, RSET_GPR); -+ if (irt_is64(ir->t)) { -+ if (st64) { -+ /* 64/64 bit no-op (cast)*/ -+ ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ -+ } else { /* 32 to 64 bit sign extension. */ -+ Reg left = ra_alloc1(as, lref, RSET_GPR); -+ if ((ir->op2 & IRCONV_SEXT)) { /* 32 to 64 bit sign extension. */ -+ emit_ext(as, RISCVI_SEXT_W, dest, left); -+ } else { /* 32 to 64 bit zero extension. */ -+ emit_ext(as, RISCVI_ZEXT_W, dest, left); -+ } -+ } -+ } else { -+ if (st64 && !(ir->op2 & IRCONV_NONE)) { -+ /* This is either a 32 bit reg/reg mov which zeroes the hiword -+ ** or a load of the loword from a 64 bit address. -+ */ -+ Reg left = ra_alloc1(as, lref, RSET_GPR); -+ emit_ext(as, RISCVI_ZEXT_W, dest, left); -+ } else { /* 32/32 bit no-op (cast). */ -+ ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ -+ } -+ } -+ } -+} -+ -+static void asm_strto(ASMState *as, IRIns *ir) -+{ -+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; -+ IRRef args[2]; -+ int32_t ofs = SPOFS_TMP; -+ RegSet drop = RSET_SCRATCH; -+ if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ -+ ra_evictset(as, drop); -+ if (ir->s) ofs = sps_scale(ir->s); -+ asm_guard(as, RISCVI_BEQ, RID_RET, RID_ZERO); /* Test return status. 
*/ -+ args[0] = ir->op1; /* GCstr *str */ -+ args[1] = ASMREF_TMP1; /* TValue *n */ -+ asm_gencall(as, ci, args); -+ /* Store the result to the spill slot or temp slots. */ -+ Reg tmp = ra_releasetmp(as, ASMREF_TMP1); -+ emit_opk(as, RISCVI_ADDI, tmp, RID_SP, tmp, ofs); -+} -+ -+/* -- Memory references --------------------------------------------------- */ -+ -+/* Store tagged value for ref at base+ofs. */ -+static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref) -+{ -+ RegSet allow = rset_exclude(RSET_GPR, base); -+ IRIns *ir = IR(ref); -+ lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t), -+ "store of IR type %d", irt_type(ir->t)); -+ if (irref_isk(ref)) { -+ TValue k; -+ lj_ir_kvalue(as->J->L, &k, ir); -+ emit_lso(as, RISCVI_SD, ra_allock(as, (int64_t)k.u64, allow), base, ofs); -+ } else { -+ Reg src = ra_alloc1(as, ref, allow); -+ rset_clear(allow, src); -+ Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow); -+ emit_lso(as, RISCVI_SD, RID_TMP, base, ofs); -+ if (irt_isinteger(ir->t)) { -+ if (as->flags & JIT_F_RVZba) { -+ emit_ds1s2(as, RISCVI_ADD_UW, RID_TMP, src, type); -+ } else { -+ emit_ds1s2(as, RISCVI_ADD, RID_TMP, RID_TMP, type); -+ emit_ext(as, RISCVI_ZEXT_W, RID_TMP, src); -+ } -+ } else { -+ emit_ds1s2(as, RISCVI_ADD, RID_TMP, src, type); -+ } -+ } -+} -+ -+/* Get pointer to TValue. */ -+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode) // todo-new -+{ -+ if ((mode & IRTMPREF_IN1)) { -+ IRIns *ir = IR(ref); -+ if (irt_isnum(ir->t)) { -+ if (irref_isk(ref) && !(mode & IRTMPREF_OUT1)) { -+ /* Use the number constant itself as a TValue. */ -+ ra_allockreg(as, igcptr(ir_knum(ir)), dest); -+ return; -+ } -+ emit_lso(as, RISCVI_FSD, ra_alloc1(as, ref, RSET_FPR), dest, 0); -+ } else { -+ asm_tvstore64(as, dest, 0, ref); -+ } -+ } -+ /* g->tmptv holds the TValue(s). 
*/ -+ emit_opk(as, RISCVI_ADDI, dest, RID_GL, dest, offsetof(global_State, tmptv)); -+} -+ -+static void asm_aref(ASMState *as, IRIns *ir) -+{ -+ Reg dest = ra_dest(as, ir, RSET_GPR); -+ Reg idx, base; -+ if (irref_isk(ir->op2)) { -+ IRRef tab = IR(ir->op1)->op1; -+ int32_t ofs = asm_fuseabase(as, tab); -+ IRRef refa = ofs ? tab : ir->op1; -+ ofs += 8*IR(ir->op2)->i; -+ if (checki12(ofs)) { -+ base = ra_alloc1(as, refa, RSET_GPR); -+ emit_dsi(as, RISCVI_ADDI, dest, base, ofs); -+ return; -+ } -+ } -+ base = ra_alloc1(as, ir->op1, RSET_GPR); -+ idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); -+ emit_sh3add(as, dest, base, idx, RID_TMP); -+} -+ -+/* Inlined hash lookup. Specialized for key type and for const keys. -+** The equivalent C code is: -+** Node *n = hashkey(t, key); -+** do { -+** if (lj_obj_equal(&n->key, key)) return &n->val; -+** } while ((n = nextnode(n))); -+** return niltv(L); -+*/ -+static void asm_href(ASMState *as, IRIns *ir, IROp merge) -+{ -+ RegSet allow = RSET_GPR; -+ int destused = ra_used(ir); -+ Reg dest = ra_dest(as, ir, allow); -+ Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); -+ Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1, tmp2; -+ Reg cmp64 = RID_NONE; -+ IRRef refkey = ir->op2; -+ IRIns *irkey = IR(refkey); -+ int isk = irref_isk(refkey); -+ IRType1 kt = irkey->t; -+ uint32_t khash; -+ MCLabel l_end, l_loop, l_next; -+ rset_clear(allow, tab); -+ tmp1 = ra_scratch(as, allow); -+ rset_clear(allow, tmp1); -+ tmp2 = ra_scratch(as, allow); -+ rset_clear(allow, tmp2); -+ -+ if (irt_isnum(kt)) { -+ key = ra_alloc1(as, refkey, RSET_FPR); -+ tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); -+ } else { -+ /* Allocate cmp64 register used for 64-bit comparisons */ -+ if (!isk && irt_isaddr(kt)) { -+ cmp64 = tmp2; -+ } else { -+ int64_t k; -+ if (isk && irt_isaddr(kt)) { -+ k = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64; -+ } else { -+ lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key 
type"); -+ k = ~((int64_t)~irt_toitype(kt) << 47); -+ } -+ cmp64 = ra_allock(as, k, allow); -+ rset_clear(allow, cmp64); -+ } -+ if (!irt_ispri(kt)) { -+ key = ra_alloc1(as, refkey, allow); -+ rset_clear(allow, key); -+ } -+ } -+ -+ /* Key not found in chain: jump to exit (if merged) or load niltv. */ -+ l_end = emit_label(as); -+ int is_lend_exit = 0; -+ as->invmcp = NULL; -+ if (merge == IR_NE) -+ asm_guard(as, RISCVI_BEQ, RID_ZERO, RID_ZERO); -+ else if (destused) -+ emit_loada(as, dest, niltvg(J2G(as->J))); -+ -+ /* Follow hash chain until the end. */ -+ l_loop = --as->mcp; -+ emit_mv(as, dest, tmp1); -+ emit_lso(as, RISCVI_LD, tmp1, dest, (int32_t)offsetof(Node, next)); -+ l_next = emit_label(as); -+ -+ /* Type and value comparison. */ -+ if (merge == IR_EQ) { /* Must match asm_guard(). */ -+ l_end = asm_exitstub_addr(as, as->snapno); -+ is_lend_exit = 1; -+ } -+ if (irt_isnum(kt)) { -+ emit_branch(as, RISCVI_BNE, tmp1, RID_ZERO, l_end, is_lend_exit); -+ emit_ds1s2(as, RISCVI_FEQ_D, tmp1, tmpnum, key); -+ emit_branch(as, RISCVI_BEQ, tmp1, RID_ZERO, l_next, -1); -+ emit_dsi(as, RISCVI_SLTIU, tmp1, tmp1, ((int32_t)LJ_TISNUM)); -+ emit_dsshamt(as, RISCVI_SRAI, tmp1, tmp1, 47); -+ emit_ds(as, RISCVI_FMV_D_X, tmpnum, tmp1); -+ } else { -+ emit_branch(as, RISCVI_BEQ, tmp1, cmp64, l_end, is_lend_exit); -+ } -+ emit_lso(as, RISCVI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64)); -+ *l_loop = RISCVI_BNE | RISCVF_S1(tmp1) | RISCVF_S2(RID_ZERO) -+ | RISCVF_IMMB((char *)as->mcp-(char *)l_loop); -+ if (!isk && irt_isaddr(kt)) { -+ type = ra_allock(as, (int64_t)irt_toitype(kt) << 47, allow); -+ emit_ds1s2(as, RISCVI_ADD, tmp2, key, type); -+ rset_clear(allow, type); -+ } -+ -+ /* Load main position relative to tab->node into dest. */ -+ khash = isk ? 
ir_khash(as, irkey) : 1; -+ if (khash == 0) { -+ emit_lso(as, RISCVI_LD, dest, tab, (int32_t)offsetof(GCtab, node)); -+ } else { -+ Reg tmphash = tmp1; -+ if (isk) -+ tmphash = ra_allock(as, khash, allow); -+ /* node = tab->node + (idx*32-idx*8) */ -+ emit_ds1s2(as, RISCVI_ADD, dest, dest, tmp1); -+ lj_assertA(sizeof(Node) == 24, "bad Node size"); -+ emit_ds1s2(as, RISCVI_SUBW, tmp1, tmp2, tmp1); -+ emit_dsshamt(as, RISCVI_SLLIW, tmp1, tmp1, 3); -+ emit_dsshamt(as, RISCVI_SLLIW, tmp2, tmp1, 5); -+ emit_ds1s2(as, RISCVI_AND, tmp1, tmp2, tmphash); // idx = hi & tab->hmask -+ emit_lso(as, RISCVI_LD, dest, tab, (int32_t)offsetof(GCtab, node)); -+ emit_lso(as, RISCVI_LW, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); -+ if (isk) { -+ /* Nothing to do. */ -+ } else if (irt_isstr(kt)) { -+ emit_lso(as, RISCVI_LW, tmp1, key, (int32_t)offsetof(GCstr, sid)); -+ } else { /* Must match with hash*() in lj_tab.c. */ -+ emit_ds1s2(as, RISCVI_SUBW, tmp1, tmp1, tmp2); -+ emit_roti(as, RISCVI_RORIW, tmp2, tmp2, dest, (-HASH_ROT3)&0x1f); -+ emit_ds1s2(as, RISCVI_XOR, tmp1, tmp1, tmp2); -+ emit_roti(as, RISCVI_RORIW, tmp1, tmp1, dest, (-HASH_ROT2-HASH_ROT1)&0x1f); -+ emit_ds1s2(as, RISCVI_SUBW, tmp2, tmp2, dest); -+ emit_ds1s2(as, RISCVI_XOR, tmp2, tmp2, tmp1); -+ emit_roti(as, RISCVI_RORIW, dest, tmp1, RID_TMP, (-HASH_ROT1)&0x1f); -+ if (irt_isnum(kt)) { -+ emit_dsshamt(as, RISCVI_SLLIW, tmp1, tmp1, 1); -+ emit_dsshamt(as, RISCVI_SRAI, tmp1, tmp1, 32); // hi -+ emit_ext(as, RISCVI_SEXT_W, tmp2, tmp1); // lo -+ emit_ds(as, RISCVI_FMV_X_D, tmp1, key); -+ } else { -+ checkmclim(as); -+ emit_dsshamt(as, RISCVI_SRAI, tmp1, tmp1, 32); // hi -+ emit_ext(as, RISCVI_SEXT_W, tmp2, key); // lo -+ emit_ds1s2(as, RISCVI_ADD, tmp1, key, type); -+ } -+ } -+ } -+} -+ -+static void asm_hrefk(ASMState *as, IRIns *ir) -+{ -+ IRIns *kslot = IR(ir->op2); -+ IRIns *irkey = IR(kslot->op1); -+ int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); -+ int32_t kofs = ofs + (int32_t)offsetof(Node, key); -+ int 
bigofs = !checki12(kofs); -+ Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; -+ Reg node = ra_alloc1(as, ir->op1, RSET_GPR); -+ RegSet allow = rset_exclude(RSET_GPR, node); -+ Reg idx = node; -+ int64_t k; -+ lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); -+ if (bigofs) { -+ idx = dest; -+ rset_clear(allow, dest); -+ kofs = (int32_t)offsetof(Node, key); -+ } else if (ra_hasreg(dest)) { -+ emit_dsi(as, RISCVI_ADDI, dest, node, ofs); -+ } -+ if (irt_ispri(irkey->t)) { -+ lj_assertA(!irt_isnil(irkey->t), "bad HREFK key type"); -+ k = ~((int64_t)~irt_toitype(irkey->t) << 47); -+ } else if (irt_isnum(irkey->t)) { -+ k = (int64_t)ir_knum(irkey)->u64; -+ } else { -+ k = ((int64_t)irt_toitype(irkey->t) << 47) | (int64_t)ir_kgc(irkey); -+ } -+ asm_guard(as, RISCVI_BNE, RID_TMP, ra_allock(as, k, allow)); -+ emit_lso(as, RISCVI_LD, RID_TMP, idx, kofs); -+ if (bigofs) -+ emit_ds1s2(as, RISCVI_ADD, dest, node, ra_allock(as, ofs, allow)); -+} -+ -+static void asm_uref(ASMState *as, IRIns *ir) -+{ -+ Reg dest = ra_dest(as, ir, RSET_GPR); -+ int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC); -+ if (irref_isk(ir->op1) && !guarded) { -+ GCfunc *fn = ir_kfunc(IR(ir->op1)); -+ MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; -+ emit_lsptr(as, RISCVI_LD, dest, v, RSET_GPR); -+ } else { -+ if (guarded) -+ asm_guard(as, ir->o == IR_UREFC ? 
RISCVI_BEQ : RISCVI_BNE, RID_TMP, RID_ZERO); -+ if (ir->o == IR_UREFC) -+ emit_dsi(as, RISCVI_ADDI, dest, dest, (int32_t)offsetof(GCupval, tv)); -+ else -+ emit_lso(as, RISCVI_LD, dest, dest, (int32_t)offsetof(GCupval, v)); -+ if (guarded) -+ emit_lso(as, RISCVI_LBU, RID_TMP, dest, (int32_t)offsetof(GCupval, closed)); -+ if (irref_isk(ir->op1)) { -+ GCfunc *fn = ir_kfunc(IR(ir->op1)); -+ GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]); -+ emit_loada(as, dest, o); -+ } else { -+ emit_lso(as, RISCVI_LD, dest, ra_alloc1(as, ir->op1, RSET_GPR), -+ (int32_t)offsetof(GCfuncL, uvptr) + -+ (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8)); -+ } -+ } -+} -+ -+static void asm_fref(ASMState *as, IRIns *ir) -+{ -+ UNUSED(as); UNUSED(ir); -+ lj_assertA(!ra_used(ir), "unfused FREF"); -+} -+ -+static void asm_strref(ASMState *as, IRIns *ir) -+{ -+ RegSet allow = RSET_GPR; -+ Reg dest = ra_dest(as, ir, allow); -+ Reg base = ra_alloc1(as, ir->op1, allow); -+ IRIns *irr = IR(ir->op2); -+ int32_t ofs = sizeof(GCstr); -+ rset_clear(allow, base); -+ if (irref_isk(ir->op2) && checki12(ofs + irr->i)) { -+ emit_dsi(as, RISCVI_ADDI, dest, base, ofs + irr->i); -+ } else { -+ emit_dsi(as, RISCVI_ADDI, dest, dest, ofs); -+ emit_ds1s2(as, RISCVI_ADD, dest, base, ra_alloc1(as, ir->op2, allow)); -+ } -+} -+ -+/* -- Loads and stores ---------------------------------------------------- */ -+ -+static RISCVIns asm_fxloadins(IRIns *ir) -+{ -+ switch (irt_type(ir->t)) { -+ case IRT_I8: return RISCVI_LB; -+ case IRT_U8: return RISCVI_LBU; -+ case IRT_I16: return RISCVI_LH; -+ case IRT_U16: return RISCVI_LHU; -+ case IRT_NUM: return RISCVI_FLD; -+ case IRT_FLOAT: return RISCVI_FLW; -+ default: return irt_is64(ir->t) ? 
RISCVI_LD : RISCVI_LW; -+ } -+} -+ -+static RISCVIns asm_fxstoreins(IRIns *ir) -+{ -+ switch (irt_type(ir->t)) { -+ case IRT_I8: case IRT_U8: return RISCVI_SB; -+ case IRT_I16: case IRT_U16: return RISCVI_SH; -+ case IRT_NUM: return RISCVI_FSD; -+ case IRT_FLOAT: return RISCVI_FSW; -+ default: return irt_is64(ir->t) ? RISCVI_SD : RISCVI_SW; -+ } -+} -+ -+static void asm_fload(ASMState *as, IRIns *ir) -+{ -+ RegSet allow = RSET_GPR; -+ Reg idx, dest = ra_dest(as, ir, allow); -+ rset_clear(allow, dest); -+ RISCVIns riscvi = asm_fxloadins(ir); -+ int32_t ofs; -+ if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */ -+ idx = RID_GL; -+ ofs = (ir->op2 << 2) - GG_OFS(g); -+ } else { -+ idx = ra_alloc1(as, ir->op1, allow); -+ if (ir->op2 == IRFL_TAB_ARRAY) { -+ ofs = asm_fuseabase(as, ir->op1); -+ if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ -+ emit_dsi(as, RISCVI_ADDI, dest, idx, ofs); -+ return; -+ } -+ } -+ ofs = field_ofs[ir->op2]; -+ lj_assertA(!irt_isfp(ir->t), "bad FP FLOAD"); -+ } -+ rset_clear(allow, idx); -+ emit_lso(as, riscvi, dest, idx, ofs); -+} -+ -+static void asm_fstore(ASMState *as, IRIns *ir) -+{ -+ if (ir->r != RID_SINK) { -+ Reg src = ra_alloc1z(as, ir->op2, RSET_GPR); -+ IRIns *irf = IR(ir->op1); -+ Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); -+ int32_t ofs = field_ofs[irf->op2]; -+ lj_assertA(!irt_isfp(ir->t), "bad FP FSTORE"); -+ emit_lso(as, asm_fxstoreins(ir), src, idx, ofs); -+ } -+} -+ -+static void asm_xload(ASMState *as, IRIns *ir) -+{ -+ Reg dest = ra_dest(as, ir, (irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); -+ lj_assertA(LJ_TARGET_UNALIGNED || !(ir->op2 & IRXLOAD_UNALIGNED), -+ "unaligned XLOAD"); -+ asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); -+} -+ -+static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) -+{ -+ if (ir->r != RID_SINK) { -+ Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? 
RSET_FPR : RSET_GPR); -+ asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, -+ rset_exclude(RSET_GPR, src), ofs); -+ } -+} -+ -+#define asm_xstore(as, ir) asm_xstore_(as, ir, 0) -+ -+static void asm_ahuvload(ASMState *as, IRIns *ir) -+{ -+ Reg dest = RID_NONE, type = RID_TMP, idx; -+ RegSet allow = RSET_GPR; -+ int32_t ofs = 0; -+ IRType1 t = ir->t; -+ if (ra_used(ir)) { -+ lj_assertA((irt_isnum(ir->t)) || irt_isint(ir->t) || irt_isaddr(ir->t), -+ "bad load type %d", irt_type(ir->t)); -+ dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow); -+ rset_clear(allow, dest); -+ if (irt_isaddr(t)) { -+ emit_cleartp(as, dest, dest); -+ } else if (irt_isint(t)) -+ emit_ext(as, RISCVI_SEXT_W, dest, dest); -+ } -+ idx = asm_fuseahuref(as, ir->op1, &ofs, allow); -+ if (ir->o == IR_VLOAD) ofs += 8 * ir->op2; -+ rset_clear(allow, idx); -+ if (irt_isnum(t)) { -+ asm_guard(as, RISCVI_BEQ, RID_TMP, RID_ZERO); -+ emit_dsi(as, RISCVI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM); -+ } else { -+ asm_guard(as, RISCVI_BNE, type, -+ ra_allock(as, (int32_t)irt_toitype(t), allow)); -+ } -+ if (ra_hasreg(dest)) { -+ if (irt_isnum(t)) { -+ emit_lso(as, RISCVI_FLD, dest, idx, ofs); -+ dest = type; -+ } -+ } else { -+ dest = type; -+ } -+ emit_dsshamt(as, RISCVI_SRAI, type, dest, 47); -+ emit_lso(as, RISCVI_LD, dest, idx, ofs); -+} -+ -+static void asm_ahustore(ASMState *as, IRIns *ir) -+{ -+ RegSet allow = RSET_GPR; -+ Reg idx, src = RID_NONE, type = RID_NONE; -+ int32_t ofs = 0; -+ if (ir->r == RID_SINK) -+ return; -+ if (irt_isnum(ir->t)) { -+ src = ra_alloc1(as, ir->op2, RSET_FPR); -+ idx = asm_fuseahuref(as, ir->op1, &ofs, allow); -+ emit_lso(as, RISCVI_FSD, src, idx, ofs); -+ } else { -+ Reg tmp = RID_TMP; -+ if (irt_ispri(ir->t)) { -+ tmp = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow); -+ rset_clear(allow, tmp); -+ } else { -+ src = ra_alloc1(as, ir->op2, allow); -+ rset_clear(allow, src); -+ type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow); -+ 
rset_clear(allow, type); -+ } -+ idx = asm_fuseahuref(as, ir->op1, &ofs, allow); -+ emit_lso(as, RISCVI_SD, tmp, idx, ofs); -+ if (ra_hasreg(src)) { -+ if (irt_isinteger(ir->t)) { -+ if (as->flags & JIT_F_RVZba) { -+ emit_ds1s2(as, RISCVI_ADD_UW, tmp, src, type); -+ } else { -+ emit_ds1s2(as, RISCVI_ADD, tmp, tmp, type); -+ emit_ext(as, RISCVI_ZEXT_W, tmp, src); -+ } -+ } else { -+ emit_ds1s2(as, RISCVI_ADD, tmp, src, type); -+ } -+ } -+ } -+} -+ -+static void asm_sload(ASMState *as, IRIns *ir) -+{ -+ Reg dest = RID_NONE, type = RID_NONE, base; -+ RegSet allow = RSET_GPR; -+ IRType1 t = ir->t; -+ int32_t ofs = 8*((int32_t)ir->op1-2); -+ lj_assertA(checki12(ofs), "sload IR operand out of range"); -+ lj_assertA(!(ir->op2 & IRSLOAD_PARENT), -+ "bad parent SLOAD"); /* Handled by asm_head_side(). */ -+ lj_assertA(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK), -+ "inconsistent SLOAD variant"); -+ if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { -+ dest = ra_scratch(as, RSET_FPR); -+ asm_tointg(as, ir, dest); -+ t.irt = IRT_NUM; /* Continue with a regular number type check. */ -+ } else if (ra_used(ir)) { -+ Reg tmp = RID_NONE; -+ if ((ir->op2 & IRSLOAD_CONVERT)) -+ tmp = ra_scratch(as, irt_isint(t) ? RSET_FPR : RSET_GPR); -+ lj_assertA((irt_isnum(t)) || irt_isint(t) || irt_isaddr(t), -+ "bad SLOAD type %d", irt_type(t)); -+ dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow); -+ rset_clear(allow, dest); -+ base = ra_alloc1(as, REF_BASE, allow); -+ rset_clear(allow, base); -+ if (irt_isaddr(t)) { /* Clear type from pointers. */ -+ emit_cleartp(as, dest, dest); -+ } else if (ir->op2 & IRSLOAD_CONVERT) { -+ if (irt_isint(t)) { -+ emit_ds(as, RISCVI_FCVT_W_D|RISCVF_RM(RISCVRM_RTZ), dest, tmp); -+ /* If value is already loaded for type check, move it to FPR. */ -+ if ((ir->op2 & IRSLOAD_TYPECHECK)) -+ emit_ds(as, RISCVI_FMV_D_X, tmp, dest); -+ else -+ dest = tmp; -+ t.irt = IRT_NUM; /* Check for original type. 
*/ -+ } else { -+ emit_ds(as, RISCVI_FCVT_D_W, dest, tmp); -+ dest = tmp; -+ t.irt = IRT_INT; /* Check for original type. */ -+ } -+ } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) { -+ /* Sign-extend integers. */ -+ emit_ext(as, RISCVI_SEXT_W, dest, dest); -+ } -+ goto dotypecheck; -+ } -+ base = ra_alloc1(as, REF_BASE, allow); -+ rset_clear(allow, base); -+dotypecheck: -+ if ((ir->op2 & IRSLOAD_TYPECHECK)) { -+ type = dest < RID_MAX_GPR ? dest : RID_TMP; -+ if (irt_ispri(t)) { -+ asm_guard(as, RISCVI_BNE, type, -+ ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow)); -+ } else if ((ir->op2 & IRSLOAD_KEYINDEX)) { -+ asm_guard(as, RISCVI_BNE, RID_TMP, -+ ra_allock(as, (int32_t)LJ_KEYINDEX, allow)); -+ emit_dsshamt(as, RISCVI_SRAI, RID_TMP, type, 32); -+ } else { -+ if (irt_isnum(t)) { -+ asm_guard(as, RISCVI_BEQ, RID_TMP, RID_ZERO); -+ emit_dsi(as, RISCVI_SLTIU, RID_TMP, RID_TMP, LJ_TISNUM); -+ if (ra_hasreg(dest)) { -+ emit_lso(as, RISCVI_FLD, dest, base, ofs); -+ } -+ } else { -+ asm_guard(as, RISCVI_BNE, RID_TMP, -+ ra_allock(as, (int32_t)irt_toitype(t), allow)); -+ } -+ emit_dsshamt(as, RISCVI_SRAI, RID_TMP, type, 47); -+ } -+ emit_lso(as, RISCVI_LD, type, base, ofs); -+ } else if (ra_hasreg(dest)) { -+ emit_lso(as, irt_isnum(t) ? RISCVI_FLD : -+ irt_isint(t) ? RISCVI_LW : RISCVI_LD, -+ dest, base, ofs); -+ } -+} -+ -+/* -- Allocations --------------------------------------------------------- */ -+ -+#if LJ_HASFFI -+static void asm_cnew(ASMState *as, IRIns *ir) -+{ -+ CTState *cts = ctype_ctsG(J2G(as->J)); -+ CTypeID id = (CTypeID)IR(ir->op1)->i; -+ CTSize sz; -+ CTInfo info = lj_ctype_info(cts, id, &sz); -+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; -+ IRRef args[4]; -+ RegSet drop = RSET_SCRATCH; -+ lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL), -+ "bad CNEW/CNEWI operands"); -+ -+ as->gcsteps++; -+ if (ra_hasreg(ir->r)) -+ rset_clear(drop, ir->r); /* Dest reg handled below. 
*/ -+ ra_evictset(as, drop); -+ if (ra_used(ir)) -+ ra_destreg(as, ir, RID_RET); /* GCcdata * */ -+ -+ /* Initialize immutable cdata object. */ -+ if (ir->o == IR_CNEWI) { -+ RegSet allow = (RSET_GPR & ~RSET_SCRATCH); -+ emit_lso(as, sz == 8 ? RISCVI_SD : RISCVI_SW, ra_alloc1(as, ir->op2, allow), -+ RID_RET, (sizeof(GCcdata))); -+ lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz); -+ } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ -+ ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; -+ args[0] = ASMREF_L; /* lua_State *L */ -+ args[1] = ir->op1; /* CTypeID id */ -+ args[2] = ir->op2; /* CTSize sz */ -+ args[3] = ASMREF_TMP1; /* CTSize align */ -+ asm_gencall(as, ci, args); -+ emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); -+ return; -+ } -+ -+ /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ -+ emit_lso(as, RISCVI_SB, RID_RET+1, RID_RET, (offsetof(GCcdata, gct))); -+ emit_lso(as, RISCVI_SH, RID_TMP, RID_RET, (offsetof(GCcdata, ctypeid))); -+ emit_loadk12(as, RID_RET+1, ~LJ_TCDATA); -+ emit_loadk32(as, RID_TMP, id); -+ args[0] = ASMREF_L; /* lua_State *L */ -+ args[1] = ASMREF_TMP1; /* MSize size */ -+ asm_gencall(as, ci, args); -+ ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), -+ ra_releasetmp(as, ASMREF_TMP1)); -+} -+#endif -+ -+/* -- Write barriers ------------------------------------------------------ */ -+ -+static void asm_tbar(ASMState *as, IRIns *ir) -+{ -+ Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); -+ Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab)); -+ Reg link = RID_TMP; -+ MCLabel l_end = emit_label(as); -+ emit_lso(as, RISCVI_SD, link, tab, (int32_t)offsetof(GCtab, gclist)); -+ emit_lso(as, RISCVI_SB, mark, tab, (int32_t)offsetof(GCtab, marked)); -+ emit_setgl(as, tab, gc.grayagain); // make tab gray again -+ emit_getgl(as, link, gc.grayagain); -+ emit_branch(as, RISCVI_BEQ, RID_TMP, RID_ZERO, l_end, -1); // black: not jump -+ emit_ds1s2(as, RISCVI_XOR, mark, mark, RID_TMP); 
// mark=0: gray -+ emit_dsi(as, RISCVI_ANDI, RID_TMP, mark, LJ_GC_BLACK); -+ emit_lso(as, RISCVI_LBU, mark, tab, ((int32_t)offsetof(GCtab, marked))); -+} -+ -+static void asm_obar(ASMState *as, IRIns *ir) -+{ -+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv]; -+ IRRef args[2]; -+ MCLabel l_end; -+ Reg obj, val, tmp; -+ /* No need for other object barriers (yet). */ -+ lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); // Closed upvalue -+ ra_evictset(as, RSET_SCRATCH); -+ l_end = emit_label(as); -+ args[0] = ASMREF_TMP1; /* global_State *g */ -+ args[1] = ir->op1; /* TValue *tv */ -+ asm_gencall(as, ci, args); -+ emit_ds(as, RISCVI_MV, ra_releasetmp(as, ASMREF_TMP1), RID_GL); -+ obj = IR(ir->op1)->r; -+ tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj)); -+ emit_branch(as, RISCVI_BEQ, tmp, RID_ZERO, l_end, -1); -+ emit_branch(as, RISCVI_BEQ, RID_TMP, RID_ZERO, l_end, -1); // black: jump -+ emit_dsi(as, RISCVI_ANDI, tmp, tmp, LJ_GC_BLACK); -+ emit_dsi(as, RISCVI_ANDI, RID_TMP, RID_TMP, LJ_GC_WHITES); -+ val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj)); -+ emit_lso(as, RISCVI_LBU, tmp, obj, -+ ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))); -+ emit_lso(as, RISCVI_LBU, RID_TMP, val, ((int32_t)offsetof(GChead, marked))); -+} -+ -+/* -- Arithmetic and logic operations ------------------------------------- */ -+ -+static void asm_fparith(ASMState *as, IRIns *ir, RISCVIns riscvi) -+{ -+ Reg dest = ra_dest(as, ir, RSET_FPR); -+ Reg right, left = ra_alloc2(as, ir, RSET_FPR); -+ right = (left >> 8); left &= 255; -+ emit_ds1s2(as, riscvi, dest, left, right); -+} -+ -+static void asm_fpunary(ASMState *as, IRIns *ir, RISCVIns riscvi) -+{ -+ Reg dest = ra_dest(as, ir, RSET_FPR); -+ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); -+ switch(riscvi) { -+ case RISCVI_FROUND_S_RTZ: case RISCVI_FROUND_S_RDN: case RISCVI_FROUND_S_RUP: -+ case RISCVI_FROUND_D_RTZ: case RISCVI_FROUND_D_RDN: case RISCVI_FROUND_D_RUP: -+ case 
RISCVI_FSQRT_S: case RISCVI_FSQRT_D: -+ emit_ds(as, riscvi, dest, left); -+ break; -+ case RISCVI_FMV_S: case RISCVI_FMV_D: -+ case RISCVI_FABS_S: case RISCVI_FABS_D: -+ case RISCVI_FNEG_S: case RISCVI_FNEG_D: -+ emit_ds1s2(as, riscvi, dest, left, left); -+ break; -+ default: -+ lj_assertA(0, "bad fp unary instruction"); -+ return; -+ } -+} -+ -+static void asm_fpround(ASMState *as, IRIns *ir, RISCVIns riscvi) -+{ -+ Reg dest = ra_dest(as, ir, RSET_FPR); -+ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); -+ MCLabel l_end = emit_label(as); -+ -+ if (dest != left) { -+ emit_ds1s2(as, RISCVI_FSGNJ_D, dest, dest, left); -+ emit_ds(as, RISCVI_FCVT_D_L, dest, RID_TMP); -+ } else { -+ Reg ftmp = ra_scratch(as, rset_exclude(RSET_FPR, dest)); -+ emit_ds1s2(as, RISCVI_FSGNJ_D, dest, ftmp, left); -+ emit_ds(as, RISCVI_FCVT_D_L, ftmp, RID_TMP); -+ } -+ emit_ds(as, riscvi, RID_TMP, left); -+ emit_branch(as, RISCVI_BLT, RID_ZERO, RID_TMP, l_end, 0); -+ emit_dsi(as, RISCVI_ADDI, RID_TMP, RID_TMP, -1075); -+ emit_dsi(as, RISCVI_ANDI, RID_TMP, RID_TMP, 0x7ff); -+ emit_dsi(as, RISCVI_SRLI, RID_TMP, RID_TMP, 52); -+ if (dest != left) -+ emit_ds1s2(as, RISCVI_FMV_D, dest, left, left); -+ emit_ds(as, RISCVI_FMV_X_D, RID_TMP, left); -+} -+ -+static void asm_fpmath(ASMState *as, IRIns *ir) -+{ -+ IRFPMathOp fpm = (IRFPMathOp)ir->op2; -+ if (fpm <= IRFPM_TRUNC) -+ if (as->flags & JIT_F_RVZfa) { -+ asm_fpunary(as, ir, fpm == IRFPM_FLOOR ? RISCVI_FROUND_D_RDN : -+ fpm == IRFPM_CEIL ? RISCVI_FROUND_D_RUP : RISCVI_FROUND_D_RTZ); -+ } else { -+ asm_fpround(as, ir, fpm == IRFPM_FLOOR ? RISCVI_FCVT_L_D | RISCVF_RM(RISCVRM_RDN) : -+ fpm == IRFPM_CEIL ? 
RISCVI_FCVT_L_D | RISCVF_RM(RISCVRM_RUP) : -+ RISCVI_FCVT_L_D | RISCVF_RM(RISCVRM_RTZ)); -+ } -+ else if (fpm == IRFPM_SQRT) -+ asm_fpunary(as, ir, RISCVI_FSQRT_D); -+ else -+ asm_callid(as, ir, IRCALL_lj_vm_floor + fpm); -+} -+ -+static void asm_add(ASMState *as, IRIns *ir) -+{ -+ IRType1 t = ir->t; -+ if (irt_isnum(t)) { -+ if (!asm_fusemadd(as, ir, RISCVI_FMADD_D, RISCVI_FMADD_D)) -+ asm_fparith(as, ir, RISCVI_FADD_D); -+ return; -+ } else { -+ if ((as->flags & JIT_F_RVXThead) && asm_fusemac(as, ir, RISCVI_TH_MULA)) -+ return; -+ Reg dest = ra_dest(as, ir, RSET_GPR); -+ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); -+ if (irref_isk(ir->op2)) { -+ intptr_t k = get_kval(as, ir->op2); -+ if (checki12(k)) { -+ if (irt_is64(t)) { -+ emit_dsi(as, RISCVI_ADDI, dest, left, k); -+ } else { -+ emit_dsi(as, RISCVI_ADDIW, dest, left, k); -+ } -+ return; -+ } -+ } -+ Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); -+ emit_ds1s2(as, irt_is64(t) ? RISCVI_ADD : RISCVI_ADDW, dest, -+ left, right); -+ } -+} -+ -+static void asm_sub(ASMState *as, IRIns *ir) -+{ -+ if (irt_isnum(ir->t)) { -+ if (!asm_fusemadd(as, ir, RISCVI_FMSUB_D, RISCVI_FNMSUB_D)) -+ asm_fparith(as, ir, RISCVI_FSUB_D); -+ return; -+ } else { -+ if ((as->flags & JIT_F_RVXThead) && asm_fusemac(as, ir, RISCVI_TH_MULS)) -+ return; -+ Reg dest = ra_dest(as, ir, RSET_GPR); -+ Reg right, left = ra_alloc2(as, ir, RSET_GPR); -+ right = (left >> 8); left &= 255; -+ emit_ds1s2(as, irt_is64(ir->t) ? RISCVI_SUB : RISCVI_SUBW, dest, -+ left, right); -+ } -+} -+ -+static void asm_mul(ASMState *as, IRIns *ir) -+{ -+ if (irt_isnum(ir->t)) { -+ asm_fparith(as, ir, RISCVI_FMUL_D); -+ } else { -+ Reg dest = ra_dest(as, ir, RSET_GPR); -+ Reg right, left = ra_alloc2(as, ir, RSET_GPR); -+ right = (left >> 8); left &= 255; -+ emit_ds1s2(as, irt_is64(ir->t) ? 
RISCVI_MUL : RISCVI_MULW, dest, -+ left, right); -+ } -+} -+ -+static void asm_fpdiv(ASMState *as, IRIns *ir) -+{ -+ asm_fparith(as, ir, RISCVI_FDIV_D); -+} -+ -+static void asm_neg(ASMState *as, IRIns *ir) -+{ -+ if (irt_isnum(ir->t)) { -+ asm_fpunary(as, ir, RISCVI_FNEG_D); -+ } else { -+ Reg dest = ra_dest(as, ir, RSET_GPR); -+ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); -+ emit_ds1s2(as, irt_is64(ir->t) ? RISCVI_SUB : RISCVI_SUBW, dest, -+ RID_ZERO, left); -+ } -+} -+ -+#define asm_abs(as, ir) asm_fpunary(as, ir, RISCVI_FABS_D) -+ -+static void asm_arithov(ASMState *as, IRIns *ir) -+{ -+ Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR); -+ lj_assertA(!irt_is64(ir->t), "bad usage"); -+ if (irref_isk(ir->op2)) { -+ int k = IR(ir->op2)->i; -+ if (ir->o == IR_SUBOV) k = (int)(~(unsigned int)k+1u); -+ if (checki12(k)) { /* (dest < left) == (k >= 0 ? 1 : 0) */ -+ left = ra_alloc1(as, ir->op1, RSET_GPR); -+ asm_guard(as, k >= 0 ? RISCVI_BLT : RISCVI_BGE, dest, dest == left ? RID_TMP : left); -+ emit_dsi(as, RISCVI_ADDI, dest, left, k); -+ if (dest == left) emit_mv(as, RID_TMP, left); -+ return; -+ } -+ } -+ left = ra_alloc2(as, ir, RSET_GPR); -+ right = (left >> 8); left &= 255; -+ tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left), -+ right), dest)); -+ asm_guard(as, RISCVI_BLT, RID_TMP, RID_ZERO); -+ emit_ds1s2(as, RISCVI_AND, RID_TMP, RID_TMP, tmp); -+ if (ir->o == IR_ADDOV) { /* ((dest^left) & (dest^right)) < 0 */ -+ emit_ds1s2(as, RISCVI_XOR, RID_TMP, dest, dest == right ? RID_TMP : right); -+ } else { /* ((dest^left) & (dest^~right)) < 0 */ -+ emit_xnor(as, RID_TMP, dest, dest == right ? RID_TMP : right); -+ } -+ emit_ds1s2(as, RISCVI_XOR, tmp, dest, dest == left ? RID_TMP : left); -+ emit_ds1s2(as, ir->o == IR_ADDOV ? RISCVI_ADDW : RISCVI_SUBW, dest, left, right); -+ if (dest == left || dest == right) -+ emit_mv(as, RID_TMP, dest == left ? 
left : right); -+} -+ -+#define asm_addov(as, ir) asm_arithov(as, ir) -+#define asm_subov(as, ir) asm_arithov(as, ir) -+ -+static void asm_mulov(ASMState *as, IRIns *ir) -+{ -+ Reg dest = ra_dest(as, ir, RSET_GPR); -+ Reg right, left = ra_alloc2(as, ir, RSET_GPR); -+ right = (left >> 8); left &= 255; -+ asm_guard(as, RISCVI_BNE, dest, RID_TMP); -+ emit_ext(as, RISCVI_SEXT_W, dest, RID_TMP); // dest: [31:0]+signextend -+ emit_ds1s2(as, RISCVI_MUL, RID_TMP, left, right); // RID_TMP: [63:0] -+} -+ -+static void asm_bnot(ASMState *as, IRIns *ir) -+{ -+ Reg left, right, dest = ra_dest(as, ir, RSET_GPR); -+ IRIns *irl = IR(ir->op1); -+ if (as->flags & JIT_F_RVZbb && mayfuse(as, ir->op1) && irl->o == IR_BXOR) { -+ left = ra_alloc2(as, irl, RSET_GPR); -+ right = (left >> 8); left &= 255; -+ emit_ds1s2(as, RISCVI_XNOR, dest, left, right); -+ } else { -+ left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); -+ emit_ds(as, RISCVI_NOT, dest, left); -+ } -+} -+ -+static void asm_bswap(ASMState *as, IRIns *ir) -+{ -+ Reg dest = ra_dest(as, ir, RSET_GPR); -+ Reg left = ra_alloc1(as, ir->op1, RSET_GPR); -+ RegSet allow = rset_exclude(rset_exclude(RSET_GPR, dest), left); -+ if (as->flags & JIT_F_RVZbb) { -+ if (!irt_is64(ir->t)) -+ emit_dsshamt(as, RISCVI_SRAI, dest, dest, 32); -+ emit_ds(as, RISCVI_REV8, dest, left); -+ } else if (as->flags & JIT_F_RVXThead) { -+ emit_ds(as, irt_is64(ir->t) ? 
RISCVI_TH_REV : RISCVI_TH_REVW, -+ dest, left); -+ } else if (irt_is64(ir->t)) { -+ Reg tmp1, tmp2, tmp3, tmp4; -+ tmp1 = ra_scratch(as, allow), allow = rset_exclude(allow, tmp1); -+ tmp2 = ra_scratch(as, allow), allow = rset_exclude(allow, tmp2); -+ tmp3 = ra_scratch(as, allow), allow = rset_exclude(allow, tmp3); -+ tmp4 = ra_scratch(as, allow); -+ emit_ds1s2(as, RISCVI_OR, dest, dest, tmp4); -+ emit_ds1s2(as, RISCVI_OR, dest, dest, tmp3); -+ emit_ds1s2(as, RISCVI_OR, dest, dest, tmp2); -+ emit_dsshamt(as, RISCVI_SLLI, tmp4, tmp4, 40); -+ emit_dsshamt(as, RISCVI_SLLI, dest, left, 56); -+ emit_ds1s2(as, RISCVI_OR, tmp3, tmp1, tmp3); -+ emit_ds1s2(as, RISCVI_AND, tmp4, left, RID_TMP); -+ emit_dsshamt(as, RISCVI_SLLI, tmp3, tmp3, 32); -+ emit_dsshamt(as, RISCVI_SLLI, tmp1, tmp1, 24); -+ emit_dsshamt(as, RISCVI_SRLIW, tmp3, left, 24); -+ emit_ds1s2(as, RISCVI_OR, tmp2, tmp3, tmp2); -+ emit_ds1s2(as, RISCVI_AND, tmp1, left, tmp1); -+ emit_ds1s2(as, RISCVI_OR, tmp3, tmp4, tmp3); -+ emit_dsshamt(as, RISCVI_SLLI, tmp4, tmp4, 24); -+ emit_dsshamt(as, RISCVI_SRLIW, tmp4, tmp4, 24); -+ emit_ds1s2(as, RISCVI_AND, tmp3, tmp3, tmp1); -+ emit_dsshamt(as, RISCVI_SRLI, tmp4, left, 8); -+ emit_dsshamt(as, RISCVI_SRLI, tmp3, left, 24); -+ emit_ds1s2(as, RISCVI_OR, tmp2, tmp2, tmp3); -+ emit_du(as, RISCVI_LUI, tmp1, RISCVF_HI(0xff0000u)); -+ emit_ds1s2(as, RISCVI_AND, tmp2, tmp2, RID_TMP); -+ emit_dsshamt(as, RISCVI_SRLI, tmp3, left, 56); -+ emit_dsi(as, RISCVI_ADDI, RID_TMP, RID_TMP, RISCVF_LO(0xff00)); -+ emit_du(as, RISCVI_LUI, RID_TMP, RISCVF_HI(0xff00u)); -+ emit_dsshamt(as, RISCVI_SRLI, tmp2, left, 40); -+ } else { -+ Reg tmp1, tmp2; -+ tmp1 = ra_scratch(as, allow), allow = rset_exclude(allow, tmp1); -+ tmp2 = ra_scratch(as, allow); -+ emit_ds1s2(as, RISCVI_OR, dest, dest, tmp2); -+ emit_ds1s2(as, RISCVI_OR, dest, dest, tmp1); -+ emit_dsshamt(as, RISCVI_SLLI, tmp2, RID_TMP, 8); -+ emit_dsshamt(as, RISCVI_SLLIW, dest, left, 24); -+ emit_ds1s2(as, RISCVI_OR, tmp1, tmp1, tmp2); -+ 
emit_ds1s2(as, RISCVI_AND, RID_TMP, left, RID_TMP); -+ emit_ds1s2(as, RISCVI_AND, tmp1, tmp1, RID_TMP); -+ emit_dsshamt(as, RISCVI_SRLIW, tmp2, left, 24); -+ emit_dsi(as, RISCVI_ADDI, RID_TMP, RID_TMP, RISCVF_LO(0xff00)); -+ emit_du(as, RISCVI_LUI, RID_TMP, RISCVF_HI(0xff00u)); -+ emit_dsshamt(as, RISCVI_SRLI, tmp1, left, 8); -+ } -+} -+ -+static void asm_bitop(ASMState *as, IRIns *ir, RISCVIns riscvi, RISCVIns riscvik, RISCVIns riscvin) -+{ -+ Reg dest = ra_dest(as, ir, RSET_GPR); -+ Reg left, right; -+ IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); -+ if (irref_isk(ir->op2)) { -+ intptr_t k = get_kval(as, ir->op2); -+ if (checki12(k)) { -+ left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); -+ emit_dsi(as, riscvik, dest, left, k); -+ return; -+ } -+ } else if (as->flags & JIT_F_RVZbb) { -+ if (mayfuse(as, ir->op1) && irl->o == IR_BNOT) { -+ left = ra_alloc1(as, irl->op1, RSET_GPR); -+ right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); -+ emit_ds1s2(as, riscvin, dest, right, left); -+ return; -+ } else if (mayfuse(as, ir->op2) && irr->o == IR_BNOT) { -+ left = ra_alloc1(as, ir->op1, RSET_GPR); -+ right = ra_alloc1(as, irr->op1, rset_exclude(RSET_GPR, left)); -+ emit_ds1s2(as, riscvin, dest, left, right); -+ return; -+ } -+ } -+ left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); -+ right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); -+ emit_ds1s2(as, riscvi, dest, left, right); -+} -+ -+#define asm_band(as, ir) asm_bitop(as, ir, RISCVI_AND, RISCVI_ANDI, RISCVI_ANDN) -+#define asm_bor(as, ir) asm_bitop(as, ir, RISCVI_OR, RISCVI_ORI, RISCVI_ORN) -+#define asm_bxor(as, ir) asm_bitop(as, ir, RISCVI_XOR, RISCVI_XORI, RISCVI_XNOR) -+ -+static void asm_bitshift(ASMState *as, IRIns *ir, RISCVIns riscvi, RISCVIns riscvik) -+{ -+ Reg dest = ra_dest(as, ir, RSET_GPR); -+ Reg left = ra_alloc1(as, ir->op1, RSET_GPR); -+ uint32_t shmsk = irt_is64(ir->t) ? 63 : 31; -+ if (irref_isk(ir->op2)) { /* Constant shifts. 
*/ -+ uint32_t shift = (uint32_t)(IR(ir->op2)->i & shmsk); -+ switch (riscvik) { -+ case RISCVI_SRAI: case RISCVI_SRLI: case RISCVI_SLLI: -+ case RISCVI_SRAIW: case RISCVI_SLLIW: case RISCVI_SRLIW: -+ emit_dsshamt(as, riscvik, dest, left, shift); -+ break; -+ case RISCVI_ADDI: shift = (-shift) & shmsk; -+ case RISCVI_RORI: -+ emit_roti(as, RISCVI_RORI, dest, left, RID_TMP, shift); -+ break; -+ case RISCVI_ADDIW: shift = (-shift) & shmsk; -+ case RISCVI_RORIW: -+ emit_roti(as, RISCVI_RORIW, dest, left, RID_TMP, shift); -+ break; -+ default: -+ lj_assertA(0, "bad shift instruction"); -+ return; -+ } -+ } else { -+ Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); -+ switch (riscvi) { -+ case RISCVI_SRA: case RISCVI_SRL: case RISCVI_SLL: -+ case RISCVI_SRAW: case RISCVI_SRLW: case RISCVI_SLLW: -+ emit_ds1s2(as, riscvi, dest, left, right); -+ break; -+ case RISCVI_ROR: case RISCVI_ROL: -+ case RISCVI_RORW: case RISCVI_ROLW: -+ emit_rot(as, riscvi, dest, left, right, RID_TMP); -+ break; -+ default: -+ lj_assertA(0, "bad shift instruction"); -+ return; -+ } -+ } -+} -+ -+#define asm_bshl(as, ir) (irt_is64(ir->t) ? \ -+ asm_bitshift(as, ir, RISCVI_SLL, RISCVI_SLLI) : \ -+ asm_bitshift(as, ir, RISCVI_SLLW, RISCVI_SLLIW)) -+#define asm_bshr(as, ir) (irt_is64(ir->t) ? \ -+ asm_bitshift(as, ir, RISCVI_SRL, RISCVI_SRLI) : \ -+ asm_bitshift(as, ir, RISCVI_SRLW, RISCVI_SRLIW)) -+#define asm_bsar(as, ir) (irt_is64(ir->t) ? \ -+ asm_bitshift(as, ir, RISCVI_SRA, RISCVI_SRAI) : \ -+ asm_bitshift(as, ir, RISCVI_SRAW, RISCVI_SRAIW)) -+#define asm_brol(as, ir) (irt_is64(ir->t) ? \ -+ asm_bitshift(as, ir, RISCVI_ROL, RISCVI_ADDI) : \ -+ asm_bitshift(as, ir, RISCVI_ROLW, RISCVI_ADDIW)) -+ // ROLI -> ADDI, ROLIW -> ADDIW; Hacky but works. -+#define asm_bror(as, ir) (irt_is64(ir->t) ? 
\ -+ asm_bitshift(as, ir, RISCVI_ROR, RISCVI_RORI) : \ -+ asm_bitshift(as, ir, RISCVI_RORW, RISCVI_RORIW)) -+ -+static void asm_min_max(ASMState *as, IRIns *ir, int ismax) -+{ -+ if (irt_isnum(ir->t)) { -+ Reg dest = ra_dest(as, ir, RSET_FPR); -+ MCLabel l_ret_left, l_end; -+ Reg right, left = ra_alloc2(as, ir, RSET_FPR); -+ right = (left >> 8); left &= 255; -+ l_end = emit_label(as); -+ -+ if (dest != left) -+ emit_ds1s2(as, RISCVI_FMV_D, dest, left, left); -+ l_ret_left = emit_label(as); -+ -+ if (dest != left) -+ emit_jump(as, l_end, -1); -+ if (dest != right) -+ emit_ds1s2(as, RISCVI_FMV_D, dest, right, right); -+ -+ emit_branch(as, RISCVI_BNE, RID_TMP, RID_ZERO, l_ret_left, -1); -+ emit_ds1s2(as, RISCVI_FLT_D, RID_TMP, ismax ? right : left, -+ ismax ? left : right); -+ } else { -+ Reg dest = ra_dest(as, ir, RSET_GPR); -+ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); -+ Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); -+ if (as->flags & JIT_F_RVZbb) { -+ emit_ds1s2(as, ismax ? 
RISCVI_MAX : RISCVI_MIN, dest, left, right); -+ } else { -+ if (as->flags & JIT_F_RVXThead) { -+ if (left == right) { -+ if (dest != left) emit_mv(as, dest, left); -+ } else { -+ if (dest == left) { -+ emit_ds1s2(as, RISCVI_TH_MVNEZ, dest, right, RID_TMP); -+ } else { -+ emit_ds1s2(as, RISCVI_TH_MVEQZ, dest, left, RID_TMP); -+ if (dest != right) emit_mv(as, dest, right); -+ } -+ } -+ } else if (as->flags & JIT_F_RVZicond) { -+ emit_ds1s2(as, RISCVI_OR, dest, dest, RID_TMP); -+ if (dest != right) { -+ emit_ds1s2(as, RISCVI_CZERO_EQZ, RID_TMP, right, RID_TMP); -+ emit_ds1s2(as, RISCVI_CZERO_NEZ, dest, left, RID_TMP); -+ } else { -+ emit_ds1s2(as, RISCVI_CZERO_NEZ, RID_TMP, left, RID_TMP); -+ emit_ds1s2(as, RISCVI_CZERO_EQZ, dest, right, RID_TMP); -+ } -+ } else { -+ if (dest != right) { -+ emit_ds1s2(as, RISCVI_XOR, dest, right, dest); -+ emit_ds1s2(as, RISCVI_AND, dest, dest, RID_TMP); -+ emit_ds1s2(as, RISCVI_XOR, dest, right, left); -+ emit_dsi(as, RISCVI_ADDI, RID_TMP, RID_TMP, -1); -+ } else { -+ emit_ds1s2(as, RISCVI_XOR, dest, left, dest); -+ emit_ds1s2(as, RISCVI_AND, dest, dest, RID_TMP); -+ emit_ds1s2(as, RISCVI_XOR, dest, left, right); -+ emit_ds1s2(as, RISCVI_SUB, RID_TMP, RID_ZERO, RID_TMP); -+ } -+ } -+ emit_ds1s2(as, RISCVI_SLT, RID_TMP, -+ ismax ? left : right, ismax ? right : left); -+ } -+ } -+} -+ -+#define asm_min(as, ir) asm_min_max(as, ir, 0) -+#define asm_max(as, ir) asm_min_max(as, ir, 1) -+ -+/* -- Comparisons --------------------------------------------------------- */ -+ -+/* FP comparisons. */ -+static void asm_fpcomp(ASMState *as, IRIns *ir) -+{ -+ IROp op = ir->o; -+ Reg right, left = ra_alloc2(as, ir, RSET_FPR); -+ right = (left >> 8); left &= 255; -+ asm_guard(as, (op < IR_EQ ? (op&4) : (op&1)) -+ ? 
RISCVI_BNE : RISCVI_BEQ, RID_TMP, RID_ZERO); -+ switch (op) { -+ case IR_LT: case IR_UGE: -+ emit_ds1s2(as, RISCVI_FLT_D, RID_TMP, left, right); -+ break; -+ case IR_LE: case IR_UGT: case IR_ABC: -+ emit_ds1s2(as, RISCVI_FLE_D, RID_TMP, left, right); -+ break; -+ case IR_GT: case IR_ULE: -+ emit_ds1s2(as, RISCVI_FLT_D, RID_TMP, right, left); -+ break; -+ case IR_GE: case IR_ULT: -+ emit_ds1s2(as, RISCVI_FLE_D, RID_TMP, right, left); -+ break; -+ case IR_EQ: case IR_NE: -+ emit_ds1s2(as, RISCVI_FEQ_D, RID_TMP, left, right); -+ break; -+ default: -+ break; -+ } -+} -+ -+/* Integer comparisons. */ -+static void asm_intcomp(ASMState *as, IRIns *ir) -+{ -+ /* ORDER IR: LT GE LE GT ULT UGE ULE UGT. */ -+ /* 00 01 10 11 100 101 110 111 */ -+ IROp op = ir->o; -+ Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); -+ if (op == IR_ABC) op = IR_UGT; -+ if ((op&4) == 0 && irref_isk(ir->op2) && get_kval(as, ir->op2) == 0) { -+ switch (op) { -+ case IR_LT: asm_guard(as, RISCVI_BGE, left, RID_ZERO); break; -+ case IR_GE: asm_guard(as, RISCVI_BLT, left, RID_ZERO); break; -+ case IR_LE: asm_guard(as, RISCVI_BLT, RID_ZERO, left); break; -+ case IR_GT: asm_guard(as, RISCVI_BGE, RID_ZERO, left); break; -+ default: break; -+ } -+ return; -+ } -+ if (irref_isk(ir->op2)) { -+ intptr_t k = get_kval(as, ir->op2); -+ if ((op&2)) k++; -+ if (checki12(k)) { -+ asm_guard(as, (op&1) ? RISCVI_BNE : RISCVI_BEQ, RID_TMP, RID_ZERO); -+ emit_dsi(as, (op&4) ? RISCVI_SLTIU : RISCVI_SLTI, RID_TMP, left, k); -+ return; -+ } -+ } -+ right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); -+ asm_guard(as, ((op&4) ? RISCVI_BGEU : RISCVI_BGE) ^ RISCVF_FUNCT3((op^(op>>1))&1), -+ (op&2) ? right : left, (op&2) ? 
left : right); -+} -+ -+static void asm_comp(ASMState *as, IRIns *ir) -+{ -+ if (irt_isnum(ir->t)) -+ asm_fpcomp(as, ir); -+ else -+ asm_intcomp(as, ir); -+} -+ -+static void asm_equal(ASMState *as, IRIns *ir) -+{ -+ if (irt_isnum(ir->t)) { -+ asm_fpcomp(as, ir); -+ } else { -+ Reg right, left = ra_alloc2(as, ir, RSET_GPR); -+ right = (left >> 8); left &= 255; -+ asm_guard(as, (ir->o & 1) ? RISCVI_BEQ : RISCVI_BNE, left, right); -+ } -+} -+ -+/* -- Split register ops -------------------------------------------------- */ -+ -+/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ -+static void asm_hiop(ASMState *as, IRIns *ir) -+{ -+ /* HIOP is marked as a store because it needs its own DCE logic. */ -+ int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ -+ if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; -+ if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ -+ switch ((ir-1)->o) { -+ case IR_CALLN: -+ case IR_CALLL: -+ case IR_CALLS: -+ case IR_CALLXS: -+ if (!uselo) -+ ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ -+ break; -+ default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break; -+ } -+} -+ -+/* -- Profiling ----------------------------------------------------------- */ -+ -+static void asm_prof(ASMState *as, IRIns *ir) -+{ -+ UNUSED(ir); -+ asm_guard(as, RISCVI_BNE, RID_TMP, RID_ZERO); -+ emit_dsi(as, RISCVI_ANDI, RID_TMP, RID_TMP, HOOK_PROFILE); -+ emit_lsglptr(as, RISCVI_LBU, RID_TMP, -+ (int32_t)offsetof(global_State, hookmask)); -+} -+ -+/* -- Stack handling ------------------------------------------------------ */ -+ -+/* Check Lua stack size for overflow. Use exit handler as fallback. */ -+static void asm_stack_check(ASMState *as, BCReg topslot, -+ IRIns *irp, RegSet allow, ExitNo exitno) -+{ -+ /* Try to get an unused temp register, otherwise spill/restore RID_RET*. */ -+ Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? 
irp->r : RID_TMP) : RID_BASE; -+ ExitNo oldsnap = as->snapno; -+ rset_clear(allow, pbase); -+ as->snapno = exitno; -+ asm_guard(as, RISCVI_BNE, RID_TMP, RID_ZERO); -+ as->snapno = oldsnap; -+ if (allow) { -+ tmp = rset_pickbot(allow); -+ ra_modified(as, tmp); -+ } else { // allow == RSET_EMPTY -+ tmp = RID_RET; -+ emit_lso(as, RISCVI_LD, tmp, RID_SP, 0); /* Restore tmp1 register. */ -+ } -+ emit_dsi(as, RISCVI_SLTIU, RID_TMP, RID_TMP, (int32_t)(8*topslot)); -+ emit_ds1s2(as, RISCVI_SUB, RID_TMP, tmp, pbase); -+ emit_lso(as, RISCVI_LD, tmp, tmp, offsetof(lua_State, maxstack)); -+ if (pbase == RID_TMP) -+ emit_getgl(as, RID_TMP, jit_base); -+ emit_getgl(as, tmp, cur_L); -+ if (allow == RSET_EMPTY) /* Spill temp register. */ -+ emit_lso(as, RISCVI_SD, tmp, RID_SP, 0); -+} -+ -+/* Restore Lua stack from on-trace state. */ -+static void asm_stack_restore(ASMState *as, SnapShot *snap) -+{ -+ SnapEntry *map = &as->T->snapmap[snap->mapofs]; -+#ifdef LUA_USE_ASSERT -+ SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2]; -+#endif -+ MSize n, nent = snap->nent; -+ /* Store the value of all modified slots to the Lua stack. 
*/ -+ for (n = 0; n < nent; n++) { -+ SnapEntry sn = map[n]; -+ BCReg s = snap_slot(sn); -+ int32_t ofs = 8*((int32_t)s-1-LJ_FR2); -+ IRRef ref = snap_ref(sn); -+ IRIns *ir = IR(ref); -+ if ((sn & SNAP_NORESTORE)) -+ continue; -+ if (irt_isnum(ir->t)) { -+ Reg src = ra_alloc1(as, ref, RSET_FPR); -+ emit_lso(as, RISCVI_FSD, src, RID_BASE, ofs); -+ } else { -+ if ((sn & SNAP_KEYINDEX)) { -+ RegSet allow = rset_exclude(RSET_GPR, RID_BASE); -+ int64_t kki = (int64_t)LJ_KEYINDEX << 32; -+ if (irref_isk(ref)) { -+ emit_lso(as, RISCVI_SD, -+ ra_allock(as, kki | (int64_t)(uint32_t)ir->i, allow), -+ RID_BASE, ofs); -+ } else { -+ Reg src = ra_alloc1(as, ref, allow); -+ Reg rki = ra_allock(as, kki, rset_exclude(allow, src)); -+ emit_lso(as, RISCVI_SD, RID_TMP, RID_BASE, ofs); -+ emit_ds1s2(as, RISCVI_ADD, RID_TMP, src, rki); -+ } -+ } else { -+ asm_tvstore64(as, RID_BASE, ofs, ref); -+ } -+ } -+ checkmclim(as); -+ } -+ lj_assertA(map + nent == flinks, "inconsistent frames in snapshot"); -+} -+ -+/* -- GC handling --------------------------------------------------------- */ -+ -+/* Marker to prevent patching the GC check exit. */ -+#define RISCV_NOPATCH_GC_CHECK \ -+ (RISCVI_OR|RISCVF_D(RID_TMP)|RISCVF_S1(RID_TMP)|RISCVF_S2(RID_TMP)) -+ -+/* Check GC threshold and do one or more GC steps. */ -+static void asm_gc_check(ASMState *as) -+{ -+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit]; -+ IRRef args[2]; -+ MCLabel l_end; -+ Reg tmp; -+ ra_evictset(as, RSET_SCRATCH); -+ l_end = emit_label(as); -+ /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */ -+ asm_guard(as, RISCVI_BNE, RID_RET, RID_ZERO); /* Assumes asm_snap_prep() already done. 
*/ -+ *--as->mcp = RISCV_NOPATCH_GC_CHECK; -+ args[0] = ASMREF_TMP1; /* global_State *g */ -+ args[1] = ASMREF_TMP2; /* MSize steps */ -+ asm_gencall(as, ci, args); -+ emit_ds(as, RISCVI_MV, ra_releasetmp(as, ASMREF_TMP1), RID_GL); -+ tmp = ra_releasetmp(as, ASMREF_TMP2); -+ emit_loadi(as, tmp, as->gcsteps); -+ /* Jump around GC step if GC total < GC threshold. */ -+ emit_branch(as, RISCVI_BLTU, RID_TMP, tmp, l_end, -1); -+ emit_getgl(as, tmp, gc.threshold); -+ emit_getgl(as, RID_TMP, gc.total); -+ as->gcsteps = 0; -+ checkmclim(as); -+} -+ -+/* -- Loop handling ------------------------------------------------------- */ -+ -+/* Fixup the loop branch. */ -+static void asm_loop_fixup(ASMState *as) -+{ -+ MCode *p = as->mctop; -+ MCode *target = as->mcp; -+ ptrdiff_t delta; -+ if (as->loopinv) { /* Inverted loop branch? */ -+ delta = (char *)target - (char *)(p - 2); -+ /* asm_guard* already inverted the branch, and patched the final b. */ -+ lj_assertA(checki21(delta), "branch target out of range"); -+ p[-2] = (p[-2]&0x00000fff) | RISCVF_IMMJ(delta); -+ } else { -+ /* J */ -+ delta = (char *)target - (char *)(p - 1); -+ p[-1] = RISCVI_JAL | RISCVF_IMMJ(delta); -+ } -+} -+ -+/* Fixup the tail of the loop. */ -+static void asm_loop_tail_fixup(ASMState *as) -+{ -+ UNUSED(as); /* Nothing to do(?) */ -+} -+ -+/* -- Head of trace ------------------------------------------------------- */ -+ -+/* Coalesce BASE register for a root trace. */ -+static void asm_head_root_base(ASMState *as) -+{ -+ IRIns *ir = IR(REF_BASE); -+ Reg r = ir->r; -+ if (ra_hasreg(r)) { -+ ra_free(as, r); -+ if (rset_test(as->modset, r) || irt_ismarked(ir->t)) -+ ir->r = RID_INIT; /* No inheritance for modified BASE register. */ -+ if (r != RID_BASE) -+ emit_mv(as, r, RID_BASE); -+ } -+} -+ -+/* Coalesce BASE register for a side trace. 
*/ -+static Reg asm_head_side_base(ASMState *as, IRIns *irp) -+{ -+ IRIns *ir = IR(REF_BASE); -+ Reg r = ir->r; -+ if (ra_hasreg(r)) { -+ ra_free(as, r); -+ if (rset_test(as->modset, r) || irt_ismarked(ir->t)) -+ ir->r = RID_INIT; /* No inheritance for modified BASE register. */ -+ if (irp->r == r) { -+ return r; /* Same BASE register already coalesced. */ -+ } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) { -+ emit_mv(as, r, irp->r); /* Move from coalesced parent reg. */ -+ return irp->r; -+ } else { -+ emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */ -+ } -+ } -+ return RID_NONE; -+} -+ -+/* -- Tail of trace ------------------------------------------------------- */ -+ -+/* Fixup the tail code. */ -+static void asm_tail_fixup(ASMState *as, TraceNo lnk) -+{ -+ MCode *p = as->mctop; -+ MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp; -+ int32_t spadj = as->T->spadjust; -+ if (spadj == 0) { -+ p[-3] = RISCVI_NOP; -+ // as->mctop = p-2; -+ } else { -+ /* Patch stack adjustment. */ -+ p[-3] = RISCVI_ADDI | RISCVF_D(RID_SP) | RISCVF_S1(RID_SP) | RISCVF_IMMI(spadj); -+ } -+ /* Patch exit jump. */ -+ ptrdiff_t delta = (char *)target - (char *)(p - 2); -+ p[-2] = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta)); -+ p[-1] = RISCVI_JALR | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta)); -+} -+ -+/* Prepare tail of code. */ -+static void asm_tail_prep(ASMState *as) -+{ -+ MCode *p = as->mctop - 2; /* Leave room for exitstub. */ -+ if (as->loopref) { -+ as->invmcp = as->mcp = p; -+ } else { -+ as->mcp = p-1; /* Leave room for stack pointer adjustment. */ -+ as->invmcp = NULL; -+ } -+ p[0] = p[1] = RISCVI_NOP; /* Prevent load/store merging. */ -+} -+ -+/* -- Trace setup --------------------------------------------------------- */ -+ -+/* Ensure there are enough stack slots for call arguments. 
*/ -+static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) -+{ -+ IRRef args[CCI_NARGS_MAX*2]; -+ uint32_t i, nargs = CCI_XNARGS(ci); -+ int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; -+ asm_collectargs(as, ir, ci, args); -+ for (i = 0; i < nargs; i++) { -+ if (args[i] && irt_isfp(IR(args[i])->t)) { -+ if (nfpr > 0) { -+ nfpr--; if(ci->flags & CCI_VARARG) ngpr--; -+ } else if (!(ci->flags & CCI_VARARG) && ngpr > 0) ngpr--; -+ else nslots += 2; -+ } else { -+ if (ngpr > 0) { -+ ngpr--; if(ci->flags & CCI_VARARG) nfpr--; -+ } else nslots += 2; -+ } -+ } -+ if (nslots > as->evenspill) /* Leave room for args in stack slots. */ -+ as->evenspill = nslots; -+ return REGSP_HINT(irt_isfp(ir->t) ? RID_FPRET : RID_RET); -+} -+ -+static void asm_setup_target(ASMState *as) -+{ -+ asm_sparejump_setup(as); -+ asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0)); -+} -+ -+/* -- Trace patching ------------------------------------------------------ */ -+ -+/* Patch exit jumps of existing machine code to a new target. */ -+void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) -+{ -+ MCode *p = T->mcode; -+ MCode *pe = (MCode *)((char *)p + T->szmcode); -+ MCode *px = exitstub_trace_addr(T, exitno); -+ MCode *cstart = NULL; -+ MCode *mcarea = lj_mcode_patch(J, p, 0); -+ -+ for (; p < pe; p++) { -+ /* Look for exitstub branch, replace with branch to target. */ -+ ptrdiff_t odelta = (char *)px - (char *)(p+1), -+ ndelta = (char *)target - (char *)(p+1); -+ if ((((p[0] ^ RISCVF_IMMB(8)) & 0xfe000f80u) == 0 && -+ ((p[0] & 0x0000007fu) == 0x63u) && -+ ((p[1] ^ RISCVF_IMMJ(odelta)) & 0xfffff000u) == 0 && -+ ((p[1] & 0x0000007fu) == 0x6fu) && p[-1] != RISCV_NOPATCH_GC_CHECK) || -+ (((p[1] ^ RISCVF_IMMJ(odelta)) & 0xfffff000u) == 0 && -+ ((p[1] & 0x0000007fu) == 0x6fu) && p[0] != RISCV_NOPATCH_GC_CHECK)) { -+ lj_assertJ(checki32(ndelta), "branch target out of range"); -+ /* Patch jump, if within range. 
*/ -+ patchbranch: -+ if (checki21(ndelta)) { /* Patch jump */ -+ p[1] = RISCVI_JAL | RISCVF_IMMJ(ndelta); -+ if (!cstart) cstart = p + 1; -+ } else { /* Branch out of range. Use spare jump slot in mcarea. */ -+ MCode *mcjump = asm_sparejump_use(mcarea, target); -+ if (mcjump) { -+ lj_mcode_sync(mcjump, mcjump+2); -+ ndelta = (char *)mcjump - (char *)(p+1); -+ if (checki21(ndelta)) { -+ goto patchbranch; -+ } else { -+ lj_assertJ(0, "spare jump out of range: -Osizemcode too big"); -+ } -+ } -+ /* Ignore jump slot overflow. Child trace is simply not attached. */ -+ } -+ } else if (p+2 == pe) { -+ if (p[0] == RISCVI_NOP && p[1] == RISCVI_NOP) { -+ ptrdiff_t delta = (char *)target - (char *)p; -+ lj_assertJ(checki32(delta), "jump target out of range"); -+ p[0] = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta)); -+ p[1] = RISCVI_JALR | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta)); -+ if (!cstart) cstart = p; -+ } -+ } -+ } -+ if (cstart) lj_mcode_sync(cstart, px+1); -+ lj_mcode_patch(J, mcarea, 1); -+} ---- a/src/lj_ccall.c -+++ b/src/lj_ccall.c -@@ -687,6 +687,146 @@ - if (ngpr < maxgpr) { dp = &cc->gpr[ngpr++]; goto done; } \ - } - -+#elif LJ_TARGET_RISCV64 -+/* -- RISC-V lp64d calling conventions ------------------------------------ */ -+ -+#define CCALL_HANDLE_STRUCTRET \ -+ /* Return structs of size > 16 by reference. 
*/ \ -+ cc->retref = !(sz <= 16); \ -+ if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp; -+ -+#define CCALL_HANDLE_STRUCTRET2 \ -+ CCallStructClass cl = ccall_classify_struct(cts, ctr); \ -+ CCallStructMix mix = cl.mix; \ -+ switch (mix.val) { \ -+ case MIX_IX: { \ -+ ((intptr_t *)dp)[0] = cc->gpr[0]; \ -+ break; \ -+ } \ -+ case MIX_FX: case MIX_DX: \ -+ case MIX_FF: case MIX_FD: \ -+ case MIX_DF: case MIX_DD: { \ -+ eCCallStructMixElem es[2] = { mix.e1, mix.e2 }; \ -+ for (int ti = 0; ti < 2; ti++) { \ -+ if (es[ti] == MIX_ELEM_FLOAT) { \ -+ ((float *)dp)[ti] = cc->fpr[ti].f; \ -+ } else /*if (es[ti] == MIX_ELEM_DOUBLE)*/ { \ -+ ((double *)dp)[ti] = cc->fpr[ti].d; \ -+ } \ -+ } \ -+ break; \ -+ } \ -+ case MIX_FI: case MIX_DI: \ -+ case MIX_IF: case MIX_ID: { \ -+ eCCallStructMixElem es[2] = { mix.e1, mix.e2 }; \ -+ for (int ti = 0; ti < 2; ti++) { \ -+ if (es[ti] == MIX_ELEM_FLOAT) { \ -+ ((float *)dp)[ti] = cc->fpr[0].f; \ -+ } else if (es[ti] == MIX_ELEM_DOUBLE) { \ -+ ((double *)dp)[ti] = cc->fpr[0].d; \ -+ } else /*if (es[ti] == MIX_ELEM_INT)*/ { \ -+ ((intptr_t *)dp)[ti] = cc->gpr[0]; \ -+ } \ -+ } \ -+ break; \ -+ } \ -+ case MIX_UNINIT: \ -+ case MIX_FAILED: { \ -+ memcpy(dp, sp, ctr->size); \ -+ break; \ -+ } \ -+ default: lj_assertL(0, "Invalid ret mix %d", mix.val); \ -+ } -+ -+#define CCALL_HANDLE_COMPLEXRET \ -+ /* Complex values are returned in 1 or 2 FPRs. */ \ -+ cc->retref = 0; -+ -+#define CCALL_HANDLE_COMPLEXRET2 \ -+ if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \ -+ ((float *)dp)[0] = cc->fpr[0].f; \ -+ ((float *)dp)[1] = cc->fpr[1].f; \ -+ } else { /* Copy complex double from FPRs. */ \ -+ ((double *)dp)[0] = cc->fpr[0].d; \ -+ ((double *)dp)[1] = cc->fpr[1].d; \ -+ } -+ -+#define CCALL_HANDLE_COMPLEXARG \ -+ /* Pass long double complex by reference. 
*/ \ -+ if (sz == 2*sizeof(long double)) { \ -+ rp = cdataptr(lj_cdata_new(cts, did, sz)); \ -+ sz = CTSIZE_PTR; \ -+ } \ -+ /* Pass complex in two FPRs or two GPRs or on stack. */ \ -+ else if (sz == 2*sizeof(float)) { \ -+ mix = (CCallStructMix){ .val = MIX_FF }; \ -+ sz = CTSIZE_PTR; \ -+ } else /*if (sz == 2*sizeof(double))*/ { \ -+ mix = (CCallStructMix){ .val = MIX_DD }; \ -+ sz = 2*CTSIZE_PTR; \ -+ } -+ -+#define CCALL_HANDLE_RET \ -+ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ -+ sp = (uint8_t *)&cc->fpr[0].f; -+ -+#define CCALL_HANDLE_STRUCTARG \ -+ /* Pass structs of size >16 by reference. */ \ -+ CCallStructClass cl = ccall_classify_struct(cts, d); \ -+ uint8_t ispod = cl.ispod; \ -+ mix = cl.mix; \ -+ if (!ispod && sz > 16) { \ -+ rp = cdataptr(lj_cdata_new(cts, did, sz)); \ -+ sz = CTSIZE_PTR; \ -+ } -+ -+ -+#define CCALL_HANDLE_REGARG \ -+ if (!isva) { /* Try determine MIX registers. */ \ -+ int n2 = 0; \ -+ switch (mix.val) { \ -+ case MIX_UNINIT: \ -+ if (isfp) { \ -+ n2 = 1; \ -+ break; \ -+ } \ -+ /* fallthrough */ \ -+ case MIX_FAILED: \ -+ /* MIX_[IFD]X are just like a standalone element */ \ -+ case MIX_IX: goto reghandle_gpr; \ -+ case MIX_FX: case MIX_DX: \ -+ n2 = 1; \ -+ break; \ -+ /* MIX_[FD][FD] are just like two standalone elements */ \ -+ /* fix float later */ \ -+ case MIX_FF: case MIX_DD: \ -+ case MIX_FD: case MIX_DF: \ -+ n2 = 2; \ -+ break; \ -+ /* Setup MIX_I[FD] or MIX[FD]I on stack first, fix later */ \ -+ default: goto reghandle_exit; \ -+ } \ -+ if (nfpr + n2 <= CCALL_NARG_FPR) { \ -+ dp = &cc->fpr[nfpr]; \ -+ nfpr += n2; \ -+ goto done; \ -+ } else { \ -+ if (ngpr + n2 <= maxgpr) { \ -+ dp = &cc->gpr[ngpr]; \ -+ ngpr += n2; \ -+ goto done; \ -+ } \ -+ } \ -+ } else { /* Try to pass argument in GPRs. 
*/ \ -+ reghandle_gpr: \ -+ if (ngpr + n <= maxgpr) { \ -+ dp = &cc->gpr[ngpr]; \ -+ ngpr += n; \ -+ goto done; \ -+ } \ -+ reghandle_exit: \ -+ } -+ - #else - #error "Missing calling convention definitions for this architecture" - #endif -@@ -1055,6 +1195,164 @@ static void ccall_copy_struct(CCallState - - #endif - -+/* -- RISC-V ABI struct classification ---------------------------- */ -+ -+#if LJ_TARGET_RISCV64 -+ -+/* RISC-V 64 LP64D fp reg struct classification. */ -+/* X: unknown/uninit, F: float, D: double, I: integer */ -+ -+typedef enum eCCallStructMixElem { -+ MIX_ELEM_UNINIT = 0, -+ MIX_ELEM_FLOAT = 1, -+ MIX_ELEM_DOUBLE = 2, -+ MIX_ELEM_INT = 3, -+} eCCallStructMixElem; -+typedef enum eCCallStructMix { -+ MIX_UNINIT = 0, // i.e. MIX_XX -+ MIX_FX = 1, -+ MIX_DX = 2, -+ MIX_IX = 3, -+ MIX_XF = 4, -+ MIX_FF = 5, -+ MIX_DF = 6, -+ MIX_IF = 7, -+ MIX_XD = 8, -+ MIX_FD = 9, -+ MIX_DD = 10, -+ MIX_ID = 11, -+ MIX_XI = 12, -+ MIX_FI = 13, -+ MIX_DI = 14, -+ MIX_FAILED = 15, // MIX_II but that's not mixed -+} eCCallStructMix; -+ -+typedef union CCallStructMix { -+ eCCallStructMix val : 4; -+ struct { -+ eCCallStructMixElem e1 : 2; -+ eCCallStructMixElem e2 : 2; -+ }; -+} CCallStructMix; -+ -+typedef union CCallStructClass { -+ uint32_t val; -+ struct { -+ uint8_t ispod; -+ CCallStructMix mix; -+ }; -+} CCallStructClass; -+ -+static CCallStructClass ccall_classify_struct(CTState *cts, CType *ct) -+{ -+ CTSize sz = ct->size; -+ CCallStructMix mix = { .val = MIX_UNINIT }; -+ if (ct->info & CTF_UNION) mix.val = MIX_FAILED; -+ while (ct->sib && mix.val != MIX_FAILED) { -+ unsigned int m = 1; -+ CType *sct; -+ ct = ctype_get(cts, ct->sib); -+ if (ctype_isfield(ct->info)) { -+ sct = ctype_rawchild(cts, ct); -+ if (ctype_isarray(sct->info)) { -+ CType *cct = ctype_rawchild(cts, sct); -+ if (!cct->size) continue; -+ m = sct->size / cct->size; -+ sct = cct; -+ } -+ if (ctype_isfp(sct->info)) { -+ while (m--) { -+ /* Mix state trans: fp -+ * mix XX -> mix = [FD]X -+ * mix 
X[IFD] -> mix = FAILED -+ * mix [IFD]X -> mix = [IFD][FD] -+ * mix [IFD][IFD] -> mix = FAILED -+ */ -+ eCCallStructMixElem ne = (sct->size == 4) ? MIX_ELEM_FLOAT : MIX_ELEM_DOUBLE; -+ if (mix.val == MIX_UNINIT) { -+ mix = (CCallStructMix){ .e1 = ne, .e2 = MIX_ELEM_UNINIT }; -+ } else { -+ eCCallStructMixElem o1 = mix.e1, o2 = mix.e2, n2 = ne; -+ mix = (o2 != MIX_ELEM_UNINIT ? (CCallStructMix){ .val = MIX_FAILED } -+ : (CCallStructMix){ .e1 = o1, .e2 = n2 }); -+ } -+ } -+ } else if (ctype_iscomplex(sct->info)) { -+ while (m--) { -+ /* Mix state trans: complex -+ * mix XX -> mix = [FD][FD] -+ * mix other -> mix = FAILED -+ */ -+ eCCallStructMixElem ne = (sct->size == 8) ? MIX_ELEM_FLOAT : MIX_ELEM_DOUBLE; -+ mix = (mix.val == MIX_UNINIT) ? (CCallStructMix){ .e1 = ne, .e2 = ne } -+ : (CCallStructMix){ .val = MIX_FAILED }; -+ } -+ } else if (ctype_isinteger_or_bool(sct->info) || ctype_isenum(sct->info)) { -+ while (m--) { -+ /* Mix state trans: int -+ * mix XX -> mix = IX -+ * mix X[IFD] -> mix = FAILED -+ * mix [IFD]X -> mix = [FD]I; this auto fails II -+ * mix [IFD][IFD] -> mix = FAILED -+ */ -+ if (mix.val == MIX_UNINIT) { -+ mix = (CCallStructMix){ .e1 = MIX_ELEM_INT, .e2 = MIX_ELEM_UNINIT }; -+ } else { -+ eCCallStructMixElem o1 = mix.e1, o2 = mix.e2, n2 = MIX_ELEM_INT; -+ mix = (o2 != MIX_ELEM_UNINIT ? 
(CCallStructMix){ .val = MIX_FAILED } -+ : (CCallStructMix){ .e1 = o1, .e2 = n2 }); -+ } -+ } -+ } else if (ctype_isstruct(sct->info)) { -+ goto substruct; -+ } else { -+ goto not_ag; -+ } -+ } else if (ctype_isbitfield(ct->info)) { -+ goto not_ag; -+ } else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) { -+ sct = ctype_rawchild(cts, ct); -+ substruct: -+ if (sct->size > 0) { -+ CCallStructClass s = ccall_classify_struct(cts, sct); -+ CCallStructMix smix = s.mix; -+ uint8_t spod = s.ispod; -+ if (smix.val == MIX_FAILED) mix.val = MIX_FAILED; -+ if (!spod) goto not_ag; -+ while (m--) { -+ /* Mix state transfer: substruct -+ * mix XX, smix any -> mix = smix -+ * mix X[IFD], smix any -> mix = FAILED -+ * mix [IFD]X, smix [XIFD]X -> mix = [IFD][XIFD] -+ * smix other -> mix = FAILED -+ * mix [IFD][IFD], smix XX -> mix = mix -+ * smix other -> mix = FAILED; this keep II fail -+ */ -+ if (mix.val == MIX_UNINIT) { -+ mix = smix; -+ } else { -+ eCCallStructMixElem o1 = mix.e1, o2 = mix.e2; -+ eCCallStructMixElem n1 = smix.e1, n2 = smix.e2; -+ if (o2 != MIX_ELEM_UNINIT) { -+ mix = (smix.val != MIX_UNINIT) ? mix -+ : (CCallStructMix){ .val = MIX_FAILED }; -+ } else { -+ mix = (n2 != MIX_ELEM_UNINIT) ? (CCallStructMix){ .val = MIX_FAILED } -+ : (CCallStructMix){ .e1 = o1, .e2 = n1 }; -+ } -+ } -+ } -+ } -+ } -+ } -+ if (MIX_UNINIT < mix.val && mix.val < MIX_FAILED) /* Mixed passing */ -+ return (CCallStructClass){ .ispod = 1, .mix = mix }; -+not_ag: /* Not a float/double aggregate or int/fp mix pair aggregate */ -+ return (CCallStructClass){ .ispod = (sz <= 16), .mix = mix }; /* Return structs of size <= 16 in GPRs. */ -+} -+ -+#endif -+ - /* -- Common C call handling ---------------------------------------------- */ - - /* Infer the destination CTypeID for a vararg argument. -@@ -1106,6 +1404,7 @@ static int ccall_set_args(lua_State *L, - #endif - #endif - -+ - /* Clear unused regs to get some determinism in case of misdeclaration. 
*/ - memset(cc->gpr, 0, sizeof(cc->gpr)); - #if CCALL_NUM_FPR -@@ -1168,6 +1467,11 @@ static int ccall_set_args(lua_State *L, - CType *d; - CTSize sz; - MSize n, isfp = 0, isva = 0; -+#if LJ_TARGET_RISCV64 -+ MSize onsp, mnsp; -+ CCallStructMix mix = { .val = MIX_UNINIT }; -+ int isstack = 0; -+#endif - void *dp, *rp = NULL; - #if LJ_TARGET_PPC && LJ_ARCH_BITS == 64 - int isf32 = 0; -@@ -1216,6 +1520,10 @@ static int ccall_set_args(lua_State *L, - #if LJ_TARGET_S390X - onstack = 1; - #endif -+#if LJ_TARGET_RISCV64 -+ isstack = 1; -+ onsp = nsp; -+#endif - if (CCALL_ALIGN_STACKARG) { /* Align argument on stack. */ - MSize align = (1u << ctype_align(d->info)) - 1; - if (rp || (CCALL_PACK_STACKARG && isva && align < CTSIZE_PTR-1)) -@@ -1228,6 +1536,9 @@ static int ccall_set_args(lua_State *L, - #else - dp = ((uint8_t *)cc->stack) + nsp; - #endif -+#if LJ_TARGET_RISCV64 -+ mnsp = nsp + n * CTSIZE_PTR / 2; -+#endif - nsp += CCALL_PACK_STACKARG ? sz : n * CTSIZE_PTR; - if ((int32_t)nsp > CCALL_SIZE_STACK) { /* Too many arguments. 
*/ - err_nyi: -@@ -1295,7 +1606,11 @@ static int ccall_set_args(lua_State *L, - *(int64_t *)dp = (int64_t)*(int32_t *)dp; - } - #endif --#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) -+#if LJ_TARGET_RISCV64 -+ if (isfp && d->size == sizeof(float)) -+ ((uint32_t *)dp)[1] = 0xffffffffu; /* Float NaN boxing */ -+#endif -+#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_RISCV64 - if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) - #if LJ_TARGET_MIPS64 - || (isfp && nsp == 0) -@@ -1335,6 +1650,107 @@ static int ccall_set_args(lua_State *L, - CTSize i = (sz >> 2) - 1; - do { ((uint64_t *)dp)[i] = ((uint32_t *)dp)[i]; } while (i--); - } -+#elif LJ_TARGET_RISCV64 -+ switch (mix.val) { -+ case MIX_UNINIT: -+ break; -+ /* Fix MIX values */ -+ case MIX_DF: { -+ ((uint32_t *)dp)[3] = 0xffffffffu; -+ break; -+ } -+ case MIX_FF: -+ if (isstack) break; -+ ((uint64_t *)dp)[1] = 0xffffffff00000000ul | ((uint32_t *)dp)[1]; -+ case MIX_FX: -+ case MIX_FD: { -+ ((uint64_t *)dp)[0] = 0xffffffff00000000ul | ((uint32_t *)dp)[0]; -+ break; -+ } -+ case MIX_FI: { -+ lj_assertL(sz == 8 || sz == 16, "invalid MIX_FI size %d", (int)sz); -+ if (ngpr >= CCALL_NARG_GPR) break; -+ if (sz == 8) { -+ FPRArg farg = { .hi = 0xffffffffu, .lo = ((uint32_t *)dp)[0] }; -+ if (!isva && nfpr + 1 <= CCALL_NARG_FPR) { -+ cc->fpr[nfpr++] = farg; -+ goto fi_next; -+ } else if (ngpr + 1 <= CCALL_NARG_GPR) { -+ cc->gpr[ngpr++] = farg.u; -+fi_next: -+ ((uint32_t *)dp)[0] = ((uint32_t *)dp)[1]; -+ ((uint32_t *)dp)[1] = 0; -+ if (ngpr + 1 <= CCALL_NARG_GPR) { -+ cc->gpr[ngpr++] = ((uint32_t *)dp)[0]; -+ ((uint32_t *)dp)[0] = 0, nsp = onsp; -+ } -+ } -+ break; -+ } else /*if (sz == 16)*/ { -+ ((uint64_t *)dp)[0] |= 0xffffffff00000000ul; -+ /* fallthrough */ -+ } -+ } -+ case MIX_DI: { -+ lj_assertL(sz == 16, "invalid MIX_DI size %d", (int)sz); -+ if (ngpr >= CCALL_NARG_GPR) break; -+ if (!isva && nfpr + 1 <= CCALL_NARG_FPR) { -+ cc->fpr[nfpr++] = (FPRArg){ .u = ((uint64_t *)dp)[0] }; 
-+ goto di_next; -+ } else if (ngpr + 1 <= CCALL_NARG_GPR) { -+ cc->gpr[ngpr++] = ((uint64_t *)dp)[0]; -+di_next: -+ ((uint64_t *)dp)[0] = ((uint64_t *)dp)[1]; -+ ((uint64_t *)dp)[1] = 0, nsp = mnsp; -+ if (ngpr + 1 <= CCALL_NARG_GPR) { -+ cc->gpr[ngpr++] = ((uint64_t *)dp)[0]; -+ ((uint64_t *)dp)[0] = 0, nsp = onsp; -+ } -+ } -+ break; -+ } -+ case MIX_IF: { -+ lj_assertL(sz == 8 || sz == 16, "invalid MIX_IF size %d", (int)sz); -+ if (sz == 8) { -+ FPRArg farg = { .hi = 0xffffffffu, .lo = ((uint32_t *)dp)[1] }; -+ if (ngpr + 1 <= CCALL_NARG_GPR) { -+ cc->gpr[ngpr++] = ((uint32_t *)dp)[0]; -+ ((uint32_t *)dp)[0] = ((uint32_t *)dp)[1]; -+ ((uint32_t *)dp)[1] = 0; -+ if (!isva && nfpr + 1 <= CCALL_NARG_FPR) { -+ cc->fpr[nfpr++] = farg; -+ goto if_next; -+ } else if (ngpr + 1 <= CCALL_NARG_GPR) { -+ cc->gpr[ngpr++] = farg.u; -+if_next: -+ ((uint32_t *)dp)[0] = 0, nsp = onsp; -+ } -+ } -+ break; -+ } else /*if (sz == 16)*/ { -+ ((uint64_t *)dp)[1] |= 0xffffffff00000000ul; -+ /* fallthrough */ -+ } -+ } -+ case MIX_ID: { -+ lj_assertL(sz == 16, "invalid MIX_ID size %d", (int)sz); -+ if (ngpr + 1 <= CCALL_NARG_GPR) { -+ cc->gpr[ngpr++] = ((uint64_t *)dp)[0]; -+ ((uint64_t *)dp)[0] = ((uint64_t *)dp)[1]; -+ ((uint64_t *)dp)[1] = 0, nsp = mnsp; -+ if (!isva && nfpr + 1 <= CCALL_NARG_FPR) { -+ cc->fpr[nfpr++] = (FPRArg){ .u = ((uint64_t *)dp)[0] }; -+ goto id_next; -+ } else if (ngpr + 1 <= CCALL_NARG_GPR) { -+ cc->gpr[ngpr++] = ((uint64_t *)dp)[0]; -+id_next: -+ ((uint64_t *)dp)[0] = 0, nsp = onsp; -+ } -+ } -+ break; -+ } -+ default: break; -+ } - #else - UNUSED(isfp); - #endif ---- a/src/lj_ccall.h -+++ b/src/lj_ccall.h -@@ -157,6 +157,23 @@ typedef union FPRArg { - float f; - } FPRArg; - -+#elif LJ_TARGET_RISCV64 -+ -+#define CCALL_NARG_GPR 8 -+#define CCALL_NARG_FPR 8 -+#define CCALL_NRET_GPR 2 -+#define CCALL_NRET_FPR 2 -+#define CCALL_SPS_EXTRA 3 -+#define CCALL_SPS_FREE 1 -+ -+typedef intptr_t GPRArg; -+typedef union FPRArg { -+ double d; -+ uint64_t u; -+ struct { 
LJ_ENDIAN_LOHI(float f; , float g;) }; -+ struct { LJ_ENDIAN_LOHI(uint32_t lo; , uint32_t hi;) }; -+} FPRArg; -+ - #else - #error "Missing calling convention definitions for this architecture" - #endif ---- a/src/lj_ccallback.c -+++ b/src/lj_ccallback.c -@@ -102,6 +102,10 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs - - #define CALLBACK_MCODE_HEAD 52 - -+#elif LJ_TARGET_RISCV64 -+ -+#define CALLBACK_MCODE_HEAD 68 -+ - #else - - /* Missing support for this architecture. */ -@@ -315,6 +319,39 @@ static void *callback_mcode_init(global_ - } - return p; - } -+#elif LJ_TARGET_RISCV64 -+static void *callback_mcode_init(global_State *g, uint32_t *page) -+{ -+ uint32_t *p = page; -+ uintptr_t target = (uintptr_t)(void *)lj_vm_ffi_callback; -+ uintptr_t ug = (uintptr_t)(void *)g; -+ uintptr_t target_hi = (target >> 32), target_lo = target & 0xffffffffULL; -+ uintptr_t ug_hi = (ug >> 32), ug_lo = ug & 0xffffffffULL; -+ MSize slot; -+ *p++ = RISCVI_LUI | RISCVF_D(RID_X6) | RISCVF_IMMU(RISCVF_HI(target_hi)); -+ *p++ = RISCVI_LUI | RISCVF_D(RID_X7) | RISCVF_IMMU(RISCVF_HI(ug_hi)); -+ *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI(RISCVF_LO(target_hi)); -+ *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI(RISCVF_LO(ug_hi)); -+ *p++ = RISCVI_SLLI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_SHAMT(11); -+ *p++ = RISCVI_SLLI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_SHAMT(11); -+ *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI(target_lo >> 21); -+ *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI(ug_lo >> 21); -+ *p++ = RISCVI_SLLI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_SHAMT(11); -+ *p++ = RISCVI_SLLI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_SHAMT(11); -+ *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI((target_lo >> 10) & 0x7ff); -+ *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI((ug_lo >> 10) & 0x7ff); -+ *p++ = 
RISCVI_SLLI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_SHAMT(10); -+ *p++ = RISCVI_SLLI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_SHAMT(10); -+ *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI(target_lo & 0x3ff); -+ *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI(ug_lo & 0x3ff); -+ *p++ = RISCVI_JALR | RISCVF_D(RID_X0) | RISCVF_S1(RID_X6) | RISCVF_IMMJ(0); -+ for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { -+ *p++ = RISCVI_LUI | RISCVF_D(RID_X5) | RISCVF_IMMU(slot); -+ *p = RISCVI_JAL | RISCVF_IMMJ(((char *)page-(char *)p)); -+ p++; -+ } -+ return p; -+} - #else - /* Missing support for this architecture. */ - #define callback_mcode_init(g, p) (p) -@@ -617,6 +654,31 @@ void lj_ccallback_mcode_free(CTState *ct - if (ngpr < maxgpr) { sp = &cts->cb.gpr[ngpr++]; goto done; } \ - } - -+#elif LJ_TARGET_RISCV64 -+ -+#define CALLBACK_HANDLE_REGARG \ -+ if (isfp) { \ -+ if (nfpr + n <= CCALL_NARG_FPR) { \ -+ sp = &cts->cb.fpr[nfpr]; \ -+ nfpr += n; \ -+ goto done; \ -+ } else if (ngpr + n <= maxgpr) { \ -+ sp = &cts->cb.gpr[ngpr]; \ -+ ngpr += n; \ -+ goto done; \ -+ } \ -+ } else { \ -+ if (ngpr + n <= maxgpr) { \ -+ sp = &cts->cb.gpr[ngpr]; \ -+ ngpr += n; \ -+ goto done; \ -+ } \ -+ } -+ -+#define CALLBACK_HANDLE_RET \ -+ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ -+ ((float *)dp)[1] = *(float *)dp; -+ - #else - #error "Missing calling convention definitions for this architecture" - #endif -@@ -772,7 +834,7 @@ static void callback_conv_result(CTState - *(int64_t *)dp = (int64_t)*(int32_t *)dp; - } - #endif --#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) -+#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_RISCV64 - /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */ - if (ctr->size <= 4 && - (LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info))) ---- /dev/null -+++ b/src/lj_emit_riscv.h -@@ -0,0 +1,574 @@ -+/* -+** RISC-V instruction emitter. 
-+** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h -+** -+** Contributed by gns from PLCT Lab, ISCAS. -+*/ -+ -+static intptr_t get_k64val(ASMState *as, IRRef ref) -+{ -+ IRIns *ir = IR(ref); -+ if (ir->o == IR_KINT64) { -+ return (intptr_t)ir_kint64(ir)->u64; -+ } else if (ir->o == IR_KGC) { -+ return (intptr_t)ir_kgc(ir); -+ } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { -+ return (intptr_t)ir_kptr(ir); -+ } else { -+ lj_assertA(ir->o == IR_KINT || ir->o == IR_KNULL, -+ "bad 64 bit const IR op %d", ir->o); -+ return ir->i; /* Sign-extended. */ -+ } -+} -+ -+#define get_kval(as, ref) get_k64val(as, ref) -+ -+/* -- Emit basic instructions --------------------------------------------- */ -+ -+static void emit_r(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg rs2) -+{ -+ *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_S1(rs1) | RISCVF_S2(rs2); -+} -+ -+#define emit_ds(as, riscvi, rd, rs1) emit_r(as, riscvi, rd, rs1, 0) -+#define emit_ds2(as, riscvi, rd, rs2) emit_r(as, riscvi, rd, 0, rs2) -+#define emit_ds1s2(as, riscvi, rd, rs1, rs2) emit_r(as, riscvi, rd, rs1, rs2) -+ -+static void emit_r4(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg rs2, Reg rs3) -+{ -+ *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_S3(rs3); -+} -+ -+#define emit_ds1s2s3(as, riscvi, rd, rs1, rs2, rs3) emit_r4(as, riscvi, rd, rs1, rs2, rs3) -+ -+static void emit_i(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, int32_t i) -+{ -+ *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_S1(rs1) | RISCVF_IMMI((uint32_t)i & 0xfff); -+} -+ -+#define emit_di(as, riscvi, rd, i) emit_i(as, riscvi, rd, 0, i) -+#define emit_dsi(as, riscvi, rd, rs1, i) emit_i(as, riscvi, rd, rs1, i) -+#define emit_dsshamt(as, riscvi, rd, rs1, i) emit_i(as, riscvi, rd, rs1, i&0x3f) -+ -+static void emit_s(ASMState *as, RISCVIns riscvi, Reg rs1, Reg rs2, int32_t i) -+{ -+ *--as->mcp = riscvi | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMS((uint32_t)i & 0xfff); -+} -+ -+#define 
emit_s1s2i(as, riscvi, rs1, rs2, i) emit_s(as, riscvi, rs1, rs2, i) -+ -+/* -+static void emit_b(ASMState *as, RISCVIns riscvi, Reg rs1, Reg rs2, int32_t i) -+{ -+ *--as->mcp = riscvi | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB((uint32_t)i & 0x1ffe); -+} -+*/ -+ -+static void emit_u(ASMState *as, RISCVIns riscvi, Reg rd, uint32_t i) -+{ -+ *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_IMMU(i & 0xfffff); -+} -+ -+#define emit_du(as, riscvi, rd, i) emit_u(as, riscvi, rd, i) -+ -+/* -+static void emit_j(ASMState *as, RISCVIns riscvi, Reg rd, int32_t i) -+{ -+ *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_IMMJ((uint32_t)i & 0x1fffffe); -+} -+*/ -+ -+static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); -+static void ra_allockreg(ASMState *as, intptr_t k, Reg r); -+static Reg ra_scratch(ASMState *as, RegSet allow); -+ -+static void emit_lso(ASMState *as, RISCVIns riscvi, Reg data, Reg base, int32_t ofs) -+{ -+ lj_assertA(checki12(ofs), "load/store offset %d out of range", ofs); -+ switch (riscvi) { -+ case RISCVI_LD: case RISCVI_LW: case RISCVI_LH: case RISCVI_LB: -+ case RISCVI_LWU: case RISCVI_LHU: case RISCVI_LBU: -+ case RISCVI_FLW: case RISCVI_FLD: -+ emit_dsi(as, riscvi, data, base, ofs); -+ break; -+ case RISCVI_SD: case RISCVI_SW: case RISCVI_SH: case RISCVI_SB: -+ case RISCVI_FSW: case RISCVI_FSD: -+ emit_s1s2i(as, riscvi, base, data, ofs); -+ break; -+ default: lj_assertA(0, "invalid lso"); break; -+ } -+} -+ -+static void emit_roti(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg tmp, -+ int32_t shamt) -+{ -+ if (as->flags & JIT_F_RVZbb || as->flags & JIT_F_RVXThead) { -+ if (!(as->flags & JIT_F_RVZbb)) switch (riscvi) { -+ case RISCVI_RORI: riscvi = RISCVI_TH_SRRI; break; -+ case RISCVI_RORIW: riscvi = RISCVI_TH_SRRIW; break; -+ default: lj_assertA(0, "invalid roti op"); break; -+ } -+ emit_dsshamt(as, riscvi, rd, rs1, shamt); -+ } else { -+ RISCVIns ai, bi; -+ int32_t shwid, shmsk; -+ switch (riscvi) { -+ case RISCVI_RORI: -+ ai = RISCVI_SRLI, bi 
= RISCVI_SLLI; -+ shwid = 64, shmsk = 63; -+ break; -+ case RISCVI_RORIW: -+ ai = RISCVI_SRLIW, bi = RISCVI_SLLIW; -+ shwid = 32, shmsk = 31; -+ break; -+ default: -+ lj_assertA(0, "invalid roti op"); -+ return; -+ } -+ emit_ds1s2(as, RISCVI_OR, rd, rd, tmp); -+ emit_dsshamt(as, bi, rd, rs1, (shwid - shamt)&shmsk); -+ emit_dsshamt(as, ai, tmp, rs1, shamt&shmsk); -+ } -+} -+ -+static void emit_rot(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg rs2, Reg tmp) -+{ -+ if (as->flags & JIT_F_RVZbb) { -+ emit_ds1s2(as, riscvi, rd, rs1, rs2); -+ } else { -+ RISCVIns sai, sbi; -+ switch (riscvi) { -+ case RISCVI_ROL: -+ sai = RISCVI_SLL, sbi = RISCVI_SRL; -+ break; -+ case RISCVI_ROR: -+ sai = RISCVI_SRL, sbi = RISCVI_SLL; -+ break; -+ case RISCVI_ROLW: -+ sai = RISCVI_SLLW, sbi = RISCVI_SRLW; -+ break; -+ case RISCVI_RORW: -+ sai = RISCVI_SRLW, sbi = RISCVI_SLLW; -+ break; -+ default: -+ lj_assertA(0, "invalid rot op"); -+ return; -+ } -+ if (rd == rs2) { -+ emit_ds1s2(as, RISCVI_OR, rd, rd, tmp); -+ emit_ds1s2(as, sbi, tmp, rs1, tmp); -+ emit_ds1s2(as, sai, rd, rs1, rs2); -+ emit_ds2(as, RISCVI_NEG, tmp, rs2); -+ } else { -+ emit_ds1s2(as, RISCVI_OR, rd, rd, tmp); -+ emit_ds1s2(as, sai, rd, rs1, rs2); -+ emit_ds1s2(as, sbi, tmp, rs1, tmp); -+ emit_ds2(as, RISCVI_NEG, tmp, rs2); -+ } -+ } -+} -+ -+static void emit_ext(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1) -+{ -+ if ((riscvi != RISCVI_ZEXT_W && as->flags & JIT_F_RVZbb) || -+ (riscvi == RISCVI_ZEXT_W && as->flags & JIT_F_RVZba)) { -+ emit_ds(as, riscvi, rd, rs1); -+ } else if (as->flags & JIT_F_RVXThead) { -+ uint32_t hi, sext; -+ switch (riscvi) { -+ case RISCVI_ZEXT_B: -+ case RISCVI_SEXT_W: -+ emit_ds(as, riscvi, rd, rs1); -+ return; -+ case RISCVI_ZEXT_H: -+ hi = 15, sext = 0; -+ break; -+ case RISCVI_ZEXT_W: -+ hi = 31, sext = 0; -+ break; -+ case RISCVI_SEXT_B: -+ hi = 7, sext = 1; -+ break; -+ case RISCVI_SEXT_H: -+ hi = 15, sext = 1; -+ break; -+ default: -+ lj_assertA(0, "invalid ext op"); -+ return; 
-+ } -+ emit_dsi(as, sext ? RISCVI_TH_EXT : RISCVI_TH_EXTU, -+ rd, rs1, hi << 6); -+ } else { -+ RISCVIns sli, sri; -+ int32_t shamt; -+ switch (riscvi) { -+ case RISCVI_ZEXT_B: -+ case RISCVI_SEXT_W: -+ emit_ds(as, riscvi, rd, rs1); -+ return; -+ case RISCVI_ZEXT_H: -+ sli = RISCVI_SLLI, sri = RISCVI_SRLI; -+ shamt = 48; -+ break; -+ case RISCVI_ZEXT_W: -+ sli = RISCVI_SLLI, sri = RISCVI_SRLI; -+ shamt = 32; -+ break; -+ case RISCVI_SEXT_B: -+ sli = RISCVI_SLLI, sri = RISCVI_SRAI; -+ shamt = 56; -+ break; -+ case RISCVI_SEXT_H: -+ sli = RISCVI_SLLI, sri = RISCVI_SRAI; -+ shamt = 48; -+ break; -+ default: -+ lj_assertA(0, "invalid ext op"); -+ return; -+ } -+ emit_dsshamt(as, sri, rd, rd, shamt); -+ emit_dsshamt(as, sli, rd, rs1, shamt); -+ } -+} -+ -+static void emit_cleartp(ASMState *as, Reg rd, Reg rs1) -+{ -+ if (as->flags & JIT_F_RVXThead) { -+ emit_dsi(as, RISCVI_TH_EXTU, rd, rs1, 46u << 6); -+ } else { -+ emit_dsshamt(as, RISCVI_SRLI, rd, rd, 17); -+ emit_dsshamt(as, RISCVI_SLLI, rd, rs1, 17); -+ } -+} -+ -+/* -+static void emit_andn(ASMState *as, Reg rd, Reg rs1, Reg rs2, Reg tmp) -+{ -+ if (as->flags & JIT_F_RVZbb) { -+ emit_ds1s2(as, RISCVI_ANDN, rd, rs1, rs2); -+ } else { -+ emit_ds1s2(as, RISCVI_AND, rd, rs1, tmp); -+ emit_ds(as, RISCVI_NOT, tmp, rs2); -+ } -+} -+*/ -+ -+/* -+static void emit_orn(ASMState *as, Reg rd, Reg rs1, Reg rs2, Reg tmp) -+{ -+ if (as->flags & JIT_F_RVZbb) { -+ emit_ds1s2(as, RISCVI_ORN, rd, rs1, rs2); -+ } else { -+ emit_ds1s2(as, RISCVI_OR, rd, rs1, tmp); -+ emit_ds(as, RISCVI_NOT, tmp, rs2); -+ } -+} -+*/ -+ -+static void emit_xnor(ASMState *as, Reg rd, Reg rs1, Reg rs2) -+{ -+ if (as->flags & JIT_F_RVZbb) { -+ emit_ds1s2(as, RISCVI_XNOR, rd, rs1, rs2); -+ } else { -+ emit_ds(as, RISCVI_NOT, rd, rd); -+ emit_ds1s2(as, RISCVI_XOR, rd, rs1, rs2); -+ } -+} -+ -+static void emit_shxadd(ASMState *as, Reg rd, Reg rs1, Reg rs2, Reg tmp, unsigned int shamt) -+{ -+ if (as->flags & JIT_F_RVZba) { -+ switch (shamt) { -+ case 1: 
emit_ds1s2(as, RISCVI_SH1ADD, rd, rs2, rs1); break; -+ case 2: emit_ds1s2(as, RISCVI_SH2ADD, rd, rs2, rs1); break; -+ case 3: emit_ds1s2(as, RISCVI_SH3ADD, rd, rs2, rs1); break; -+ default: return; -+ } -+ } else if (as->flags & JIT_F_RVXThead) { -+ emit_dsi(as, RISCVI_TH_ADDSL|RISCVF_IMMI(shamt<<5), rd, rs1, rs2); -+ } else { -+ emit_ds1s2(as, RISCVI_ADD, rd, rs1, tmp); -+ emit_dsshamt(as, RISCVI_SLLI, tmp, rs2, shamt); -+ } -+} -+ -+#define emit_sh1add(as, rd, rs1, rs2, tmp) emit_shxadd(as, rd, rs1, rs2, tmp, 1) -+#define emit_sh2add(as, rd, rs1, rs2, tmp) emit_shxadd(as, rd, rs1, rs2, tmp, 2) -+#define emit_sh3add(as, rd, rs1, rs2, tmp) emit_shxadd(as, rd, rs1, rs2, tmp, 3) -+ -+static void emit_loadk12(ASMState *as, Reg rd, int32_t i) -+{ -+ emit_di(as, RISCVI_ADDI, rd, i); -+} -+ -+static void emit_loadk32(ASMState *as, Reg rd, int32_t i) -+{ -+ if (checki12((int64_t)i)) { -+ emit_loadk12(as, rd, i); -+ } else { -+ if(LJ_UNLIKELY(RISCVF_HI((uint32_t)i) == 0x80000u && i > 0)) -+ emit_dsi(as, RISCVI_XORI, rd, rd, RISCVF_LO(i)); -+ else -+ emit_dsi(as, RISCVI_ADDI, rd, rd, RISCVF_LO(i)); -+ emit_du(as, RISCVI_LUI, rd, RISCVF_HI((uint32_t)i)); -+ } -+} -+ -+/* -- Emit loads/stores --------------------------------------------------- */ -+ -+/* Prefer rematerialization of BASE/L from global_State over spills. */ -+#define emit_canremat(ref) ((ref) <= REF_BASE) -+ -+ -+/* Load a 32 bit constant into a GPR. */ -+#define emit_loadi(as, r, i) emit_loadk32(as, r, i); -+ -+/* Load a 64 bit constant into a GPR. 
*/ -+static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) -+{ -+ int64_t u64_delta = (int64_t)((intptr_t)u64 - (intptr_t)(as->mcp - 2)); -+ if (checki32((int64_t)u64)) { -+ emit_loadk32(as, r, (int32_t)u64); -+ } else if (checki32auipc(u64_delta)) { -+ emit_dsi(as, RISCVI_ADDI, r, r, RISCVF_LO(u64_delta)); -+ emit_du(as, RISCVI_AUIPC, r, RISCVF_HI(u64_delta)); -+ } else { -+ uint32_t lo32 = u64 & 0xfffffffful; -+ if (checku11(lo32)) { -+ if (lo32 > 0) emit_dsi(as, RISCVI_ADDI, r, r, lo32); -+ emit_dsshamt(as, RISCVI_SLLI, r, r, 32); -+ } else { -+ RISCVIns li_insn[7] = {0}; -+ int shamt = 0, step = 0; -+ for(int bit = 0; bit < 32; bit++) { -+ if (lo32 & (1u << bit)) { -+ if (shamt) li_insn[step++] = RISCVI_SLLI | RISCVF_D(r) | RISCVF_S1(r) | RISCVF_IMMI(shamt); -+ int inc = bit+10 > 31 ? 31-bit : 10; -+ bit += inc, shamt = inc+1; -+ uint32_t msk = ((1ul << (bit+1))-1)^((1ul << (((bit-inc) >= 0) ? (bit-inc) : 0))-1); -+ uint16_t payload = (lo32 & msk) >> (((bit-inc) >= 0) ? (bit-inc) : 0); -+ li_insn[step++] = RISCVI_ADDI | RISCVF_D(r) | RISCVF_S1(r) | RISCVF_IMMI(payload); -+ } else shamt++; -+ } -+ if (shamt) li_insn[step++] = RISCVI_SLLI | RISCVF_D(r) | RISCVF_S1(r) | RISCVF_IMMI(shamt); -+ -+ if (step < 6) { -+ for(int i = 0; i < step; i++) -+ *--as->mcp = li_insn[i]; -+ } else { -+ emit_dsi(as, RISCVI_ADDI, r, r, u64 & 0x3ff); -+ emit_dsshamt(as, RISCVI_SLLI, r, r, 10); -+ emit_dsi(as, RISCVI_ADDI, r, r, (u64 >> 10) & 0x7ff); -+ emit_dsshamt(as, RISCVI_SLLI, r, r, 11); -+ emit_dsi(as, RISCVI_ADDI, r, r, (u64 >> 21) & 0x7ff); -+ emit_dsshamt(as, RISCVI_SLLI, r, r, 11); -+ } -+ } -+ -+ uint32_t hi32 = u64 >> 32; -+ if (hi32 & 0xfff) emit_loadk32(as, r, hi32); -+ else emit_du(as, RISCVI_LUI, r, hi32 >> 12); -+ } -+} -+ -+#define emit_loada(as, r, addr) emit_loadu64(as, (r), u64ptr((addr))) -+ -+/* Get/set from constant pointer. 
*/ -+static void emit_lsptr(ASMState *as, RISCVIns riscvi, Reg r, void *p, RegSet allow) -+{ -+ emit_lso(as, riscvi, r, ra_allock(as, igcptr(p), allow), 0); -+} -+ -+/* Load 64 bit IR constant into register. */ -+static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) -+{ -+ const uint64_t *k = &ir_k64(ir)->u64; -+ Reg r64 = r; -+ if (rset_test(RSET_FPR, r)) { -+ if (as->flags & JIT_F_RVZfa) { -+ uint8_t sign = (*k >> 63) & 1; -+ uint16_t k_hi16 = (*k >> 48) & 0xffff; -+ uint64_t k_lo48 = *k & 0xffffffffffff; -+ uint16_t mk_hi16 = k_hi16 & 0x7fff; -+ if (!k_lo48) { -+ if (riscv_fli_map_hi16[0] == k_hi16) { -+ emit_ds(as, RISCVI_FLI_D, r, 0); -+ return; -+ } -+ for (int i = 1; i < 32; i++) { -+ if (riscv_fli_map_hi16[i] == mk_hi16) { -+ if (sign) -+ emit_ds1s2(as, RISCVI_FNEG_D, r, r, r); -+ emit_ds(as, RISCVI_FLI_D, r, i); -+ return; -+ } -+ } -+ } -+ } -+ r64 = RID_TMP; -+ emit_ds(as, RISCVI_FMV_D_X, r, r64); -+ } -+ emit_loadu64(as, r64, *k); -+} -+ -+/* Get/set global_State fields. */ -+static void emit_lsglptr(ASMState *as, RISCVIns riscvi, Reg r, int32_t ofs) -+{ -+ emit_lso(as, riscvi, r, RID_GL, ofs); -+} -+ -+#define emit_getgl(as, r, field) \ -+ emit_lsglptr(as, RISCVI_LD, (r), (int32_t)offsetof(global_State, field)) -+#define emit_setgl(as, r, field) \ -+ emit_lsglptr(as, RISCVI_SD, (r), (int32_t)offsetof(global_State, field)) -+ -+/* Trace number is determined from per-trace exit stubs. */ -+#define emit_setvmstate(as, i) UNUSED(i) -+ -+/* -- Emit control-flow instructions -------------------------------------- */ -+ -+/* Label for internal jumps. */ -+typedef MCode *MCLabel; -+ -+/* Return label pointing to current PC. 
*/ -+#define emit_label(as) ((as)->mcp) -+ -+static void emit_branch(ASMState *as, RISCVIns riscvi, Reg rs1, Reg rs2, MCode *target, int jump) -+{ -+ MCode *p = as->mcp; -+ ptrdiff_t delta = (char *)target - (char *)(p - 1); -+ switch (jump) { -+ case -1: -+ lj_assertA(RISCVF_SIMM_OK(delta, 13), "branch target out of range"); /* B */ -+ *--p = riscvi | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB(delta); -+ break; -+ case 0: case 1: -+ lj_assertA(RISCVF_SIMM_OK(delta, 21), "branch target out of range"); /* ^B+J */ -+ if (checki13(delta) && !jump) { -+ *--p = riscvi | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB(delta); -+ *--p = RISCVI_NOP; -+ } else { -+ *--p = RISCVI_JAL | RISCVF_IMMJ(delta); /* Poorman's trampoline */ -+ *--p = (riscvi^0x00001000) | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB(8); -+ } -+ break; -+ default: -+ lj_assertA(0, "invalid jump type"); -+ break; -+ } -+ as->mcp = p; -+} -+ -+static void emit_jump(ASMState *as, MCode *target, int jump) -+{ -+ MCode *p = as->mcp; -+ ptrdiff_t delta; -+ switch(jump) { -+ case -1: -+ delta = (char *)target - (char *)(p - 1); -+ lj_assertA(RISCVF_SIMM_OK(delta, 21), "jump target out of range"); /* J */ -+ *--p = RISCVI_JAL | RISCVF_IMMJ(delta); -+ break; -+ case 0: case 1: -+ delta = (char *)target - (char *)(p - 2); -+ lj_assertA(checki32auipc(delta), "jump target out of range"); /* AUIPC+JALR */ -+ if (checki21(delta) && !jump) { -+ *--p = RISCVI_NOP; -+ *--p = RISCVI_JAL | RISCVF_IMMJ(delta); -+ } else { -+ *--p = RISCVI_JALR | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta)); -+ *--p = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta)); -+ } -+ break; -+ default: -+ lj_assertA(0, "invalid jump type"); -+ break; -+ } -+ as->mcp = p; -+} -+ -+#define emit_jmp(as, target) emit_jump(as, target, 0) -+ -+#define emit_mv(as, dst, src) \ -+ emit_ds(as, RISCVI_MV, (dst), (src)) -+ -+static void emit_call(ASMState *as, void *target, int needcfa) -+{ -+ MCode *p = as->mcp; -+ ptrdiff_t delta = 
(char *)target - (char *)(p - 2); -+ if (checki21(delta)) { -+ *--p = RISCVI_NOP; -+ *--p = RISCVI_JAL | RISCVF_D(RID_RA) | RISCVF_IMMJ(delta); -+ } else if (checki32(delta)) { -+ *--p = RISCVI_JALR | RISCVF_D(RID_RA) | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta)); -+ *--p = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta)); -+ needcfa = 1; -+ } else { -+ *--p = RISCVI_JALR | RISCVF_D(RID_RA) | RISCVF_S1(RID_CFUNCADDR) | RISCVF_IMMI(0); -+ needcfa = 2; -+ } -+ as->mcp = p; -+ if (needcfa > 1) -+ ra_allockreg(as, (intptr_t)target, RID_CFUNCADDR); -+} -+ -+/* -- Emit generic operations --------------------------------------------- */ -+ -+/* Generic move between two regs. */ -+static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) -+{ -+ if (src < RID_MAX_GPR && dst < RID_MAX_GPR) -+ emit_mv(as, dst, src); -+ else if (src < RID_MAX_GPR) -+ emit_ds(as, irt_isnum(ir->t) ? RISCVI_FMV_D_X : RISCVI_FMV_W_X, dst, src); -+ else if (dst < RID_MAX_GPR) -+ emit_ds(as, irt_isnum(ir->t) ? RISCVI_FMV_X_D : RISCVI_FMV_X_W, dst, src); -+ else -+ emit_ds1s2(as, irt_isnum(ir->t) ? RISCVI_FMV_D : RISCVI_FMV_S, dst, src, src); -+} -+ -+/* Emit an arithmetic operation with a constant operand. */ -+static void emit_opk(ASMState *as, RISCVIns riscvi, Reg dest, Reg src, -+ Reg tmp, intptr_t k) -+{ -+ if (checki12(k)) emit_dsi(as, riscvi, dest, src, k); -+ else { -+ switch (riscvi) { -+ case RISCVI_ADDI: riscvi = RISCVI_ADD; break; -+ case RISCVI_XORI: riscvi = RISCVI_XOR; break; -+ case RISCVI_ORI: riscvi = RISCVI_OR; break; -+ case RISCVI_ANDI: riscvi = RISCVI_AND; break; -+ default: lj_assertA(0, "NYI arithmetic RISCVIns"); return; -+ } -+ emit_ds1s2(as, riscvi, dest, src, tmp); -+ emit_loadu64(as, tmp, (uintptr_t)k); -+ } -+} -+ -+/* Generic load of register with base and (small) offset address. */ -+static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) -+{ -+ if (r < RID_MAX_GPR) -+ emit_lso(as, irt_is64(ir->t) ? 
RISCVI_LD : RISCVI_LW, r, base, ofs); -+ else -+ emit_lso(as, irt_isnum(ir->t) ? RISCVI_FLD : RISCVI_FLW, r, base, ofs); -+} -+ -+/* Generic store of register with base and (small) offset address. */ -+static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) -+{ -+ if (r < RID_MAX_GPR) -+ emit_lso(as, irt_is64(ir->t) ? RISCVI_SD : RISCVI_SW, r, base, ofs); -+ else -+ emit_lso(as, irt_isnum(ir->t) ? RISCVI_FSD : RISCVI_FSW, r, base, ofs); -+} -+ -+/* Add offset to pointer. */ -+static void emit_addptr(ASMState *as, Reg r, int32_t ofs) -+{ -+ if (ofs) -+ emit_opk(as, RISCVI_ADDI, r, r, RID_TMP, ofs); -+} -+ -+ -+#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs)) ---- a/src/lj_frame.h -+++ b/src/lj_frame.h -@@ -287,6 +287,15 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CAL - ** need to change to 3. - */ - #define CFRAME_SHIFT_MULTRES 0 -+#elif LJ_TARGET_RISCV64 -+#define CFRAME_OFS_ERRF 252 -+#define CFRAME_OFS_NRES 248 -+#define CFRAME_OFS_PREV 240 -+#define CFRAME_OFS_L 232 -+#define CFRAME_OFS_PC 224 -+#define CFRAME_OFS_MULTRES 0 -+#define CFRAME_SIZE 256 -+#define CFRAME_SHIFT_MULTRES 3 - #else - #error "Missing CFRAME_* definitions for this architecture" - #endif ---- a/src/lj_gdbjit.c -+++ b/src/lj_gdbjit.c -@@ -306,6 +306,9 @@ enum { - #elif LJ_TARGET_MIPS - DW_REG_SP = 29, - DW_REG_RA = 31, -+#elif LJ_TARGET_RISCV64 -+ DW_REG_SP = 2, -+ DW_REG_RA = 1, - #else - #error "Unsupported target architecture" - #endif -@@ -383,6 +386,8 @@ static const ELFheader elfhdr_template = - .machine = 20, - #elif LJ_TARGET_MIPS - .machine = 8, -+#elif LJ_TARGET_RISCV64 -+ .machine = 243, - #else - #error "Unsupported target architecture" - #endif -@@ -591,6 +596,16 @@ static void LJ_FASTCALL gdbjit_ehframe(G - for (i = 23; i >= 16; i--) { DB(DW_CFA_offset|i); DUV(26-i); } - for (i = 30; i >= 20; i -= 2) { DB(DW_CFA_offset|32|i); DUV(42-i); } - } -+#elif LJ_TARGET_RISCV64 -+ { -+ int i; -+ for (i = 27; i >= 18; i--) { DB(DW_CFA_offset|i); 
DUV(27-i+7); } -+ DB(DW_CFA_offset|9); DUV(17); -+ DB(DW_CFA_offset|8); DUV(18); -+ for (i = 27; i >= 18; i--) { DB(DW_CFA_offset|32|i); DUV(27-i+19); } -+ DB(DW_CFA_offset|32|9); DUV(29); -+ DB(DW_CFA_offset|32|8); DUV(30); -+ } - #else - #error "Unsupported target architecture" - #endif ---- a/src/lj_jit.h -+++ b/src/lj_jit.h -@@ -68,6 +68,46 @@ - #endif - #endif - -+#elif LJ_TARGET_RISCV64 -+ -+#define JIT_F_RVC (JIT_F_CPU << 0) -+#define JIT_F_RVZba (JIT_F_CPU << 1) -+#define JIT_F_RVZbb (JIT_F_CPU << 2) -+#define JIT_F_RVZicond (JIT_F_CPU << 3) -+#define JIT_F_RVZfa (JIT_F_CPU << 4) -+#define JIT_F_RVXThead (JIT_F_CPU << 5) -+ -+#define JIT_F_CPUSTRING "\003RVC\003Zba\003Zbb\006Zicond\003Zfa\006XThead" -+ -+#if LJ_TARGET_LINUX -+#include -+ -+#ifndef __NR_riscv_hwprobe -+#ifndef __NR_arch_specific_syscall -+#define __NR_arch_specific_syscall 244 -+#endif -+#define __NR_riscv_hwprobe (__NR_arch_specific_syscall + 14) -+#endif -+ -+struct riscv_hwprobe { -+ int64_t key; -+ uint64_t value; -+}; -+ -+#define RISCV_HWPROBE_KEY_MVENDORID 0 -+#define RISCV_HWPROBE_KEY_MARCHID 1 -+#define RISCV_HWPROBE_KEY_MIMPID 2 -+#define RISCV_HWPROBE_KEY_BASE_BEHAVIOR 3 -+#define RISCV_HWPROBE_KEY_IMA_EXT_0 4 -+ -+#define RISCV_HWPROBE_IMA_C (1 << 1) -+#define RISCV_HWPROBE_EXT_ZBA (1 << 3) -+#define RISCV_HWPROBE_EXT_ZBB (1 << 4) -+#define RISCV_HWPROBE_EXT_ZFA (1ULL << 32) -+#define RISCV_HWPROBE_EXT_ZICOND (1ULL << 35) -+ -+#endif -+ - #else - - #define JIT_F_CPUSTRING "" ---- a/src/lj_mcode.c -+++ b/src/lj_mcode.c -@@ -38,6 +38,12 @@ - void sys_icache_invalidate(void *start, size_t len); - #endif - -+#if LJ_TARGET_RISCV64 && LJ_TARGET_LINUX -+#include -+#include -+#include -+#endif -+ - /* Synchronize data/instruction cache. 
*/ - void lj_mcode_sync(void *start, void *end) - { -@@ -52,6 +58,17 @@ void lj_mcode_sync(void *start, void *en - sys_icache_invalidate(start, (char *)end-(char *)start); - #elif LJ_TARGET_PPC - lj_vm_cachesync(start, end); -+#elif LJ_TARGET_RISCV64 && LJ_TARGET_LINUX -+#if (defined(__GNUC__) || defined(__clang__)) -+ __asm__ volatile("fence rw, rw"); -+#else -+ lj_vm_fence_rw_rw(); -+#endif -+#ifdef __GLIBC__ -+ __riscv_flush_icache(start, end, 0); -+#else -+ syscall(__NR_riscv_flush_icache, start, end, 0UL); -+#endif - #elif defined(__GNUC__) || defined(__clang__) - __clear_cache(start, end); - #else ---- a/src/lj_target.h -+++ b/src/lj_target.h -@@ -55,7 +55,7 @@ typedef uint32_t RegSP; - /* Bitset for registers. 32 registers suffice for most architectures. - ** Note that one set holds bits for both GPRs and FPRs. - */ --#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 -+#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 || LJ_TARGET_RISCV64 - typedef uint64_t RegSet; - #define RSET_BITS 6 - #define rset_picktop_(rs) ((Reg)lj_fls64(rs)) -@@ -145,6 +145,8 @@ typedef uint32_t RegCost; - #include "lj_target_mips.h" - #elif LJ_TARGET_S390X - #include "lj_target_s390x.h" -+#elif LJ_TARGET_RISCV64 -+#include "lj_target_riscv.h" - #else - #error "Missing include for target CPU" - #endif ---- /dev/null -+++ b/src/lj_target_riscv.h -@@ -0,0 +1,542 @@ -+/* -+** Definitions for RISC-V CPUs. -+** Copyright (C) 2005-2025 Mike Pall. 
See Copyright Notice in luajit.h -+*/ -+ -+#ifndef _LJ_TARGET_RISCV_H -+#define _LJ_TARGET_RISCV_H -+ -+/* -- Registers IDs ------------------------------------------------------- */ -+ -+#define GPRDEF(_) \ -+ _(X0) _(RA) _(SP) _(X3) _(X4) _(X5) _(X6) _(X7) \ -+ _(X8) _(X9) _(X10) _(X11) _(X12) _(X13) _(X14) _(X15) \ -+ _(X16) _(X17) _(X18) _(X19) _(X20) _(X21) _(X22) _(X23) \ -+ _(X24) _(X25) _(X26) _(X27) _(X28) _(X29) _(X30) _(X31) -+#define FPRDEF(_) \ -+ _(F0) _(F1) _(F2) _(F3) _(F4) _(F5) _(F6) _(F7) \ -+ _(F8) _(F9) _(F10) _(F11) _(F12) _(F13) _(F14) _(F15) \ -+ _(F16) _(F17) _(F18) _(F19) _(F20) _(F21) _(F22) _(F23) \ -+ _(F24) _(F25) _(F26) _(F27) _(F28) _(F29) _(F30) _(F31) -+#define VRIDDEF(_) -+ -+#define RIDENUM(name) RID_##name, -+ -+enum { -+ GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */ -+ FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ -+ RID_MAX, -+ RID_ZERO = RID_X0, -+ RID_TMP = RID_RA, -+ RID_GP = RID_X3, -+ RID_TP = RID_X4, -+ -+ /* Calling conventions. */ -+ RID_RET = RID_X10, -+ RID_RETLO = RID_X10, -+ RID_RETHI = RID_X11, -+ RID_FPRET = RID_F10, -+ RID_CFUNCADDR = RID_X5, -+ -+ /* These definitions must match with the *.dasc file(s): */ -+ RID_BASE = RID_X18, /* Interpreter BASE. */ -+ RID_LPC = RID_X20, /* Interpreter PC. */ -+ RID_GL = RID_X21, /* Interpreter GL. */ -+ RID_LREG = RID_X23, /* Interpreter L. */ -+ -+ /* Register ranges [min, max) and number of registers. */ -+ RID_MIN_GPR = RID_X0, -+ RID_MAX_GPR = RID_X31+1, -+ RID_MIN_FPR = RID_MAX_GPR, -+ RID_MAX_FPR = RID_F31+1, -+ RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, -+ RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR /* Only even regs are used. */ -+}; -+ -+#define RID_NUM_KREF RID_NUM_GPR -+#define RID_MIN_KREF RID_X0 -+ -+/* -- Register sets ------------------------------------------------------- */ -+ -+/* Make use of all registers, except ZERO, TMP, SP, GP, TP, CFUNCADDR and GL. 
*/ -+#define RSET_FIXED \ -+ (RID2RSET(RID_ZERO)|RID2RSET(RID_TMP)|RID2RSET(RID_SP)|\ -+ RID2RSET(RID_GP)|RID2RSET(RID_TP)|RID2RSET(RID_GL)) -+#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED) -+#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR) -+ -+#define RSET_ALL (RSET_GPR|RSET_FPR) -+#define RSET_INIT RSET_ALL -+ -+#define RSET_SCRATCH_GPR \ -+ (RSET_RANGE(RID_X5, RID_X7+1)|RSET_RANGE(RID_X28, RID_X31+1)|\ -+ RSET_RANGE(RID_X10, RID_X17+1)) -+ -+#define RSET_SCRATCH_FPR \ -+ (RSET_RANGE(RID_F0, RID_F7+1)|RSET_RANGE(RID_F10, RID_F17+1)|\ -+ RSET_RANGE(RID_F28, RID_F31+1)) -+#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) -+ -+#define REGARG_FIRSTGPR RID_X10 -+#define REGARG_LASTGPR RID_X17 -+#define REGARG_NUMGPR 8 -+ -+#define REGARG_FIRSTFPR RID_F10 -+#define REGARG_LASTFPR RID_F17 -+#define REGARG_NUMFPR 8 -+ -+/* -- Spill slots --------------------------------------------------------- */ -+ -+/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. -+** -+** SPS_FIXED: Available fixed spill slots in interpreter frame. -+** This definition must match with the *.dasc file(s). -+** -+** SPS_FIRST: First spill slot for general use. -+*/ -+#if LJ_32 -+#define SPS_FIXED 5 -+#else -+#define SPS_FIXED 4 -+#endif -+#define SPS_FIRST 4 -+ -+#define SPOFS_TMP 0 -+ -+#define sps_scale(slot) (4 * (int32_t)(slot)) -+#define sps_align(slot) (((slot) - SPS_FIXED + 3) & ~3) -+ -+/* -- Exit state ---------------------------------------------------------- */ -+/* This definition must match with the *.dasc file(s). */ -+typedef struct { -+ lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ -+ intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ -+ int32_t spill[256]; /* Spill slots. */ -+} ExitState; -+ -+/* Highest exit + 1 indicates stack check. */ -+#define EXITSTATE_CHECKEXIT 1 -+ -+/* Return the address of a per-trace exit stub. 
*/ -+static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) -+{ -+ while (*p == 0x00000013) p++; /* Skip RISCVI_NOP. */ -+ return p + 4 + exitno; -+} -+/* Avoid dependence on lj_jit.h if only including lj_target.h. */ -+#define exitstub_trace_addr(T, exitno) \ -+ exitstub_trace_addr_((MCode *)((char *)(T)->mcode + (T)->szmcode), (exitno)) -+ -+/* -- Instructions -------------------------------------------------------- */ -+ -+/* Instruction fields. */ -+#define RISCVF_D(d) (((d)&31) << 7) -+#define RISCVF_S1(r) (((r)&31) << 15) -+#define RISCVF_S2(r) (((r)&31) << 20) -+#define RISCVF_S3(r) (((r)&31) << 27) -+#define RISCVF_FUNCT2(f) (((f)&3) << 25) -+#define RISCVF_FUNCT3(f) (((f)&7) << 12) -+#define RISCVF_FUNCT7(f) (((f)&127) << 25) -+#define RISCVF_SHAMT(s) ((s) << 20) -+#define RISCVF_RM(m) (((m)&7) << 12) -+#define RISCVF_IMMI(i) ((i) << 20) -+#define RISCVF_IMMS(i) (((i)&0xfe0) << 20 | ((i)&0x1f) << 7) -+#define RISCVF_IMMB(i) (((i)&0x1000) << 19 | ((i)&0x800) >> 4 | ((i)&0x7e0) << 20 | ((i)&0x1e) << 7) -+#define RISCVF_IMMU(i) (((i)&0xfffff) << 12) -+#define RISCVF_IMMJ(i) (((i)&0x100000) << 11 | ((i)&0xff000) | ((i)&0x800) << 9 | ((i)&0x7fe) << 20) -+ -+/* Encode helpers. */ -+#define RISCVF_W_HI(w) ((w) - ((((w)&0xfff)^0x800) - 0x800)) -+#define RISCVF_W_LO(w) ((w)&0xfff) -+#define RISCVF_HI(i) ((RISCVF_W_HI(i) >> 12) & 0xfffff) -+#define RISCVF_LO(i) RISCVF_W_LO(i) -+ -+/* Check for valid field range. 
*/ -+#define RISCVF_SIMM_OK(x, b) ((((x) + (1 << (b-1))) >> (b)) == 0) -+#define RISCVF_UIMM_OK(x, b) (((x) >> (b)) == 0) -+#define checku11(i) RISCVF_UIMM_OK(i, 11) -+#define checki12(i) RISCVF_SIMM_OK(i, 12) -+#define checki13(i) RISCVF_SIMM_OK(i, 13) -+#define checki20(i) RISCVF_SIMM_OK(i, 20) -+#define checki21(i) RISCVF_SIMM_OK(i, 21) -+#define checki32auipc(i) (checki32(i) && (int32_t)(i) < 0x7ffff800) -+ -+typedef enum RISCVIns { -+ -+ /* --- RVI --- */ -+ RISCVI_LUI = 0x00000037, -+ RISCVI_AUIPC = 0x00000017, -+ -+ RISCVI_JAL = 0x0000006f, -+ RISCVI_JALR = 0x00000067, -+ -+ RISCVI_ADDI = 0x00000013, -+ RISCVI_SLTI = 0x00002013, -+ RISCVI_SLTIU = 0x00003013, -+ RISCVI_XORI = 0x00004013, -+ RISCVI_ORI = 0x00006013, -+ RISCVI_ANDI = 0x00007013, -+ -+ RISCVI_SLLI = 0x00001013, -+ RISCVI_SRLI = 0x00005013, -+ RISCVI_SRAI = 0x40005013, -+ -+ RISCVI_ADD = 0x00000033, -+ RISCVI_SUB = 0x40000033, -+ RISCVI_SLL = 0x00001033, -+ RISCVI_SLT = 0x00002033, -+ RISCVI_SLTU = 0x00003033, -+ RISCVI_XOR = 0x00004033, -+ RISCVI_SRL = 0x00005033, -+ RISCVI_SRA = 0x40005033, -+ RISCVI_OR = 0x00006033, -+ RISCVI_AND = 0x00007033, -+ -+ RISCVI_LB = 0x00000003, -+ RISCVI_LH = 0x00001003, -+ RISCVI_LW = 0x00002003, -+ RISCVI_LBU = 0x00004003, -+ RISCVI_LHU = 0x00005003, -+ RISCVI_SB = 0x00000023, -+ RISCVI_SH = 0x00001023, -+ RISCVI_SW = 0x00002023, -+ -+ RISCVI_BEQ = 0x00000063, -+ RISCVI_BNE = 0x00001063, -+ RISCVI_BLT = 0x00004063, -+ RISCVI_BGE = 0x00005063, -+ RISCVI_BLTU = 0x00006063, -+ RISCVI_BGEU = 0x00007063, -+ -+ RISCVI_ECALL = 0x00000073, -+ RISCVI_EBREAK = 0x00100073, -+ -+ RISCVI_NOP = 0x00000013, -+ RISCVI_MV = 0x00000013, -+ RISCVI_NOT = 0xfff04013, -+ RISCVI_NEG = 0x40000033, -+ RISCVI_RET = 0x00008067, -+ RISCVI_ZEXT_B = 0x0ff07013, -+ -+#if LJ_TARGET_RISCV64 -+ RISCVI_LWU = 0x00007003, -+ RISCVI_LD = 0x00003003, -+ RISCVI_SD = 0x00003023, -+ -+ RISCVI_ADDIW = 0x0000001b, -+ -+ RISCVI_SLLIW = 0x0000101b, -+ RISCVI_SRLIW = 0x0000501b, -+ RISCVI_SRAIW = 0x4000501b, 
-+ -+ RISCVI_ADDW = 0x0000003b, -+ RISCVI_SUBW = 0x4000003b, -+ RISCVI_SLLW = 0x0000103b, -+ RISCVI_SRLW = 0x0000503b, -+ RISCVI_SRAW = 0x4000503b, -+ -+ RISCVI_NEGW = 0x4000003b, -+ RISCVI_SEXT_W = 0x0000001b, -+#endif -+ -+ /* --- RVM --- */ -+ RISCVI_MUL = 0x02000033, -+ RISCVI_MULH = 0x02001033, -+ RISCVI_MULHSU = 0x02002033, -+ RISCVI_MULHU = 0x02003033, -+ RISCVI_DIV = 0x02004033, -+ RISCVI_DIVU = 0x02005033, -+ RISCVI_REM = 0x02006033, -+ RISCVI_REMU = 0x02007033, -+#if LJ_TARGET_RISCV64 -+ RISCVI_MULW = 0x0200003b, -+ RISCVI_DIVW = 0x0200403b, -+ RISCVI_DIVUW = 0x0200503b, -+ RISCVI_REMW = 0x0200603b, -+ RISCVI_REMUW = 0x0200703b, -+#endif -+ -+ /* --- RVF --- */ -+ RISCVI_FLW = 0x00002007, -+ RISCVI_FSW = 0x00002027, -+ -+ RISCVI_FMADD_S = 0x00000043, -+ RISCVI_FMSUB_S = 0x00000047, -+ RISCVI_FNMSUB_S = 0x0000004b, -+ RISCVI_FNMADD_S = 0x0000004f, -+ -+ RISCVI_FADD_S = 0x00000053, -+ RISCVI_FSUB_S = 0x08000053, -+ RISCVI_FMUL_S = 0x10000053, -+ RISCVI_FDIV_S = 0x18000053, -+ RISCVI_FSQRT_S = 0x58000053, -+ -+ RISCVI_FSGNJ_S = 0x20000053, -+ RISCVI_FSGNJN_S = 0x20001053, -+ RISCVI_FSGNJX_S = 0x20002053, -+ -+ RISCVI_FMIN_S = 0x28000053, -+ RISCVI_FMAX_S = 0x28001053, -+ -+ RISCVI_FCVT_W_S = 0xc0000053, -+ RISCVI_FCVT_WU_S = 0xc0100053, -+ -+ RISCVI_FMV_X_W = 0xe0000053, -+ -+ RISCVI_FEQ_S = 0xa0002053, -+ RISCVI_FLT_S = 0xa0001053, -+ RISCVI_FLE_S = 0xa0000053, -+ -+ RISCVI_FCLASS_S = 0xe0001053, -+ -+ RISCVI_FCVT_S_W = 0xd0000053, -+ RISCVI_FCVT_S_WU = 0xd0100053, -+ RISCVI_FMV_W_X = 0xf0000053, -+ -+ RISCVI_FMV_S = 0x20000053, -+ RISCVI_FNEG_S = 0x20001053, -+ RISCVI_FABS_S = 0x20002053, -+#if LJ_TARGET_RISCV64 -+ RISCVI_FCVT_L_S = 0xc0200053, -+ RISCVI_FCVT_LU_S = 0xc0300053, -+ RISCVI_FCVT_S_L = 0xd0200053, -+ RISCVI_FCVT_S_LU = 0xd0300053, -+#endif -+ -+ /* --- RVD --- */ -+ RISCVI_FLD = 0x00003007, -+ RISCVI_FSD = 0x00003027, -+ -+ RISCVI_FMADD_D = 0x02000043, -+ RISCVI_FMSUB_D = 0x02000047, -+ RISCVI_FNMSUB_D = 0x0200004b, -+ RISCVI_FNMADD_D = 
0x0200004f, -+ -+ RISCVI_FADD_D = 0x02000053, -+ RISCVI_FSUB_D = 0x0a000053, -+ RISCVI_FMUL_D = 0x12000053, -+ RISCVI_FDIV_D = 0x1a000053, -+ RISCVI_FSQRT_D = 0x5a000053, -+ -+ RISCVI_FSGNJ_D = 0x22000053, -+ RISCVI_FSGNJN_D = 0x22001053, -+ RISCVI_FSGNJX_D = 0x22002053, -+ -+ RISCVI_FMIN_D = 0x2a000053, -+ RISCVI_FMAX_D = 0x2a001053, -+ -+ RISCVI_FCVT_S_D = 0x40100053, -+ RISCVI_FCVT_D_S = 0x42000053, -+ -+ RISCVI_FEQ_D = 0xa2002053, -+ RISCVI_FLT_D = 0xa2001053, -+ RISCVI_FLE_D = 0xa2000053, -+ -+ RISCVI_FCLASS_D = 0xe2001053, -+ -+ RISCVI_FCVT_W_D = 0xc2000053, -+ RISCVI_FCVT_WU_D = 0xc2100053, -+ RISCVI_FCVT_D_W = 0xd2000053, -+ RISCVI_FCVT_D_WU = 0xd2100053, -+ -+ RISCVI_FMV_D = 0x22000053, -+ RISCVI_FNEG_D = 0x22001053, -+ RISCVI_FABS_D = 0x22002053, -+#if LJ_TARGET_RISCV64 -+ RISCVI_FCVT_L_D = 0xc2200053, -+ RISCVI_FCVT_LU_D = 0xc2300053, -+ RISCVI_FMV_X_D = 0xe2000053, -+ RISCVI_FCVT_D_L = 0xd2200053, -+ RISCVI_FCVT_D_LU = 0xd2300053, -+ RISCVI_FMV_D_X = 0xf2000053, -+#endif -+ -+ /* --- Zifencei --- */ -+ RISCVI_FENCE = 0x0000000f, -+ RISCVI_FENCE_I = 0x0000100f, -+ -+ /* --- Zicsr --- */ -+ RISCVI_CSRRW = 0x00001073, -+ RISCVI_CSRRS = 0x00002073, -+ RISCVI_CSRRC = 0x00003073, -+ RISCVI_CSRRWI = 0x00005073, -+ RISCVI_CSRRSI = 0x00006073, -+ RISCVI_CSRRCI = 0x00007073, -+ -+ /* --- RVB --- */ -+ /* Zba */ -+ RISCVI_SH1ADD = 0x20002033, -+ RISCVI_SH2ADD = 0x20004033, -+ RISCVI_SH3ADD = 0x20006033, -+#if LJ_TARGET_RISCV64 -+ RISCVI_ADD_UW = 0x0800003b, -+ -+ RISCVI_SH1ADD_UW = 0x2000203b, -+ RISCVI_SH2ADD_UW = 0x2000403b, -+ RISCVI_SH3ADD_UW = 0x2000603b, -+ -+ RISCVI_SLLI_UW = 0x0800101b, -+ -+ RISCVI_ZEXT_W = 0x0800003b, -+#endif -+ /* Zbb */ -+ RISCVI_ANDN = 0x40007033, -+ RISCVI_ORN = 0x40006033, -+ RISCVI_XNOR = 0x40004033, -+ -+ RISCVI_CLZ = 0x60001013, -+ RISCVI_CTZ = 0x60101013, -+ -+ RISCVI_CPOP = 0x60201013, -+ -+ RISCVI_MAX = 0x0a006033, -+ RISCVI_MAXU = 0x0a007033, -+ RISCVI_MIN = 0x0a004033, -+ RISCVI_MINU = 0x0a005033, -+ -+ RISCVI_SEXT_B = 
0x60401013, -+ RISCVI_SEXT_H = 0x60501013, -+#if LJ_TARGET_RISCV64 -+ RISCVI_ZEXT_H = 0x0800403b, -+#endif -+ -+ RISCVI_ROL = 0x60001033, -+ RISCVI_ROR = 0x60005033, -+ RISCVI_RORI = 0x60005013, -+ -+ RISCVI_ORC_B = 0x28705013, -+ -+#if LJ_TARGET_RISCV64 -+ RISCVI_REV8 = 0x6b805013, -+ -+ RISCVI_CLZW = 0x6000101b, -+ RISCVI_CTZW = 0x6010101b, -+ -+ RISCVI_CPOPW = 0x6020101b, -+ -+ RISCVI_ROLW = 0x6000103b, -+ RISCVI_RORIW = 0x6000501b, -+ RISCVI_RORW = 0x6000503b, -+#endif -+ /* NYI: Zbc, Zbs */ -+ -+ /* --- Zicond --- */ -+ RISCVI_CZERO_EQZ = 0x0e005033, -+ RISCVI_CZERO_NEZ = 0x0e007033, -+ -+ /* --- Zfa --- */ -+ RISCVI_FLI_S = 0xf0100053, -+ RISCVI_FMINM_S = 0x28002053, -+ RISCVI_FMAXM_S = 0x28003053, -+ RISCVI_FROUND_S = 0x40400053, -+ RISCVI_FROUNDNX_S = 0x40500053, -+ RISCVI_FCVTMOD_W_D = 0xc2801053, -+ RISCVI_FLEQ_S = 0xa0004053, -+ RISCVI_FLTQ_S = 0xa0005053, -+ RISCVI_FLI_D = 0xf2100053, -+ RISCVI_FMINM_D = 0x2a002053, -+ RISCVI_FMAXM_D = 0x2a003053, -+ RISCVI_FROUND_D = 0x42400053, -+ RISCVI_FROUNDNX_D = 0x42500053, -+ RISCVI_FLEQ_D = 0xa2004053, -+ RISCVI_FLTQ_D = 0xa2005053, -+ -+ RISCVI_FROUND_S_RTZ = 0x40401053, -+ RISCVI_FROUND_S_RDN = 0x40402053, -+ RISCVI_FROUND_S_RUP = 0x40403053, -+ RISCVI_FROUNDNX_S_RTZ = 0x40501053, -+ RISCVI_FROUNDNX_S_RDN = 0x40502053, -+ RISCVI_FROUNDNX_S_RUP = 0x40503053, -+ RISCVI_FROUND_D_RTZ = 0x42401053, -+ RISCVI_FROUND_D_RDN = 0x42402053, -+ RISCVI_FROUND_D_RUP = 0x42403053, -+ RISCVI_FROUNDNX_D_RTZ = 0x42501053, -+ RISCVI_FROUNDNX_D_RDN = 0x42502053, -+ RISCVI_FROUNDNX_D_RUP = 0x42503053, -+ -+ /* TBD: RVV?, RVP?, RVJ? 
*/ -+ -+ /* --- XThead* --- */ -+ /* XTHeadBa */ -+ RISCVI_TH_ADDSL = 0x0000100b, -+ -+ /* XTHeadBb */ -+ RISCVI_TH_SRRI = 0x1000100b, -+#if LJ_TARGET_RISCV64 -+ RISCVI_TH_SRRIW = 0x1400100b, -+#endif -+ RISCVI_TH_EXT = 0x0000200b, -+ RISCVI_TH_EXTU = 0x0000300b, -+ RISCVI_TH_FF0 = 0x8400100b, -+ RISCVI_TH_FF1 = 0x8600100b, -+ RISCVI_TH_REV = 0x8200100b, -+#if LJ_TARGET_RISCV64 -+ RISCVI_TH_REVW = 0x9000100b, -+#endif -+ RISCVI_TH_TSTNBZ = 0x8000100b, -+ -+ /* XTHeadBs */ -+ RISCVI_TH_TST = 0x8800100b, -+ -+ /* XTHeadCondMov */ -+ RISCVI_TH_MVEQZ = 0x4000100b, -+ RISCVI_TH_MVNEZ = 0x4200100b, -+ -+ /* XTHeadMac */ -+ RISCVI_TH_MULA = 0x2000100b, -+ RISCVI_TH_MULAH = 0x2800100b, -+#if LJ_TARGET_RISCV64 -+ RISCVI_TH_MULAW = 0x2400100b, -+#endif -+ RISCVI_TH_MULS = 0x2200100b, -+ RISCVI_TH_MULSH = 0x2a00100b, -+ RISCVI_TH_MULSW = 0x2600100b, -+ -+ /* NYI: XTHeadMemIdx, XTHeadFMemIdx, XTHeadMemPair */ -+} RISCVIns; -+ -+typedef enum RISCVRM { -+ RISCVRM_RNE = 0, -+ RISCVRM_RTZ = 1, -+ RISCVRM_RDN = 2, -+ RISCVRM_RUP = 3, -+ RISCVRM_RMM = 4, -+ RISCVRM_DYN = 7, -+} RISCVRM; -+ -+static const uint16_t riscv_fli_map_hi16[32] = { -+ 0xbff0u, // -1 -+ 0x0010u, // min -+ 0x3ef0u, // 2^-16 -+ 0x3f00u, // 2^-15 -+ 0x3f70u, // 2^-8 -+ 0x3f80u, // 2^-7 -+ 0x3fb0u, // 2^-4 -+ 0x3fc0u, // 2^-3, 0.125 -+ 0x3fd0u, // 2^-2, 0.25 -+ 0x3fd4u, // 0.3125 -+ 0x3fd8u, // 0.375 -+ 0x3fdcu, // 0.4375 -+ 0x3fe0u, // 0.5 -+ 0x3fe4u, // 0.625 -+ 0x3fe8u, // 0.75 -+ 0x3fecu, // 0.875 -+ 0x3ff0u, // 1 -+ 0x3ff4u, // 1.25 -+ 0x3ff8u, // 1.5 -+ 0x3ffcu, // 1.75 -+ 0x4000u, // 2 -+ 0x4004u, // 2.5 -+ 0x4008u, // 3 -+ 0x4010u, // 4 -+ 0x4020u, // 8 -+ 0x4030u, // 16 -+ 0x4060u, // 128 -+ 0x4070u, // 256 -+ 0x40e0u, // 2^15, 32768 -+ 0x40f0u, // 2^16, 65536 -+ 0x7ff0u, // inf -+ 0x7ff8u, // canonical nan -+}; -+ -+#endif ---- a/src/lj_vm.h -+++ b/src/lj_vm.h -@@ -37,6 +37,9 @@ LJ_ASMF int lj_vm_cpuid(uint32_t f, uint - #if LJ_TARGET_PPC - void lj_vm_cachesync(void *start, void *end); - #endif -+#if 
LJ_TARGET_RISCV64 -+void lj_vm_fence_rw_rw(); -+#endif - LJ_ASMF LJ_CONSTF double lj_vm_foldarith(double x, double y, int op); - #if LJ_HASJIT - LJ_ASMF LJ_CONSTF double lj_vm_foldfpm(double x, int op); ---- a/src/lj_vmmath.c -+++ b/src/lj_vmmath.c -@@ -69,7 +69,8 @@ double lj_vm_foldarith(double x, double - - /* -- Helper functions for generated machine code ------------------------- */ - --#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS -+#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS \ -+ || LJ_TARGET_RISCV64 - int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) - { - uint32_t y, ua, ub; --- /dev/null +++ b/src/vm_riscv64.dasc -@@ -0,0 +1,4810 @@ +@@ -0,0 +1,4855 @@ +|// Low-level VM code for RISC-V 64 CPUs. +|// Bytecode interpreter, fast functions and helper functions. -+|// Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ++|// Copyright (C) 2022-2026 ISRC, ISCAS. See Copyright Notice in luajit.h +|// -+|// Contributed by gns from PLCT Lab, ISCAS. ++|// Contributed by gns from PLCT Lab, ISRC, ISCAS. 
+| +|.arch riscv64 +|.section code_op, code_sub @@ -7815,7 +2578,7 @@ Co-authored-by: Heinrich Schuchardt + |->vmeta_istype: + | addi PC, PC, -4 + | sd BASE, L->base -+ | mv CARG1, L ++ | mv CARG1, L + | srliw CARG2, RA, 3 + | srliw CARG3, RD, 3 + | sd PC, SAVE_PC(sp) @@ -9261,6 +4024,46 @@ Co-authored-by: Heinrich Schuchardt + | + | + |//----------------------------------------------------------------------- ++ |//-- Number conversion functions -------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |// int64_t lj_vm_num2int_check(double x) ++ |->vm_num2int_check: ++ | fcvt.w.d CRET1, FARG1, rtz ++ | fcvt.d.w FARG2, CRET1 ++ | feq.d CARG2, FARG1, FARG2 ++ | beqz CARG2, >1 ++ | ret ++ |1: ++ | lui CRET1, 0x80000 ++ | slli CRET1, CRET1, 8 ++ | addi CRET1, CRET1, 0x80 ++ | slli CRET1, CRET1, 24 ++ | ret ++ | ++ |// int64_t lj_vm_num2i64(double x) ++ |->vm_num2i64: ++ | fcvt.l.d CRET1, FARG1, rtz ++ | ret ++ | ++ |// uint64_t lj_vm_num2u64(double x) ++ |->vm_num2u64: ++ | fcvt.lu.d CRET1, FARG1, rtz ++ | bnez CRET1, >1 ++ | fcvt.l.d CRET1, FARG1, rtz ++ |1: ++ | ret ++ | ++ |// int32_t lj_vm_tobit(double x) ++ |->vm_tobit: ++ | lui CARG2, 0x43380 // 2^52 + 2^51. ++ | slli CARG2, CARG2, 32 ++ | fmv.d.x FARG2, CARG2 ++ | fadd.d FARG2, FARG1, FARG2 ++ | fmv.x.w CRET1, FARG2 ++ | ret ++ | ++ |//----------------------------------------------------------------------- + |//-- Miscellaneous functions -------------------------------------------- + |//----------------------------------------------------------------------- + | @@ -9459,2101 +4262,7371 @@ Co-authored-by: Heinrich Schuchardt + |//----------------------------------------------------------------------- +} + -+/* Generate the code for a single instruction. */ -+static void build_ins(BuildCtx *ctx, BCOp op, int defop) ++/* Generate the code for a single instruction. 
*/ ++static void build_ins(BuildCtx *ctx, BCOp op, int defop) ++{ ++ int vk = 0; ++ |=>defop: ++ ++ switch (op) { ++ ++ /* -- Comparison ops ---------------------------------------------------- */ ++ ++ /* Remember: all ops branch for a true comparison, fall through otherwise. */ ++ ++ case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: ++ | // RA = src1*8, RD = src2*8, JMP with RD = target ++ | add RA, BASE, RA ++ | add RD, BASE, RD ++ if (op == BC_ISLT || op == BC_ISGE) { ++ | ld CARG1, 0(RA) ++ | ld CARG2, 0(RD) ++ | gettp CARG3, CARG1 ++ | gettp CARG4, CARG2 ++ } else { ++ | ld CARG2, 0(RA) ++ | ld CARG1, 0(RD) ++ | gettp CARG3, CARG2 ++ | gettp CARG4, CARG1 ++ } ++ | lhu TMP2, OFS_RD(PC) // TMP2=jump ++ | addi PC, PC, 4 ++ | bne CARG3, TISNUM, >2 ++ | decode_BC4b TMP2 ++ | bne CARG4, TISNUM, >5 ++ | sext.w CARG1, CARG1 ++ | sext.w CARG2, CARG2 ++ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 ++ | slt TMP1, CARG1, CARG2 ++ | addw TMP2, TMP2, TMP3 // TMP2=(jump-0x8000)<<2 ++ if (op == BC_ISLT || op == BC_ISGT) { ++ | neg TMP1, TMP1 ++ } else { ++ | addi TMP1, TMP1, -1 ++ } ++ | and TMP2, TMP2, TMP1 ++ |1: ++ | add PC, PC, TMP2 ++ | ins_next ++ | ++ |2: // RA is not an integer. ++ | sltiu TMP1, CARG3, LJ_TISNUM ++ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 ++ | bxeqz TMP1, ->vmeta_comp ++ | sltiu TMP1, CARG4, LJ_TISNUM ++ | decode_BC4b TMP2 ++ | beqz TMP1, >4 ++ | fmv.d.x FTMP0, CARG1 ++ | fmv.d.x FTMP2, CARG2 ++ |3: // RA and RD are both numbers. ++ | addw TMP2, TMP2, TMP3 ++ if (op == BC_ISLT) { ++ | flt.d TMP3, FTMP0, FTMP2 ++ | neg TMP3, TMP3 ++ } else if (op == BC_ISGE) { ++ | flt.d TMP3, FTMP0, FTMP2 ++ | addi TMP3, TMP3, -1 ++ } else if (op == BC_ISLE) { ++ | fle.d TMP3, FTMP2, FTMP0 ++ | neg TMP3, TMP3 ++ } else if (op == BC_ISGT) { ++ | fle.d TMP3, FTMP2, FTMP0 ++ | addi TMP3, TMP3, -1 ++ } ++ | and TMP2, TMP2, TMP3 ++ | j <1 ++ | ++ |4: // RA is a number, RD is not a number. ++ | // RA is a number, RD is an integer. 
Convert RD to a number. ++ | bxne CARG4, TISNUM, ->vmeta_comp ++ if (op == BC_ISLT || op == BC_ISGE) { ++ | fcvt.d.w FTMP2, CARG2 ++ | fmv.d.x FTMP0, CARG1 ++ } else { ++ | fcvt.d.w FTMP0, CARG1 ++ | fmv.d.x FTMP2, CARG2 ++ } ++ | j <3 ++ | ++ |5: // RA is an integer, RD is not an integer ++ | sltiu TMP1, CARG4, LJ_TISNUM ++ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 ++ | bxeqz TMP1, ->vmeta_comp ++ | // RA is an integer, RD is a number. Convert RA to a number. ++ if (op == BC_ISLT || op == BC_ISGE) { ++ | fcvt.d.w FTMP0, CARG1 ++ | fmv.d.x FTMP2, CARG2 ++ } else { ++ | fcvt.d.w FTMP2, CARG2 ++ | fmv.d.x FTMP0, CARG1 ++ } ++ | j <3 ++ break; ++ ++ case BC_ISEQV: case BC_ISNEV: ++ vk = op == BC_ISEQV; ++ | // RA = src1*8, RD = src2*8, JMP with RD = target ++ | add RA, BASE, RA ++ | add RD, BASE, RD ++ | addi PC, PC, 4 ++ | ld CARG1, 0(RA) ++ | ld CARG2, 0(RD) ++ | lhu TMP2, -4+OFS_RD(PC) ++ | gettp CARG3, CARG1 ++ | gettp CARG4, CARG2 ++ | sltu TMP0, TISNUM, CARG3 ++ | sltu TMP1, TISNUM, CARG4 ++ | or TMP0, TMP0, TMP1 ++ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 ++ if (vk) { ++ | beqz TMP0, ->BC_ISEQN_Z ++ } else { ++ | beqz TMP0, ->BC_ISNEN_Z ++ } ++ |// Either or both types are not numbers. ++ |.if FFI ++ | // Check if RA or RD is a cdata. ++ | xori TMP0, CARG3, LJ_TCDATA ++ | xori TMP1, CARG4, LJ_TCDATA ++ | and TMP0, TMP0, TMP1 ++ | bxeqz TMP0, ->vmeta_equal_cd ++ |.endif ++ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 ++ | decode_BC4b TMP2 ++ | addw TMP2, TMP2, TMP3 // (jump-0x8000)<<2 ++ | bne CARG1, CARG2, >2 ++ | // Tag and value are equal. ++ if (vk) { ++ |->BC_ISEQV_Z: ++ | add PC, PC, TMP2 ++ } ++ |1: ++ | ins_next ++ | ++ |2: // Check if the tags are the same and it's a table or userdata. ++ | xor TMP3, CARG3, CARG4 // Same type? ++ | sltiu TMP0, CARG3, LJ_TISTABUD+1 // Table or userdata? 
TMP0=1 ++ | beqz TMP3, >3 ++ | mv TMP0, x0 // TMP0=0: not same type, or same type table/userdata ++ |3: ++ | cleartp TAB:TMP1, CARG1 ++ if (vk) { ++ | beqz TMP0, <1 ++ } else { ++ | beqz TMP0, ->BC_ISEQV_Z // Reuse code from opposite instruction. ++ } ++ | // Different tables or userdatas. Need to check __eq metamethod. ++ | // Field metatable must be at same offset for GCtab and GCudata! ++ | ld TAB:TMP3, TAB:TMP1->metatable ++ if (vk) { ++ | beqz TAB:TMP3, <1 // No metatable? ++ | lbu TMP3, TAB:TMP3->nomm ++ | andi TMP3, TMP3, 1<BC_ISEQV_Z // No metatable? ++ | lbu TMP3, TAB:TMP3->nomm ++ | andi TMP3, TMP3, 1<BC_ISEQV_Z // Or 'no __eq' flag set? ++ } ++ | j ->vmeta_equal // Handle __eq metamethod. ++ break; ++ ++ case BC_ISEQS: case BC_ISNES: ++ vk = op == BC_ISEQS; ++ | // RA = src*8, RD = str_const*8 (~), JMP with RD = target ++ | add RA, BASE, RA ++ | addi PC, PC, 4 ++ | ld CARG1, 0(RA) ++ | sub RD, KBASE, RD ++ | lhu TMP2, -4+OFS_RD(PC) ++ | ld CARG2, -8(RD) // KBASE-8-str_const*8 ++ |.if FFI ++ | gettp CARG3, CARG1 ++ | li TMP1, LJ_TCDATA ++ |.endif ++ | li TMP0, LJ_TSTR ++ | decode_BC4b TMP2 ++ | settp CARG2, TMP0 ++ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 ++ |.if FFI ++ | bxeq CARG3, TMP1, ->vmeta_equal_cd ++ |.endif ++ | xor TMP0, CARG1, CARG2 // TMP2=0: A==D; TMP2!=0: A!=D ++ | addw TMP2, TMP2, TMP3 ++ if (vk) { ++ | seqz TMP4, TMP0 ++ } else { ++ | snez TMP4, TMP0 ++ } ++ | neg TMP4, TMP4 ++ | and TMP2, TMP2, TMP4 ++ | add PC, PC, TMP2 ++ | ins_next ++ break; ++ ++ case BC_ISEQN: case BC_ISNEN: ++ vk = op == BC_ISEQN; ++ | // RA = src*8, RD = num_const*8, JMP with RD = target ++ | add RA, BASE, RA ++ | add RD, KBASE, RD ++ | ld CARG1, 0(RA) ++ | ld CARG2, 0(RD) ++ | lhu TMP2, OFS_RD(PC) ++ | gettp CARG3, CARG1 ++ | gettp CARG4, CARG2 ++ | addi PC, PC, 4 ++ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 ++ if (vk) { ++ |->BC_ISEQN_Z: ++ } else { ++ |->BC_ISNEN_Z: ++ } ++ | decode_BC4b TMP2 ++ | bne CARG3, TISNUM, >4 ++ | 
addw TMP2, TMP2, TMP3 ++ | bne CARG4, TISNUM, >6 ++ | xor TMP0, CARG1, CARG2 // TMP0=0: A==D; TMP0!=0: A!=D ++ |1: ++ if (vk) { ++ | seqz TMP4, TMP0 ++ | neg TMP4, TMP4 ++ | and TMP2, TMP2, TMP4 ++ | add PC, PC, TMP2 ++ |2: ++ } else { ++ | snez TMP4, TMP0 ++ | neg TMP4, TMP4 ++ | and TMP2, TMP2, TMP4 ++ |2: ++ | add PC, PC, TMP2 ++ } ++ |3: ++ | ins_next ++ | ++ |4: // RA is not an integer. ++ | addw TMP2, TMP2, TMP3 ++ |.if FFI ++ | bgeu CARG3, TISNUM, >7 ++ |.else ++ | bgeu CARG3, TISNUM, <2 ++ |.endif ++ | fmv.d.x FTMP0, CARG1 ++ | fmv.d.x FTMP2, CARG2 ++ | bne CARG4, TISNUM, >5 ++ |// RA is a number, RD is an integer. ++ | fcvt.d.w FTMP2, CARG2 ++ | ++ |5: // RA and RD are both numbers. ++ | feq.d TMP0, FTMP0, FTMP2 ++ | seqz TMP0, TMP0 ++ | j <1 ++ | ++ |6: // RA is an integer, RD is a number. ++ |.if FFI ++ | bgeu CARG4, TISNUM, >8 ++ |.else ++ | bgeu CARG4, TISNUM, <2 ++ |.endif ++ | fcvt.d.w FTMP0, CARG1 ++ | fmv.d.x FTMP2, CARG2 ++ | j <5 ++ | ++ |.if FFI ++ |7: // RA not int, not number ++ | li TMP0, LJ_TCDATA ++ | bne CARG3, TMP0, <2 ++ | j ->vmeta_equal_cd ++ | ++ |8: // RD not int, not number ++ | li TMP0, LJ_TCDATA ++ | bne CARG4, TMP0, <2 ++ | j ->vmeta_equal_cd ++ |.endif ++ break; ++ ++ case BC_ISEQP: case BC_ISNEP: ++ vk = op == BC_ISEQP; ++ | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target ++ | add RA, BASE, RA ++ | srliw TMP0, RD, 3 ++ | ld TMP1, 0(RA) ++ | not TMP0, TMP0 // ~TMP0: ~0 ~1 ~2 ++ | lhu TMP2, OFS_RD(PC) // TMP2: RD in next INS, branch target ++ | gettp TMP1, TMP1 ++ | addi PC, PC, 4 ++ | xor TMP0, TMP1, TMP0 // TMP0=0 A=D; TMP0!=0 A!=D ++ |.if FFI ++ | li TMP3, LJ_TCDATA ++ | bxeq TMP1, TMP3, ->vmeta_equal_cd ++ |.endif ++ | decode_BC4b TMP2 ++ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 ++ | addw TMP2, TMP2, TMP3 // TMP2=(jump-0x8000)<<2 ++ if (vk) { ++ | seqz TMP4, TMP0 ++ } else { ++ | snez TMP4, TMP0 ++ } ++ | neg TMP4, TMP4 ++ | and TMP2, TMP2, TMP4 ++ | add PC, PC, TMP2 ++ | ins_next ++ break; ++ 
++ /* -- Unary test and copy ops ------------------------------------------- */ ++ ++ case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: ++ | // RA = dst*8 or unused, RD = src*8, JMP with RD = target ++ | add RD, BASE, RD ++ | lhu TMP2, OFS_RD(PC) ++ | ld CRET1, 0(RD) ++ | addi PC, PC, 4 ++ | gettp TMP0, CRET1 ++ | add RA, BASE, RA ++ | sltiu TMP0, TMP0, LJ_TISTRUECOND // TMP0=1 true; TMP0=0 false ++ | decode_BC4b TMP2 ++ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 ++ | addw TMP2, TMP2, TMP3 // (jump-0x8000)<<2 ++ if (op == BC_IST || op == BC_ISTC) { ++ | beqz TMP0, >1 ++ if (op == BC_ISTC) { ++ | sd CRET1, 0(RA) ++ } ++ } else { ++ | bnez TMP0, >1 ++ if (op == BC_ISFC) { ++ | sd CRET1, 0(RA) ++ } ++ } ++ | add PC, PC, TMP2 ++ |1: ++ | ins_next ++ break; ++ ++ case BC_ISTYPE: ++ | // RA = src*8, RD = -type*8 ++ | add TMP0, BASE, RA ++ | srliw TMP1, RD, 3 ++ | ld TMP0, 0(TMP0) ++ | gettp TMP0, TMP0 ++ | add TMP0, TMP0, TMP1 // if itype of RA == type, then TMP0=0 ++ | bxnez TMP0, ->vmeta_istype ++ | ins_next ++ break; ++ case BC_ISNUM: ++ | // RA = src*8, RD = -(TISNUM-1)*8 ++ | add TMP0, BASE, RA ++ | ld TMP0, 0(TMP0) ++ | checknum TMP0, ->vmeta_istype ++ | ins_next ++ break; ++ ++ /* -- Unary ops --------------------------------------------------------- */ ++ ++ case BC_MOV: ++ | // RA = dst*8, RD = src*8 ++ | add RD, BASE, RD ++ | add RA, BASE, RA ++ | ld TMP0, 0(RD) ++ | ins_next1 ++ | sd TMP0, 0(RA) ++ | ins_next2 ++ break; ++ case BC_NOT: ++ | // RA = dst*8, RD = src*8 ++ | add RD, BASE, RD ++ | add RA, BASE, RA ++ | ld TMP0, 0(RD) ++ | li TMP1, LJ_TTRUE ++ | ins_next1 ++ | gettp TMP0, TMP0 ++ | sltu TMP0, TMP1, TMP0 ++ | addiw TMP0, TMP0, 1 ++ | slli TMP0, TMP0, 47 ++ | not TMP0, TMP0 ++ | sd TMP0, 0(RA) ++ | ins_next2 ++ break; ++ case BC_UNM: ++ | // RA = dst*8, RD = src*8 ++ | add RB, BASE, RD ++ | add RA, BASE, RA ++ | ld TMP0, 0(RB) ++ | lui TMP1, 0x80000 ++ | gettp CARG3, TMP0 ++ | bne CARG3, TISNUM, >2 ++ | sext.w TMP0, TMP0 ++ | bxeq 
TMP0, TMP1, ->vmeta_unm // Meta handler deals with -2^31. ++ | negw TMP0, TMP0 ++ | beqz TMP0, >3 ++ | zext.w TMP0, TMP0 ++ | settp_b TMP0, TISNUM ++ |1: ++ | ins_next1 ++ | sd TMP0, 0(RA) ++ | ins_next2 ++ |2: ++ | sltiu TMP3, CARG3, LJ_TISNUM ++ | slli TMP1, TMP1, 32 ++ | bxeqz TMP3, ->vmeta_unm ++ | xor TMP0, TMP0, TMP1 // sign => ~sign ++ | j <1 ++ |3: ++ | slli TMP0, TMP1, 32 ++ | j <1 ++ break; ++ case BC_LEN: ++ | // RA = dst*8, RD = src*8 ++ | add CARG2, BASE, RD ++ | ld TMP0, 0(CARG2) ++ | add RA, BASE, RA ++ | gettp TMP1, TMP0 ++ | addi TMP2, TMP1, -LJ_TSTR ++ | cleartp STR:CARG1, TMP0 ++ | bnez TMP2, >2 ++ | lwu CARG1, STR:CARG1->len ++ |1: ++ | settp_b CARG1, TISNUM ++ | sd CARG1, 0(RA) ++ | ins_next ++ |2: ++ | addi TMP2, TMP1, -LJ_TTAB ++ | bxnez TMP2, ->vmeta_len ++#if LJ_52 ++ | ld TAB:TMP2, TAB:CARG1->metatable ++ | bnez TAB:TMP2, >9 ++ |3: ++#endif ++ |->BC_LEN_Z: ++ | call_intern BC_LEN, lj_tab_len // (GCtab *t) ++ | // Returns uint32_t (but less than 2^31). ++ | j <1 ++#if LJ_52 ++ |9: ++ | lbu TMP0, TAB:TMP2->nomm ++ | andi TMP0, TMP0, 1<vmeta_len ++#endif ++ break; ++ ++ /* -- Binary ops -------------------------------------------------------- */ ++ ++ |.macro fpmod, a, b, c ++ | fdiv.d FARG1, b, c ++ | jal ->vm_floor // floor(b/c) ++ | fmul.d a, FRET1, c ++ | fsub.d a, b, a // b - floor(b/c)*c ++ |.endmacro ++ | ++ |.macro ins_arithpre ++ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); ++ | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 ++ ||if (vk == 1) { ++ | // RA = dst*8, RB = num_const*8, RC = src1*8 ++ | decode_RB8 RC, INS ++ | decode_RDtoRC8 RB, RD ++ ||} else { ++ | // RA = dst*8, RB = src1*8, RC = num_const*8 ++ | decode_RB8 RB, INS ++ | decode_RDtoRC8 RC, RD ++ ||} ++ ||switch (vk) { ++ ||case 0: // suffix is VN ++ | add RB, BASE, RB ++ | add RC, KBASE, RC ++ || break; ++ ||case 1: // suffix is NV ++ | add RC, BASE, RC ++ | add RB, KBASE, RB ++ || break; ++ ||default: // CAT or suffix is VV ++ | add RB, BASE, RB ++ | add 
RC, BASE, RC ++ || break; ++ ||} ++ |.endmacro ++ | ++ |.macro ins_arithfp, fpins, itype1, itype2 ++ | fld FTMP0, 0(RB) ++ | sltu itype1, itype1, TISNUM ++ | sltu itype2, itype2, TISNUM ++ | fld FTMP2, 0(RC) ++ | and itype1, itype1, itype2 ++ | add RA, BASE, RA ++ | bxeqz itype1, ->vmeta_arith ++ | fpins FRET1, FTMP0, FTMP2 ++ | ins_next1 ++ | fsd FRET1, 0(RA) ++ | ins_next2 ++ |.endmacro ++ | ++ |.macro ins_arithead, itype1, itype2, tval1, tval2 ++ | ld tval1, 0(RB) ++ | ld tval2, 0(RC) ++ | // Check for two integers. ++ | gettp itype1, tval1 ++ | gettp itype2, tval2 ++ |.endmacro ++ | ++ |.macro ins_arithdn, intins, fpins ++ | ins_arithpre ++ | ins_arithead TMP0, TMP1, CARG1, CARG2 ++ | bne TMP0, TISNUM, >1 ++ | bne TMP1, TISNUM, >1 ++ | sext.w CARG3, CARG1 ++ | sext.w CARG4, CARG2 ++ |.if "intins" == "addw" ++ | intins CRET1, CARG3, CARG4 ++ | xor TMP1, CRET1, CARG3 // ((y^a) & (y^b)) < 0: overflow. ++ | xor TMP2, CRET1, CARG4 ++ | and TMP1, TMP1, TMP2 ++ | add RA, BASE, RA ++ | bxltz TMP1, ->vmeta_arith ++ |.elif "intins" == "subw" ++ | intins CRET1, CARG3, CARG4 ++ | xor TMP1, CRET1, CARG3 // ((y^a) & (a^b)) < 0: overflow. ++ | xor TMP2, CARG3, CARG4 ++ | and TMP1, TMP1, TMP2 ++ | add RA, BASE, RA ++ | bxltz TMP1, ->vmeta_arith ++ |.elif "intins" == "mulw" ++ | mul TMP2, CARG3, CARG4 ++ | add RA, BASE, RA ++ | sext.w CRET1, TMP2 ++ | bxne CRET1, TMP2, ->vmeta_arith // 63-32bit not all 0 or 1: overflow. ++ |.endif ++ | zext.w CRET1, CRET1 ++ | settp_b CRET1, TISNUM ++ | sd CRET1, 0(RA) ++ | ins_next ++ |1: // Check for two numbers. 
++ | ins_arithfp, fpins, TMP0, TMP1 ++ |.endmacro ++ | ++ |.macro ins_arithdiv, fpins ++ | ins_arithpre ++ | ins_arithead TMP0, TMP1, CARG1, CARG2 ++ | ins_arithfp, fpins, TMP0, TMP1 ++ |.endmacro ++ | ++ |.macro ins_arithmod, fpins, BC ++ | ins_arithpre ++ | ins_arithead TMP0, TMP1, CARG1, CARG2 ++ | bne TMP0, TISNUM, >1 ++ | bne TMP1, TISNUM, >1 ++ | sext.w CARG1, CARG1 ++ | sext.w CARG2, CARG2 ++ | add RA, BASE, RA ++ | bxeqz CARG2, ->vmeta_arith ++ | call_intern BC, lj_vm_modi ++ | zext.w CRET1, CRET1 ++ | settp_b CRET1, TISNUM ++ | sd CRET1, 0(RA) ++ | ins_next ++ |1: // Check for two numbers. ++ | ins_arithfp, fpins, TMP0, TMP1 ++ |.endmacro ++ ++ case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: ++ | ins_arithdn addw, fadd.d ++ break; ++ case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: ++ | ins_arithdn subw, fsub.d ++ break; ++ case BC_MULVN: case BC_MULNV: case BC_MULVV: ++ | ins_arithdn mulw, fmul.d ++ break; ++ case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: ++ | ins_arithdiv fdiv.d ++ break; ++ case BC_MODVN: ++ | ins_arithmod fpmod, BC_MODVN ++ break; ++ case BC_MODNV: ++ | ins_arithmod fpmod, BC_MODNV ++ break; ++ case BC_MODVV: ++ | ins_arithmod fpmod, BC_MODVV ++ break; ++ case BC_POW: ++ | ins_arithpre ++ | ld CARG1, 0(RB) ++ | ld CARG2, 0(RC) ++ | gettp TMP0, CARG1 ++ | gettp TMP1, CARG2 ++ | sltiu TMP0, TMP0, LJ_TISNUM ++ | sltiu TMP1, TMP1, LJ_TISNUM ++ | and TMP0, TMP0, TMP1 ++ | add RA, BASE, RA ++ | bxeqz TMP0, ->vmeta_arith ++ | fld FARG1, 0(RB) ++ | fld FARG2, 0(RC) ++ | call_extern BC_POW, pow ++ | ins_next1 ++ | fsd FRET1, 0(RA) ++ | ins_next2 ++ break; ++ ++ case BC_CAT: ++ | // RA = dst*8, RB = src_start*8, RC = src_end*8 ++ | decode_RB8 RB, INS ++ | decode_RDtoRC8 RC, RD ++ | sub CARG3, RC, RB ++ | sd BASE, L->base ++ | add CARG2, BASE, RC ++ | mv MULTRES, RB ++ |->BC_CAT_Z: ++ | srliw CARG3, CARG3, 3 ++ | sd PC, SAVE_PC(sp) ++ | mv CARG1, L ++ | call_intern BC_CAT, lj_meta_cat // (lua_State *L, TValue *top, int left) ++ | // Returns NULL 
(finished) or TValue * (metamethod). ++ | ld BASE, L->base ++ | bxnez CRET1, ->vmeta_binop ++ | add RB, BASE, MULTRES ++ | ld TMP0, 0(RB) ++ | add RA, BASE, RA ++ | sd TMP0, 0(RA) ++ | ins_next ++ break; ++ ++ /* -- Constant ops ------------------------------------------------------ */ ++ ++ case BC_KSTR: ++ | // RA = dst*8, RD = str_const*8 (~) ++ | sub TMP1, KBASE, RD ++ | li TMP2, LJ_TSTR ++ | ld TMP0, -8(TMP1) // KBASE-8-str_const*8 ++ | add RA, BASE, RA ++ | settp TMP0, TMP2 ++ | sd TMP0, 0(RA) ++ | ins_next ++ break; ++ case BC_KCDATA: ++ |.if FFI ++ | // RA = dst*8, RD = cdata_const*8 (~) ++ | sub TMP1, KBASE, RD ++ | ld TMP0, -8(TMP1) // KBASE-8-cdata_const*8 ++ | li TMP2, LJ_TCDATA ++ | add RA, BASE, RA ++ | settp TMP0, TMP2 ++ | sd TMP0, 0(RA) ++ | ins_next ++ |.endif ++ break; ++ case BC_KSHORT: ++ | // RA = dst*8, RD = int16_literal*8 ++ | sraiw RD, INS, 16 ++ | add RA, BASE, RA ++ | zext.w RD, RD ++ | ins_next1 ++ | settp_b RD, TISNUM ++ | sd RD, 0(RA) ++ | ins_next2 ++ break; ++ case BC_KNUM: ++ | // RA = dst*8, RD = num_const*8 ++ | add RD, KBASE, RD ++ | add RA, BASE, RA ++ | ld TMP0, 0(RD) ++ | ins_next1 ++ | sd TMP0, 0(RA) ++ | ins_next2 ++ break; ++ case BC_KPRI: ++ | // RA = dst*8, RD = primitive_type*8 (~) ++ | add RA, BASE, RA ++ | slli TMP0, RD, 44 // 44+3 ++ | not TMP0, TMP0 ++ | ins_next1 ++ | sd TMP0, 0(RA) ++ | ins_next2 ++ break; ++ case BC_KNIL: ++ | // RA = base*8, RD = end*8 ++ | add RA, BASE, RA ++ | sd TISNIL, 0(RA) ++ | addi RA, RA, 8 ++ | add RD, BASE, RD ++ |1: ++ | sd TISNIL, 0(RA) ++ | slt TMP0, RA, RD ++ | addi RA, RA, 8 ++ | bnez TMP0, <1 ++ | ins_next ++ break; ++ ++ /* -- Upvalue and function ops ------------------------------------------ */ ++ ++ case BC_UGET: ++ | // RA = dst*8, RD = uvnum*8 ++ | ld LFUNC:TMP0, FRAME_FUNC(BASE) ++ | add RA, BASE, RA ++ | cleartp LFUNC:TMP0 ++ | add RD, RD, LFUNC:TMP0 ++ | ld UPVAL:TMP0, LFUNC:RD->uvptr ++ | ld TMP1, UPVAL:TMP0->v ++ | ld TMP2, 0(TMP1) ++ | ins_next1 ++ | sd TMP2, 0(RA) ++ 
| ins_next2 ++ break; ++ case BC_USETV: ++ | // RA = uvnum*8, RD = src*8 ++ | ld LFUNC:TMP0, FRAME_FUNC(BASE) ++ | add RD, BASE, RD ++ | cleartp LFUNC:TMP0 ++ | add RA, RA, LFUNC:TMP0 ++ | ld UPVAL:TMP0, LFUNC:RA->uvptr ++ | ld CRET1, 0(RD) ++ | lbu TMP3, UPVAL:TMP0->marked ++ | ld CARG2, UPVAL:TMP0->v ++ | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv) ++ | lbu TMP0, UPVAL:TMP0->closed ++ | gettp TMP2, CRET1 ++ | sd CRET1, 0(CARG2) ++ | or TMP3, TMP3, TMP0 ++ | li TMP0, LJ_GC_BLACK|1 ++ | addi TMP2, TMP2, -(LJ_TNUMX+1) ++ | beq TMP3, TMP0, >2 // Upvalue is closed and black? ++ |1: ++ | ins_next ++ | ++ |2: // Check if new value is collectable. ++ | sltiu TMP0, TMP2, LJ_TISGCV - (LJ_TNUMX+1) ++ | cleartp GCOBJ:CRET1, CRET1 ++ | beqz TMP0, <1 // tvisgcv(v) ++ | lbu TMP3, GCOBJ:CRET1->gch.marked ++ | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v) ++ | beqz TMP3, <1 ++ | // Crossed a write barrier. Move the barrier forward. ++ | mv CARG1, GL ++ | call_intern BC_USETV, lj_gc_barrieruv // (global_State *g, TValue *tv) ++ | j <1 ++ break; ++ case BC_USETS: ++ | // RA = uvnum*8, RD = str_const*8 (~) ++ | ld LFUNC:TMP0, FRAME_FUNC(BASE) ++ | sub TMP1, KBASE, RD ++ | cleartp LFUNC:TMP0 ++ | add RA, RA, LFUNC:TMP0 ++ | ld UPVAL:TMP0, LFUNC:RA->uvptr ++ | ld STR:TMP1, -8(TMP1) // KBASE-8-str_const*8 ++ | lbu TMP2, UPVAL:TMP0->marked ++ | ld CARG2, UPVAL:TMP0->v ++ | lbu TMP3, STR:TMP1->marked ++ | andi TMP4, TMP2, LJ_GC_BLACK // isblack(uv) ++ | lbu TMP2, UPVAL:TMP0->closed ++ | li TMP0, LJ_TSTR ++ | settp TMP1, TMP0 ++ | sd TMP1, 0(CARG2) ++ | bnez TMP4, >2 ++ |1: ++ | ins_next ++ | ++ |2: // Check if string is white and ensure upvalue is closed. ++ | beqz TMP2, <1 ++ | andi TMP0, TMP3, LJ_GC_WHITES // iswhite(str) ++ | beqz TMP0, <1 ++ | // Crossed a write barrier. Move the barrier forward. 
++ | mv CARG1, GL ++ | call_intern BC_USETS, lj_gc_barrieruv // (global_State *g, TValue *tv) ++ | j <1 ++ break; ++ case BC_USETN: ++ | // RA = uvnum*8, RD = num_const*8 ++ | ld LFUNC:TMP0, FRAME_FUNC(BASE) ++ | add RD, KBASE, RD ++ | cleartp LFUNC:TMP0 ++ | add TMP0, RA, LFUNC:TMP0 ++ | ld UPVAL:TMP0, LFUNC:TMP0->uvptr ++ | ld TMP1, 0(RD) ++ | ld TMP0, UPVAL:TMP0->v ++ | sd TMP1, 0(TMP0) ++ | ins_next ++ break; ++ case BC_USETP: ++ | // RA = uvnum*8, RD = primitive_type*8 (~) ++ | ld LFUNC:TMP0, FRAME_FUNC(BASE) ++ | slli TMP2, RD, 44 ++ | cleartp LFUNC:TMP0 ++ | add TMP0, RA, LFUNC:TMP0 ++ | not TMP2, TMP2 ++ | ld UPVAL:TMP0, LFUNC:TMP0->uvptr ++ | ld TMP1, UPVAL:TMP0->v ++ | sd TMP2, 0(TMP1) ++ | ins_next ++ break; ++ ++ case BC_UCLO: ++ | // RA = level*8, RD = target ++ | ld TMP2, L->openupval ++ | branch_RD // Do this first since RD is not saved. ++ | sd BASE, L->base ++ | mv CARG1, L ++ | beqz TMP2, >1 ++ | add CARG2, BASE, RA ++ | call_intern BC_UCLO, lj_func_closeuv // (lua_State *L, TValue *level) ++ | ld BASE, L->base ++ |1: ++ | ins_next ++ break; ++ ++ case BC_FNEW: ++ | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype) ++ | sub TMP1, KBASE, RD ++ | ld CARG3, FRAME_FUNC(BASE) ++ | ld CARG2, -8(TMP1) // KBASE-8-tab_const*8 ++ | sd BASE, L->base ++ | sd PC, SAVE_PC(sp) ++ | cleartp CARG3 ++ | mv CARG1, L ++ | // (lua_State *L, GCproto *pt, GCfuncL *parent) ++ | call_intern BC_FNEW, lj_func_newL_gc ++ | // Returns GCfuncL *. 
++ | li TMP0, LJ_TFUNC ++ | ld BASE, L->base ++ | settp CRET1, TMP0 ++ | add RA, BASE, RA ++ | sd CRET1, 0(RA) ++ | ins_next ++ break; ++ ++ /* -- Table ops --------------------------------------------------------- */ ++ ++ case BC_TNEW: ++ case BC_TDUP: ++ | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~) ++ | ld TMP0, GL->gc.total ++ | ld TMP1, GL->gc.threshold ++ | sd BASE, L->base ++ | sd PC, SAVE_PC(sp) ++ | bgeu TMP0, TMP1, >5 ++ |1: ++ if (op == BC_TNEW) { ++ | srliw CARG2, RD, 3 ++ | andi CARG2, CARG2, 0x7ff ++ | lzi TMP0, 0x801 ++ | addiw TMP2, CARG2, -0x7ff ++ | srliw CARG3, RD, 14 ++ | seqz TMP3, TMP2 ++ | neg TMP4, TMP3 ++ | xor CARG1, TMP0, CARG2 // CARG2 = TMP3 ? TMP0 : CARG2 ++ | and CARG1, CARG1, TMP4 ++ | xor CARG2, CARG2, CARG1 ++ | mv CARG1, L ++ | // (lua_State *L, int32_t asize, uint32_t hbits) ++ | call_intern BC_TNEW, lj_tab_new ++ | // Returns Table *. ++ } else { ++ | sub TMP1, KBASE, RD ++ | mv CARG1, L ++ | ld CARG2, -8(TMP1) // KBASE-8-str_const*8 ++ | call_intern BC_TDUP, lj_tab_dup // (lua_State *L, Table *kt) ++ | // Returns Table *. 
++ } ++ | li TMP0, LJ_TTAB ++ | ld BASE, L->base ++ | ins_next1 ++ | settp CRET1, TMP0 ++ | add RA, BASE, RA ++ | sd CRET1, 0(RA) ++ | ins_next2 ++ |5: ++ | mv MULTRES, RD ++ | mv CARG1, L ++ if (op == BC_TNEW) { ++ | call_intern BC_TNEW, lj_gc_step_fixtop // (lua_State *L) ++ } else { ++ | call_intern BC_TDUP, lj_gc_step_fixtop // (lua_State *L) ++ } ++ | mv RD, MULTRES ++ | j <1 ++ break; ++ ++ case BC_GGET: ++ | // RA = dst*8, RD = str_const*8 (~) ++ case BC_GSET: ++ | // RA = src*8, RD = str_const*8 (~) ++ | ld LFUNC:TMP0, FRAME_FUNC(BASE) ++ | sub TMP1, KBASE, RD ++ | ld STR:RC, -8(TMP1) // KBASE-8-str_const*8 ++ | cleartp LFUNC:TMP0 ++ | ld TAB:RB, LFUNC:TMP0->env ++ | add RA, BASE, RA ++ if (op == BC_GGET) { ++ | j ->BC_TGETS_Z ++ } else { ++ | j ->BC_TSETS_Z ++ } ++ break; ++ ++ case BC_TGETV: ++ | // RA = dst*8, RB = table*8, RC = key*8 ++ | decode_RB8 RB, INS ++ | decode_RDtoRC8 RC, RD ++ | add CARG2, BASE, RB ++ | add CARG3, BASE, RC ++ | ld TAB:RB, 0(CARG2) ++ | ld TMP2, 0(CARG3) ++ | add RA, BASE, RA ++ | checktab TAB:RB, ->vmeta_tgetv ++ | gettp TMP3, TMP2 ++ | lw TMP0, TAB:RB->asize ++ | bne TMP3, TISNUM, >5 // Integer key? ++ | sext.w TMP2, TMP2 ++ | ld TMP1, TAB:RB->array ++ | bxgeu TMP2, TMP0, ->vmeta_tgetv // Integer key and in array part? ++ | slliw TMP2, TMP2, 3 ++ | add TMP2, TMP1, TMP2 ++ | ld CRET1, 0(TMP2) ++ | beq CRET1, TISNIL, >2 ++ |1: ++ | sd CRET1, 0(RA) ++ | ins_next ++ | ++ |2: // Check for __index if table value is nil. ++ | ld TAB:TMP2, TAB:RB->metatable ++ | beqz TAB:TMP2, <1 // No metatable: done. ++ | lbu TMP0, TAB:TMP2->nomm ++ | andi TMP0, TMP0, 1<vmeta_tgetv ++ | ++ |5: ++ | li TMP0, LJ_TSTR ++ | cleartp RC, TMP2 ++ | bxne TMP3, TMP0, ->vmeta_tgetv // String key? 
++ | j ->BC_TGETS_Z ++ break; ++ case BC_TGETS: ++ | // RA = dst*8, RB = table*8, RC = str_const*8 (~) ++ | decode_RB8 RB, INS ++ | decode_RDtoRC8 RC, RD ++ | add CARG2, BASE, RB ++ | sub CARG3, KBASE, RC ++ | ld TAB:RB, 0(CARG2) ++ | add RA, BASE, RA ++ | ld STR:RC, -8(CARG3) // KBASE-8-str_const*8 ++ | checktab TAB:RB, ->vmeta_tgets1 ++ |->BC_TGETS_Z: ++ | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 ++ | lw TMP0, TAB:RB->hmask ++ | lw TMP1, STR:RC->sid ++ | ld NODE:TMP2, TAB:RB->node ++ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask ++ | slliw TMP0, TMP1, 5 ++ | slliw TMP1, TMP1, 3 ++ | subw TMP1, TMP0, TMP1 ++ | li TMP3, LJ_TSTR ++ | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) ++ | settp STR:RC, TMP3 // Tagged key to look for. ++ |1: ++ | ld CARG1, NODE:TMP2->key ++ | ld CARG2, NODE:TMP2->val ++ | ld NODE:TMP1, NODE:TMP2->next ++ | ld TAB:TMP3, TAB:RB->metatable ++ | bne CARG1, RC, >4 ++ | beq CARG2, TISNIL, >5 // Key found, but nil value? ++ |3: ++ | sd CARG2, 0(RA) ++ | ins_next ++ | ++ |4: // Follow hash chain. ++ | mv NODE:TMP2, NODE:TMP1 ++ | bnez NODE:TMP1, <1 ++ | // End of hash chain: key not found, nil result. ++ | ++ |5: // Check for __index if table value is nil. ++ | mv CARG2, TISNIL ++ | beqz TAB:TMP3, <3 // No metatable: done. ++ | lbu TMP0, TAB:TMP3->nomm ++ | andi TMP0, TMP0, 1<vmeta_tgets ++ break; ++ case BC_TGETB: ++ | // RA = dst*8, RB = table*8, RC = index*8 ++ | decode_RB8 RB, INS ++ | add CARG2, BASE, RB ++ | decode_RDtoRC8 RC, RD ++ | ld TAB:RB, 0(CARG2) ++ | add RA, BASE, RA ++ | srliw TMP0, RC, 3 ++ | checktab TAB:RB, ->vmeta_tgetb ++ | lw TMP1, TAB:RB->asize ++ | ld TMP2, TAB:RB->array ++ | bxgeu TMP0, TMP1, ->vmeta_tgetb ++ | add RC, TMP2, RC ++ | ld CRET1, 0(RC) ++ | beq CRET1, TISNIL, >5 ++ |1: ++ | sd CRET1, 0(RA) ++ | ins_next ++ | ++ |5: // Check for __index if table value is nil. ++ | ld TAB:TMP2, TAB:RB->metatable ++ | beqz TAB:TMP2, <1 // No metatable: done. 
++ | lbu TMP1, TAB:TMP2->nomm ++ | andi TMP1, TMP1, 1<vmeta_tgetb // Caveat: preserve TMP0 and CARG2! ++ break; ++ case BC_TGETR: ++ | // RA = dst*8, RB = table*8, RC = key*8 ++ | decode_RB8 RB, INS ++ | decode_RDtoRC8 RC, RD ++ | add RB, BASE, RB ++ | add RC, BASE, RC ++ | ld TAB:CARG1, 0(RB) ++ | lw CARG2, 0(RC) ++ | add RA, BASE, RA ++ | cleartp TAB:CARG1 ++ | lw TMP0, TAB:CARG1->asize ++ | ld TMP1, TAB:CARG1->array ++ | bxgeu CARG2, TMP0, ->vmeta_tgetr // In array part? ++ | slliw TMP2, CARG2, 3 ++ | add TMP3, TMP1, TMP2 ++ | ld TMP1, 0(TMP3) ++ |->BC_TGETR_Z: ++ | ins_next1 ++ | sd TMP1, 0(RA) ++ | ins_next2 ++ break; ++ ++ case BC_TSETV: ++ | // RA = src*8, RB = table*8, RC = key*8 ++ | decode_RB8 RB, INS ++ | decode_RDtoRC8 RC, RD ++ | add CARG2, BASE, RB ++ | add CARG3, BASE, RC ++ | ld TAB:RB, 0(CARG2) ++ | ld TMP2, 0(CARG3) ++ | add RA, BASE, RA ++ | checktab TAB:RB, ->vmeta_tsetv ++ | sext.w RC, TMP2 ++ | checkint TMP2, >5 ++ | lw TMP0, TAB:RB->asize ++ | ld TMP1, TAB:RB->array ++ | bxgeu RC, TMP0, ->vmeta_tsetv // Integer key and in array part? ++ | slliw TMP2, RC, 3 ++ | add TMP1, TMP1, TMP2 ++ | lbu TMP3, TAB:RB->marked ++ | ld TMP0, 0(TMP1) ++ | ld CRET1, 0(RA) ++ | beq TMP0, TISNIL, >3 ++ |1: ++ | andi TMP2, TMP3, LJ_GC_BLACK // isblack(table) ++ | sd CRET1, 0(TMP1) ++ | bnez TMP2, >7 ++ |2: ++ | ins_next ++ | ++ |3: // Check for __newindex if previous value is nil. ++ | ld TAB:TMP2, TAB:RB->metatable ++ | beqz TAB:TMP2, <1 // No metatable: done. ++ | lbu TMP2, TAB:TMP2->nomm ++ | andi TMP2, TMP2, 1<vmeta_tsetv ++ |5: ++ | gettp TMP0, TMP2 ++ | addi TMP0, TMP0, -LJ_TSTR ++ | bxnez TMP0, ->vmeta_tsetv ++ | cleartp STR:RC, TMP2 ++ | j ->BC_TSETS_Z // String key? ++ | ++ |7: // Possible table write barrier for the value. Skip valiswhite check. 
++ | barrierback TAB:RB, TMP3, TMP0, <2 ++ break; ++ case BC_TSETS: ++ | // RA = src*8, RB = table*8, RC = str_const*8 (~) ++ | decode_RB8 RB, INS ++ | decode_RDtoRC8 RC, RD ++ | add CARG2, BASE, RB ++ | sub CARG3, KBASE, RC ++ | ld TAB:RB, 0(CARG2) ++ | ld RC, -8(CARG3) // KBASE-8-str_const*8 ++ | add RA, BASE, RA ++ | cleartp STR:RC ++ | checktab TAB:RB, ->vmeta_tsets1 ++ |->BC_TSETS_Z: ++ | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8 ++ | lw TMP0, TAB:RB->hmask ++ | lw TMP1, STR:RC->sid ++ | ld NODE:TMP2, TAB:RB->node ++ | sb x0, TAB:RB->nomm // Clear metamethod cache. ++ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask ++ | slliw TMP0, TMP1, 5 ++ | slliw TMP1, TMP1, 3 ++ | subw TMP1, TMP0, TMP1 ++ | li TMP3, LJ_TSTR ++ | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) ++ | settp STR:RC, TMP3 // Tagged key to look for. ++ |1: ++ | ld TMP0, NODE:TMP2->key ++ | ld CARG2, NODE:TMP2->val ++ | ld NODE:TMP1, NODE:TMP2->next ++ | lbu TMP3, TAB:RB->marked ++ | bne TMP0, RC, >5 ++ | ld CARG1, 0(RA) ++ | ld TAB:TMP0, TAB:RB->metatable ++ | beq CARG2, TISNIL, >4 // Key found, but nil value? ++ |2: ++ | andi TMP4, TMP3, LJ_GC_BLACK // isblack(table) ++ | sd CARG1, NODE:TMP2->val ++ | bnez TMP4, >7 ++ |3: ++ | ins_next ++ | ++ |4: // Check for __newindex if previous value is nil. ++ | beqz TAB:TMP0, <2 // No metatable: done. ++ | lbu TMP0, TAB:TMP0->nomm ++ | andi TMP0, TMP0, 1<vmeta_tsets ++ | ++ |5: // Follow hash chain. ++ | mv NODE:TMP2, NODE:TMP1 ++ | bnez NODE:TMP1, <1 ++ | // End of hash chain: key not found, add a new one ++ | ++ | // But check for __newindex first. ++ | ld TAB:TMP2, TAB:RB->metatable ++ | addi CARG3, GL, offsetof(global_State, tmptv) ++ | beqz TAB:TMP2, >6 // No metatable: continue. ++ | lbu TMP0, TAB:TMP2->nomm ++ | andi TMP0, TMP0, 1<vmeta_tsets // 'no __newindex' flag NOT set: check. 
++ |6: ++ | sd RC, 0(CARG3) ++ | sd BASE, L->base ++ | mv CARG2, TAB:RB ++ | sd PC, SAVE_PC(sp) ++ | mv CARG1, L ++ | // (lua_State *L, GCtab *t, TValue *k) ++ | call_intern BC_TSETS, lj_tab_newkey ++ | // Returns TValue *. ++ | ld TMP0, 0(RA) ++ | ld BASE, L->base ++ | sd TMP0, 0(CRET1) ++ | j <3 // No 2nd write barrier needed. ++ | ++ |7: // Possible table write barrier for the value. Skip valiswhite check. ++ | barrierback TAB:RB, TMP3, TMP0, <3 ++ break; ++ case BC_TSETB: ++ | // RA = src*8, RB = table*8, RC = index*8 ++ | decode_RB8 RB, INS ++ | decode_RDtoRC8 RC, RD ++ | add CARG2, BASE, RB ++ | add RA, BASE, RA ++ | ld TAB:RB, 0(CARG2) ++ | srliw TMP0, RC, 3 ++ | checktab RB, ->vmeta_tsetb ++ | lw TMP1, TAB:RB->asize ++ | ld TMP2, TAB:RB->array ++ | bxgeu TMP0, TMP1, ->vmeta_tsetb ++ | add RC, TMP2, RC ++ | ld TMP1, 0(RC) ++ | lbu TMP3, TAB:RB->marked ++ | beq TMP1, TISNIL, >5 ++ |1: ++ | ld CRET1, 0(RA) ++ | andi TMP1, TMP3, LJ_GC_BLACK // isblack(table) ++ | sd CRET1, 0(RC) ++ | bnez TMP1, >7 ++ |2: ++ | ins_next ++ | ++ |5: // Check for __newindex if previous value is nil. ++ | ld TAB:TMP2, TAB:RB->metatable ++ | beqz TAB:TMP2, <1 // No metatable: done. ++ | lbu TMP1, TAB:TMP2->nomm ++ | andi TMP1, TMP1, 1<vmeta_tsetb // Caveat: preserve TMP0 and CARG2! ++ | ++ |7: // Possible table write barrier for the value. Skip valiswhite check. ++ | barrierback TAB:RB, TMP3, TMP0, <2 ++ break; ++ case BC_TSETR: ++ | // RA = dst*8, RB = table*8, RC = key*8 ++ | decode_RB8 RB, INS ++ | decode_RDtoRC8 RC, RD ++ | add CARG1, BASE, RB ++ | add CARG3, BASE, RC ++ | ld TAB:CARG2, 0(CARG1) ++ | lw CARG3, 0(CARG3) ++ | cleartp TAB:CARG2 ++ | lbu TMP3, TAB:CARG2->marked ++ | lw TMP0, TAB:CARG2->asize ++ | ld TMP1, TAB:CARG2->array ++ | andi TMP2, TMP3, LJ_GC_BLACK // isblack(table) ++ | add RA, BASE, RA ++ | bnez TMP2, >7 ++ |2: ++ | bxgeu CARG3, TMP0, ->vmeta_tsetr // In array part? 
++ | slliw TMP2, CARG3, 3 ++ | add CRET1, TMP1, TMP2 ++ |->BC_TSETR_Z: ++ | ld TMP1, 0(RA) ++ | ins_next1 ++ | sd TMP1, 0(CRET1) ++ | ins_next2 ++ | ++ |7: // Possible table write barrier for the value. Skip valiswhite check. ++ | barrierback TAB:CARG2, TMP3, CRET1, <2 ++ break; ++ ++ case BC_TSETM: ++ | // RA = base*8 (table at base-1), RD = num_const*8 (start index) ++ | add RA, BASE, RA ++ |1: ++ | add TMP3, KBASE, RD ++ | ld TAB:CARG2, -8(RA) // Guaranteed to be a table. ++ | addiw TMP0, MULTRES, -8 ++ | lw TMP3, 0(TMP3) // Integer constant is in lo-word. ++ | srliw CARG3, TMP0, 3 ++ | beqz TMP0, >4 // Nothing to copy? ++ | cleartp TAB:CARG2 ++ | addw CARG3, CARG3, TMP3 ++ | lw TMP2, TAB:CARG2->asize ++ | slliw TMP1, TMP3, 3 ++ | lbu TMP3, TAB:CARG2->marked ++ | ld CARG1, TAB:CARG2->array ++ | bltu TMP2, CARG3, >5 ++ | add TMP2, RA, TMP0 ++ | add TMP1, TMP1, CARG1 ++ | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table) ++ |3: // Copy result slots to table. ++ | ld CRET1, 0(RA) ++ | addi RA, RA, 8 ++ | sd CRET1, 0(TMP1) ++ | addi TMP1, TMP1, 8 ++ | bltu RA, TMP2, <3 ++ | bnez TMP0, >7 ++ |4: ++ | ins_next ++ | ++ |5: // Need to resize array part. ++ | sd BASE, L->base ++ | sd PC, SAVE_PC(sp) ++ | mv BASE, RD ++ | mv CARG1, L ++ | // (lua_State *L, GCtab *t, int nasize) ++ | call_intern BC_TSETM, lj_tab_reasize ++ | // Must not reallocate the stack. ++ | mv RD, BASE ++ | ld BASE, L->base // Reload BASE for lack of a saved register. ++ | j <1 ++ | ++ |7: // Possible table write barrier for any value. Skip valiswhite check. 
++ | barrierback TAB:CARG2, TMP3, TMP0, <4 ++ break; ++ ++ /* -- Calls and vararg handling ----------------------------------------- */ ++ ++ case BC_CALLM: ++ | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8 ++ | decode_RDtoRC8 NARGS8:RC, RD ++ | addw NARGS8:RC, NARGS8:RC, MULTRES ++ | j ->BC_CALL_Z ++ break; ++ case BC_CALL: ++ | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8 ++ | decode_RDtoRC8 NARGS8:RC, RD ++ |->BC_CALL_Z: ++ | mv TMP2, BASE ++ | add BASE, BASE, RA ++ | ld LFUNC:RB, 0(BASE) ++ | addi BASE, BASE, 16 ++ | addiw NARGS8:RC, NARGS8:RC, -8 ++ | checkfunc RB, ->vmeta_call ++ | ins_call ++ break; ++ ++ case BC_CALLMT: ++ | // RA = base*8, (RB = 0,) RC = extra_nargs*8 ++ | addw NARGS8:RD, NARGS8:RD, MULTRES ++ | // Fall through. Assumes BC_CALLT follows. ++ break; ++ case BC_CALLT: ++ | // RA = base*8, (RB = 0,) RC = (nargs+1)*8 ++ | add RA, BASE, RA ++ | ld LFUNC:RB, 0(RA) ++ | mv NARGS8:RC, RD ++ | ld TMP1, FRAME_PC(BASE) ++ | addi RA, RA, 16 ++ | addiw NARGS8:RC, NARGS8:RC, -8 ++ | checktp CARG3, LFUNC:RB, -LJ_TFUNC, ->vmeta_callt ++ |->BC_CALLT_Z: ++ | andi TMP0, TMP1, FRAME_TYPE // Caveat: preserve TMP0 until the 'or'. ++ | lbu TMP3, LFUNC:CARG3->ffid ++ | xori TMP2, TMP1, FRAME_VARG ++ | bnez TMP0, >7 ++ |1: ++ | sd LFUNC:RB, FRAME_FUNC(BASE) // Copy function down, but keep PC. ++ | sltiu CARG4, TMP3, 2 // (> FF_C) Calling a fast function? ++ | mv TMP2, BASE ++ | mv RB, CARG3 ++ | mv TMP3, NARGS8:RC ++ | beqz NARGS8:RC, >3 ++ |2: ++ | ld CRET1, 0(RA) ++ | addi RA, RA, 8 ++ | addiw TMP3, TMP3, -8 ++ | sd CRET1, 0(TMP2) ++ | addi TMP2, TMP2, 8 ++ | bnez TMP3, <2 ++ |3: ++ | or TMP0, TMP0, CARG4 ++ | beqz TMP0, >5 ++ |4: ++ | ins_callt ++ | ++ |5: // Tailcall to a fast function with a Lua frame below. ++ | lw INS, -4(TMP1) ++ | decode_RA8 RA, INS ++ | sub TMP1, BASE, RA ++ | ld TMP1, -32(TMP1) ++ | cleartp LFUNC:TMP1 ++ | ld TMP1, LFUNC:TMP1->pc ++ | ld KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE. 
++ | j <4 ++ | ++ |7: // Tailcall from a vararg function. ++ | andi CARG4, TMP2, FRAME_TYPEP ++ | sub TMP2, BASE, TMP2 // Relocate BASE down. ++ | bnez CARG4, <1 // Vararg frame below? ++ | mv BASE, TMP2 ++ | ld TMP1, FRAME_PC(TMP2) ++ | andi TMP0, TMP1, FRAME_TYPE ++ | j <1 ++ break; ++ ++ case BC_ITERC: ++ | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8)) ++ | mv TMP2, BASE // Save old BASE for vmeta_call. ++ | add BASE, BASE, RA ++ | ld RB, -24(BASE) //A, A+1, A+2 = A-3, A-2, A-1. ++ | ld CARG1, -16(BASE) ++ | ld CARG2, -8(BASE) ++ | li NARGS8:RC, 16 // Iterators get 2 arguments. ++ | sd RB, 0(BASE) // Copy callable. ++ | sd CARG1, 16(BASE) // Copy state. ++ | sd CARG2, 24(BASE) // Copy control var. ++ | addi BASE, BASE, 16 ++ | checkfunc RB, ->vmeta_call ++ | ins_call ++ break; ++ ++ case BC_ITERN: ++ | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) ++ |.if JIT ++ | hotloop ++ |.endif ++ |->vm_IITERN: ++ | add RA, BASE, RA ++ | ld TAB:RB, -16(RA) ++ | lw RC, -8(RA) // Get index from control var. ++ | cleartp TAB:RB ++ | addi PC, PC, 4 ++ | lw TMP0, TAB:RB->asize ++ | ld TMP1, TAB:RB->array ++ | slli CARG3, TISNUM, 47 ++ |1: // Traverse array part. ++ | bleu TMP0, RC, >5 // Index points after array part? ++ | slliw TMP3, RC, 3 ++ | add TMP3, TMP1, TMP3 ++ | ld CARG1, 0(TMP3) ++ | lhu RD, -4+OFS_RD(PC) // ITERL RD ++ | or TMP2, RC, CARG3 ++ | addiw RC, RC, 1 ++ | beq CARG1, TISNIL, <1 // Skip holes in array part. ++ | sd TMP2, 0(RA) ++ | sd CARG1, 8(RA) ++ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 ++ | decode_BC4b RD ++ | add RD, RD, TMP3 ++ | sw RC, -8(RA) // Update control var. ++ | add PC, PC, RD ++ |3: ++ | ins_next ++ | ++ |5: // Traverse hash part. ++ | lw TMP1, TAB:RB->hmask ++ | subw RC, RC, TMP0 ++ | ld TMP2, TAB:RB->node ++ |6: ++ | bltu TMP1, RC, <3 // End of iteration? Branch to ITERL+1. 
++ | slliw TMP3, RC, 5 ++ | slliw RB, RC, 3 ++ | subw TMP3, TMP3, RB ++ | add NODE:TMP3, TMP3, TMP2 // node = tab->node + (idx*32-idx*8) ++ | ld CARG1, 0(NODE:TMP3) ++ | lhu RD, -4+OFS_RD(PC) // ITERL RD ++ | addiw RC, RC, 1 ++ | beq CARG1, TISNIL, <6 // Skip holes in hash part. ++ | ld CARG2, NODE:TMP3->key ++ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 ++ | sd CARG1, 8(RA) ++ | addw RC, RC, TMP0 ++ | decode_BC4b RD ++ | addw RD, RD, TMP3 ++ | sd CARG2, 0(RA) ++ | add PC, PC, RD ++ | sw RC, -8(RA) // Update control var. ++ | j <3 ++ break; ++ ++ case BC_ISNEXT: ++ | // RA = base*8, RD = target (points to ITERN) ++ | add RA, BASE, RA ++ | srliw TMP0, RD, 1 ++ | ld CFUNC:CARG1, -24(RA) ++ | add TMP0, PC, TMP0 ++ | ld CARG2, -16(RA) ++ | ld CARG3, -8(RA) ++ | lui TMP2, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 ++ | checkfunc CFUNC:CARG1, >5 ++ | gettp CARG2, CARG2 ++ | addi CARG2, CARG2, -LJ_TTAB ++ | lbu TMP1, CFUNC:CARG1->ffid ++ | addi CARG3, CARG3, -LJ_TNIL ++ | or TMP3, CARG2, CARG3 ++ | addi TMP1, TMP1, -FF_next_N ++ | or TMP3, TMP3, TMP1 ++ | lui TMP1, ((LJ_KEYINDEX - (((LJ_KEYINDEX & 0xfff)^0x800) - 0x800)) >> 12) & 0xfffff ++ | bnez TMP3, >5 ++ | add PC, TMP0, TMP2 ++ | addi TMP1, TMP1, (((LJ_KEYINDEX & 0xfff)^0x800) - 0x800) ++ | slli TMP1, TMP1, 32 ++ | sd TMP1, -8(RA) ++ |1: ++ | ins_next ++ |5: // Despecialize bytecode if any of the checks fail. ++ | li TMP3, BC_JMP ++ | li TMP1, BC_ITERC ++ | sb TMP3, -4+OFS_OP(PC) ++ | add PC, TMP0, TMP2 ++ |.if JIT ++ | lb TMP0, OFS_OP(PC) ++ | li TMP3, BC_ITERN ++ | lhu TMP2, OFS_RD(PC) ++ | bne TMP0, TMP3, >6 ++ |.endif ++ | sb TMP1, OFS_OP(PC) ++ | j <1 ++ |.if JIT ++ |6: // Unpatch JLOOP. ++ | ld TMP0, GL_J(trace)(GL) // Assumes J.trace in-reach relative to GL. 
++ | slliw TMP2, TMP2, 3 ++ | add TMP0, TMP0, TMP2 ++ | ld TRACE:TMP2, 0(TMP0) ++ | lw TMP0, TRACE:TMP2->startins ++ | andi TMP0, TMP0, -256 ++ | or TMP0, TMP0, TMP1 ++ | sw TMP0, 0(PC) ++ | j <1 ++ |.endif ++ break; ++ ++ case BC_VARG: ++ | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8 ++ | ld TMP0, FRAME_PC(BASE) ++ | decode_RDtoRC8 RC, RD ++ | decode_RB8 RB, INS ++ | add RC, BASE, RC ++ | add RA, BASE, RA ++ | addi RC, RC, FRAME_VARG ++ | add TMP2, RA, RB ++ | addi TMP3, BASE, -16 // TMP3 = vtop ++ | sub RC, RC, TMP0 // RC = vbase ++ | // Note: RC may now be even _above_ BASE if nargs was < numparams. ++ | sub TMP1, TMP3, RC ++ | beqz RB, >5 // Copy all varargs? ++ | addi TMP2, TMP2, -16 ++ |1: // Copy vararg slots to destination slots. ++ | ld CARG1, 0(RC) ++ | sltu TMP0, RC, TMP3 ++ | addi RC, RC, 8 ++ | bnez TMP0, >2 ++ | mv CARG1, TISNIL ++ |2: ++ | sd CARG1, 0(RA) ++ | sltu TMP0, RA, TMP2 ++ | addi RA, RA, 8 ++ | bnez TMP0, <1 ++ |3: ++ | ins_next ++ | ++ |5: // Copy all varargs. ++ | ld TMP0, L->maxstack ++ | li MULTRES, 8 // MULTRES = (0+1)*8 ++ | blez TMP1, <3 // No vararg slots? ++ | add TMP2, RA, TMP1 ++ | addi MULTRES, TMP1, 8 ++ | bltu TMP0, TMP2, >7 ++ |6: ++ | ld CRET1, 0(RC) ++ | addi RC, RC, 8 ++ | sd CRET1, 0(RA) ++ | addi RA, RA, 8 ++ | bltu RC, TMP3, <6 // More vararg slots? ++ | j <3 ++ | ++ |7: // Grow stack for varargs. ++ | sd RA, L->top ++ | sub RA, RA, BASE ++ | sd BASE, L->base ++ | sub BASE, RC, BASE // Need delta, because BASE may change. ++ | sd PC, SAVE_PC(sp) ++ | srliw CARG2, TMP1, 3 ++ | mv CARG1, L ++ | call_intern BC_VARG, lj_state_growstack // (lua_State *L, int n) ++ | mv RC, BASE ++ | ld BASE, L->base ++ | add RA, BASE, RA ++ | add RC, BASE, RC ++ | addi TMP3, BASE, -16 ++ | j <6 ++ break; ++ ++ /* -- Returns ----------------------------------------------------------- */ ++ ++ case BC_RETM: ++ | // RA = results*8, RD = extra_nresults*8 ++ | addw RD, RD, MULTRES ++ | // Fall through. Assumes BC_RET follows. 
++ break; ++ ++ case BC_RET: ++ | // RA = results*8, RD = (nresults+1)*8 ++ | ld PC, FRAME_PC(BASE) ++ | add RA, BASE, RA ++ | mv MULTRES, RD ++ |1: ++ | andi TMP0, PC, FRAME_TYPE ++ | xori TMP1, PC, FRAME_VARG ++ | bnez TMP0, ->BC_RETV_Z ++ | ++ |->BC_RET_Z: ++ | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return ++ | lw INS, -4(PC) ++ | addi TMP2, BASE, -16 ++ | addi RC, RD, -8 ++ | decode_RA8 TMP0, INS ++ | decode_RB8 RB, INS ++ | sub BASE, TMP2, TMP0 ++ | add TMP3, TMP2, RB ++ | beqz RC, >3 ++ |2: ++ | ld CRET1, 0(RA) ++ | addi RA, RA, 8 ++ | addi RC, RC, -8 ++ | sd CRET1, 0(TMP2) ++ | addi TMP2, TMP2, 8 ++ | bnez RC, <2 ++ |3: ++ | addi TMP3, TMP3, -8 ++ |5: ++ | bltu TMP2, TMP3, >6 ++ | ld LFUNC:TMP1, FRAME_FUNC(BASE) ++ | cleartp LFUNC:TMP1 ++ | ld TMP1, LFUNC:TMP1->pc ++ | ins_next1 ++ | ld KBASE, PC2PROTO(k)(TMP1) ++ | ins_next2 ++ | ++ |6: // Fill up results with nil. ++ | sd TISNIL, 0(TMP2) ++ | addi TMP2, TMP2, 8 ++ | j <5 ++ | ++ |->BC_RETV_Z: // Non-standard return case. ++ | andi TMP2, TMP1, FRAME_TYPEP ++ | bnez TMP2, ->vm_return ++ | // Return from vararg function: relocate BASE down. ++ | sub BASE, BASE, TMP1 ++ | ld PC, FRAME_PC(BASE) ++ | j <1 ++ break; ++ ++ case BC_RET0: case BC_RET1: ++ | // RA = results*8, RD = (nresults+1)*8 ++ | ld PC, FRAME_PC(BASE) ++ | add RA, BASE, RA ++ | mv MULTRES, RD ++ | andi TMP0, PC, FRAME_TYPE ++ | xori TMP1, PC, FRAME_VARG ++ | bnez TMP0, ->BC_RETV_Z ++ | lw INS, -4(PC) ++ | addi TMP2, BASE, -16 ++ if (op == BC_RET1) { ++ | ld CRET1, 0(RA) ++ } ++ | decode_RB8 RB, INS ++ | decode_RA8 RA, INS ++ | sub BASE, TMP2, RA ++ if (op == BC_RET1) { ++ | sd CRET1, 0(TMP2) ++ } ++ |5: ++ | bltu RD, RB, >6 ++ | ld TMP1, FRAME_FUNC(BASE) ++ | cleartp LFUNC:TMP1 ++ | ld TMP1, LFUNC:TMP1->pc ++ | ins_next1 ++ | ld KBASE, PC2PROTO(k)(TMP1) ++ | ins_next2 ++ | ++ |6: // Fill up results with nil. 
++ | addi TMP2, TMP2, 8 ++ | addi RD, RD, 8 ++ if (op == BC_RET1) { ++ | sd TISNIL, 0(TMP2) ++ } else { ++ | sd TISNIL, -8(TMP2) ++ } ++ | j <5 ++ break; ++ ++ /* -- Loops and branches ------------------------------------------------ */ ++ ++ case BC_FORL: ++ |.if JIT ++ | hotloop ++ |.endif ++ | // Fall through. Assumes BC_IFORL follows. ++ break; ++ ++ case BC_JFORI: ++ case BC_JFORL: ++#if !LJ_HASJIT ++ break; ++#endif ++ case BC_FORI: ++ case BC_IFORL: ++ | // RA = base*8, RD = target (after end of loop or start of loop) ++ vk = (op == BC_IFORL || op == BC_JFORL); ++ | add RA, BASE, RA ++ | ld CARG1, FORL_IDX*8(RA) // CARG1 = IDX ++ | ld CARG2, FORL_STEP*8(RA) // CARG2 = STEP ++ | ld CARG3, FORL_STOP*8(RA) // CARG3 = STOP ++ | gettp CARG4, CARG1 ++ | gettp CARG5, CARG2 ++ | gettp CARG6, CARG3 ++ if (op != BC_JFORL) { ++ | srliw RD, RD, 1 ++ | lui TMP2, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J<<2 ++ | add TMP2, RD, TMP2 ++ } ++ | bne CARG4, TISNUM, >3 ++ | sext.w CARG4, CARG1 // start ++ | sext.w CARG3, CARG3 // stop ++ if (!vk) { // init ++ | bxne CARG6, TISNUM, ->vmeta_for ++ | bxne CARG5, TISNUM, ->vmeta_for ++ | bfextri TMP0, CARG2, 31, 31 // sign ++ | slt CARG2, CARG3, CARG4 ++ | slt TMP1, CARG4, CARG3 ++ | neg TMP4, TMP0 ++ | xor TMP0, TMP1, CARG2 // CARG2 = TMP0 ? TMP1 : CARG2 ++ | and TMP0, TMP0, TMP4 ++ | xor CARG2, CARG2, TMP0 // CARG2=0: +,start <= stop or -,start >= stop ++ } else { ++ | sext.w CARG5, CARG2 // step ++ | addw CARG1, CARG4, CARG5 // start + step ++ | xor TMP3, CARG1, CARG4 // y^a ++ | xor TMP1, CARG1, CARG5 // y^b ++ | and TMP3, TMP3, TMP1 ++ | slt TMP1, CARG1, CARG3 // start+step < stop ? ++ | slt CARG3, CARG3, CARG1 // stop < start+step ? ++ | sltz TMP0, CARG5 // step < 0 ? ++ | sltz TMP3, TMP3 // ((y^a) & (y^b)) < 0: overflow. ++ | neg TMP4, TMP0 ++ | xor TMP1, TMP1, CARG3 // CARG3 = TMP0 ? 
TMP1 : CARG3 ++ | and TMP1, TMP1, TMP4 ++ | xor CARG3, CARG3, TMP1 ++ | or CARG2, CARG3, TMP3 // CARG2=1: overflow; CARG2=0: continue ++ | zext.w CARG1, CARG1 ++ | settp_b CARG1, TISNUM ++ | sd CARG1, FORL_IDX*8(RA) ++ } ++ |1: ++ if (op == BC_FORI) { ++ | neg TMP4, CARG2 // CARG2!=0: jump out the loop; CARG2==0: next INS ++ | and TMP2, TMP2, TMP4 ++ | add PC, PC, TMP2 ++ } else if (op == BC_JFORI) { ++ | add PC, PC, TMP2 ++ | lhu RD, -4+OFS_RD(PC) ++ } else if (op == BC_IFORL) { ++ | addi TMP4, CARG2, -1 // CARG2!=0: next INS; CARG2==0: jump back ++ | and TMP2, TMP2, TMP4 ++ | add PC, PC, TMP2 ++ } ++ | ins_next1 ++ | sd CARG1, FORL_EXT*8(RA) ++ |2: ++ if (op == BC_JFORI) { ++ | decode_RD8b RD ++ | beqz CARG2, =>BC_JLOOP // CARG2 == 0: excute the loop ++ } else if (op == BC_JFORL) { ++ | beqz CARG2, =>BC_JLOOP ++ } ++ | ins_next2 ++ | ++ |3: // FP loop. ++ | fld FTMP0, FORL_IDX*8(RA) // start ++ | fld FTMP1, FORL_STOP*8(RA) // stop ++ | ld TMP0, FORL_STEP*8(RA) // step ++ | sltz CARG2, TMP0 // step < 0 ? ++ | neg CARG2, CARG2 ++ if (!vk) { ++ | sltiu TMP3, CARG4, LJ_TISNUM // start is number ? ++ | sltiu TMP0, CARG5, LJ_TISNUM // step is number ? ++ | sltiu TMP1, CARG6, LJ_TISNUM // stop is number ? ++ | and TMP3, TMP3, TMP1 ++ | and TMP0, TMP0, TMP3 ++ | bxeqz TMP0, ->vmeta_for // if start or step or stop isn't number ++ | flt.d TMP3, FTMP0, FTMP1 // start < stop ? ++ | flt.d TMP4, FTMP1, FTMP0 // stop < start ? ++ | xor TMP0, TMP3, TMP4 // CARG2 = CARG2 ? TMP3 : TMP4 ++ | and TMP0, TMP0, CARG2 ++ | xor CARG2, TMP4, TMP0 // CARG2=0:+,startstop ++ | j <1 ++ } else { ++ | fld FTMP3, FORL_STEP*8(RA) ++ | fadd.d FTMP0, FTMP0, FTMP3 // start + step ++ | flt.d TMP3, FTMP0, FTMP1 // start + step < stop ? ++ | flt.d TMP4, FTMP1, FTMP0 ++ | xor TMP0, TMP3, TMP4 // CARG2 = CARG2 ? 
TMP3 : TMP4 ++ | and TMP0, TMP0, CARG2 ++ | xor CARG2, TMP4, TMP0 ++ if (op == BC_IFORL) { ++ | addi TMP3, CARG2, -1 ++ | and TMP2, TMP2, TMP3 ++ | add PC, PC, TMP2 ++ } ++ | fsd FTMP0, FORL_IDX*8(RA) ++ | ins_next1 ++ | fsd FTMP0, FORL_EXT*8(RA) ++ | j <2 ++ } ++ break; ++ ++ case BC_ITERL: ++ |.if JIT ++ | hotloop ++ |.endif ++ | // Fall through. Assumes BC_IITERL follows. ++ break; ++ ++ case BC_JITERL: ++#if !LJ_HASJIT ++ break; ++#endif ++ case BC_IITERL: ++ | // RA = base*8, RD = target ++ | add RA, BASE, RA ++ | ld TMP1, 0(RA) ++ | beq TMP1, TISNIL, >1 // Stop if iterator returned nil. ++ if (op == BC_JITERL) { ++ | sd TMP1,-8(RA) ++ | j =>BC_JLOOP ++ } else { ++ | branch_RD // Otherwise save control var + branch. ++ | sd TMP1, -8(RA) ++ } ++ |1: ++ | ins_next ++ break; ++ ++ case BC_LOOP: ++ | // RA = base*8, RD = target (loop extent) ++ | // Note: RA/RD is only used by trace recorder to determine scope/extent ++ | // This opcode does NOT jump, it's only purpose is to detect a hot loop. ++ |.if JIT ++ | hotloop ++ |.endif ++ | // Fall through. Assumes BC_ILOOP follows. ++ break; ++ ++ case BC_ILOOP: ++ | // RA = base*8, RD = target (loop extent) ++ | ins_next ++ break; ++ ++ case BC_JLOOP: ++ |.if JIT ++ | // RA = base*8 (ignored), RD = traceno*8 ++ | ld TMP0, GL_J(trace)(GL) // Assumes J.trace in-reach relative to GL. ++ | add TMP0, TMP0, RD ++ | // Traces on RISC-V don't store the trace number, so use 0. ++ | sd x0, GL->vmstate ++ | ld TRACE:TMP1, 0(TMP0) ++ | sd BASE, GL->jit_base // store Current JIT code L->base ++ | ld TMP1, TRACE:TMP1->mcode ++ | sd L, GL->tmpbuf.L ++ | jr TMP1 ++ |.endif ++ break; ++ ++ case BC_JMP: ++ | // RA = base*8 (only used by trace recorder), RD = target ++ | branch_RD // PC + (jump - 0x8000)<<2 ++ | ins_next ++ break; ++ ++ /* -- Function headers -------------------------------------------------- */ ++ ++ case BC_FUNCF: ++ |.if JIT ++ | hotcall ++ |.endif ++ case BC_FUNCV: /* NYI: compiled vararg functions. 
*/ ++ | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. ++ break; ++ ++ case BC_JFUNCF: ++#if !LJ_HASJIT ++ break; ++#endif ++ case BC_IFUNCF: ++ | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 ++ | ld TMP2, L->maxstack ++ | lbu TMP1, -4+PC2PROTO(numparams)(PC) ++ | ld KBASE, -4+PC2PROTO(k)(PC) ++ | bltu TMP2, RA, ->vm_growstack_l ++ | slliw TMP1, TMP1, 3 // numparams*8 ++ |2: ++ | bltu NARGS8:RC, TMP1, >3 // Check for missing parameters. ++ if (op == BC_JFUNCF) { ++ | decode_RD8 RD, INS ++ | j =>BC_JLOOP ++ } else { ++ | ins_next ++ } ++ | ++ |3: // Clear missing parameters. ++ | add TMP0, BASE, NARGS8:RC ++ | sd TISNIL, 0(TMP0) ++ | addiw NARGS8:RC, NARGS8:RC, 8 ++ | j <2 ++ break; ++ ++ case BC_JFUNCV: ++#if !LJ_HASJIT ++ break; ++#endif ++ | NYI // NYI: compiled vararg functions ++ break; /* NYI: compiled vararg functions. */ ++ ++ case BC_IFUNCV: ++ | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 ++ | li TMP0, LJ_TFUNC ++ | add TMP1, BASE, RC ++ | ld TMP2, L->maxstack ++ | settp LFUNC:RB, TMP0 ++ | add TMP0, RA, RC ++ | sd LFUNC:RB, 0(TMP1) // Store (tagged) copy of LFUNC. ++ | addi TMP2, TMP2, -8 ++ | addi TMP3, RC, 16+FRAME_VARG ++ | ld KBASE, -4+PC2PROTO(k)(PC) ++ | sd TMP3, 8(TMP1) // Store delta + FRAME_VARG. ++ | bgeu TMP0, TMP2, ->vm_growstack_l ++ | lbu TMP2, -4+PC2PROTO(numparams)(PC) ++ | mv RA, BASE ++ | mv RC, TMP1 ++ | ins_next1 ++ | addi BASE, TMP1, 16 ++ | beqz TMP2, >2 ++ |1: ++ | ld TMP0, 0(RA) ++ | sltu CARG2, RA, RC // Less args than parameters? ++ | addi RA, RA, 8 ++ | addi TMP1, TMP1, 8 ++ | addiw TMP2, TMP2, -1 ++ | beqz CARG2, >3 ++ | neg TMP4, CARG2 // Clear old fixarg slot (help the GC). ++ | xor TMP3, TISNIL, TMP0 // CARG1 = CARG2 ? TISNIL : TMP0 ++ | and TMP3, TMP3, TMP4 ++ | xor CARG1, TMP0, TMP3 ++ | sd CARG1, -8(RA) ++ | sd TMP0, 8(TMP1) ++ | bnez TMP2, <1 ++ |2: ++ | ins_next2 ++ |3: ++ | neg TMP4, CARG2 // Clear missing fixargs. ++ | xor TMP3, TMP0, TISNIL // TMP0 = CARG2 ? 
TMP0 : TISNIL ++ | and TMP3, TMP3, TMP4 ++ | xor TMP0, TISNIL, TMP3 ++ | sd TMP0, 8(TMP1) ++ | bnez TMP2, <1 ++ | j <2 ++ break; ++ ++ case BC_FUNCC: ++ case BC_FUNCCW: ++ | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8 ++ if (op == BC_FUNCC) { ++ | ld CARG4, CFUNC:RB->f ++ } else { ++ | ld CARG4, GL->wrapf ++ } ++ | add TMP1, RA, NARGS8:RC ++ | ld TMP2, L->maxstack ++ | add RC, BASE, NARGS8:RC ++ | sd BASE, L->base // base of currently executing function ++ | sd RC, L->top ++ | bgtu TMP1, TMP2, ->vm_growstack_c // Need to grow stack. ++ | li_vmstate C // li TMP0, ~LJ_VMST_C ++ if (op == BC_FUNCCW) { ++ | ld CARG2, CFUNC:RB->f ++ } ++ | mv CARG1, L ++ | st_vmstate // sw TMP0, GL->vmstate ++ | jalr CARG4 // (lua_State *L [, lua_CFunction f]) ++ | // Returns nresults. ++ | ld BASE, L->base ++ | ld TMP1, L->top ++ | sd L, GL->cur_L ++ | slliw RD, CRET1, 3 ++ | li_vmstate INTERP ++ | ld PC, FRAME_PC(BASE) // Fetch PC of caller. ++ | sub RA, TMP1, RD // RA = L->top - nresults*8 ++ | st_vmstate ++ | j ->vm_returnc ++ break; ++ ++ /* ---------------------------------------------------------------------- */ ++ ++ default: ++ fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); ++ exit(2); ++ break; ++ } ++} ++/* Emit the subroutines, then the code for each bytecode op in opcode order. Returns BC__MAX. */ ++static int build_backend(BuildCtx *ctx) ++{ ++ int op; ++ ++ dasm_growpc(Dst, BC__MAX); ++ ++ build_subroutines(ctx); ++ ++ |.code_op ++ for (op = 0; op < BC__MAX; op++) ++ build_ins(ctx, (BCOp)op, op); ++ ++ return BC__MAX; ++} ++ ++/* Emit pseudo frame-info for all assembler functions. 
*/ ++static void emit_asm_debug(BuildCtx *ctx) ++{ ++ int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); /* Code offset of lj_vm_ffi_call. */ ++ int i; ++ switch (ctx->mode) { ++ case BUILD_elfasm: ++ fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); ++ fprintf(ctx->fp, ++ ".Lframe0:\n" ++ "\t.4byte .LECIE0-.LSCIE0\n" ++ ".LSCIE0:\n" ++ "\t.4byte 0xffffffff\n" ++ "\t.byte 0x1\n" ++ "\t.string \"\"\n" ++ "\t.uleb128 0x1\n" ++ "\t.sleb128 -4\n" ++ "\t.byte 1\n" /* Return address is in ra. */ ++ "\t.byte 0xc\n\t.uleb128 2\n\t.uleb128 0\n" /* def_cfa sp 0 */ ++ "\t.align 3\n" ++ ".LECIE0:\n\n"); ++ fprintf(ctx->fp, ++ ".LSFDE0:\n" ++ "\t.4byte .LEFDE0-.LASFDE0\n" ++ ".LASFDE0:\n" ++ "\t.4byte .Lframe0\n" ++ "\t.8byte .Lbegin\n" ++ "\t.8byte %d\n" ++ "\t.byte 0xe\n\t.uleb128 %d\n" ++ "\t.byte 0x81\n\t.uleb128 2*6\n" /* offset ra */, ++ fcofs, CFRAME_SIZE); ++ for (i = 27; i >= 18; i--) /* offset x27-x18 (s11-s2) */ ++ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(27-i+7)); ++ fprintf(ctx->fp, ++ "\t.byte 0x89\n\t.uleb128 2*17\n" /* offset x9 (s1) */ ++ "\t.byte 0x88\n\t.uleb128 2*18\n" /* offset x8 (s0/fp) */); ++ for (i = 27; i >= 18; i--) /* offset f27-f18 (fs11-fs2) */ ++ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(27-i+19)); ++ fprintf(ctx->fp, ++ "\t.byte 0x89+32\n\t.uleb128 2*29\n" /* offset f9 (fs1) */ ++ "\t.byte 0x88+32\n\t.uleb128 2*30\n" /* offset f8 (fs0) */ ++ "\t.align 3\n" ++ ".LEFDE0:\n\n"); ++#if LJ_HASFFI ++ fprintf(ctx->fp, ++ ".LSFDE1:\n" ++ "\t.4byte .LEFDE1-.LASFDE1\n" ++ ".LASFDE1:\n" ++ "\t.4byte .Lframe0\n" ++ "\t.8byte lj_vm_ffi_call\n" /* Address fields are 8 bytes wide, same as in .LSFDE0. */ ++ "\t.8byte %d\n" ++ "\t.byte 0x81\n\t.uleb128 2*1\n" /* offset ra */ ++ "\t.byte 0x92\n\t.uleb128 2*2\n" /* offset x18 */ ++ "\t.byte 0xd\n\t.uleb128 0x12\n" ++ "\t.align 3\n" ++ ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); ++#endif ++#if !LJ_NO_UNWIND ++ fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n"); ++ fprintf(ctx->fp, ++ ".Lframe1:\n" ++ "\t.4byte .LECIE1-.LSCIE1\n" ++ ".LSCIE1:\n" 
++ "\t.4byte 0\n" ++ "\t.byte 0x1\n" ++ "\t.string \"zPR\"\n" ++ "\t.uleb128 0x1\n" ++ "\t.sleb128 -4\n" ++ "\t.byte 1\n" /* Return address is in ra. */ ++ "\t.uleb128 6\n" /* augmentation length */ ++ "\t.byte 0x1b\n" ++ "\t.4byte lj_err_unwind_dwarf-.\n" ++ "\t.byte 0x1b\n" ++ "\t.byte 0xc\n\t.uleb128 2\n\t.uleb128 0\n" /* def_cfa sp 0 */ ++ "\t.align 2\n" ++ ".LECIE1:\n\n"); ++ fprintf(ctx->fp, ++ ".LSFDE2:\n" ++ "\t.4byte .LEFDE2-.LASFDE2\n" ++ ".LASFDE2:\n" ++ "\t.4byte .LASFDE2-.Lframe1\n" ++ "\t.4byte .Lbegin-.\n" ++ "\t.4byte %d\n" ++ "\t.uleb128 0\n" /* augmentation length */ ++ "\t.byte 0xe\n\t.uleb128 %d\n" ++ "\t.byte 0x81\n\t.uleb128 2*6\n", /* offset ra */ ++ fcofs, CFRAME_SIZE); ++ for (i = 27; i >= 18; i--) /* offset x27-x18 (s11-s2) */ ++ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(27-i+7)); ++ fprintf(ctx->fp, ++ "\t.byte 0x89\n\t.uleb128 2*17\n" /* offset x9 (s1) */ ++ "\t.byte 0x88\n\t.uleb128 2*18\n" /* offset x8 (s0/fp) */); ++ for (i = 27; i >= 18; i--) /* offset f27-f18 (fs11-fs2) */ ++ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(27-i+19)); ++ fprintf(ctx->fp, ++ "\t.byte 0x89+32\n\t.uleb128 2*29\n" /* offset f9 (fs1) */ ++ "\t.byte 0x88+32\n\t.uleb128 2*30\n" /* offset f8 (fs0) */ ++ "\t.align 2\n" ++ ".LEFDE2:\n\n"); ++#if LJ_HASFFI ++ fprintf(ctx->fp, ++ ".Lframe2:\n" ++ "\t.4byte .LECIE2-.LSCIE2\n" ++ ".LSCIE2:\n" ++ "\t.4byte 0\n" ++ "\t.byte 0x1\n" ++ "\t.string \"zR\"\n" ++ "\t.uleb128 0x1\n" ++ "\t.sleb128 -4\n" ++ "\t.byte 1\n" /* Return address is in ra. 
*/ ++ "\t.uleb128 1\n" /* augmentation length */ ++ "\t.byte 0x1b\n" ++ "\t.byte 0xc\n\t.uleb128 2\n\t.uleb128 0\n" /* def_cfa sp 0 */ ++ "\t.align 2\n" ++ ".LECIE2:\n\n"); ++ fprintf(ctx->fp, ++ ".LSFDE3:\n" ++ "\t.4byte .LEFDE3-.LASFDE3\n" ++ ".LASFDE3:\n" ++ "\t.4byte .LASFDE3- .Lframe2\n" ++ "\t.4byte lj_vm_ffi_call-.\n" ++ "\t.4byte %d\n" ++ "\t.uleb128 0\n" /* augmentation length */ ++ "\t.byte 0x81\n\t.uleb128 2*1\n" /* offset ra */ ++ "\t.byte 0x92\n\t.uleb128 2*2\n" /* offset x18 */ ++ "\t.byte 0xd\n\t.uleb128 0x12\n" ++ "\t.align 2\n" ++ ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); ++#endif ++#endif ++ break; ++ default: ++ break; ++ } ++} +--- a/src/lj_frame.h ++++ b/src/lj_frame.h +@@ -287,6 +287,15 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CAL + ** need to change to 3. + */ + #define CFRAME_SHIFT_MULTRES 0 ++#elif LJ_TARGET_RISCV64 ++#define CFRAME_OFS_ERRF 252 ++#define CFRAME_OFS_NRES 248 ++#define CFRAME_OFS_PREV 240 ++#define CFRAME_OFS_L 232 ++#define CFRAME_OFS_PC 224 ++#define CFRAME_OFS_MULTRES 0 ++#define CFRAME_SIZE 256 ++#define CFRAME_SHIFT_MULTRES 3 + #else + #error "Missing CFRAME_* definitions for this architecture" + #endif +--- a/src/lj_vm.h ++++ b/src/lj_vm.h +@@ -37,6 +37,9 @@ LJ_ASMF int lj_vm_cpuid(uint32_t f, uint + #if LJ_TARGET_PPC + void lj_vm_cachesync(void *start, void *end); + #endif ++#if LJ_TARGET_RISCV64 ++void lj_vm_fence_rw_rw(void); ++#endif + LJ_ASMF LJ_CONSTF double lj_vm_foldarith(double x, double y, int op); + #if LJ_HASJIT + LJ_ASMF LJ_CONSTF double lj_vm_foldfpm(double x, int op); +--- a/src/lj_vmmath.c ++++ b/src/lj_vmmath.c +@@ -69,7 +69,8 @@ double lj_vm_foldarith(double x, double + + /* -- Helper functions for generated machine code ------------------------- */ + +-#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS ++#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS \ ++ || LJ_TARGET_RISCV64 + int32_t LJ_FASTCALL lj_vm_modi(int32_t a, 
int32_t b) + { + uint32_t y, ua, ub; +--- a/src/lj_target.h ++++ b/src/lj_target.h +@@ -55,7 +55,7 @@ typedef uint32_t RegSP; + /* Bitset for registers. 32 registers suffice for most architectures. + ** Note that one set holds bits for both GPRs and FPRs. + */ +-#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 ++#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 || LJ_TARGET_RISCV64 + typedef uint64_t RegSet; + #define RSET_BITS 6 + #define rset_picktop_(rs) ((Reg)lj_fls64(rs)) +@@ -145,6 +145,8 @@ typedef uint32_t RegCost; + #include "lj_target_mips.h" + #elif LJ_TARGET_S390X + #include "lj_target_s390x.h" ++#elif LJ_TARGET_RISCV64 ++#include "lj_target_riscv.h" + #else + #error "Missing include for target CPU" + #endif +--- /dev/null ++++ b/src/lj_target_riscv.h +@@ -0,0 +1,542 @@ ++/* ++** Definitions for RISC-V CPUs. ++** Copyright (C) 2022-2026 ISRC, ISCAS. See Copyright Notice in luajit.h ++*/ ++ ++#ifndef _LJ_TARGET_RISCV_H ++#define _LJ_TARGET_RISCV_H ++ ++/* -- Registers IDs ------------------------------------------------------- */ ++ ++#define GPRDEF(_) \ ++ _(X0) _(RA) _(SP) _(X3) _(X4) _(X5) _(X6) _(X7) \ ++ _(X8) _(X9) _(X10) _(X11) _(X12) _(X13) _(X14) _(X15) \ ++ _(X16) _(X17) _(X18) _(X19) _(X20) _(X21) _(X22) _(X23) \ ++ _(X24) _(X25) _(X26) _(X27) _(X28) _(X29) _(X30) _(X31) ++#define FPRDEF(_) \ ++ _(F0) _(F1) _(F2) _(F3) _(F4) _(F5) _(F6) _(F7) \ ++ _(F8) _(F9) _(F10) _(F11) _(F12) _(F13) _(F14) _(F15) \ ++ _(F16) _(F17) _(F18) _(F19) _(F20) _(F21) _(F22) _(F23) \ ++ _(F24) _(F25) _(F26) _(F27) _(F28) _(F29) _(F30) _(F31) ++#define VRIDDEF(_) ++ ++#define RIDENUM(name) RID_##name, ++ ++enum { ++ GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */ ++ FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ ++ RID_MAX, ++ RID_ZERO = RID_X0, ++ RID_TMP = RID_RA, ++ RID_GP = RID_X3, ++ RID_TP = RID_X4, ++ ++ /* Calling conventions. 
*/ ++ RID_RET = RID_X10, ++ RID_RETLO = RID_X10, ++ RID_RETHI = RID_X11, ++ RID_FPRET = RID_F10, ++ RID_CFUNCADDR = RID_X5, ++ ++ /* These definitions must match with the *.dasc file(s): */ ++ RID_BASE = RID_X18, /* Interpreter BASE. */ ++ RID_LPC = RID_X20, /* Interpreter PC. */ ++ RID_GL = RID_X21, /* Interpreter GL. */ ++ RID_LREG = RID_X23, /* Interpreter L. */ ++ ++ /* Register ranges [min, max) and number of registers. */ ++ RID_MIN_GPR = RID_X0, ++ RID_MAX_GPR = RID_X31+1, ++ RID_MIN_FPR = RID_MAX_GPR, ++ RID_MAX_FPR = RID_F31+1, ++ RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, ++ RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR /* All FPRs; RSET_FPR covers the full range. */ ++}; ++ ++#define RID_NUM_KREF RID_NUM_GPR ++#define RID_MIN_KREF RID_X0 ++ ++/* -- Register sets ------------------------------------------------------- */ ++ ++/* Make use of all registers, except ZERO, TMP, SP, GP, TP and GL. CFUNCADDR (x5) is not reserved here; it is part of RSET_SCRATCH_GPR. */ ++#define RSET_FIXED \ ++ (RID2RSET(RID_ZERO)|RID2RSET(RID_TMP)|RID2RSET(RID_SP)|\ ++ RID2RSET(RID_GP)|RID2RSET(RID_TP)|RID2RSET(RID_GL)) ++#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED) ++#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR) ++ ++#define RSET_ALL (RSET_GPR|RSET_FPR) ++#define RSET_INIT RSET_ALL ++ ++#define RSET_SCRATCH_GPR \ ++ (RSET_RANGE(RID_X5, RID_X7+1)|RSET_RANGE(RID_X28, RID_X31+1)|\ ++ RSET_RANGE(RID_X10, RID_X17+1)) ++ ++#define RSET_SCRATCH_FPR \ ++ (RSET_RANGE(RID_F0, RID_F7+1)|RSET_RANGE(RID_F10, RID_F17+1)|\ ++ RSET_RANGE(RID_F28, RID_F31+1)) ++#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) ++ ++#define REGARG_FIRSTGPR RID_X10 ++#define REGARG_LASTGPR RID_X17 ++#define REGARG_NUMGPR 8 ++ ++#define REGARG_FIRSTFPR RID_F10 ++#define REGARG_LASTFPR RID_F17 ++#define REGARG_NUMFPR 8 ++ ++/* -- Spill slots --------------------------------------------------------- */ ++ ++/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. ++** ++** SPS_FIXED: Available fixed spill slots in interpreter frame.
++** This definition must match with the *.dasc file(s). ++** ++** SPS_FIRST: First spill slot for general use. ++*/ ++#if LJ_32 ++#define SPS_FIXED 5 ++#else ++#define SPS_FIXED 4 ++#endif ++#define SPS_FIRST 4 ++ ++#define SPOFS_TMP 0 ++ ++#define sps_scale(slot) (4 * (int32_t)(slot)) ++#define sps_align(slot) (((slot) - SPS_FIXED + 3) & ~3) ++ ++/* -- Exit state ---------------------------------------------------------- */ ++/* This definition must match with the *.dasc file(s). */ ++typedef struct { ++ lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ ++ intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ ++ int32_t spill[256]; /* Spill slots. */ ++} ExitState; ++ ++/* Highest exit + 1 indicates stack check. */ ++#define EXITSTATE_CHECKEXIT 1 ++ ++/* Return the address of a per-trace exit stub. */ ++static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) ++{ ++ while (*p == 0x00000013) p++; /* Skip RISCVI_NOP. */ ++ return p + 4 + exitno; ++} ++/* Avoid dependence on lj_jit.h if only including lj_target.h. */ ++#define exitstub_trace_addr(T, exitno) \ ++ exitstub_trace_addr_((MCode *)((char *)(T)->mcode + (T)->szmcode), (exitno)) ++ ++/* -- Instructions -------------------------------------------------------- */ ++ ++/* Instruction fields. 
*/ ++#define RISCVF_D(d) (((d)&31) << 7) ++#define RISCVF_S1(r) (((r)&31) << 15) ++#define RISCVF_S2(r) (((r)&31) << 20) ++#define RISCVF_S3(r) (((r)&31) << 27) ++#define RISCVF_FUNCT2(f) (((f)&3) << 25) ++#define RISCVF_FUNCT3(f) (((f)&7) << 12) ++#define RISCVF_FUNCT7(f) (((f)&127) << 25) ++#define RISCVF_SHAMT(s) ((s) << 20) ++#define RISCVF_RM(m) (((m)&7) << 12) ++#define RISCVF_IMMI(i) ((i) << 20) ++#define RISCVF_IMMS(i) (((i)&0xfe0) << 20 | ((i)&0x1f) << 7) ++#define RISCVF_IMMB(i) (((i)&0x1000) << 19 | ((i)&0x800) >> 4 | ((i)&0x7e0) << 20 | ((i)&0x1e) << 7) ++#define RISCVF_IMMU(i) (((i)&0xfffff) << 12) ++#define RISCVF_IMMJ(i) (((i)&0x100000) << 11 | ((i)&0xff000) | ((i)&0x800) << 9 | ((i)&0x7fe) << 20) ++ ++/* Encode helpers. */ ++#define RISCVF_W_HI(w) ((w) - ((((w)&0xfff)^0x800) - 0x800)) ++#define RISCVF_W_LO(w) ((w)&0xfff) ++#define RISCVF_HI(i) ((RISCVF_W_HI(i) >> 12) & 0xfffff) ++#define RISCVF_LO(i) RISCVF_W_LO(i) ++ ++/* Check for valid field range. */ ++#define RISCVF_SIMM_OK(x, b) ((((x) + (1 << (b-1))) >> (b)) == 0) ++#define RISCVF_UIMM_OK(x, b) (((x) >> (b)) == 0) ++#define checku11(i) RISCVF_UIMM_OK(i, 11) ++#define checki12(i) RISCVF_SIMM_OK(i, 12) ++#define checki13(i) RISCVF_SIMM_OK(i, 13) ++#define checki20(i) RISCVF_SIMM_OK(i, 20) ++#define checki21(i) RISCVF_SIMM_OK(i, 21) ++#define checki32auipc(i) (checki32(i) && (int32_t)(i) < 0x7ffff800) ++ ++typedef enum RISCVIns { ++ ++ /* --- RVI --- */ ++ RISCVI_LUI = 0x00000037, ++ RISCVI_AUIPC = 0x00000017, ++ ++ RISCVI_JAL = 0x0000006f, ++ RISCVI_JALR = 0x00000067, ++ ++ RISCVI_ADDI = 0x00000013, ++ RISCVI_SLTI = 0x00002013, ++ RISCVI_SLTIU = 0x00003013, ++ RISCVI_XORI = 0x00004013, ++ RISCVI_ORI = 0x00006013, ++ RISCVI_ANDI = 0x00007013, ++ ++ RISCVI_SLLI = 0x00001013, ++ RISCVI_SRLI = 0x00005013, ++ RISCVI_SRAI = 0x40005013, ++ ++ RISCVI_ADD = 0x00000033, ++ RISCVI_SUB = 0x40000033, ++ RISCVI_SLL = 0x00001033, ++ RISCVI_SLT = 0x00002033, ++ RISCVI_SLTU = 0x00003033, ++ RISCVI_XOR = 
0x00004033, ++ RISCVI_SRL = 0x00005033, ++ RISCVI_SRA = 0x40005033, ++ RISCVI_OR = 0x00006033, ++ RISCVI_AND = 0x00007033, ++ ++ RISCVI_LB = 0x00000003, ++ RISCVI_LH = 0x00001003, ++ RISCVI_LW = 0x00002003, ++ RISCVI_LBU = 0x00004003, ++ RISCVI_LHU = 0x00005003, ++ RISCVI_SB = 0x00000023, ++ RISCVI_SH = 0x00001023, ++ RISCVI_SW = 0x00002023, ++ ++ RISCVI_BEQ = 0x00000063, ++ RISCVI_BNE = 0x00001063, ++ RISCVI_BLT = 0x00004063, ++ RISCVI_BGE = 0x00005063, ++ RISCVI_BLTU = 0x00006063, ++ RISCVI_BGEU = 0x00007063, ++ ++ RISCVI_ECALL = 0x00000073, ++ RISCVI_EBREAK = 0x00100073, ++ ++ RISCVI_NOP = 0x00000013, ++ RISCVI_MV = 0x00000013, ++ RISCVI_NOT = 0xfff04013, ++ RISCVI_NEG = 0x40000033, ++ RISCVI_RET = 0x00008067, ++ RISCVI_ZEXT_B = 0x0ff07013, ++ ++#if LJ_TARGET_RISCV64 ++ RISCVI_LWU = 0x00007003, ++ RISCVI_LD = 0x00003003, ++ RISCVI_SD = 0x00003023, ++ ++ RISCVI_ADDIW = 0x0000001b, ++ ++ RISCVI_SLLIW = 0x0000101b, ++ RISCVI_SRLIW = 0x0000501b, ++ RISCVI_SRAIW = 0x4000501b, ++ ++ RISCVI_ADDW = 0x0000003b, ++ RISCVI_SUBW = 0x4000003b, ++ RISCVI_SLLW = 0x0000103b, ++ RISCVI_SRLW = 0x0000503b, ++ RISCVI_SRAW = 0x4000503b, ++ ++ RISCVI_NEGW = 0x4000003b, ++ RISCVI_SEXT_W = 0x0000001b, ++#endif ++ ++ /* --- RVM --- */ ++ RISCVI_MUL = 0x02000033, ++ RISCVI_MULH = 0x02001033, ++ RISCVI_MULHSU = 0x02002033, ++ RISCVI_MULHU = 0x02003033, ++ RISCVI_DIV = 0x02004033, ++ RISCVI_DIVU = 0x02005033, ++ RISCVI_REM = 0x02006033, ++ RISCVI_REMU = 0x02007033, ++#if LJ_TARGET_RISCV64 ++ RISCVI_MULW = 0x0200003b, ++ RISCVI_DIVW = 0x0200403b, ++ RISCVI_DIVUW = 0x0200503b, ++ RISCVI_REMW = 0x0200603b, ++ RISCVI_REMUW = 0x0200703b, ++#endif ++ ++ /* --- RVF --- */ ++ RISCVI_FLW = 0x00002007, ++ RISCVI_FSW = 0x00002027, ++ ++ RISCVI_FMADD_S = 0x00000043, ++ RISCVI_FMSUB_S = 0x00000047, ++ RISCVI_FNMSUB_S = 0x0000004b, ++ RISCVI_FNMADD_S = 0x0000004f, ++ ++ RISCVI_FADD_S = 0x00000053, ++ RISCVI_FSUB_S = 0x08000053, ++ RISCVI_FMUL_S = 0x10000053, ++ RISCVI_FDIV_S = 0x18000053, ++ 
RISCVI_FSQRT_S = 0x58000053, ++ ++ RISCVI_FSGNJ_S = 0x20000053, ++ RISCVI_FSGNJN_S = 0x20001053, ++ RISCVI_FSGNJX_S = 0x20002053, ++ ++ RISCVI_FMIN_S = 0x28000053, ++ RISCVI_FMAX_S = 0x28001053, ++ ++ RISCVI_FCVT_W_S = 0xc0000053, ++ RISCVI_FCVT_WU_S = 0xc0100053, ++ ++ RISCVI_FMV_X_W = 0xe0000053, ++ ++ RISCVI_FEQ_S = 0xa0002053, ++ RISCVI_FLT_S = 0xa0001053, ++ RISCVI_FLE_S = 0xa0000053, ++ ++ RISCVI_FCLASS_S = 0xe0001053, ++ ++ RISCVI_FCVT_S_W = 0xd0000053, ++ RISCVI_FCVT_S_WU = 0xd0100053, ++ RISCVI_FMV_W_X = 0xf0000053, ++ ++ RISCVI_FMV_S = 0x20000053, ++ RISCVI_FNEG_S = 0x20001053, ++ RISCVI_FABS_S = 0x20002053, ++#if LJ_TARGET_RISCV64 ++ RISCVI_FCVT_L_S = 0xc0200053, ++ RISCVI_FCVT_LU_S = 0xc0300053, ++ RISCVI_FCVT_S_L = 0xd0200053, ++ RISCVI_FCVT_S_LU = 0xd0300053, ++#endif ++ ++ /* --- RVD --- */ ++ RISCVI_FLD = 0x00003007, ++ RISCVI_FSD = 0x00003027, ++ ++ RISCVI_FMADD_D = 0x02000043, ++ RISCVI_FMSUB_D = 0x02000047, ++ RISCVI_FNMSUB_D = 0x0200004b, ++ RISCVI_FNMADD_D = 0x0200004f, ++ ++ RISCVI_FADD_D = 0x02000053, ++ RISCVI_FSUB_D = 0x0a000053, ++ RISCVI_FMUL_D = 0x12000053, ++ RISCVI_FDIV_D = 0x1a000053, ++ RISCVI_FSQRT_D = 0x5a000053, ++ ++ RISCVI_FSGNJ_D = 0x22000053, ++ RISCVI_FSGNJN_D = 0x22001053, ++ RISCVI_FSGNJX_D = 0x22002053, ++ ++ RISCVI_FMIN_D = 0x2a000053, ++ RISCVI_FMAX_D = 0x2a001053, ++ ++ RISCVI_FCVT_S_D = 0x40100053, ++ RISCVI_FCVT_D_S = 0x42000053, ++ ++ RISCVI_FEQ_D = 0xa2002053, ++ RISCVI_FLT_D = 0xa2001053, ++ RISCVI_FLE_D = 0xa2000053, ++ ++ RISCVI_FCLASS_D = 0xe2001053, ++ ++ RISCVI_FCVT_W_D = 0xc2000053, ++ RISCVI_FCVT_WU_D = 0xc2100053, ++ RISCVI_FCVT_D_W = 0xd2000053, ++ RISCVI_FCVT_D_WU = 0xd2100053, ++ ++ RISCVI_FMV_D = 0x22000053, ++ RISCVI_FNEG_D = 0x22001053, ++ RISCVI_FABS_D = 0x22002053, ++#if LJ_TARGET_RISCV64 ++ RISCVI_FCVT_L_D = 0xc2200053, ++ RISCVI_FCVT_LU_D = 0xc2300053, ++ RISCVI_FMV_X_D = 0xe2000053, ++ RISCVI_FCVT_D_L = 0xd2200053, ++ RISCVI_FCVT_D_LU = 0xd2300053, ++ RISCVI_FMV_D_X = 0xf2000053, ++#endif ++ ++ 
/* --- Zifencei --- */ ++ RISCVI_FENCE = 0x0000000f, ++ RISCVI_FENCE_I = 0x0000100f, ++ ++ /* --- Zicsr --- */ ++ RISCVI_CSRRW = 0x00001073, ++ RISCVI_CSRRS = 0x00002073, ++ RISCVI_CSRRC = 0x00003073, ++ RISCVI_CSRRWI = 0x00005073, ++ RISCVI_CSRRSI = 0x00006073, ++ RISCVI_CSRRCI = 0x00007073, ++ ++ /* --- RVB --- */ ++ /* Zba */ ++ RISCVI_SH1ADD = 0x20002033, ++ RISCVI_SH2ADD = 0x20004033, ++ RISCVI_SH3ADD = 0x20006033, ++#if LJ_TARGET_RISCV64 ++ RISCVI_ADD_UW = 0x0800003b, ++ ++ RISCVI_SH1ADD_UW = 0x2000203b, ++ RISCVI_SH2ADD_UW = 0x2000403b, ++ RISCVI_SH3ADD_UW = 0x2000603b, ++ ++ RISCVI_SLLI_UW = 0x0800101b, ++ ++ RISCVI_ZEXT_W = 0x0800003b, ++#endif ++ /* Zbb */ ++ RISCVI_ANDN = 0x40007033, ++ RISCVI_ORN = 0x40006033, ++ RISCVI_XNOR = 0x40004033, ++ ++ RISCVI_CLZ = 0x60001013, ++ RISCVI_CTZ = 0x60101013, ++ ++ RISCVI_CPOP = 0x60201013, ++ ++ RISCVI_MAX = 0x0a006033, ++ RISCVI_MAXU = 0x0a007033, ++ RISCVI_MIN = 0x0a004033, ++ RISCVI_MINU = 0x0a005033, ++ ++ RISCVI_SEXT_B = 0x60401013, ++ RISCVI_SEXT_H = 0x60501013, ++#if LJ_TARGET_RISCV64 ++ RISCVI_ZEXT_H = 0x0800403b, ++#endif ++ ++ RISCVI_ROL = 0x60001033, ++ RISCVI_ROR = 0x60005033, ++ RISCVI_RORI = 0x60005013, ++ ++ RISCVI_ORC_B = 0x28705013, ++ ++#if LJ_TARGET_RISCV64 ++ RISCVI_REV8 = 0x6b805013, ++ ++ RISCVI_CLZW = 0x6000101b, ++ RISCVI_CTZW = 0x6010101b, ++ ++ RISCVI_CPOPW = 0x6020101b, ++ ++ RISCVI_ROLW = 0x6000103b, ++ RISCVI_RORIW = 0x6000501b, ++ RISCVI_RORW = 0x6000503b, ++#endif ++ /* NYI: Zbc, Zbs */ ++ ++ /* --- Zicond --- */ ++ RISCVI_CZERO_EQZ = 0x0e005033, ++ RISCVI_CZERO_NEZ = 0x0e007033, ++ ++ /* --- Zfa --- */ ++ RISCVI_FLI_S = 0xf0100053, ++ RISCVI_FMINM_S = 0x28002053, ++ RISCVI_FMAXM_S = 0x28003053, ++ RISCVI_FROUND_S = 0x40400053, ++ RISCVI_FROUNDNX_S = 0x40500053, ++ RISCVI_FCVTMOD_W_D = 0xc2801053, ++ RISCVI_FLEQ_S = 0xa0004053, ++ RISCVI_FLTQ_S = 0xa0005053, ++ RISCVI_FLI_D = 0xf2100053, ++ RISCVI_FMINM_D = 0x2a002053, ++ RISCVI_FMAXM_D = 0x2a003053, ++ RISCVI_FROUND_D = 0x42400053, 
++ RISCVI_FROUNDNX_D = 0x42500053, ++ RISCVI_FLEQ_D = 0xa2004053, ++ RISCVI_FLTQ_D = 0xa2005053, ++ ++ RISCVI_FROUND_S_RTZ = 0x40401053, ++ RISCVI_FROUND_S_RDN = 0x40402053, ++ RISCVI_FROUND_S_RUP = 0x40403053, ++ RISCVI_FROUNDNX_S_RTZ = 0x40501053, ++ RISCVI_FROUNDNX_S_RDN = 0x40502053, ++ RISCVI_FROUNDNX_S_RUP = 0x40503053, ++ RISCVI_FROUND_D_RTZ = 0x42401053, ++ RISCVI_FROUND_D_RDN = 0x42402053, ++ RISCVI_FROUND_D_RUP = 0x42403053, ++ RISCVI_FROUNDNX_D_RTZ = 0x42501053, ++ RISCVI_FROUNDNX_D_RDN = 0x42502053, ++ RISCVI_FROUNDNX_D_RUP = 0x42503053, ++ ++ /* TBD: RVV?, RVP?, RVJ? */ ++ ++ /* --- XThead* --- */ ++ /* XTHeadBa */ ++ RISCVI_TH_ADDSL = 0x0000100b, ++ ++ /* XTHeadBb */ ++ RISCVI_TH_SRRI = 0x1000100b, ++#if LJ_TARGET_RISCV64 ++ RISCVI_TH_SRRIW = 0x1400100b, ++#endif ++ RISCVI_TH_EXT = 0x0000200b, ++ RISCVI_TH_EXTU = 0x0000300b, ++ RISCVI_TH_FF0 = 0x8400100b, ++ RISCVI_TH_FF1 = 0x8600100b, ++ RISCVI_TH_REV = 0x8200100b, ++#if LJ_TARGET_RISCV64 ++ RISCVI_TH_REVW = 0x9000100b, ++#endif ++ RISCVI_TH_TSTNBZ = 0x8000100b, ++ ++ /* XTHeadBs */ ++ RISCVI_TH_TST = 0x8800100b, ++ ++ /* XTHeadCondMov */ ++ RISCVI_TH_MVEQZ = 0x4000100b, ++ RISCVI_TH_MVNEZ = 0x4200100b, ++ ++ /* XTHeadMac */ ++ RISCVI_TH_MULA = 0x2000100b, ++ RISCVI_TH_MULAH = 0x2800100b, ++#if LJ_TARGET_RISCV64 ++ RISCVI_TH_MULAW = 0x2400100b, ++#endif ++ RISCVI_TH_MULS = 0x2200100b, ++ RISCVI_TH_MULSH = 0x2a00100b, ++ RISCVI_TH_MULSW = 0x2600100b, ++ ++ /* NYI: XTHeadMemIdx, XTHeadFMemIdx, XTHeadMemPair */ ++} RISCVIns; ++ ++typedef enum RISCVRM { ++ RISCVRM_RNE = 0, ++ RISCVRM_RTZ = 1, ++ RISCVRM_RDN = 2, ++ RISCVRM_RUP = 3, ++ RISCVRM_RMM = 4, ++ RISCVRM_DYN = 7, ++} RISCVRM; ++ ++static const uint16_t riscv_fli_map_hi16[32] = { ++ 0xbff0u, // -1 ++ 0x0010u, // min ++ 0x3ef0u, // 2^-16 ++ 0x3f00u, // 2^-15 ++ 0x3f70u, // 2^-8 ++ 0x3f80u, // 2^-7 ++ 0x3fb0u, // 2^-4 ++ 0x3fc0u, // 2^-3, 0.125 ++ 0x3fd0u, // 2^-2, 0.25 ++ 0x3fd4u, // 0.3125 ++ 0x3fd8u, // 0.375 ++ 0x3fdcu, // 0.4375 ++ 0x3fe0u, // 
0.5 ++ 0x3fe4u, // 0.625 ++ 0x3fe8u, // 0.75 ++ 0x3fecu, // 0.875 ++ 0x3ff0u, // 1 ++ 0x3ff4u, // 1.25 ++ 0x3ff8u, // 1.5 ++ 0x3ffcu, // 1.75 ++ 0x4000u, // 2 ++ 0x4004u, // 2.5 ++ 0x4008u, // 3 ++ 0x4010u, // 4 ++ 0x4020u, // 8 ++ 0x4030u, // 16 ++ 0x4060u, // 128 ++ 0x4070u, // 256 ++ 0x40e0u, // 2^15, 32768 ++ 0x40f0u, // 2^16, 65536 ++ 0x7ff0u, // inf ++ 0x7ff8u, // canonical nan ++}; ++ ++#endif +--- a/src/lj_ccall.c ++++ b/src/lj_ccall.c +@@ -689,6 +689,146 @@ + if (ngpr < maxgpr) { dp = &cc->gpr[ngpr++]; goto done; } \ + } + ++#elif LJ_TARGET_RISCV64 ++/* -- RISC-V lp64d calling conventions ------------------------------------ */ ++ ++#define CCALL_HANDLE_STRUCTRET \ ++ /* Return structs of size > 16 by reference. */ \ ++ cc->retref = !(sz <= 16); \ ++ if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp; ++ ++#define CCALL_HANDLE_STRUCTRET2 \ ++ CCallStructClass cl = ccall_classify_struct(cts, ctr); \ ++ CCallStructMix mix = cl.mix; \ ++ switch (mix.val) { \ ++ case MIX_IX: { \ ++ ((intptr_t *)dp)[0] = cc->gpr[0]; \ ++ break; \ ++ } \ ++ case MIX_FX: case MIX_DX: \ ++ case MIX_FF: case MIX_FD: \ ++ case MIX_DF: case MIX_DD: { \ ++ eCCallStructMixElem es[2] = { mix.e1, mix.e2 }; \ ++ for (int ti = 0; ti < 2; ti++) { \ ++ if (es[ti] == MIX_ELEM_FLOAT) { \ ++ ((float *)dp)[ti] = cc->fpr[ti].f; \ ++ } else /*if (es[ti] == MIX_ELEM_DOUBLE)*/ { \ ++ ((double *)dp)[ti] = cc->fpr[ti].d; \ ++ } \ ++ } \ ++ break; \ ++ } \ ++ case MIX_FI: case MIX_DI: \ ++ case MIX_IF: case MIX_ID: { \ ++ eCCallStructMixElem es[2] = { mix.e1, mix.e2 }; \ ++ for (int ti = 0; ti < 2; ti++) { \ ++ if (es[ti] == MIX_ELEM_FLOAT) { \ ++ ((float *)dp)[ti] = cc->fpr[0].f; \ ++ } else if (es[ti] == MIX_ELEM_DOUBLE) { \ ++ ((double *)dp)[ti] = cc->fpr[0].d; \ ++ } else /*if (es[ti] == MIX_ELEM_INT)*/ { \ ++ ((intptr_t *)dp)[ti] = cc->gpr[0]; \ ++ } \ ++ } \ ++ break; \ ++ } \ ++ case MIX_UNINIT: \ ++ case MIX_FAILED: { \ ++ memcpy(dp, sp, ctr->size); \ ++ break; \ ++ } \ ++ default: lj_assertL(0, 
"Invalid ret mix %d", mix.val); \ ++ } ++ ++#define CCALL_HANDLE_COMPLEXRET \ ++ /* Complex values are returned in 1 or 2 FPRs. */ \ ++ cc->retref = 0; ++ ++#define CCALL_HANDLE_COMPLEXRET2 \ ++ if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \ ++ ((float *)dp)[0] = cc->fpr[0].f; \ ++ ((float *)dp)[1] = cc->fpr[1].f; \ ++ } else { /* Copy complex double from FPRs. */ \ ++ ((double *)dp)[0] = cc->fpr[0].d; \ ++ ((double *)dp)[1] = cc->fpr[1].d; \ ++ } ++ ++#define CCALL_HANDLE_COMPLEXARG \ ++ /* Pass long double complex by reference. */ \ ++ if (sz == 2*sizeof(long double)) { \ ++ rp = cdataptr(lj_cdata_new(cts, did, sz)); \ ++ sz = CTSIZE_PTR; \ ++ } \ ++ /* Pass complex in two FPRs or two GPRs or on stack. */ \ ++ else if (sz == 2*sizeof(float)) { \ ++ mix = (CCallStructMix){ .val = MIX_FF }; \ ++ sz = CTSIZE_PTR; \ ++ } else /*if (sz == 2*sizeof(double))*/ { \ ++ mix = (CCallStructMix){ .val = MIX_DD }; \ ++ sz = 2*CTSIZE_PTR; \ ++ } ++ ++#define CCALL_HANDLE_RET \ ++ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ ++ sp = (uint8_t *)&cc->fpr[0].f; ++ ++#define CCALL_HANDLE_STRUCTARG \ ++ /* Pass structs of size >16 by reference. */ \ ++ CCallStructClass cl = ccall_classify_struct(cts, d); \ ++ uint8_t ispod = cl.ispod; \ ++ mix = cl.mix; \ ++ if (!ispod && sz > 16) { \ ++ rp = cdataptr(lj_cdata_new(cts, did, sz)); \ ++ sz = CTSIZE_PTR; \ ++ } ++ ++ ++#define CCALL_HANDLE_REGARG \ ++ if (!isva) { /* Try determine MIX registers. 
*/ \ ++ int n2 = 0; \ ++ switch (mix.val) { \ ++ case MIX_UNINIT: \ ++ if (isfp) { \ ++ n2 = 1; \ ++ break; \ ++ } \ ++ /* fallthrough */ \ ++ case MIX_FAILED: \ ++ /* MIX_[IFD]X are just like a standalone element */ \ ++ case MIX_IX: goto reghandle_gpr; \ ++ case MIX_FX: case MIX_DX: \ ++ n2 = 1; \ ++ break; \ ++ /* MIX_[FD][FD] are just like two standalone elements */ \ ++ /* fix float later */ \ ++ case MIX_FF: case MIX_DD: \ ++ case MIX_FD: case MIX_DF: \ ++ n2 = 2; \ ++ break; \ ++ /* Setup MIX_I[FD] or MIX[FD]I on stack first, fix later */ \ ++ default: goto reghandle_exit; \ ++ } \ ++ if (nfpr + n2 <= CCALL_NARG_FPR) { \ ++ dp = &cc->fpr[nfpr]; \ ++ nfpr += n2; \ ++ goto done; \ ++ } else { \ ++ if (ngpr + n2 <= maxgpr) { \ ++ dp = &cc->gpr[ngpr]; \ ++ ngpr += n2; \ ++ goto done; \ ++ } \ ++ } \ ++ } else { /* Try to pass argument in GPRs. */ \ ++ reghandle_gpr: \ ++ if (ngpr + n <= maxgpr) { \ ++ dp = &cc->gpr[ngpr]; \ ++ ngpr += n; \ ++ goto done; \ ++ } \ ++ reghandle_exit: \ ++ } ++ + #else + #error "Missing calling convention definitions for this architecture" + #endif +@@ -1090,6 +1230,164 @@ static void ccall_copy_struct(CCallState + #define ccall_struct_align(cts, ct) ((ct)->info & CTF_ALIGN) + #endif + ++/* -- RISC-V ABI struct classification ---------------------------- */ ++ ++#if LJ_TARGET_RISCV64 ++ ++/* RISC-V 64 LP64D fp reg struct classification. */ ++/* X: unknown/uninit, F: float, D: double, I: integer */ ++ ++typedef enum eCCallStructMixElem { ++ MIX_ELEM_UNINIT = 0, ++ MIX_ELEM_FLOAT = 1, ++ MIX_ELEM_DOUBLE = 2, ++ MIX_ELEM_INT = 3, ++} eCCallStructMixElem; ++typedef enum eCCallStructMix { ++ MIX_UNINIT = 0, // i.e. 
MIX_XX ++ MIX_FX = 1, ++ MIX_DX = 2, ++ MIX_IX = 3, ++ MIX_XF = 4, ++ MIX_FF = 5, ++ MIX_DF = 6, ++ MIX_IF = 7, ++ MIX_XD = 8, ++ MIX_FD = 9, ++ MIX_DD = 10, ++ MIX_ID = 11, ++ MIX_XI = 12, ++ MIX_FI = 13, ++ MIX_DI = 14, ++ MIX_FAILED = 15, // MIX_II but that's not mixed ++} eCCallStructMix; ++ ++typedef union CCallStructMix { ++ eCCallStructMix val : 4; ++ struct { ++ eCCallStructMixElem e1 : 2; ++ eCCallStructMixElem e2 : 2; ++ }; ++} CCallStructMix; ++ ++typedef union CCallStructClass { ++ uint32_t val; ++ struct { ++ uint8_t ispod; ++ CCallStructMix mix; ++ }; ++} CCallStructClass; ++ ++static CCallStructClass ccall_classify_struct(CTState *cts, CType *ct) ++{ ++ CTSize sz = ct->size; ++ CCallStructMix mix = { .val = MIX_UNINIT }; ++ if (ct->info & CTF_UNION) mix.val = MIX_FAILED; ++ while (ct->sib && mix.val != MIX_FAILED) { ++ unsigned int m = 1; ++ CType *sct; ++ ct = ctype_get(cts, ct->sib); ++ if (ctype_isfield(ct->info)) { ++ sct = ctype_rawchild(cts, ct); ++ if (ctype_isarray(sct->info)) { ++ CType *cct = ctype_rawchild(cts, sct); ++ if (!cct->size) continue; ++ m = sct->size / cct->size; ++ sct = cct; ++ } ++ if (ctype_isfp(sct->info)) { ++ while (m--) { ++ /* Mix state trans: fp ++ * mix XX -> mix = [FD]X ++ * mix X[IFD] -> mix = FAILED ++ * mix [IFD]X -> mix = [IFD][FD] ++ * mix [IFD][IFD] -> mix = FAILED ++ */ ++ eCCallStructMixElem ne = (sct->size == 4) ? MIX_ELEM_FLOAT : MIX_ELEM_DOUBLE; ++ if (mix.val == MIX_UNINIT) { ++ mix = (CCallStructMix){ .e1 = ne, .e2 = MIX_ELEM_UNINIT }; ++ } else { ++ eCCallStructMixElem o1 = mix.e1, o2 = mix.e2, n2 = ne; ++ mix = (o2 != MIX_ELEM_UNINIT ? (CCallStructMix){ .val = MIX_FAILED } ++ : (CCallStructMix){ .e1 = o1, .e2 = n2 }); ++ } ++ } ++ } else if (ctype_iscomplex(sct->info)) { ++ while (m--) { ++ /* Mix state trans: complex ++ * mix XX -> mix = [FD][FD] ++ * mix other -> mix = FAILED ++ */ ++ eCCallStructMixElem ne = (sct->size == 8) ? MIX_ELEM_FLOAT : MIX_ELEM_DOUBLE; ++ mix = (mix.val == MIX_UNINIT) ? 
(CCallStructMix){ .e1 = ne, .e2 = ne } ++ : (CCallStructMix){ .val = MIX_FAILED }; ++ } ++ } else if (ctype_isinteger_or_bool(sct->info) || ctype_isenum(sct->info)) { ++ while (m--) { ++ /* Mix state trans: int ++ * mix XX -> mix = IX ++ * mix X[IFD] -> mix = FAILED ++ * mix [IFD]X -> mix = [FD]I; this auto fails II ++ * mix [IFD][IFD] -> mix = FAILED ++ */ ++ if (mix.val == MIX_UNINIT) { ++ mix = (CCallStructMix){ .e1 = MIX_ELEM_INT, .e2 = MIX_ELEM_UNINIT }; ++ } else { ++ eCCallStructMixElem o1 = mix.e1, o2 = mix.e2, n2 = MIX_ELEM_INT; ++ mix = (o2 != MIX_ELEM_UNINIT ? (CCallStructMix){ .val = MIX_FAILED } ++ : (CCallStructMix){ .e1 = o1, .e2 = n2 }); ++ } ++ } ++ } else if (ctype_isstruct(sct->info)) { ++ goto substruct; ++ } else { ++ goto not_ag; ++ } ++ } else if (ctype_isbitfield(ct->info)) { ++ goto not_ag; ++ } else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) { ++ sct = ctype_rawchild(cts, ct); ++ substruct: ++ if (sct->size > 0) { ++ CCallStructClass s = ccall_classify_struct(cts, sct); ++ CCallStructMix smix = s.mix; ++ uint8_t spod = s.ispod; ++ if (smix.val == MIX_FAILED) mix.val = MIX_FAILED; ++ if (!spod) goto not_ag; ++ while (m--) { ++ /* Mix state transfer: substruct ++ * mix XX, smix any -> mix = smix ++ * mix X[IFD], smix any -> mix = FAILED ++ * mix [IFD]X, smix [XIFD]X -> mix = [IFD][XIFD] ++ * smix other -> mix = FAILED ++ * mix [IFD][IFD], smix XX -> mix = mix ++ * smix other -> mix = FAILED; this keep II fail ++ */ ++ if (mix.val == MIX_UNINIT) { ++ mix = smix; ++ } else { ++ eCCallStructMixElem o1 = mix.e1, o2 = mix.e2; ++ eCCallStructMixElem n1 = smix.e1, n2 = smix.e2; ++ if (o2 != MIX_ELEM_UNINIT) { ++ mix = (smix.val != MIX_UNINIT) ? mix ++ : (CCallStructMix){ .val = MIX_FAILED }; ++ } else { ++ mix = (n2 != MIX_ELEM_UNINIT) ? 
(CCallStructMix){ .val = MIX_FAILED } ++ : (CCallStructMix){ .e1 = o1, .e2 = n1 }; ++ } ++ } ++ } ++ } ++ } ++ } ++ if (MIX_UNINIT < mix.val && mix.val < MIX_FAILED) /* Mixed passing */ ++ return (CCallStructClass){ .ispod = 1, .mix = mix }; ++not_ag: /* Not a float/double aggregate or int/fp mix pair aggregate */ ++ return (CCallStructClass){ .ispod = (sz <= 16), .mix = mix }; /* Return structs of size <= 16 in GPRs. */ ++} ++ ++#endif ++ + /* -- Common C call handling ---------------------------------------------- */ + + /* Infer the destination CTypeID for a vararg argument. +@@ -1141,6 +1439,7 @@ static int ccall_set_args(lua_State *L, + #endif + #endif + ++ + /* Clear unused regs to get some determinism in case of misdeclaration. */ + memset(cc->gpr, 0, sizeof(cc->gpr)); + #if CCALL_NUM_FPR +@@ -1203,6 +1502,11 @@ static int ccall_set_args(lua_State *L, + CType *d; + CTSize sz; + MSize n, isfp = 0, isva = 0; ++#if LJ_TARGET_RISCV64 ++ MSize onsp, mnsp; ++ CCallStructMix mix = { .val = MIX_UNINIT }; ++ int isstack = 0; ++#endif + void *dp, *rp = NULL; + #if LJ_TARGET_PPC && LJ_ARCH_BITS == 64 + int isf32 = 0; +@@ -1251,6 +1555,10 @@ static int ccall_set_args(lua_State *L, + #if LJ_TARGET_S390X + onstack = 1; + #endif ++#if LJ_TARGET_RISCV64 ++ isstack = 1; ++ onsp = nsp; ++#endif + if (CCALL_ALIGN_STACKARG) { /* Align argument on stack. */ + MSize align = (1u << ctype_align(ccall_struct_align(cts, d))) - 1; + #if LJ_TARGET_ARM64 && LJ_TARGET_OSX +@@ -1266,6 +1574,9 @@ static int ccall_set_args(lua_State *L, + #else + dp = ((uint8_t *)cc->stack) + nsp; + #endif ++#if LJ_TARGET_RISCV64 ++ mnsp = nsp + n * CTSIZE_PTR / 2; ++#endif + nsp += CCALL_PACK_STACKARG ? sz : n * CTSIZE_PTR; + if ((int32_t)nsp > CCALL_SIZE_STACK) { /* Too many arguments. 
*/ + err_nyi: +@@ -1333,7 +1644,11 @@ static int ccall_set_args(lua_State *L, + *(int64_t *)dp = (int64_t)*(int32_t *)dp; + } + #endif +-#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) ++#if LJ_TARGET_RISCV64 ++ if (isfp && d->size == sizeof(float)) ++ ((uint32_t *)dp)[1] = 0xffffffffu; /* Float NaN boxing */ ++#endif ++#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_RISCV64 + if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) + #if LJ_TARGET_MIPS64 + || (isfp && nsp == 0) +@@ -1373,6 +1688,107 @@ static int ccall_set_args(lua_State *L, + CTSize i = (sz >> 2) - 1; + do { ((uint64_t *)dp)[i] = ((uint32_t *)dp)[i]; } while (i--); + } ++#elif LJ_TARGET_RISCV64 ++ switch (mix.val) { ++ case MIX_UNINIT: ++ break; ++ /* Fix MIX values */ ++ case MIX_DF: { ++ ((uint32_t *)dp)[3] = 0xffffffffu; ++ break; ++ } ++ case MIX_FF: ++ if (isstack) break; ++ ((uint64_t *)dp)[1] = 0xffffffff00000000ul | ((uint32_t *)dp)[1]; /* fallthrough: slot 0 is boxed by the MIX_FX/MIX_FD case */ ++ case MIX_FX: ++ case MIX_FD: { ++ ((uint64_t *)dp)[0] = 0xffffffff00000000ul | ((uint32_t *)dp)[0]; ++ break; ++ } ++ case MIX_FI: { ++ lj_assertL(sz == 8 || sz == 16, "invalid MIX_FI size %d", (int)sz); ++ if (ngpr >= CCALL_NARG_GPR) break; ++ if (sz == 8) { ++ FPRArg farg = { .hi = 0xffffffffu, .lo = ((uint32_t *)dp)[0] }; ++ if (!isva && nfpr + 1 <= CCALL_NARG_FPR) { ++ cc->fpr[nfpr++] = farg; ++ goto fi_next; ++ } else if (ngpr + 1 <= CCALL_NARG_GPR) { ++ cc->gpr[ngpr++] = farg.u; ++fi_next: ++ ((uint32_t *)dp)[0] = ((uint32_t *)dp)[1]; ++ ((uint32_t *)dp)[1] = 0; ++ if (ngpr + 1 <= CCALL_NARG_GPR) { ++ cc->gpr[ngpr++] = ((uint32_t *)dp)[0]; ++ ((uint32_t *)dp)[0] = 0, nsp = onsp; ++ } ++ } ++ break; ++ } else /*if (sz == 16)*/ { ++ ((uint64_t *)dp)[0] |= 0xffffffff00000000ul; ++ /* fallthrough */ ++ } ++ } ++ case MIX_DI: { ++ lj_assertL(sz == 16, "invalid MIX_DI size %d", (int)sz); ++ if (ngpr >= CCALL_NARG_GPR) break; ++ if (!isva && nfpr + 1 <= CCALL_NARG_FPR) { ++ cc->fpr[nfpr++] = (FPRArg){ .u = ((uint64_t *)dp)[0] };
++ goto di_next; ++ } else if (ngpr + 1 <= CCALL_NARG_GPR) { ++ cc->gpr[ngpr++] = ((uint64_t *)dp)[0]; ++di_next: ++ ((uint64_t *)dp)[0] = ((uint64_t *)dp)[1]; ++ ((uint64_t *)dp)[1] = 0, nsp = mnsp; ++ if (ngpr + 1 <= CCALL_NARG_GPR) { ++ cc->gpr[ngpr++] = ((uint64_t *)dp)[0]; ++ ((uint64_t *)dp)[0] = 0, nsp = onsp; ++ } ++ } ++ break; ++ } ++ case MIX_IF: { ++ lj_assertL(sz == 8 || sz == 16, "invalid MIX_IF size %d", (int)sz); ++ if (sz == 8) { ++ FPRArg farg = { .hi = 0xffffffffu, .lo = ((uint32_t *)dp)[1] }; ++ if (ngpr + 1 <= CCALL_NARG_GPR) { ++ cc->gpr[ngpr++] = ((uint32_t *)dp)[0]; ++ ((uint32_t *)dp)[0] = ((uint32_t *)dp)[1]; ++ ((uint32_t *)dp)[1] = 0; ++ if (!isva && nfpr + 1 <= CCALL_NARG_FPR) { ++ cc->fpr[nfpr++] = farg; ++ goto if_next; ++ } else if (ngpr + 1 <= CCALL_NARG_GPR) { ++ cc->gpr[ngpr++] = farg.u; ++if_next: ++ ((uint32_t *)dp)[0] = 0, nsp = onsp; ++ } ++ } ++ break; ++ } else /*if (sz == 16)*/ { ++ ((uint64_t *)dp)[1] |= 0xffffffff00000000ul; ++ /* fallthrough */ ++ } ++ } ++ case MIX_ID: { ++ lj_assertL(sz == 16, "invalid MIX_ID size %d", (int)sz); ++ if (ngpr + 1 <= CCALL_NARG_GPR) { ++ cc->gpr[ngpr++] = ((uint64_t *)dp)[0]; ++ ((uint64_t *)dp)[0] = ((uint64_t *)dp)[1]; ++ ((uint64_t *)dp)[1] = 0, nsp = mnsp; ++ if (!isva && nfpr + 1 <= CCALL_NARG_FPR) { ++ cc->fpr[nfpr++] = (FPRArg){ .u = ((uint64_t *)dp)[0] }; ++ goto id_next; ++ } else if (ngpr + 1 <= CCALL_NARG_GPR) { ++ cc->gpr[ngpr++] = ((uint64_t *)dp)[0]; ++id_next: ++ ((uint64_t *)dp)[0] = 0, nsp = onsp; ++ } ++ } ++ break; ++ } ++ default: break; ++ } + #else + UNUSED(isfp); + #endif +--- a/src/lj_ccall.h ++++ b/src/lj_ccall.h +@@ -157,6 +157,23 @@ typedef union FPRArg { + float f; + } FPRArg; + ++#elif LJ_TARGET_RISCV64 ++ ++#define CCALL_NARG_GPR 8 ++#define CCALL_NARG_FPR 8 ++#define CCALL_NRET_GPR 2 ++#define CCALL_NRET_FPR 2 ++#define CCALL_SPS_EXTRA 3 ++#define CCALL_SPS_FREE 1 ++ ++typedef intptr_t GPRArg; ++typedef union FPRArg { ++ double d; ++ uint64_t u; ++ struct { 
LJ_ENDIAN_LOHI(float f; , float g;) }; ++ struct { LJ_ENDIAN_LOHI(uint32_t lo; , uint32_t hi;) }; ++} FPRArg; ++ + #else + #error "Missing calling convention definitions for this architecture" + #endif +--- a/src/lj_ccallback.c ++++ b/src/lj_ccallback.c +@@ -102,6 +102,10 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs + + #define CALLBACK_MCODE_HEAD 52 + ++#elif LJ_TARGET_RISCV64 ++ ++#define CALLBACK_MCODE_HEAD 68 ++ + #else + + /* Missing support for this architecture. */ +@@ -315,6 +319,39 @@ static void *callback_mcode_init(global_ + } + return p; + } ++#elif LJ_TARGET_RISCV64 ++static void *callback_mcode_init(global_State *g, uint32_t *page) ++{ ++ uint32_t *p = page; ++ uintptr_t target = (uintptr_t)(void *)lj_vm_ffi_callback; ++ uintptr_t ug = (uintptr_t)(void *)g; ++ uintptr_t target_hi = (target >> 32), target_lo = target & 0xffffffffULL; ++ uintptr_t ug_hi = (ug >> 32), ug_lo = ug & 0xffffffffULL; ++ MSize slot; ++ *p++ = RISCVI_LUI | RISCVF_D(RID_X6) | RISCVF_IMMU(RISCVF_HI(target_hi)); ++ *p++ = RISCVI_LUI | RISCVF_D(RID_X7) | RISCVF_IMMU(RISCVF_HI(ug_hi)); ++ *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI(RISCVF_LO(target_hi)); ++ *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI(RISCVF_LO(ug_hi)); ++ *p++ = RISCVI_SLLI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_SHAMT(11); ++ *p++ = RISCVI_SLLI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_SHAMT(11); ++ *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI(target_lo >> 21); ++ *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI(ug_lo >> 21); ++ *p++ = RISCVI_SLLI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_SHAMT(11); ++ *p++ = RISCVI_SLLI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_SHAMT(11); ++ *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI((target_lo >> 10) & 0x7ff); ++ *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI((ug_lo >> 10) & 0x7ff); ++ *p++ = 
RISCVI_SLLI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_SHAMT(10); ++ *p++ = RISCVI_SLLI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_SHAMT(10); ++ *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI(target_lo & 0x3ff); ++ *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI(ug_lo & 0x3ff); ++ *p++ = RISCVI_JALR | RISCVF_D(RID_X0) | RISCVF_S1(RID_X6) | RISCVF_IMMJ(0); ++ for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { ++ *p++ = RISCVI_LUI | RISCVF_D(RID_X5) | RISCVF_IMMU(slot); ++ *p = RISCVI_JAL | RISCVF_IMMJ(((char *)page-(char *)p)); ++ p++; ++ } ++ return p; ++} + #else + /* Missing support for this architecture. */ + #define callback_mcode_init(g, p) (p) +@@ -617,6 +654,31 @@ void lj_ccallback_mcode_free(CTState *ct + if (ngpr < maxgpr) { sp = &cts->cb.gpr[ngpr++]; goto done; } \ + } + ++#elif LJ_TARGET_RISCV64 ++ ++#define CALLBACK_HANDLE_REGARG \ ++ if (isfp) { \ ++ if (nfpr + n <= CCALL_NARG_FPR) { \ ++ sp = &cts->cb.fpr[nfpr]; \ ++ nfpr += n; \ ++ goto done; \ ++ } else if (ngpr + n <= maxgpr) { \ ++ sp = &cts->cb.gpr[ngpr]; \ ++ ngpr += n; \ ++ goto done; \ ++ } \ ++ } else { \ ++ if (ngpr + n <= maxgpr) { \ ++ sp = &cts->cb.gpr[ngpr]; \ ++ ngpr += n; \ ++ goto done; \ ++ } \ ++ } ++ ++#define CALLBACK_HANDLE_RET \ ++ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ ++ ((float *)dp)[1] = *(float *)dp; ++ + #else + #error "Missing calling convention definitions for this architecture" + #endif +@@ -772,7 +834,7 @@ static void callback_conv_result(CTState + *(int64_t *)dp = (int64_t)*(int32_t *)dp; + } + #endif +-#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) ++#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_RISCV64 + /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. 
*/ + if (ctr->size <= 4 && + (LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info))) +--- a/src/lib_jit.c ++++ b/src/lib_jit.c +@@ -706,6 +706,104 @@ JIT_PARAMDEF(JIT_PARAMINIT) + #include + #endif + ++#if LJ_TARGET_RISCV64 && LJ_TARGET_POSIX ++ ++#if LJ_TARGET_LINUX ++#include ++ ++struct riscv_hwprobe hwprobe_requests[] = { ++ {RISCV_HWPROBE_KEY_IMA_EXT_0} ++}; ++ ++const uint64_t *hwprobe_ext = &hwprobe_requests[0].value; ++ ++int hwprobe_ret = 0; ++#endif ++ ++static int riscv_compressed() ++{ ++#if defined(__riscv_c) || defined(__riscv_compressed) ++ /* Don't bother checking for RVC -- would crash before getting here. */ ++ return 1; ++#elif LJ_TARGET_LINUX ++ return (hwprobe_ret == 0 && ((*hwprobe_ext) & RISCV_HWPROBE_IMA_C)) ? 1 : 0; ++#else ++ return 0; ++#endif ++} ++ ++static int riscv_zba() ++{ ++#if defined(__riscv_b) || defined(__riscv_zba) ++ /* Don't bother checking for Zba -- would crash before getting here. */ ++ return 1; ++#elif LJ_TARGET_LINUX ++ return (hwprobe_ret == 0 && ((*hwprobe_ext) & RISCV_HWPROBE_EXT_ZBA)) ? 1 : 0; ++#else ++ return 0; ++#endif ++} ++ ++static int riscv_zbb() ++{ ++#if defined(__riscv_b) || defined(__riscv_zbb) ++ /* Don't bother checking for Zbb -- would crash before getting here. */ ++ return 1; ++#elif LJ_TARGET_LINUX ++ return (hwprobe_ret == 0 && ((*hwprobe_ext) & RISCV_HWPROBE_EXT_ZBB)) ? 1 : 0; ++#else ++ return 0; ++#endif ++} ++ ++static int riscv_zicond() ++{ ++#if defined(__riscv_zicond) ++ /* Don't bother checking for Zicond -- would crash before getting here. */ ++ return 1; ++#elif LJ_TARGET_LINUX ++ return (hwprobe_ret == 0 && ((*hwprobe_ext) & RISCV_HWPROBE_EXT_ZICOND)) ? 1 : 0; ++#else ++ return 0; ++#endif ++} ++ ++static int riscv_zfa() ++{ ++#if defined(__riscv_zfa) ++ /* Don't bother checking for Zfa -- would crash before getting here. */ ++ return 1; ++#elif LJ_TARGET_LINUX ++ return (hwprobe_ret == 0 && ((*hwprobe_ext) & RISCV_HWPROBE_EXT_ZFA)) ? 
1 : 0; ++#else ++ return 0; ++#endif ++} ++ ++static int riscv_xthead() ++{ ++#if (defined(__riscv_xtheadba) \ ++ && defined(__riscv_xtheadbb) \ ++ && defined(__riscv_xtheadcondmov) \ ++ && defined(__riscv_xtheadmac)) ++ /* Don't bother checking for XThead -- would crash before getting here. */ ++ return 1; ++#else ++/* ++** Hardcoded as there's no easy way of detection: ++** - SIGILL have some trouble with libluajit as we speak ++** - Checking mvendorid looks good, but might not be reliable. ++*/ ++ return 0; ++#endif ++} ++ ++static uint32_t riscv_probe(int (*func)(void), uint32_t flag) ++{ ++ return func() ? flag : 0; ++} ++#endif ++ + /* Arch-dependent CPU feature detection. */ + static uint32_t jit_cpudetect(void) + { +@@ -778,6 +876,29 @@ static uint32_t jit_cpudetect(void) + #endif + #elif LJ_TARGET_S390X + /* No optional CPU features to detect (for now). */ ++ ++#elif LJ_TARGET_RISCV64 ++#if LJ_HASJIT ++ ++#if LJ_TARGET_LINUX ++ /* HWPROBE-based detection of RVC, Zba, Zbb and Zicond. */ ++ hwprobe_ret = syscall(__NR_riscv_hwprobe, &hwprobe_requests, ++ sizeof(hwprobe_requests) / sizeof(struct riscv_hwprobe), 0, ++ NULL, 0); ++ ++ flags |= riscv_probe(riscv_compressed, JIT_F_RVC); ++ flags |= riscv_probe(riscv_zba, JIT_F_RVZba); ++ flags |= riscv_probe(riscv_zbb, JIT_F_RVZbb); ++ flags |= riscv_probe(riscv_zicond, JIT_F_RVZicond); ++ flags |= riscv_probe(riscv_zfa, JIT_F_RVZfa); ++ flags |= riscv_probe(riscv_xthead, JIT_F_RVXThead); ++ ++#endif ++ ++ /* Detect V/P? */ ++ /* V have no hardware available, P not ratified yet. 
*/ ++#endif ++ + #else + #error "Missing CPU detection for this architecture" + #endif +--- a/src/lj_jit.h ++++ b/src/lj_jit.h +@@ -68,6 +68,46 @@ + #endif + #endif + ++#elif LJ_TARGET_RISCV64 ++ ++#define JIT_F_RVC (JIT_F_CPU << 0) ++#define JIT_F_RVZba (JIT_F_CPU << 1) ++#define JIT_F_RVZbb (JIT_F_CPU << 2) ++#define JIT_F_RVZicond (JIT_F_CPU << 3) ++#define JIT_F_RVZfa (JIT_F_CPU << 4) ++#define JIT_F_RVXThead (JIT_F_CPU << 5) ++ ++#define JIT_F_CPUSTRING "\003RVC\003Zba\003Zbb\006Zicond\003Zfa\006XThead" ++ ++#if LJ_TARGET_LINUX ++#include ++ ++#ifndef __NR_riscv_hwprobe ++#ifndef __NR_arch_specific_syscall ++#define __NR_arch_specific_syscall 244 ++#endif ++#define __NR_riscv_hwprobe (__NR_arch_specific_syscall + 14) ++#endif ++ ++struct riscv_hwprobe { ++ int64_t key; ++ uint64_t value; ++}; ++ ++#define RISCV_HWPROBE_KEY_MVENDORID 0 ++#define RISCV_HWPROBE_KEY_MARCHID 1 ++#define RISCV_HWPROBE_KEY_MIMPID 2 ++#define RISCV_HWPROBE_KEY_BASE_BEHAVIOR 3 ++#define RISCV_HWPROBE_KEY_IMA_EXT_0 4 ++ ++#define RISCV_HWPROBE_IMA_C (1 << 1) ++#define RISCV_HWPROBE_EXT_ZBA (1 << 3) ++#define RISCV_HWPROBE_EXT_ZBB (1 << 4) ++#define RISCV_HWPROBE_EXT_ZFA (1ULL << 32) ++#define RISCV_HWPROBE_EXT_ZICOND (1ULL << 35) ++ ++#endif ++ + #else + + #define JIT_F_CPUSTRING "" +@@ -364,13 +404,13 @@ enum { + #if LJ_TARGET_MIPS + LJ_K64_2P31, /* 2^31 */ + #endif +-#if LJ_TARGET_ARM64 || LJ_TARGET_MIPS64 ++#if LJ_TARGET_ARM64 || LJ_TARGET_MIPS64 || LJ_TARGET_RISCV64 + LJ_K64_VM_EXIT_HANDLER, + LJ_K64_VM_EXIT_INTERP, + #endif + LJ_K64__MAX, + }; +-#define LJ_K64__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_ARM64 || LJ_TARGET_MIPS) ++#define LJ_K64__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_ARM64 || LJ_TARGET_MIPS || LJ_TARGET_RISCV64) + + enum { + #if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64 +--- a/src/lj_trace.c ++++ b/src/lj_trace.c +@@ -351,7 +351,7 @@ void lj_trace_initstate(global_State *g) + J->k32[LJ_K32_VM_EXIT_HANDLER] = (uintptr_t)(void *)lj_vm_exit_handler; + 
J->k32[LJ_K32_VM_EXIT_INTERP] = (uintptr_t)(void *)lj_vm_exit_interp; + #endif +-#if LJ_TARGET_ARM64 || LJ_TARGET_MIPS64 ++#if LJ_TARGET_ARM64 || LJ_TARGET_MIPS64 || LJ_TARGET_RISCV64 + J->k64[LJ_K64_VM_EXIT_HANDLER].u64 = (uintptr_t)lj_ptr_sign((void *)lj_vm_exit_handler, 0); + J->k64[LJ_K64_VM_EXIT_INTERP].u64 = (uintptr_t)lj_ptr_sign((void *)lj_vm_exit_interp, 0); + #endif +--- /dev/null ++++ b/src/lj_emit_riscv.h +@@ -0,0 +1,576 @@ ++/* ++** RISC-V instruction emitter. ++** Copyright (C) 2022-2026 ISRC, ISCAS. See Copyright Notice in luajit.h ++** ++** Contributed by gns from PLCT Lab, ISRC, ISCAS. ++*/ ++ ++static intptr_t get_k64val(ASMState *as, IRRef ref) ++{ ++ IRIns *ir = IR(ref); ++ if (ir->o == IR_KINT64) { ++ return (intptr_t)ir_kint64(ir)->u64; ++ } else if (ir->o == IR_KGC) { ++ return (intptr_t)ir_kgc(ir); ++ } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { ++ return (intptr_t)ir_kptr(ir); ++ } else { ++ lj_assertA(ir->o == IR_KINT || ir->o == IR_KNULL, ++ "bad 64 bit const IR op %d", ir->o); ++ return ir->i; /* Sign-extended. 
*/ ++ } ++} ++ ++#define get_kval(as, ref) get_k64val(as, ref) ++ ++/* -- Emit basic instructions --------------------------------------------- */ ++ ++static void emit_r(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg rs2) ++{ ++ *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_S1(rs1) | RISCVF_S2(rs2); ++} ++ ++#define emit_ds(as, riscvi, rd, rs1) emit_r(as, riscvi, rd, rs1, 0) ++#define emit_ds2(as, riscvi, rd, rs2) emit_r(as, riscvi, rd, 0, rs2) ++#define emit_ds1s2(as, riscvi, rd, rs1, rs2) emit_r(as, riscvi, rd, rs1, rs2) ++ ++static void emit_r4(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg rs2, Reg rs3) ++{ ++ *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_S3(rs3); ++} ++ ++#define emit_ds1s2s3(as, riscvi, rd, rs1, rs2, rs3) emit_r4(as, riscvi, rd, rs1, rs2, rs3) ++ ++static void emit_i(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, int32_t i) ++{ ++ *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_S1(rs1) | RISCVF_IMMI((uint32_t)i & 0xfff); ++} ++ ++#define emit_di(as, riscvi, rd, i) emit_i(as, riscvi, rd, 0, i) ++#define emit_dsi(as, riscvi, rd, rs1, i) emit_i(as, riscvi, rd, rs1, i) ++#define emit_dsshamt(as, riscvi, rd, rs1, i) emit_i(as, riscvi, rd, rs1, i&0x3f) ++ ++static void emit_s(ASMState *as, RISCVIns riscvi, Reg rs1, Reg rs2, int32_t i) ++{ ++ *--as->mcp = riscvi | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMS((uint32_t)i & 0xfff); ++} ++ ++#define emit_s1s2i(as, riscvi, rs1, rs2, i) emit_s(as, riscvi, rs1, rs2, i) ++ ++/* ++static void emit_b(ASMState *as, RISCVIns riscvi, Reg rs1, Reg rs2, int32_t i) ++{ ++ *--as->mcp = riscvi | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB((uint32_t)i & 0x1ffe); ++} ++*/ ++ ++static void emit_u(ASMState *as, RISCVIns riscvi, Reg rd, uint32_t i) ++{ ++ *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_IMMU(i & 0xfffff); ++} ++ ++#define emit_du(as, riscvi, rd, i) emit_u(as, riscvi, rd, i) ++ ++/* ++static void emit_j(ASMState *as, RISCVIns riscvi, Reg rd, int32_t i) ++{ ++ *--as->mcp 
= riscvi | RISCVF_D(rd) | RISCVF_IMMJ((uint32_t)i & 0x1fffffe); ++} ++*/ ++ ++static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); ++static void ra_allockreg(ASMState *as, intptr_t k, Reg r); ++static Reg ra_scratch(ASMState *as, RegSet allow); ++ ++static void emit_lso(ASMState *as, RISCVIns riscvi, Reg data, Reg base, int32_t ofs) ++{ ++ lj_assertA(checki12(ofs), "load/store offset %d out of range", ofs); ++ switch (riscvi) { ++ case RISCVI_LD: case RISCVI_LW: case RISCVI_LH: case RISCVI_LB: ++ case RISCVI_LWU: case RISCVI_LHU: case RISCVI_LBU: ++ case RISCVI_FLW: case RISCVI_FLD: ++ emit_dsi(as, riscvi, data, base, ofs); ++ break; ++ case RISCVI_SD: case RISCVI_SW: case RISCVI_SH: case RISCVI_SB: ++ case RISCVI_FSW: case RISCVI_FSD: ++ emit_s1s2i(as, riscvi, base, data, ofs); ++ break; ++ default: lj_assertA(0, "invalid lso"); break; ++ } ++} ++ ++static void emit_roti(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg tmp, ++ int32_t shamt) ++{ ++ if (as->flags & JIT_F_RVZbb || as->flags & JIT_F_RVXThead) { ++ if (!(as->flags & JIT_F_RVZbb)) switch (riscvi) { ++ case RISCVI_RORI: riscvi = RISCVI_TH_SRRI; break; ++ case RISCVI_RORIW: riscvi = RISCVI_TH_SRRIW; break; ++ default: lj_assertA(0, "invalid roti op"); break; ++ } ++ emit_dsshamt(as, riscvi, rd, rs1, shamt); ++ } else { ++ RISCVIns ai, bi; ++ int32_t shwid, shmsk; ++ switch (riscvi) { ++ case RISCVI_RORI: ++ ai = RISCVI_SRLI, bi = RISCVI_SLLI; ++ shwid = 64, shmsk = 63; ++ break; ++ case RISCVI_RORIW: ++ ai = RISCVI_SRLIW, bi = RISCVI_SLLIW; ++ shwid = 32, shmsk = 31; ++ break; ++ default: ++ lj_assertA(0, "invalid roti op"); ++ return; ++ } ++ emit_ds1s2(as, RISCVI_OR, rd, rd, tmp); ++ emit_dsshamt(as, bi, rd, rs1, (shwid - shamt)&shmsk); ++ emit_dsshamt(as, ai, tmp, rs1, shamt&shmsk); ++ } ++} ++ ++static void emit_rot(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg rs2, Reg tmp) ++{ ++ if (as->flags & JIT_F_RVZbb) { ++ emit_ds1s2(as, riscvi, rd, rs1, rs2); ++ } else { ++ RISCVIns sai, 
sbi; ++ switch (riscvi) { ++ case RISCVI_ROL: ++ sai = RISCVI_SLL, sbi = RISCVI_SRL; ++ break; ++ case RISCVI_ROR: ++ sai = RISCVI_SRL, sbi = RISCVI_SLL; ++ break; ++ case RISCVI_ROLW: ++ sai = RISCVI_SLLW, sbi = RISCVI_SRLW; ++ break; ++ case RISCVI_RORW: ++ sai = RISCVI_SRLW, sbi = RISCVI_SLLW; ++ break; ++ default: ++ lj_assertA(0, "invalid rot op"); ++ return; ++ } ++ if (rd == rs2) { ++ emit_ds1s2(as, RISCVI_OR, rd, rd, tmp); ++ emit_ds1s2(as, sbi, tmp, rs1, tmp); ++ emit_ds1s2(as, sai, rd, rs1, rs2); ++ emit_ds2(as, RISCVI_NEG, tmp, rs2); ++ } else { ++ emit_ds1s2(as, RISCVI_OR, rd, rd, tmp); ++ emit_ds1s2(as, sai, rd, rs1, rs2); ++ emit_ds1s2(as, sbi, tmp, rs1, tmp); ++ emit_ds2(as, RISCVI_NEG, tmp, rs2); ++ } ++ } ++} ++ ++static void emit_ext(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1) ++{ ++ if ((riscvi != RISCVI_ZEXT_W && as->flags & JIT_F_RVZbb) || ++ (riscvi == RISCVI_ZEXT_W && as->flags & JIT_F_RVZba)) { ++ emit_ds(as, riscvi, rd, rs1); ++ } else if (as->flags & JIT_F_RVXThead) { ++ uint32_t hi, sext; ++ switch (riscvi) { ++ case RISCVI_ZEXT_B: ++ case RISCVI_SEXT_W: ++ emit_ds(as, riscvi, rd, rs1); ++ return; ++ case RISCVI_ZEXT_H: ++ hi = 15, sext = 0; ++ break; ++ case RISCVI_ZEXT_W: ++ hi = 31, sext = 0; ++ break; ++ case RISCVI_SEXT_B: ++ hi = 7, sext = 1; ++ break; ++ case RISCVI_SEXT_H: ++ hi = 15, sext = 1; ++ break; ++ default: ++ lj_assertA(0, "invalid ext op"); ++ return; ++ } ++ emit_dsi(as, sext ? 
RISCVI_TH_EXT : RISCVI_TH_EXTU, ++ rd, rs1, hi << 6); ++ } else { ++ RISCVIns sli, sri; ++ int32_t shamt; ++ switch (riscvi) { ++ case RISCVI_ZEXT_B: ++ case RISCVI_SEXT_W: ++ emit_ds(as, riscvi, rd, rs1); ++ return; ++ case RISCVI_ZEXT_H: ++ sli = RISCVI_SLLI, sri = RISCVI_SRLI; ++ shamt = 48; ++ break; ++ case RISCVI_ZEXT_W: ++ sli = RISCVI_SLLI, sri = RISCVI_SRLI; ++ shamt = 32; ++ break; ++ case RISCVI_SEXT_B: ++ sli = RISCVI_SLLI, sri = RISCVI_SRAI; ++ shamt = 56; ++ break; ++ case RISCVI_SEXT_H: ++ sli = RISCVI_SLLI, sri = RISCVI_SRAI; ++ shamt = 48; ++ break; ++ default: ++ lj_assertA(0, "invalid ext op"); ++ return; ++ } ++ emit_dsshamt(as, sri, rd, rd, shamt); ++ emit_dsshamt(as, sli, rd, rs1, shamt); ++ } ++} ++ ++static void emit_cleartp(ASMState *as, Reg rd, Reg rs1) ++{ ++ if (as->flags & JIT_F_RVXThead) { ++ emit_dsi(as, RISCVI_TH_EXTU, rd, rs1, 46u << 6); ++ } else { ++ emit_dsshamt(as, RISCVI_SRLI, rd, rd, 17); ++ emit_dsshamt(as, RISCVI_SLLI, rd, rs1, 17); ++ } ++} ++ ++/* ++static void emit_andn(ASMState *as, Reg rd, Reg rs1, Reg rs2, Reg tmp) ++{ ++ if (as->flags & JIT_F_RVZbb) { ++ emit_ds1s2(as, RISCVI_ANDN, rd, rs1, rs2); ++ } else { ++ emit_ds1s2(as, RISCVI_AND, rd, rs1, tmp); ++ emit_ds(as, RISCVI_NOT, tmp, rs2); ++ } ++} ++*/ ++ ++/* ++static void emit_orn(ASMState *as, Reg rd, Reg rs1, Reg rs2, Reg tmp) ++{ ++ if (as->flags & JIT_F_RVZbb) { ++ emit_ds1s2(as, RISCVI_ORN, rd, rs1, rs2); ++ } else { ++ emit_ds1s2(as, RISCVI_OR, rd, rs1, tmp); ++ emit_ds(as, RISCVI_NOT, tmp, rs2); ++ } ++} ++*/ ++ ++static void emit_xnor(ASMState *as, Reg rd, Reg rs1, Reg rs2) ++{ ++ if (as->flags & JIT_F_RVZbb) { ++ emit_ds1s2(as, RISCVI_XNOR, rd, rs1, rs2); ++ } else { ++ emit_ds(as, RISCVI_NOT, rd, rd); ++ emit_ds1s2(as, RISCVI_XOR, rd, rs1, rs2); ++ } ++} ++ ++static void emit_shxadd(ASMState *as, Reg rd, Reg rs1, Reg rs2, Reg tmp, unsigned int shamt) ++{ ++ if (as->flags & JIT_F_RVZba) { ++ switch (shamt) { ++ case 1: emit_ds1s2(as, RISCVI_SH1ADD, rd, 
rs2, rs1); break; ++ case 2: emit_ds1s2(as, RISCVI_SH2ADD, rd, rs2, rs1); break; ++ case 3: emit_ds1s2(as, RISCVI_SH3ADD, rd, rs2, rs1); break; ++ default: return; ++ } ++ } else if (as->flags & JIT_F_RVXThead) { ++ emit_dsi(as, RISCVI_TH_ADDSL|RISCVF_IMMI(shamt<<5), rd, rs1, rs2); ++ } else { ++ emit_ds1s2(as, RISCVI_ADD, rd, rs1, tmp); ++ emit_dsshamt(as, RISCVI_SLLI, tmp, rs2, shamt); ++ } ++} ++ ++#define emit_sh1add(as, rd, rs1, rs2, tmp) emit_shxadd(as, rd, rs1, rs2, tmp, 1) ++#define emit_sh2add(as, rd, rs1, rs2, tmp) emit_shxadd(as, rd, rs1, rs2, tmp, 2) ++#define emit_sh3add(as, rd, rs1, rs2, tmp) emit_shxadd(as, rd, rs1, rs2, tmp, 3) ++ ++static void emit_loadk12(ASMState *as, Reg rd, int32_t i) ++{ ++ emit_di(as, RISCVI_ADDI, rd, i); ++} ++ ++static void emit_loadk32(ASMState *as, Reg rd, int32_t i) ++{ ++ if (checki12((int64_t)i)) { ++ emit_loadk12(as, rd, i); ++ } else { ++ if(LJ_UNLIKELY(RISCVF_HI((uint32_t)i) == 0x80000u && i > 0)) ++ emit_dsi(as, RISCVI_XORI, rd, rd, RISCVF_LO(i)); ++ else ++ emit_dsi(as, RISCVI_ADDI, rd, rd, RISCVF_LO(i)); ++ emit_du(as, RISCVI_LUI, rd, RISCVF_HI((uint32_t)i)); ++ } ++} ++ ++/* -- Emit loads/stores --------------------------------------------------- */ ++ ++/* Prefer rematerialization of BASE/L from global_State over spills. */ ++#define emit_canremat(ref) ((ref) <= REF_BASE) ++ ++#define glofs(as, k) \ ++ ((intptr_t)((uintptr_t)(k) - (uintptr_t)&J2GG(as->J)->g)) ++ ++/* Load a 32 bit constant into a GPR. */ ++#define emit_loadi(as, r, i) emit_loadk32(as, r, i); ++ ++/* Load a 64 bit constant into a GPR. 
*/ ++static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) ++{ ++ int64_t u64_delta = (int64_t)((intptr_t)u64 - (intptr_t)(as->mcp - 2)); /* Measured from the slot the AUIPC below would occupy. */ ++ if (checki32((int64_t)u64)) { /* Fits the 32 bit constant loader. */ ++ emit_loadk32(as, r, (int32_t)u64); ++ } else if (checki32auipc(u64_delta)) { /* PC-relative: AUIPC (emitted last) runs before the ADDI. */ ++ emit_dsi(as, RISCVI_ADDI, r, r, RISCVF_LO(u64_delta)); ++ emit_du(as, RISCVI_AUIPC, r, RISCVF_HI(u64_delta)); ++ } else { /* General case: load the upper word, then shift in the lower word. */ ++ uint32_t lo32 = u64 & 0xfffffffful; ++ if (checku11(lo32)) { /* Lower word needs at most one ADDI after a 32 bit shift. */ ++ if (lo32 > 0) emit_dsi(as, RISCVI_ADDI, r, r, lo32); ++ emit_dsshamt(as, RISCVI_SLLI, r, r, 32); ++ } else { ++ RISCVIns li_insn[7] = {0}; /* Candidate SLLI/ADDI sequence, stored last-executed first. */ ++ int shamt = 0, step = 0; ++ for(int bit = 0; bit < 32; bit++) { ++ if (lo32 & (1u << bit)) { ++ if (shamt) li_insn[step++] = RISCVI_SLLI | RISCVF_D(r) | RISCVF_S1(r) | RISCVF_IMMI(shamt); ++ int inc = bit+10 > 31 ? 31-bit : 10; ++ bit += inc, shamt = inc+1; ++ uint32_t msk = ((1ul << (bit+1))-1)^((1ul << (((bit-inc) >= 0) ? (bit-inc) : 0))-1); ++ uint16_t payload = (lo32 & msk) >> (((bit-inc) >= 0) ? (bit-inc) : 0); ++ li_insn[step++] = RISCVI_ADDI | RISCVF_D(r) | RISCVF_S1(r) | RISCVF_IMMI(payload); ++ } else shamt++; ++ } ++ if (shamt) li_insn[step++] = RISCVI_SLLI | RISCVF_D(r) | RISCVF_S1(r) | RISCVF_IMMI(shamt); ++ ++ if (step < 6) { /* Shorter than the fixed 6-instruction sequence below. */ ++ for(int i = 0; i < step; i++) ++ *--as->mcp = li_insn[i]; ++ } else { /* Fixed 11 + 11 + 10 bit split of the lower word. */ ++ emit_dsi(as, RISCVI_ADDI, r, r, u64 & 0x3ff); ++ emit_dsshamt(as, RISCVI_SLLI, r, r, 10); ++ emit_dsi(as, RISCVI_ADDI, r, r, (u64 >> 10) & 0x7ff); ++ emit_dsshamt(as, RISCVI_SLLI, r, r, 11); ++ emit_dsi(as, RISCVI_ADDI, r, r, (u64 >> 21) & 0x7ff); ++ emit_dsshamt(as, RISCVI_SLLI, r, r, 11); ++ } ++ } ++ ++ uint32_t hi32 = u64 >> 32; /* Emitted last, so the upper word is loaded before the code above runs. */ ++ if (hi32 & 0xfff) emit_loadk32(as, r, hi32); ++ else emit_du(as, RISCVI_LUI, r, hi32 >> 12); ++ } ++} ++ ++#define emit_loada(as, r, addr) emit_loadu64(as, (r), u64ptr((addr))) ++ ++/* Get/set from constant pointer.
*/ ++static void emit_lsptr(ASMState *as, RISCVIns riscvi, Reg r, void *p, RegSet allow) ++{ ++ emit_lso(as, riscvi, r, ra_allock(as, igcptr(p), allow), 0); ++} ++ ++/* Load 64 bit IR constant into register (GPR, or FPR via Zfa FLI.D or RID_TMP + FMV.D.X). */ ++static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) ++{ ++ const uint64_t *k = &ir_k64(ir)->u64; ++ Reg r64 = r; ++ if (rset_test(RSET_FPR, r)) { ++ if (as->flags & JIT_F_RVZfa) { ++ uint8_t sign = (*k >> 63) & 1; ++ uint16_t k_hi16 = (*k >> 48) & 0xffff; ++ uint64_t k_lo48 = *k & 0xffffffffffff; ++ uint16_t mk_hi16 = k_hi16 & 0x7fff; ++ if (!k_lo48) { /* The table is compared on the upper 16 bits only. */ ++ if (riscv_fli_map_hi16[0] == k_hi16) { /* Entry 0 is matched with the sign bit included. */ ++ emit_ds(as, RISCVI_FLI_D, r, 0); ++ return; ++ } ++ for (int i = 1; i < 32; i++) { /* Other entries are matched on the sign-stripped value. */ ++ if (riscv_fli_map_hi16[i] == mk_hi16) { ++ if (sign) /* Negate after the FLI.D (emitted first, runs second). */ ++ emit_ds1s2(as, RISCVI_FNEG_D, r, r, r); ++ emit_ds(as, RISCVI_FLI_D, r, i); ++ return; ++ } ++ } ++ } ++ } ++ r64 = RID_TMP; /* Build the bits in RID_TMP, then move them into the FPR. */ ++ emit_ds(as, RISCVI_FMV_D_X, r, r64); ++ } ++ emit_loadu64(as, r64, *k); ++} ++ ++/* Get/set global_State fields. */ ++static void emit_lsglptr(ASMState *as, RISCVIns riscvi, Reg r, int32_t ofs) ++{ ++ emit_lso(as, riscvi, r, RID_GL, ofs); ++} ++ ++#define emit_getgl(as, r, field) \ ++ emit_lsglptr(as, RISCVI_LD, (r), (int32_t)offsetof(global_State, field)) ++#define emit_setgl(as, r, field) \ ++ emit_lsglptr(as, RISCVI_SD, (r), (int32_t)offsetof(global_State, field)) ++ ++/* Trace number is determined from per-trace exit stubs. */ ++#define emit_setvmstate(as, i) UNUSED(i) ++ ++/* -- Emit control-flow instructions -------------------------------------- */ ++ ++/* Label for internal jumps. */ ++typedef MCode *MCLabel; ++ ++/* Return label pointing to current PC.
*/ ++#define emit_label(as) ((as)->mcp) ++ ++static void emit_branch(ASMState *as, RISCVIns riscvi, Reg rs1, Reg rs2, MCode *target, int jump) ++{ /* jump: -1 = bare branch; 0 = NOP+branch if in range, else inverted branch over a JAL; 1 = always the JAL form. */ ++ MCode *p = as->mcp; ++ ptrdiff_t delta = (char *)target - (char *)(p - 1); ++ switch (jump) { ++ case -1: ++ lj_assertA(RISCVF_SIMM_OK(delta, 13), "branch target out of range"); /* B */ ++ *--p = riscvi | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB(delta); ++ break; ++ case 0: case 1: ++ lj_assertA(RISCVF_SIMM_OK(delta, 21), "branch target out of range"); /* ^B+J */ ++ if (checki13(delta) && !jump) { ++ *--p = riscvi | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB(delta); ++ *--p = RISCVI_NOP; ++ } else { ++ *--p = RISCVI_JAL | RISCVF_IMMJ(delta); /* Poorman's trampoline */ ++ *--p = (riscvi^0x00001000) | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB(8); ++ } ++ break; ++ default: ++ lj_assertA(0, "invalid jump type"); ++ break; ++ } ++ as->mcp = p; ++} ++ ++static void emit_jump(ASMState *as, MCode *target, int jump) ++{ /* jump: -1 = single JAL; 0 = JAL+NOP if in range, else AUIPC+JALR; 1 = always AUIPC+JALR. */ ++ MCode *p = as->mcp; ++ ptrdiff_t delta; ++ switch(jump) { ++ case -1: ++ delta = (char *)target - (char *)(p - 1); ++ lj_assertA(RISCVF_SIMM_OK(delta, 21), "jump target out of range"); /* J */ ++ *--p = RISCVI_JAL | RISCVF_IMMJ(delta); ++ break; ++ case 0: case 1: ++ delta = (char *)target - (char *)(p - 2); ++ lj_assertA(checki32auipc(delta), "jump target out of range"); /* AUIPC+JALR */ ++ if (checki21(delta) && !jump) { ++ *--p = RISCVI_NOP; ++ *--p = RISCVI_JAL | RISCVF_IMMJ(delta); ++ } else { ++ *--p = RISCVI_JALR | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta)); ++ *--p = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta)); ++ } ++ break; ++ default: ++ lj_assertA(0, "invalid jump type"); ++ break; ++ } ++ as->mcp = p; ++} ++ ++#define emit_jmp(as, target) emit_jump(as, target, 0) ++ ++#define emit_mv(as, dst, src) \ ++ emit_ds(as, RISCVI_MV, (dst), (src)) ++ ++static void emit_call(ASMState *as, void *target, int needcfa) ++{ ++ MCode *p = as->mcp; ++ ptrdiff_t delta =
(char *)target - (char *)(p - 2); /* Relative to the lower of the two slots written below. */ ++ if (checki21(delta)) { /* Within JAL reach: direct call padded with a NOP. */ ++ *--p = RISCVI_NOP; ++ *--p = RISCVI_JAL | RISCVF_D(RID_RA) | RISCVF_IMMJ(delta); ++ } else if (checki32auipc(delta)) { /* AUIPC+JALR reach; same predicate as emit_jump(). */ ++ *--p = RISCVI_JALR | RISCVF_D(RID_RA) | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta)); ++ *--p = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta)); ++ needcfa = 1; ++ } else { /* Out of PC-relative reach: call through RID_CFUNCADDR. */ ++ *--p = RISCVI_JALR | RISCVF_D(RID_RA) | RISCVF_S1(RID_CFUNCADDR) | RISCVF_IMMI(0); ++ needcfa = 2; ++ } ++ as->mcp = p; ++ if (needcfa > 1) /* The indirect form needs the target in RID_CFUNCADDR. */ ++ ra_allockreg(as, (intptr_t)target, RID_CFUNCADDR); ++} ++ ++/* -- Emit generic operations --------------------------------------------- */ ++ ++/* Generic move between two regs. */ ++static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) ++{ ++ if (src < RID_MAX_GPR && dst < RID_MAX_GPR) ++ emit_mv(as, dst, src); ++ else if (src < RID_MAX_GPR) ++ emit_ds(as, irt_isnum(ir->t) ? RISCVI_FMV_D_X : RISCVI_FMV_W_X, dst, src); ++ else if (dst < RID_MAX_GPR) ++ emit_ds(as, irt_isnum(ir->t) ? RISCVI_FMV_X_D : RISCVI_FMV_X_W, dst, src); ++ else ++ emit_ds1s2(as, irt_isnum(ir->t) ? RISCVI_FMV_D : RISCVI_FMV_S, dst, src, src); ++} ++ ++/* Emit an arithmetic operation with a constant operand. */ ++static void emit_opk(ASMState *as, RISCVIns riscvi, Reg dest, Reg src, ++ Reg tmp, intptr_t k) ++{ ++ if (checki12(k)) emit_dsi(as, riscvi, dest, src, k); ++ else { ++ switch (riscvi) { ++ case RISCVI_ADDI: riscvi = RISCVI_ADD; break; ++ case RISCVI_XORI: riscvi = RISCVI_XOR; break; ++ case RISCVI_ORI: riscvi = RISCVI_OR; break; ++ case RISCVI_ANDI: riscvi = RISCVI_AND; break; ++ default: lj_assertA(0, "NYI arithmetic RISCVIns"); return; ++ } ++ emit_ds1s2(as, riscvi, dest, src, tmp); ++ emit_loadu64(as, tmp, (uintptr_t)k); ++ } ++} ++ ++/* Generic load of register with base and (small) offset address. */ ++static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) ++{ ++ if (r < RID_MAX_GPR) ++ emit_lso(as, irt_is64(ir->t) ?
RISCVI_LD : RISCVI_LW, r, base, ofs); ++ else ++ emit_lso(as, irt_isnum(ir->t) ? RISCVI_FLD : RISCVI_FLW, r, base, ofs); ++} ++ ++/* Generic store of register with base and (small) offset address. */ ++static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) ++{ ++ if (r < RID_MAX_GPR) ++ emit_lso(as, irt_is64(ir->t) ? RISCVI_SD : RISCVI_SW, r, base, ofs); ++ else ++ emit_lso(as, irt_isnum(ir->t) ? RISCVI_FSD : RISCVI_FSW, r, base, ofs); ++} ++ ++/* Add offset to pointer. */ ++static void emit_addptr(ASMState *as, Reg r, int32_t ofs) ++{ ++ if (ofs) ++ emit_opk(as, RISCVI_ADDI, r, r, RID_TMP, ofs); ++} ++ ++ ++#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs)) +--- a/src/lj_asm.c ++++ b/src/lj_asm.c +@@ -231,6 +231,8 @@ static Reg rset_pickrandom(ASMState *as, + #include "lj_emit_ppc.h" + #elif LJ_TARGET_MIPS + #include "lj_emit_mips.h" ++#elif LJ_TARGET_RISCV64 ++#include "lj_emit_riscv.h" + #else + #error "Missing instruction emitter for target CPU" + #endif +@@ -1719,6 +1721,8 @@ static void asm_loop(ASMState *as) + #include "lj_asm_mips.h" + #elif LJ_TARGET_S390X + #include "lj_asm_s390x.h" ++#elif LJ_TARGET_RISCV64 ++#include "lj_asm_riscv64.h" + #else + #error "Missing assembler for target CPU" + #endif +--- /dev/null ++++ b/src/lj_asm_riscv64.h +@@ -0,0 +1,2048 @@ ++/* ++** RISC-V IR assembler (SSA IR -> machine code). ++** Copyright (C) 2022-2026 ISRC, ISCAS. See Copyright Notice in luajit.h ++** ++** Contributed by gns from PLCT Lab, ISRC, ISCAS. ++*/ ++ ++/* -- Register allocator extensions --------------------------------------- */ ++ ++/* Allocate a register with a hint. */ ++static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow) ++{ ++ Reg r = IR(ref)->r; ++ if (ra_noreg(r)) { ++ if (!ra_hashint(r) && !iscrossref(as, ref)) ++ ra_sethint(IR(ref)->r, hint); /* Propagate register hint.
*/ ++ r = ra_allocref(as, ref, allow); ++ } ++ ra_noweak(as, r); ++ return r; ++} ++ ++/* Allocate a register or RID_ZERO. */ ++static Reg ra_alloc1z(ASMState *as, IRRef ref, RegSet allow) ++{ ++ Reg r = IR(ref)->r; ++ if (ra_noreg(r)) { ++ if (!(allow & RSET_FPR) && irref_isk(ref) && get_kval(as, ref) == 0) ++ return RID_ZERO; ++ r = ra_allocref(as, ref, allow); ++ } else { ++ ra_noweak(as, r); ++ } ++ return r; ++} ++ ++/* Allocate two source registers for three-operand instructions. */ ++static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) ++{ ++ IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); ++ Reg left = irl->r, right = irr->r; ++ if (ra_hasreg(left)) { ++ ra_noweak(as, left); ++ if (ra_noreg(right)) ++ right = ra_alloc1z(as, ir->op2, rset_exclude(allow, left)); ++ else ++ ra_noweak(as, right); ++ } else if (ra_hasreg(right)) { ++ ra_noweak(as, right); ++ left = ra_alloc1z(as, ir->op1, rset_exclude(allow, right)); ++ } else if (ra_hashint(right)) { ++ right = ra_alloc1z(as, ir->op2, allow); ++ left = ra_alloc1z(as, ir->op1, rset_exclude(allow, right)); ++ } else { ++ left = ra_alloc1z(as, ir->op1, allow); ++ right = ra_alloc1z(as, ir->op2, rset_exclude(allow, left)); ++ } ++ return left | (right << 8); ++} ++ ++/* -- Guard handling ------------------------------------------------------ */ ++ ++/* Copied from MIPS, AUIPC+JALR is expensive to setup in-place */ ++#define RISCV_SPAREJUMP 4 ++ ++/* Setup spare long-range jump (trampoline?) slots per mcarea.
*/ ++ ++static void asm_sparejump_setup(ASMState *as) ++{ ++ MCode *mxp = as->mctop; ++ if ((char *)mxp == (char *)as->J->mcarea + as->J->szmcarea) { /* mctop is still at the very end of the mcarea. */ ++ for (int i = RISCV_SPAREJUMP*2; i--; ) /* Two EBREAK words per spare slot. */ ++ *--mxp = RISCVI_EBREAK; ++ as->mctop = mxp; ++ } ++} ++ ++static MCode *asm_sparejump_use(MCode *mcarea, MCode *target) ++{ ++ MCode *mxp = (MCode *)((char *)mcarea + ((MCLink *)mcarea)->size); ++ int slot = RISCV_SPAREJUMP; ++ RISCVIns tslot = RISCVI_EBREAK, tauipc, tjalr; ++ while (slot--) { /* Scan the slots downwards from the end of the area. */ ++ mxp -= 2; ++ ptrdiff_t delta = (char *)target - (char *)mxp; ++ tauipc = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta)), ++ tjalr = RISCVI_JALR | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta)); ++ if (mxp[0] == tauipc && mxp[1] == tjalr) { /* Slot already jumps to target: reuse it. */ ++ return mxp; ++ } else if (mxp[0] == tslot) { /* Still EBREAK: claim this free slot. */ ++ mxp[0] = tauipc, mxp[1] = tjalr; ++ return mxp; ++ } ++ } ++ return NULL; /* Every slot holds a jump to some other target. */ ++} ++ ++/* Setup exit stub after the end of each trace. */ ++static void asm_exitstub_setup(ASMState *as, ExitNo nexits) ++{ ++ ExitNo i; ++ MCode *target = (MCode *)(void *)lj_vm_exit_handler; ++ MCode *mxp = as->mctop; ++ if (mxp - (nexits + 4 + MCLIM_REDZONE) < as->mclim) ++ asm_mclimit(as); ++ for (i = nexits-1; (int32_t)i >= 0; i--) ++ *--mxp = RISCVI_JAL | RISCVF_D(RID_RA) | RISCVF_IMMJ((uintptr_t)(4*(-4-i))); /* Each JAL targets the store at the stub head. */ ++ ptrdiff_t delta = (char *)target - (char *)(mxp-3); ++ /* !ind: 1: sd ra, 0(sp); auipc+jalr ->vm_exit_handler; lui x0, traceno; jal <1; jal <1; ... ++ ** ind: 1: sd ra, 0(sp); ld tmp, K64_VXH(gl); jalr tmp; lui x0, traceno; jal <1; jal <1; ... ++ ** Note: RID_TMP is RID_RA!
++ */ ++ *--mxp = RISCVI_LUI | RISCVF_IMMU(as->T->traceno); ++ if (checki32auipc(delta)) { ++ *--mxp = RISCVI_JALR | RISCVF_D(RID_RA) | RISCVF_S1(RID_TMP) ++ | RISCVF_IMMI(RISCVF_LO((uintptr_t)(void *)delta)); ++ *--mxp = RISCVI_AUIPC | RISCVF_D(RID_TMP) ++ | RISCVF_IMMU(RISCVF_HI((uintptr_t)(void *)delta)); ++ } else { ++ *--mxp = RISCVI_JALR | RISCVF_D(RID_RA) | RISCVF_S1(RID_TMP) | RISCVF_IMMI(0); ++ *--mxp = RISCVI_LD | RISCVF_D(RID_TMP) | RISCVF_S1(RID_GL) ++ | RISCVF_IMMI(glofs(as, &as->J->k64[LJ_K64_VM_EXIT_HANDLER])); ++ lj_assertA(checki12(glofs(as, &as->J->k64[LJ_K64_VM_EXIT_HANDLER])), ++ "exit handler address offset overflow"); ++ } ++ *--mxp = RISCVI_SD | RISCVF_S2(RID_RA) | RISCVF_S1(RID_SP); ++ as->mctop = mxp; ++} ++ ++static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno) ++{ ++ /* Keep this in-sync with exitstub_trace_addr(). */ ++ return as->mctop + exitno + 4; /* Skip the 4-instruction stub head written by asm_exitstub_setup(). */ ++} ++ ++/* Emit conditional branch to exit for guard. */ ++static void asm_guard(ASMState *as, RISCVIns riscvi, Reg rs1, Reg rs2) ++{ ++ MCode *target = asm_exitstub_addr(as, as->snapno); ++ MCode *p = as->mcp; ++ if (LJ_UNLIKELY(p == as->invmcp)) { ++ as->loopinv = 1; ++ *p = RISCVI_JAL | RISCVF_IMMJ((char *)target - (char *)p); ++ riscvi = riscvi^RISCVF_FUNCT3(1); /* Invert cond. */ ++ target = p - 1; /* Patch target later in asm_loop_fixup. */ ++ } ++ ptrdiff_t delta = (char *)target - (char *)(p - 1); ++ *--p = RISCVI_JAL | RISCVF_IMMJ(delta); ++ *--p = (riscvi^RISCVF_FUNCT3(1)) | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB(8); /* Inverted branch skips the JAL when the guard holds. */ ++ as->mcp = p; ++} ++ ++/* -- Operand fusion ------------------------------------------------------ */ ++ ++/* Limit linear search to this distance. Avoids O(n^2) behavior. */ ++#define CONFLICT_SEARCH_LIM 31 ++ ++/* Check if there's no conflicting instruction between curins and ref.
*/ ++static int noconflict(ASMState *as, IRRef ref, IROp conflict) ++{ ++ IRIns *ir = as->ir; ++ IRRef i = as->curins; ++ if (i > ref + CONFLICT_SEARCH_LIM) ++ return 0; /* Give up, ref is too far away. */ ++ while (--i > ref) ++ if (ir[i].o == conflict) ++ return 0; /* Conflict found. */ ++ return 1; /* Ok, no conflict. */ ++} ++ ++/* Fuse the array base of colocated arrays. */ ++static int32_t asm_fuseabase(ASMState *as, IRRef ref) ++{ ++ IRIns *ir = IR(ref); ++ if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE && ++ !neverfuse(as) && noconflict(as, ref, IR_NEWREF)) ++ return (int32_t)sizeof(GCtab); /* Base offset asm_fuseahuref() adds to the table reference. */ ++ return 0; /* Not fusable. */ ++} ++ ++/* Fuse array/hash/upvalue reference into register+offset operand. */ ++static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow) ++{ ++ IRIns *ir = IR(ref); ++ if (ra_noreg(ir->r)) { ++ if (ir->o == IR_AREF) { ++ if (mayfuse(as, ref)) { ++ if (irref_isk(ir->op2)) { ++ IRRef tab = IR(ir->op1)->op1; ++ int32_t ofs = asm_fuseabase(as, tab); ++ IRRef refa = ofs ? tab : ir->op1; /* Fused base: address relative to the table itself. */ ++ ofs += 8*IR(ir->op2)->i; ++ if (checki12(ofs)) { /* Must fit the 12 bit load/store offset. */ ++ *ofsp = ofs; ++ return ra_alloc1(as, refa, allow); ++ } ++ } ++ } ++ } else if (ir->o == IR_HREFK) { ++ if (mayfuse(as, ref)) { ++ int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node)); ++ if (checki12(ofs)) { /* Must fit the 12 bit load/store offset. */ ++ *ofsp = ofs; ++ return ra_alloc1(as, ir->op1, allow); ++ } ++ } ++ } else if (ir->o == IR_UREFC) { ++ if (irref_isk(ir->op1)) { ++ GCfunc *fn = ir_kfunc(IR(ir->op1)); ++ GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv; ++ intptr_t ofs = ((intptr_t)((uintptr_t)(&uv->tv) - (uintptr_t)&J2GG(as->J)->g)); ++ if (checki12(ofs)) { /* Upvalue slot is addressable relative to RID_GL. */ ++ *ofsp = (int32_t)ofs; ++ return RID_GL; ++ } ++ } ++ } else if (ir->o == IR_TMPREF) { ++ *ofsp = (int32_t)offsetof(global_State, tmptv); ++ return RID_GL; ++ } ++ } ++ *ofsp = 0; /* Not fused: the reference itself provides the address. */ ++ return ra_alloc1(as, ref, allow); ++} ++ ++/* Fuse XLOAD/XSTORE reference into load/store operand.
*/ ++static void asm_fusexref(ASMState *as, RISCVIns riscvi, Reg rd, IRRef ref, ++ RegSet allow, int32_t ofs) ++{ ++ IRIns *ir = IR(ref); ++ Reg base; ++ if (ra_noreg(ir->r) && canfuse(as, ir)) { ++ intptr_t ofs2; ++ if (ir->o == IR_ADD) { ++ if (irref_isk(ir->op2) && (ofs2 = ofs + get_kval(as, ir->op2), ++ checki12(ofs2))) { ++ ref = ir->op1; ++ ofs = (int32_t)ofs2; ++ } ++ } else if (ir->o == IR_STRREF) { ++ ofs2 = 4096; /* Fails checki12() unless a constant operand replaces it below. */ ++ lj_assertA(ofs == 0, "bad usage"); ++ ofs = (int32_t)sizeof(GCstr); ++ if (irref_isk(ir->op2)) { ++ ofs2 = ofs + get_kval(as, ir->op2); ++ ref = ir->op1; ++ } else if (irref_isk(ir->op1)) { ++ ofs2 = ofs + get_kval(as, ir->op1); ++ ref = ir->op2; ++ } ++ if (!checki12(ofs2)) { ++ /* NYI: Fuse ADD with constant. */ ++ Reg right, left = ra_alloc2(as, ir, allow); ++ right = (left >> 8); left &= 255; ++ emit_lso(as, riscvi, rd, RID_TMP, ofs); /* Emitted first, so it runs after the ADD below. */ ++ emit_ds1s2(as, RISCVI_ADD, RID_TMP, left, right); ++ return; ++ } ++ ofs = ofs2; ++ } ++ } ++ base = ra_alloc1(as, ref, allow); ++ emit_lso(as, riscvi, rd, base, ofs); ++} ++ ++/* Fuse Integer multiply-accumulate. */ ++ ++static int asm_fusemac(ASMState *as, IRIns *ir, RISCVIns riscvi) ++{ ++ IRRef lref = ir->op1, rref = ir->op2; ++ IRIns *irm; ++ if (lref != rref && ++ ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && ++ ra_noreg(irm->r)) || ++ (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && ++ (rref = lref, ra_noreg(irm->r))))) { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg add = ra_hintalloc(as, rref, dest, RSET_GPR); ++ Reg left = ra_alloc2(as, irm, ++ rset_exclude(rset_exclude(RSET_GPR, dest), add)); ++ Reg right = (left >> 8); left &= 255; /* ra_alloc2() packs left | right << 8. */ ++ emit_ds1s2(as, riscvi, dest, left, right); ++ if (dest != add) emit_mv(as, dest, add); /* Emitted last, so the addend is copied into dest first. */ ++ return 1; /* Fused. */ ++ } ++ return 0; /* No fusable MUL operand. */ ++} ++ ++/* Fuse FP multiply-add/sub.
*/ ++ ++static int asm_fusemadd(ASMState *as, IRIns *ir, RISCVIns riscvi, RISCVIns riscvir) ++{ ++ IRRef lref = ir->op1, rref = ir->op2; ++ IRIns *irm; ++ if ((as->flags & JIT_F_OPT_FMA) && ++ lref != rref && ++ ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && ++ ra_noreg(irm->r)) || ++ (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && ++ (rref = lref, riscvi = riscvir, ra_noreg(irm->r))))) { ++ Reg dest = ra_dest(as, ir, RSET_FPR); ++ Reg add = ra_hintalloc(as, rref, dest, RSET_FPR); ++ Reg left = ra_alloc2(as, irm, ++ rset_exclude(rset_exclude(RSET_FPR, dest), add)); ++ Reg right = (left >> 8); left &= 255; ++ emit_ds1s2s3(as, riscvi, dest, left, right, add); ++ return 1; ++ } ++ return 0; ++} ++/* -- Calls --------------------------------------------------------------- */ ++ ++/* Generate a call to a C function. */ ++static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) ++{ ++ uint32_t n, nargs = CCI_XNARGS(ci); ++ int32_t ofs = 0; ++ Reg gpr, fpr = REGARG_FIRSTFPR; ++ if ((void *)ci->func) ++ emit_call(as, (void *)ci->func, 1); ++ for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) ++ as->cost[gpr] = REGCOST(~0u, ASMREF_L); ++ gpr = REGARG_FIRSTGPR; ++ for (n = 0; n < nargs; n++) { /* Setup args. */ ++ IRRef ref = args[n]; ++ IRIns *ir = IR(ref); ++ if (ref) { ++ if (irt_isfp(ir->t)) { ++ if (fpr <= REGARG_LASTFPR) { ++ lj_assertA(rset_test(as->freeset, fpr), ++ "reg %d not free", fpr); /* Must have been evicted. */ ++ ra_leftov(as, fpr, ref); ++ fpr++; if(ci->flags & CCI_VARARG) gpr++; ++ } else if (!(ci->flags & CCI_VARARG) && gpr <= REGARG_LASTGPR) { ++ lj_assertA(rset_test(as->freeset, gpr), ++ "reg %d not free", gpr); /* Must have been evicted. 
*/ ++ ra_leftov(as, gpr, ref); ++ gpr++; ++ } else { ++ Reg r = ra_alloc1(as, ref, RSET_FPR); ++ emit_spstore(as, ir, r, ofs); ++ ofs += 8; ++ } ++ } else { ++ if (gpr <= REGARG_LASTGPR) { ++ lj_assertA(rset_test(as->freeset, gpr), ++ "reg %d not free", gpr); /* Must have been evicted. */ ++ ra_leftov(as, gpr, ref); ++ gpr++; if(ci->flags & CCI_VARARG) fpr++; ++ } else { ++ Reg r = ra_alloc1z(as, ref, RSET_GPR); ++ emit_spstore(as, ir, r, ofs); ++ ofs += 8; ++ } ++ } ++ } ++ } ++} ++ ++/* Setup result reg/sp for call. Evict scratch regs. */ ++static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) ++{ ++ RegSet drop = RSET_SCRATCH; ++ int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); ++ if (ra_hasreg(ir->r)) ++ rset_clear(drop, ir->r); /* Dest reg handled below. */ ++ if (hiop && ra_hasreg((ir+1)->r)) ++ rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ ++ ra_evictset(as, drop); /* Evictions must be performed first. */ ++ if (ra_used(ir)) { ++ lj_assertA(!irt_ispri(ir->t), "PRI dest"); ++ if (irt_isfp(ir->t)) { ++ if ((ci->flags & CCI_CASTU64)) { ++ Reg dest = ra_dest(as, ir, RSET_FPR); ++ emit_ds(as, irt_isnum(ir->t) ? RISCVI_FMV_D_X : RISCVI_FMV_W_X, ++ dest, RID_RET); ++ } else { ++ ra_destreg(as, ir, RID_FPRET); ++ } ++ } else if (hiop) { ++ ra_destpair(as, ir); ++ } else { ++ ra_destreg(as, ir, RID_RET); ++ } ++ } ++} ++ ++static void asm_callx(ASMState *as, IRIns *ir) ++{ ++ IRRef args[CCI_NARGS_MAX*2]; ++ CCallInfo ci; ++ IRRef func; ++ IRIns *irf; ++ ci.flags = asm_callx_flags(as, ir); ++ asm_collectargs(as, ir, &ci, args); ++ asm_setupresult(as, ir, &ci); ++ func = ir->op2; irf = IR(func); ++ if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } ++ if (irref_isk(func)) { /* Call to constant address. */ ++ ci.func = (ASMFunction)(void *)get_kval(as, func); ++ } else { /* Need specific register for indirect calls. 
*/ ++ Reg r = ra_alloc1(as, func, RID2RSET(RID_CFUNCADDR)); ++ MCode *p = as->mcp; ++ *--p = RISCVI_JALR | RISCVF_D(RID_RA) | RISCVF_S1(r); ++ if (r == RID_CFUNCADDR) ++ *--p = RISCVI_ADDI | RISCVF_D(RID_CFUNCADDR) | RISCVF_S1(r); ++ else ++ *--p = RISCVI_MV | RISCVF_D(RID_CFUNCADDR) | RISCVF_S1(r); ++ as->mcp = p; ++ ci.func = (ASMFunction)(void *)0; ++ } ++ asm_gencall(as, &ci, args); ++} ++ ++/* -- Returns ------------------------------------------------------------- */ ++ ++/* Return to lower frame. Guard that it goes to the right spot. */ ++static void asm_retf(ASMState *as, IRIns *ir) ++{ ++ Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); ++ void *pc = ir_kptr(IR(ir->op2)); ++ int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); ++ as->topslot -= (BCReg)delta; ++ if ((int32_t)as->topslot < 0) as->topslot = 0; ++ irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ ++ emit_setgl(as, base, jit_base); ++ emit_addptr(as, base, -8*delta); ++ asm_guard(as, RISCVI_BNE, RID_TMP, ++ ra_allock(as, igcptr(pc), rset_exclude(RSET_GPR, base))); ++ emit_lso(as, RISCVI_LD, RID_TMP, base, -8); ++} ++ ++/* -- Buffer operations --------------------------------------------------- */ ++ ++#if LJ_HASBUFFER ++static void asm_bufhdr_write(ASMState *as, Reg sb) ++{ ++ Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); ++ IRIns irgc; ++ irgc.ot = IRT(0, IRT_PGC); /* GC type. 
*/ ++ emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L)); ++ emit_ds1s2(as, RISCVI_OR, RID_TMP, RID_TMP, tmp); ++ emit_dsi(as, RISCVI_ANDI, tmp, tmp, SBUF_MASK_FLAG); ++ emit_getgl(as, RID_TMP, cur_L); ++ emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L)); ++} ++#endif ++ ++/* -- Type conversions ---------------------------------------------------- */ ++ ++static void asm_tointg(ASMState *as, IRIns *ir, Reg left) ++{ ++ Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); ++ Reg dest = ra_dest(as, ir, RSET_GPR), cmp = ra_scratch(as, rset_exclude(RSET_GPR, dest)); ++ asm_guard(as, RISCVI_BEQ, cmp, RID_ZERO); ++ emit_ds1s2(as, RISCVI_FEQ_D, cmp, tmp, left); ++ emit_ds(as, RISCVI_FCVT_D_W, tmp, dest); ++ emit_ds(as, RISCVI_FCVT_W_D, dest, left); ++} ++ ++static void asm_tobit(ASMState *as, IRIns *ir) ++{ ++ RegSet allow = RSET_FPR; ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_alloc1(as, ir->op1, allow); ++ Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left)); ++ Reg tmp = ra_scratch(as, rset_clear(allow, right)); ++ emit_ds(as, RISCVI_FMV_X_W, dest, tmp); ++ emit_ds1s2(as, RISCVI_FADD_D, tmp, left, right); ++} ++ ++static void asm_conv(ASMState *as, IRIns *ir) ++{ ++ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); ++ int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64); ++ int stfp = (st == IRT_NUM || st == IRT_FLOAT); ++ IRRef lref = ir->op1; ++ lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV"); ++ /* Use GPR to pass floating-point arguments */ ++ if (irt_isfp(ir->t) && ir->r >= RID_X10 && ir->r <= RID_X17) { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg ftmp = ra_scratch(as, RSET_FPR); ++ if (stfp) { /* FP to FP conversion. */ ++ emit_ds(as, st == IRT_NUM ? RISCVI_FMV_X_W : RISCVI_FMV_X_D, dest, ftmp); ++ emit_ds(as, st == IRT_NUM ? RISCVI_FCVT_S_D : RISCVI_FCVT_D_S, ++ ftmp, ra_alloc1(as, lref, RSET_FPR)); ++ } else { /* Integer to FP conversion. 
*/ ++ Reg left = ra_alloc1(as, lref, RSET_GPR); ++ RISCVIns riscvi = irt_isfloat(ir->t) ? ++ (((IRT_IS64 >> st) & 1) ? ++ (st == IRT_I64 ? RISCVI_FCVT_S_L : RISCVI_FCVT_S_LU) : ++ (st == IRT_INT ? RISCVI_FCVT_S_W : RISCVI_FCVT_S_WU)) : ++ (((IRT_IS64 >> st) & 1) ? ++ (st == IRT_I64 ? RISCVI_FCVT_D_L : RISCVI_FCVT_D_LU) : ++ (st == IRT_INT ? RISCVI_FCVT_D_W : RISCVI_FCVT_D_WU)); ++ emit_ds(as, st64 ? RISCVI_FMV_X_D : RISCVI_FMV_X_W, dest, ftmp); ++ emit_ds(as, riscvi, ftmp, left); ++ } ++ } else if (irt_isfp(ir->t)) { ++ Reg dest = ra_dest(as, ir, RSET_FPR); ++ if (stfp) { /* FP to FP conversion. */ ++ emit_ds(as, st == IRT_NUM ? RISCVI_FCVT_S_D : RISCVI_FCVT_D_S, ++ dest, ra_alloc1(as, lref, RSET_FPR)); ++ } else { /* Integer to FP conversion. */ ++ Reg left = ra_alloc1(as, lref, RSET_GPR); ++ RISCVIns riscvi = irt_isfloat(ir->t) ? ++ (((IRT_IS64 >> st) & 1) ? ++ (st == IRT_I64 ? RISCVI_FCVT_S_L : RISCVI_FCVT_S_LU) : ++ (st == IRT_INT ? RISCVI_FCVT_S_W : RISCVI_FCVT_S_WU)) : ++ (((IRT_IS64 >> st) & 1) ? ++ (st == IRT_I64 ? RISCVI_FCVT_D_L : RISCVI_FCVT_D_LU) : ++ (st == IRT_INT ? RISCVI_FCVT_D_W : RISCVI_FCVT_D_WU)); ++ emit_ds(as, riscvi, dest, left); ++ } ++ } else if (stfp) { /* FP to integer conversion. */ ++ if (irt_isguard(ir->t)) { ++ /* Checked conversions are only supported from number to int. */ ++ lj_assertA(irt_isint(ir->t) && st == IRT_NUM, ++ "bad type for checked CONV"); ++ asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); ++ } else { ++ Reg left = ra_alloc1(as, lref, RSET_FPR); ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted"); ++ if (irt_isu64(ir->t)) { ++ MCLabel l_end = emit_label(as); ++ emit_ds(as, st == IRT_NUM ? RISCVI_FCVT_L_D : RISCVI_FCVT_L_S, dest, left); ++ emit_branch(as, RISCVI_BNE, dest, RID_ZERO, l_end, -1); ++ emit_ds(as, st == IRT_NUM ? RISCVI_FCVT_LU_D : RISCVI_FCVT_LU_S, dest, left); ++ } else { ++ RISCVIns riscvi = irt_is64(ir->t) ? ++ (st == IRT_NUM ? 
RISCVI_FCVT_L_D : RISCVI_FCVT_L_S) : ++ (st == IRT_NUM ? RISCVI_FCVT_W_D : RISCVI_FCVT_W_S); ++ emit_ds(as, riscvi|RISCVF_RM(RISCVRM_RTZ), dest, left); ++ } ++ } ++ } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_alloc1(as, lref, RSET_GPR); ++ RISCVIns riscvi = st == IRT_I8 ? RISCVI_SEXT_B : ++ st == IRT_U8 ? RISCVI_ZEXT_B : ++ st == IRT_I16 ? RISCVI_SEXT_H : RISCVI_ZEXT_H; ++ lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT"); ++ emit_ext(as, riscvi, dest, left); ++ } else { /* 32/64 bit integer conversions. */ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ if (irt_is64(ir->t)) { ++ if (st64) { ++ /* 64/64 bit no-op (cast)*/ ++ ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ ++ } else { /* 32 to 64 bit sign extension. */ ++ Reg left = ra_alloc1(as, lref, RSET_GPR); ++ if ((ir->op2 & IRCONV_SEXT)) { /* 32 to 64 bit sign extension. */ ++ emit_ext(as, RISCVI_SEXT_W, dest, left); ++ } else { /* 32 to 64 bit zero extension. */ ++ emit_ext(as, RISCVI_ZEXT_W, dest, left); ++ } ++ } ++ } else { ++ if (st64 && !(ir->op2 & IRCONV_NONE)) { ++ /* This is either a 32 bit reg/reg mov which zeroes the hiword ++ ** or a load of the loword from a 64 bit address. ++ */ ++ Reg left = ra_alloc1(as, lref, RSET_GPR); ++ emit_ext(as, RISCVI_ZEXT_W, dest, left); ++ } else { /* 32/32 bit no-op (cast). */ ++ ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ ++ } ++ } ++ } ++} ++ ++static void asm_strto(ASMState *as, IRIns *ir) ++{ ++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; ++ IRRef args[2]; ++ int32_t ofs = SPOFS_TMP; ++ RegSet drop = RSET_SCRATCH; ++ if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ ++ ra_evictset(as, drop); ++ if (ir->s) ofs = sps_scale(ir->s); ++ asm_guard(as, RISCVI_BEQ, RID_RET, RID_ZERO); /* Test return status. 
*/ ++ args[0] = ir->op1; /* GCstr *str */ ++ args[1] = ASMREF_TMP1; /* TValue *n */ ++ asm_gencall(as, ci, args); ++ /* Store the result to the spill slot or temp slots. */ ++ Reg tmp = ra_releasetmp(as, ASMREF_TMP1); ++ emit_opk(as, RISCVI_ADDI, tmp, RID_SP, tmp, ofs); ++} ++ ++/* -- Memory references --------------------------------------------------- */ ++ ++/* Store tagged value for ref at base+ofs. */ ++static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref) ++{ ++ RegSet allow = rset_exclude(RSET_GPR, base); ++ IRIns *ir = IR(ref); ++ lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t), ++ "store of IR type %d", irt_type(ir->t)); ++ if (irref_isk(ref)) { ++ TValue k; ++ lj_ir_kvalue(as->J->L, &k, ir); ++ emit_lso(as, RISCVI_SD, ra_allock(as, (int64_t)k.u64, allow), base, ofs); ++ } else { ++ Reg src = ra_alloc1(as, ref, allow); ++ rset_clear(allow, src); ++ Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow); ++ emit_lso(as, RISCVI_SD, RID_TMP, base, ofs); ++ if (irt_isinteger(ir->t)) { ++ if (as->flags & JIT_F_RVZba) { ++ emit_ds1s2(as, RISCVI_ADD_UW, RID_TMP, src, type); ++ } else { ++ emit_ds1s2(as, RISCVI_ADD, RID_TMP, RID_TMP, type); ++ emit_ext(as, RISCVI_ZEXT_W, RID_TMP, src); ++ } ++ } else { ++ emit_ds1s2(as, RISCVI_ADD, RID_TMP, src, type); ++ } ++ } ++} ++ ++/* Get pointer to TValue. */ ++static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode) // todo-new ++{ ++ if ((mode & IRTMPREF_IN1)) { ++ IRIns *ir = IR(ref); ++ if (irt_isnum(ir->t)) { ++ if (irref_isk(ref) && !(mode & IRTMPREF_OUT1)) { ++ /* Use the number constant itself as a TValue. */ ++ ra_allockreg(as, igcptr(ir_knum(ir)), dest); ++ return; ++ } ++ emit_lso(as, RISCVI_FSD, ra_alloc1(as, ref, RSET_FPR), dest, 0); ++ } else { ++ asm_tvstore64(as, dest, 0, ref); ++ } ++ } ++ /* g->tmptv holds the TValue(s). 
*/ ++ emit_opk(as, RISCVI_ADDI, dest, RID_GL, dest, offsetof(global_State, tmptv)); ++} ++ ++static void asm_aref(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg idx, base; ++ if (irref_isk(ir->op2)) { ++ IRRef tab = IR(ir->op1)->op1; ++ int32_t ofs = asm_fuseabase(as, tab); ++ IRRef refa = ofs ? tab : ir->op1; ++ ofs += 8*IR(ir->op2)->i; ++ if (checki12(ofs)) { ++ base = ra_alloc1(as, refa, RSET_GPR); ++ emit_dsi(as, RISCVI_ADDI, dest, base, ofs); ++ return; ++ } ++ } ++ base = ra_alloc1(as, ir->op1, RSET_GPR); ++ idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); ++ emit_sh3add(as, dest, base, idx, RID_TMP); ++} ++ ++/* Inlined hash lookup. Specialized for key type and for const keys. ++** The equivalent C code is: ++** Node *n = hashkey(t, key); ++** do { ++** if (lj_obj_equal(&n->key, key)) return &n->val; ++** } while ((n = nextnode(n))); ++** return niltv(L); ++*/ ++static void asm_href(ASMState *as, IRIns *ir, IROp merge) ++{ ++ RegSet allow = RSET_GPR; ++ int destused = ra_used(ir); ++ Reg dest = ra_dest(as, ir, allow); ++ Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); ++ Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1, tmp2; ++ Reg cmp64 = RID_NONE; ++ IRRef refkey = ir->op2; ++ IRIns *irkey = IR(refkey); ++ int isk = irref_isk(refkey); ++ IRType1 kt = irkey->t; ++ uint32_t khash; ++ MCLabel l_end, l_loop, l_next; ++ rset_clear(allow, tab); ++ tmp1 = ra_scratch(as, allow); ++ rset_clear(allow, tmp1); ++ tmp2 = ra_scratch(as, allow); ++ rset_clear(allow, tmp2); ++ ++ if (irt_isnum(kt)) { ++ key = ra_alloc1(as, refkey, RSET_FPR); ++ tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); ++ } else { ++ /* Allocate cmp64 register used for 64-bit comparisons */ ++ if (!isk && irt_isaddr(kt)) { ++ cmp64 = tmp2; ++ } else { ++ int64_t k; ++ if (isk && irt_isaddr(kt)) { ++ k = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64; ++ } else { ++ lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key 
type"); ++ k = ~((int64_t)~irt_toitype(kt) << 47); ++ } ++ cmp64 = ra_allock(as, k, allow); ++ rset_clear(allow, cmp64); ++ } ++ if (!irt_ispri(kt)) { ++ key = ra_alloc1(as, refkey, allow); ++ rset_clear(allow, key); ++ } ++ } ++ ++ /* Key not found in chain: jump to exit (if merged) or load niltv. */ ++ l_end = emit_label(as); ++ int is_lend_exit = 0; ++ as->invmcp = NULL; ++ if (merge == IR_NE) ++ asm_guard(as, RISCVI_BEQ, RID_ZERO, RID_ZERO); ++ else if (destused) ++ emit_loada(as, dest, niltvg(J2G(as->J))); ++ ++ /* Follow hash chain until the end. */ ++ l_loop = --as->mcp; ++ emit_mv(as, dest, tmp1); ++ emit_lso(as, RISCVI_LD, tmp1, dest, (int32_t)offsetof(Node, next)); ++ l_next = emit_label(as); ++ ++ /* Type and value comparison. */ ++ if (merge == IR_EQ) { /* Must match asm_guard(). */ ++ l_end = asm_exitstub_addr(as, as->snapno); ++ is_lend_exit = 1; ++ } ++ if (irt_isnum(kt)) { ++ emit_branch(as, RISCVI_BNE, tmp1, RID_ZERO, l_end, is_lend_exit); ++ emit_ds1s2(as, RISCVI_FEQ_D, tmp1, tmpnum, key); ++ emit_branch(as, RISCVI_BEQ, tmp1, RID_ZERO, l_next, -1); ++ emit_dsi(as, RISCVI_SLTIU, tmp1, tmp1, ((int32_t)LJ_TISNUM)); ++ emit_dsshamt(as, RISCVI_SRAI, tmp1, tmp1, 47); ++ emit_ds(as, RISCVI_FMV_D_X, tmpnum, tmp1); ++ } else { ++ emit_branch(as, RISCVI_BEQ, tmp1, cmp64, l_end, is_lend_exit); ++ } ++ emit_lso(as, RISCVI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64)); ++ *l_loop = RISCVI_BNE | RISCVF_S1(tmp1) | RISCVF_S2(RID_ZERO) ++ | RISCVF_IMMB((char *)as->mcp-(char *)l_loop); ++ if (!isk && irt_isaddr(kt)) { ++ type = ra_allock(as, (int64_t)irt_toitype(kt) << 47, allow); ++ emit_ds1s2(as, RISCVI_ADD, tmp2, key, type); ++ rset_clear(allow, type); ++ } ++ ++ /* Load main position relative to tab->node into dest. */ ++ khash = isk ? 
ir_khash(as, irkey) : 1; ++ if (khash == 0) { ++ emit_lso(as, RISCVI_LD, dest, tab, (int32_t)offsetof(GCtab, node)); ++ } else { ++ Reg tmphash = tmp1; ++ if (isk) ++ tmphash = ra_allock(as, khash, allow); ++ /* node = tab->node + (idx*32-idx*8) */ ++ emit_ds1s2(as, RISCVI_ADD, dest, dest, tmp1); ++ lj_assertA(sizeof(Node) == 24, "bad Node size"); ++ emit_ds1s2(as, RISCVI_SUBW, tmp1, tmp2, tmp1); ++ emit_dsshamt(as, RISCVI_SLLIW, tmp1, tmp1, 3); ++ emit_dsshamt(as, RISCVI_SLLIW, tmp2, tmp1, 5); ++ emit_ds1s2(as, RISCVI_AND, tmp1, tmp2, tmphash); // idx = hi & tab->hmask ++ emit_lso(as, RISCVI_LD, dest, tab, (int32_t)offsetof(GCtab, node)); ++ emit_lso(as, RISCVI_LW, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); ++ if (isk) { ++ /* Nothing to do. */ ++ } else if (irt_isstr(kt)) { ++ emit_lso(as, RISCVI_LW, tmp1, key, (int32_t)offsetof(GCstr, sid)); ++ } else { /* Must match with hash*() in lj_tab.c. */ ++ emit_ds1s2(as, RISCVI_SUBW, tmp1, tmp1, tmp2); ++ emit_roti(as, RISCVI_RORIW, tmp2, tmp2, dest, (-HASH_ROT3)&0x1f); ++ emit_ds1s2(as, RISCVI_XOR, tmp1, tmp1, tmp2); ++ emit_roti(as, RISCVI_RORIW, tmp1, tmp1, dest, (-HASH_ROT2-HASH_ROT1)&0x1f); ++ emit_ds1s2(as, RISCVI_SUBW, tmp2, tmp2, dest); ++ emit_ds1s2(as, RISCVI_XOR, tmp2, tmp2, tmp1); ++ emit_roti(as, RISCVI_RORIW, dest, tmp1, RID_TMP, (-HASH_ROT1)&0x1f); ++ if (irt_isnum(kt)) { ++ emit_dsshamt(as, RISCVI_SLLIW, tmp1, tmp1, 1); ++ emit_dsshamt(as, RISCVI_SRAI, tmp1, tmp1, 32); // hi ++ emit_ext(as, RISCVI_SEXT_W, tmp2, tmp1); // lo ++ emit_ds(as, RISCVI_FMV_X_D, tmp1, key); ++ } else { ++ checkmclim(as); ++ emit_dsshamt(as, RISCVI_SRAI, tmp1, tmp1, 32); // hi ++ emit_ext(as, RISCVI_SEXT_W, tmp2, key); // lo ++ emit_ds1s2(as, RISCVI_ADD, tmp1, key, type); ++ } ++ } ++ } ++} ++ ++static void asm_hrefk(ASMState *as, IRIns *ir) ++{ ++ IRIns *kslot = IR(ir->op2); ++ IRIns *irkey = IR(kslot->op1); ++ int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); ++ int32_t kofs = ofs + (int32_t)offsetof(Node, key); ++ int 
bigofs = !checki12(kofs); ++ Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; ++ Reg node = ra_alloc1(as, ir->op1, RSET_GPR); ++ RegSet allow = rset_exclude(RSET_GPR, node); ++ Reg idx = node; ++ int64_t k; ++ lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); ++ if (bigofs) { ++ idx = dest; ++ rset_clear(allow, dest); ++ kofs = (int32_t)offsetof(Node, key); ++ } else if (ra_hasreg(dest)) { ++ emit_dsi(as, RISCVI_ADDI, dest, node, ofs); ++ } ++ if (irt_ispri(irkey->t)) { ++ lj_assertA(!irt_isnil(irkey->t), "bad HREFK key type"); ++ k = ~((int64_t)~irt_toitype(irkey->t) << 47); ++ } else if (irt_isnum(irkey->t)) { ++ k = (int64_t)ir_knum(irkey)->u64; ++ } else { ++ k = ((int64_t)irt_toitype(irkey->t) << 47) | (int64_t)ir_kgc(irkey); ++ } ++ asm_guard(as, RISCVI_BNE, RID_TMP, ra_allock(as, k, allow)); ++ emit_lso(as, RISCVI_LD, RID_TMP, idx, kofs); ++ if (bigofs) ++ emit_ds1s2(as, RISCVI_ADD, dest, node, ra_allock(as, ofs, allow)); ++} ++ ++static void asm_uref(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC); ++ if (irref_isk(ir->op1) && !guarded) { ++ GCfunc *fn = ir_kfunc(IR(ir->op1)); ++ MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; ++ emit_lsptr(as, RISCVI_LD, dest, v, RSET_GPR); ++ } else { ++ if (guarded) ++ asm_guard(as, ir->o == IR_UREFC ? 
RISCVI_BEQ : RISCVI_BNE, RID_TMP, RID_ZERO); ++ if (ir->o == IR_UREFC) ++ emit_dsi(as, RISCVI_ADDI, dest, dest, (int32_t)offsetof(GCupval, tv)); ++ else ++ emit_lso(as, RISCVI_LD, dest, dest, (int32_t)offsetof(GCupval, v)); ++ if (guarded) ++ emit_lso(as, RISCVI_LBU, RID_TMP, dest, (int32_t)offsetof(GCupval, closed)); ++ if (irref_isk(ir->op1)) { ++ GCfunc *fn = ir_kfunc(IR(ir->op1)); ++ GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]); ++ emit_loada(as, dest, o); ++ } else { ++ emit_lso(as, RISCVI_LD, dest, ra_alloc1(as, ir->op1, RSET_GPR), ++ (int32_t)offsetof(GCfuncL, uvptr) + ++ (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8)); ++ } ++ } ++} ++ ++static void asm_fref(ASMState *as, IRIns *ir) ++{ ++ UNUSED(as); UNUSED(ir); ++ lj_assertA(!ra_used(ir), "unfused FREF"); ++} ++ ++static void asm_strref(ASMState *as, IRIns *ir) ++{ ++ RegSet allow = RSET_GPR; ++ Reg dest = ra_dest(as, ir, allow); ++ Reg base = ra_alloc1(as, ir->op1, allow); ++ IRIns *irr = IR(ir->op2); ++ int32_t ofs = sizeof(GCstr); ++ rset_clear(allow, base); ++ if (irref_isk(ir->op2) && checki12(ofs + irr->i)) { ++ emit_dsi(as, RISCVI_ADDI, dest, base, ofs + irr->i); ++ } else { ++ emit_dsi(as, RISCVI_ADDI, dest, dest, ofs); ++ emit_ds1s2(as, RISCVI_ADD, dest, base, ra_alloc1(as, ir->op2, allow)); ++ } ++} ++ ++/* -- Loads and stores ---------------------------------------------------- */ ++ ++static RISCVIns asm_fxloadins(IRIns *ir) ++{ ++ switch (irt_type(ir->t)) { ++ case IRT_I8: return RISCVI_LB; ++ case IRT_U8: return RISCVI_LBU; ++ case IRT_I16: return RISCVI_LH; ++ case IRT_U16: return RISCVI_LHU; ++ case IRT_NUM: return RISCVI_FLD; ++ case IRT_FLOAT: return RISCVI_FLW; ++ default: return irt_is64(ir->t) ? 
RISCVI_LD : RISCVI_LW; ++ } ++} ++ ++static RISCVIns asm_fxstoreins(IRIns *ir) ++{ ++ switch (irt_type(ir->t)) { ++ case IRT_I8: case IRT_U8: return RISCVI_SB; ++ case IRT_I16: case IRT_U16: return RISCVI_SH; ++ case IRT_NUM: return RISCVI_FSD; ++ case IRT_FLOAT: return RISCVI_FSW; ++ default: return irt_is64(ir->t) ? RISCVI_SD : RISCVI_SW; ++ } ++} ++ ++static void asm_fload(ASMState *as, IRIns *ir) ++{ ++ RegSet allow = RSET_GPR; ++ Reg idx, dest = ra_dest(as, ir, allow); ++ rset_clear(allow, dest); ++ RISCVIns riscvi = asm_fxloadins(ir); ++ int32_t ofs; ++ if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */ ++ idx = RID_GL; ++ ofs = (ir->op2 << 2) - GG_OFS(g); ++ } else { ++ idx = ra_alloc1(as, ir->op1, allow); ++ if (ir->op2 == IRFL_TAB_ARRAY) { ++ ofs = asm_fuseabase(as, ir->op1); ++ if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ ++ emit_dsi(as, RISCVI_ADDI, dest, idx, ofs); ++ return; ++ } ++ } ++ ofs = field_ofs[ir->op2]; ++ lj_assertA(!irt_isfp(ir->t), "bad FP FLOAD"); ++ } ++ rset_clear(allow, idx); ++ emit_lso(as, riscvi, dest, idx, ofs); ++} ++ ++static void asm_fstore(ASMState *as, IRIns *ir) ++{ ++ if (ir->r != RID_SINK) { ++ Reg src = ra_alloc1z(as, ir->op2, RSET_GPR); ++ IRIns *irf = IR(ir->op1); ++ Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); ++ int32_t ofs = field_ofs[irf->op2]; ++ lj_assertA(!irt_isfp(ir->t), "bad FP FSTORE"); ++ emit_lso(as, asm_fxstoreins(ir), src, idx, ofs); ++ } ++} ++ ++static void asm_xload(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, (irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); ++ lj_assertA(LJ_TARGET_UNALIGNED || !(ir->op2 & IRXLOAD_UNALIGNED), ++ "unaligned XLOAD"); ++ asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); ++} ++ ++static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) ++{ ++ if (ir->r != RID_SINK) { ++ Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? 
RSET_FPR : RSET_GPR); ++ asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, ++ rset_exclude(RSET_GPR, src), ofs); ++ } ++} ++ ++#define asm_xstore(as, ir) asm_xstore_(as, ir, 0) ++ ++static void asm_ahuvload(ASMState *as, IRIns *ir) ++{ ++ Reg dest = RID_NONE, type = RID_TMP, idx; ++ RegSet allow = RSET_GPR; ++ int32_t ofs = 0; ++ IRType1 t = ir->t; ++ if (ra_used(ir)) { ++ lj_assertA((irt_isnum(ir->t)) || irt_isint(ir->t) || irt_isaddr(ir->t), ++ "bad load type %d", irt_type(ir->t)); ++ dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow); ++ rset_clear(allow, dest); ++ if (irt_isaddr(t)) { ++ emit_cleartp(as, dest, dest); ++ } else if (irt_isint(t)) ++ emit_ext(as, RISCVI_SEXT_W, dest, dest); ++ } ++ idx = asm_fuseahuref(as, ir->op1, &ofs, allow); ++ if (ir->o == IR_VLOAD) ofs += 8 * ir->op2; ++ rset_clear(allow, idx); ++ if (irt_isnum(t)) { ++ asm_guard(as, RISCVI_BEQ, RID_TMP, RID_ZERO); ++ emit_dsi(as, RISCVI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM); ++ } else { ++ asm_guard(as, RISCVI_BNE, type, ++ ra_allock(as, (int32_t)irt_toitype(t), allow)); ++ } ++ if (ra_hasreg(dest)) { ++ if (irt_isnum(t)) { ++ emit_lso(as, RISCVI_FLD, dest, idx, ofs); ++ dest = type; ++ } ++ } else { ++ dest = type; ++ } ++ emit_dsshamt(as, RISCVI_SRAI, type, dest, 47); ++ emit_lso(as, RISCVI_LD, dest, idx, ofs); ++} ++ ++static void asm_ahustore(ASMState *as, IRIns *ir) ++{ ++ RegSet allow = RSET_GPR; ++ Reg idx, src = RID_NONE, type = RID_NONE; ++ int32_t ofs = 0; ++ if (ir->r == RID_SINK) ++ return; ++ if (irt_isnum(ir->t)) { ++ src = ra_alloc1(as, ir->op2, RSET_FPR); ++ idx = asm_fuseahuref(as, ir->op1, &ofs, allow); ++ emit_lso(as, RISCVI_FSD, src, idx, ofs); ++ } else { ++ Reg tmp = RID_TMP; ++ if (irt_ispri(ir->t)) { ++ tmp = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow); ++ rset_clear(allow, tmp); ++ } else { ++ src = ra_alloc1(as, ir->op2, allow); ++ rset_clear(allow, src); ++ type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow); ++ 
rset_clear(allow, type); ++ } ++ idx = asm_fuseahuref(as, ir->op1, &ofs, allow); ++ emit_lso(as, RISCVI_SD, tmp, idx, ofs); ++ if (ra_hasreg(src)) { ++ if (irt_isinteger(ir->t)) { ++ if (as->flags & JIT_F_RVZba) { ++ emit_ds1s2(as, RISCVI_ADD_UW, tmp, src, type); ++ } else { ++ emit_ds1s2(as, RISCVI_ADD, tmp, tmp, type); ++ emit_ext(as, RISCVI_ZEXT_W, tmp, src); ++ } ++ } else { ++ emit_ds1s2(as, RISCVI_ADD, tmp, src, type); ++ } ++ } ++ } ++} ++ ++static void asm_sload(ASMState *as, IRIns *ir) ++{ ++ Reg dest = RID_NONE, type = RID_NONE, base; ++ RegSet allow = RSET_GPR; ++ IRType1 t = ir->t; ++ int32_t ofs = 8*((int32_t)ir->op1-2); ++ lj_assertA(checki12(ofs), "sload IR operand out of range"); ++ lj_assertA(!(ir->op2 & IRSLOAD_PARENT), ++ "bad parent SLOAD"); /* Handled by asm_head_side(). */ ++ lj_assertA(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK), ++ "inconsistent SLOAD variant"); ++ if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { ++ dest = ra_scratch(as, RSET_FPR); ++ asm_tointg(as, ir, dest); ++ t.irt = IRT_NUM; /* Continue with a regular number type check. */ ++ } else if (ra_used(ir)) { ++ Reg tmp = RID_NONE; ++ if ((ir->op2 & IRSLOAD_CONVERT)) ++ tmp = ra_scratch(as, irt_isint(t) ? RSET_FPR : RSET_GPR); ++ lj_assertA((irt_isnum(t)) || irt_isint(t) || irt_isaddr(t), ++ "bad SLOAD type %d", irt_type(t)); ++ dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow); ++ rset_clear(allow, dest); ++ base = ra_alloc1(as, REF_BASE, allow); ++ rset_clear(allow, base); ++ if (irt_isaddr(t)) { /* Clear type from pointers. */ ++ emit_cleartp(as, dest, dest); ++ } else if (ir->op2 & IRSLOAD_CONVERT) { ++ if (irt_isint(t)) { ++ emit_ds(as, RISCVI_FCVT_W_D|RISCVF_RM(RISCVRM_RTZ), dest, tmp); ++ /* If value is already loaded for type check, move it to FPR. */ ++ if ((ir->op2 & IRSLOAD_TYPECHECK)) ++ emit_ds(as, RISCVI_FMV_D_X, tmp, dest); ++ else ++ dest = tmp; ++ t.irt = IRT_NUM; /* Check for original type. 
*/ ++ } else { ++ emit_ds(as, RISCVI_FCVT_D_W, dest, tmp); ++ dest = tmp; ++ t.irt = IRT_INT; /* Check for original type. */ ++ } ++ } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) { ++ /* Sign-extend integers. */ ++ emit_ext(as, RISCVI_SEXT_W, dest, dest); ++ } ++ goto dotypecheck; ++ } ++ base = ra_alloc1(as, REF_BASE, allow); ++ rset_clear(allow, base); ++dotypecheck: ++ if ((ir->op2 & IRSLOAD_TYPECHECK)) { ++ type = dest < RID_MAX_GPR ? dest : RID_TMP; ++ if (irt_ispri(t)) { ++ asm_guard(as, RISCVI_BNE, type, ++ ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow)); ++ } else if ((ir->op2 & IRSLOAD_KEYINDEX)) { ++ asm_guard(as, RISCVI_BNE, RID_TMP, ++ ra_allock(as, (int32_t)LJ_KEYINDEX, allow)); ++ emit_dsshamt(as, RISCVI_SRAI, RID_TMP, type, 32); ++ } else { ++ if (irt_isnum(t)) { ++ asm_guard(as, RISCVI_BEQ, RID_TMP, RID_ZERO); ++ emit_dsi(as, RISCVI_SLTIU, RID_TMP, RID_TMP, LJ_TISNUM); ++ if (ra_hasreg(dest)) { ++ emit_lso(as, RISCVI_FLD, dest, base, ofs); ++ } ++ } else { ++ asm_guard(as, RISCVI_BNE, RID_TMP, ++ ra_allock(as, (int32_t)irt_toitype(t), allow)); ++ } ++ emit_dsshamt(as, RISCVI_SRAI, RID_TMP, type, 47); ++ } ++ emit_lso(as, RISCVI_LD, type, base, ofs); ++ } else if (ra_hasreg(dest)) { ++ emit_lso(as, irt_isnum(t) ? RISCVI_FLD : ++ irt_isint(t) ? RISCVI_LW : RISCVI_LD, ++ dest, base, ofs); ++ } ++} ++ ++/* -- Allocations --------------------------------------------------------- */ ++ ++#if LJ_HASFFI ++static void asm_cnew(ASMState *as, IRIns *ir) ++{ ++ CTState *cts = ctype_ctsG(J2G(as->J)); ++ CTypeID id = (CTypeID)IR(ir->op1)->i; ++ CTSize sz; ++ CTInfo info = lj_ctype_info(cts, id, &sz); ++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; ++ IRRef args[4]; ++ RegSet drop = RSET_SCRATCH; ++ lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL), ++ "bad CNEW/CNEWI operands"); ++ ++ as->gcsteps++; ++ if (ra_hasreg(ir->r)) ++ rset_clear(drop, ir->r); /* Dest reg handled below. 
*/ ++ ra_evictset(as, drop); ++ if (ra_used(ir)) ++ ra_destreg(as, ir, RID_RET); /* GCcdata * */ ++ ++ /* Initialize immutable cdata object. */ ++ if (ir->o == IR_CNEWI) { ++ RegSet allow = (RSET_GPR & ~RSET_SCRATCH); ++ emit_lso(as, sz == 8 ? RISCVI_SD : RISCVI_SW, ra_alloc1(as, ir->op2, allow), ++ RID_RET, (sizeof(GCcdata))); ++ lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz); ++ } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ ++ ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; ++ args[0] = ASMREF_L; /* lua_State *L */ ++ args[1] = ir->op1; /* CTypeID id */ ++ args[2] = ir->op2; /* CTSize sz */ ++ args[3] = ASMREF_TMP1; /* CTSize align */ ++ asm_gencall(as, ci, args); ++ emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); ++ return; ++ } ++ ++ /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ ++ emit_lso(as, RISCVI_SB, RID_RET+1, RID_RET, (offsetof(GCcdata, gct))); ++ emit_lso(as, RISCVI_SH, RID_TMP, RID_RET, (offsetof(GCcdata, ctypeid))); ++ emit_loadk12(as, RID_RET+1, ~LJ_TCDATA); ++ emit_loadk32(as, RID_TMP, id); ++ args[0] = ASMREF_L; /* lua_State *L */ ++ args[1] = ASMREF_TMP1; /* MSize size */ ++ asm_gencall(as, ci, args); ++ ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), ++ ra_releasetmp(as, ASMREF_TMP1)); ++} ++#endif ++ ++/* -- Write barriers ------------------------------------------------------ */ ++ ++static void asm_tbar(ASMState *as, IRIns *ir) ++{ ++ Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); ++ Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab)); ++ Reg link = RID_TMP; ++ MCLabel l_end = emit_label(as); ++ emit_lso(as, RISCVI_SD, link, tab, (int32_t)offsetof(GCtab, gclist)); ++ emit_lso(as, RISCVI_SB, mark, tab, (int32_t)offsetof(GCtab, marked)); ++ emit_setgl(as, tab, gc.grayagain); // make tab gray again ++ emit_getgl(as, link, gc.grayagain); ++ emit_branch(as, RISCVI_BEQ, RID_TMP, RID_ZERO, l_end, -1); // black: not jump ++ emit_ds1s2(as, RISCVI_XOR, mark, mark, RID_TMP); 
// mark=0: gray ++ emit_dsi(as, RISCVI_ANDI, RID_TMP, mark, LJ_GC_BLACK); ++ emit_lso(as, RISCVI_LBU, mark, tab, ((int32_t)offsetof(GCtab, marked))); ++} ++ ++static void asm_obar(ASMState *as, IRIns *ir) ++{ ++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv]; ++ IRRef args[2]; ++ MCLabel l_end; ++ Reg obj, val, tmp; ++ /* No need for other object barriers (yet). */ ++ lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); // Closed upvalue ++ ra_evictset(as, RSET_SCRATCH); ++ l_end = emit_label(as); ++ args[0] = ASMREF_TMP1; /* global_State *g */ ++ args[1] = ir->op1; /* TValue *tv */ ++ asm_gencall(as, ci, args); ++ emit_ds(as, RISCVI_MV, ra_releasetmp(as, ASMREF_TMP1), RID_GL); ++ obj = IR(ir->op1)->r; ++ tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj)); ++ emit_branch(as, RISCVI_BEQ, tmp, RID_ZERO, l_end, -1); ++ emit_branch(as, RISCVI_BEQ, RID_TMP, RID_ZERO, l_end, -1); // black: jump ++ emit_dsi(as, RISCVI_ANDI, tmp, tmp, LJ_GC_BLACK); ++ emit_dsi(as, RISCVI_ANDI, RID_TMP, RID_TMP, LJ_GC_WHITES); ++ val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj)); ++ emit_lso(as, RISCVI_LBU, tmp, obj, ++ ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))); ++ emit_lso(as, RISCVI_LBU, RID_TMP, val, ((int32_t)offsetof(GChead, marked))); ++} ++ ++/* -- Arithmetic and logic operations ------------------------------------- */ ++ ++static void asm_fparith(ASMState *as, IRIns *ir, RISCVIns riscvi) ++{ ++ Reg dest = ra_dest(as, ir, RSET_FPR); ++ Reg right, left = ra_alloc2(as, ir, RSET_FPR); ++ right = (left >> 8); left &= 255; ++ emit_ds1s2(as, riscvi, dest, left, right); ++} ++ ++static void asm_fpunary(ASMState *as, IRIns *ir, RISCVIns riscvi) ++{ ++ Reg dest = ra_dest(as, ir, RSET_FPR); ++ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); ++ switch(riscvi) { ++ case RISCVI_FROUND_S_RTZ: case RISCVI_FROUND_S_RDN: case RISCVI_FROUND_S_RUP: ++ case RISCVI_FROUND_D_RTZ: case RISCVI_FROUND_D_RDN: case RISCVI_FROUND_D_RUP: ++ case 
RISCVI_FSQRT_S: case RISCVI_FSQRT_D: ++ emit_ds(as, riscvi, dest, left); ++ break; ++ case RISCVI_FMV_S: case RISCVI_FMV_D: ++ case RISCVI_FABS_S: case RISCVI_FABS_D: ++ case RISCVI_FNEG_S: case RISCVI_FNEG_D: ++ emit_ds1s2(as, riscvi, dest, left, left); ++ break; ++ default: ++ lj_assertA(0, "bad fp unary instruction"); ++ return; ++ } ++} ++ ++static void asm_fpround(ASMState *as, IRIns *ir, RISCVIns riscvi) ++{ ++ Reg dest = ra_dest(as, ir, RSET_FPR); ++ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); ++ MCLabel l_end = emit_label(as); ++ ++ if (dest != left) { ++ emit_ds1s2(as, RISCVI_FSGNJ_D, dest, dest, left); ++ emit_ds(as, RISCVI_FCVT_D_L, dest, RID_TMP); ++ } else { ++ Reg ftmp = ra_scratch(as, rset_exclude(RSET_FPR, dest)); ++ emit_ds1s2(as, RISCVI_FSGNJ_D, dest, ftmp, left); ++ emit_ds(as, RISCVI_FCVT_D_L, ftmp, RID_TMP); ++ } ++ emit_ds(as, riscvi, RID_TMP, left); ++ emit_branch(as, RISCVI_BLT, RID_ZERO, RID_TMP, l_end, -1); ++ emit_dsi(as, RISCVI_ADDI, RID_TMP, RID_TMP, -1075); ++ emit_dsi(as, RISCVI_ANDI, RID_TMP, RID_TMP, 0x7ff); ++ emit_dsi(as, RISCVI_SRLI, RID_TMP, RID_TMP, 52); ++ if (dest != left) ++ emit_ds1s2(as, RISCVI_FMV_D, dest, left, left); ++ emit_ds(as, RISCVI_FMV_X_D, RID_TMP, left); ++} ++ ++static void asm_fpmath(ASMState *as, IRIns *ir) ++{ ++ IRFPMathOp fpm = (IRFPMathOp)ir->op2; ++ if (fpm <= IRFPM_TRUNC) ++ if (as->flags & JIT_F_RVZfa) { ++ asm_fpunary(as, ir, fpm == IRFPM_FLOOR ? RISCVI_FROUND_D_RDN : ++ fpm == IRFPM_CEIL ? RISCVI_FROUND_D_RUP : RISCVI_FROUND_D_RTZ); ++ } else { ++ asm_fpround(as, ir, fpm == IRFPM_FLOOR ? RISCVI_FCVT_L_D | RISCVF_RM(RISCVRM_RDN) : ++ fpm == IRFPM_CEIL ? 
RISCVI_FCVT_L_D | RISCVF_RM(RISCVRM_RUP) : ++ RISCVI_FCVT_L_D | RISCVF_RM(RISCVRM_RTZ)); ++ } ++ else if (fpm == IRFPM_SQRT) ++ asm_fpunary(as, ir, RISCVI_FSQRT_D); ++ else ++ asm_callid(as, ir, IRCALL_lj_vm_floor + fpm); ++} ++ ++static void asm_add(ASMState *as, IRIns *ir) ++{ ++ IRType1 t = ir->t; ++ if (irt_isnum(t)) { ++ if (!asm_fusemadd(as, ir, RISCVI_FMADD_D, RISCVI_FMADD_D)) ++ asm_fparith(as, ir, RISCVI_FADD_D); ++ return; ++ } else { ++ if ((as->flags & JIT_F_RVXThead) && asm_fusemac(as, ir, RISCVI_TH_MULA)) ++ return; ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); ++ if (irref_isk(ir->op2)) { ++ intptr_t k = get_kval(as, ir->op2); ++ if (checki12(k)) { ++ if (irt_is64(t)) { ++ emit_dsi(as, RISCVI_ADDI, dest, left, k); ++ } else { ++ emit_dsi(as, RISCVI_ADDIW, dest, left, k); ++ } ++ return; ++ } ++ } ++ Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); ++ emit_ds1s2(as, irt_is64(t) ? RISCVI_ADD : RISCVI_ADDW, dest, ++ left, right); ++ } ++} ++ ++static void asm_sub(ASMState *as, IRIns *ir) +{ -+ int vk = 0; -+ |=>defop: ++ if (irt_isnum(ir->t)) { ++ if (!asm_fusemadd(as, ir, RISCVI_FMSUB_D, RISCVI_FNMSUB_D)) ++ asm_fparith(as, ir, RISCVI_FSUB_D); ++ return; ++ } else { ++ if ((as->flags & JIT_F_RVXThead) && asm_fusemac(as, ir, RISCVI_TH_MULS)) ++ return; ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg right, left = ra_alloc2(as, ir, RSET_GPR); ++ right = (left >> 8); left &= 255; ++ emit_ds1s2(as, irt_is64(ir->t) ? RISCVI_SUB : RISCVI_SUBW, dest, ++ left, right); ++ } ++} + -+ switch (op) { ++static void asm_mul(ASMState *as, IRIns *ir) ++{ ++ if (irt_isnum(ir->t)) { ++ asm_fparith(as, ir, RISCVI_FMUL_D); ++ } else { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg right, left = ra_alloc2(as, ir, RSET_GPR); ++ right = (left >> 8); left &= 255; ++ emit_ds1s2(as, irt_is64(ir->t) ? 
RISCVI_MUL : RISCVI_MULW, dest, ++ left, right); ++ } ++} + -+ /* -- Comparison ops ---------------------------------------------------- */ ++static void asm_fpdiv(ASMState *as, IRIns *ir) ++{ ++ asm_fparith(as, ir, RISCVI_FDIV_D); ++} + -+ /* Remember: all ops branch for a true comparison, fall through otherwise. */ ++static void asm_neg(ASMState *as, IRIns *ir) ++{ ++ if (irt_isnum(ir->t)) { ++ asm_fpunary(as, ir, RISCVI_FNEG_D); ++ } else { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); ++ emit_ds1s2(as, irt_is64(ir->t) ? RISCVI_SUB : RISCVI_SUBW, dest, ++ RID_ZERO, left); ++ } ++} + -+ case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: -+ | // RA = src1*8, RD = src2*8, JMP with RD = target -+ | add RA, BASE, RA -+ | add RD, BASE, RD -+ if (op == BC_ISLT || op == BC_ISGE) { -+ | ld CARG1, 0(RA) -+ | ld CARG2, 0(RD) -+ | gettp CARG3, CARG1 -+ | gettp CARG4, CARG2 -+ } else { -+ | ld CARG2, 0(RA) -+ | ld CARG1, 0(RD) -+ | gettp CARG3, CARG2 -+ | gettp CARG4, CARG1 -+ } -+ | lhu TMP2, OFS_RD(PC) // TMP2=jump -+ | addi PC, PC, 4 -+ | bne CARG3, TISNUM, >2 -+ | decode_BC4b TMP2 -+ | bne CARG4, TISNUM, >5 -+ | sext.w CARG1, CARG1 -+ | sext.w CARG2, CARG2 -+ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 -+ | slt TMP1, CARG1, CARG2 -+ | addw TMP2, TMP2, TMP3 // TMP2=(jump-0x8000)<<2 -+ if (op == BC_ISLT || op == BC_ISGT) { -+ | neg TMP1, TMP1 -+ } else { -+ | addi TMP1, TMP1, -1 -+ } -+ | and TMP2, TMP2, TMP1 -+ |1: -+ | add PC, PC, TMP2 -+ | ins_next -+ | -+ |2: // RA is not an integer. -+ | sltiu TMP1, CARG3, LJ_TISNUM -+ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 -+ | bxeqz TMP1, ->vmeta_comp -+ | sltiu TMP1, CARG4, LJ_TISNUM -+ | decode_BC4b TMP2 -+ | beqz TMP1, >4 -+ | fmv.d.x FTMP0, CARG1 -+ | fmv.d.x FTMP2, CARG2 -+ |3: // RA and RD are both numbers. 
-+ | addw TMP2, TMP2, TMP3 -+ if (op == BC_ISLT) { -+ | flt.d TMP3, FTMP0, FTMP2 -+ | neg TMP3, TMP3 -+ } else if (op == BC_ISGE) { -+ | flt.d TMP3, FTMP0, FTMP2 -+ | addi TMP3, TMP3, -1 -+ } else if (op == BC_ISLE) { -+ | fle.d TMP3, FTMP2, FTMP0 -+ | neg TMP3, TMP3 -+ } else if (op == BC_ISGT) { -+ | fle.d TMP3, FTMP2, FTMP0 -+ | addi TMP3, TMP3, -1 -+ } -+ | and TMP2, TMP2, TMP3 -+ | j <1 -+ | -+ |4: // RA is a number, RD is not a number. -+ | // RA is a number, RD is an integer. Convert RD to a number. -+ | bxne CARG4, TISNUM, ->vmeta_comp -+ if (op == BC_ISLT || op == BC_ISGE) { -+ | fcvt.d.w FTMP2, CARG2 -+ | fmv.d.x FTMP0, CARG1 -+ } else { -+ | fcvt.d.w FTMP0, CARG1 -+ | fmv.d.x FTMP2, CARG2 -+ } -+ | j <3 -+ | -+ |5: // RA is an integer, RD is not an integer -+ | sltiu TMP1, CARG4, LJ_TISNUM -+ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 -+ | bxeqz TMP1, ->vmeta_comp -+ | // RA is an integer, RD is a number. Convert RA to a number. -+ if (op == BC_ISLT || op == BC_ISGE) { -+ | fcvt.d.w FTMP0, CARG1 -+ | fmv.d.x FTMP2, CARG2 -+ } else { -+ | fcvt.d.w FTMP2, CARG2 -+ | fmv.d.x FTMP0, CARG1 ++#define asm_abs(as, ir) asm_fpunary(as, ir, RISCVI_FABS_D) ++ ++static void asm_arithov(ASMState *as, IRIns *ir) ++{ ++ Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR); ++ lj_assertA(!irt_is64(ir->t), "bad usage"); ++ if (irref_isk(ir->op2)) { ++ int k = IR(ir->op2)->i; ++ if (ir->o == IR_SUBOV) k = (int)(~(unsigned int)k+1u); ++ if (checki12(k)) { /* (dest < left) == (k >= 0 ? 1 : 0) */ ++ left = ra_alloc1(as, ir->op1, RSET_GPR); ++ asm_guard(as, k >= 0 ? RISCVI_BLT : RISCVI_BGE, dest, dest == left ? 
RID_TMP : left); ++ emit_dsi(as, RISCVI_ADDI, dest, left, k); ++ if (dest == left) emit_mv(as, RID_TMP, left); ++ return; + } -+ | j <3 -+ break; ++ } ++ left = ra_alloc2(as, ir, RSET_GPR); ++ right = (left >> 8); left &= 255; ++ tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left), ++ right), dest)); ++ asm_guard(as, RISCVI_BLT, RID_TMP, RID_ZERO); ++ emit_ds1s2(as, RISCVI_AND, RID_TMP, RID_TMP, tmp); ++ if (ir->o == IR_ADDOV) { /* ((dest^left) & (dest^right)) < 0 */ ++ emit_ds1s2(as, RISCVI_XOR, RID_TMP, dest, dest == right ? RID_TMP : right); ++ } else { /* ((dest^left) & (dest^~right)) < 0 */ ++ emit_xnor(as, RID_TMP, dest, dest == right ? RID_TMP : right); ++ } ++ emit_ds1s2(as, RISCVI_XOR, tmp, dest, dest == left ? RID_TMP : left); ++ emit_ds1s2(as, ir->o == IR_ADDOV ? RISCVI_ADDW : RISCVI_SUBW, dest, left, right); ++ if (dest == left || dest == right) ++ emit_mv(as, RID_TMP, dest == left ? left : right); ++} ++ ++#define asm_addov(as, ir) asm_arithov(as, ir) ++#define asm_subov(as, ir) asm_arithov(as, ir) ++ ++static void asm_mulov(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg right, left = ra_alloc2(as, ir, RSET_GPR); ++ right = (left >> 8); left &= 255; ++ asm_guard(as, RISCVI_BNE, dest, RID_TMP); ++ emit_ext(as, RISCVI_SEXT_W, dest, RID_TMP); // dest: [31:0]+signextend ++ emit_ds1s2(as, RISCVI_MUL, RID_TMP, left, right); // RID_TMP: [63:0] ++} ++ ++static void asm_bnot(ASMState *as, IRIns *ir) ++{ ++ Reg left, right, dest = ra_dest(as, ir, RSET_GPR); ++ IRIns *irl = IR(ir->op1); ++ if (as->flags & JIT_F_RVZbb && mayfuse(as, ir->op1) && irl->o == IR_BXOR) { ++ left = ra_alloc2(as, irl, RSET_GPR); ++ right = (left >> 8); left &= 255; ++ emit_ds1s2(as, RISCVI_XNOR, dest, left, right); ++ } else { ++ left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); ++ emit_ds(as, RISCVI_NOT, dest, left); ++ } ++} ++ ++static void asm_bswap(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = 
ra_alloc1(as, ir->op1, RSET_GPR); ++ RegSet allow = rset_exclude(rset_exclude(RSET_GPR, dest), left); ++ if (as->flags & JIT_F_RVZbb) { ++ if (!irt_is64(ir->t)) ++ emit_dsshamt(as, RISCVI_SRAI, dest, dest, 32); ++ emit_ds(as, RISCVI_REV8, dest, left); ++ } else if (as->flags & JIT_F_RVXThead) { ++ emit_ds(as, irt_is64(ir->t) ? RISCVI_TH_REV : RISCVI_TH_REVW, ++ dest, left); ++ } else if (irt_is64(ir->t)) { ++ Reg tmp1, tmp2, tmp3, tmp4; ++ tmp1 = ra_scratch(as, allow), allow = rset_exclude(allow, tmp1); ++ tmp2 = ra_scratch(as, allow), allow = rset_exclude(allow, tmp2); ++ tmp3 = ra_scratch(as, allow), allow = rset_exclude(allow, tmp3); ++ tmp4 = ra_scratch(as, allow); ++ emit_ds1s2(as, RISCVI_OR, dest, dest, tmp4); ++ emit_ds1s2(as, RISCVI_OR, dest, dest, tmp3); ++ emit_ds1s2(as, RISCVI_OR, dest, dest, tmp2); ++ emit_dsshamt(as, RISCVI_SLLI, tmp4, tmp4, 40); ++ emit_dsshamt(as, RISCVI_SLLI, dest, left, 56); ++ emit_ds1s2(as, RISCVI_OR, tmp3, tmp1, tmp3); ++ emit_ds1s2(as, RISCVI_AND, tmp4, left, RID_TMP); ++ emit_dsshamt(as, RISCVI_SLLI, tmp3, tmp3, 32); ++ emit_dsshamt(as, RISCVI_SLLI, tmp1, tmp1, 24); ++ emit_dsshamt(as, RISCVI_SRLIW, tmp3, left, 24); ++ emit_ds1s2(as, RISCVI_OR, tmp2, tmp3, tmp2); ++ emit_ds1s2(as, RISCVI_AND, tmp1, left, tmp1); ++ emit_ds1s2(as, RISCVI_OR, tmp3, tmp4, tmp3); ++ emit_dsshamt(as, RISCVI_SLLI, tmp4, tmp4, 24); ++ emit_dsshamt(as, RISCVI_SRLIW, tmp4, tmp4, 24); ++ emit_ds1s2(as, RISCVI_AND, tmp3, tmp3, tmp1); ++ emit_dsshamt(as, RISCVI_SRLI, tmp4, left, 8); ++ emit_dsshamt(as, RISCVI_SRLI, tmp3, left, 24); ++ emit_ds1s2(as, RISCVI_OR, tmp2, tmp2, tmp3); ++ emit_du(as, RISCVI_LUI, tmp1, RISCVF_HI(0xff0000u)); ++ emit_ds1s2(as, RISCVI_AND, tmp2, tmp2, RID_TMP); ++ emit_dsshamt(as, RISCVI_SRLI, tmp3, left, 56); ++ emit_dsi(as, RISCVI_ADDI, RID_TMP, RID_TMP, RISCVF_LO(0xff00)); ++ emit_du(as, RISCVI_LUI, RID_TMP, RISCVF_HI(0xff00u)); ++ emit_dsshamt(as, RISCVI_SRLI, tmp2, left, 40); ++ } else { ++ Reg tmp1, tmp2; ++ tmp1 = 
ra_scratch(as, allow), allow = rset_exclude(allow, tmp1); ++ tmp2 = ra_scratch(as, allow); ++ emit_ds1s2(as, RISCVI_OR, dest, dest, tmp2); ++ emit_ds1s2(as, RISCVI_OR, dest, dest, tmp1); ++ emit_dsshamt(as, RISCVI_SLLI, tmp2, RID_TMP, 8); ++ emit_dsshamt(as, RISCVI_SLLIW, dest, left, 24); ++ emit_ds1s2(as, RISCVI_OR, tmp1, tmp1, tmp2); ++ emit_ds1s2(as, RISCVI_AND, RID_TMP, left, RID_TMP); ++ emit_ds1s2(as, RISCVI_AND, tmp1, tmp1, RID_TMP); ++ emit_dsshamt(as, RISCVI_SRLIW, tmp2, left, 24); ++ emit_dsi(as, RISCVI_ADDI, RID_TMP, RID_TMP, RISCVF_LO(0xff00)); ++ emit_du(as, RISCVI_LUI, RID_TMP, RISCVF_HI(0xff00u)); ++ emit_dsshamt(as, RISCVI_SRLI, tmp1, left, 8); ++ } ++} + -+ case BC_ISEQV: case BC_ISNEV: -+ vk = op == BC_ISEQV; -+ | // RA = src1*8, RD = src2*8, JMP with RD = target -+ | add RA, BASE, RA -+ | add RD, BASE, RD -+ | addi PC, PC, 4 -+ | ld CARG1, 0(RA) -+ | ld CARG2, 0(RD) -+ | lhu TMP2, -4+OFS_RD(PC) -+ | gettp CARG3, CARG1 -+ | gettp CARG4, CARG2 -+ | sltu TMP0, TISNUM, CARG3 -+ | sltu TMP1, TISNUM, CARG4 -+ | or TMP0, TMP0, TMP1 -+ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 -+ if (vk) { -+ | beqz TMP0, ->BC_ISEQN_Z -+ } else { -+ | beqz TMP0, ->BC_ISNEN_Z -+ } -+ |// Either or both types are not numbers. -+ |.if FFI -+ | // Check if RA or RD is a cdata. -+ | xori TMP0, CARG3, LJ_TCDATA -+ | xori TMP1, CARG4, LJ_TCDATA -+ | and TMP0, TMP0, TMP1 -+ | bxeqz TMP0, ->vmeta_equal_cd -+ |.endif -+ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 -+ | decode_BC4b TMP2 -+ | addw TMP2, TMP2, TMP3 // (jump-0x8000)<<2 -+ | bne CARG1, CARG2, >2 -+ | // Tag and value are equal. -+ if (vk) { -+ |->BC_ISEQV_Z: -+ | add PC, PC, TMP2 -+ } -+ |1: -+ | ins_next -+ | -+ |2: // Check if the tags are the same and it's a table or userdata. -+ | xor TMP3, CARG3, CARG4 // Same type? -+ | sltiu TMP0, CARG3, LJ_TISTABUD+1 // Table or userdata? 
TMP0=1 -+ | beqz TMP3, >3 -+ | mv TMP0, x0 // TMP0=0: not same type, or same type table/userdata -+ |3: -+ | cleartp TAB:TMP1, CARG1 -+ if (vk) { -+ | beqz TMP0, <1 -+ } else { -+ | beqz TMP0, ->BC_ISEQV_Z // Reuse code from opposite instruction. ++static void asm_bitop(ASMState *as, IRIns *ir, RISCVIns riscvi, RISCVIns riscvik, RISCVIns riscvin) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left, right; ++ IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); ++ if (irref_isk(ir->op2)) { ++ intptr_t k = get_kval(as, ir->op2); ++ if (checki12(k)) { ++ left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); ++ emit_dsi(as, riscvik, dest, left, k); ++ return; + } -+ | // Different tables or userdatas. Need to check __eq metamethod. -+ | // Field metatable must be at same offset for GCtab and GCudata! -+ | ld TAB:TMP3, TAB:TMP1->metatable -+ if (vk) { -+ | beqz TAB:TMP3, <1 // No metatable? -+ | lbu TMP3, TAB:TMP3->nomm -+ | andi TMP3, TMP3, 1<BC_ISEQV_Z // No metatable? -+ | lbu TMP3, TAB:TMP3->nomm -+ | andi TMP3, TMP3, 1<BC_ISEQV_Z // Or 'no __eq' flag set? ++ } else if (as->flags & JIT_F_RVZbb) { ++ if (mayfuse(as, ir->op1) && irl->o == IR_BNOT) { ++ left = ra_alloc1(as, irl->op1, RSET_GPR); ++ right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); ++ emit_ds1s2(as, riscvin, dest, right, left); ++ return; ++ } else if (mayfuse(as, ir->op2) && irr->o == IR_BNOT) { ++ left = ra_alloc1(as, ir->op1, RSET_GPR); ++ right = ra_alloc1(as, irr->op1, rset_exclude(RSET_GPR, left)); ++ emit_ds1s2(as, riscvin, dest, left, right); ++ return; + } -+ | j ->vmeta_equal // Handle __eq metamethod. 
-+ break; ++ } ++ left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); ++ right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); ++ emit_ds1s2(as, riscvi, dest, left, right); ++} + -+ case BC_ISEQS: case BC_ISNES: -+ vk = op == BC_ISEQS; -+ | // RA = src*8, RD = str_const*8 (~), JMP with RD = target -+ | add RA, BASE, RA -+ | addi PC, PC, 4 -+ | ld CARG1, 0(RA) -+ | sub RD, KBASE, RD -+ | lhu TMP2, -4+OFS_RD(PC) -+ | ld CARG2, -8(RD) // KBASE-8-str_const*8 -+ |.if FFI -+ | gettp CARG3, CARG1 -+ | li TMP1, LJ_TCDATA -+ |.endif -+ | li TMP0, LJ_TSTR -+ | decode_BC4b TMP2 -+ | settp CARG2, TMP0 -+ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 -+ |.if FFI -+ | bxeq CARG3, TMP1, ->vmeta_equal_cd -+ |.endif -+ | xor TMP0, CARG1, CARG2 // TMP2=0: A==D; TMP2!=0: A!=D -+ | addw TMP2, TMP2, TMP3 -+ if (vk) { -+ | seqz TMP4, TMP0 -+ } else { -+ | snez TMP4, TMP0 -+ } -+ | neg TMP4, TMP4 -+ | and TMP2, TMP2, TMP4 -+ | add PC, PC, TMP2 -+ | ins_next -+ break; ++#define asm_band(as, ir) asm_bitop(as, ir, RISCVI_AND, RISCVI_ANDI, RISCVI_ANDN) ++#define asm_bor(as, ir) asm_bitop(as, ir, RISCVI_OR, RISCVI_ORI, RISCVI_ORN) ++#define asm_bxor(as, ir) asm_bitop(as, ir, RISCVI_XOR, RISCVI_XORI, RISCVI_XNOR) + -+ case BC_ISEQN: case BC_ISNEN: -+ vk = op == BC_ISEQN; -+ | // RA = src*8, RD = num_const*8, JMP with RD = target -+ | add RA, BASE, RA -+ | add RD, KBASE, RD -+ | ld CARG1, 0(RA) -+ | ld CARG2, 0(RD) -+ | lhu TMP2, OFS_RD(PC) -+ | gettp CARG3, CARG1 -+ | gettp CARG4, CARG2 -+ | addi PC, PC, 4 -+ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 -+ if (vk) { -+ |->BC_ISEQN_Z: -+ } else { -+ |->BC_ISNEN_Z: -+ } -+ | decode_BC4b TMP2 -+ | bne CARG3, TISNUM, >4 -+ | addw TMP2, TMP2, TMP3 -+ | bne CARG4, TISNUM, >6 -+ | xor TMP0, CARG1, CARG2 // TMP0=0: A==D; TMP0!=0: A!=D -+ |1: -+ if (vk) { -+ | seqz TMP4, TMP0 -+ | neg TMP4, TMP4 -+ | and TMP2, TMP2, TMP4 -+ | add PC, PC, TMP2 -+ |2: -+ } else { -+ | snez TMP4, TMP0 -+ | neg TMP4, TMP4 -+ | and TMP2, 
TMP2, TMP4 -+ |2: -+ | add PC, PC, TMP2 ++static void asm_bitshift(ASMState *as, IRIns *ir, RISCVIns riscvi, RISCVIns riscvik) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_alloc1(as, ir->op1, RSET_GPR); ++ uint32_t shmsk = irt_is64(ir->t) ? 63 : 31; ++ if (irref_isk(ir->op2)) { /* Constant shifts. */ ++ uint32_t shift = (uint32_t)(IR(ir->op2)->i & shmsk); ++ switch (riscvik) { ++ case RISCVI_SRAI: case RISCVI_SRLI: case RISCVI_SLLI: ++ case RISCVI_SRAIW: case RISCVI_SLLIW: case RISCVI_SRLIW: ++ emit_dsshamt(as, riscvik, dest, left, shift); ++ break; ++ case RISCVI_ADDI: shift = (-shift) & shmsk; ++ case RISCVI_RORI: ++ emit_roti(as, RISCVI_RORI, dest, left, RID_TMP, shift); ++ break; ++ case RISCVI_ADDIW: shift = (-shift) & shmsk; ++ case RISCVI_RORIW: ++ emit_roti(as, RISCVI_RORIW, dest, left, RID_TMP, shift); ++ break; ++ default: ++ lj_assertA(0, "bad shift instruction"); ++ return; + } -+ |3: -+ | ins_next -+ | -+ |4: // RA is not an integer. -+ | addw TMP2, TMP2, TMP3 -+ |.if FFI -+ | bgeu CARG3, TISNUM, >7 -+ |.else -+ | bgeu CARG3, TISNUM, <2 -+ |.endif -+ | fmv.d.x FTMP0, CARG1 -+ | fmv.d.x FTMP2, CARG2 -+ | bne CARG4, TISNUM, >5 -+ |// RA is a number, RD is an integer. -+ | fcvt.d.w FTMP2, CARG2 -+ | -+ |5: // RA and RD are both numbers. -+ | feq.d TMP0, FTMP0, FTMP2 -+ | seqz TMP0, TMP0 -+ | j <1 -+ | -+ |6: // RA is an integer, RD is a number. 
-+ |.if FFI -+ | bgeu CARG4, TISNUM, >8 -+ |.else -+ | bgeu CARG4, TISNUM, <2 -+ |.endif -+ | fcvt.d.w FTMP0, CARG1 -+ | fmv.d.x FTMP2, CARG2 -+ | j <5 -+ | -+ |.if FFI -+ |7: // RA not int, not number -+ | li TMP0, LJ_TCDATA -+ | bne CARG3, TMP0, <2 -+ | j ->vmeta_equal_cd -+ | -+ |8: // RD not int, not number -+ | li TMP0, LJ_TCDATA -+ | bne CARG4, TMP0, <2 -+ | j ->vmeta_equal_cd -+ |.endif -+ break; -+ -+ case BC_ISEQP: case BC_ISNEP: -+ vk = op == BC_ISEQP; -+ | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target -+ | add RA, BASE, RA -+ | srliw TMP0, RD, 3 -+ | ld TMP1, 0(RA) -+ | not TMP0, TMP0 // ~TMP0: ~0 ~1 ~2 -+ | lhu TMP2, OFS_RD(PC) // TMP2: RD in next INS, branch target -+ | gettp TMP1, TMP1 -+ | addi PC, PC, 4 -+ | xor TMP0, TMP1, TMP0 // TMP0=0 A=D; TMP0!=0 A!=D -+ |.if FFI -+ | li TMP3, LJ_TCDATA -+ | bxeq TMP1, TMP3, ->vmeta_equal_cd -+ |.endif -+ | decode_BC4b TMP2 -+ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 -+ | addw TMP2, TMP2, TMP3 // TMP2=(jump-0x8000)<<2 -+ if (vk) { -+ | seqz TMP4, TMP0 -+ } else { -+ | snez TMP4, TMP0 ++ } else { ++ Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); ++ switch (riscvi) { ++ case RISCVI_SRA: case RISCVI_SRL: case RISCVI_SLL: ++ case RISCVI_SRAW: case RISCVI_SRLW: case RISCVI_SLLW: ++ emit_ds1s2(as, riscvi, dest, left, right); ++ break; ++ case RISCVI_ROR: case RISCVI_ROL: ++ case RISCVI_RORW: case RISCVI_ROLW: ++ emit_rot(as, riscvi, dest, left, right, RID_TMP); ++ break; ++ default: ++ lj_assertA(0, "bad shift instruction"); ++ return; + } -+ | neg TMP4, TMP4 -+ | and TMP2, TMP2, TMP4 -+ | add PC, PC, TMP2 -+ | ins_next -+ break; ++ } ++} + -+ /* -- Unary test and copy ops ------------------------------------------- */ ++#define asm_bshl(as, ir) (irt_is64(ir->t) ? \ ++ asm_bitshift(as, ir, RISCVI_SLL, RISCVI_SLLI) : \ ++ asm_bitshift(as, ir, RISCVI_SLLW, RISCVI_SLLIW)) ++#define asm_bshr(as, ir) (irt_is64(ir->t) ? 
\ ++ asm_bitshift(as, ir, RISCVI_SRL, RISCVI_SRLI) : \ ++ asm_bitshift(as, ir, RISCVI_SRLW, RISCVI_SRLIW)) ++#define asm_bsar(as, ir) (irt_is64(ir->t) ? \ ++ asm_bitshift(as, ir, RISCVI_SRA, RISCVI_SRAI) : \ ++ asm_bitshift(as, ir, RISCVI_SRAW, RISCVI_SRAIW)) ++#define asm_brol(as, ir) (irt_is64(ir->t) ? \ ++ asm_bitshift(as, ir, RISCVI_ROL, RISCVI_ADDI) : \ ++ asm_bitshift(as, ir, RISCVI_ROLW, RISCVI_ADDIW)) ++ // ROLI -> ADDI, ROLIW -> ADDIW; Hacky but works. ++#define asm_bror(as, ir) (irt_is64(ir->t) ? \ ++ asm_bitshift(as, ir, RISCVI_ROR, RISCVI_RORI) : \ ++ asm_bitshift(as, ir, RISCVI_RORW, RISCVI_RORIW)) + -+ case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: -+ | // RA = dst*8 or unused, RD = src*8, JMP with RD = target -+ | add RD, BASE, RD -+ | lhu TMP2, OFS_RD(PC) -+ | ld CRET1, 0(RD) -+ | addi PC, PC, 4 -+ | gettp TMP0, CRET1 -+ | add RA, BASE, RA -+ | sltiu TMP0, TMP0, LJ_TISTRUECOND // TMP0=1 true; TMP0=0 false -+ | decode_BC4b TMP2 -+ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 -+ | addw TMP2, TMP2, TMP3 // (jump-0x8000)<<2 -+ if (op == BC_IST || op == BC_ISTC) { -+ | beqz TMP0, >1 -+ if (op == BC_ISTC) { -+ | sd CRET1, 0(RA) -+ } ++static void asm_min_max(ASMState *as, IRIns *ir, int ismax) ++{ ++ if (irt_isnum(ir->t)) { ++ Reg dest = ra_dest(as, ir, RSET_FPR); ++ MCLabel l_ret_left, l_end; ++ Reg right, left = ra_alloc2(as, ir, RSET_FPR); ++ right = (left >> 8); left &= 255; ++ l_end = emit_label(as); ++ ++ if (dest != left) ++ emit_ds1s2(as, RISCVI_FMV_D, dest, left, left); ++ l_ret_left = emit_label(as); ++ ++ if (dest != left) ++ emit_jump(as, l_end, -1); ++ if (dest != right) ++ emit_ds1s2(as, RISCVI_FMV_D, dest, right, right); ++ ++ emit_branch(as, RISCVI_BNE, RID_TMP, RID_ZERO, l_ret_left, -1); ++ emit_ds1s2(as, RISCVI_FLT_D, RID_TMP, ismax ? right : left, ++ ismax ? 
left : right); ++ } else { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); ++ Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); ++ if (as->flags & JIT_F_RVZbb) { ++ emit_ds1s2(as, ismax ? RISCVI_MAX : RISCVI_MIN, dest, left, right); + } else { -+ | bnez TMP0, >1 -+ if (op == BC_ISFC) { -+ | sd CRET1, 0(RA) ++ if (as->flags & JIT_F_RVXThead) { ++ if (left == right) { ++ if (dest != left) emit_mv(as, dest, left); ++ } else { ++ if (dest == left) { ++ emit_ds1s2(as, RISCVI_TH_MVNEZ, dest, right, RID_TMP); ++ } else { ++ emit_ds1s2(as, RISCVI_TH_MVEQZ, dest, left, RID_TMP); ++ if (dest != right) emit_mv(as, dest, right); ++ } ++ } ++ } else if (as->flags & JIT_F_RVZicond) { ++ emit_ds1s2(as, RISCVI_OR, dest, dest, RID_TMP); ++ if (dest != right) { ++ emit_ds1s2(as, RISCVI_CZERO_EQZ, RID_TMP, right, RID_TMP); ++ emit_ds1s2(as, RISCVI_CZERO_NEZ, dest, left, RID_TMP); ++ } else { ++ emit_ds1s2(as, RISCVI_CZERO_NEZ, RID_TMP, left, RID_TMP); ++ emit_ds1s2(as, RISCVI_CZERO_EQZ, dest, right, RID_TMP); ++ } ++ } else { ++ if (dest != right) { ++ emit_ds1s2(as, RISCVI_XOR, dest, right, dest); ++ emit_ds1s2(as, RISCVI_AND, dest, dest, RID_TMP); ++ emit_ds1s2(as, RISCVI_XOR, dest, right, left); ++ emit_dsi(as, RISCVI_ADDI, RID_TMP, RID_TMP, -1); ++ } else { ++ emit_ds1s2(as, RISCVI_XOR, dest, left, dest); ++ emit_ds1s2(as, RISCVI_AND, dest, dest, RID_TMP); ++ emit_ds1s2(as, RISCVI_XOR, dest, left, right); ++ emit_ds1s2(as, RISCVI_SUB, RID_TMP, RID_ZERO, RID_TMP); ++ } + } ++ emit_ds1s2(as, RISCVI_SLT, RID_TMP, ++ ismax ? left : right, ismax ? 
right : left); + } -+ | add PC, PC, TMP2 -+ |1: -+ | ins_next -+ break; ++ } ++} + -+ case BC_ISTYPE: -+ | // RA = src*8, RD = -type*8 -+ | add TMP0, BASE, RA -+ | srliw TMP1, RD, 3 -+ | ld TMP0, 0(TMP0) -+ | gettp TMP0, TMP0 -+ | add TMP0, TMP0, TMP1 // if itype of RA == type, then TMP0=0 -+ | bxnez TMP0, ->vmeta_istype -+ | ins_next -+ break; -+ case BC_ISNUM: -+ | // RA = src*8, RD = -(TISNUM-1)*8 -+ | add TMP0, BASE, RA -+ | ld TMP0, 0(TMP0) -+ | checknum TMP0, ->vmeta_istype -+ | ins_next -+ break; ++#define asm_min(as, ir) asm_min_max(as, ir, 0) ++#define asm_max(as, ir) asm_min_max(as, ir, 1) + -+ /* -- Unary ops --------------------------------------------------------- */ ++/* -- Comparisons --------------------------------------------------------- */ + -+ case BC_MOV: -+ | // RA = dst*8, RD = src*8 -+ | add RD, BASE, RD -+ | add RA, BASE, RA -+ | ld TMP0, 0(RD) -+ | ins_next1 -+ | sd TMP0, 0(RA) -+ | ins_next2 -+ break; -+ case BC_NOT: -+ | // RA = dst*8, RD = src*8 -+ | add RD, BASE, RD -+ | add RA, BASE, RA -+ | ld TMP0, 0(RD) -+ | li TMP1, LJ_TTRUE -+ | ins_next1 -+ | gettp TMP0, TMP0 -+ | sltu TMP0, TMP1, TMP0 -+ | addiw TMP0, TMP0, 1 -+ | slli TMP0, TMP0, 47 -+ | not TMP0, TMP0 -+ | sd TMP0, 0(RA) -+ | ins_next2 -+ break; -+ case BC_UNM: -+ | // RA = dst*8, RD = src*8 -+ | add RB, BASE, RD -+ | add RA, BASE, RA -+ | ld TMP0, 0(RB) -+ | lui TMP1, 0x80000 -+ | gettp CARG3, TMP0 -+ | bne CARG3, TISNUM, >1 -+ | sext.w TMP0, TMP0 -+ | bxeq TMP0, TMP1, ->vmeta_unm // Meta handler deals with -2^31. 
-+ | negw TMP0, TMP0 -+ | zext.w TMP0, TMP0 -+ | settp_b TMP0, TISNUM -+ | j >2 -+ |1: -+ | sltiu TMP3, CARG3, LJ_TISNUM -+ | slli TMP1, TMP1, 32 -+ | bxeqz TMP3, ->vmeta_unm -+ | xor TMP0, TMP0, TMP1 // sign => ~sign -+ |2: -+ | sd TMP0, 0(RA) -+ | ins_next -+ break; -+ case BC_LEN: -+ | // RA = dst*8, RD = src*8 -+ | add CARG2, BASE, RD -+ | ld TMP0, 0(CARG2) -+ | add RA, BASE, RA -+ | gettp TMP1, TMP0 -+ | addi TMP2, TMP1, -LJ_TSTR -+ | cleartp STR:CARG1, TMP0 -+ | bnez TMP2, >2 -+ | lwu CARG1, STR:CARG1->len -+ |1: -+ | settp_b CARG1, TISNUM -+ | sd CARG1, 0(RA) -+ | ins_next -+ |2: -+ | addi TMP2, TMP1, -LJ_TTAB -+ | bxnez TMP2, ->vmeta_len -+#if LJ_52 -+ | ld TAB:TMP2, TAB:CARG1->metatable -+ | bnez TAB:TMP2, >9 -+ |3: -+#endif -+ |->BC_LEN_Z: -+ | call_intern BC_LEN, lj_tab_len // (GCtab *t) -+ | // Returns uint32_t (but less than 2^31). -+ | j <1 -+#if LJ_52 -+ |9: -+ | lbu TMP0, TAB:TMP2->nomm -+ | andi TMP0, TMP0, 1<vmeta_len -+#endif -+ break; ++/* FP comparisons. */ ++static void asm_fpcomp(ASMState *as, IRIns *ir) ++{ ++ IROp op = ir->o; ++ Reg right, left = ra_alloc2(as, ir, RSET_FPR); ++ right = (left >> 8); left &= 255; ++ asm_guard(as, (op < IR_EQ ? (op&4) : (op&1)) ++ ? RISCVI_BNE : RISCVI_BEQ, RID_TMP, RID_ZERO); ++ switch (op) { ++ case IR_LT: case IR_UGE: ++ emit_ds1s2(as, RISCVI_FLT_D, RID_TMP, left, right); ++ break; ++ case IR_LE: case IR_UGT: case IR_ABC: ++ emit_ds1s2(as, RISCVI_FLE_D, RID_TMP, left, right); ++ break; ++ case IR_GT: case IR_ULE: ++ emit_ds1s2(as, RISCVI_FLT_D, RID_TMP, right, left); ++ break; ++ case IR_GE: case IR_ULT: ++ emit_ds1s2(as, RISCVI_FLE_D, RID_TMP, right, left); ++ break; ++ case IR_EQ: case IR_NE: ++ emit_ds1s2(as, RISCVI_FEQ_D, RID_TMP, left, right); ++ break; ++ default: ++ break; ++ } ++} ++ ++/* Integer comparisons. */ ++static void asm_intcomp(ASMState *as, IRIns *ir) ++{ ++ /* ORDER IR: LT GE LE GT ULT UGE ULE UGT. 
*/ ++ /* 00 01 10 11 100 101 110 111 */ ++ IROp op = ir->o; ++ Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); ++ if (op == IR_ABC) op = IR_UGT; ++ if ((op&4) == 0 && irref_isk(ir->op2) && get_kval(as, ir->op2) == 0) { ++ switch (op) { ++ case IR_LT: asm_guard(as, RISCVI_BGE, left, RID_ZERO); break; ++ case IR_GE: asm_guard(as, RISCVI_BLT, left, RID_ZERO); break; ++ case IR_LE: asm_guard(as, RISCVI_BLT, RID_ZERO, left); break; ++ case IR_GT: asm_guard(as, RISCVI_BGE, RID_ZERO, left); break; ++ default: break; ++ } ++ return; ++ } ++ if (irref_isk(ir->op2)) { ++ intptr_t k = get_kval(as, ir->op2); ++ if ((op&2)) k++; ++ if (checki12(k)) { ++ asm_guard(as, (op&1) ? RISCVI_BNE : RISCVI_BEQ, RID_TMP, RID_ZERO); ++ emit_dsi(as, (op&4) ? RISCVI_SLTIU : RISCVI_SLTI, RID_TMP, left, k); ++ return; ++ } ++ } ++ right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); ++ asm_guard(as, ((op&4) ? RISCVI_BGEU : RISCVI_BGE) ^ RISCVF_FUNCT3((op^(op>>1))&1), ++ (op&2) ? right : left, (op&2) ? left : right); ++} + -+ /* -- Binary ops -------------------------------------------------------- */ ++static void asm_comp(ASMState *as, IRIns *ir) ++{ ++ if (irt_isnum(ir->t)) ++ asm_fpcomp(as, ir); ++ else ++ asm_intcomp(as, ir); ++} + -+ |.macro fpmod, a, b, c -+ | fdiv.d FARG1, b, c -+ | jal ->vm_floor // floor(b/c) -+ | fmul.d a, FRET1, c -+ | fsub.d a, b, a // b - floor(b/c)*c -+ |.endmacro -+ | -+ |.macro ins_arithpre -+ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); -+ | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 -+ ||if (vk == 1) { -+ | // RA = dst*8, RB = num_const*8, RC = src1*8 -+ | decode_RB8 RC, INS -+ | decode_RDtoRC8 RB, RD -+ ||} else { -+ | // RA = dst*8, RB = src1*8, RC = num_const*8 -+ | decode_RB8 RB, INS -+ | decode_RDtoRC8 RC, RD -+ ||} -+ ||switch (vk) { -+ ||case 0: // suffix is VN -+ | add RB, BASE, RB -+ | add RC, KBASE, RC -+ || break; -+ ||case 1: // suffix is NV -+ | add RC, BASE, RC -+ | add RB, KBASE, RB -+ || break; -+ ||default: // CAT 
or suffix is VV -+ | add RB, BASE, RB -+ | add RC, BASE, RC -+ || break; -+ ||} -+ |.endmacro -+ | -+ |.macro ins_arithfp, fpins, itype1, itype2 -+ | fld FTMP0, 0(RB) -+ | sltu itype1, itype1, TISNUM -+ | sltu itype2, itype2, TISNUM -+ | fld FTMP2, 0(RC) -+ | and itype1, itype1, itype2 -+ | add RA, BASE, RA -+ | bxeqz itype1, ->vmeta_arith -+ | fpins FRET1, FTMP0, FTMP2 -+ | ins_next1 -+ | fsd FRET1, 0(RA) -+ | ins_next2 -+ |.endmacro -+ | -+ |.macro ins_arithead, itype1, itype2, tval1, tval2 -+ | ld tval1, 0(RB) -+ | ld tval2, 0(RC) -+ | // Check for two integers. -+ | gettp itype1, tval1 -+ | gettp itype2, tval2 -+ |.endmacro -+ | -+ |.macro ins_arithdn, intins, fpins -+ | ins_arithpre -+ | ins_arithead TMP0, TMP1, CARG1, CARG2 -+ | bne TMP0, TISNUM, >1 -+ | bne TMP1, TISNUM, >1 -+ | sext.w CARG3, CARG1 -+ | sext.w CARG4, CARG2 -+ |.if "intins" == "addw" -+ | intins CRET1, CARG3, CARG4 -+ | xor TMP1, CRET1, CARG3 // ((y^a) & (y^b)) < 0: overflow. -+ | xor TMP2, CRET1, CARG4 -+ | and TMP1, TMP1, TMP2 -+ | add RA, BASE, RA -+ | bxltz TMP1, ->vmeta_arith -+ |.elif "intins" == "subw" -+ | intins CRET1, CARG3, CARG4 -+ | xor TMP1, CRET1, CARG3 // ((y^a) & (a^b)) < 0: overflow. -+ | xor TMP2, CARG3, CARG4 -+ | and TMP1, TMP1, TMP2 -+ | add RA, BASE, RA -+ | bxltz TMP1, ->vmeta_arith -+ |.elif "intins" == "mulw" -+ | mul TMP2, CARG3, CARG4 -+ | add RA, BASE, RA -+ | sext.w CRET1, TMP2 -+ | bxne CRET1, TMP2, ->vmeta_arith // 63-32bit not all 0 or 1: overflow. -+ |.endif -+ | zext.w CRET1, CRET1 -+ | settp_b CRET1, TISNUM -+ | sd CRET1, 0(RA) -+ | ins_next -+ |1: // Check for two numbers. 
-+ | ins_arithfp, fpins, TMP0, TMP1 -+ |.endmacro -+ | -+ |.macro ins_arithdiv, fpins -+ | ins_arithpre -+ | ins_arithead TMP0, TMP1, CARG1, CARG2 -+ | ins_arithfp, fpins, TMP0, TMP1 -+ |.endmacro -+ | -+ |.macro ins_arithmod, fpins, BC -+ | ins_arithpre -+ | ins_arithead TMP0, TMP1, CARG1, CARG2 -+ | bne TMP0, TISNUM, >1 -+ | bne TMP1, TISNUM, >1 -+ | sext.w CARG1, CARG1 -+ | sext.w CARG2, CARG2 -+ | add RA, BASE, RA -+ | bxeqz CARG2, ->vmeta_arith -+ | call_intern BC, lj_vm_modi -+ | zext.w CRET1, CRET1 -+ | settp_b CRET1, TISNUM -+ | sd CRET1, 0(RA) -+ | ins_next -+ |1: // Check for two numbers. -+ | ins_arithfp, fpins, TMP0, TMP1 -+ |.endmacro -+ -+ case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: -+ | ins_arithdn addw, fadd.d -+ break; -+ case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: -+ | ins_arithdn subw, fsub.d -+ break; -+ case BC_MULVN: case BC_MULNV: case BC_MULVV: -+ | ins_arithdn mulw, fmul.d -+ break; -+ case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: -+ | ins_arithdiv fdiv.d -+ break; -+ case BC_MODVN: -+ | ins_arithmod fpmod, BC_MODVN -+ break; -+ case BC_MODNV: -+ | ins_arithmod fpmod, BC_MODNV -+ break; -+ case BC_MODVV: -+ | ins_arithmod fpmod, BC_MODVV -+ break; -+ case BC_POW: -+ | ins_arithpre -+ | ld CARG1, 0(RB) -+ | ld CARG2, 0(RC) -+ | gettp TMP0, CARG1 -+ | gettp TMP1, CARG2 -+ | sltiu TMP0, TMP0, LJ_TISNUM -+ | sltiu TMP1, TMP1, LJ_TISNUM -+ | and TMP0, TMP0, TMP1 -+ | add RA, BASE, RA -+ | bxeqz TMP0, ->vmeta_arith -+ | fld FARG1, 0(RB) -+ | fld FARG2, 0(RC) -+ | call_extern BC_POW, pow -+ | ins_next1 -+ | fsd FRET1, 0(RA) -+ | ins_next2 -+ break; ++static void asm_equal(ASMState *as, IRIns *ir) ++{ ++ if (irt_isnum(ir->t)) { ++ asm_fpcomp(as, ir); ++ } else { ++ Reg right, left = ra_alloc2(as, ir, RSET_GPR); ++ right = (left >> 8); left &= 255; ++ asm_guard(as, (ir->o & 1) ? 
RISCVI_BEQ : RISCVI_BNE, left, right); ++ } ++} + -+ case BC_CAT: -+ | // RA = dst*8, RB = src_start*8, RC = src_end*8 -+ | decode_RB8 RB, INS -+ | decode_RDtoRC8 RC, RD -+ | sub CARG3, RC, RB -+ | sd BASE, L->base -+ | add CARG2, BASE, RC -+ | mv MULTRES, RB -+ |->BC_CAT_Z: -+ | srliw CARG3, CARG3, 3 -+ | sd PC, SAVE_PC(sp) -+ | mv CARG1, L -+ | call_intern BC_CAT, lj_meta_cat // (lua_State *L, TValue *top, int left) -+ | // Returns NULL (finished) or TValue * (metamethod). -+ | ld BASE, L->base -+ | bxnez CRET1, ->vmeta_binop -+ | add RB, BASE, MULTRES -+ | ld TMP0, 0(RB) -+ | add RA, BASE, RA -+ | sd TMP0, 0(RA) -+ | ins_next ++/* -- Split register ops -------------------------------------------------- */ ++ ++/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ ++static void asm_hiop(ASMState *as, IRIns *ir) ++{ ++ /* HIOP is marked as a store because it needs its own DCE logic. */ ++ int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ ++ if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; ++ if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ ++ switch ((ir-1)->o) { ++ case IR_CALLN: ++ case IR_CALLL: ++ case IR_CALLS: ++ case IR_CALLXS: ++ if (!uselo) ++ ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ + break; ++ default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break; ++ } ++} ++ ++/* -- Profiling ----------------------------------------------------------- */ ++ ++static void asm_prof(ASMState *as, IRIns *ir) ++{ ++ UNUSED(ir); ++ asm_guard(as, RISCVI_BNE, RID_TMP, RID_ZERO); ++ emit_dsi(as, RISCVI_ANDI, RID_TMP, RID_TMP, HOOK_PROFILE); ++ emit_lsglptr(as, RISCVI_LBU, RID_TMP, ++ (int32_t)offsetof(global_State, hookmask)); ++} ++ ++/* -- Stack handling ------------------------------------------------------ */ ++ ++/* Check Lua stack size for overflow. Use exit handler as fallback. 
*/ ++static void asm_stack_check(ASMState *as, BCReg topslot, ++ IRIns *irp, RegSet allow, ExitNo exitno) ++{ ++ /* Try to get an unused temp register, otherwise spill/restore RID_RET*. */ ++ Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE; ++ ExitNo oldsnap = as->snapno; ++ rset_clear(allow, pbase); ++ as->snapno = exitno; ++ asm_guard(as, RISCVI_BNE, RID_TMP, RID_ZERO); ++ as->snapno = oldsnap; ++ if (allow) { ++ tmp = rset_pickbot(allow); ++ ra_modified(as, tmp); ++ } else { // allow == RSET_EMPTY ++ tmp = RID_RET; ++ emit_lso(as, RISCVI_LD, tmp, RID_SP, 0); /* Restore tmp1 register. */ ++ } ++ emit_dsi(as, RISCVI_SLTIU, RID_TMP, RID_TMP, (int32_t)(8*topslot)); ++ emit_ds1s2(as, RISCVI_SUB, RID_TMP, tmp, pbase); ++ emit_lso(as, RISCVI_LD, tmp, tmp, offsetof(lua_State, maxstack)); ++ if (pbase == RID_TMP) ++ emit_getgl(as, RID_TMP, jit_base); ++ emit_getgl(as, tmp, cur_L); ++ if (allow == RSET_EMPTY) /* Spill temp register. */ ++ emit_lso(as, RISCVI_SD, tmp, RID_SP, 0); ++} + -+ /* -- Constant ops ------------------------------------------------------ */ ++/* Restore Lua stack from on-trace state. */ ++static void asm_stack_restore(ASMState *as, SnapShot *snap) ++{ ++ SnapEntry *map = &as->T->snapmap[snap->mapofs]; ++#ifdef LUA_USE_ASSERT ++ SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2]; ++#endif ++ MSize n, nent = snap->nent; ++ /* Store the value of all modified slots to the Lua stack. 
*/ ++ for (n = 0; n < nent; n++) { ++ SnapEntry sn = map[n]; ++ BCReg s = snap_slot(sn); ++ int32_t ofs = 8*((int32_t)s-1-LJ_FR2); ++ IRRef ref = snap_ref(sn); ++ IRIns *ir = IR(ref); ++ if ((sn & SNAP_NORESTORE)) ++ continue; ++ if (irt_isnum(ir->t)) { ++ Reg src = ra_alloc1(as, ref, RSET_FPR); ++ emit_lso(as, RISCVI_FSD, src, RID_BASE, ofs); ++ } else { ++ if ((sn & SNAP_KEYINDEX)) { ++ RegSet allow = rset_exclude(RSET_GPR, RID_BASE); ++ int64_t kki = (int64_t)LJ_KEYINDEX << 32; ++ if (irref_isk(ref)) { ++ emit_lso(as, RISCVI_SD, ++ ra_allock(as, kki | (int64_t)(uint32_t)ir->i, allow), ++ RID_BASE, ofs); ++ } else { ++ Reg src = ra_alloc1(as, ref, allow); ++ Reg rki = ra_allock(as, kki, rset_exclude(allow, src)); ++ emit_lso(as, RISCVI_SD, RID_TMP, RID_BASE, ofs); ++ emit_ds1s2(as, RISCVI_ADD, RID_TMP, src, rki); ++ } ++ } else { ++ asm_tvstore64(as, RID_BASE, ofs, ref); ++ } ++ } ++ checkmclim(as); ++ } ++ lj_assertA(map + nent == flinks, "inconsistent frames in snapshot"); ++} + -+ case BC_KSTR: -+ | // RA = dst*8, RD = str_const*8 (~) -+ | sub TMP1, KBASE, RD -+ | li TMP2, LJ_TSTR -+ | ld TMP0, -8(TMP1) // KBASE-8-str_const*8 -+ | add RA, BASE, RA -+ | settp TMP0, TMP2 -+ | sd TMP0, 0(RA) -+ | ins_next -+ break; -+ case BC_KCDATA: -+ |.if FFI -+ | // RA = dst*8, RD = cdata_const*8 (~) -+ | sub TMP1, KBASE, RD -+ | ld TMP0, -8(TMP1) // KBASE-8-cdata_const*8 -+ | li TMP2, LJ_TCDATA -+ | add RA, BASE, RA -+ | settp TMP0, TMP2 -+ | sd TMP0, 0(RA) -+ | ins_next -+ |.endif -+ break; -+ case BC_KSHORT: -+ | // RA = dst*8, RD = int16_literal*8 -+ | sraiw RD, INS, 16 -+ | add RA, BASE, RA -+ | zext.w RD, RD -+ | ins_next1 -+ | settp_b RD, TISNUM -+ | sd RD, 0(RA) -+ | ins_next2 -+ break; -+ case BC_KNUM: -+ | // RA = dst*8, RD = num_const*8 -+ | add RD, KBASE, RD -+ | add RA, BASE, RA -+ | ld TMP0, 0(RD) -+ | ins_next1 -+ | sd TMP0, 0(RA) -+ | ins_next2 -+ break; -+ case BC_KPRI: -+ | // RA = dst*8, RD = primitive_type*8 (~) -+ | add RA, BASE, RA -+ | slli TMP0, RD, 44 
// 44+3 -+ | not TMP0, TMP0 -+ | ins_next1 -+ | sd TMP0, 0(RA) -+ | ins_next2 -+ break; -+ case BC_KNIL: -+ | // RA = base*8, RD = end*8 -+ | add RA, BASE, RA -+ | sd TISNIL, 0(RA) -+ | addi RA, RA, 8 -+ | add RD, BASE, RD -+ |1: -+ | sd TISNIL, 0(RA) -+ | slt TMP0, RA, RD -+ | addi RA, RA, 8 -+ | bnez TMP0, <1 -+ | ins_next -+ break; ++/* -- GC handling --------------------------------------------------------- */ + -+ /* -- Upvalue and function ops ------------------------------------------ */ ++/* Marker to prevent patching the GC check exit. */ ++#define RISCV_NOPATCH_GC_CHECK \ ++ (RISCVI_OR|RISCVF_D(RID_TMP)|RISCVF_S1(RID_TMP)|RISCVF_S2(RID_TMP)) + -+ case BC_UGET: -+ | // RA = dst*8, RD = uvnum*8 -+ | ld LFUNC:TMP0, FRAME_FUNC(BASE) -+ | add RA, BASE, RA -+ | cleartp LFUNC:TMP0 -+ | add RD, RD, LFUNC:TMP0 -+ | ld UPVAL:TMP0, LFUNC:RD->uvptr -+ | ld TMP1, UPVAL:TMP0->v -+ | ld TMP2, 0(TMP1) -+ | ins_next1 -+ | sd TMP2, 0(RA) -+ | ins_next2 -+ break; -+ case BC_USETV: -+ | // RA = uvnum*8, RD = src*8 -+ | ld LFUNC:TMP0, FRAME_FUNC(BASE) -+ | add RD, BASE, RD -+ | cleartp LFUNC:TMP0 -+ | add RA, RA, LFUNC:TMP0 -+ | ld UPVAL:TMP0, LFUNC:RA->uvptr -+ | ld CRET1, 0(RD) -+ | lbu TMP3, UPVAL:TMP0->marked -+ | ld CARG2, UPVAL:TMP0->v -+ | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv) -+ | lbu TMP0, UPVAL:TMP0->closed -+ | gettp TMP2, CRET1 -+ | sd CRET1, 0(CARG2) -+ | or TMP3, TMP3, TMP0 -+ | li TMP0, LJ_GC_BLACK|1 -+ | addi TMP2, TMP2, -(LJ_TNUMX+1) -+ | beq TMP3, TMP0, >2 // Upvalue is closed and black? -+ |1: -+ | ins_next -+ | -+ |2: // Check if new value is collectable. -+ | sltiu TMP0, TMP2, LJ_TISGCV - (LJ_TNUMX+1) -+ | cleartp GCOBJ:CRET1, CRET1 -+ | beqz TMP0, <1 // tvisgcv(v) -+ | lbu TMP3, GCOBJ:CRET1->gch.marked -+ | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v) -+ | beqz TMP3, <1 -+ | // Crossed a write barrier. Move the barrier forward. 
-+ | mv CARG1, GL -+ | call_intern BC_USETV, lj_gc_barrieruv // (global_State *g, TValue *tv) -+ | j <1 -+ break; -+ case BC_USETS: -+ | // RA = uvnum*8, RD = str_const*8 (~) -+ | ld LFUNC:TMP0, FRAME_FUNC(BASE) -+ | sub TMP1, KBASE, RD -+ | cleartp LFUNC:TMP0 -+ | add RA, RA, LFUNC:TMP0 -+ | ld UPVAL:TMP0, LFUNC:RA->uvptr -+ | ld STR:TMP1, -8(TMP1) // KBASE-8-str_const*8 -+ | lbu TMP2, UPVAL:TMP0->marked -+ | ld CARG2, UPVAL:TMP0->v -+ | lbu TMP3, STR:TMP1->marked -+ | andi TMP4, TMP2, LJ_GC_BLACK // isblack(uv) -+ | lbu TMP2, UPVAL:TMP0->closed -+ | li TMP0, LJ_TSTR -+ | settp TMP1, TMP0 -+ | sd TMP1, 0(CARG2) -+ | bnez TMP4, >2 -+ |1: -+ | ins_next -+ | -+ |2: // Check if string is white and ensure upvalue is closed. -+ | beqz TMP2, <1 -+ | andi TMP0, TMP3, LJ_GC_WHITES // iswhite(str) -+ | beqz TMP0, <1 -+ | // Crossed a write barrier. Move the barrier forward. -+ | mv CARG1, GL -+ | call_intern BC_USETS, lj_gc_barrieruv // (global_State *g, TValue *tv) -+ | j <1 -+ break; -+ case BC_USETN: -+ | // RA = uvnum*8, RD = num_const*8 -+ | ld LFUNC:TMP0, FRAME_FUNC(BASE) -+ | add RD, KBASE, RD -+ | cleartp LFUNC:TMP0 -+ | add TMP0, RA, LFUNC:TMP0 -+ | ld UPVAL:TMP0, LFUNC:TMP0->uvptr -+ | ld TMP1, 0(RD) -+ | ld TMP0, UPVAL:TMP0->v -+ | sd TMP1, 0(TMP0) -+ | ins_next -+ break; -+ case BC_USETP: -+ | // RA = uvnum*8, RD = primitive_type*8 (~) -+ | ld LFUNC:TMP0, FRAME_FUNC(BASE) -+ | slli TMP2, RD, 44 -+ | cleartp LFUNC:TMP0 -+ | add TMP0, RA, LFUNC:TMP0 -+ | not TMP2, TMP2 -+ | ld UPVAL:TMP0, LFUNC:TMP0->uvptr -+ | ld TMP1, UPVAL:TMP0->v -+ | sd TMP2, 0(TMP1) -+ | ins_next -+ break; ++/* Check GC threshold and do one or more GC steps. */ ++static void asm_gc_check(ASMState *as) ++{ ++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit]; ++ IRRef args[2]; ++ MCLabel l_end; ++ Reg tmp; ++ ra_evictset(as, RSET_SCRATCH); ++ l_end = emit_label(as); ++ /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. 
*/ ++ asm_guard(as, RISCVI_BNE, RID_RET, RID_ZERO); /* Assumes asm_snap_prep() already done. */ ++ *--as->mcp = RISCV_NOPATCH_GC_CHECK; ++ args[0] = ASMREF_TMP1; /* global_State *g */ ++ args[1] = ASMREF_TMP2; /* MSize steps */ ++ asm_gencall(as, ci, args); ++ emit_ds(as, RISCVI_MV, ra_releasetmp(as, ASMREF_TMP1), RID_GL); ++ tmp = ra_releasetmp(as, ASMREF_TMP2); ++ emit_loadi(as, tmp, as->gcsteps); ++ /* Jump around GC step if GC total < GC threshold. */ ++ emit_branch(as, RISCVI_BLTU, RID_TMP, tmp, l_end, -1); ++ emit_getgl(as, tmp, gc.threshold); ++ emit_getgl(as, RID_TMP, gc.total); ++ as->gcsteps = 0; ++ checkmclim(as); ++} + -+ case BC_UCLO: -+ | // RA = level*8, RD = target -+ | ld TMP2, L->openupval -+ | branch_RD // Do this first since RD is not saved. -+ | sd BASE, L->base -+ | mv CARG1, L -+ | beqz TMP2, >1 -+ | add CARG2, BASE, RA -+ | call_intern BC_UCLO, lj_func_closeuv // (lua_State *L, TValue *level) -+ | ld BASE, L->base -+ |1: -+ | ins_next -+ break; ++/* -- Loop handling ------------------------------------------------------- */ + -+ case BC_FNEW: -+ | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype) -+ | sub TMP1, KBASE, RD -+ | ld CARG3, FRAME_FUNC(BASE) -+ | ld CARG2, -8(TMP1) // KBASE-8-tab_const*8 -+ | sd BASE, L->base -+ | sd PC, SAVE_PC(sp) -+ | cleartp CARG3 -+ | mv CARG1, L -+ | // (lua_State *L, GCproto *pt, GCfuncL *parent) -+ | call_intern BC_FNEW, lj_func_newL_gc -+ | // Returns GCfuncL *. -+ | li TMP0, LJ_TFUNC -+ | ld BASE, L->base -+ | settp CRET1, TMP0 -+ | add RA, BASE, RA -+ | sd CRET1, 0(RA) -+ | ins_next -+ break; ++/* Fixup the loop branch. */ ++static void asm_loop_fixup(ASMState *as) ++{ ++ MCode *p = as->mctop; ++ MCode *target = as->mcp; ++ ptrdiff_t delta; ++ if (as->loopinv) { /* Inverted loop branch? */ ++ delta = (char *)target - (char *)(p - 2); ++ /* asm_guard* already inverted the branch, and patched the final b. 
*/ ++ lj_assertA(checki21(delta), "branch target out of range"); ++ p[-2] = (p[-2]&0x00000fff) | RISCVF_IMMJ(delta); ++ } else { ++ /* J */ ++ delta = (char *)target - (char *)(p - 1); ++ p[-1] = RISCVI_JAL | RISCVF_IMMJ(delta); ++ } ++} + -+ /* -- Table ops --------------------------------------------------------- */ ++/* Fixup the tail of the loop. */ ++static void asm_loop_tail_fixup(ASMState *as) ++{ ++ UNUSED(as); /* Nothing to do(?) */ ++} + -+ case BC_TNEW: -+ case BC_TDUP: -+ | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~) -+ | ld TMP0, GL->gc.total -+ | ld TMP1, GL->gc.threshold -+ | sd BASE, L->base -+ | sd PC, SAVE_PC(sp) -+ | bgeu TMP0, TMP1, >5 -+ |1: -+ if (op == BC_TNEW) { -+ | srliw CARG2, RD, 3 -+ | andi CARG2, CARG2, 0x7ff -+ | lzi TMP0, 0x801 -+ | addiw TMP2, CARG2, -0x7ff -+ | srliw CARG3, RD, 14 -+ | seqz TMP3, TMP2 -+ | neg TMP4, TMP3 -+ | xor CARG1, TMP0, CARG2 // CARG2 = TMP3 ? TMP0 : CARG2 -+ | and CARG1, CARG1, TMP4 -+ | xor CARG2, CARG2, CARG1 -+ | mv CARG1, L -+ | // (lua_State *L, int32_t asize, uint32_t hbits) -+ | call_intern BC_TNEW, lj_tab_new -+ | // Returns Table *. ++/* -- Head of trace ------------------------------------------------------- */ ++ ++/* Coalesce BASE register for a root trace. */ ++static void asm_head_root_base(ASMState *as) ++{ ++ IRIns *ir = IR(REF_BASE); ++ Reg r = ir->r; ++ if (ra_hasreg(r)) { ++ ra_free(as, r); ++ if (rset_test(as->modset, r) || irt_ismarked(ir->t)) ++ ir->r = RID_INIT; /* No inheritance for modified BASE register. */ ++ if (r != RID_BASE) ++ emit_mv(as, r, RID_BASE); ++ } ++} ++ ++/* Coalesce BASE register for a side trace. */ ++static Reg asm_head_side_base(ASMState *as, IRIns *irp) ++{ ++ IRIns *ir = IR(REF_BASE); ++ Reg r = ir->r; ++ if (ra_hasreg(r)) { ++ ra_free(as, r); ++ if (rset_test(as->modset, r) || irt_ismarked(ir->t)) ++ ir->r = RID_INIT; /* No inheritance for modified BASE register. */ ++ if (irp->r == r) { ++ return r; /* Same BASE register already coalesced. 
*/ ++ } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) { ++ emit_mv(as, r, irp->r); /* Move from coalesced parent reg. */ ++ return irp->r; + } else { -+ | sub TMP1, KBASE, RD -+ | mv CARG1, L -+ | ld CARG2, -8(TMP1) // KBASE-8-str_const*8 -+ | call_intern BC_TDUP, lj_tab_dup // (lua_State *L, Table *kt) -+ | // Returns Table *. ++ emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */ + } -+ | li TMP0, LJ_TTAB -+ | ld BASE, L->base -+ | ins_next1 -+ | settp CRET1, TMP0 -+ | add RA, BASE, RA -+ | sd CRET1, 0(RA) -+ | ins_next2 -+ |5: -+ | mv MULTRES, RD -+ | mv CARG1, L -+ if (op == BC_TNEW) { -+ | call_intern BC_TNEW, lj_gc_step_fixtop // (lua_State *L) ++ } ++ return RID_NONE; ++} ++ ++/* -- Tail of trace ------------------------------------------------------- */ ++ ++/* Fixup the tail code. */ ++static void asm_tail_fixup(ASMState *as, TraceNo lnk) ++{ ++ MCode *mcp = as->mctail; ++ MCode *target; ++ int32_t spadj = as->T->spadjust; ++ if (spadj) { /* Emit stack adjustment */ ++ *mcp++ = RISCVI_ADDI | RISCVF_D(RID_SP) | RISCVF_S1(RID_SP) | RISCVF_IMMI(spadj); ++ } ++ /* Emit exit jump. */ ++ target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp; ++ ptrdiff_t delta = (char *)target - (char *)mcp; ++ if (lnk || checki32auipc(delta)) { ++ *mcp++ = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta)); ++ *mcp++ = RISCVI_JALR | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta)); ++ } else { ++ *mcp++ = RISCVI_LD | RISCVF_D(RID_TMP) | RISCVF_S1(RID_GL) | RISCVF_IMMI(glofs(as, &as->J->k64[LJ_K64_VM_EXIT_INTERP])); ++ *mcp++ = RISCVI_JALR | RISCVF_S1(RID_TMP); ++ } ++ while (as->mctop > mcp) *--as->mctop = RISCVI_NOP; /* NOP out unused space. */ ++} ++ ++/* Prepare tail of code. */ ++static void asm_tail_prep(ASMState *as, TraceNo lnk) ++{ ++ MCode *p = as->mctop - 1; /* Leave room for exitstub. 
*/ ++ if (as->loopref) { ++ as->invmcp = as->mcp = p; /* A single jump */ ++ } else { ++ as->mcp = (p -= 2); /* Stack pointer adjustment and AUIPC+JALR */ ++ as->invmcp = NULL; ++ p[0] = p[1] = p[2] = RISCVI_EBREAK; ++ } ++ as->mctail = p; ++} ++ ++/* -- Trace setup --------------------------------------------------------- */ ++ ++/* Ensure there are enough stack slots for call arguments. */ ++static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) ++{ ++ IRRef args[CCI_NARGS_MAX*2]; ++ uint32_t i, nargs = CCI_XNARGS(ci); ++ int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; ++ asm_collectargs(as, ir, ci, args); ++ for (i = 0; i < nargs; i++) { ++ if (args[i] && irt_isfp(IR(args[i])->t)) { ++ if (nfpr > 0) { ++ nfpr--; if(ci->flags & CCI_VARARG) ngpr--; ++ } else if (!(ci->flags & CCI_VARARG) && ngpr > 0) ngpr--; ++ else nslots += 2; + } else { -+ | call_intern BC_TDUP, lj_gc_step_fixtop // (lua_State *L) ++ if (ngpr > 0) { ++ ngpr--; if(ci->flags & CCI_VARARG) nfpr--; ++ } else nslots += 2; ++ } ++ } ++ if (nslots > as->evenspill) /* Leave room for args in stack slots. */ ++ as->evenspill = nslots; ++ return REGSP_HINT(irt_isfp(ir->t) ? RID_FPRET : RID_RET); ++} ++ ++static void asm_setup_target(ASMState *as) ++{ ++ asm_sparejump_setup(as); ++ asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0)); ++} ++ ++/* -- Trace patching ------------------------------------------------------ */ ++ ++/* Patch exit jumps of existing machine code to a new target. */ ++void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) ++{ ++ MCode *p = T->mcode; ++ MCode *pe = (MCode *)((char *)p + T->szmcode); ++ MCode *px = exitstub_trace_addr(T, exitno); ++ MCode *cstart = NULL; ++ MCode *mcarea = lj_mcode_patch(J, p, 0); ++ ++ for (; p < pe; p++) { ++ /* Look for exitstub branch, replace with branch to target. 
*/ ++ ptrdiff_t odelta = (char *)px - (char *)(p+1), ++ ndelta = (char *)target - (char *)(p+1); ++ if ((((p[0] ^ RISCVF_IMMB(8)) & 0xfe000f80u) == 0 && ++ ((p[0] & 0x0000007fu) == 0x63u) && ++ ((p[1] ^ RISCVF_IMMJ(odelta)) & 0xfffff000u) == 0 && ++ ((p[1] & 0x0000007fu) == 0x6fu) && p[-1] != RISCV_NOPATCH_GC_CHECK) || ++ (((p[1] ^ RISCVF_IMMJ(odelta)) & 0xfffff000u) == 0 && ++ ((p[1] & 0x0000007fu) == 0x6fu) && p[0] != RISCV_NOPATCH_GC_CHECK)) { ++ lj_assertJ(checki32(ndelta), "branch target out of range"); ++ /* Patch jump, if within range. */ ++ patchbranch: ++ if (checki21(ndelta)) { /* Patch jump */ ++ p[1] = RISCVI_JAL | RISCVF_IMMJ(ndelta); ++ if (!cstart) cstart = p + 1; ++ } else { /* Branch out of range. Use spare jump slot in mcarea. */ ++ MCode *mcjump = asm_sparejump_use(mcarea, target); ++ if (mcjump) { ++ lj_mcode_sync(mcjump, mcjump+2); ++ ndelta = (char *)mcjump - (char *)(p+1); ++ if (checki21(ndelta)) { ++ goto patchbranch; ++ } else { ++ lj_assertJ(0, "spare jump out of range: -Osizemcode too big"); ++ } ++ } ++ /* Ignore jump slot overflow. Child trace is simply not attached. 
*/ ++ } ++ } ++ } ++ if (cstart) lj_mcode_sync(cstart, px+1); ++ lj_mcode_patch(J, mcarea, 1); ++} +--- a/src/host/buildvm.c ++++ b/src/host/buildvm.c +@@ -69,6 +69,8 @@ static int collect_reloc(BuildCtx *ctx, + #include "../dynasm/dasm_mips.h" + #elif LJ_TARGET_S390X + #include "../dynasm/dasm_s390x.h" ++#elif LJ_TARGET_RISCV64 ++#include "../dynasm/dasm_riscv.h" + #else + #error "No support for this architecture (yet)" + #endif +--- a/src/host/buildvm_asm.c ++++ b/src/host/buildvm_asm.c +@@ -208,6 +208,34 @@ static void emit_asm_wordreloc(BuildCtx + "Error: unsupported opcode %08x for %s symbol relocation.\n", + ins, sym); + exit(1); ++#elif LJ_TARGET_RISCV64 ++ if ((ins & 0x7f) == 0x17u) { ++ fprintf(ctx->fp, "\tauipc x%d, %s\n", (ins >> 7) & 31, sym); ++ } else if ((ins & 0x7f) == 0x67u) { ++ fprintf(ctx->fp, "\tjalr x%d, x%d, %s\n", (ins >> 7) & 31, (ins >> 15) & 31, sym); ++ } else if ((ins & 0x7f) == 0x6fu) { ++ fprintf(ctx->fp, "\tjal x%d, %s\n", (ins >> 7) & 31, sym); ++ } else if ((ins & 0x7f) == 0x03u) { ++ uint8_t funct3 = (ins >> 12) & 7; ++ uint8_t rd = (ins >> 7) & 31, rs1 = (ins >> 15) & 31; ++ switch (funct3) { ++ case 0: fprintf(ctx->fp, "\tlb"); break; ++ case 1: fprintf(ctx->fp, "\tlh"); break; ++ case 2: fprintf(ctx->fp, "\tlw"); break; ++ case 3: fprintf(ctx->fp, "\tld"); break; ++ case 4: fprintf(ctx->fp, "\tlbu"); break; ++ case 5: fprintf(ctx->fp, "\tlhu"); break; ++ case 6: fprintf(ctx->fp, "\tlwu"); break; ++ default: goto rv_reloc_err; + } -+ | mv RD, MULTRES -+ | j <1 -+ break; -+ -+ case BC_GGET: -+ | // RA = dst*8, RD = str_const*8 (~) -+ case BC_GSET: -+ | // RA = src*8, RD = str_const*8 (~) -+ | ld LFUNC:TMP0, FRAME_FUNC(BASE) -+ | sub TMP1, KBASE, RD -+ | ld STR:RC, -8(TMP1) // KBASE-8-str_const*8 -+ | cleartp LFUNC:TMP0 -+ | ld TAB:RB, LFUNC:TMP0->env -+ | add RA, BASE, RA -+ if (op == BC_GGET) { -+ | j ->BC_TGETS_Z -+ } else { -+ | j ->BC_TSETS_Z ++ fprintf(ctx->fp, " x%d, %s(x%d)\n", rd, sym, rs1); ++ } else { ++rv_reloc_err: ++ 
fprintf(stderr, ++ "Error: unsupported opcode %08x for %s symbol relocation.\n", ++ ins, sym); ++ exit(1); ++ } + #else + #error "missing relocation support for this architecture" + #endif +@@ -304,6 +332,9 @@ void emit_asm(BuildCtx *ctx) + #if LJ_TARGET_MIPS + fprintf(ctx->fp, "\t.set nomips16\n\t.abicalls\n\t.set noreorder\n\t.set nomacro\n"); + #endif ++#if LJ_TARGET_RISCV64 ++ fprintf(ctx->fp, ".option norvc\n.option norelax\n"); ++#endif + emit_asm_align(ctx, 4); + + #if LJ_TARGET_PS3 +--- a/src/jit/bcsave.lua ++++ b/src/jit/bcsave.lua +@@ -105,6 +105,7 @@ local map_arch = { + mips64r6 = { e = "be", b = 64, m = 8, f = 0xa0000407, }, + mips64r6el = { e = "le", b = 64, m = 8, f = 0xa0000407, }, + s390x = { e = "be", b = 64, m = 22, }, ++ riscv64 = { e = "le", b = 64, m = 243, f = 0x00000004, }, + } + + local map_os = { +--- a/src/lj_gdbjit.c ++++ b/src/lj_gdbjit.c +@@ -306,6 +306,9 @@ enum { + #elif LJ_TARGET_MIPS + DW_REG_SP = 29, + DW_REG_RA = 31, ++#elif LJ_TARGET_RISCV64 ++ DW_REG_SP = 2, ++ DW_REG_RA = 1, + #else + #error "Unsupported target architecture" + #endif +@@ -383,6 +386,8 @@ static const ELFheader elfhdr_template = + .machine = 20, + #elif LJ_TARGET_MIPS + .machine = 8, ++#elif LJ_TARGET_RISCV64 ++ .machine = 243, + #else + #error "Unsupported target architecture" + #endif +@@ -591,6 +596,16 @@ static void LJ_FASTCALL gdbjit_ehframe(G + for (i = 23; i >= 16; i--) { DB(DW_CFA_offset|i); DUV(26-i); } + for (i = 30; i >= 20; i -= 2) { DB(DW_CFA_offset|32|i); DUV(42-i); } + } ++#elif LJ_TARGET_RISCV64 ++ { ++ int i; ++ for (i = 27; i >= 18; i--) { DB(DW_CFA_offset|i); DUV(27-i+7); } ++ DB(DW_CFA_offset|9); DUV(17); ++ DB(DW_CFA_offset|8); DUV(18); ++ for (i = 27; i >= 18; i--) { DB(DW_CFA_offset|32|i); DUV(27-i+19); } ++ DB(DW_CFA_offset|32|9); DUV(29); ++ DB(DW_CFA_offset|32|8); DUV(30); + } -+ break; + #else + #error "Unsupported target architecture" + #endif +--- a/src/lj_mcode.c ++++ b/src/lj_mcode.c +@@ -38,6 +38,12 @@ + void 
sys_icache_invalidate(void *start, size_t len); + #endif + ++#if LJ_TARGET_RISCV64 && LJ_TARGET_LINUX ++#include ++#include ++#include ++#endif + -+ case BC_TGETV: -+ | // RA = dst*8, RB = table*8, RC = key*8 -+ | decode_RB8 RB, INS -+ | decode_RDtoRC8 RC, RD -+ | add CARG2, BASE, RB -+ | add CARG3, BASE, RC -+ | ld TAB:RB, 0(CARG2) -+ | ld TMP2, 0(CARG3) -+ | add RA, BASE, RA -+ | checktab TAB:RB, ->vmeta_tgetv -+ | gettp TMP3, TMP2 -+ | lw TMP0, TAB:RB->asize -+ | bne TMP3, TISNUM, >5 // Integer key? -+ | sext.w TMP2, TMP2 -+ | ld TMP1, TAB:RB->array -+ | bxgeu TMP2, TMP0, ->vmeta_tgetv // Integer key and in array part? -+ | slliw TMP2, TMP2, 3 -+ | add TMP2, TMP1, TMP2 -+ | ld CRET1, 0(TMP2) -+ | beq CRET1, TISNIL, >2 -+ |1: -+ | sd CRET1, 0(RA) -+ | ins_next -+ | -+ |2: // Check for __index if table value is nil. -+ | ld TAB:TMP2, TAB:RB->metatable -+ | beqz TAB:TMP2, <1 // No metatable: done. -+ | lbu TMP0, TAB:TMP2->nomm -+ | andi TMP0, TMP0, 1<vmeta_tgetv -+ | -+ |5: -+ | li TMP0, LJ_TSTR -+ | cleartp RC, TMP2 -+ | bxne TMP3, TMP0, ->vmeta_tgetv // String key? -+ | j ->BC_TGETS_Z -+ break; -+ case BC_TGETS: -+ | // RA = dst*8, RB = table*8, RC = str_const*8 (~) -+ | decode_RB8 RB, INS -+ | decode_RDtoRC8 RC, RD -+ | add CARG2, BASE, RB -+ | sub CARG3, KBASE, RC -+ | ld TAB:RB, 0(CARG2) -+ | add RA, BASE, RA -+ | ld STR:RC, -8(CARG3) // KBASE-8-str_const*8 -+ | checktab TAB:RB, ->vmeta_tgets1 -+ |->BC_TGETS_Z: -+ | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 -+ | lw TMP0, TAB:RB->hmask -+ | lw TMP1, STR:RC->sid -+ | ld NODE:TMP2, TAB:RB->node -+ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask -+ | slliw TMP0, TMP1, 5 -+ | slliw TMP1, TMP1, 3 -+ | subw TMP1, TMP0, TMP1 -+ | li TMP3, LJ_TSTR -+ | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) -+ | settp STR:RC, TMP3 // Tagged key to look for. 
-+ |1: -+ | ld CARG1, NODE:TMP2->key -+ | ld CARG2, NODE:TMP2->val -+ | ld NODE:TMP1, NODE:TMP2->next -+ | ld TAB:TMP3, TAB:RB->metatable -+ | bne CARG1, RC, >4 -+ | beq CARG2, TISNIL, >5 // Key found, but nil value? -+ |3: -+ | sd CARG2, 0(RA) -+ | ins_next -+ | -+ |4: // Follow hash chain. -+ | mv NODE:TMP2, NODE:TMP1 -+ | bnez NODE:TMP1, <1 -+ | // End of hash chain: key not found, nil result. -+ | -+ |5: // Check for __index if table value is nil. -+ | mv CARG2, TISNIL -+ | beqz TAB:TMP3, <3 // No metatable: done. -+ | lbu TMP0, TAB:TMP3->nomm -+ | andi TMP0, TMP0, 1<vmeta_tgets -+ break; -+ case BC_TGETB: -+ | // RA = dst*8, RB = table*8, RC = index*8 -+ | decode_RB8 RB, INS -+ | add CARG2, BASE, RB -+ | decode_RDtoRC8 RC, RD -+ | ld TAB:RB, 0(CARG2) -+ | add RA, BASE, RA -+ | srliw TMP0, RC, 3 -+ | checktab TAB:RB, ->vmeta_tgetb -+ | lw TMP1, TAB:RB->asize -+ | ld TMP2, TAB:RB->array -+ | bxgeu TMP0, TMP1, ->vmeta_tgetb -+ | add RC, TMP2, RC -+ | ld CRET1, 0(RC) -+ | beq CRET1, TISNIL, >5 -+ |1: -+ | sd CRET1, 0(RA) -+ | ins_next -+ | -+ |5: // Check for __index if table value is nil. -+ | ld TAB:TMP2, TAB:RB->metatable -+ | beqz TAB:TMP2, <1 // No metatable: done. -+ | lbu TMP1, TAB:TMP2->nomm -+ | andi TMP1, TMP1, 1<vmeta_tgetb // Caveat: preserve TMP0 and CARG2! -+ break; -+ case BC_TGETR: -+ | // RA = dst*8, RB = table*8, RC = key*8 -+ | decode_RB8 RB, INS -+ | decode_RDtoRC8 RC, RD -+ | add RB, BASE, RB -+ | add RC, BASE, RC -+ | ld TAB:CARG1, 0(RB) -+ | lw CARG2, 0(RC) -+ | add RA, BASE, RA -+ | cleartp TAB:CARG1 -+ | lw TMP0, TAB:CARG1->asize -+ | ld TMP1, TAB:CARG1->array -+ | bxgeu CARG2, TMP0, ->vmeta_tgetr // In array part? -+ | slliw TMP2, CARG2, 3 -+ | add TMP3, TMP1, TMP2 -+ | ld TMP1, 0(TMP3) -+ |->BC_TGETR_Z: -+ | ins_next1 -+ | sd TMP1, 0(RA) -+ | ins_next2 -+ break; + /* Synchronize data/instruction cache. 
*/ + void lj_mcode_sync(void *start, void *end) + { +@@ -52,6 +58,17 @@ void lj_mcode_sync(void *start, void *en + sys_icache_invalidate(start, (char *)end-(char *)start); + #elif LJ_TARGET_PPC + lj_vm_cachesync(start, end); ++#elif LJ_TARGET_RISCV64 && LJ_TARGET_LINUX ++#if (defined(__GNUC__) || defined(__clang__)) ++ __asm__ volatile("fence rw, rw"); ++#else ++ lj_vm_fence_rw_rw(); ++#endif ++#ifdef __GLIBC__ ++ __riscv_flush_icache(start, end, 0); ++#else ++ syscall(__NR_riscv_flush_icache, start, end, 0UL); ++#endif + #elif defined(__GNUC__) || defined(__clang__) + __clear_cache(start, end); + #else +--- a/src/lj_alloc.c ++++ b/src/lj_alloc.c +@@ -365,7 +365,7 @@ static void *CALL_MREMAP_(void *ptr, siz + #define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv)) + #define CALL_MREMAP_NOMOVE 0 + #define CALL_MREMAP_MAYMOVE 1 +-#if LJ_64 && (!LJ_GC64 || LJ_TARGET_ARM64) ++#if LJ_64 && (!LJ_GC64 || LJ_TARGET_ARM64 || LJ_TARGET_RISCV64) + #define CALL_MREMAP_MV CALL_MREMAP_NOMOVE + #else + #define CALL_MREMAP_MV CALL_MREMAP_MAYMOVE +--- /dev/null ++++ b/src/jit/dis_riscv.lua +@@ -0,0 +1,979 @@ ++------------------------------------------------------------------------------ ++-- LuaJIT RISC-V disassembler module. ++-- ++-- Copyright (C) 2022-2026 ISRC, ISCAS. All rights reserved. ++-- Released under the MIT license. See Copyright Notice in luajit.h ++-- ++-- Contributed by Milos Poletanovic from Syrmia.com. ++-- Contributed by gns from PLCT Lab, ISRC, ISCAS. ++------------------------------------------------------------------------------ ++-- This is a helper module used by the LuaJIT machine code dumper module. ++-- ++-- It disassembles most standard RISC-V instructions. 
++-- Mode is little-endian ++------------------------------------------------------------------------------ + -+ case BC_TSETV: -+ | // RA = src*8, RB = table*8, RC = key*8 -+ | decode_RB8 RB, INS -+ | decode_RDtoRC8 RC, RD -+ | add CARG2, BASE, RB -+ | add CARG3, BASE, RC -+ | ld TAB:RB, 0(CARG2) -+ | ld TMP2, 0(CARG3) -+ | add RA, BASE, RA -+ | checktab TAB:RB, ->vmeta_tsetv -+ | sext.w RC, TMP2 -+ | checkint TMP2, >5 -+ | lw TMP0, TAB:RB->asize -+ | ld TMP1, TAB:RB->array -+ | bxgeu RC, TMP0, ->vmeta_tsetv // Integer key and in array part? -+ | slliw TMP2, RC, 3 -+ | add TMP1, TMP1, TMP2 -+ | lbu TMP3, TAB:RB->marked -+ | ld TMP0, 0(TMP1) -+ | ld CRET1, 0(RA) -+ | beq TMP0, TISNIL, >3 -+ |1: -+ | andi TMP2, TMP3, LJ_GC_BLACK // isblack(table) -+ | sd CRET1, 0(TMP1) -+ | bnez TMP2, >7 -+ |2: -+ | ins_next -+ | -+ |3: // Check for __newindex if previous value is nil. -+ | ld TAB:TMP2, TAB:RB->metatable -+ | beqz TAB:TMP2, <1 // No metatable: done. -+ | lbu TMP2, TAB:TMP2->nomm -+ | andi TMP2, TMP2, 1<vmeta_tsetv -+ |5: -+ | gettp TMP0, TMP2 -+ | addi TMP0, TMP0, -LJ_TSTR -+ | bxnez TMP0, ->vmeta_tsetv -+ | cleartp STR:RC, TMP2 -+ | j ->BC_TSETS_Z // String key? -+ | -+ |7: // Possible table write barrier for the value. Skip valiswhite check. -+ | barrierback TAB:RB, TMP3, TMP0, <2 -+ break; -+ case BC_TSETS: -+ | // RA = src*8, RB = table*8, RC = str_const*8 (~) -+ | decode_RB8 RB, INS -+ | decode_RDtoRC8 RC, RD -+ | add CARG2, BASE, RB -+ | sub CARG3, KBASE, RC -+ | ld TAB:RB, 0(CARG2) -+ | ld RC, -8(CARG3) // KBASE-8-str_const*8 -+ | add RA, BASE, RA -+ | cleartp STR:RC -+ | checktab TAB:RB, ->vmeta_tsets1 -+ |->BC_TSETS_Z: -+ | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8 -+ | lw TMP0, TAB:RB->hmask -+ | lw TMP1, STR:RC->sid -+ | ld NODE:TMP2, TAB:RB->node -+ | sb x0, TAB:RB->nomm // Clear metamethod cache. 
-+ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask -+ | slliw TMP0, TMP1, 5 -+ | slliw TMP1, TMP1, 3 -+ | subw TMP1, TMP0, TMP1 -+ | li TMP3, LJ_TSTR -+ | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) -+ | settp STR:RC, TMP3 // Tagged key to look for. -+ |1: -+ | ld TMP0, NODE:TMP2->key -+ | ld CARG2, NODE:TMP2->val -+ | ld NODE:TMP1, NODE:TMP2->next -+ | lbu TMP3, TAB:RB->marked -+ | bne TMP0, RC, >5 -+ | ld CARG1, 0(RA) -+ | ld TAB:TMP0, TAB:RB->metatable -+ | beq CARG2, TISNIL, >4 // Key found, but nil value? -+ |2: -+ | andi TMP4, TMP3, LJ_GC_BLACK // isblack(table) -+ | sd CARG1, NODE:TMP2->val -+ | bnez TMP4, >7 -+ |3: -+ | ins_next -+ | -+ |4: // Check for __newindex if previous value is nil. -+ | beqz TAB:TMP0, <2 // No metatable: done. -+ | lbu TMP0, TAB:TMP0->nomm -+ | andi TMP0, TMP0, 1<vmeta_tsets -+ | -+ |5: // Follow hash chain. -+ | mv NODE:TMP2, NODE:TMP1 -+ | bnez NODE:TMP1, <1 -+ | // End of hash chain: key not found, add a new one -+ | -+ | // But check for __newindex first. -+ | ld TAB:TMP2, TAB:RB->metatable -+ | addi CARG3, GL, offsetof(global_State, tmptv) -+ | beqz TAB:TMP2, >6 // No metatable: continue. -+ | lbu TMP0, TAB:TMP2->nomm -+ | andi TMP0, TMP0, 1<vmeta_tsets // 'no __newindex' flag NOT set: check. -+ |6: -+ | sd RC, 0(CARG3) -+ | sd BASE, L->base -+ | mv CARG2, TAB:RB -+ | sd PC, SAVE_PC(sp) -+ | mv CARG1, L -+ | // (lua_State *L, GCtab *t, TValue *k) -+ | call_intern BC_TSETS, lj_tab_newkey -+ | // Returns TValue *. -+ | ld TMP0, 0(RA) -+ | ld BASE, L->base -+ | sd TMP0, 0(CRET1) -+ | j <3 // No 2nd write barrier needed. -+ | -+ |7: // Possible table write barrier for the value. Skip valiswhite check. 
-+ | barrierback TAB:RB, TMP3, TMP0, <3 -+ break; -+ case BC_TSETB: -+ | // RA = src*8, RB = table*8, RC = index*8 -+ | decode_RB8 RB, INS -+ | decode_RDtoRC8 RC, RD -+ | add CARG2, BASE, RB -+ | add RA, BASE, RA -+ | ld TAB:RB, 0(CARG2) -+ | srliw TMP0, RC, 3 -+ | checktab RB, ->vmeta_tsetb -+ | lw TMP1, TAB:RB->asize -+ | ld TMP2, TAB:RB->array -+ | bxgeu TMP0, TMP1, ->vmeta_tsetb -+ | add RC, TMP2, RC -+ | ld TMP1, 0(RC) -+ | lbu TMP3, TAB:RB->marked -+ | beq TMP1, TISNIL, >5 -+ |1: -+ | ld CRET1, 0(RA) -+ | andi TMP1, TMP3, LJ_GC_BLACK // isblack(table) -+ | sd CRET1, 0(RC) -+ | bnez TMP1, >7 -+ |2: -+ | ins_next -+ | -+ |5: // Check for __newindex if previous value is nil. -+ | ld TAB:TMP2, TAB:RB->metatable -+ | beqz TAB:TMP2, <1 // No metatable: done. -+ | lbu TMP1, TAB:TMP2->nomm -+ | andi TMP1, TMP1, 1<vmeta_tsetb // Caveat: preserve TMP0 and CARG2! -+ | -+ |7: // Possible table write barrier for the value. Skip valiswhite check. -+ | barrierback TAB:RB, TMP3, TMP0, <2 -+ break; -+ case BC_TSETR: -+ | // RA = dst*8, RB = table*8, RC = key*8 -+ | decode_RB8 RB, INS -+ | decode_RDtoRC8 RC, RD -+ | add CARG1, BASE, RB -+ | add CARG3, BASE, RC -+ | ld TAB:CARG2, 0(CARG1) -+ | lw CARG3, 0(CARG3) -+ | cleartp TAB:CARG2 -+ | lbu TMP3, TAB:CARG2->marked -+ | lw TMP0, TAB:CARG2->asize -+ | ld TMP1, TAB:CARG2->array -+ | andi TMP2, TMP3, LJ_GC_BLACK // isblack(table) -+ | add RA, BASE, RA -+ | bnez TMP2, >7 -+ |2: -+ | bxgeu CARG3, TMP0, ->vmeta_tsetr // In array part? -+ | slliw TMP2, CARG3, 3 -+ | add CRET1, TMP1, TMP2 -+ |->BC_TSETR_Z: -+ | ld TMP1, 0(RA) -+ | ins_next1 -+ | sd TMP1, 0(CRET1) -+ | ins_next2 -+ | -+ |7: // Possible table write barrier for the value. Skip valiswhite check. 
-+ | barrierback TAB:CARG2, TMP3, CRET1, <2 -+ break; ++local type = type ++local byte, format = string.byte, string.format ++local match, gmatch = string.match, string.gmatch ++local concat = table.concat ++local bit = require("bit") ++local band, bor, tohex = bit.band, bit.bor, bit.tohex ++local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift ++local jit = require("jit") ++ ++local jstat = { jit.status() } ++local function is_opt_enabled(opt) ++ for _, v in ipairs(jstat) do ++ if v == opt then ++ return true ++ end ++ end ++ return false ++end ++local xthead = is_opt_enabled("XThead") ++ ++------------------------------------------------------------------------------ ++-- Opcode maps ++------------------------------------------------------------------------------ ++ ++--RVC32 extension ++ ++local map_quad0 = { ++ shift = 13, mask = 7, ++ [0] = "c.addi4spnZW", "c.fldNMh", "c.lwZMn", "c.flwNMn", ++ false, "c.fsdNMh", "c.swZMn", "c.fswNMn" ++} ++ ++local map_sub2quad1 = { ++ shift = 5, mask = 3, ++ [0] = "c.subMZ", "c.xorMZ", "c.orMZ", "c.andMZ" ++} ++ ++local map_sub1quad1 = { ++ shift = 10, mask = 3, ++ [0] = "c.srliM1", "c.sraiM1", "c.andiMx", map_sub2quad1 ++} + -+ case BC_TSETM: -+ | // RA = base*8 (table at base-1), RD = num_const*8 (start index) -+ | add RA, BASE, RA -+ |1: -+ | add TMP3, KBASE, RD -+ | ld TAB:CARG2, -8(RA) // Guaranteed to be a table. -+ | addiw TMP0, MULTRES, -8 -+ | lw TMP3, 0(TMP3) // Integer constant is in lo-word. -+ | srliw CARG3, TMP0, 3 -+ | beqz TMP0, >4 // Nothing to copy? -+ | cleartp TAB:CARG2 -+ | addw CARG3, CARG3, TMP3 -+ | lw TMP2, TAB:CARG2->asize -+ | slliw TMP1, TMP3, 3 -+ | lbu TMP3, TAB:CARG2->marked -+ | ld CARG1, TAB:CARG2->array -+ | bltu TMP2, CARG3, >5 -+ | add TMP2, RA, TMP0 -+ | add TMP1, TMP1, CARG1 -+ | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table) -+ |3: // Copy result slots to table. 
-+ | ld CRET1, 0(RA) -+ | addi RA, RA, 8 -+ | sd CRET1, 0(TMP1) -+ | addi TMP1, TMP1, 8 -+ | bltu RA, TMP2, <3 -+ | bnez TMP0, >7 -+ |4: -+ | ins_next -+ | -+ |5: // Need to resize array part. -+ | sd BASE, L->base -+ | sd PC, SAVE_PC(sp) -+ | mv BASE, RD -+ | mv CARG1, L -+ | // (lua_State *L, GCtab *t, int nasize) -+ | call_intern BC_TSETM, lj_tab_reasize -+ | // Must not reallocate the stack. -+ | mv RD, BASE -+ | ld BASE, L->base // Reload BASE for lack of a saved register. -+ | j <1 -+ | -+ |7: // Possible table write barrier for any value. Skip valiswhite check. -+ | barrierback TAB:CARG2, TMP3, TMP0, <4 -+ break; ++local map_quad1 = { ++ shift = 13, mask = 7, ++ [0] = { ++ shift = 7, mask = 31, ++ [0] = "c.nop", _ = "c.addiDx" ++ }, ++ [1] = "c.jalT", [2] = "c.liDx", ++ [3] = { ++ shift = 7, mask = 31, ++ [0] = "c.luiDK", [1] = "c.luiDK", [2] = "c.addi16spX", ++ _ = "c.luiDK" ++ }, ++ [4] = map_sub1quad1, [5] = "c.jT", [6] = "c.beqzMq", [7] = "c.bnezMq" ++} + -+ /* -- Calls and vararg handling ----------------------------------------- */ ++local map_sub1quad2 = { ++ shift = 12, mask = 1, ++ [0] = { ++ shift = 2, mask = 31, ++ [0] = "c.jrD", _ = "c.mvDE" ++ }, ++ [1] = { ++ shift = 2, mask = 31, ++ [0] = { ++ shift = 7, mask = 31, ++ [0] = "c.ebreak", _ = "c.jalrD" ++ }, ++ _ = "c.addDE" ++ } ++} + -+ case BC_CALLM: -+ | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8 -+ | decode_RDtoRC8 NARGS8:RC, RD -+ | addw NARGS8:RC, NARGS8:RC, MULTRES -+ | j ->BC_CALL_Z -+ break; -+ case BC_CALL: -+ | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8 -+ | decode_RDtoRC8 NARGS8:RC, RD -+ |->BC_CALL_Z: -+ | mv TMP2, BASE -+ | add BASE, BASE, RA -+ | ld LFUNC:RB, 0(BASE) -+ | addi BASE, BASE, 16 -+ | addiw NARGS8:RC, NARGS8:RC, -8 -+ | checkfunc RB, ->vmeta_call -+ | ins_call -+ break; ++local map_quad2 = { ++ shift = 13, mask = 7, ++ [0] = "c.slliD1", [1] = "c.fldspFQ",[2] = "c.lwspDY", [3] = "c.flwspFY", ++ [4] = map_sub1quad2, [5] = "c.fsdspVt", [6] = 
"c.swspEu", [7] = "c.fswspVu" ++} ++ ++local map_compr = { ++ [0] = map_quad0, map_quad1, map_quad2 ++} ++ ++--RV32M ++local map_mext = { ++ shift = 12, mask = 7, ++ [0] = "mulDRr", "mulhDRr", "mulhsuDRr", "mulhuDRr", ++ "divDRr", "divuDRr", "remDRr", "remuDRr" ++} ++ ++--RV64M ++local map_mext64 = { ++ shift = 12, mask = 7, ++ [0] = "mulwDRr", [4] = "divwDRr", [5] = "divuwDRr", [6] = "remwDRr", ++ [7] = "remuwDRr" ++} ++ ++--RV32F, RV64F, RV32D, RV64D ++local map_fload = { ++ shift = 12, mask = 7, ++ [2] = "flwFL", [3] = "fldFL" ++} ++ ++local map_fstore = { ++ shift = 12, mask = 7, ++ [2] = "fswSg", [3] = "fsdSg" ++} ++ ++local map_fmadd = { ++ shift = 25, mask = 3, ++ [0] = "fmadd.sFGgHo", "fmadd.dFGgHo" ++} ++ ++local map_fmsub = { ++ shift = 25, mask = 3, ++ [0] = "fmsub.sFGgHo", "fmsub.dFGgHo" ++} ++ ++local map_fnmsub = { ++ shift = 25, mask = 3, ++ [0] = "fnmsub.sFGgHo", "fnmsub.dFGgHo" ++} ++ ++local map_fnmadd = { ++ shift = 25, mask = 3, ++ [0] = "fnmadd.sFGgHo", "fnmadd.dFGgHo" ++} ++ ++local map_fsgnjs = { ++ shift = 12, mask = 7, ++ [0] = "fsgnj.s|fmv.sFGg6", "fsgnjn.s|fneg.sFGg6", "fsgnjx.s|fabs.sFGg6" ++} ++ ++local map_fsgnjd = { ++ shift = 12, mask = 7, ++ [0] = "fsgnj.d|fmv.dFGg6", "fsgnjn.d|fneg.dFGg6", "fsgnjx.d|fabs.dFGg6" ++} ++ ++local map_fms = { ++ shift = 12, mask = 7, ++ [0] = "fmin.sFGg", "fmax.sFGg", "fminm.sFGg", "fmaxm.sFGg" ++} ++ ++local map_fmd = { ++ shift = 12, mask = 7, ++ [0] = "fmin.dFGg", "fmax.dFGg", "fminm.dFGg", "fmaxm.dFGg" ++} ++ ++local map_fcomps = { ++ shift = 12, mask = 7, ++ [0] = "fle.sDGg", "flt.sDGg", "feq.sDGg", ++ [4] = "fleq.sDGg", "fltq.sDGg" ++} ++ ++local map_fcompd = { ++ shift = 12, mask = 7, ++ [0] = "fle.dDGg", "flt.dDGg", "feq.dDGg", ++ [4] = "fleq.dDGg", "fltq.dDGg" ++} ++ ++local map_fcvtwls = { ++ shift = 20, mask = 31, ++ [0] = "fcvt.w.sDGo", "fcvt.wu.sDGo", "fcvt.l.sDGo", "fcvt.lu.sDGo" ++} ++ ++local map_fcvtwld = { ++ shift = 20, mask = 31, ++ [0] = "fcvt.w.dDGo", "fcvt.wu.dDGo", "fcvt.l.dDGo", 
"fcvt.lu.dDGo", ++ [8] = { ++ shift = 12, mask = 7, ++ [1] = "fcvtmodw.dDG" ++ } ++} ++ ++local map_fcvts = { ++ shift = 20, mask = 31, ++ [0] = "fcvt.s.wFRo", "fcvt.s.wuFRo", "fcvt.s.lFRo", "fcvt.s.luFRo" ++} ++ ++local map_fcvtd = { ++ shift = 20, mask = 31, ++ [0] = "fcvt.d.wFRo", "fcvt.d.wuFRo", "fcvt.d.lFRo", "fcvt.d.luFRo" ++} ++ ++local map_fcvtsd = { ++ shift = 20, mask = 31, ++ [0] = "fcvt.s.dFGo", ++ [4] = "fround.sFGo", [5] = "froundnx.sFGo" ++} ++ ++local map_fcvtds = { ++ shift = 20, mask = 31, ++ [0] = "fcvt.d.sFGo", ++ [4] = "fround.dFGo", [5] = "froundnx.dFGo" ++} ++ ++local map_fmvwx = { ++ shift = 20, mask = 31, ++ [0] = "fmv.w.xFR", [1] = "fli.sFy" ++} ++ ++local map_fmvdx = { ++ shift = 20, mask = 31, ++ [0] = "fmv.d.xFR", [1] = "fli.dFy" ++} ++ ++local map_fext = { ++ shift = 25, mask = 127, ++ [0] = "fadd.sFGgo", [1] = "fadd.dFGgo", [4] = "fsub.sFGgo", [5] = "fsub.dFGgo", ++ [8] = "fmul.sFGgo", [9] = "fmul.dFGgo", [12] = "fdiv.sFGgo", [13] = "fdiv.dFGgo", ++ [16] = map_fsgnjs, [17] = map_fsgnjd, [20] = map_fms, [21] = map_fmd, ++ [32] = map_fcvtsd, [33] = map_fcvtds,[44] = "fsqrt.sFGo", [45] = "fsqrt.dFGo", ++ [80] = map_fcomps, [81] = map_fcompd, [96] = map_fcvtwls, [97] = map_fcvtwld, ++ [104] = map_fcvts, [105] = map_fcvtd, ++ [112] = { ++ shift = 12, mask = 7, ++ [0] = "fmv.x.wDG", "fclass.sDG" ++ }, ++ [113] = { ++ shift = 12, mask = 7, ++ [0] = "fmv.x.dDG", "fclass.dDG" ++ }, ++ [120] = map_fmvwx, [121] = map_fmvdx ++} ++ ++--RV32A, RV64A ++local map_aext = { ++ shift = 27, mask = 31, ++ [0] = { ++ shift = 12, mask = 7, ++ [2] = "amoadd.wDrO", [3] = "amoadd.dDrO" ++ }, ++ { ++ shift = 12, mask = 7, ++ [2] = "amoswap.wDrO", [3] = "amoswap.dDrO" ++ }, ++ { ++ shift = 12, mask = 7, ++ [2] = "lr.wDO", [3] = "lr.dDO" ++ }, ++ { ++ shift = 12, mask = 7, ++ [2] = "sc.wDrO", [3] = "sc.dDrO" ++ }, ++ { ++ shift = 12, mask = 7, ++ [2] = "amoxor.wDrO", [3] = "amoxor.dDrO" ++ }, ++ [8] = { ++ shift = 12, mask = 7, ++ [2] = "amoor.wDrO", [3] = 
"amoor.dDrO" ++ }, ++ [12] = { ++ shift = 12, mask = 7, ++ [2] = "amoand.wDrO", [3] = "amoand.dDrO" ++ }, ++ [16] = { ++ shift = 12, mask = 7, ++ [2] = "amomin.wDrO", [3] = "amomin.dDrO" ++ }, ++ [20] = { ++ shift = 12, mask = 7, ++ [2] = "amomax.wDrO", [3] = "amomax.dDrO" ++ }, ++ [24] = { ++ shift = 12, mask = 7, ++ [2] = "amominu.wDrO", [3] = "amominu.dDrO" ++ }, ++ [28] = { ++ shift = 12, mask = 7, ++ [2] = "amomaxu.wDrO", [3] = "amomaxu.dDrO" ++ }, ++} + -+ case BC_CALLMT: -+ | // RA = base*8, (RB = 0,) RC = extra_nargs*8 -+ | addw NARGS8:RD, NARGS8:RD, MULTRES -+ | // Fall through. Assumes BC_CALLT follows. -+ break; -+ case BC_CALLT: -+ | // RA = base*8, (RB = 0,) RC = (nargs+1)*8 -+ | add RA, BASE, RA -+ | ld LFUNC:RB, 0(RA) -+ | mv NARGS8:RC, RD -+ | ld TMP1, FRAME_PC(BASE) -+ | addi RA, RA, 16 -+ | addiw NARGS8:RC, NARGS8:RC, -8 -+ | checktp CARG3, LFUNC:RB, -LJ_TFUNC, ->vmeta_callt -+ |->BC_CALLT_Z: -+ | andi TMP0, TMP1, FRAME_TYPE // Caveat: preserve TMP0 until the 'or'. -+ | lbu TMP3, LFUNC:CARG3->ffid -+ | xori TMP2, TMP1, FRAME_VARG -+ | bnez TMP0, >7 -+ |1: -+ | sd LFUNC:RB, FRAME_FUNC(BASE) // Copy function down, but keep PC. -+ | sltiu CARG4, TMP3, 2 // (> FF_C) Calling a fast function? -+ | mv TMP2, BASE -+ | mv RB, CARG3 -+ | mv TMP3, NARGS8:RC -+ | beqz NARGS8:RC, >3 -+ |2: -+ | ld CRET1, 0(RA) -+ | addi RA, RA, 8 -+ | addiw TMP3, TMP3, -8 -+ | sd CRET1, 0(TMP2) -+ | addi TMP2, TMP2, 8 -+ | bnez TMP3, <2 -+ |3: -+ | or TMP0, TMP0, CARG4 -+ | beqz TMP0, >5 -+ |4: -+ | ins_callt -+ | -+ |5: // Tailcall to a fast function with a Lua frame below. -+ | lw INS, -4(TMP1) -+ | decode_RA8 RA, INS -+ | sub TMP1, BASE, RA -+ | ld TMP1, -32(TMP1) -+ | cleartp LFUNC:TMP1 -+ | ld TMP1, LFUNC:TMP1->pc -+ | ld KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE. -+ | j <4 -+ | -+ |7: // Tailcall from a vararg function. -+ | andi CARG4, TMP2, FRAME_TYPEP -+ | sub TMP2, BASE, TMP2 // Relocate BASE down. -+ | bnez CARG4, <1 // Vararg frame below? 
-+ | mv BASE, TMP2 -+ | ld TMP1, FRAME_PC(TMP2) -+ | andi TMP0, TMP1, FRAME_TYPE -+ | j <1 -+ break; ++-- RV32I, RV64I ++local map_load = { ++ shift = 12, mask = 7, ++ [0] = "lbDL", "lhDL", "lwDL", "ldDL", ++ "lbuDL", "lhuDL", "lwuDL" ++} + -+ case BC_ITERC: -+ | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8)) -+ | mv TMP2, BASE // Save old BASE for vmeta_call. -+ | add BASE, BASE, RA -+ | ld RB, -24(BASE) //A, A+1, A+2 = A-3, A-2, A-1. -+ | ld CARG1, -16(BASE) -+ | ld CARG2, -8(BASE) -+ | li NARGS8:RC, 16 // Iterators get 2 arguments. -+ | sd RB, 0(BASE) // Copy callable. -+ | sd CARG1, 16(BASE) // Copy state. -+ | sd CARG2, 24(BASE) // Copy control var. -+ | addi BASE, BASE, 16 -+ | checkfunc RB, ->vmeta_call -+ | ins_call -+ break; ++local map_opimm = { ++ shift = 12, mask = 7, ++ [0] = { ++ shift = 7, mask = 0x1ffffff, ++ [0] = "nop", _ = "addi|li|mvDR0I2" ++ }, ++ { ++ shift = 25, mask = 127, ++ [48] = { ++ shift = 20, mask = 31, ++ [4] = "sext.bDR", [5] = "sext.hDR" ++ }, ++ _ = "slliDRi", ++ }, "sltiDRI", "sltiu|seqzDRI5", ++ "xori|notDRI4", ++ { ++ shift = 26, mask = 63, ++ [0] = "srliDRi", [16] = "sraiDRi", [24] = "roriDRi", ++ [26] = { ++ shift = 20, mask = 63, ++ [56] = "rev8DR" ++ } ++ }, ++ "oriDRI", "andiDRI" ++} + -+ case BC_ITERN: -+ | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) -+ |.if JIT -+ | hotloop -+ |.endif -+ |->vm_IITERN: -+ | add RA, BASE, RA -+ | ld TAB:RB, -16(RA) -+ | lw RC, -8(RA) // Get index from control var. -+ | cleartp TAB:RB -+ | addi PC, PC, 4 -+ | lw TMP0, TAB:RB->asize -+ | ld TMP1, TAB:RB->array -+ | slli CARG3, TISNUM, 47 -+ |1: // Traverse array part. -+ | bleu TMP0, RC, >5 // Index points after array part? -+ | slliw TMP3, RC, 3 -+ | add TMP3, TMP1, TMP3 -+ | ld CARG1, 0(TMP3) -+ | lhu RD, -4+OFS_RD(PC) // ITERL RD -+ | or TMP2, RC, CARG3 -+ | addiw RC, RC, 1 -+ | beq CARG1, TISNIL, <1 // Skip holes in array part. 
-+ | sd TMP2, 0(RA) -+ | sd CARG1, 8(RA) -+ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 -+ | decode_BC4b RD -+ | add RD, RD, TMP3 -+ | sw RC, -8(RA) // Update control var. -+ | add PC, PC, RD -+ |3: -+ | ins_next -+ | -+ |5: // Traverse hash part. -+ | lw TMP1, TAB:RB->hmask -+ | subw RC, RC, TMP0 -+ | ld TMP2, TAB:RB->node -+ |6: -+ | bltu TMP1, RC, <3 // End of iteration? Branch to ITERL+1. -+ | slliw TMP3, RC, 5 -+ | slliw RB, RC, 3 -+ | subw TMP3, TMP3, RB -+ | add NODE:TMP3, TMP3, TMP2 // node = tab->node + (idx*32-idx*8) -+ | ld CARG1, 0(NODE:TMP3) -+ | lhu RD, -4+OFS_RD(PC) // ITERL RD -+ | addiw RC, RC, 1 -+ | beq CARG1, TISNIL, <6 // Skip holes in hash part. -+ | ld CARG2, NODE:TMP3->key -+ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 -+ | sd CARG1, 8(RA) -+ | addw RC, RC, TMP0 -+ | decode_BC4b RD -+ | addw RD, RD, TMP3 -+ | sd CARG2, 0(RA) -+ | add PC, PC, RD -+ | sw RC, -8(RA) // Update control var. -+ | j <3 -+ break; ++local map_branch = { ++ shift = 12, mask = 7, ++ [0] = "beq|beqzRr0B", "bne|bnezRr0B" , false, false, ++ "blt|bgtz|bltzR0r2B", "bge|blez|bgezR0r2B", "bltuRrB", "bgeuRrB" ++} + -+ case BC_ISNEXT: -+ | // RA = base*8, RD = target (points to ITERN) -+ | add RA, BASE, RA -+ | srliw TMP0, RD, 1 -+ | ld CFUNC:CARG1, -24(RA) -+ | add TMP0, PC, TMP0 -+ | ld CARG2, -16(RA) -+ | ld CARG3, -8(RA) -+ | lui TMP2, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 -+ | checkfunc CFUNC:CARG1, >5 -+ | gettp CARG2, CARG2 -+ | addi CARG2, CARG2, -LJ_TTAB -+ | lbu TMP1, CFUNC:CARG1->ffid -+ | addi CARG3, CARG3, -LJ_TNIL -+ | or TMP3, CARG2, CARG3 -+ | addi TMP1, TMP1, -FF_next_N -+ | or TMP3, TMP3, TMP1 -+ | lui TMP1, ((LJ_KEYINDEX - (((LJ_KEYINDEX & 0xfff)^0x800) - 0x800)) >> 12) & 0xfffff -+ | bnez TMP3, >5 -+ | add PC, TMP0, TMP2 -+ | addi TMP1, TMP1, (((LJ_KEYINDEX & 0xfff)^0x800) - 0x800) -+ | slli TMP1, TMP1, 32 -+ | sd TMP1, -8(RA) -+ |1: -+ | ins_next -+ |5: // Despecialize bytecode if any of the checks fail. 
-+ | li TMP3, BC_JMP -+ | li TMP1, BC_ITERC -+ | sb TMP3, -4+OFS_OP(PC) -+ | add PC, TMP0, TMP2 -+ |.if JIT -+ | lb TMP0, OFS_OP(PC) -+ | li TMP3, BC_ITERN -+ | lhu TMP2, OFS_RD(PC) -+ | bne TMP0, TMP3, >6 -+ |.endif -+ | sb TMP1, OFS_OP(PC) -+ | j <1 -+ |.if JIT -+ |6: // Unpatch JLOOP. -+ | ld TMP0, GL_J(trace)(GL) // Assumes J.trace in-reach relative to GL. -+ | slliw TMP2, TMP2, 3 -+ | add TMP0, TMP0, TMP2 -+ | ld TRACE:TMP2, 0(TMP0) -+ | lw TMP0, TRACE:TMP2->startins -+ | andi TMP0, TMP0, -256 -+ | or TMP0, TMP0, TMP1 -+ | sw TMP0, 0(PC) -+ | j <1 -+ |.endif -+ break; ++local map_store = { ++ shift = 12, mask = 7, ++ [0] = "sbSr", "shSr", "swSr", "sdSr" ++} + -+ case BC_VARG: -+ | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8 -+ | ld TMP0, FRAME_PC(BASE) -+ | decode_RDtoRC8 RC, RD -+ | decode_RB8 RB, INS -+ | add RC, BASE, RC -+ | add RA, BASE, RA -+ | addi RC, RC, FRAME_VARG -+ | add TMP2, RA, RB -+ | addi TMP3, BASE, -16 // TMP3 = vtop -+ | sub RC, RC, TMP0 // RC = vbase -+ | // Note: RC may now be even _above_ BASE if nargs was < numparams. -+ | sub TMP1, TMP3, RC -+ | beqz RB, >5 // Copy all varargs? -+ | addi TMP2, TMP2, -16 -+ |1: // Copy vararg slots to destination slots. -+ | ld CARG1, 0(RC) -+ | sltu TMP0, RC, TMP3 -+ | addi RC, RC, 8 -+ | bnez TMP0, >2 -+ | mv CARG1, TISNIL -+ |2: -+ | sd CARG1, 0(RA) -+ | sltu TMP0, RA, TMP2 -+ | addi RA, RA, 8 -+ | bnez TMP0, <1 -+ |3: -+ | ins_next -+ | -+ |5: // Copy all varargs. -+ | ld TMP0, L->maxstack -+ | li MULTRES, 8 // MULTRES = (0+1)*8 -+ | blez TMP1, <3 // No vararg slots? -+ | add TMP2, RA, TMP1 -+ | addi MULTRES, TMP1, 8 -+ | bltu TMP0, TMP2, >7 -+ |6: -+ | ld CRET1, 0(RC) -+ | addi RC, RC, 8 -+ | sd CRET1, 0(RA) -+ | addi RA, RA, 8 -+ | bltu RC, TMP3, <6 // More vararg slots? -+ | j <3 -+ | -+ |7: // Grow stack for varargs. -+ | sd RA, L->top -+ | sub RA, RA, BASE -+ | sd BASE, L->base -+ | sub BASE, RC, BASE // Need delta, because BASE may change. 
-+ | sd PC, SAVE_PC(sp) -+ | srliw CARG2, TMP1, 3 -+ | mv CARG1, L -+ | call_intern BC_VARG, lj_state_growstack // (lua_State *L, int n) -+ | mv RC, BASE -+ | ld BASE, L->base -+ | add RA, BASE, RA -+ | add RC, BASE, RC -+ | addi TMP3, BASE, -16 -+ | j <6 -+ break; ++local map_op = { ++ shift = 25, mask = 127, ++ [0] = { ++ shift = 12, mask = 7, ++ [0] = "addDRr", "sllDRr", "slt|sgtz|sltzDR0r2", "sltu|snezDR0r", ++ "xorDRr", "srlDRr", "orDRr", "andDRr" ++ }, ++ [1] = map_mext, ++ [4] = { + -+ /* -- Returns ----------------------------------------------------------- */ ++ }, ++ [5] = { -- Zbb ++ shift = 12, mask = 7, ++ [4] = "minDRr", [5] = "minuDRr", [6] = "maxDRr", [7] = "maxuDRr" ++ }, ++ [7] = { -- Zicond ++ shift = 12, mask = 7, ++ [5] = "czero.eqzDRr", [7] = "czero.nezDRr" ++ }, ++ [16] = { -- Zba ++ shift = 12, mask = 7, ++ [2] = "sh1addDRr", [4] = "sh2addDRr", [6] = "sh3addDRr" ++ }, ++ [32] = { -- Zbb ++ shift = 12, mask = 7, ++ [0] = "sub|negDR0r", [4] = "xnorDRr", [5] = "sraDRr", [6] = "ornDRr", [7] = "andnDRr" ++ }, ++ [48] = { -- Zbb ++ shift = 12, mask = 7, ++ [1] = "rolDRr", [5] = "rorDRr" ++ } ++} + -+ case BC_RETM: -+ | // RA = results*8, RD = extra_nresults*8 -+ | addw RD, RD, MULTRES -+ | // Fall through. Assumes BC_RET follows. 
-+ break; ++--- 64I ++local map_opimm32 = { ++ shift = 12, mask = 7, ++ [0] = "addiw|sext.wDRI0", "slliwDRi", ++ [2] = { -- Zba ++ shift = 25, mask = 127, ++ [1] = "slli.uwDRi" ++ }, ++ [5] = { -- 64I ++ shift = 25, mask = 127, ++ [0] = "srliwDRi", [32] = "sraiwDRi", [48] = "roriwDRi" ++ }, ++ [48] = { -- Zbb ++ shift = 25, mask = 127, ++ [5] = "roriwDRi" ++ } ++} + -+ case BC_RET: -+ | // RA = results*8, RD = (nresults+1)*8 -+ | ld PC, FRAME_PC(BASE) -+ | add RA, BASE, RA -+ | mv MULTRES, RD -+ |1: -+ | andi TMP0, PC, FRAME_TYPE -+ | xori TMP1, PC, FRAME_VARG -+ | bnez TMP0, ->BC_RETV_Z -+ | -+ |->BC_RET_Z: -+ | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return -+ | lw INS, -4(PC) -+ | addi TMP2, BASE, -16 -+ | addi RC, RD, -8 -+ | decode_RA8 TMP0, INS -+ | decode_RB8 RB, INS -+ | sub BASE, TMP2, TMP0 -+ | add TMP3, TMP2, RB -+ | beqz RC, >3 -+ |2: -+ | ld CRET1, 0(RA) -+ | addi RA, RA, 8 -+ | addi RC, RC, -8 -+ | sd CRET1, 0(TMP2) -+ | addi TMP2, TMP2, 8 -+ | bnez RC, <2 -+ |3: -+ | addi TMP3, TMP3, -8 -+ |5: -+ | bltu TMP2, TMP3, >6 -+ | ld LFUNC:TMP1, FRAME_FUNC(BASE) -+ | cleartp LFUNC:TMP1 -+ | ld TMP1, LFUNC:TMP1->pc -+ | ins_next1 -+ | ld KBASE, PC2PROTO(k)(TMP1) -+ | ins_next2 -+ | -+ |6: // Fill up results with nil. -+ | sd TISNIL, 0(TMP2) -+ | addi TMP2, TMP2, 8 -+ | j <5 -+ | -+ |->BC_RETV_Z: // Non-standard return case. -+ | andi TMP2, TMP1, FRAME_TYPEP -+ | bnez TMP2, ->vm_return -+ | // Return from vararg function: relocate BASE down. 
-+ | sub BASE, BASE, TMP1 -+ | ld PC, FRAME_PC(BASE) -+ | j <1 -+ break; ++local map_op32 = { ++ shift = 25, mask = 127, ++ [0] = { -- 64I ++ shift = 12, mask = 7, ++ [0] = "addwDRr", [1] = "sllwDRr", [5] = "srlwDRr" ++ }, ++ [1] = map_mext64, ++ [4] = { -- Zba & Zbb ++ shift = 12, mask = 7, ++ [0] = "add.uw|zext.w|DRr0", [4] = "zext.hDRr" ++ }, ++ [16] = { -- Zba ++ shift = 12, mask = 7, ++ [2] = "sh1add.uw", [4] = "sh2add.uw", [6] = "sh3add.uw" ++ }, ++ [32] = { -- 64I ++ shift = 12, mask = 7, ++ [0] = "subw|negwDR0r", [5] = "srawDRr" ++ }, ++ [48] = { -- Zbb ++ shift = 12, mask = 7, ++ [1] = "rolwDRr", [5] = "rorwDRr" ++ } ++} + -+ case BC_RET0: case BC_RET1: -+ | // RA = results*8, RD = (nresults+1)*8 -+ | ld PC, FRAME_PC(BASE) -+ | add RA, BASE, RA -+ | mv MULTRES, RD -+ | andi TMP0, PC, FRAME_TYPE -+ | xori TMP1, PC, FRAME_VARG -+ | bnez TMP0, ->BC_RETV_Z -+ | lw INS, -4(PC) -+ | addi TMP2, BASE, -16 -+ if (op == BC_RET1) { -+ | ld CRET1, 0(RA) ++local map_ecabre = { ++ shift = 12, mask = 7, ++ [0] = { ++ shift = 20, mask = 4095, ++ [0] = "ecall", "ebreak" ++ } ++} ++ ++local map_fence = { ++ shift = 12, mask = 1, ++ [0] = "fence", --"fence.i" ZIFENCEI EXTENSION ++} ++ ++local map_jalr = { ++ shift = 7, mask = 0x1ffffff, ++ _ = "jalr|jrDRI7", [256] = "ret" ++} ++ ++local map_xthead_custom0 = { ++ shift = 12, mask = 7, ++ [1] = { -- Arithmetic ++ shift = 27, mask = 31, ++ [0] = "th.addslDRrv", ++ [2] = { ++ shift = 26, mask = 63, ++ [4] = "th.srriDRi", ++ [5] = { ++ shift = 25, mask = 127, ++ [10] = "th.srriwDRi" ++ } ++ }, ++ [4] = { -- XTheadMac ++ shift = 25, mask = 3, ++ [0] = "th.mulaDRr", "th.mulsDRr", "th.mulawDRr", "th.mulswDRr" ++ }, ++ [5] = { -- XTheadMac ++ shift = 25, mask = 3, ++ [0] = "th.mulahDRr", "th.mulshDRr" ++ }, ++ [8] = { -- XTheadCondMov ++ shift = 25, mask = 3, ++ [0] = "th.mveqzDRr", "th.mvnezDRr" ++ }, ++ [16] = { -- XTheadBb ++ shift = 20, mask = 31, ++ [0] = { ++ shift = 25, mask = 3, ++ [0] = "th.tstnbzDRi", "th.revDR", 
"th.ff0DR", "th.ff1DR" ++ } ++ }, ++ [17] = { -- XTheadBb ++ shift = 26, mask = 1, ++ [0] = "th.tstDRi" ++ }, ++ [18] = { -- XTheadBb ++ shift = 20, mask = 31, ++ [0] = { ++ shift = 25, mask = 3, ++ [0] = "th.revwDR" ++ } + } -+ | decode_RB8 RB, INS -+ | decode_RA8 RA, INS -+ | sub BASE, TMP2, RA -+ if (op == BC_RET1) { -+ | sd CRET1, 0(TMP2) ++ }, ++ [2] = "th.extDRji", [3] = "th.extuDRji", ++ { -- MemLoad ++ shift = 29, mask = 7, ++ [7] = { -- XTheadMemPair ++ shift = 25, mask = 3, ++ [0] = "th.lwdDrP", [2] = "th.lwudDrP", "th.lddDrP" + } -+ |5: -+ | bltu RD, RB, >6 -+ | ld TMP1, FRAME_FUNC(BASE) -+ | cleartp LFUNC:TMP1 -+ | ld TMP1, LFUNC:TMP1->pc -+ | ins_next1 -+ | ld KBASE, PC2PROTO(k)(TMP1) -+ | ins_next2 -+ | -+ |6: // Fill up results with nil. -+ | addi TMP2, TMP2, 8 -+ | addi RD, RD, 8 -+ if (op == BC_RET1) { -+ | sd TISNIL, 0(TMP2) -+ } else { -+ | sd TISNIL, -8(TMP2) ++ }, ++ { -- MemStore ++ shift = 29, mask = 7, ++ [7] = { -- XTheadMemPair ++ shift = 25, mask = 3, ++ [0] = "th.swdDrP", [3] = "th.sddDrP" + } -+ | j <5 -+ break; ++ } ++} + -+ /* -- Loops and branches ------------------------------------------------ */ ++local map_custom0 = xthead and map_xthead_custom0 or nil + -+ case BC_FORL: -+ |.if JIT -+ | hotloop -+ |.endif -+ | // Fall through. Assumes BC_IFORL follows. 
-+ break; ++local map_pri = { ++ [3] = map_load, [7] = map_fload, [11] = map_custom0, [15] = map_fence, [19] = map_opimm, ++ [23] = "auipcDA", [27] = map_opimm32, ++ [35] = map_store, [39] = map_fstore, [47] = map_aext, [51] = map_op, ++ [55] = "luiDU", [59] = map_op32, [67] = map_fmadd, [71] = map_fmsub, ++ [75] = map_fnmsub, [99] = map_branch, [79] = map_fnmadd, [83] = map_fext, ++ [103] = map_jalr, [111] = "jal|j|D0J", [115] = map_ecabre ++} + -+ case BC_JFORI: -+ case BC_JFORL: -+#if !LJ_HASJIT -+ break; -+#endif -+ case BC_FORI: -+ case BC_IFORL: -+ | // RA = base*8, RD = target (after end of loop or start of loop) -+ vk = (op == BC_IFORL || op == BC_JFORL); -+ | add RA, BASE, RA -+ | ld CARG1, FORL_IDX*8(RA) // CARG1 = IDX -+ | ld CARG2, FORL_STEP*8(RA) // CARG2 = STEP -+ | ld CARG3, FORL_STOP*8(RA) // CARG3 = STOP -+ | gettp CARG4, CARG1 -+ | gettp CARG5, CARG2 -+ | gettp CARG6, CARG3 -+ if (op != BC_JFORL) { -+ | srliw RD, RD, 1 -+ | lui TMP2, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J<<2 -+ | add TMP2, RD, TMP2 -+ } -+ | bne CARG4, TISNUM, >3 -+ | sext.w CARG4, CARG1 // start -+ | sext.w CARG3, CARG3 // stop -+ if (!vk) { // init -+ | bxne CARG6, TISNUM, ->vmeta_for -+ | bxne CARG5, TISNUM, ->vmeta_for -+ | bfextri TMP0, CARG2, 31, 31 // sign -+ | slt CARG2, CARG3, CARG4 -+ | slt TMP1, CARG4, CARG3 -+ | neg TMP4, TMP0 -+ | xor TMP0, TMP1, CARG2 // CARG2 = TMP0 ? TMP1 : CARG2 -+ | and TMP0, TMP0, TMP4 -+ | xor CARG2, CARG2, TMP0 // CARG2=0: +,start <= stop or -,start >= stop -+ } else { -+ | sext.w CARG5, CARG2 // step -+ | addw CARG1, CARG4, CARG5 // start + step -+ | xor TMP3, CARG1, CARG4 // y^a -+ | xor TMP1, CARG1, CARG5 // y^b -+ | and TMP3, TMP3, TMP1 -+ | slt TMP1, CARG1, CARG3 // start+step < stop ? -+ | slt CARG3, CARG3, CARG1 // stop < start+step ? -+ | sltz TMP0, CARG5 // step < 0 ? -+ | sltz TMP3, TMP3 // ((y^a) & (y^b)) < 0: overflow. -+ | neg TMP4, TMP0 -+ | xor TMP1, TMP1, CARG3 // CARG3 = TMP0 ? 
TMP1 : CARG3 -+ | and TMP1, TMP1, TMP4 -+ | xor CARG3, CARG3, TMP1 -+ | or CARG2, CARG3, TMP3 // CARG2=1: overflow; CARG2=0: continue -+ | zext.w CARG1, CARG1 -+ | settp_b CARG1, TISNUM -+ | sd CARG1, FORL_IDX*8(RA) -+ } -+ |1: -+ if (op == BC_FORI) { -+ | neg TMP4, CARG2 // CARG2!=0: jump out the loop; CARG2==0: next INS -+ | and TMP2, TMP2, TMP4 -+ | add PC, PC, TMP2 -+ } else if (op == BC_JFORI) { -+ | add PC, PC, TMP2 -+ | lhu RD, -4+OFS_RD(PC) -+ } else if (op == BC_IFORL) { -+ | addi TMP4, CARG2, -1 // CARG2!=0: next INS; CARG2==0: jump back -+ | and TMP2, TMP2, TMP4 -+ | add PC, PC, TMP2 -+ } -+ | ins_next1 -+ | sd CARG1, FORL_EXT*8(RA) -+ |2: -+ if (op == BC_JFORI) { -+ | decode_RD8b RD -+ | beqz CARG2, =>BC_JLOOP // CARG2 == 0: excute the loop -+ } else if (op == BC_JFORL) { -+ | beqz CARG2, =>BC_JLOOP -+ } -+ | ins_next2 -+ | -+ |3: // FP loop. -+ | fld FTMP0, FORL_IDX*8(RA) // start -+ | fld FTMP1, FORL_STOP*8(RA) // stop -+ | ld TMP0, FORL_STEP*8(RA) // step -+ | sltz CARG2, TMP0 // step < 0 ? -+ | neg CARG2, CARG2 -+ if (!vk) { -+ | sltiu TMP3, CARG4, LJ_TISNUM // start is number ? -+ | sltiu TMP0, CARG5, LJ_TISNUM // step is number ? -+ | sltiu TMP1, CARG6, LJ_TISNUM // stop is number ? -+ | and TMP3, TMP3, TMP1 -+ | and TMP0, TMP0, TMP3 -+ | bxeqz TMP0, ->vmeta_for // if start or step or stop isn't number -+ | flt.d TMP3, FTMP0, FTMP1 // start < stop ? -+ | flt.d TMP4, FTMP1, FTMP0 // stop < start ? -+ | xor TMP0, TMP3, TMP4 // CARG2 = CARG2 ? TMP3 : TMP4 -+ | and TMP0, TMP0, CARG2 -+ | xor CARG2, TMP4, TMP0 // CARG2=0:+,startstop -+ | j <1 -+ } else { -+ | fld FTMP3, FORL_STEP*8(RA) -+ | fadd.d FTMP0, FTMP0, FTMP3 // start + step -+ | flt.d TMP3, FTMP0, FTMP1 // start + step < stop ? -+ | flt.d TMP4, FTMP1, FTMP0 -+ | xor TMP0, TMP3, TMP4 // CARG2 = CARG2 ? 
TMP3 : TMP4 -+ | and TMP0, TMP0, CARG2 -+ | xor CARG2, TMP4, TMP0 -+ if (op == BC_IFORL) { -+ | addi TMP3, CARG2, -1 -+ | and TMP2, TMP2, TMP3 -+ | add PC, PC, TMP2 -+ } -+ | fsd FTMP0, FORL_IDX*8(RA) -+ | ins_next1 -+ | fsd FTMP0, FORL_EXT*8(RA) -+ | j <2 -+ } -+ break; ++------------------------------------------------------------------------------ + -+ case BC_ITERL: -+ |.if JIT -+ | hotloop -+ |.endif -+ | // Fall through. Assumes BC_IITERL follows. -+ break; ++local map_gpr = { ++ [0] = "zero", "ra", "sp", "gp", "tp", "x5", "x6", "x7", ++ "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", ++ "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", ++ "x24", "x25", "x26", "x27", "x28", "x29", "x30", "x31", ++} ++ ++local map_fgpr = { ++ [0] = "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", ++ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", ++ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", ++ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", ++} ++ ++local map_rm = { ++ [0] = "rne", "rtz", "rdn", "rup", "rmm", [7] = "dyn" ++} ++ ++local map_fli = { ++ [0] = "-1.0", ++ "min", ++ "0x1p-16", "0x1p-15", "0x1p-8", "0x1p-7", ++ "0.0625", "0.125", ++ "0.25", "0.3125", "0.375", "0.4375", ++ "0.5", "0.625", "0.75", "0.875", ++ "1.0", "1.25", "1.5", "1.75", ++ "2.0", "2.5", "3.0", ++ "4.0", "8.0", "16.0", "128.0", "256.0", ++ "32768.0", "65536.0", "inf", "nan" ++} ++ ++------------------------------------------------------------------------------ ++ ++-- Output a nicely formatted line with an opcode and operands. 
++local function putop(ctx, text, operands) ++ local pos = ctx.pos ++ local extra = "" ++ if ctx.rel then ++ local sym = ctx.symtab[ctx.rel] ++ if sym then extra = "\t->"..sym end ++ end ++ if ctx.hexdump > 0 then ++ ctx.out:write((format("%08x %s %-7s %s%s\n", ++ ctx.addr+pos, tohex(ctx.op), text, concat(operands, ","), extra))) ++ else ++ ctx.out(format("%08x %-7s %s%s\n", ++ ctx.addr+pos, text, concat(operands, ", "), extra)) ++ end ++ local pos = ctx.pos ++ local first_byte = byte(ctx.code, ctx.pos+1) ++ --Examine if the next instruction is 16-bits or 32-bits ++ if(band(first_byte, 3) < 3) then ++ ctx.pos = pos + 2 ++ else ++ ctx.pos = pos + 4 ++ end ++end ++ ++-- Fallback for unknown opcodes. ++local function unknown(ctx) ++ return putop(ctx, ".long", { "0x"..tohex(ctx.op) }) ++end ++ ++local function get_le(ctx) ++ local pos = ctx.pos ++ --Examine if the next instruction is 16-bits or 32-bits ++ local first_byte = byte(ctx.code, pos+1) ++ if(band(first_byte, 3) < 3) then --checking first two bits of opcode ++ local b0, b1 = byte(ctx.code, pos+1, pos+2) ++ return bor(lshift(b1, 8), b0) ++ else ++ local b0, b1, b2, b3 = byte(ctx.code, pos+1, pos+4) ++ return bor(lshift(b3, 24), lshift(b2, 16), lshift(b1, 8), b0) ++ end ++end + -+ case BC_JITERL: -+#if !LJ_HASJIT -+ break; -+#endif -+ case BC_IITERL: -+ | // RA = base*8, RD = target -+ | add RA, BASE, RA -+ | ld TMP1, 0(RA) -+ | beq TMP1, TISNIL, >1 // Stop if iterator returned nil. -+ if (op == BC_JITERL) { -+ | sd TMP1,-8(RA) -+ | j =>BC_JLOOP -+ } else { -+ | branch_RD // Otherwise save control var + branch. 
-+ | sd TMP1, -8(RA) -+ } -+ |1: -+ | ins_next -+ break; ++local function parse_W(opcode) ++ local part1 = band(rshift(opcode, 7), 15) --9:6 ++ local part2 = band(rshift(opcode, 11), 3) --5:4 ++ local part3 = band(rshift(opcode, 5), 1)--3 ++ local part4 = band(rshift(opcode, 6), 1)--2 ++ return bor(lshift(0, 31), lshift(part1, 6) , lshift(part2, 4), ++ lshift(part3, 3), lshift(part4, 2)) ++end + -+ case BC_LOOP: -+ | // RA = base*8, RD = target (loop extent) -+ | // Note: RA/RD is only used by trace recorder to determine scope/extent -+ | // This opcode does NOT jump, it's only purpose is to detect a hot loop. -+ |.if JIT -+ | hotloop -+ |.endif -+ | // Fall through. Assumes BC_ILOOP follows. -+ break; ++local function parse_x(opcode) ++ local part1 = band(rshift(opcode, 12), 1) --5 ++ local part2 = band(rshift(opcode, 2), 31) --4:0 ++ if(part1 == 1) then ++ return bor(lshift(1, 31), lshift(0x1ffffff, 6), lshift(part1, 5), part2) ++ else ++ return bor(lshift(0, 31), lshift(part1, 5), part2) ++ end ++end + -+ case BC_ILOOP: -+ | // RA = base*8, RD = target (loop extent) -+ | ins_next -+ break; ++local function parse_X(opcode) ++ local part1 = band(rshift(opcode, 12), 1) --12 ++ local part2 = band(rshift(opcode, 3), 3) --8:7 ++ local part3 = band(rshift(opcode, 5), 1) --6 ++ local part4 = band(rshift(opcode, 2), 1) --5 ++ local part5 = band(rshift(opcode, 6), 1) --4 ++ if(part1 == 1) then ++ return bor(lshift(1, 31), lshift(0x3fffff, 9), lshift(part2, 7), ++ lshift(part3, 6), lshift(part4, 5), lshift(part5, 4)) ++ else ++ return bor(lshift(0, 31), lshift(part2, 7), lshift(part3, 6), ++ lshift(part4, 5), lshift(part5, 4)) ++ end ++end + -+ case BC_JLOOP: -+ |.if JIT -+ | // RA = base*8 (ignored), RD = traceno*8 -+ | ld TMP0, GL_J(trace)(GL) // Assumes J.trace in-reach relative to GL. -+ | add TMP0, TMP0, RD -+ | // Traces on RISC-V don't store the trace number, so use 0. 
-+    |  sd x0, GL->vmstate
-+    |  ld TRACE:TMP1, 0(TMP0)
-+    |  sd BASE, GL->jit_base // store Current JIT code L->base
-+    |  ld TMP1, TRACE:TMP1->mcode
-+    |  sd L, GL->tmpbuf.L
-+    |  jr TMP1
-+    |.endif
-+    break;
++local function parse_S(opcode) -- 12-bit store immediate (opcode bits 31:25 and 11:7), sign-extended to 32 bits.
++  local part1 = band(rshift(opcode, 25), 127) --11:5
++  local sign = band(rshift(part1, 6), 1) -- top bit of part1 == opcode bit 31
++  local part2 = band(rshift(opcode, 7), 31) --4:0
++  if (sign == 1) then
++    return bor(lshift(1, 31), lshift(0x7ffff, 12), lshift(part1, 5), part2)
++  else
++    return bor(lshift(0, 31), lshift(part1, 5), part2)
++  end
++end
 +
-+  case BC_JMP:
-+    |  // RA = base*8 (only used by trace recorder), RD = target
-+    |  branch_RD // PC + (jump - 0x8000)<<2
-+    |  ins_next
-+    break;
++local function parse_B(opcode) -- Branch offset imm[12:1] (bit 0 is always 0), sign-extended to 32 bits.
++  local part1 = band(rshift(opcode, 7), 1) --11
++  local part2 = band(rshift(opcode, 25), 63) --10:5
++  local part3 = band(rshift(opcode, 8), 15) -- 4 : 1
++  if (band(rshift(opcode, 31), 1) == 1) then -- sign is imm[12] = opcode bit 31, not imm[11]
++    return bor(lshift(1, 31), lshift(0x7ffff, 12), lshift(part1, 11),
++      lshift(part2, 5), lshift(part3, 1), 0)
++  else
++    return bor(lshift(0, 31), lshift(part1, 11), lshift(part2, 5),
++      lshift(part3, 1), 0)
++  end
++end
 +
-+  /* -- Function headers -------------------------------------------------- */
++local function parse_q(opcode) -- 9-bit offset imm[8:1] from a 16-bit opcode, sign-extended from opcode bit 12.
++  local part1 = band(rshift(opcode, 12), 1) --8
++  local part2 = band(rshift(opcode, 5), 3) --7:6
++  local part3 = band(rshift(opcode, 2), 1) --5
++  local part4 = band(rshift(opcode, 10), 3) --4:3
++  local part5 = band(rshift(opcode, 3), 3) --2:1
++  if(part1 == 1) then -- negative: set bits 31:8 (includes imm[8] itself)
++    return bor(lshift(1, 31), lshift(0x7fffff, 8), lshift(part2, 6),
++      lshift(part3, 5), lshift(part4, 3), lshift(part5, 1))
++  else
++    return bor(lshift(0, 31), lshift(part2, 6), lshift(part3, 5),
++      lshift(part4, 3), lshift(part5, 1))
++  end
++end
 +
-+  case BC_FUNCF:
-+    |.if JIT
-+    |  hotcall
-+    |.endif
-+  case BC_FUNCV: /* NYI: compiled vararg functions. */
-+    |  // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow.
-+    break;
++local function parse_J(opcode) -- Jump offset imm[20:1] (bit 0 is always 0), sign-extended from opcode bit 31.
++  local part1 = band(rshift(opcode, 31), 1) --20
++  local part2 = band(rshift(opcode, 12), 255) -- 19:12
++  local part3 = band(rshift(opcode, 20), 1) --11
++  local part4 = band(rshift(opcode, 21), 1023) --10:1
++  if(part1 == 1) then -- negative: set bits 31:20
++    return bor(lshift(1, 31), lshift(0x7ff, 20), lshift(part2, 12),
++      lshift(part3, 11), lshift(part4, 1))
++  else
++    return bor(lshift(0, 31), lshift(0, 20), lshift(part2, 12),
++      lshift(part3, 11), lshift(part4, 1))
++  end
++end
 +
-+  case BC_JFUNCF:
-+#if !LJ_HASJIT
-+    break;
-+#endif
-+  case BC_IFUNCF:
-+    |  // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8
-+    |  ld TMP2, L->maxstack
-+    |  lbu TMP1, -4+PC2PROTO(numparams)(PC)
-+    |  ld KBASE, -4+PC2PROTO(k)(PC)
-+    |  bltu TMP2, RA, ->vm_growstack_l
-+    |  slliw TMP1, TMP1, 3 // numparams*8
-+    |2:
-+    |  bltu NARGS8:RC, TMP1, >3 // Check for missing parameters.
-+    if (op == BC_JFUNCF) {
-+      |  decode_RD8 RD, INS
-+      |  j =>BC_JLOOP
-+    } else {
-+      |  ins_next
-+    }
-+    |
-+    |3: // Clear missing parameters.
-+    |  add TMP0, BASE, NARGS8:RC
-+    |  sd TISNIL, 0(TMP0)
-+    |  addiw NARGS8:RC, NARGS8:RC, 8
-+    |  j <2
-+    break;
++local function parse_T(opcode) -- Offset imm[11:1] scattered over opcode bits 12:2 of a 16-bit opcode, sign-extended from opcode bit 12.
++  local part1 = band(rshift(opcode, 12), 1) --11
++  local part2 = band(rshift(opcode, 8), 1) --10
++  local part3 = band(rshift(opcode, 9), 3)--9:8
++  local part4 = band(rshift(opcode, 6), 1) --7
++  local part5 = band(rshift(opcode, 7), 1) -- 6
++  local part6 = band(rshift(opcode, 2), 1) --5
++  local part7 = band(rshift(opcode, 11), 1) --4
++  local part8 = band(rshift(opcode, 3), 7) --3:1
++  if(part1 == 1) then -- negative: set bits 31:12
++    return bor(lshift(1, 31), lshift(0x7ffff, 12), lshift(part1, 11),
++      lshift(part2, 10), lshift(part3, 8), lshift(part4, 7),
++      lshift(part5, 6), lshift(part6, 5), lshift(part7, 4),
++      lshift(part8, 1))
++  else
++    return bor(lshift(0, 31), lshift(part1, 11), lshift(part2, 10),
++      lshift(part3, 8), lshift(part4, 7), lshift(part5, 6),
++      lshift(part6, 5), lshift(part7, 4), lshift(part8, 1))
++  end
++end
 +
-+  case BC_JFUNCV:
-+#if !LJ_HASJIT
-+    break;
-+#endif
-+    |  NYI // NYI: compiled vararg functions
-+    break; /* NYI: compiled vararg functions. */
++local function parse_K(opcode) -- 6-bit immediate widened to a 20-bit value: bits 19:5 are all ones when opcode bit 12 is set.
++  local part1 = band(rshift(opcode, 12), 1) --5 17
++  local part2 = band(rshift(opcode, 2), 31) --4:0 16:12
++  if(part1 == 1) then -- extends only to 20 bits; bit 31 is left clear
++    return bor(lshift(0, 31), lshift(0x7fff, 5), part2)
++  else
++    return bor(lshift(0, 31), lshift(part1, 5), part2)
++  end
++end
 +
-+  case BC_IFUNCV:
-+    |  // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8
-+    |  li TMP0, LJ_TFUNC
-+    |  add TMP1, BASE, RC
-+    |  ld TMP2, L->maxstack
-+    |  settp LFUNC:RB, TMP0
-+    |  add TMP0, RA, RC
-+    |  sd LFUNC:RB, 0(TMP1) // Store (tagged) copy of LFUNC.
-+    |  addi TMP2, TMP2, -8
-+    |  addi TMP3, RC, 16+FRAME_VARG
-+    |  ld KBASE, -4+PC2PROTO(k)(PC)
-+    |  sd TMP3, 8(TMP1) // Store delta + FRAME_VARG.
-+ | bgeu TMP0, TMP2, ->vm_growstack_l -+ | lbu TMP2, -4+PC2PROTO(numparams)(PC) -+ | mv RA, BASE -+ | mv RC, TMP1 -+ | ins_next1 -+ | addi BASE, TMP1, 16 -+ | beqz TMP2, >2 -+ |1: -+ | ld TMP0, 0(RA) -+ | sltu CARG2, RA, RC // Less args than parameters? -+ | addi RA, RA, 8 -+ | addi TMP1, TMP1, 8 -+ | addiw TMP2, TMP2, -1 -+ | beqz CARG2, >3 -+ | neg TMP4, CARG2 // Clear old fixarg slot (help the GC). -+ | xor TMP3, TISNIL, TMP0 // CARG1 = CARG2 ? TISNIL : TMP0 -+ | and TMP3, TMP3, TMP4 -+ | xor CARG1, TMP0, TMP3 -+ | sd CARG1, -8(RA) -+ | sd TMP0, 8(TMP1) -+ | bnez TMP2, <1 -+ |2: -+ | ins_next2 -+ |3: -+ | neg TMP4, CARG2 // Clear missing fixargs. -+ | xor TMP3, TMP0, TISNIL // TMP0 = CARG2 ? TMP0 : TISNIL -+ | and TMP3, TMP3, TMP4 -+ | xor TMP0, TISNIL, TMP3 -+ | sd TMP0, 8(TMP1) -+ | bnez TMP2, <1 -+ | j <2 -+ break; ++-- Disassemble a single instruction. ++local function disass_ins(ctx) ++ local op = ctx:get() ++ local operands = {} ++ local last = nil ++ ctx.op = op ++ ctx.rel =nil ++ ++ local opat = 0 ++ --for compressed instructions ++ if(band(op, 3) < 3) then ++ opat = ctx.map_compr[band(op, 3)] ++ while type(opat) ~= "string" do ++ if not opat then return unknown(ctx) end ++ local test = band(rshift(op, opat.shift), opat.mask) ++ opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._ ++ end ++ else ++ opat = ctx.map_pri[band(op,127)] ++ while type(opat) ~= "string" do ++ if not opat then return unknown(ctx) end ++ opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._ ++ end ++ end ++ local name, pat = match(opat, "^([a-z0-9_.]*)(.*)") ++ local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)") ++ local a1, a2 = 0 ++ if altname then ++ pat = pat2 ++ end + -+ case BC_FUNCC: -+ case BC_FUNCCW: -+ | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8 -+ if (op == BC_FUNCC) { -+ | ld CARG4, CFUNC:RB->f -+ } else { -+ | ld CARG4, GL->wrapf -+ } -+ | add TMP1, RA, NARGS8:RC -+ | ld TMP2, L->maxstack -+ | add RC, BASE, NARGS8:RC 
-+ | sd BASE, L->base // base of currently excuting function -+ | sd RC, L->top -+ | bgtu TMP1, TMP2, ->vm_growstack_c // Need to grow stack. -+ | li_vmstate C // li TMP0, ~LJ_VMST_C -+ if (op == BC_FUNCCW) { -+ | ld CARG2, CFUNC:RB->f -+ } -+ | mv CARG1, L -+ | st_vmstate // sw TMP0, GL->vmstate -+ | jalr CARG4 // (lua_State *L [, lua_CFunction f]) -+ | // Returns nresults. -+ | ld BASE, L->base -+ | ld TMP1, L->top -+ | sd L, GL->cur_L -+ | slliw RD, CRET1, 3 -+ | li_vmstate INTERP -+ | ld PC, FRAME_PC(BASE) // Fetch PC of caller. -+ | sub RA, TMP1, RD // RA = L->top - nresults*8 -+ | st_vmstate -+ | j ->vm_returnc -+ break; ++ local alias_done = false --variable for the case of 2 pseudoinstructions, if both parameters are x0, 0 + -+ /* ---------------------------------------------------------------------- */ ++ for p in gmatch(pat, ".") do ++ local x = nil ++ if p == "D" then ++ x = map_gpr[band(rshift(op, 7), 31)] ++ elseif p == "F" then ++ x = map_fgpr[band(rshift(op, 7), 31)] ++ elseif p == "R" then ++ x = map_gpr[band(rshift(op, 15), 31)] ++ elseif p == "G" then ++ x = map_fgpr[band(rshift(op, 15), 31)] ++ elseif p == "r" then ++ x = map_gpr[band(rshift(op, 20), 31)] ++ if(name == "sb" or name == "sh" or name == "sw" or name == "sd") then ++ local temp = last --because of the diffrent order of the characters ++ operands[#operands] = x ++ x = temp ++ end ++ elseif p == "g" then ++ x = map_fgpr[band(rshift(op, 20), 31)] ++ if(name == "fsw" or name == "fsd") then ++ local temp = last ++ operands[#operands] = x ++ x = temp ++ end ++ elseif p == "Z" then ++ x = map_gpr[8 + band(rshift(op, 2), 7)] ++ elseif p == "N" then ++ x = map_fgpr[8 + band(rshift(op, 2), 7)] ++ elseif p == "M" then ++ x = map_gpr[8 + band(rshift(op, 7), 7)] ++ elseif p == "E" then ++ x = map_gpr[band(rshift(op, 2), 31)] ++ elseif p == "W" then ++ local uimm = parse_W(op) ++ x = format("%s,%d", "sp", uimm) ++ elseif p == "x" then ++ x = parse_x(op) ++ elseif p == "h" then ++ local part1 = 
band(rshift(op, 5), 3) --7:6 ++ local part2 = band(rshift(op, 10), 7) --5:3 ++ local uimm = bor(lshift(0, 31), lshift(part1, 6) , lshift(part2, 3)) ++ operands[#operands] = format("%d(%s)", uimm, last) ++ elseif p == "X" then ++ local imm = parse_X(op) ++ x = format("%s,%d", "sp", imm) ++ elseif p == "O" then ++ x = format("(%s)", map_gpr[band(rshift(op, 15), 31)]) ++ elseif p == "H" then ++ x = map_fgpr[band(rshift(op, 27), 31)] ++ elseif p == "L" then ++ local register = map_gpr[band(rshift(op, 15), 31)] ++ local disp = arshift(op, 20) ++ x = format("%d(%s)", disp, register) ++ elseif p == "P" then -- XTheadMemPair ++ local register = map_gpr[band(rshift(op, 15), 31)] ++ local disp = band(arshift(op, 25), 3) ++ local isword = bxor(band(arshift(op, 26), 1), 1) ++ x = format("(%s), %d, %d", register, disp, isword and 3 or 4) ++ elseif p == "I" then ++ x = arshift(op, 20) ++ --different for jalr ++ if(name == "jalr") then ++ local reg = map_gpr[band(rshift(op, 15), 31)] ++ if(ctx.reltab[reg] == nil) then ++ operands[#operands] = format("%d(%s)", x, last) ++ else ++ local target = ctx.reltab[reg] + x ++ operands[#operands] = format("%d(%s) #0x%08x", x, last, target) ++ ctx.rel = target ++ ctx.reltab[reg] = nil --assume no reuses of the register ++ end ++ x = nil --not to add additional operand ++ end ++ elseif p == "i" then ++ --both for RV32I AND RV64I ++ local value = band(arshift(op, 20), 63) ++ x = string.format("%d", value) ++ elseif p == "j" then -- XThead imm1[31..26] ++ local value = band(rshift(op, 26), 63) ++ x = string.format("%d", value) ++ elseif p == "v" then --XThead imm[2][26..25] ++ local value = band(rshift(op, 25), 3) ++ x = string.format("%d", value) ++ elseif p == "S" then ++ local register = map_gpr[band(rshift(op, 15), 31)] --register ++ local imm = parse_S(op) ++ x = format("%d(%s)", imm, register) ++ elseif p == "n" then ++ local part1 = band(rshift(op, 5), 1) --6 ++ local part2 = band(rshift(op, 10), 7) --5:3 ++ local part3 = band(rshift(op, 
6), 1) --2 ++ local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 3), ++ lshift(part3, 2)) ++ operands[#operands] = format("%d(%s)", uimm, last) ++ elseif p == "A" then ++ local value, dest = band(rshift(op, 12), 0xfffff), map_gpr[band(rshift(op, 7), 31)] ++ ctx.reltab[dest] = ctx.addr + ctx.pos + lshift(value, 12) ++ x = format("0x%x", value) ++ elseif p == "B" then ++ x = ctx.addr + ctx.pos + parse_B(op) ++ ctx.rel = x ++ x = format("0x%08x", x) ++ elseif p == "U" then ++ local value = band(rshift(op, 12), 0xfffff) ++ x = string.format("0x%x", value) ++ elseif p == "Q" then ++ local part1 = band(rshift(op, 2), 7) --8:6 ++ local part2 = band(rshift(op, 12), 1) --5 ++ local part3 = band(rshift(op, 5), 3) --4:3 ++ local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 5), ++ lshift(part3, 3)) ++ x = format("%d(%s)", uimm, "sp") ++ elseif p == "q" then ++ x = ctx.addr + ctx.pos + parse_q(op) ++ ctx.rel = x ++ x = format("0x%08x", x) ++ elseif p == "J" then ++ x = ctx.addr + ctx.pos + parse_J(op) ++ ctx.rel = x ++ x = format("0x%08x", x) ++ elseif p == "K" then ++ local value = parse_K(op) ++ x = string.format("0x%x", value) ++ elseif p == "Y" then ++ local part1 = band(rshift(op, 2), 3) --7:6 ++ local part2 = band(rshift(op, 12), 1) --5 ++ local part3 = band(rshift(op, 4), 7) --4:2 ++ local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 5), ++ lshift(part3, 2)) ++ x = format("%d(%s)", uimm, "sp") ++ elseif p == "o" then -- rounding mode ++ x = map_rm[band(rshift(op, 12), 7)] ++ elseif p == "y" then -- fli lut ++ x = map_fli[band(rshift(op, 15), 31)] ++ elseif p == "1" then ++ local part1 = band(rshift(op, 12), 1) --5 ++ local part2 = band(rshift(op, 2), 31) --4:0 ++ local uimm = bor(lshift(0, 31), lshift(part1, 5), part2) ++ x = string.format("0x%x", uimm) ++ elseif p == "T" then ++ x = ctx.addr + ctx.pos + parse_T(op) ++ ctx.rel = x ++ x = format("0x%08x", x) ++ elseif p == "t" then ++ local part1 = band(rshift(op, 7), 7) --8:6 ++ 
local part2 = band(rshift(op, 10), 7) --5:3 ++ local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 3)) ++ x = format("%d(%s)", uimm, "sp") ++ elseif p == "u" then ++ local part1 = band(rshift(op, 7), 3) --7:6 ++ local part2 = band(rshift(op, 9), 15) --5:2 ++ local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 2)) ++ x = format("%d(%s)", uimm, "sp") ++ elseif p == "V" then ++ x = map_fgpr[band(rshift(op, 2), 31)] ++ elseif p == "0" then --PSEUDOINSTRUCTIONS ++ if (last == "zero" or last == 0) then ++ local n = #operands ++ operands[n] = nil ++ last = operands[n-1] ++ local a1, a2 = match(altname, "([^|]*)|(.*)") ++ if a1 then name, altname = a1, a2 ++ else name = altname end ++ alias_done = true ++ end ++ elseif (p == "4") then ++ if(last == -1) then ++ name = altname ++ operands[#operands] = nil ++ end ++ elseif (p == "5") then ++ if(last == 1) then ++ name = altname ++ operands[#operands] = nil ++ end ++ elseif (p == "6") then ++ if(last == operands[#operands - 1]) then ++ name = altname ++ operands[#operands] = nil ++ end ++ elseif (p == "7") then --jalr rs ++ local value = string.sub(operands[#operands], 1, 1) ++ local reg = string.sub(operands[#operands], 3, #(operands[#operands]) - 1) ++ if(value == "0" and ++ (operands[#operands - 1] == "ra" or operands[#operands - 1] == "zero")) then ++ if(operands[#operands - 1] == "zero") then ++ name = altname ++ end ++ operands[#operands] = nil ++ operands[#operands] = reg ++ end ++ elseif (p == "2" and alias_done == false) then ++ if (last == "zero" or last == 0) then ++ local a1, a2 = match(altname, "([^|]*)|(.*)") ++ name = a2 ++ operands[#operands] = nil ++ end ++ end ++ if x then operands[#operands+1] = x; last = x end ++ end ++ return putop(ctx, name, operands) ++end + -+ default: -+ fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); -+ exit(2); -+ break; -+ } -+} ++------------------------------------------------------------------------------ + -+static int 
build_backend(BuildCtx *ctx)
-+{
-+  int op;
++-- Disassemble a block of code.
++local function disass_block(ctx, ofs, len) -- ofs/len are byte offsets into ctx.code (defaults: 0 and the rest of the code)
++  if not ofs then
++    ofs = 0
++  end
++  local stop = len and ofs+len or #ctx.code
++  --instructions can be both 32 and 16 bits
++  stop = stop - stop % 2
++  ctx.pos = ofs - ofs % 2
++  ctx.rel = nil
++  while ctx.pos < stop do disass_ins(ctx) end
++end
 +
-+  dasm_growpc(Dst, BC__MAX);
++-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
++local function create(code, addr, out) -- code: string of machine code; addr: address of its first byte; out: function called with each output line
++  local ctx = {}
++  ctx.code = code
++  ctx.addr = addr or 0
++  ctx.out = out or io.write
++  ctx.symtab = {}
++  ctx.disass = disass_block
++  ctx.hexdump = 8
++  ctx.get = get_le
++  ctx.map_pri = map_pri
++  ctx.map_compr = map_compr
++  ctx.reltab = {}
++  return ctx
++end
 +
-+  build_subroutines(ctx);
++-- Simple API: disassemble code (a string) at address and output via out.
++local function disass(code, addr, out)
++  create(code, addr, out):disass() -- addr is the display address, not a byte offset: always start at offset 0
++end
 +
-+  |.code_op
-+  for (op = 0; op < BC__MAX; op++)
-+    build_ins(ctx, (BCOp)op, op);
++-- Return register name for RID.
++local function regname(r) -- ids below 32 come from map_gpr; others are named "f"..(r-32)
++  if r < 32 then return map_gpr[r] end
++  return "f"..(r-32)
++end
 +
-+  return BC__MAX;
++-- Public module functions.
++return {
++  create = create,
++  disass = disass,
++  regname = regname
 +}
+--- /dev/null
++++ b/src/jit/dis_riscv64.lua
+@@ -0,0 +1,16 @@
++----------------------------------------------------------------------------
++-- LuaJIT RISC-V 64 disassembler wrapper module.
++--
++-- Copyright (C) 2022-2026 ISRC, ISCAS. All rights reserved.
++-- Released under the MIT license. See Copyright Notice in luajit.h
++----------------------------------------------------------------------------
++-- This module just exports the default riscv little-endian functions from the
++-- RISC-V disassembler module. All the interesting stuff is there.
++------------------------------------------------------------------------------ + -+/* Emit pseudo frame-info for all assembler functions. */ -+static void emit_asm_debug(BuildCtx *ctx) -+{ -+ int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); -+ int i; -+ switch (ctx->mode) { -+ case BUILD_elfasm: -+ fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); -+ fprintf(ctx->fp, -+ ".Lframe0:\n" -+ "\t.4byte .LECIE0-.LSCIE0\n" -+ ".LSCIE0:\n" -+ "\t.4byte 0xffffffff\n" -+ "\t.byte 0x1\n" -+ "\t.string \"\"\n" -+ "\t.uleb128 0x1\n" -+ "\t.sleb128 -4\n" -+ "\t.byte 1\n" /* Return address is in ra. */ -+ "\t.byte 0xc\n\t.uleb128 2\n\t.uleb128 0\n" /* def_cfa sp 0 */ -+ "\t.align 3\n" -+ ".LECIE0:\n\n"); -+ fprintf(ctx->fp, -+ ".LSFDE0:\n" -+ "\t.4byte .LEFDE0-.LASFDE0\n" -+ ".LASFDE0:\n" -+ "\t.4byte .Lframe0\n" -+ "\t.8byte .Lbegin\n" -+ "\t.8byte %d\n" -+ "\t.byte 0xe\n\t.uleb128 %d\n" -+ "\t.byte 0x81\n\t.uleb128 2*6\n" /* offset ra */, -+ fcofs, CFRAME_SIZE); -+ for (i = 27; i >= 18; i--) /* offset x27-x18 (s11-s2) */ -+ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(27-i+7)); -+ fprintf(ctx->fp, -+ "\t.byte 0x89\n\t.uleb128 2*17\n" /* offset x9 (s1) */ -+ "\t.byte 0x88\n\t.uleb128 2*18\n" /* offset x8 (s0/fp) */); -+ for (i = 27; i >= 18; i--) /* offset f31-f18 */ -+ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(27-i+19)); -+ fprintf(ctx->fp, -+ "\t.byte 0x89+32\n\t.uleb128 2*29\n" /* offset f9 (fs1) */ -+ "\t.byte 0x88+32\n\t.uleb128 2*30\n" /* offset f8 (fs0) */ -+ "\t.align 3\n" -+ ".LEFDE0:\n\n"); -+#if LJ_HASFFI -+ fprintf(ctx->fp, -+ ".LSFDE1:\n" -+ "\t.4byte .LEFDE1-.LASFDE1\n" -+ ".LASFDE1:\n" -+ "\t.4byte .Lframe0\n" -+ "\t.4byte lj_vm_ffi_call\n" -+ "\t.4byte %d\n" -+ "\t.byte 0x81\n\t.uleb128 2*1\n" /* offset ra */ -+ "\t.byte 0x92\n\t.uleb128 2*2\n" /* offset x18 */ -+ "\t.byte 0xd\n\t.uleb128 0x12\n" -+ "\t.align 3\n" -+ ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); -+#endif -+#if !LJ_NO_UNWIND -+ 
fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n"); -+ fprintf(ctx->fp, -+ ".Lframe1:\n" -+ "\t.4byte .LECIE1-.LSCIE1\n" -+ ".LSCIE1:\n" -+ "\t.4byte 0\n" -+ "\t.byte 0x1\n" -+ "\t.string \"zPR\"\n" -+ "\t.uleb128 0x1\n" -+ "\t.sleb128 -4\n" -+ "\t.byte 1\n" /* Return address is in ra. */ -+ "\t.uleb128 6\n" /* augmentation length */ -+ "\t.byte 0x1b\n" -+ "\t.4byte lj_err_unwind_dwarf-.\n" -+ "\t.byte 0x1b\n" -+ "\t.byte 0xc\n\t.uleb128 2\n\t.uleb128 0\n" /* def_cfa sp 0 */ -+ "\t.align 2\n" -+ ".LECIE1:\n\n"); -+ fprintf(ctx->fp, -+ ".LSFDE2:\n" -+ "\t.4byte .LEFDE2-.LASFDE2\n" -+ ".LASFDE2:\n" -+ "\t.4byte .LASFDE2-.Lframe1\n" -+ "\t.4byte .Lbegin-.\n" -+ "\t.4byte %d\n" -+ "\t.uleb128 0\n" /* augmentation length */ -+ "\t.byte 0xe\n\t.uleb128 %d\n" -+ "\t.byte 0x81\n\t.uleb128 2*6\n", /* offset ra */ -+ fcofs, CFRAME_SIZE); -+ for (i = 27; i >= 18; i--) /* offset x27-x18 (s11-s2) */ -+ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(27-i+7)); -+ fprintf(ctx->fp, -+ "\t.byte 0x89\n\t.uleb128 2*17\n" /* offset x9 (s1) */ -+ "\t.byte 0x88\n\t.uleb128 2*18\n" /* offset x8 (s0/fp) */); -+ for (i = 27; i >= 18; i--) /* offset f31-f18 */ -+ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(27-i+19)); -+ fprintf(ctx->fp, -+ "\t.byte 0x89+32\n\t.uleb128 2*29\n" /* offset f9 (fs1) */ -+ "\t.byte 0x88+32\n\t.uleb128 2*30\n" /* offset f8 (fs0) */ -+ "\t.align 2\n" -+ ".LEFDE2:\n\n"); -+#if LJ_HASFFI -+ fprintf(ctx->fp, -+ ".Lframe2:\n" -+ "\t.4byte .LECIE2-.LSCIE2\n" -+ ".LSCIE2:\n" -+ "\t.4byte 0\n" -+ "\t.byte 0x1\n" -+ "\t.string \"zR\"\n" -+ "\t.uleb128 0x1\n" -+ "\t.sleb128 -4\n" -+ "\t.byte 1\n" /* Return address is in ra. 
*/ -+ "\t.uleb128 1\n" /* augmentation length */ -+ "\t.byte 0x1b\n" -+ "\t.byte 0xc\n\t.uleb128 2\n\t.uleb128 0\n" /* def_cfa sp 0 */ -+ "\t.align 2\n" -+ ".LECIE2:\n\n"); -+ fprintf(ctx->fp, -+ ".LSFDE3:\n" -+ "\t.4byte .LEFDE3-.LASFDE3\n" -+ ".LASFDE3:\n" -+ "\t.4byte .LASFDE3- .Lframe2\n" -+ "\t.4byte lj_vm_ffi_call-.\n" -+ "\t.4byte %d\n" -+ "\t.uleb128 0\n" /* augmentation length */ -+ "\t.byte 0x81\n\t.uleb128 2*1\n" /* offset ra */ -+ "\t.byte 0x92\n\t.uleb128 2*2\n" /* offset x18 */ -+ "\t.byte 0xd\n\t.uleb128 0x12\n" -+ "\t.align 2\n" -+ ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); -+#endif -+#endif -+ break; -+ default: -+ break; -+ } ++local dis_riscv = require((string.match(..., ".*%.") or "").."dis_riscv") ++return { ++ create = dis_riscv.create, ++ disass = dis_riscv.disass, ++ regname = dis_riscv.regname +} +--- a/Makefile ++++ b/Makefile +@@ -101,6 +101,7 @@ FILES_JITLIB= bc.lua bcsave.lua dump.lua + dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \ + dis_mips64.lua dis_mips64el.lua \ + dis_mips64r6.lua dis_mips64r6el.lua \ ++ dis_riscv.lua dis_riscv64.lua \ + vmdef.lua + + ifeq (,$(findstring Windows,$(OS))) +--- a/src/Makefile ++++ b/src/Makefile +@@ -52,6 +52,7 @@ CCOPT_arm= + CCOPT_arm64= + CCOPT_ppc= + CCOPT_mips= ++CCOPT_riscv64= + # + #CCDEBUG= + # Uncomment the next line to generate debug information: +@@ -270,6 +271,9 @@ ifneq (,$(findstring LJ_TARGET_MIPS ,$(T + TARGET_LJARCH= mips + endif + else ++ifneq (,$(findstring LJ_TARGET_RISCV64 ,$(TARGET_TESTARCH))) ++ TARGET_LJARCH= riscv64 ++else + $(error Unsupported target architecture) + endif + endif +@@ -278,6 +282,7 @@ endif + endif + endif + endif ++endif + + ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH))) + TARGET_SYS= PS3 +@@ -495,6 +500,9 @@ ifeq (ppc,$(TARGET_LJARCH)) + DASM_AFLAGS+= -D ELFV2 + endif + endif ++ifneq (,$(findstring LJ_TARGET_RISCV64 ,$(TARGET_TESTARCH))) ++ DASM_AFLAGS+= -D RISCV64 ++endif + endif + endif + diff --git 
a/lang/lua/luajit2/patches/060-ppc-musl.patch b/lang/lua/luajit2/patches/060-ppc-musl.patch new file mode 100644 index 0000000000000..efb676b33a664 --- /dev/null +++ b/lang/lua/luajit2/patches/060-ppc-musl.patch @@ -0,0 +1,112 @@ +From 195e0be62c0aa0f6aaf63a93ee322bb0a630576c Mon Sep 17 00:00:00 2001 +From: Clint Bland +Date: Wed, 13 Mar 2019 19:19:16 -0700 +Subject: [PATCH] Have powerpc use fake GOT like MIPS + +--- + src/lj_dispatch.c | 15 +++++++++++++++ + src/lj_dispatch.h | 29 ++++++++++++++++++++++++++++- + src/vm_ppc.dasc | 9 ++++++++- + 3 files changed, 51 insertions(+), 2 deletions(-) + +--- a/src/lj_dispatch.c ++++ b/src/lj_dispatch.c +@@ -56,6 +56,18 @@ static const ASMFunction dispatch_got[] + #undef GOTFUNC + #endif + ++#if LJ_TARGET_PPC ++#include ++LJ_FUNCA_NORET void LJ_FASTCALL lj_ffh_coroutine_wrap_err(lua_State *L, ++ lua_State *co); ++ ++#define GOTFUNC(name) (ASMFunction)name, ++static const ASMFunction dispatch_got[] = { ++ GOTDEF(GOTFUNC) ++}; ++#undef GOTFUNC ++#endif ++ + /* Initialize instruction dispatch table and hot counters. */ + void lj_dispatch_init(GG_State *GG) + { +@@ -79,6 +91,9 @@ void lj_dispatch_init(GG_State *GG) + #if LJ_TARGET_MIPS + memcpy(GG->got, dispatch_got, LJ_GOT__MAX*sizeof(ASMFunction *)); + #endif ++#if LJ_TARGET_PPC ++ memcpy(GG->got, dispatch_got, LJ_GOT__MAX*4); ++#endif + } + + #if LJ_HASJIT +--- a/src/lj_dispatch.h ++++ b/src/lj_dispatch.h +@@ -66,6 +66,33 @@ GOTDEF(GOTENUM) + }; + #endif + ++#if LJ_TARGET_PPC ++/* Need our own global offset table for the dreaded MIPS calling conventions. 
*/ ++#if LJ_SOFTFP ++#ifndef _LJ_IRCALL_H ++extern double __ledf2(double a, double b); ++extern double __adddf3(double a, double b); ++extern double __subdf3(double a, double b); ++extern double __muldf3(double a, double b); ++extern double __divdf3(double a, double b); ++#endif ++#define SFGOTDEF(_) _(__ledf2) _(__adddf3) _(__subdf3) _(__muldf3) _(__divdf3) ++#else ++#define SFGOTDEF(_) ++#endif ++#define GOTDEF(_) \ ++ _(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \ ++ _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \ ++ _(pow) _(fmod) _(ldexp) _(sqrt) SFGOTDEF(_) ++ ++enum { ++#define GOTENUM(name) LJ_GOT_##name, ++GOTDEF(GOTENUM) ++#undef GOTENUM ++ LJ_GOT__MAX ++}; ++#endif ++ + /* Type of hot counter. Must match the code in the assembler VM. */ + /* 16 bits are sufficient. Only 0.0015% overhead with maximum slot penalty. */ + typedef uint16_t HotCount; +@@ -93,7 +120,7 @@ typedef struct GG_State { + /* Make g reachable via K12 encoded DISPATCH-relative addressing. */ + uint8_t align1[(16-sizeof(global_State))&15]; + #endif +-#if LJ_TARGET_MIPS ++#if LJ_TARGET_MIPS || LJ_TARGET_PPC + ASMFunction got[LJ_GOT__MAX]; /* Global offset table. */ + #endif + #if LJ_HASJIT +--- a/src/vm_ppc.dasc ++++ b/src/vm_ppc.dasc +@@ -50,7 +50,12 @@ + |.macro blex, target; bl extern target; nop; .endmacro + |.macro .toc, a, b; a, b; .endmacro + |.else +-|.macro blex, target; bl extern target@plt; .endmacro ++|.macro blex, target ++| lwz TMP0, DISPATCH_GOT(target)(DISPATCH) ++| mtctr TMP0 ++| bctrl ++| //bl extern target@plt ++|.endmacro + |.macro .toc, a, b; .endmacro + |.endif + |.if OPD +@@ -577,6 +582,8 @@ + |// Assumes DISPATCH is relative to GL. 
+ #define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) + #define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) ++#define GG_DISP2GOT (GG_OFS(got) - GG_OFS(dispatch)) ++#define DISPATCH_GOT(name) (GG_DISP2GOT + 4*LJ_GOT_##name) + | + #define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) + | diff --git a/lang/lua/luajit2/test-version.sh b/lang/lua/luajit2/test-version.sh index 236bc554dcefe..818f2a1dfba26 100644 --- a/lang/lua/luajit2/test-version.sh +++ b/lang/lua/luajit2/test-version.sh @@ -4,9 +4,8 @@ case "$PKG_NAME" in #luajit2 use build number at -v but releases are named by date luajit2) - exit 0 + luajit2 -v 2>&1 | grep -F "LuaJIT 2.1." ;; - *) echo "Untested package: $PKG_NAME" >&2 exit 1