From a1ce3e7ae1447d45da4bf7351395a424b214e7b2 Mon Sep 17 00:00:00 2001 From: ZhouGuangyuan Date: Tue, 5 May 2026 18:09:16 +0800 Subject: [PATCH 1/7] amd64: support SETCS --- amd64_helper_edge_test.go | 25 +++++++++++++++++++++++++ amd64_lower_arith.go | 4 +++- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/amd64_helper_edge_test.go b/amd64_helper_edge_test.go index bf14835..3e5af3c 100644 --- a/amd64_helper_edge_test.go +++ b/amd64_helper_edge_test.go @@ -599,6 +599,7 @@ func TestAMD64ArithmeticCoverage(t *testing.T) { mustLower("SETEQ", Instr{Raw: "SETEQ AX", Args: []Operand{{Kind: OpReg, Reg: AX}}}) mustLower("SETGT", Instr{Raw: "SETGT ret+8(FP)", Args: []Operand{{Kind: OpFP, FPOffset: 8}}}) mustLower("SETHI", Instr{Raw: "SETHI BX", Args: []Operand{{Kind: OpReg, Reg: BX}}}) + mustLower("SETCS", Instr{Raw: "SETCS AX", Args: []Operand{{Kind: OpReg, Reg: AX}}}) mustLower("CMOVQEQ", Instr{Raw: "CMOVQEQ CX, AX", Args: []Operand{{Kind: OpReg, Reg: CX}, {Kind: OpReg, Reg: AX}}}) mustLower("CMOVQNE", Instr{Raw: "CMOVQNE CX, AX", Args: []Operand{{Kind: OpReg, Reg: CX}, {Kind: OpReg, Reg: AX}}}) mustLower("CMOVQCS", Instr{Raw: "CMOVQCS CX, AX", Args: []Operand{{Kind: OpReg, Reg: CX}, {Kind: OpReg, Reg: AX}}}) @@ -656,6 +657,30 @@ func TestAMD64ArithmeticCoverage(t *testing.T) { } } +func TestAMD64SetCSUsesCarryFlag(t *testing.T) { + c, b := newAMD64CtxWithFuncForTest(t, Func{}, FuncSig{Name: "example.setcs", Ret: Void}, nil) + if err := c.storeReg(AX, "0"); err != nil { + t.Fatalf("storeReg(AX) error = %v", err) + } + b.WriteString(" store i1 true, ptr " + c.flagsCFSlot + "\n") + + ins := Instr{Raw: "SETCS AX", Args: []Operand{{Kind: OpReg, Reg: AX}}} + if ok, term, err := c.lowerArith("SETCS", ins); !ok || term || err != nil { + t.Fatalf("lowerArith(SETCS) = (%v, %v, %v)", ok, term, err) + } + + out := b.String() + for _, want := range []string{ + "store i1 true, ptr %flags_cf", + "load i1, ptr %flags_cf", + "select i1 %", + } { + if !strings.Contains(out, want) { + t.Fatalf("missing %q in output:\n%s", want, out) + } + } +} + func TestAMD64VectorCoverage(t *testing.T) { fn := Func{ Instrs: []Instr{ diff --git a/amd64_lower_arith.go b/amd64_lower_arith.go index efded90..4bc58cb 100644 --- a/amd64_lower_arith.go +++ b/amd64_lower_arith.go @@ -785,7 +785,7 @@ func (c *amd64Ctx) lowerArith(op Op, ins Instr) (ok bool, terminated bool, err e } return true, false, fmt.Errorf("amd64: unsupported bit op %s", op) - case "SETEQ", "SETGT", "SETHI": + case "SETEQ", "SETGT", "SETHI", "SETCS": // SETcc dst: set byte based on flags. // We support register destinations and FP result slots. if len(ins.Args) != 1 { @@ -813,6 +813,8 @@ func (c *amd64Ctx) lowerArith(op Op, ins Instr) (ok bool, terminated bool, err e t2 := c.newTmp() fmt.Fprintf(c.b, " %%%s = xor i1 %%%s, true\n", t2, t1) cond = "%" + t2 + case "SETCS": + cond = c.loadFlag(c.flagsCFSlot) } switch ins.Args[0].Kind { case OpReg: From 5619ebb326f922595fab10854a8863f200449b12 Mon Sep 17 00:00:00 2001 From: ZhouGuangyuan Date: Tue, 5 May 2026 18:19:01 +0800 Subject: [PATCH 2/7] amd64: support SETCS AH and AL --- amd64_ctx.go | 82 +++++++++++++++++++++++++-------------- amd64_helper_edge_test.go | 26 ++++++++++--- types.go | 12 ++++++ types_deep_test.go | 1 + 4 files changed, 86 insertions(+), 35 deletions(-) diff --git a/amd64_ctx.go b/amd64_ctx.go index 3bbb818..c3cd515 100644 --- a/amd64_ctx.go +++ b/amd64_ctx.go @@ -469,31 +469,44 @@ func amd64ValueAsI64(c *amd64Ctx, ty LLVMType, v string) (out string, ok bool, e } } -func (c *amd64Ctx) loadReg(r Reg) (string, error) { - // Model low-byte aliases used by stdlib asm. - // We treat writes/reads of AL/BL/CL/DL as operating on the full AX/BX/CX/DX, - // returning the masked low byte as a zero-extended i64. - alias := func(rr Reg) (base Reg, mask int64, ok bool) { - switch rr { - case AL: - return AX, 0xff, true - case BL: - return BX, 0xff, true - case CL: - return CX, 0xff, true - case DL: - return DX, 0xff, true - default: - return "", 0, false - } +func amd64ByteAlias(rr Reg) (base Reg, shift uint, ok bool) { + switch rr { + case AL: + return AX, 0, true + case AH: + return AX, 8, true + case BL: + return BX, 0, true + case BH: + return BX, 8, true + case CL: + return CX, 0, true + case CH: + return CX, 8, true + case DL: + return DX, 0, true + case DH: + return DX, 8, true + default: + return "", 0, false } - if base, mask, ok := alias(r); ok { +} + +func (c *amd64Ctx) loadReg(r Reg) (string, error) { + // Model byte aliases used by stdlib asm. Reads return the selected byte as a + // zero-extended i64. + if base, shift, ok := amd64ByteAlias(r); ok { v, err := c.loadReg(base) if err != nil { return "", err } + if shift != 0 { + t := c.newTmp() + fmt.Fprintf(c.b, " %%%s = lshr i64 %s, %d\n", t, v, shift) + v = "%" + t + } t := c.newTmp() - fmt.Fprintf(c.b, " %%%s = and i64 %s, %d\n", t, v, mask) + fmt.Fprintf(c.b, " %%%s = and i64 %s, 255\n", t, v) return "%" + t, nil } slot, ok := c.regSlot[r] @@ -506,16 +519,27 @@ func (c *amd64Ctx) loadReg(r Reg) (string, error) { } func (c *amd64Ctx) storeReg(r Reg, v string) error { - // See loadReg for alias handling. - switch r { - case AL: - r = AX - case BL: - r = BX - case CL: - r = CX - case DL: - r = DX + // See loadReg for byte-alias handling. + if base, shift, ok := amd64ByteAlias(r); ok { + cur, err := c.loadReg(base) + if err != nil { + return err + } + mask := int64(0xff) << shift + cleared := c.newTmp() + fmt.Fprintf(c.b, " %%%s = and i64 %s, %d\n", cleared, cur, ^mask) + byteVal := c.newTmp() + fmt.Fprintf(c.b, " %%%s = and i64 %s, 255\n", byteVal, v) + ins := "%" + byteVal + if shift != 0 { + shifted := c.newTmp() + fmt.Fprintf(c.b, " %%%s = shl i64 %%%s, %d\n", shifted, byteVal, shift) + ins = "%" + shifted + } + merged := c.newTmp() + fmt.Fprintf(c.b, " %%%s = or i64 %%%s, %s\n", merged, cleared, ins) + r = base + v = "%" + merged } slot, ok := c.regSlot[r] if !ok { diff --git a/amd64_helper_edge_test.go b/amd64_helper_edge_test.go index 3e5af3c..752ce1b 100644 --- a/amd64_helper_edge_test.go +++ b/amd64_helper_edge_test.go @@ -599,7 +599,8 @@ func TestAMD64ArithmeticCoverage(t *testing.T) { mustLower("SETEQ", Instr{Raw: "SETEQ AX", Args: []Operand{{Kind: OpReg, Reg: AX}}}) mustLower("SETGT", Instr{Raw: "SETGT ret+8(FP)", Args: []Operand{{Kind: OpFP, FPOffset: 8}}}) mustLower("SETHI", Instr{Raw: "SETHI BX", Args: []Operand{{Kind: OpReg, Reg: BX}}}) - mustLower("SETCS", Instr{Raw: "SETCS AX", Args: []Operand{{Kind: OpReg, Reg: AX}}}) + mustLower("SETCS", Instr{Raw: "SETCS AL", Args: []Operand{{Kind: OpReg, Reg: AL}}}) + mustLower("SETCS", Instr{Raw: "SETCS AH", Args: []Operand{{Kind: OpReg, Reg: AH}}}) mustLower("CMOVQEQ", Instr{Raw: "CMOVQEQ CX, AX", Args: []Operand{{Kind: OpReg, Reg: CX}, {Kind: OpReg, Reg: AX}}}) mustLower("CMOVQNE", Instr{Raw: "CMOVQNE CX, AX", Args: []Operand{{Kind: OpReg, Reg: CX}, {Kind: OpReg, Reg: AX}}}) mustLower("CMOVQCS", Instr{Raw: "CMOVQCS CX, AX", Args: []Operand{{Kind: OpReg, Reg: CX}, {Kind: OpReg, Reg: AX}}}) @@ -659,14 +660,18 @@ func TestAMD64ArithmeticCoverage(t *testing.T) { func TestAMD64SetCSUsesCarryFlag(t *testing.T) { c, b := newAMD64CtxWithFuncForTest(t, Func{}, FuncSig{Name: "example.setcs", Ret: Void}, nil) - if err := c.storeReg(AX, "0"); err != nil { + if err := c.storeReg(AX, "4660"); err != nil { t.Fatalf("storeReg(AX) error = %v", err) } b.WriteString(" store i1 true, ptr " + c.flagsCFSlot + "\n") - ins := Instr{Raw: "SETCS AX", Args: []Operand{{Kind: OpReg, Reg: AX}}} - if ok, term, err := c.lowerArith("SETCS", ins); !ok || term || err != nil { - t.Fatalf("lowerArith(SETCS) = (%v, %v, %v)", ok, term, err) + for _, ins := range []Instr{ + {Raw: "SETCS AL", Args: []Operand{{Kind: OpReg, Reg: AL}}}, + {Raw: "SETCS AH", Args: []Operand{{Kind: OpReg, Reg: AH}}}, + } { + if ok, term, err := c.lowerArith("SETCS", ins); !ok || term || err != nil { + t.Fatalf("lowerArith(%s) = (%v, %v, %v)", ins.Raw, ok, term, err) + } } out := b.String() @@ -674,6 +679,8 @@ func TestAMD64SetCSUsesCarryFlag(t *testing.T) { "store i1 true, ptr %flags_cf", "load i1, ptr %flags_cf", "select i1 %", + "shl i64", + "or i64", } { if !strings.Contains(out, want) { t.Fatalf("missing %q in output:\n%s", want, out) @@ -1878,7 +1885,7 @@ func TestAMD64CtxAliasAndFPFallbackCoverage(t *testing.T) { t.Fatalf("storeReg(%s) error = %v", tc.r, err) } } - for _, r := range []Reg{AL, BL, CL, DL} { + for _, r := range []Reg{AL, AH, BL, BH, CL, CH, DL, DH} { if got, err := c.loadReg(r); err != nil || got == "" { t.Fatalf("loadReg(%s) = (%q, %v)", r, got, err) } @@ -1891,9 +1898,13 @@ func TestAMD64CtxAliasAndFPFallbackCoverage(t *testing.T) { v string }{ {AL, "17"}, + {AH, "33"}, {BL, "18"}, + {BH, "34"}, {CL, "19"}, + {CH, "35"}, {DL, "20"}, + {DH, "36"}, } { if err := c.storeReg(tc.r, tc.v); err != nil { t.Fatalf("storeReg(%s) error = %v", tc.r, err) @@ -1944,6 +1955,9 @@ func TestAMD64CtxAliasAndFPFallbackCoverage(t *testing.T) { out := b.String() for _, want := range []string{ "and i64", + "lshr i64", + "shl i64", + "or i64", "zext i1", "zext i8", "zext i16", diff --git a/types.go b/types.go index 9ecf439..e41467b 100644 --- a/types.go +++ b/types.go @@ -32,9 +32,13 @@ const ( PC Reg = "PC" AL Reg = "AL" + AH Reg = "AH" BL Reg = "BL" + BH Reg = "BH" CL Reg = "CL" + CH Reg = "CH" DL Reg = "DL" + DH Reg = "DH" ZR Reg = "ZR" ) @@ -78,12 +82,20 @@ func parseReg(s string) (Reg, bool) { return PC, true case "AL": return AL, true + case "AH": + return AH, true case "BL": return BL, true + case "BH": + return BH, true case "CL": return CL, true + case "CH": + return CH, true case "DL": return DL, true + case "DH": + return DH, true case "ZR": return ZR, true case "G": diff --git a/types_deep_test.go b/types_deep_test.go index 4b9c275..08f4ed0 100644 --- a/types_deep_test.go +++ b/types_deep_test.go @@ -112,6 +112,7 @@ func TestTypeParserEdgeCoverage(t *testing.T) { ok bool }{ {"rax", AX, true}, + {"ah", AH, true}, {"w3", "R3", true}, {"g", "R28", true}, {"lr", "R30", true}, From 903991ce77623d1d9cf4d51cb2f253064c96ca09 Mon Sep 17 00:00:00 2001 From: ZhouGuangyuan Date: Tue, 5 May 2026 18:26:08 +0800 Subject: [PATCH 3/7] amd64: support SETGE --- amd64_helper_edge_test.go | 27 +++++++++++++++++++++++++++ amd64_lower_arith.go | 7 ++++++- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/amd64_helper_edge_test.go b/amd64_helper_edge_test.go index 752ce1b..ce9d73e 100644 --- a/amd64_helper_edge_test.go +++ b/amd64_helper_edge_test.go @@ -598,6 +598,7 @@ func TestAMD64ArithmeticCoverage(t *testing.T) { mustLower("BSRL", Instr{Raw: "BSRL AX, DI", Args: []Operand{{Kind: OpReg, Reg: AX}, {Kind: OpReg, Reg: DI}}}) mustLower("SETEQ", Instr{Raw: "SETEQ AX", Args: []Operand{{Kind: OpReg, Reg: AX}}}) mustLower("SETGT", Instr{Raw: "SETGT ret+8(FP)", Args: []Operand{{Kind: OpFP, FPOffset: 8}}}) + mustLower("SETGE", Instr{Raw: "SETGE AH", Args: []Operand{{Kind: OpReg, Reg: AH}}}) mustLower("SETHI", Instr{Raw: "SETHI BX", Args: []Operand{{Kind: OpReg, Reg: BX}}}) mustLower("SETCS", Instr{Raw: "SETCS AL", Args: []Operand{{Kind: OpReg, Reg: AL}}}) mustLower("SETCS", Instr{Raw: "SETCS AH", Args: []Operand{{Kind: OpReg, Reg: AH}}}) @@ -688,6 +689,32 @@ func TestAMD64SetCSUsesCarryFlag(t *testing.T) { } } +func TestAMD64SetGEUsesSignedFlag(t *testing.T) { + c, b := newAMD64CtxWithFuncForTest(t, Func{}, FuncSig{Name: "example.setge", Ret: Void}, nil) + if err := c.storeReg(AX, "4660"); err != nil { + t.Fatalf("storeReg(AX) error = %v", err) + } + b.WriteString(" store i1 false, ptr " + c.flagsSltSlot + "\n") + + ins := Instr{Raw: "SETGE AH", Args: []Operand{{Kind: OpReg, Reg: AH}}} + if ok, term, err := c.lowerArith("SETGE", ins); !ok || term || err != nil { + t.Fatalf("lowerArith(%s) = (%v, %v, %v)", ins.Raw, ok, term, err) + } + + out := b.String() + for _, want := range []string{ + "store i1 false, ptr %flags_slt", + "load i1, ptr %flags_slt", + "xor i1 %", + "shl i64", + "or i64", + } { + if !strings.Contains(out, want) { + t.Fatalf("missing %q in output:\n%s", want, out) + } + } +} + func TestAMD64VectorCoverage(t *testing.T) { fn := Func{ Instrs: []Instr{ diff --git a/amd64_lower_arith.go b/amd64_lower_arith.go index 4bc58cb..acec331 100644 --- a/amd64_lower_arith.go +++ b/amd64_lower_arith.go @@ -785,7 +785,7 @@ func (c *amd64Ctx) lowerArith(op Op, ins Instr) (ok bool, terminated bool, err e } return true, false, fmt.Errorf("amd64: unsupported bit op %s", op) - case "SETEQ", "SETGT", "SETHI", "SETCS": + case "SETEQ", "SETGT", "SETGE", "SETHI", "SETCS": // SETcc dst: set byte based on flags. // We support register destinations and FP result slots. if len(ins.Args) != 1 { @@ -804,6 +804,11 @@ func (c *amd64Ctx) lowerArith(op Op, ins Instr) (ok bool, terminated bool, err e t2 := c.newTmp() fmt.Fprintf(c.b, " %%%s = xor i1 %%%s, true\n", t2, t1) cond = "%" + t2 + case "SETGE": + slt := c.loadFlag(c.flagsSltSlot) + t := c.newTmp() + fmt.Fprintf(c.b, " %%%s = xor i1 %s, true\n", t, slt) + cond = "%" + t case "SETHI": // unsigned > cf := c.loadFlag(c.flagsCFSlot) From d50bd410439f2d0722a3ff76c4876817c32b26b0 Mon Sep 17 00:00:00 2001 From: ZhouGuangyuan Date: Tue, 5 May 2026 18:30:50 +0800 Subject: [PATCH 4/7] amd64: support ADCB --- amd64_helper_edge_test.go | 34 +++++++++++++++++ amd64_lower_arith.go | 79 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 113 insertions(+) diff --git a/amd64_helper_edge_test.go b/amd64_helper_edge_test.go index ce9d73e..7943bc3 100644 --- a/amd64_helper_edge_test.go +++ b/amd64_helper_edge_test.go @@ -569,6 +569,7 @@ func TestAMD64ArithmeticCoverage(t *testing.T) { mustLower("ANDQ", Instr{Raw: "ANDQ $4, AX", Args: []Operand{{Kind: OpImm, Imm: 4}, {Kind: OpReg, Reg: AX}}}) mustLower("ORQ", Instr{Raw: "ORQ $5, AX", Args: []Operand{{Kind: OpImm, Imm: 5}, {Kind: OpReg, Reg: AX}}}) mustLower("ADCQ", Instr{Raw: "ADCQ $1, AX", Args: []Operand{{Kind: OpImm, Imm: 1}, {Kind: OpReg, Reg: AX}}}) + mustLower("ADCB", Instr{Raw: "ADCB $1, AL", Args: []Operand{{Kind: OpImm, Imm: 1}, {Kind: OpReg, Reg: AL}}}) mustLower("SBBQ", Instr{Raw: "SBBQ $1, AX", Args: []Operand{{Kind: OpImm, Imm: 1}, {Kind: OpReg, Reg: AX}}}) mustLower("ADCXQ", Instr{Raw: "ADCXQ $1, AX", Args: []Operand{{Kind: OpImm, Imm: 1}, {Kind: OpReg, Reg: AX}}}) mustLower("ADOXQ", Instr{Raw: "ADOXQ $1, AX", Args: []Operand{{Kind: OpImm, Imm: 1}, {Kind: OpReg, Reg: AX}}}) @@ -715,6 +716,39 @@ func TestAMD64SetGEUsesSignedFlag(t *testing.T) { } } +func TestAMD64ADCBUsesCarryFlag(t *testing.T) { + c, b := newAMD64CtxWithFuncForTest(t, Func{}, FuncSig{Name: "example.adcb", Ret: Void}, nil) + if err := c.storeReg(AX, "4660"); err != nil { + t.Fatalf("storeReg(AX) error = %v", err) + } + b.WriteString(" store i1 true, ptr " + c.flagsCFSlot + "\n") + + for _, ins := range []Instr{ + {Raw: "ADCB $1, AL", Args: []Operand{{Kind: OpImm, Imm: 1}, {Kind: OpReg, Reg: AL}}}, + {Raw: "ADCB $1, AH", Args: []Operand{{Kind: OpImm, Imm: 1}, {Kind: OpReg, Reg: AH}}}, + } { + if ok, term, err := c.lowerArith("ADCB", ins); !ok || term || err != nil { + t.Fatalf("lowerArith(%s) = (%v, %v, %v)", ins.Raw, ok, term, err) + } + } + + out := b.String() + for _, want := range []string{ + "store i1 true, ptr %flags_cf", + "load i1, ptr %flags_cf", + "zext i1 %", + "add i8", + "zext i8", + "icmp ugt i16", + "shl i64", + "or i64", + } { + if !strings.Contains(out, want) { + t.Fatalf("missing %q in output:\n%s", want, out) + } + } +} + func TestAMD64VectorCoverage(t *testing.T) { fn := Func{ Instrs: []Instr{ diff --git a/amd64_lower_arith.go b/amd64_lower_arith.go index acec331..9e883cc 100644 --- a/amd64_lower_arith.go +++ b/amd64_lower_arith.go @@ -387,6 +387,85 @@ func (c *amd64Ctx) lowerArith(op Op, ins Instr) (ok bool, terminated bool, err e c.setZSFlagsFromI64(out) return true, false, nil + case "ADCB": + // 8-bit add with carry: src, dstReg/mem. + if len(ins.Args) != 2 { + return true, false, fmt.Errorf("amd64 %s expects src, dst: %q", op, ins.Raw) + } + var d8 string + var storeDst func(string) error + switch ins.Args[1].Kind { + case OpReg: + dst := ins.Args[1].Reg + dv64, err := c.loadReg(dst) + if err != nil { + return true, false, err + } + t := c.newTmp() + fmt.Fprintf(c.b, " %%%s = trunc i64 %s to i8\n", t, dv64) + d8 = "%" + t + storeDst = func(v8 string) error { + z := c.newTmp() + fmt.Fprintf(c.b, " %%%s = zext i8 %s to i64\n", z, v8) + return c.storeReg(dst, "%"+z) + } + case OpMem: + addr, err := c.addrFromMem(ins.Args[1].Mem) + if err != nil { + return true, false, err + } + p := c.ptrFromAddrI64(addr) + ld := c.newTmp() + fmt.Fprintf(c.b, " %%%s = load i8, ptr %s, align 1\n", ld, p) + d8 = "%" + ld + storeDst = func(v8 string) error { + fmt.Fprintf(c.b, " store i8 %s, ptr %s, align 1\n", v8, p) + return nil + } + default: + return true, false, fmt.Errorf("amd64 %s expects reg/mem dst: %q", op, ins.Raw) + } + + s8, err := c.evalIntSized(ins.Args[0], I8) + if err != nil { + return true, false, err + } + cfIn := c.loadFlag(c.flagsCFSlot) + cf8t := c.newTmp() + fmt.Fprintf(c.b, " %%%s = zext i1 %s to i8\n", cf8t, cfIn) + cf8 := "%" + cf8t + + sum := c.newTmp() + fmt.Fprintf(c.b, " %%%s = add i8 %s, %s\n", sum, d8, s8) + res := c.newTmp() + fmt.Fprintf(c.b, " %%%s = add i8 %%%s, %s\n", res, sum, cf8) + out8 := "%" + res + if err := storeDst(out8); err != nil { + return true, false, err + } + + d16 := c.newTmp() + fmt.Fprintf(c.b, " %%%s = zext i8 %s to i16\n", d16, d8) + s16 := c.newTmp() + fmt.Fprintf(c.b, " %%%s = zext i8 %s to i16\n", s16, s8) + cf16 := c.newTmp() + fmt.Fprintf(c.b, " %%%s = zext i8 %s to i16\n", cf16, cf8) + total1 := c.newTmp() + fmt.Fprintf(c.b, " %%%s = add i16 %%%s, %%%s\n", total1, d16, s16) + total2 := c.newTmp() + fmt.Fprintf(c.b, " %%%s = add i16 %%%s, %%%s\n", total2, total1, cf16) + cf := c.newTmp() + fmt.Fprintf(c.b, " %%%s = icmp ugt i16 %%%s, 255\n", cf, total2) + fmt.Fprintf(c.b, " store i1 %%%s, ptr %s\n", cf, c.flagsCFSlot) + fmt.Fprintf(c.b, " store i1 false, ptr %s\n", c.flagsOFSlot) + zf := c.newTmp() + fmt.Fprintf(c.b, " %%%s = icmp eq i8 %%%s, 0\n", zf, res) + fmt.Fprintf(c.b, " store i1 %%%s, ptr %s\n", zf, c.flagsZSlot) + sf := c.newTmp() + fmt.Fprintf(c.b, " %%%s = icmp slt i8 %%%s, 0\n", sf, res) + fmt.Fprintf(c.b, " store i1 %%%s, ptr %s\n", sf, c.flagsSltSlot) + return true, false, nil + case "ADCXQ", "ADOXQ": // BMI2 add-with-carry chains. if len(ins.Args) != 2 || ins.Args[1].Kind != OpReg { From db6124d5ffc7a13753b6fab3fe8fb9bb03be83e6 Mon Sep 17 00:00:00 2001 From: ZhouGuangyuan Date: Tue, 5 May 2026 18:37:32 +0800 Subject: [PATCH 5/7] amd64: support ADDB and bmi2 shifts --- amd64_helper_edge_test.go | 3 +++ amd64_lower_arith.go | 52 ++++++++++++++++++++++++++++++++++++--- 2 files changed, 52 insertions(+), 3 deletions(-) diff --git a/amd64_helper_edge_test.go b/amd64_helper_edge_test.go index 7943bc3..a43070e 100644 --- a/amd64_helper_edge_test.go +++ b/amd64_helper_edge_test.go @@ -574,6 +574,7 @@ func TestAMD64ArithmeticCoverage(t *testing.T) { mustLower("ADCXQ", Instr{Raw: "ADCXQ $1, AX", Args: []Operand{{Kind: OpImm, Imm: 1}, {Kind: OpReg, Reg: AX}}}) mustLower("ADOXQ", Instr{Raw: "ADOXQ $1, AX", Args: []Operand{{Kind: OpImm, Imm: 1}, {Kind: OpReg, Reg: AX}}}) mustLower("ADDL", Instr{Raw: "ADDL $1, AX", Args: []Operand{{Kind: OpImm, Imm: 1}, {Kind: OpReg, Reg: AX}}}) + mustLower("ADDB", Instr{Raw: "ADDB CL, DX", Args: []Operand{{Kind: OpReg, Reg: CL}, {Kind: OpReg, Reg: DX}}}) mustLower("SUBL", Instr{Raw: "SUBL CX, 12(BX)", Args: []Operand{{Kind: OpReg, Reg: CX}, {Kind: OpMem, Mem: MemRef{Base: BX, Off: 12}}}}) mustLower("XORL", Instr{Raw: "XORL $1, AX", Args: []Operand{{Kind: OpImm, Imm: 1}, {Kind: OpReg, Reg: AX}}}) mustLower("ANDL", Instr{Raw: "ANDL $1, AX", Args: []Operand{{Kind: OpImm, Imm: 1}, {Kind: OpReg, Reg: AX}}}) @@ -612,9 +613,11 @@ func TestAMD64ArithmeticCoverage(t *testing.T) { mustLower("ANDNQ", Instr{Raw: "ANDNQ AX, BX, DX", Args: []Operand{{Kind: OpReg, Reg: AX}, {Kind: OpReg, Reg: BX}, {Kind: OpReg, Reg: DX}}}) mustLower("SHRQ", Instr{Raw: "SHRQ $1, AX", Args: []Operand{{Kind: OpImm, Imm: 1}, {Kind: OpReg, Reg: AX}}}) mustLower("SHLQ", Instr{Raw: "SHLQ CX, AX", Args: []Operand{{Kind: OpReg, Reg: CX}, {Kind: OpReg, Reg: AX}}}) + mustLower("SHLXQ", Instr{Raw: "SHLXQ DI, CX, AX", Args: []Operand{{Kind: OpReg, Reg: DI}, {Kind: OpReg, Reg: CX}, {Kind: OpReg, Reg: AX}}}) mustLower("SARQ", Instr{Raw: "SARQ $1, AX", Args: []Operand{{Kind: OpImm, Imm: 1}, {Kind: OpReg, Reg: AX}}}) mustLower("SHLL", Instr{Raw: "SHLL $1, AX", Args: []Operand{{Kind: OpImm, Imm: 1}, {Kind: OpReg, Reg: AX}}}) mustLower("SHRL", Instr{Raw: "SHRL CX, AX", Args: []Operand{{Kind: OpReg, Reg: CX}, {Kind: OpReg, Reg: AX}}}) + mustLower("SHRXQ", Instr{Raw: "SHRXQ DI, AX, CX", Args: []Operand{{Kind: OpReg, Reg: DI}, {Kind: OpReg, Reg: AX}, {Kind: OpReg, Reg: CX}}}) mustLower("SALQ", Instr{Raw: "SALQ $1, AX", Args: []Operand{{Kind: OpImm, Imm: 1}, {Kind: OpReg, Reg: AX}}}) mustLower("SALL", Instr{Raw: "SALL $1, AX", Args: []Operand{{Kind: OpImm, Imm: 1}, {Kind: OpReg, Reg: AX}}}) mustLower("ROLL", Instr{Raw: "ROLL $1, AX", Args: []Operand{{Kind: OpImm, Imm: 1}, {Kind: OpReg, Reg: AX}}}) diff --git a/amd64_lower_arith.go b/amd64_lower_arith.go index 9e883cc..90d67f3 100644 --- a/amd64_lower_arith.go +++ b/amd64_lower_arith.go @@ -599,8 +599,8 @@ func (c *amd64Ctx) lowerArith(op Op, ins Instr) (ok bool, terminated bool, err e c.setZSFlagsFromI32("%" + x) return true, false, nil - case "XORB", "ANDB", "ORB": - // 8-bit logical ops: src, dstReg (stored in low 8 bits of dst reg). + case "ADDB", "XORB", "ANDB", "ORB": + // 8-bit scalar ops: src, dstReg (stored in low 8 bits of dst reg). if len(ins.Args) != 2 || ins.Args[1].Kind != OpReg { return true, false, fmt.Errorf("amd64 %s expects src, dstReg: %q", op, ins.Raw) } @@ -628,6 +628,8 @@ func (c *amd64Ctx) lowerArith(op Op, ins Instr) (ok bool, terminated bool, err e } x := c.newTmp() switch op { + case "ADDB": + fmt.Fprintf(c.b, " %%%s = add i8 %%%s, %s\n", x, d8, s8) case "XORB": fmt.Fprintf(c.b, " %%%s = xor i8 %%%s, %s\n", x, d8, s8) case "ANDB": @@ -640,7 +642,19 @@ func (c *amd64Ctx) lowerArith(op Op, ins Instr) (ok bool, terminated bool, err e if err := c.storeReg(dst, "%"+z); err != nil { return true, false, err } - fmt.Fprintf(c.b, " store i1 false, ptr %s\n", c.flagsCFSlot) + if op == "ADDB" { + d16 := c.newTmp() + fmt.Fprintf(c.b, " %%%s = zext i8 %%%s to i16\n", d16, d8) + s16 := c.newTmp() + fmt.Fprintf(c.b, " %%%s = zext i8 %s to i16\n", s16, s8) + total := c.newTmp() + fmt.Fprintf(c.b, " %%%s = add i16 %%%s, %%%s\n", total, d16, s16) + cf := c.newTmp() + fmt.Fprintf(c.b, " %%%s = icmp ugt i16 %%%s, 255\n", cf, total) + fmt.Fprintf(c.b, " store i1 %%%s, ptr %s\n", cf, c.flagsCFSlot) + } else { + fmt.Fprintf(c.b, " store i1 false, ptr %s\n", c.flagsCFSlot) + } fmt.Fprintf(c.b, " store i1 false, ptr %s\n", c.flagsOFSlot) zf := c.newTmp() fmt.Fprintf(c.b, " %%%s = icmp eq i8 %%%s, 0\n", zf, x) @@ -1059,6 +1073,38 @@ func (c *amd64Ctx) lowerArith(op Op, ins Instr) (ok bool, terminated bool, err e fmt.Fprintf(c.b, " %%%s = zext i32 %%%s to i64\n", z, sh) return true, false, c.storeReg(dst, "%"+z) + case "SHLXQ", "SHRXQ": + // BMI2 variable shifts: amt, src, dst. + if len(ins.Args) != 3 || ins.Args[1].Kind != OpReg || ins.Args[2].Kind != OpReg { + return true, false, fmt.Errorf("amd64 %s expects amt, srcReg, dstReg: %q", op, ins.Raw) + } + src, err := c.loadReg(ins.Args[1].Reg) + if err != nil { + return true, false, err + } + var amt string + switch ins.Args[0].Kind { + case OpImm: + amt = fmt.Sprintf("%d", ins.Args[0].Imm&63) + case OpReg: + av, err := c.loadReg(ins.Args[0].Reg) + if err != nil { + return true, false, err + } + m := c.newTmp() + fmt.Fprintf(c.b, " %%%s = and i64 %s, 63\n", m, av) + amt = "%" + m + default: + return true, false, fmt.Errorf("amd64 %s unsupported shift amt: %q", op, ins.Raw) + } + t := c.newTmp() + if op == "SHLXQ" { + fmt.Fprintf(c.b, " %%%s = shl i64 %s, %s\n", t, src, amt) + } else { + fmt.Fprintf(c.b, " %%%s = lshr i64 %s, %s\n", t, src, amt) + } + return true, false, c.storeReg(ins.Args[2].Reg, "%"+t) + case "ROLL": // 32-bit rotate-left: count, dstReg. if len(ins.Args) != 2 || ins.Args[1].Kind != OpReg { From 5c3031337460a23c302bbaf4fa17174b4cceda45 Mon Sep 17 00:00:00 2001 From: ZhouGuangyuan Date: Tue, 5 May 2026 19:11:08 +0800 Subject: [PATCH 6/7] amd64: support remaining compress amd64 ops --- amd64_blocks.go | 8 +- amd64_ctx.go | 93 ++++++++++++++++++++++ amd64_helper_edge_test.go | 31 ++++++++ amd64_lower_arith.go | 157 +++++++++++++++++++++++++++++++++++--- amd64_lower_branch.go | 13 +++- amd64_lower_cmpbt.go | 64 ++++++++++++++++ amd64_lower_mov.go | 54 ++++++++----- amd64_needed.go | 2 +- translate.go | 4 +- 9 files changed, 387 insertions(+), 39 deletions(-) diff --git a/amd64_blocks.go b/amd64_blocks.go index 279918b..19e9cba 100644 --- a/amd64_blocks.go +++ b/amd64_blocks.go @@ -20,8 +20,8 @@ func amd64SplitBlocks(fn Func) []amd64Block { switch Op(op) { case "JMP", "JE", "JEQ", "JZ", "JNE", "JNZ", - "JL", "JLT", "JLE", "JG", "JGT", "JGE", - "JB", "JLO", "JBE", "JA", "JHI", "JAE", "JHS", "JLS", + "JL", "JLT", "JLE", "JG", "JGT", "JGE", "JS", "JNS", + "JB", "JLO", "JBE", "JA", "JHI", "JAE", "JHS", "JLS", "JNA", "JNC", "JC", "JCC": default: return 0, false @@ -49,8 +49,8 @@ func amd64SplitBlocks(fn Func) []amd64Block { switch Op(op) { case "JMP", "JE", "JEQ", "JZ", "JNE", "JNZ", - "JL", "JLT", "JLE", "JG", "JGT", "JGE", - "JB", "JLO", "JBE", "JA", "JHI", "JAE", "JHS", "JLS", + "JL", "JLT", "JLE", "JG", "JGT", "JGE", "JS", "JNS", + "JB", "JLO", "JBE", "JA", "JHI", "JAE", "JHS", "JLS", "JNA", "JNC", "JC", "JCC": return true default: diff --git a/amd64_ctx.go b/amd64_ctx.go index c3cd515..bca38fc 100644 --- a/amd64_ctx.go +++ b/amd64_ctx.go @@ -492,6 +492,39 @@ func amd64ByteAlias(rr Reg) (base Reg, shift uint, ok bool) { } } +func amd64FullRegBase(r Reg) (Reg, bool) { + if base, _, ok := amd64ByteAlias(r); ok { + return base, true + } + if r == "" || r == PC || r == ZR { + return "", false + } + if _, ok := amd64ParseXReg(r); ok { + return "", false + } + if _, ok := amd64ParseYReg(r); ok { + return "", false + } + if _, ok := amd64ParseZReg(r); ok { + return "", false + } + if _, ok := amd64ParseKReg(r); ok { + return "", false + } + return r, true +} + +func amd64ByteRegBase(r Reg) (base Reg, shift uint, ok bool) { + if base, shift, ok := amd64ByteAlias(r); ok { + return base, shift, true + } + base, ok = amd64FullRegBase(r) + if !ok { + return "", 0, false + } + return base, 0, true +} + func (c *amd64Ctx) loadReg(r Reg) (string, error) { // Model byte aliases used by stdlib asm. Reads return the selected byte as a // zero-extended i64. @@ -549,6 +582,66 @@ func (c *amd64Ctx) storeReg(r Reg, v string) error { return nil } +func (c *amd64Ctx) storeRegSized(r Reg, ty LLVMType, v string) error { + switch ty { + case I8: + base, shift, ok := amd64ByteRegBase(r) + if !ok { + return fmt.Errorf("not a GP reg for i8 store: %s", r) + } + cur, err := c.loadReg(base) + if err != nil { + return err + } + mask := int64(0xff) << shift + cleared := c.newTmp() + fmt.Fprintf(c.b, " %%%s = and i64 %s, %d\n", cleared, cur, ^mask) + ext := c.newTmp() + fmt.Fprintf(c.b, " %%%s = zext i8 %s to i64\n", ext, v) + ins := "%" + ext + if shift != 0 { + shifted := c.newTmp() + fmt.Fprintf(c.b, " %%%s = shl i64 %%%s, %d\n", shifted, ext, shift) + ins = "%" + shifted + } + merged := c.newTmp() + fmt.Fprintf(c.b, " %%%s = or i64 %%%s, %s\n", merged, cleared, ins) + return c.storeReg(base, "%"+merged) + case I16: + base, ok := amd64FullRegBase(r) + if !ok { + return fmt.Errorf("not a GP reg for i16 store: %s", r) + } + cur, err := c.loadReg(base) + if err != nil { + return err + } + cleared := c.newTmp() + fmt.Fprintf(c.b, " %%%s = and i64 %s, %d\n", cleared, cur, ^int64(0xffff)) + ext := c.newTmp() + fmt.Fprintf(c.b, " %%%s = zext i16 %s to i64\n", ext, v) + merged := c.newTmp() + fmt.Fprintf(c.b, " %%%s = or i64 %%%s, %%%s\n", merged, cleared, ext) + return c.storeReg(base, "%"+merged) + case I32: + base, ok := amd64FullRegBase(r) + if !ok { + return fmt.Errorf("not a GP reg for i32 store: %s", r) + } + ext := c.newTmp() + fmt.Fprintf(c.b, " %%%s = zext i32 %s to i64\n", ext, v) + return c.storeReg(base, "%"+ext) + case I64: + base, ok := amd64FullRegBase(r) + if ok { + return c.storeReg(base, v) + } + return c.storeReg(r, v) + default: + return fmt.Errorf("unsupported sized reg store %s to %s", ty, r) + } +} + func (c *amd64Ctx) loadX(r Reg) (string, error) { idx, ok := amd64ParseXReg(r) if !ok { diff --git a/amd64_helper_edge_test.go b/amd64_helper_edge_test.go index a43070e..ca32cac 100644 --- a/amd64_helper_edge_test.go +++ b/amd64_helper_edge_test.go @@ -593,6 +593,7 @@ func TestAMD64ArithmeticCoverage(t *testing.T) { mustLower("LEAQ", Instr{Raw: "LEAQ global<>(SB), AX", Args: []Operand{{Kind: OpSym, Sym: "global<>(SB)"}, {Kind: OpReg, Reg: AX}}}) mustLower("POPCNTL", Instr{Raw: "POPCNTL AX, BX", Args: []Operand{{Kind: OpReg, Reg: AX}, {Kind: OpReg, Reg: BX}}}) mustLower("POPCNTQ", Instr{Raw: "POPCNTQ AX, CX", Args: []Operand{{Kind: OpReg, Reg: AX}, {Kind: OpReg, Reg: CX}}}) + mustLower("TZCNTQ", Instr{Raw: "TZCNTQ BX, DX", Args: []Operand{{Kind: OpReg, Reg: BX}, {Kind: OpReg, Reg: DX}}}) mustLower("BSFQ", Instr{Raw: "BSFQ AX, BX", Args: []Operand{{Kind: OpReg, Reg: AX}, {Kind: OpReg, Reg: BX}}}) mustLower("BSRQ", Instr{Raw: "BSRQ AX, CX", Args: []Operand{{Kind: OpReg, Reg: AX}, {Kind: OpReg, Reg: CX}}}) mustLower("BSWAPQ", Instr{Raw: "BSWAPQ AX", Args: []Operand{{Kind: OpReg, Reg: AX}}}) @@ -611,11 +612,15 @@ func TestAMD64ArithmeticCoverage(t *testing.T) { mustLower("CMOVQGT", Instr{Raw: "CMOVQGT CX, AX", Args: []Operand{{Kind: OpReg, Reg: CX}, {Kind: OpReg, Reg: AX}}}) mustLower("ANDNL", Instr{Raw: "ANDNL AX, BX, CX", Args: []Operand{{Kind: OpReg, Reg: AX}, {Kind: OpReg, Reg: BX}, {Kind: OpReg, Reg: CX}}}) mustLower("ANDNQ", Instr{Raw: "ANDNQ AX, BX, DX", Args: []Operand{{Kind: OpReg, Reg: AX}, {Kind: OpReg, Reg: BX}, {Kind: OpReg, Reg: DX}}}) + mustLower("BEXTRQ", Instr{Raw: "BEXTRQ CX, AX, DX", Args: []Operand{{Kind: OpReg, Reg: CX}, {Kind: OpReg, Reg: AX}, {Kind: OpReg, Reg: DX}}}) + mustLower("BZHIQ", Instr{Raw: "BZHIQ DI, AX, CX", Args: []Operand{{Kind: OpReg, Reg: DI}, {Kind: OpReg, Reg: AX}, {Kind: OpReg, Reg: CX}}}) mustLower("SHRQ", Instr{Raw: "SHRQ $1, AX", Args: []Operand{{Kind: OpImm, Imm: 1}, {Kind: OpReg, Reg: AX}}}) mustLower("SHLQ", Instr{Raw: "SHLQ CX, AX", Args: []Operand{{Kind: OpReg, Reg: CX}, {Kind: OpReg, Reg: AX}}}) mustLower("SHLXQ", Instr{Raw: "SHLXQ DI, CX, AX", Args: []Operand{{Kind: OpReg, Reg: DI}, {Kind: OpReg, Reg: CX}, {Kind: OpReg, Reg: AX}}}) mustLower("SARQ", Instr{Raw: "SARQ $1, AX", Args: []Operand{{Kind: OpImm, Imm: 1}, {Kind: OpReg, Reg: AX}}}) + mustLower("SARL", Instr{Raw: "SARL $1, AX", Args: []Operand{{Kind: OpImm, Imm: 1}, {Kind: OpReg, Reg: AX}}}) mustLower("SHLL", Instr{Raw: "SHLL $1, AX", Args: []Operand{{Kind: OpImm, Imm: 1}, {Kind: OpReg, Reg: AX}}}) + mustLower("SHLB", Instr{Raw: "SHLB $2, R10", Args: []Operand{{Kind: OpImm, Imm: 2}, {Kind: OpReg, Reg: Reg("R10")}}}) mustLower("SHRL", Instr{Raw: "SHRL CX, AX", Args: []Operand{{Kind: OpReg, Reg: CX}, {Kind: OpReg, Reg: AX}}}) mustLower("SHRXQ", Instr{Raw: "SHRXQ DI, AX, CX", Args: []Operand{{Kind: OpReg, Reg: DI}, {Kind: OpReg, Reg: AX}, {Kind: OpReg, Reg: CX}}}) mustLower("SALQ", Instr{Raw: "SALQ $1, AX", Args: []Operand{{Kind: OpImm, Imm: 1}, {Kind: OpReg, Reg: AX}}}) @@ -1242,6 +1247,7 @@ func TestAMD64CmpBtCoverage(t *testing.T) { check("TESTL", Instr{Raw: "TESTL $const, 8(BX)", Args: []Operand{{Kind: OpSym, Sym: "$const"}, {Kind: OpMem, Mem: MemRef{Base: BX, Off: 8}}}}) check("TESTQ", Instr{Raw: "TESTQ example.global(SB), DI", Args: []Operand{{Kind: OpSym, Sym: "example.global(SB)"}, {Kind: OpReg, Reg: DI}}}) check("BTQ", Instr{Raw: "BTQ $3, AX", Args: []Operand{{Kind: OpImm, Imm: 3}, {Kind: OpReg, Reg: AX}}}) + check("BTSQ", Instr{Raw: "BTSQ DX, AX", Args: []Operand{{Kind: OpReg, Reg: DX}, {Kind: OpReg, Reg: AX}}}) if ok, term, err := c.lowerCmpBt("BAD", Instr{}); ok || term || err != nil { t.Fatalf("lowerCmpBt(BAD) = (%v, %v, %v)", ok, term, err) @@ -1255,6 +1261,9 @@ func TestAMD64CmpBtCoverage(t *testing.T) { if _, _, err := c.lowerCmpBt("BTQ", Instr{Raw: "BTQ AX, BX", Args: []Operand{{Kind: OpReg, Reg: AX}, {Kind: OpReg, Reg: BX}}}); err == nil { t.Fatalf("bad BTQ unexpectedly succeeded") } + if _, _, err := c.lowerCmpBt("BTSQ", Instr{Raw: "BTSQ AX", Args: []Operand{{Kind: OpReg, Reg: AX}}}); err == nil { + t.Fatalf("short BTSQ unexpectedly succeeded") + } if got, err := c.evalIntSized(Operand{Kind: OpSym, Sym: "$const"}, I32); err != nil || got != "0" { t.Fatalf("evalIntSized($const) = (%q, %v)", got, err) } @@ -1273,6 +1282,7 @@ func TestAMD64CmpBtCoverage(t *testing.T) { "and i64", "store i1 false, ptr %flags_cf", "lshr i64", + "shl i64 1", "load i64, ptr @\"example.global\"", } { if !strings.Contains(out, want) { @@ -1323,11 +1333,15 @@ func TestAMD64MovSyscallAndCRC32Coverage(t *testing.T) { checkMov("MOVLQSX", Instr{Raw: "MOVLQSX arg+0(FP), DX", Args: []Operand{{Kind: OpFP, FPOffset: 0}, {Kind: OpReg, Reg: DX}}}) checkMov("MOVLQSX", Instr{Raw: "MOVLQSX 8(BX), SI", Args: []Operand{{Kind: OpMem, Mem: MemRef{Base: BX, Off: 8}}, {Kind: OpReg, Reg: SI}}}) checkMov("MOVLQSX", Instr{Raw: "MOVLQSX example.global(SB), DI", Args: []Operand{{Kind: OpSym, Sym: "example.global(SB)"}, {Kind: OpReg, Reg: DI}}}) + checkMov("MOVWQSX", Instr{Raw: "MOVWQSX 8(BX), SI", Args: []Operand{{Kind: OpMem, Mem: MemRef{Base: BX, Off: 8}}, {Kind: OpReg, Reg: SI}}}) b.WriteString(" store i1 true, ptr " + c.flagsSltSlot + "\n") checkMov("CMOVQLT", Instr{Raw: "CMOVQLT AX, BX", Args: []Operand{{Kind: OpReg, Reg: AX}, {Kind: OpReg, Reg: BX}}}) checkMov("MOVB", Instr{Raw: "MOVB $1, AX", Args: []Operand{{Kind: OpImm, Imm: 1}, {Kind: OpReg, Reg: AX}}}) + checkMov("MOVBLZX", Instr{Raw: "MOVBLZX 8(BX), AX", Args: []Operand{{Kind: OpMem, Mem: MemRef{Base: BX, Off: 8}}, {Kind: OpReg, Reg: AX}}}) checkMov("MOVB", Instr{Raw: "MOVB BX, ret+24(FP)", Args: []Operand{{Kind: OpReg, Reg: BX}, {Kind: OpFP, FPOffset: 24}}}) checkMov("MOVW", Instr{Raw: "MOVW arg+0(FP), 8(BX)", Args: []Operand{{Kind: OpFP, FPOffset: 0}, {Kind: OpMem, Mem: MemRef{Base: BX, Off: 8}}}}) + checkMov("MOVWLZX", Instr{Raw: "MOVWLZX 8(BX), CX", Args: []Operand{{Kind: OpMem, Mem: MemRef{Base: BX, Off: 8}}, {Kind: OpReg, Reg: CX}}}) + checkMov("MOVWQZX", Instr{Raw: "MOVWQZX 8(BX), DX", Args: []Operand{{Kind: OpMem, Mem: MemRef{Base: BX, Off: 8}}, {Kind: OpReg, Reg: DX}}}) checkMov("MOVW", Instr{Raw: "MOVW example.global(SB), example.dest(SB)", Args: []Operand{{Kind: OpSym, Sym: "example.global(SB)"}, {Kind: OpSym, Sym: "example.dest(SB)"}}}) checkMov("MOVQ", Instr{Raw: "MOVQ example.global(SB), CX", Args: []Operand{{Kind: OpSym, Sym: "example.global(SB)"}, {Kind: OpReg, Reg: CX}}}) checkMov("MOVQ", Instr{Raw: "MOVQ DX, ret+8(FP)", Args: []Operand{{Kind: OpReg, Reg: DX}, {Kind: OpFP, FPOffset: 8}}}) @@ -1464,6 +1478,7 @@ func TestAMD64MovSyscallAndCRC32Coverage(t *testing.T) { out := b.String() + sysb.String() for _, want := range []string{ "sext i32", + "sext i16", "select i1 %t", "zext i8", "store i16", @@ -1523,12 +1538,15 @@ func TestAMD64BranchCoverageDeep(t *testing.T) { {"JNE", Instr{Raw: "JNE tail", Args: []Operand{{Kind: OpIdent, Ident: "tail"}}}}, {"JL", Instr{Raw: "JL V1<>(SB)", Args: []Operand{{Kind: OpSym, Sym: "V1<>(SB)"}}}}, {"JGE", Instr{Raw: "JGE 2(PC)", Args: []Operand{{Kind: OpMem, Mem: MemRef{Base: PC, Off: 2}}}}}, + {"JS", Instr{Raw: "JS tail", Args: []Operand{{Kind: OpIdent, Ident: "tail"}}}}, + {"JNS", Instr{Raw: "JNS V1", Args: []Operand{{Kind: OpIdent, Ident: "V1"}}}}, {"JLE", Instr{Raw: "JLE tail", Args: []Operand{{Kind: OpIdent, Ident: "tail"}}}}, {"JG", Instr{Raw: "JG V1", Args: []Operand{{Kind: OpIdent, Ident: "V1"}}}}, {"JB", Instr{Raw: "JB tail", Args: []Operand{{Kind: OpIdent, Ident: "tail"}}}}, {"JNC", Instr{Raw: "JNC V1", Args: []Operand{{Kind: OpIdent, Ident: "V1"}}}}, {"JAE", Instr{Raw: "JAE tail", Args: []Operand{{Kind: OpIdent, Ident: "tail"}}}}, {"JBE", Instr{Raw: "JBE V1", Args: []Operand{{Kind: OpIdent, Ident: "V1"}}}}, + {"JNA", Instr{Raw: "JNA tail", Args: []Operand{{Kind: OpIdent, Ident: "tail"}}}}, {"JA", Instr{Raw: "JA tail", Args: []Operand{{Kind: OpIdent, Ident: "tail"}}}}, {"JMP", Instr{Raw: "JMP V1", Args: []Operand{{Kind: OpReg, Reg: Reg("V1")}}}}, {"JMP", Instr{Raw: "JMP tail(SB)", Args: []Operand{{Kind: OpSym, Sym: "tail(SB)"}}}}, @@ -1949,6 +1967,19 @@ func TestAMD64CtxAliasAndFPFallbackCoverage(t *testing.T) { t.Fatalf("storeReg(%s) error = %v", tc.r, err) } } + for _, tc := range []struct { + r Reg + ty LLVMType + v string + }{ + {BX, I8, "37"}, + {CX, I16, "38"}, + {DX, I32, "39"}, + } { + if err := c.storeRegSized(tc.r, tc.ty, tc.v); err != nil { + t.Fatalf("storeRegSized(%s, %s) error = %v", tc.r, tc.ty, err) + } + } for _, r := range []Reg{AL, AH, BL, BH, CL, CH, DL, DH} { if got, err := c.loadReg(r); err != nil || got == "" { t.Fatalf("loadReg(%s) = (%q, %v)", r, got, err) diff --git a/amd64_lower_arith.go b/amd64_lower_arith.go index 90d67f3..cdb3c6d 100644 --- a/amd64_lower_arith.go +++ b/amd64_lower_arith.go @@ -405,9 +405,7 @@ func (c *amd64Ctx) lowerArith(op Op, ins Instr) (ok bool, terminated bool, err e fmt.Fprintf(c.b, " %%%s = trunc i64 %s to i8\n", t, dv64) d8 = "%" + t storeDst = func(v8 string) error { - z := c.newTmp() - fmt.Fprintf(c.b, " %%%s = zext i8 %s to i64\n", z, v8) - return c.storeReg(dst, "%"+z) + return c.storeRegSized(dst, I8, v8) } case OpMem: addr, err := c.addrFromMem(ins.Args[1].Mem) @@ -600,7 +598,7 @@ func (c *amd64Ctx) lowerArith(op Op, ins Instr) (ok bool, terminated bool, err e return true, false, nil case "ADDB", "XORB", "ANDB", "ORB": - // 8-bit scalar ops: src, dstReg (stored in low 8 bits of dst reg). + // 8-bit scalar ops: src, dstReg (stored in the selected byte lane). if len(ins.Args) != 2 || ins.Args[1].Kind != OpReg { return true, false, fmt.Errorf("amd64 %s expects src, dstReg: %q", op, ins.Raw) } @@ -637,9 +635,7 @@ func (c *amd64Ctx) lowerArith(op Op, ins Instr) (ok bool, terminated bool, err e case "ORB": fmt.Fprintf(c.b, " %%%s = or i8 %%%s, %s\n", x, d8, s8) } - z := c.newTmp() - fmt.Fprintf(c.b, " %%%s = zext i8 %%%s to i64\n", z, x) - if err := c.storeReg(dst, "%"+z); err != nil { + if err := c.storeRegSized(dst, I8, "%"+x); err != nil { return true, false, err } if op == "ADDB" { @@ -798,6 +794,26 @@ func (c *amd64Ctx) lowerArith(op Op, ins Instr) (ok bool, terminated bool, err e fmt.Fprintf(c.b, " %%%s = call i64 @llvm.ctpop.i64(i64 %s)\n", call, srcv) return true, false, c.storeReg(dst, "%"+call) + case "TZCNTQ": + // TZCNTQ srcReg, dstReg. + if len(ins.Args) != 2 || ins.Args[0].Kind != OpReg || ins.Args[1].Kind != OpReg { + return true, false, fmt.Errorf("amd64 TZCNTQ expects srcReg, dstReg: %q", ins.Raw) + } + srcv, err := c.loadReg(ins.Args[0].Reg) + if err != nil { + return true, false, err + } + call := c.newTmp() + fmt.Fprintf(c.b, " %%%s = call i64 @llvm.cttz.i64(i64 %s, i1 false)\n", call, srcv) + if err := c.storeReg(ins.Args[1].Reg, "%"+call); err != nil { + return true, false, err + } + cf := c.newTmp() + fmt.Fprintf(c.b, " %%%s = icmp eq i64 %s, 0\n", cf, srcv) + fmt.Fprintf(c.b, " store i1 %%%s, ptr %s\n", cf, c.flagsCFSlot) + c.setZSFlagsFromI64("%" + call) + return true, false, nil + case "BSFQ", "BSRQ", "BSWAPQ", "BSFL", "BSRL": // Bit scan/byte swap ops (reg, reg). src := Reg("") @@ -917,8 +933,8 @@ func (c *amd64Ctx) lowerArith(op Op, ins Instr) (ok bool, terminated bool, err e switch ins.Args[0].Kind { case OpReg: sel := c.newTmp() - fmt.Fprintf(c.b, " %%%s = select i1 %s, i64 1, i64 0\n", sel, cond) - return true, false, c.storeReg(ins.Args[0].Reg, "%"+sel) + fmt.Fprintf(c.b, " %%%s = select i1 %s, i8 1, i8 0\n", sel, cond) + return true, false, c.storeRegSized(ins.Args[0].Reg, I8, "%"+sel) case OpFP: return true, false, c.storeFPResult(ins.Args[0].FPOffset, I1, cond) default: @@ -1001,7 +1017,85 @@ func (c *amd64Ctx) lowerArith(op Op, ins Instr) (ok bool, terminated bool, err e fmt.Fprintf(c.b, " %%%s = zext i32 %%%s to i64\n", z, a) return true, false, c.storeReg(dst, "%"+z) - case "SHRQ", "SHLQ", "SARQ", "SHLL", "SHRL", "SALQ", "SALL": + case "BEXTRQ": + // BMI1 bit field extract: control, src, dst. + if len(ins.Args) != 3 || ins.Args[2].Kind != OpReg { + return true, false, fmt.Errorf("amd64 BEXTRQ expects control, src, dstReg: %q", ins.Raw) + } + ctrl, err := c.evalI64(ins.Args[0]) + if err != nil { + return true, false, err + } + src, err := c.evalI64(ins.Args[1]) + if err != nil { + return true, false, err + } + start := c.newTmp() + fmt.Fprintf(c.b, " %%%s = and i64 %s, 255\n", start, ctrl) + lenShift := c.newTmp() + fmt.Fprintf(c.b, " %%%s = lshr i64 %s, 8\n", lenShift, ctrl) + length := c.newTmp() + fmt.Fprintf(c.b, " %%%s = and i64 %%%s, 255\n", length, lenShift) + startOK := c.newTmp() + fmt.Fprintf(c.b, " %%%s = icmp ult i64 %%%s, 64\n", startOK, start) + safeStart := c.newTmp() + fmt.Fprintf(c.b, " %%%s = select i1 %%%s, i64 %%%s, i64 63\n", safeStart, startOK, start) + shifted := c.newTmp() + fmt.Fprintf(c.b, " %%%s = lshr i64 %s, %%%s\n", shifted, src, safeStart) + rawLen := c.newTmp() + fmt.Fprintf(c.b, " %%%s = select i1 %%%s, i64 %%%s, i64 0\n", rawLen, startOK, length) + remain := c.newTmp() + fmt.Fprintf(c.b, " %%%s = sub i64 64, %%%s\n", remain, safeStart) + useRawLen := c.newTmp() + fmt.Fprintf(c.b, " %%%s = icmp ult i64 %%%s, %%%s\n", useRawLen, rawLen, remain) + effLen := c.newTmp() + fmt.Fprintf(c.b, " %%%s = select i1 %%%s, i64 %%%s, i64 %%%s\n", effLen, useRawLen, rawLen, remain) + isFull := c.newTmp() + fmt.Fprintf(c.b, " %%%s = icmp eq i64 %%%s, 64\n", isFull, effLen) + safeLen := c.newTmp() + fmt.Fprintf(c.b, " %%%s = and i64 %%%s, 63\n", safeLen, effLen) + one := c.newTmp() + fmt.Fprintf(c.b, " %%%s = shl i64 1, %%%s\n", one, safeLen) + maskTmp := c.newTmp() + fmt.Fprintf(c.b, " %%%s = add i64 %%%s, -1\n", maskTmp, one) + mask := c.newTmp() + fmt.Fprintf(c.b, " %%%s = select i1 %%%s, i64 -1, i64 %%%s\n", mask, isFull, maskTmp) + out := c.newTmp() + fmt.Fprintf(c.b, " %%%s = and i64 %%%s, %%%s\n", out, shifted, mask) + return true, false, c.storeReg(ins.Args[2].Reg, "%"+out) + + case "BZHIQ": + // BMI2 zero high bits: index, src, dst. + if len(ins.Args) != 3 || ins.Args[2].Kind != OpReg { + return true, false, fmt.Errorf("amd64 BZHIQ expects index, src, dstReg: %q", ins.Raw) + } + idx, err := c.evalI64(ins.Args[0]) + if err != nil { + return true, false, err + } + src, err := c.evalI64(ins.Args[1]) + if err != nil { + return true, false, err + } + valid := c.newTmp() + fmt.Fprintf(c.b, " %%%s = icmp ult i64 %s, 64\n", valid, idx) + isZero := c.newTmp() + fmt.Fprintf(c.b, " %%%s = icmp eq i64 %s, 0\n", isZero, idx) + safeIdx := c.newTmp() + fmt.Fprintf(c.b, " %%%s = and i64 %s, 63\n", safeIdx, idx) + one := c.newTmp() + fmt.Fprintf(c.b, " %%%s = shl i64 1, %%%s\n", one, safeIdx) + maskTmp := c.newTmp() + fmt.Fprintf(c.b, " %%%s = add i64 %%%s, -1\n", maskTmp, one) + maskOrZero := c.newTmp() + fmt.Fprintf(c.b, " %%%s = select i1 %%%s, i64 0, i64 %%%s\n", maskOrZero, isZero, maskTmp) + mask := c.newTmp() + fmt.Fprintf(c.b, " %%%s = select i1 %%%s, i64 %%%s, i64 -1\n", mask, valid, maskOrZero) + out := c.newTmp() + fmt.Fprintf(c.b, " %%%s = and i64 %s, %%%s\n", out, src, mask) + return true, false, c.storeReg(ins.Args[2].Reg, "%"+out) + + case "SHRQ", "SHLQ", "SARQ", "SHLL", "SHRL", "SARL", "SALQ", "SALL": // Shift ops: // - 2-operand: amt, dst (in-place) // - 3-operand: amt, src, dst @@ -1025,7 +1119,7 @@ func (c *amd64Ctx) lowerArith(op Op, ins Instr) (ok bool, terminated bool, err e } amtMask := int64(63) valTy := I64 - if op == "SHLL" || op == "SHRL" || op == "SALL" { + if op == "SHLL" || op == "SHRL" || op == "SARL" || op == "SALL" { amtMask = 31 valTy = I32 } @@ -1066,6 +1160,8 @@ func (c *amd64Ctx) lowerArith(op Op, ins Instr) (ok bool, terminated bool, err e sh := c.newTmp() if op == "SHLL" || op == "SALL" { fmt.Fprintf(c.b, " %%%s = shl i32 %%%s, %%%s\n", sh, tr, amt32) + } else if op == "SARL" { + fmt.Fprintf(c.b, " %%%s = ashr i32 %%%s, %%%s\n", sh, tr, amt32) } else { fmt.Fprintf(c.b, " %%%s = lshr i32 %%%s, %%%s\n", sh, tr, amt32) } @@ -1073,6 +1169,45 @@ func (c *amd64Ctx) lowerArith(op Op, ins Instr) (ok bool, terminated bool, err e fmt.Fprintf(c.b, " %%%s = zext i32 %%%s to i64\n", z, sh) return true, false, c.storeReg(dst, "%"+z) + case "SHLB": + // 8-bit logical left shift: amt, dstReg. + if len(ins.Args) != 2 || ins.Args[1].Kind != OpReg { + return true, false, fmt.Errorf("amd64 SHLB expects amt, dstReg: %q", ins.Raw) + } + dst := ins.Args[1].Reg + dv64, err := c.loadReg(dst) + if err != nil { + return true, false, err + } + d8 := c.newTmp() + fmt.Fprintf(c.b, " %%%s = trunc i64 %s to i8\n", d8, dv64) + var amt string + switch ins.Args[0].Kind { + case OpImm: + amt = fmt.Sprintf("%d", ins.Args[0].Imm&31) + case OpReg: + av, err := c.loadReg(ins.Args[0].Reg) + if err != nil { + return true, false, err + } + m := c.newTmp() + fmt.Fprintf(c.b, " %%%s = and i64 %s, 31\n", m, av) + amt = "%" + m + default: + return true, false, fmt.Errorf("amd64 SHLB unsupported shift amt: %q", ins.Raw) + } + inRange := c.newTmp() + fmt.Fprintf(c.b, " %%%s = icmp ult i64 %s, 8\n", inRange, amt) + safeAmt := c.newTmp() + fmt.Fprintf(c.b, " %%%s = select i1 %%%s, i64 %s, i64 7\n", safeAmt, inRange, amt) + amt8 := c.newTmp() + fmt.Fprintf(c.b, " %%%s = trunc i64 %%%s to i8\n", amt8, safeAmt) + sh := c.newTmp() + fmt.Fprintf(c.b, " %%%s = shl i8 %%%s, %%%s\n", sh, d8, amt8) + out := c.newTmp() + fmt.Fprintf(c.b, " %%%s = select i1 %%%s, i8 %%%s, i8 0\n", out, inRange, sh) + return true, false, c.storeRegSized(dst, I8, "%"+out) + case "SHLXQ", "SHRXQ": // BMI2 variable shifts: amt, src, dst. if len(ins.Args) != 3 || ins.Args[1].Kind != OpReg || ins.Args[2].Kind != OpReg { diff --git a/amd64_lower_branch.go b/amd64_lower_branch.go index 3abdd41..6e8e625 100644 --- a/amd64_lower_branch.go +++ b/amd64_lower_branch.go @@ -44,8 +44,8 @@ func (c *amd64Ctx) lowerBranch(bi int, ii int, op Op, ins Instr, emitBr amd64Emi case "JMP", "JE", "JEQ", "JZ", "JNE", "JNZ", - "JL", "JLT", "JLE", "JG", "JGT", "JGE", - "JA", "JHI", "JAE", "JHS", "JB", "JLO", "JBE", "JLS", + "JL", "JLT", "JLE", "JG", "JGT", "JGE", "JS", "JNS", + "JA", "JHI", "JAE", "JHS", "JB", "JLO", "JBE", "JLS", "JNA", "JC", "JNC", "JCC": // ok default: @@ -164,6 +164,13 @@ func (c *amd64Ctx) lowerBranch(bi int, ii int, op Op, ins Instr, emitBr amd64Emi t := c.newTmp() fmt.Fprintf(c.b, " %%%s = xor i1 %s, true\n", t, slt) cond = "%" + t + case "JS": + cond = c.loadFlag(c.flagsSltSlot) + case "JNS": + slt := c.loadFlag(c.flagsSltSlot) + t := c.newTmp() + fmt.Fprintf(c.b, " %%%s = xor i1 %s, true\n", t, slt) + cond = "%" + t case "JLE": slt := c.loadFlag(c.flagsSltSlot) z := c.loadFlag(c.flagsZSlot) @@ -190,7 +197,7 @@ func (c *amd64Ctx) lowerBranch(bi int, ii int, op Op, ins Instr, emitBr amd64Emi t := c.newTmp() fmt.Fprintf(c.b, " %%%s = xor i1 %s, true\n", t, cf) cond = "%" + t - case "JBE", "JLS": + case "JBE", "JLS", "JNA": cf := c.loadFlag(c.flagsCFSlot) z := c.loadFlag(c.flagsZSlot) t := c.newTmp() diff --git a/amd64_lower_cmpbt.go b/amd64_lower_cmpbt.go index d87d99f..1188c93 100644 --- a/amd64_lower_cmpbt.go +++ b/amd64_lower_cmpbt.go @@ -78,6 +78,70 @@ func (c *amd64Ctx) lowerCmpBt(op Op, ins Instr) (ok bool, terminated bool, err e fmt.Fprintf(c.b, " %%%s = icmp ne i64 %%%s, 0\n", cf, and) fmt.Fprintf(c.b, " store i1 %%%s, ptr %s\n", cf, c.flagsCFSlot) return true, false, nil + + case "BTSQ": + // BTSQ src, dstReg|dstMem: CF = old bit, dst bit set. + if len(ins.Args) != 2 { + return true, false, fmt.Errorf("amd64 BTSQ expects src, dst: %q", ins.Raw) + } + var amt string + switch ins.Args[0].Kind { + case OpImm: + amt = fmt.Sprintf("%d", ins.Args[0].Imm&63) + case OpReg: + av, err := c.loadReg(ins.Args[0].Reg) + if err != nil { + return true, false, err + } + m := c.newTmp() + fmt.Fprintf(c.b, " %%%s = and i64 %s, 63\n", m, av) + amt = "%" + m + default: + return true, false, fmt.Errorf("amd64 BTSQ expects imm/reg bit index: %q", ins.Raw) + } + + var dst string + var storeDst func(string) error + switch ins.Args[1].Kind { + case OpReg: + dv, err := c.loadReg(ins.Args[1].Reg) + if err != nil { + return true, false, err + } + dst = dv + storeDst = func(v string) error { return c.storeReg(ins.Args[1].Reg, v) } + case OpMem: + addr, err := c.addrFromMem(ins.Args[1].Mem) + if err != nil { + return true, false, err + } + p := c.ptrFromAddrI64(addr) + ld := c.newTmp() + fmt.Fprintf(c.b, " %%%s = load i64, ptr %s, align 1\n", ld, p) + dst = "%" + ld + storeDst = func(v string) error { + fmt.Fprintf(c.b, " store i64 %s, ptr %s, align 1\n", v, p) + return nil + } + default: + return true, false, fmt.Errorf("amd64 BTSQ expects reg/mem dst: %q", ins.Raw) + } + + sh := c.newTmp() + fmt.Fprintf(c.b, " %%%s = lshr i64 %s, %s\n", sh, dst, amt) + and := c.newTmp() + fmt.Fprintf(c.b, " %%%s = and i64 %%%s, 1\n", and, sh) + cf := c.newTmp() + fmt.Fprintf(c.b, " %%%s = icmp ne i64 %%%s, 0\n", cf, and) + fmt.Fprintf(c.b, " store i1 %%%s, ptr %s\n", cf, c.flagsCFSlot) + one := c.newTmp() + fmt.Fprintf(c.b, " %%%s = shl i64 1, %s\n", one, amt) + out := c.newTmp() + fmt.Fprintf(c.b, " %%%s = or i64 %s, %%%s\n", out, dst, one) + if err := storeDst("%" + out); err != nil { + return true, false, err + } + return true, false, nil } return false, false, nil } diff --git a/amd64_lower_mov.go b/amd64_lower_mov.go index 9eaf094..2323375 100644 --- a/amd64_lower_mov.go +++ b/amd64_lower_mov.go @@ -7,7 +7,7 @@ import ( func (c *amd64Ctx) lowerMov(op Op, ins Instr) (ok bool, terminated bool, err error) { switch op { - case "MOVQ", "MOVD", "MOVL", "MOVLQZX", "MOVLQSX", "MOVBQZX", "MOVB", "MOVW", "CMOVQLT": + case "MOVQ", "MOVD", "MOVL", "MOVLQZX", "MOVLQSX", "MOVBQZX", "MOVBLZX", "MOVB", "MOVW", "MOVWLZX", "MOVWQZX", "MOVWQSX", "CMOVQLT": // ok default: return false, false, nil @@ -16,32 +16,47 @@ func (c *amd64Ctx) lowerMov(op Op, ins Instr) (ok bool, terminated bool, err err return true, false, fmt.Errorf("amd64 %s expects 2 operands: %q", op, ins.Raw) } src, dst := ins.Args[0], ins.Args[1] + zeroExtendSmallDst := false if op == "MOVLQZX" { op = "MOVL" } if op == "MOVD" { op = "MOVQ" } - if op == "MOVBQZX" { + if op == "MOVBQZX" || op == "MOVBLZX" { op = "MOVB" + zeroExtendSmallDst = true } - if op == "MOVLQSX" { - // Sign-extend i32 source to i64 destination register. + if op == "MOVWQZX" || op == "MOVWLZX" { + op = "MOVW" + zeroExtendSmallDst = true + } + if op == "MOVLQSX" || op == "MOVWQSX" { + // Sign-extend i32/i16 source to i64 destination register. if dst.Kind != OpReg { - return true, false, fmt.Errorf("amd64 MOVLQSX expects dst reg: %q", ins.Raw) + return true, false, fmt.Errorf("amd64 %s expects dst reg: %q", op, ins.Raw) + } + srcTy := I32 + if op == "MOVWQSX" { + srcTy = I16 } - var i32v string + var sv string switch src.Kind { case OpImm: - i32v = fmt.Sprintf("%d", int32(src.Imm)) + switch srcTy { + case I16: + sv = fmt.Sprintf("%d", int16(src.Imm)) + default: + sv = fmt.Sprintf("%d", int32(src.Imm)) + } case OpReg, OpFP: v64, err := c.evalI64(src) if err != nil { return true, false, err } tr := c.newTmp() - fmt.Fprintf(c.b, " %%%s = trunc i64 %s to i32\n", tr, v64) - i32v = "%" + tr + fmt.Fprintf(c.b, " %%%s = trunc i64 %s to %s\n", tr, v64, srcTy) + sv = "%" + tr case OpMem: addr, err := c.addrFromMem(src.Mem) if err != nil { @@ -49,21 +64,21 @@ func (c *amd64Ctx) lowerMov(op Op, ins Instr) (ok bool, terminated bool, err err } p := c.ptrFromAddrI64(addr) ld := c.newTmp() - fmt.Fprintf(c.b, " %%%s = load i32, ptr %s, align 1\n", ld, p) - i32v = "%" + ld + fmt.Fprintf(c.b, " %%%s = load %s, ptr %s, align 1\n", ld, srcTy, p) + sv = "%" + ld case OpSym: p, err := c.ptrFromSB(src.Sym) if err != nil { return true, false, err } ld := c.newTmp() - fmt.Fprintf(c.b, " %%%s = load i32, ptr %s, align 1\n", ld, p) - i32v = "%" + ld + fmt.Fprintf(c.b, " %%%s = load %s, ptr %s, align 1\n", ld, srcTy, p) + sv = "%" + ld default: - return true, false, fmt.Errorf("amd64 MOVLQSX unsupported src: %q", ins.Raw) + return true, false, fmt.Errorf("amd64 %s unsupported src: %q", op, ins.Raw) } se := c.newTmp() - fmt.Fprintf(c.b, " %%%s = sext i32 %s to i64\n", se, i32v) + fmt.Fprintf(c.b, " %%%s = sext %s %s to i64\n", se, srcTy, sv) return true, false, c.storeReg(dst.Reg, "%"+se) } @@ -152,9 +167,12 @@ func (c *amd64Ctx) lowerMov(op Op, ins Instr) (ok bool, terminated bool, err err } switch dst.Kind { case OpReg: - z := c.newTmp() - fmt.Fprintf(c.b, " %%%s = zext %s %s to i64\n", z, widthTy, small) - return true, false, c.storeReg(dst.Reg, "%"+z) + if zeroExtendSmallDst { + z := c.newTmp() + fmt.Fprintf(c.b, " %%%s = zext %s %s to i64\n", z, widthTy, small) + return true, false, c.storeReg(dst.Reg, "%"+z) + } + return true, false, c.storeRegSized(dst.Reg, widthTy, small) case OpFP: return true, false, c.storeFPResult(dst.FPOffset, widthTy, small) case OpMem: diff --git a/amd64_needed.go b/amd64_needed.go index 5ae7a39..64f37ff 100644 --- a/amd64_needed.go +++ b/amd64_needed.go @@ -13,7 +13,7 @@ func funcNeedsAMD64CFG(fn Func) bool { switch Op(op) { case "JMP", "JL", "JLT", "JLE", "JG", "JGT", "JGE", "JB", "JLO", "JBE", "JA", "JHI", "JAE", "JHS", - "JZ", "JE", "JEQ", "JNZ", "JNE", "JNC", "JC", "JCC", "JLS": + "JZ", "JE", "JEQ", "JNZ", "JNE", "JNC", "JC", "JCC", "JLS", "JNA", "JS", "JNS": return true } // A handful of amd64 stdlib asm functions are straight-line, but if we diff --git a/translate.go b/translate.go index ad3ceef..94a6264 100644 --- a/translate.go +++ b/translate.go @@ -278,8 +278,8 @@ func emitExternSBGlobals(b *strings.Builder, file *File, resolve func(string) st // control-flow ops that use symbol operands as branch/call targets. switch opName { case "JMP", "JE", "JEQ", "JZ", "JNE", "JNZ", - "JL", "JLT", "JLE", "JG", "JGT", "JGE", - "JB", "JBE", "JA", "JAE", "JLS", + "JL", "JLT", "JLE", "JG", "JGT", "JGE", "JS", "JNS", + "JB", "JBE", "JA", "JAE", "JLS", "JNA", "JC", "JNC", "JCC", "CALL", "BL", "B": continue } From 9a73d7e44a27256f7326c645d55e62cffc97e68a Mon Sep 17 00:00:00 2001 From: ZhouGuangyuan Date: Tue, 5 May 2026 19:18:39 +0800 Subject: [PATCH 7/7] amd64: support mem inc and dec --- amd64_helper_edge_test.go | 2 + amd64_lower_arith.go | 84 +++++++++++++++++++++++++++++++-------- 2 files changed, 69 insertions(+), 17 deletions(-) diff --git a/amd64_helper_edge_test.go b/amd64_helper_edge_test.go index ca32cac..d809d25 100644 --- a/amd64_helper_edge_test.go +++ b/amd64_helper_edge_test.go @@ -584,8 +584,10 @@ func TestAMD64ArithmeticCoverage(t *testing.T) { mustLower("ORB", Instr{Raw: "ORB $1, AX", Args: []Operand{{Kind: OpImm, Imm: 1}, {Kind: OpReg, Reg: AX}}}) mustLower("INCQ", Instr{Raw: "INCQ AX", Args: []Operand{{Kind: OpReg, Reg: AX}}}) mustLower("DECQ", Instr{Raw: "DECQ AX", Args: []Operand{{Kind: OpReg, Reg: AX}}}) + mustLower("DECQ", Instr{Raw: "DECQ 96(AX)", Args: []Operand{{Kind: OpMem, Mem: MemRef{Base: AX, Off: 96}}}}) mustLower("INCL", Instr{Raw: "INCL AX", Args: []Operand{{Kind: OpReg, Reg: AX}}}) mustLower("DECL", Instr{Raw: "DECL AX", Args: []Operand{{Kind: OpReg, Reg: AX}}}) + mustLower("DECL", Instr{Raw: "DECL 12(BX)", Args: []Operand{{Kind: OpMem, Mem: MemRef{Base: BX, Off: 12}}}}) mustLower("LEAQ", Instr{Raw: "LEAQ 8(BX)(CX*2), DI", Args: []Operand{{Kind: OpMem, Mem: MemRef{Base: BX, Index: CX, Scale: 2, Off: 8}}, {Kind: OpReg, Reg: DI}}}) mustLower("LEAL", Instr{Raw: "LEAL 4(BX), SI", Args: []Operand{{Kind: OpMem, Mem: MemRef{Base: BX, Off: 4}}, {Kind: OpReg, Reg: SI}}}) mustLower("LEAQ", Instr{Raw: "LEAQ ret+16(FP), R8", Args: []Operand{{Kind: OpFP, FPOffset: 16}, {Kind: OpReg, Reg: Reg("R8")}}}) diff --git a/amd64_lower_arith.go b/amd64_lower_arith.go index cdb3c6d..c9b7197 100644 --- a/amd64_lower_arith.go +++ b/amd64_lower_arith.go @@ -661,13 +661,35 @@ func (c *amd64Ctx) lowerArith(op Op, ins Instr) (ok bool, terminated bool, err e return true, false, nil case "INCQ", "DECQ": - if len(ins.Args) != 1 || ins.Args[0].Kind != OpReg { - return true, false, fmt.Errorf("amd64 %s expects reg: %q", op, ins.Raw) + if len(ins.Args) != 1 { + return true, false, fmt.Errorf("amd64 %s expects dst: %q", op, ins.Raw) } - r := ins.Args[0].Reg - v, err := c.loadReg(r) - if err != nil { - return true, false, err + var v string + var storeDst func(string) error + switch ins.Args[0].Kind { + case OpReg: + r := ins.Args[0].Reg + dv, err := c.loadReg(r) + if err != nil { + return true, false, err + } + v = dv + storeDst = func(out string) error { return c.storeReg(r, out) } + case OpMem: + addr, err := c.addrFromMem(ins.Args[0].Mem) + if err != nil { + return true, false, err + } + p := c.ptrFromAddrI64(addr) + ld := c.newTmp() + fmt.Fprintf(c.b, " %%%s = load i64, ptr %s, align 1\n", ld, p) + v = "%" + ld + storeDst = func(out string) error { + fmt.Fprintf(c.b, " store i64 %s, ptr %s, align 1\n", out, p) + return nil + } + default: + return true, false, fmt.Errorf("amd64 %s expects reg/mem dst: %q", op, ins.Raw) } t := c.newTmp() if op == "INCQ" { @@ -676,32 +698,60 @@ func (c *amd64Ctx) lowerArith(op Op, ins Instr) (ok bool, terminated bool, err e fmt.Fprintf(c.b, " %%%s = sub i64 %s, 1\n", t, v) } out := "%" + t - if err := c.storeReg(r, out); err != nil { + if err := storeDst(out); err != nil { return true, false, err } c.setZSFlagsFromI64(out) return true, false, nil case "INCL", "DECL": - if len(ins.Args) != 1 || ins.Args[0].Kind != OpReg { - return true, false, fmt.Errorf("amd64 %s expects reg: %q", op, ins.Raw) + if len(ins.Args) != 1 { + return true, false, fmt.Errorf("amd64 %s expects dst: %q", op, ins.Raw) } - r := ins.Args[0].Reg - v64, err := c.loadReg(r) - if err != nil { - return true, false, err + var v64 string + var storeDst func(string) error + switch ins.Args[0].Kind { + case OpReg: + r := ins.Args[0].Reg + dv, err := c.loadReg(r) + if err != nil { + return true, false, err + } + v64 = dv + storeDst = func(out32 string) error { + z := c.newTmp() + fmt.Fprintf(c.b, " %%%s = zext i32 %s to i64\n", z, out32) + return c.storeReg(r, "%"+z) + } + case OpMem: + addr, err := c.addrFromMem(ins.Args[0].Mem) + if err != nil { + return true, false, err + } + p := c.ptrFromAddrI64(addr) + ld := c.newTmp() + fmt.Fprintf(c.b, " %%%s = load i32, ptr %s, align 1\n", ld, p) + v64 = "%" + ld + storeDst = func(out32 string) error { + fmt.Fprintf(c.b, " store i32 %s, ptr %s, align 1\n", out32, p) + return nil + } + default: + return true, false, fmt.Errorf("amd64 %s expects reg/mem dst: %q", op, ins.Raw) } tr := c.newTmp() - fmt.Fprintf(c.b, " %%%s = trunc i64 %s to i32\n", tr, v64) + if ins.Args[0].Kind == OpMem { + fmt.Fprintf(c.b, " %%%s = add i32 0, %s\n", tr, v64) + } else { + fmt.Fprintf(c.b, " %%%s = trunc i64 %s to i32\n", tr, v64) + } x := c.newTmp() if op == "INCL" { fmt.Fprintf(c.b, " %%%s = add i32 %%%s, 1\n", x, tr) } else { fmt.Fprintf(c.b, " %%%s = sub i32 %%%s, 1\n", x, tr) } - z := c.newTmp() - fmt.Fprintf(c.b, " %%%s = zext i32 %%%s to i64\n", z, x) - if err := c.storeReg(r, "%"+z); err != nil { + if err := storeDst("%" + x); err != nil { return true, false, err } c.setZSFlagsFromI32("%" + x)