diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 1e8b8f020b4f5d..7827050abd5ab2 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -2444,13 +2444,17 @@ void CodeGen::genX86BaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) // Unsigned multiplication can use mulx on BMI2-capable CPUs if (ins == INS_mulEAX && compiler->compOpportunisticallyDependsOn(InstructionSet_AVX2)) { - // op1: EDX, op2: reg/mem (operand 3) => hiRes: (operand 1), lowReg: (operand 2) - assert((regOp->GetRegNum() == REG_EDX) || (rmOp->GetRegNum() != REG_EDX)); + // If rmOp is already in EDX, use that as implicit operand + if (rmOp->isUsedFromReg() && rmOp->GetRegNum() == REG_EDX) + { + std::swap(rmOp, regOp); + } // mov the first operand into implicit source operand EDX/RDX emit->emitIns_Mov(INS_mov, attr, REG_EDX, regOp->GetRegNum(), /* canSkip */ true); // emit MULX instruction + // regOp: EDX, rmOp: reg/mem (operand 3) => hiReg: (operand 1), lowReg: (operand 2) regNumber lowReg = node->GetRegByIndex(0); regNumber hiReg = node->GetRegByIndex(1); inst_RV_RV_TT(INS_mulx, attr, hiReg, lowReg, rmOp, /* isRMW */ false, INS_OPTS_NONE); @@ -2463,7 +2467,7 @@ void CodeGen::genX86BaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) std::swap(rmOp, regOp); } - // op1: EAX, op2: reg/mem + // mov the first operand into implicit source operand EAX/RAX emit->emitIns_Mov(INS_mov, attr, REG_EAX, regOp->GetRegNum(), /* canSkip */ true); // emit the MUL/IMUL instruction diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index d147b0378a0deb..7b7e07eb98e682 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2512,14 +2512,17 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou SingleTypeRegSet apxAwareRegCandidates = ForceLowGprForApxIfNeeded(op2, RBM_NONE, canHWIntrinsicUseApxRegs); - // mulx, 
place op1 in implicit EDX register since op2 might be contained - srcCount = BuildOperandUses(op1, SRBM_EDX); + + // mulx always uses EDX; if op2 is contained, constrain op1 to the fixed EDX register + srcCount = BuildOperandUses(op1, op2->isContained() ? SRBM_EDX : apxAwareRegCandidates); srcCount += BuildOperandUses(op2, apxAwareRegCandidates); // result in any register SingleTypeRegSet apxAwareDestCandidates = ForceLowGprForApxIfNeeded(intrinsicTree, RBM_NONE, canHWIntrinsicUseApxRegs); - BuildDef(intrinsicTree, apxAwareDestCandidates, 0); + // We don't really need to place any result in EDX, but this seems to be a better way of killing + // the register than to specify a constraint on the use + BuildDef(intrinsicTree, SRBM_EDX, 0); BuildDef(intrinsicTree, apxAwareDestCandidates, 1); } else // Signed multiply or normal unsigned multiply in one operand form