Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions src/coreclr/jit/hwintrinsiccodegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2444,13 +2444,17 @@ void CodeGen::genX86BaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions)
// Unsigned multiplication can use mulx on BMI2-capable CPUs
if (ins == INS_mulEAX && compiler->compOpportunisticallyDependsOn(InstructionSet_AVX2))
{
// op1: EDX, op2: reg/mem (operand 3) => hiRes: (operand 1), lowReg: (operand 2)
assert((regOp->GetRegNum() == REG_EDX) || (rmOp->GetRegNum() != REG_EDX));
// If rmOp is already in EDX, use that as implicit operand
if (rmOp->isUsedFromReg() && rmOp->GetRegNum() == REG_EDX)
{
std::swap(rmOp, regOp);
}

// mov the first operand into implicit source operand EDX/RDX
emit->emitIns_Mov(INS_mov, attr, REG_EDX, regOp->GetRegNum(), /* canSkip */ true);

// emit MULX instruction
// regOp: EDX, rmOp: reg/mem (operand 3) => hiReg: (operand 1), lowReg: (operand 2)
regNumber lowReg = node->GetRegByIndex(0);
regNumber hiReg = node->GetRegByIndex(1);
inst_RV_RV_TT(INS_mulx, attr, hiReg, lowReg, rmOp, /* isRMW */ false, INS_OPTS_NONE);
Expand All @@ -2463,7 +2467,7 @@ void CodeGen::genX86BaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions)
std::swap(rmOp, regOp);
}

// op1: EAX, op2: reg/mem
// mov the first operand into implicit source operand EAX/RAX
emit->emitIns_Mov(INS_mov, attr, REG_EAX, regOp->GetRegNum(), /* canSkip */ true);

// emit the MUL/IMUL instruction
Expand Down
9 changes: 6 additions & 3 deletions src/coreclr/jit/lsraxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2512,14 +2512,17 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou

SingleTypeRegSet apxAwareRegCandidates =
ForceLowGprForApxIfNeeded(op2, RBM_NONE, canHWIntrinsicUseApxRegs);
// mulx, place op1 in implicit EDX register since op2 might be contained
srcCount = BuildOperandUses(op1, SRBM_EDX);

// mulx always uses EDX as its implicit source; if op2 is contained, op1 must be placed in the fixed EDX register
srcCount = BuildOperandUses(op1, op2->isContained() ? SRBM_EDX : apxAwareRegCandidates);
srcCount += BuildOperandUses(op2, apxAwareRegCandidates);

// result in any register
SingleTypeRegSet apxAwareDestCandidates =
ForceLowGprForApxIfNeeded(intrinsicTree, RBM_NONE, canHWIntrinsicUseApxRegs);
BuildDef(intrinsicTree, apxAwareDestCandidates, 0);
// We don't really need to place any result in EDX, but this seems to be a better way of killing
// the register than to specify a constraint on the use
BuildDef(intrinsicTree, SRBM_EDX, 0);
BuildDef(intrinsicTree, apxAwareDestCandidates, 1);
}
else // Signed multiply or normal unsigned multiply in one operand form
Expand Down