Fixed AVX instructions vmovss and vmovsd.

Improved asmjit_test_opcode to generate code that is easier to verify manually.
This commit is contained in:
kobalicek
2015-06-17 22:44:21 +02:00
parent 3c477177d2
commit ecda2e12f1
5 changed files with 123 additions and 94 deletions

View File

@@ -2938,6 +2938,24 @@ _EmitAvxRvm:
} }
break; break;
case kX86InstEncodingIdAvxMovDQ:
if (encoded == ENC_OPS(Reg, Reg, None)) {
if (static_cast<const X86Reg*>(o0)->isGp()) {
opCode = extendedInfo.getSecondaryOpCode();
opReg = x86OpReg(o1);
rmReg = x86OpReg(o0);
goto _EmitAvxR;
}
if (static_cast<const X86Reg*>(o1)->isGp()) {
opReg = x86OpReg(o0);
rmReg = x86OpReg(o1);
goto _EmitAvxR;
}
}
goto _AvxRmMr_AfterRegRegCheck;
case kX86InstEncodingIdAvxRmMr_P: case kX86InstEncodingIdAvxRmMr_P:
ADD_VEX_L(static_cast<const X86Reg*>(o0)->isYmm() | static_cast<const X86Reg*>(o1)->isYmm()); ADD_VEX_L(static_cast<const X86Reg*>(o0)->isYmm() | static_cast<const X86Reg*>(o1)->isYmm());
// ... Fall through ... // ... Fall through ...
@@ -2949,6 +2967,7 @@ _EmitAvxRvm:
goto _EmitAvxR; goto _EmitAvxR;
} }
_AvxRmMr_AfterRegRegCheck:
if (encoded == ENC_OPS(Reg, Mem, None)) { if (encoded == ENC_OPS(Reg, Mem, None)) {
opReg = x86OpReg(o0); opReg = x86OpReg(o0);
rmMem = x86OpMem(o1); rmMem = x86OpMem(o1);
@@ -3176,12 +3195,13 @@ _EmitAvxRvm:
} }
if (encoded == ENC_OPS(Reg, Mem, None)) { if (encoded == ENC_OPS(Reg, Mem, None)) {
opReg = x86RegAndVvvv(opReg, x86OpReg(o0)); opReg = x86OpReg(o0);
rmMem = x86OpMem(o1); rmMem = x86OpMem(o1);
goto _EmitAvxM; goto _EmitAvxM;
} }
if (encoded == ENC_OPS(Mem, Reg, None)) { if (encoded == ENC_OPS(Mem, Reg, None)) {
opCode = extendedInfo.getSecondaryOpCode();
opReg = x86OpReg(o1); opReg = x86OpReg(o1);
rmMem = x86OpMem(o0); rmMem = x86OpMem(o0);
goto _EmitAvxM; goto _EmitAvxM;
@@ -3843,7 +3863,7 @@ _EmitFpuOp:
vex_rxbmmmmm |= static_cast<uint32_t>(mBase - 8 < 8) << 5; \ vex_rxbmmmmm |= static_cast<uint32_t>(mBase - 8 < 8) << 5; \
vex_rxbmmmmm |= static_cast<uint32_t>(mIndex - 8 < 8) << 6; \ vex_rxbmmmmm |= static_cast<uint32_t>(mIndex - 8 < 8) << 6; \
\ \
if (vex_rxbmmmmm != 0x01 || vex_XvvvvLpp >= 0x80 || (options & kX86InstOptionVex3) != 0) { \ if ((vex_rxbmmmmm != 0x01) || (vex_XvvvvLpp >= 0x80) || ((options & kX86InstOptionVex3) != 0)) { \
vex_rxbmmmmm |= static_cast<uint32_t>(opReg << 4) & 0x80; \ vex_rxbmmmmm |= static_cast<uint32_t>(opReg << 4) & 0x80; \
vex_rxbmmmmm ^= 0xE0; \ vex_rxbmmmmm ^= 0xE0; \
vex_XvvvvLpp ^= 0x78; \ vex_XvvvvLpp ^= 0x78; \

View File

@@ -1223,7 +1223,7 @@ const char _x86InstName[] =
"xsetbv\0"; "xsetbv\0";
// Automatically generated, do not edit. // Automatically generated, do not edit.
enum X86InstAlphaIndex { enum kX86InstAlphaIndex {
kX86InstAlphaIndexFirst = 'a', kX86InstAlphaIndexFirst = 'a',
kX86InstAlphaIndexLast = 'z', kX86InstAlphaIndexLast = 'z',
kX86InstAlphaIndexInvalid = 0xFFFF kX86InstAlphaIndexInvalid = 0xFFFF
@@ -1260,7 +1260,7 @@ static const uint16_t _x86InstAlphaIndex[26] = {
}; };
// Automatically generated, do not edit. // Automatically generated, do not edit.
enum X86InstData_NameIndex { enum kX86InstData_NameIndex {
kInstIdNone_NameIndex = 0, kInstIdNone_NameIndex = 0,
kX86InstIdAdc_NameIndex = 1, kX86InstIdAdc_NameIndex = 1,
kX86InstIdAdd_NameIndex = 5, kX86InstIdAdd_NameIndex = 5,
@@ -2552,7 +2552,7 @@ const X86InstExtendedInfo _x86InstExtendedInfo[] = {
{ Enc(AvxRvmMvr_P) , 0 , 0 , 0x00, 0x00, 0, { O(XyMem) , O(Xy) , O(XyMem) , U , U }, F(Avx) , O_660F38(2E,U,_,_,_) }, { Enc(AvxRvmMvr_P) , 0 , 0 , 0x00, 0x00, 0, { O(XyMem) , O(Xy) , O(XyMem) , U , U }, F(Avx) , O_660F38(2E,U,_,_,_) },
{ Enc(AvxRmMr_P) , 0 , 0 , 0x00, 0x00, 0, { O(XyMem) , O(XyMem) , U , U , U }, F(Avx) , O_660F00(29,U,_,_,_) }, { Enc(AvxRmMr_P) , 0 , 0 , 0x00, 0x00, 0, { O(XyMem) , O(XyMem) , U , U , U }, F(Avx) , O_660F00(29,U,_,_,_) },
{ Enc(AvxRmMr_P) , 0 , 0 , 0x00, 0x00, 0, { O(XyMem) , O(XyMem) , U , U , U }, F(Avx) , O_000F00(29,U,_,_,_) }, { Enc(AvxRmMr_P) , 0 , 0 , 0x00, 0x00, 0, { O(XyMem) , O(XyMem) , U , U , U }, F(Avx) , O_000F00(29,U,_,_,_) },
{ Enc(AvxRmMr) , 0 , 0 , 0x00, 0x00, 0, { O(XmmMem) , O(XmmMem) , U , U , U }, F(Avx) , O_660F00(7E,U,_,_,_) }, { Enc(AvxMovDQ) , 0 , 0 , 0x00, 0x00, 0, { O(XmmMem) , O(XmmMem) , U , U , U }, F(Avx) , O_660F00(7E,U,_,_,_) },
{ Enc(AvxRmMr_P) , 0 , 0 , 0x00, 0x00, 0, { O(XyMem) , O(XyMem) , U , U , U }, F(Avx) , O_660F00(7F,U,_,_,_) }, { Enc(AvxRmMr_P) , 0 , 0 , 0x00, 0x00, 0, { O(XyMem) , O(XyMem) , U , U , U }, F(Avx) , O_660F00(7F,U,_,_,_) },
{ Enc(AvxRmMr_P) , 0 , 0 , 0x00, 0x00, 0, { O(XyMem) , O(XyMem) , U , U , U }, F(Avx) , O_F30F00(7F,U,_,_,_) }, { Enc(AvxRmMr_P) , 0 , 0 , 0x00, 0x00, 0, { O(XyMem) , O(XyMem) , U , U , U }, F(Avx) , O_F30F00(7F,U,_,_,_) },
{ Enc(AvxRvm) , 0 , 0 , 0x00, 0x00, 0, { O(Xmm) , O(Xmm) , O(Xmm) , U , U }, F(Avx) , U }, { Enc(AvxRvm) , 0 , 0 , 0x00, 0x00, 0, { O(Xmm) , O(Xmm) , O(Xmm) , U , U }, F(Avx) , U },
@@ -2612,7 +2612,7 @@ const X86InstExtendedInfo _x86InstExtendedInfo[] = {
}; };
// Automatically generated, do not edit. // Automatically generated, do not edit.
enum X86InstData_ExtendedIndex { enum kX86InstData_ExtendedIndex {
kInstIdNone_ExtendedIndex = 0, kInstIdNone_ExtendedIndex = 0,
kX86InstIdAdc_ExtendedIndex = 1, kX86InstIdAdc_ExtendedIndex = 1,
kX86InstIdAdd_ExtendedIndex = 2, kX86InstIdAdd_ExtendedIndex = 2,
@@ -4475,7 +4475,7 @@ const X86InstInfo _x86InstInfo[] = {
INST(kX86InstIdVminss , "vminss" , O_F30F00(5D,U,_,_,_), U , Enc(AvxRvm_P) , F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ), INST(kX86InstIdVminss , "vminss" , O_F30F00(5D,U,_,_,_), U , Enc(AvxRvm_P) , F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Xy) , O(XyMem) , U , U ),
INST(kX86InstIdVmovapd , "vmovapd" , O_660F00(28,U,_,_,_), O_660F00(29,U,_,_,_), Enc(AvxRmMr_P) , F(Avx) , EF(________), 0 , 0 , O(XyMem) , O(XyMem) , U , U , U ), INST(kX86InstIdVmovapd , "vmovapd" , O_660F00(28,U,_,_,_), O_660F00(29,U,_,_,_), Enc(AvxRmMr_P) , F(Avx) , EF(________), 0 , 0 , O(XyMem) , O(XyMem) , U , U , U ),
INST(kX86InstIdVmovaps , "vmovaps" , O_000F00(28,U,_,_,_), O_000F00(29,U,_,_,_), Enc(AvxRmMr_P) , F(Avx) , EF(________), 0 , 0 , O(XyMem) , O(XyMem) , U , U , U ), INST(kX86InstIdVmovaps , "vmovaps" , O_000F00(28,U,_,_,_), O_000F00(29,U,_,_,_), Enc(AvxRmMr_P) , F(Avx) , EF(________), 0 , 0 , O(XyMem) , O(XyMem) , U , U , U ),
INST(kX86InstIdVmovd , "vmovd" , O_660F00(6E,U,_,_,_), O_660F00(7E,U,_,_,_), Enc(AvxRmMr) , F(Avx) , EF(________), 0 , 0 , O(XmmMem) , O(XmmMem) , U , U , U ), INST(kX86InstIdVmovd , "vmovd" , O_660F00(6E,U,_,_,_), O_660F00(7E,U,_,_,_), Enc(AvxMovDQ) , F(Avx) , EF(________), 0 , 0 , O(XmmMem) , O(XmmMem) , U , U , U ),
INST(kX86InstIdVmovddup , "vmovddup" , O_F20F00(12,U,_,_,_), U , Enc(AvxRm_P) , F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , U , U , U ), INST(kX86InstIdVmovddup , "vmovddup" , O_F20F00(12,U,_,_,_), U , Enc(AvxRm_P) , F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , U , U , U ),
INST(kX86InstIdVmovdqa , "vmovdqa" , O_660F00(6F,U,_,_,_), O_660F00(7F,U,_,_,_), Enc(AvxRmMr_P) , F(Avx) , EF(________), 0 , 0 , O(XyMem) , O(XyMem) , U , U , U ), INST(kX86InstIdVmovdqa , "vmovdqa" , O_660F00(6F,U,_,_,_), O_660F00(7F,U,_,_,_), Enc(AvxRmMr_P) , F(Avx) , EF(________), 0 , 0 , O(XyMem) , O(XyMem) , U , U , U ),
INST(kX86InstIdVmovdqu , "vmovdqu" , O_F30F00(6F,U,_,_,_), O_F30F00(7F,U,_,_,_), Enc(AvxRmMr_P) , F(Avx) , EF(________), 0 , 0 , O(XyMem) , O(XyMem) , U , U , U ), INST(kX86InstIdVmovdqu , "vmovdqu" , O_F30F00(6F,U,_,_,_), O_F30F00(7F,U,_,_,_), Enc(AvxRmMr_P) , F(Avx) , EF(________), 0 , 0 , O(XyMem) , O(XyMem) , U , U , U ),
@@ -4491,7 +4491,7 @@ const X86InstInfo _x86InstInfo[] = {
INST(kX86InstIdVmovntdqa , "vmovntdqa" , O_660F38(2A,U,_,_,_), U , Enc(AvxRm_P) , F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Mem) , U , U , U ), INST(kX86InstIdVmovntdqa , "vmovntdqa" , O_660F38(2A,U,_,_,_), U , Enc(AvxRm_P) , F(Avx) , EF(________), 0 , 0 , O(Xy) , O(Mem) , U , U , U ),
INST(kX86InstIdVmovntpd , "vmovntpd" , O_660F00(2B,U,_,_,_), U , Enc(AvxMr_P) , F(Avx) , EF(________), 0 , 0 , O(Mem) , O(Xy) , U , U , U ), INST(kX86InstIdVmovntpd , "vmovntpd" , O_660F00(2B,U,_,_,_), U , Enc(AvxMr_P) , F(Avx) , EF(________), 0 , 0 , O(Mem) , O(Xy) , U , U , U ),
INST(kX86InstIdVmovntps , "vmovntps" , O_000F00(2B,U,_,_,_), U , Enc(AvxMr_P) , F(Avx) , EF(________), 0 , 0 , O(Mem) , O(Xy) , U , U , U ), INST(kX86InstIdVmovntps , "vmovntps" , O_000F00(2B,U,_,_,_), U , Enc(AvxMr_P) , F(Avx) , EF(________), 0 , 0 , O(Mem) , O(Xy) , U , U , U ),
INST(kX86InstIdVmovq , "vmovq" , O_660F00(6E,U,_,W,_), O_660F00(7E,U,_,_,_), Enc(AvxRmMr) , F(Avx) , EF(________), 0 , 0 , O(XmmMem) , O(XmmMem) , U , U , U ), INST(kX86InstIdVmovq , "vmovq" , O_660F00(6E,U,_,W,_), O_660F00(7E,U,_,_,_), Enc(AvxMovDQ) , F(Avx) , EF(________), 0 , 0 , O(XmmMem) , O(XmmMem) , U , U , U ),
INST(kX86InstIdVmovsd , "vmovsd" , O_F20F00(10,U,_,_,_), O_F20F00(11,U,_,_,_), Enc(AvxMovSsSd) , F(Avx) , EF(________), 0 , 0 , O(XmmMem) , O(XmmMem) , O(Xmm) , U , U ), INST(kX86InstIdVmovsd , "vmovsd" , O_F20F00(10,U,_,_,_), O_F20F00(11,U,_,_,_), Enc(AvxMovSsSd) , F(Avx) , EF(________), 0 , 0 , O(XmmMem) , O(XmmMem) , O(Xmm) , U , U ),
INST(kX86InstIdVmovshdup , "vmovshdup" , O_F30F00(16,U,_,_,_), U , Enc(AvxRm_P) , F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , U , U , U ), INST(kX86InstIdVmovshdup , "vmovshdup" , O_F30F00(16,U,_,_,_), U , Enc(AvxRm_P) , F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , U , U , U ),
INST(kX86InstIdVmovsldup , "vmovsldup" , O_F30F00(12,U,_,_,_), U , Enc(AvxRm_P) , F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , U , U , U ), INST(kX86InstIdVmovsldup , "vmovsldup" , O_F30F00(12,U,_,_,_), U , Enc(AvxRm_P) , F(Avx) , EF(________), 0 , 0 , O(Xy) , O(XyMem) , U , U , U ),

View File

@@ -1414,6 +1414,8 @@ ASMJIT_ENUM(X86InstEncodingId) {
kX86InstEncodingIdAvxRvrmRvmr, kX86InstEncodingIdAvxRvrmRvmr,
//! AVX instruction encoded as 'RVRM' or 'RVMR' (Propagates AVX.L if Ymm used). //! AVX instruction encoded as 'RVRM' or 'RVMR' (Propagates AVX.L if Ymm used).
kX86InstEncodingIdAvxRvrmRvmr_P, kX86InstEncodingIdAvxRvrmRvmr_P,
//! Vmovd/Vmovq.
kX86InstEncodingIdAvxMovDQ,
//! Vmovss/Vmovsd. //! Vmovss/Vmovsd.
kX86InstEncodingIdAvxMovSsSd, kX86InstEncodingIdAvxMovSsSd,
//! AVX2 gather family instructions (VSIB). //! AVX2 gather family instructions (VSIB).

View File

@@ -23,7 +23,8 @@ typedef void (*VoidFunc)(void);
struct OpcodeDumpInfo { struct OpcodeDumpInfo {
uint32_t arch; uint32_t arch;
bool useRex; bool useRex1;
bool useRex2;
}; };
static const char* archIdToString(uint32_t archId) { static const char* archIdToString(uint32_t archId) {
@@ -42,26 +43,29 @@ int main(int argc, char* argv[]) {
OpcodeDumpInfo infoList[] = { OpcodeDumpInfo infoList[] = {
# if defined(ASMJIT_BUILD_X86) # if defined(ASMJIT_BUILD_X86)
{ asmjit::kArchX86, false }, { asmjit::kArchX86, false, false },
# endif // ASMJIT_BUILD_X86 # endif // ASMJIT_BUILD_X86
# if defined(ASMJIT_BUILD_X64) # if defined(ASMJIT_BUILD_X64)
{ asmjit::kArchX64, false }, { asmjit::kArchX64, false, false },
{ asmjit::kArchX64, true } { asmjit::kArchX64, false, true },
{ asmjit::kArchX64, true , false },
{ asmjit::kArchX64, true , true }
# endif // ASMJIT_BUILD_X64 # endif // ASMJIT_BUILD_X64
}; };
for (int i = 0; i < ASMJIT_ARRAY_SIZE(infoList); i++) { for (int i = 0; i < ASMJIT_ARRAY_SIZE(infoList); i++) {
const OpcodeDumpInfo& info = infoList[i]; const OpcodeDumpInfo& info = infoList[i];
printf("Opcodes [ARCH=%s REX=%s]\n", printf("Opcodes [ARCH=%s REX1=%s REX2=%s]\n",
archIdToString(info.arch), archIdToString(info.arch),
info.useRex ? "true" : "false"); info.useRex1 ? "true" : "false",
info.useRex2 ? "true" : "false");
asmjit::JitRuntime runtime; asmjit::JitRuntime runtime;
asmjit::X86Assembler a(&runtime, info.arch); asmjit::X86Assembler a(&runtime, info.arch);
a.setLogger(&logger); a.setLogger(&logger);
asmgen::opcode(a, info.useRex); asmgen::opcode(a, info.useRex1, info.useRex2);
VoidFunc p = asmjit_cast<VoidFunc>(a.make()); VoidFunc p = asmjit_cast<VoidFunc>(a.make());
// Only run if disassembly makes sense. // Only run if disassembly makes sense.

View File

@@ -16,7 +16,7 @@ namespace asmgen {
enum { kGenOpCodeInstCount = 2670 }; enum { kGenOpCodeInstCount = 2670 };
// Generate all instructions asmjit can emit. // Generate all instructions asmjit can emit.
static void opcode(asmjit::X86Assembler& a, bool useRex = false) { static void opcode(asmjit::X86Assembler& a, bool useRex1 = false, bool useRex2 = false) {
using namespace asmjit; using namespace asmjit;
using namespace asmjit::x86; using namespace asmjit::x86;
@@ -27,41 +27,44 @@ static void opcode(asmjit::X86Assembler& a, bool useRex = false) {
// the `X86Assembler` is properly encoding all possible combinations. If the // the `X86Assembler` is properly encoding all possible combinations. If the
// `useRexRegs` argument is true the `A` version will in most cases contain // `useRexRegs` argument is true the `A` version will in most cases contain
// a register having index 8 (if encodable). // a register having index 8 (if encodable).
X86GpReg gLoA = useRex ? r8b : al; X86GpReg gLoA = useRex1 ? r8b : al;
X86GpReg gLoB = bl; X86GpReg gLoB = useRex2 ? r9b : bl;
X86GpReg gHiA = ah; X86GpReg gHiA = ah;
X86GpReg gHiB = bh; X86GpReg gHiB = bh;
X86GpReg gwA = useRex ? r8w : ax; X86GpReg gwA = useRex1 ? r8w : ax;
X86GpReg gwB = si; X86GpReg gwB = useRex2 ? r9w : bx;
X86GpReg gdA = useRex ? r8d : eax; X86GpReg gdA = useRex1 ? r8d : eax;
X86GpReg gdB = esi; X86GpReg gdB = useRex2 ? r9d : ebx;
X86GpReg gzA = useRex ? r8 : a.zax; X86GpReg gzA = useRex1 ? r8 : a.zax;
X86GpReg gzB = a.zsi; X86GpReg gzB = useRex2 ? r9 : a.zbx;
X86GpReg gzC = a.zcx; X86GpReg gzC = useRex2 ? r10 : a.zcx;
X86GpReg gzD = useRex2 ? r11 : a.zdx;
X86FpReg fpA = fp0; X86FpReg fpA = fp0;
X86FpReg fpB = fp7; X86FpReg fpB = fp7;
X86MmReg mmA = mm0; X86MmReg mmA = mm0;
X86MmReg mmB = mm7; X86MmReg mmB = mm1;
X86XmmReg xmmA = useRex ? xmm8 : xmm0; X86XmmReg xmmA = useRex1 ? xmm8 : xmm0;
X86XmmReg xmmB = xmm1; X86XmmReg xmmB = useRex2 ? xmm9 : xmm1;
X86XmmReg xmmC = xmm2; X86XmmReg xmmC = useRex2 ? xmm10 : xmm2;
X86XmmReg xmmD = xmm3; X86XmmReg xmmD = useRex2 ? xmm11 : xmm3;
X86YmmReg ymmA = useRex ? ymm8 : ymm0; X86YmmReg ymmA = useRex1 ? ymm8 : ymm0;
X86YmmReg ymmB = ymm1; X86YmmReg ymmB = useRex2 ? ymm9 : ymm1;
X86YmmReg ymmC = ymm2; X86YmmReg ymmC = useRex2 ? ymm10 : ymm2;
X86YmmReg ymmD = ymm3; X86YmmReg ymmD = useRex2 ? ymm11 : ymm3;
X86Mem anyptr_gpA = ptr(gzA); X86Mem anyptr_gpA = ptr(gzA);
X86Mem anyptr_gpB = ptr(gzB); X86Mem anyptr_gpB = ptr(gzB);
X86Mem anyptr_gpC = ptr(gzC); X86Mem anyptr_gpC = ptr(gzC);
X86Mem anyptr_gpD = ptr(gzD);
X86Mem intptr_gpA = a.intptr_ptr(gzA); X86Mem intptr_gpA = a.intptr_ptr(gzA);
X86Mem intptr_gpB = a.intptr_ptr(gzB); X86Mem intptr_gpB = a.intptr_ptr(gzB);
@@ -577,7 +580,7 @@ static void opcode(asmjit::X86Assembler& a, bool useRex = false) {
a.movd(anyptr_gpA, mmB); a.movd(anyptr_gpA, mmB);
a.movd(gdA, mmB); a.movd(gdA, mmB);
a.movd(mmA, anyptr_gpB); a.movd(mmA, anyptr_gpB);
a.movd(mmA, esi); a.movd(mmA, gdB);
a.movq(mmA, mmB); a.movq(mmA, mmB);
a.movq(anyptr_gpA, mmB); a.movq(anyptr_gpA, mmB);
a.movq(mmA, anyptr_gpB); a.movq(mmA, anyptr_gpB);
@@ -747,7 +750,7 @@ static void opcode(asmjit::X86Assembler& a, bool useRex = false) {
a.cvtpi2ps(xmmA, anyptr_gpB); a.cvtpi2ps(xmmA, anyptr_gpB);
a.cvtps2pi(mmA, xmmB); a.cvtps2pi(mmA, xmmB);
a.cvtps2pi(mmA, anyptr_gpB); a.cvtps2pi(mmA, anyptr_gpB);
a.cvtsi2ss(xmmA, gzA); a.cvtsi2ss(xmmA, gzB);
a.cvtsi2ss(xmmA, anyptr_gpB); a.cvtsi2ss(xmmA, anyptr_gpB);
a.cvtss2si(gzA, xmmB); a.cvtss2si(gzA, xmmB);
a.cvtss2si(gzA, anyptr_gpB); a.cvtss2si(gzA, anyptr_gpB);
@@ -759,7 +762,7 @@ static void opcode(asmjit::X86Assembler& a, bool useRex = false) {
a.divps(xmmA, anyptr_gpB); a.divps(xmmA, anyptr_gpB);
a.divss(xmmA, xmmB); a.divss(xmmA, xmmB);
a.divss(xmmA, anyptr_gpB); a.divss(xmmA, anyptr_gpB);
a.ldmxcsr(anyptr_gpB); a.ldmxcsr(anyptr_gpA);
a.maskmovq(mmA, mmB); a.maskmovq(mmA, mmB);
a.maxps(xmmA, xmmB); a.maxps(xmmA, xmmB);
a.maxps(xmmA, anyptr_gpB); a.maxps(xmmA, anyptr_gpB);
@@ -1227,13 +1230,13 @@ static void opcode(asmjit::X86Assembler& a, bool useRex = false) {
a.pblendw(xmmA, anyptr_gpB, 0); a.pblendw(xmmA, anyptr_gpB, 0);
a.pcmpeqq(xmmA, xmmB); a.pcmpeqq(xmmA, xmmB);
a.pcmpeqq(xmmA, anyptr_gpB); a.pcmpeqq(xmmA, anyptr_gpB);
a.pextrb(gzA, xmmA, 0); a.pextrb(gzA, xmmB, 0);
a.pextrb(anyptr_gpA, xmmB, 0); a.pextrb(anyptr_gpA, xmmB, 0);
a.pextrd(gzA, xmmA, 0); a.pextrd(gzA, xmmB, 0);
a.pextrd(anyptr_gpA, xmmB, 0); a.pextrd(anyptr_gpA, xmmB, 0);
a.pextrq(gzA, xmmA, 0); a.pextrq(gzA, xmmB, 0);
a.pextrq(anyptr_gpA, xmmB, 0); a.pextrq(anyptr_gpA, xmmB, 0);
a.pextrw(gzA, xmmA, 0); a.pextrw(gzA, xmmB, 0);
a.pextrw(anyptr_gpA, xmmB, 0); a.pextrw(anyptr_gpA, xmmB, 0);
a.phminposuw(xmmA, xmmB); a.phminposuw(xmmA, xmmB);
a.phminposuw(xmmA, anyptr_gpB); a.phminposuw(xmmA, anyptr_gpB);
@@ -1456,9 +1459,9 @@ static void opcode(asmjit::X86Assembler& a, bool useRex = false) {
a.vcvtsd2si(gzA, anyptr_gpB); a.vcvtsd2si(gzA, anyptr_gpB);
a.vcvtsd2ss(xmmA, xmmB, xmmC); a.vcvtsd2ss(xmmA, xmmB, xmmC);
a.vcvtsd2ss(xmmA, xmmB, anyptr_gpC); a.vcvtsd2ss(xmmA, xmmB, anyptr_gpC);
a.vcvtsi2sd(xmmA, xmmB, gzA); a.vcvtsi2sd(xmmA, xmmB, gzC);
a.vcvtsi2sd(xmmA, xmmB, anyptr_gpC); a.vcvtsi2sd(xmmA, xmmB, anyptr_gpC);
a.vcvtsi2ss(xmmA, xmmB, gzA); a.vcvtsi2ss(xmmA, xmmB, gzC);
a.vcvtsi2ss(xmmA, xmmB, anyptr_gpC); a.vcvtsi2ss(xmmA, xmmB, anyptr_gpC);
a.vcvtss2sd(xmmA, xmmB, xmmC); a.vcvtss2sd(xmmA, xmmB, xmmC);
a.vcvtss2sd(xmmA, xmmB, anyptr_gpC); a.vcvtss2sd(xmmA, xmmB, anyptr_gpC);
@@ -1523,12 +1526,12 @@ static void opcode(asmjit::X86Assembler& a, bool useRex = false) {
a.vmaskmovdqu(xmmA, xmmB); a.vmaskmovdqu(xmmA, xmmB);
a.vmaskmovps(xmmA, xmmB, anyptr_gpC); a.vmaskmovps(xmmA, xmmB, anyptr_gpC);
a.vmaskmovps(ymmA, ymmB, anyptr_gpC); a.vmaskmovps(ymmA, ymmB, anyptr_gpC);
a.vmaskmovps(anyptr_gpA, xmmB, xmmC);
a.vmaskmovps(anyptr_gpA, ymmB, ymmC);
a.vmaskmovpd(xmmA, xmmB, anyptr_gpC); a.vmaskmovpd(xmmA, xmmB, anyptr_gpC);
a.vmaskmovpd(ymmA, ymmB, anyptr_gpC); a.vmaskmovpd(ymmA, ymmB, anyptr_gpC);
a.vmaskmovps(anyptr_gpA, xmmA, xmmB); a.vmaskmovpd(anyptr_gpA, xmmB, xmmC);
a.vmaskmovps(anyptr_gpA, ymmA, ymmB); a.vmaskmovpd(anyptr_gpA, ymmB, ymmC);
a.vmaskmovpd(anyptr_gpA, xmmA, xmmB);
a.vmaskmovpd(anyptr_gpA, ymmA, ymmB);
a.vmaxpd(xmmA, xmmB, xmmC); a.vmaxpd(xmmA, xmmB, xmmC);
a.vmaxpd(xmmA, xmmB, anyptr_gpC); a.vmaxpd(xmmA, xmmB, anyptr_gpC);
a.vmaxpd(ymmA, ymmB, ymmC); a.vmaxpd(ymmA, ymmB, ymmC);
@@ -1565,7 +1568,7 @@ static void opcode(asmjit::X86Assembler& a, bool useRex = false) {
a.vmovaps(ymmA, ymmB); a.vmovaps(ymmA, ymmB);
a.vmovaps(ymmA, anyptr_gpB); a.vmovaps(ymmA, anyptr_gpB);
a.vmovaps(anyptr_gpA, ymmB); a.vmovaps(anyptr_gpA, ymmB);
a.vmovd(xmmA, gzA); a.vmovd(xmmA, gzB);
a.vmovd(xmmA, anyptr_gpB); a.vmovd(xmmA, anyptr_gpB);
a.vmovd(gzA, xmmB); a.vmovd(gzA, xmmB);
a.vmovd(anyptr_gpA, xmmB); a.vmovd(anyptr_gpA, xmmB);
@@ -1760,11 +1763,11 @@ static void opcode(asmjit::X86Assembler& a, bool useRex = false) {
a.vphsubsw(xmmA, xmmB, anyptr_gpC); a.vphsubsw(xmmA, xmmB, anyptr_gpC);
a.vphsubw(xmmA, xmmB, xmmC); a.vphsubw(xmmA, xmmB, xmmC);
a.vphsubw(xmmA, xmmB, anyptr_gpC); a.vphsubw(xmmA, xmmB, anyptr_gpC);
a.vpinsrb(xmmA, xmmB, gzA, 0); a.vpinsrb(xmmA, xmmB, gzC, 0);
a.vpinsrb(xmmA, xmmB, anyptr_gpC, 0); a.vpinsrb(xmmA, xmmB, anyptr_gpC, 0);
a.vpinsrd(xmmA, xmmB, gzA, 0); a.vpinsrd(xmmA, xmmB, gzC, 0);
a.vpinsrd(xmmA, xmmB, anyptr_gpC, 0); a.vpinsrd(xmmA, xmmB, anyptr_gpC, 0);
a.vpinsrw(xmmA, xmmB, gzA, 0); a.vpinsrw(xmmA, xmmB, gzC, 0);
a.vpinsrw(xmmA, xmmB, anyptr_gpC, 0); a.vpinsrw(xmmA, xmmB, anyptr_gpC, 0);
a.vpmaddubsw(xmmA, xmmB, xmmC); a.vpmaddubsw(xmmA, xmmB, xmmC);
a.vpmaddubsw(xmmA, xmmB, anyptr_gpC); a.vpmaddubsw(xmmA, xmmB, anyptr_gpC);
@@ -2540,100 +2543,100 @@ static void opcode(asmjit::X86Assembler& a, bool useRex = false) {
a.vfmaddpd(xmmA, xmmB, xmmC, xmmD); a.vfmaddpd(xmmA, xmmB, xmmC, xmmD);
a.vfmaddpd(xmmA, xmmB, anyptr_gpC, xmmD); a.vfmaddpd(xmmA, xmmB, anyptr_gpC, xmmD);
a.vfmaddpd(xmmA, xmmB, xmmC, anyptr_gpA); a.vfmaddpd(xmmA, xmmB, xmmC, anyptr_gpD);
a.vfmaddpd(ymmA, ymmB, ymmC, ymmD); a.vfmaddpd(ymmA, ymmB, ymmC, ymmD);
a.vfmaddpd(ymmA, ymmB, anyptr_gpC, ymmD); a.vfmaddpd(ymmA, ymmB, anyptr_gpC, ymmD);
a.vfmaddpd(ymmA, ymmB, ymmC, anyptr_gpA); a.vfmaddpd(ymmA, ymmB, ymmC, anyptr_gpD);
a.vfmaddps(xmmA, xmmB, xmmC, xmmD); a.vfmaddps(xmmA, xmmB, xmmC, xmmD);
a.vfmaddps(xmmA, xmmB, anyptr_gpC, xmmD); a.vfmaddps(xmmA, xmmB, anyptr_gpC, xmmD);
a.vfmaddps(xmmA, xmmB, xmmC, anyptr_gpA); a.vfmaddps(xmmA, xmmB, xmmC, anyptr_gpD);
a.vfmaddps(ymmA, ymmB, ymmC, ymmD); a.vfmaddps(ymmA, ymmB, ymmC, ymmD);
a.vfmaddps(ymmA, ymmB, anyptr_gpC, ymmD); a.vfmaddps(ymmA, ymmB, anyptr_gpC, ymmD);
a.vfmaddps(ymmA, ymmB, ymmC, anyptr_gpA); a.vfmaddps(ymmA, ymmB, ymmC, anyptr_gpD);
a.vfmaddsd(xmmA, xmmB, xmmC, xmmD); a.vfmaddsd(xmmA, xmmB, xmmC, xmmD);
a.vfmaddsd(xmmA, xmmB, anyptr_gpC, xmmD); a.vfmaddsd(xmmA, xmmB, anyptr_gpC, xmmD);
a.vfmaddsd(xmmA, xmmB, xmmC, anyptr_gpA); a.vfmaddsd(xmmA, xmmB, xmmC, anyptr_gpD);
a.vfmaddss(xmmA, xmmB, xmmC, xmmD); a.vfmaddss(xmmA, xmmB, xmmC, xmmD);
a.vfmaddss(xmmA, xmmB, anyptr_gpC, xmmD); a.vfmaddss(xmmA, xmmB, anyptr_gpC, xmmD);
a.vfmaddss(xmmA, xmmB, xmmC, anyptr_gpA); a.vfmaddss(xmmA, xmmB, xmmC, anyptr_gpD);
a.vfmaddsubpd(xmmA, xmmB, xmmC, xmmD); a.vfmaddsubpd(xmmA, xmmB, xmmC, xmmD);
a.vfmaddsubpd(xmmA, xmmB, anyptr_gpC, xmmD); a.vfmaddsubpd(xmmA, xmmB, anyptr_gpC, xmmD);
a.vfmaddsubpd(xmmA, xmmB, xmmC, anyptr_gpA); a.vfmaddsubpd(xmmA, xmmB, xmmC, anyptr_gpD);
a.vfmaddsubpd(ymmA, ymmB, ymmC, ymmD); a.vfmaddsubpd(ymmA, ymmB, ymmC, ymmD);
a.vfmaddsubpd(ymmA, ymmB, anyptr_gpC, ymmD); a.vfmaddsubpd(ymmA, ymmB, anyptr_gpC, ymmD);
a.vfmaddsubpd(ymmA, ymmB, ymmC, anyptr_gpA); a.vfmaddsubpd(ymmA, ymmB, ymmC, anyptr_gpD);
a.vfmaddsubps(xmmA, xmmB, xmmC, xmmD); a.vfmaddsubps(xmmA, xmmB, xmmC, xmmD);
a.vfmaddsubps(xmmA, xmmB, anyptr_gpC, xmmD); a.vfmaddsubps(xmmA, xmmB, anyptr_gpC, xmmD);
a.vfmaddsubps(xmmA, xmmB, xmmC, anyptr_gpA); a.vfmaddsubps(xmmA, xmmB, xmmC, anyptr_gpD);
a.vfmaddsubps(ymmA, ymmB, ymmC, ymmD); a.vfmaddsubps(ymmA, ymmB, ymmC, ymmD);
a.vfmaddsubps(ymmA, ymmB, anyptr_gpC, ymmD); a.vfmaddsubps(ymmA, ymmB, anyptr_gpC, ymmD);
a.vfmaddsubps(ymmA, ymmB, ymmC, anyptr_gpA); a.vfmaddsubps(ymmA, ymmB, ymmC, anyptr_gpD);
a.vfmsubaddpd(xmmA, xmmB, xmmC, xmmD); a.vfmsubaddpd(xmmA, xmmB, xmmC, xmmD);
a.vfmsubaddpd(xmmA, xmmB, anyptr_gpC, xmmD); a.vfmsubaddpd(xmmA, xmmB, anyptr_gpC, xmmD);
a.vfmsubaddpd(xmmA, xmmB, xmmC, anyptr_gpA); a.vfmsubaddpd(xmmA, xmmB, xmmC, anyptr_gpD);
a.vfmsubaddpd(ymmA, ymmB, ymmC, ymmD); a.vfmsubaddpd(ymmA, ymmB, ymmC, ymmD);
a.vfmsubaddpd(ymmA, ymmB, anyptr_gpC, ymmD); a.vfmsubaddpd(ymmA, ymmB, anyptr_gpC, ymmD);
a.vfmsubaddpd(ymmA, ymmB, ymmC, anyptr_gpA); a.vfmsubaddpd(ymmA, ymmB, ymmC, anyptr_gpD);
a.vfmsubaddps(xmmA, xmmB, xmmC, xmmD); a.vfmsubaddps(xmmA, xmmB, xmmC, xmmD);
a.vfmsubaddps(xmmA, xmmB, anyptr_gpC, xmmD); a.vfmsubaddps(xmmA, xmmB, anyptr_gpC, xmmD);
a.vfmsubaddps(xmmA, xmmB, xmmC, anyptr_gpA); a.vfmsubaddps(xmmA, xmmB, xmmC, anyptr_gpD);
a.vfmsubaddps(ymmA, ymmB, ymmC, ymmD); a.vfmsubaddps(ymmA, ymmB, ymmC, ymmD);
a.vfmsubaddps(ymmA, ymmB, anyptr_gpC, ymmD); a.vfmsubaddps(ymmA, ymmB, anyptr_gpC, ymmD);
a.vfmsubaddps(ymmA, ymmB, ymmC, anyptr_gpA); a.vfmsubaddps(ymmA, ymmB, ymmC, anyptr_gpD);
a.vfmsubpd(xmmA, xmmB, xmmC, xmmD); a.vfmsubpd(xmmA, xmmB, xmmC, xmmD);
a.vfmsubpd(xmmA, xmmB, anyptr_gpC, xmmD); a.vfmsubpd(xmmA, xmmB, anyptr_gpC, xmmD);
a.vfmsubpd(xmmA, xmmB, xmmC, anyptr_gpA); a.vfmsubpd(xmmA, xmmB, xmmC, anyptr_gpD);
a.vfmsubpd(ymmA, ymmB, ymmC, ymmD); a.vfmsubpd(ymmA, ymmB, ymmC, ymmD);
a.vfmsubpd(ymmA, ymmB, anyptr_gpC, ymmD); a.vfmsubpd(ymmA, ymmB, anyptr_gpC, ymmD);
a.vfmsubpd(ymmA, ymmB, ymmC, anyptr_gpA); a.vfmsubpd(ymmA, ymmB, ymmC, anyptr_gpD);
a.vfmsubps(xmmA, xmmB, xmmC, xmmD); a.vfmsubps(xmmA, xmmB, xmmC, xmmD);
a.vfmsubps(xmmA, xmmB, anyptr_gpC, xmmD); a.vfmsubps(xmmA, xmmB, anyptr_gpC, xmmD);
a.vfmsubps(xmmA, xmmB, xmmC, anyptr_gpA); a.vfmsubps(xmmA, xmmB, xmmC, anyptr_gpD);
a.vfmsubps(ymmA, ymmB, ymmC, ymmD); a.vfmsubps(ymmA, ymmB, ymmC, ymmD);
a.vfmsubps(ymmA, ymmB, anyptr_gpC, ymmD); a.vfmsubps(ymmA, ymmB, anyptr_gpC, ymmD);
a.vfmsubps(ymmA, ymmB, ymmC, anyptr_gpA); a.vfmsubps(ymmA, ymmB, ymmC, anyptr_gpD);
a.vfmsubsd(xmmA, xmmB, xmmC, xmmD); a.vfmsubsd(xmmA, xmmB, xmmC, xmmD);
a.vfmsubsd(xmmA, xmmB, anyptr_gpC, xmmD); a.vfmsubsd(xmmA, xmmB, anyptr_gpC, xmmD);
a.vfmsubsd(xmmA, xmmB, xmmC, anyptr_gpA); a.vfmsubsd(xmmA, xmmB, xmmC, anyptr_gpD);
a.vfmsubss(xmmA, xmmB, xmmC, xmmD); a.vfmsubss(xmmA, xmmB, xmmC, xmmD);
a.vfmsubss(xmmA, xmmB, anyptr_gpC, xmmD); a.vfmsubss(xmmA, xmmB, anyptr_gpC, xmmD);
a.vfmsubss(xmmA, xmmB, xmmC, anyptr_gpA); a.vfmsubss(xmmA, xmmB, xmmC, anyptr_gpD);
a.vfnmaddpd(xmmA, xmmB, xmmC, xmmD); a.vfnmaddpd(xmmA, xmmB, xmmC, xmmD);
a.vfnmaddpd(xmmA, xmmB, anyptr_gpC, xmmD); a.vfnmaddpd(xmmA, xmmB, anyptr_gpC, xmmD);
a.vfnmaddpd(xmmA, xmmB, xmmC, anyptr_gpA); a.vfnmaddpd(xmmA, xmmB, xmmC, anyptr_gpD);
a.vfnmaddpd(ymmA, ymmB, ymmC, ymmD); a.vfnmaddpd(ymmA, ymmB, ymmC, ymmD);
a.vfnmaddpd(ymmA, ymmB, anyptr_gpC, ymmD); a.vfnmaddpd(ymmA, ymmB, anyptr_gpC, ymmD);
a.vfnmaddpd(ymmA, ymmB, ymmC, anyptr_gpA); a.vfnmaddpd(ymmA, ymmB, ymmC, anyptr_gpD);
a.vfnmaddps(xmmA, xmmB, xmmC, xmmD); a.vfnmaddps(xmmA, xmmB, xmmC, xmmD);
a.vfnmaddps(xmmA, xmmB, anyptr_gpC, xmmD); a.vfnmaddps(xmmA, xmmB, anyptr_gpC, xmmD);
a.vfnmaddps(xmmA, xmmB, xmmC, anyptr_gpA); a.vfnmaddps(xmmA, xmmB, xmmC, anyptr_gpD);
a.vfnmaddps(ymmA, ymmB, ymmC, ymmD); a.vfnmaddps(ymmA, ymmB, ymmC, ymmD);
a.vfnmaddps(ymmA, ymmB, anyptr_gpC, ymmD); a.vfnmaddps(ymmA, ymmB, anyptr_gpC, ymmD);
a.vfnmaddps(ymmA, ymmB, ymmC, anyptr_gpA); a.vfnmaddps(ymmA, ymmB, ymmC, anyptr_gpD);
a.vfnmaddsd(xmmA, xmmB, xmmC, xmmD); a.vfnmaddsd(xmmA, xmmB, xmmC, xmmD);
a.vfnmaddsd(xmmA, xmmB, anyptr_gpC, xmmD); a.vfnmaddsd(xmmA, xmmB, anyptr_gpC, xmmD);
a.vfnmaddsd(xmmA, xmmB, xmmC, anyptr_gpA); a.vfnmaddsd(xmmA, xmmB, xmmC, anyptr_gpD);
a.vfnmaddss(xmmA, xmmB, xmmC, xmmD); a.vfnmaddss(xmmA, xmmB, xmmC, xmmD);
a.vfnmaddss(xmmA, xmmB, anyptr_gpC, xmmD); a.vfnmaddss(xmmA, xmmB, anyptr_gpC, xmmD);
a.vfnmaddss(xmmA, xmmB, xmmC, anyptr_gpA); a.vfnmaddss(xmmA, xmmB, xmmC, anyptr_gpD);
a.vfnmsubpd(xmmA, xmmB, xmmC, xmmD); a.vfnmsubpd(xmmA, xmmB, xmmC, xmmD);
a.vfnmsubpd(xmmA, xmmB, anyptr_gpC, xmmD); a.vfnmsubpd(xmmA, xmmB, anyptr_gpC, xmmD);
a.vfnmsubpd(xmmA, xmmB, xmmC, anyptr_gpA); a.vfnmsubpd(xmmA, xmmB, xmmC, anyptr_gpD);
a.vfnmsubpd(ymmA, ymmB, ymmC, ymmD); a.vfnmsubpd(ymmA, ymmB, ymmC, ymmD);
a.vfnmsubpd(ymmA, ymmB, anyptr_gpC, ymmD); a.vfnmsubpd(ymmA, ymmB, anyptr_gpC, ymmD);
a.vfnmsubpd(ymmA, ymmB, ymmC, anyptr_gpA); a.vfnmsubpd(ymmA, ymmB, ymmC, anyptr_gpD);
a.vfnmsubps(xmmA, xmmB, xmmC, xmmD); a.vfnmsubps(xmmA, xmmB, xmmC, xmmD);
a.vfnmsubps(xmmA, xmmB, anyptr_gpC, xmmD); a.vfnmsubps(xmmA, xmmB, anyptr_gpC, xmmD);
a.vfnmsubps(xmmA, xmmB, xmmC, anyptr_gpA); a.vfnmsubps(xmmA, xmmB, xmmC, anyptr_gpD);
a.vfnmsubps(ymmA, ymmB, ymmC, ymmD); a.vfnmsubps(ymmA, ymmB, ymmC, ymmD);
a.vfnmsubps(ymmA, ymmB, anyptr_gpC, ymmD); a.vfnmsubps(ymmA, ymmB, anyptr_gpC, ymmD);
a.vfnmsubps(ymmA, ymmB, ymmC, anyptr_gpA); a.vfnmsubps(ymmA, ymmB, ymmC, anyptr_gpD);
a.vfnmsubsd(xmmA, xmmB, xmmC, xmmD); a.vfnmsubsd(xmmA, xmmB, xmmC, xmmD);
a.vfnmsubsd(xmmA, xmmB, anyptr_gpC, xmmD); a.vfnmsubsd(xmmA, xmmB, anyptr_gpC, xmmD);
a.vfnmsubsd(xmmA, xmmB, xmmC, anyptr_gpA); a.vfnmsubsd(xmmA, xmmB, xmmC, anyptr_gpD);
a.vfnmsubss(xmmA, xmmB, xmmC, xmmD); a.vfnmsubss(xmmA, xmmB, xmmC, xmmD);
a.vfnmsubss(xmmA, xmmB, anyptr_gpC, xmmD); a.vfnmsubss(xmmA, xmmB, anyptr_gpC, xmmD);
a.vfnmsubss(xmmA, xmmB, xmmC, anyptr_gpA); a.vfnmsubss(xmmA, xmmB, xmmC, anyptr_gpD);
// XOP. // XOP.
a.nop(); a.nop();
@@ -2652,10 +2655,10 @@ static void opcode(asmjit::X86Assembler& a, bool useRex = false) {
a.vfrczss(xmmA, anyptr_gpB); a.vfrczss(xmmA, anyptr_gpB);
a.vpcmov(xmmA, xmmB, xmmC, xmmD); a.vpcmov(xmmA, xmmB, xmmC, xmmD);
a.vpcmov(xmmA, xmmB, anyptr_gpC, xmmD); a.vpcmov(xmmA, xmmB, anyptr_gpC, xmmD);
a.vpcmov(xmmA, xmmB, xmmC, anyptr_gpA); a.vpcmov(xmmA, xmmB, xmmC, anyptr_gpD);
a.vpcmov(ymmA, ymmB, ymmC, ymmD); a.vpcmov(ymmA, ymmB, ymmC, ymmD);
a.vpcmov(ymmA, ymmB, anyptr_gpC, ymmD); a.vpcmov(ymmA, ymmB, anyptr_gpC, ymmD);
a.vpcmov(ymmA, ymmB, ymmC, anyptr_gpA); a.vpcmov(ymmA, ymmB, ymmC, anyptr_gpD);
a.vpcomb(xmmA, xmmB, xmmC, 0); a.vpcomb(xmmA, xmmB, xmmC, 0);
a.vpcomb(xmmA, xmmB, anyptr_gpC, 0); a.vpcomb(xmmA, xmmB, anyptr_gpC, 0);
a.vpcomd(xmmA, xmmB, xmmC, 0); a.vpcomd(xmmA, xmmB, xmmC, 0);
@@ -2674,16 +2677,16 @@ static void opcode(asmjit::X86Assembler& a, bool useRex = false) {
a.vpcomuw(xmmA, xmmB, anyptr_gpC, 0); a.vpcomuw(xmmA, xmmB, anyptr_gpC, 0);
a.vpermil2pd(xmmA, xmmB, xmmC, xmmD); a.vpermil2pd(xmmA, xmmB, xmmC, xmmD);
a.vpermil2pd(xmmA, xmmB, anyptr_gpC, xmmD); a.vpermil2pd(xmmA, xmmB, anyptr_gpC, xmmD);
a.vpermil2pd(xmmA, xmmB, xmmC, anyptr_gpA); a.vpermil2pd(xmmA, xmmB, xmmC, anyptr_gpD);
a.vpermil2pd(ymmA, ymmB, ymmC, ymmD); a.vpermil2pd(ymmA, ymmB, ymmC, ymmD);
a.vpermil2pd(ymmA, ymmB, anyptr_gpC, ymmD); a.vpermil2pd(ymmA, ymmB, anyptr_gpC, ymmD);
a.vpermil2pd(ymmA, ymmB, ymmC, anyptr_gpA); a.vpermil2pd(ymmA, ymmB, ymmC, anyptr_gpD);
a.vpermil2ps(xmmA, xmmB, xmmC, xmmD); a.vpermil2ps(xmmA, xmmB, xmmC, xmmD);
a.vpermil2ps(xmmA, xmmB, anyptr_gpC, xmmD); a.vpermil2ps(xmmA, xmmB, anyptr_gpC, xmmD);
a.vpermil2ps(xmmA, xmmB, xmmC, anyptr_gpA); a.vpermil2ps(xmmA, xmmB, xmmC, anyptr_gpD);
a.vpermil2ps(ymmA, ymmB, ymmC, ymmD); a.vpermil2ps(ymmA, ymmB, ymmC, ymmD);
a.vpermil2ps(ymmA, ymmB, anyptr_gpC, ymmD); a.vpermil2ps(ymmA, ymmB, anyptr_gpC, ymmD);
a.vpermil2ps(ymmA, ymmB, ymmC, anyptr_gpA); a.vpermil2ps(ymmA, ymmB, ymmC, anyptr_gpD);
a.vphaddbd(xmmA, xmmB); a.vphaddbd(xmmA, xmmB);
a.vphaddbd(xmmA, anyptr_gpB); a.vphaddbd(xmmA, anyptr_gpB);
a.vphaddbq(xmmA, xmmB); a.vphaddbq(xmmA, xmmB);
@@ -2740,7 +2743,7 @@ static void opcode(asmjit::X86Assembler& a, bool useRex = false) {
a.vpmadcswd(xmmA, xmmB, anyptr_gpC, xmmD); a.vpmadcswd(xmmA, xmmB, anyptr_gpC, xmmD);
a.vpperm(xmmA, xmmB, xmmC, xmmD); a.vpperm(xmmA, xmmB, xmmC, xmmD);
a.vpperm(xmmA, xmmB, anyptr_gpC, xmmD); a.vpperm(xmmA, xmmB, anyptr_gpC, xmmD);
a.vpperm(xmmA, xmmB, xmmC, anyptr_gpA); a.vpperm(xmmA, xmmB, xmmC, anyptr_gpD);
a.vprotb(xmmA, xmmB, xmmC); a.vprotb(xmmA, xmmB, xmmC);
a.vprotb(xmmA, anyptr_gpB, xmmC); a.vprotb(xmmA, anyptr_gpB, xmmC);
a.vprotb(xmmA, xmmB, anyptr_gpC); a.vprotb(xmmA, xmmB, anyptr_gpC);
@@ -2790,7 +2793,7 @@ static void opcode(asmjit::X86Assembler& a, bool useRex = false) {
a.nop(); a.nop();
a.andn(gzA, gzB, gzC); a.andn(gzA, gzB, gzC);
a.andn(gzA, gzB, anyptr_gpB); a.andn(gzA, gzB, anyptr_gpC);
a.bextr(gzA, gzB, gzC); a.bextr(gzA, gzB, gzC);
a.bextr(gzA, anyptr_gpB, gzC); a.bextr(gzA, anyptr_gpB, gzC);
a.blsi(gzA, gzB); a.blsi(gzA, gzB);
@@ -2818,11 +2821,11 @@ static void opcode(asmjit::X86Assembler& a, bool useRex = false) {
a.bzhi(gzA, gzB, gzC); a.bzhi(gzA, gzB, gzC);
a.bzhi(gzA, anyptr_gpB, gzC); a.bzhi(gzA, anyptr_gpB, gzC);
a.mulx(gzA, gzB, gzC); a.mulx(gzA, gzB, gzC);
a.mulx(gzA, gzB, anyptr_gpB); a.mulx(gzA, gzB, anyptr_gpC);
a.pdep(gzA, gzB, gzC); a.pdep(gzA, gzB, gzC);
a.pdep(gzA, gzB, anyptr_gpB); a.pdep(gzA, gzB, anyptr_gpC);
a.pext(gzA, gzB, gzC); a.pext(gzA, gzB, gzC);
a.pext(gzA, gzB, anyptr_gpB); a.pext(gzA, gzB, anyptr_gpC);
a.rorx(gzA, gzB, 0); a.rorx(gzA, gzB, 0);
a.rorx(gzA, anyptr_gpB, 0); a.rorx(gzA, anyptr_gpB, 0);
a.sarx(gzA, gzB, gzC); a.sarx(gzA, gzB, gzC);