// File: asmjit/test/asmjit_test_opcode.h
//
// 6051 lines, 186 KiB, C++

// [AsmJit]
// Complete x86/x64 JIT and Remote Assembler for C++.
//
// [License]
// Zlib - See LICENSE.md file in the package.
// [Guard]
#ifndef _ASMJIT_TEST_OPCODE_H
#define _ASMJIT_TEST_OPCODE_H
// [Dependencies]
#include "./asmjit.h"
namespace asmtest {
// Generate all instructions asmjit can emit.
static void generateOpcodes(asmjit::X86Assembler& a, bool useRex1 = false, bool useRex2 = false) {
using namespace asmjit;
using namespace asmjit::x86;
bool isX64 = a.is64Bit();
/*
// TODO: Finalize implicit vs explicit.
a.cmpxchg8b(ptr_gpC);
a.cmpxchg8b(ptr_gpC, x86::edx, x86::eax, x86::ecx, x86::ebx);
if (isX64) a.cmpxchg16b(ptr_gpC);
if (isX64) a.cmpxchg16b(ptr_gpC, x86::rdx, x86::rax, x86::rcx, x86::rbx);
*/
// Prevent crash when the generated function is called to see the disassembly.
a.ret();
// All instructions use the following registers, which can be changed to see
// if the `X86Assembler` properly encodes all possible combinations. If the
// `useRex1` argument is true the `A` operands will in most cases use a
// register having index 8 (if encodable); likewise, if `useRex2` is true the
// `B`, `C`, and `D` operands will in most cases use registers having indexes
// 9, 10, and 11.
X86Gp gLoA = useRex1 ? r8b : al;
X86Gp gLoB = useRex2 ? r9b : bl;
X86Gp gHiA = ah;
X86Gp gHiB = bh;
X86Gp gwA = useRex1 ? r8w : ax;
X86Gp gwB = useRex2 ? r9w : bx;
X86Gp gdA = useRex1 ? r8d : eax;
X86Gp gdB = useRex2 ? r9d : ebx;
X86Gp gdC = useRex2 ? r10d : ecx;
X86Gp gzA = useRex1 ? r8 : a.zax();
X86Gp gzB = useRex2 ? r9 : a.zbx();
X86Gp gzC = useRex2 ? r10 : a.zcx();
X86Gp gzD = useRex2 ? r11 : a.zdx();
X86KReg kA = k1;
X86KReg kB = k2;
X86KReg kC = k3;
X86Mem anyptr_gpA = ptr(gzA);
X86Mem anyptr_gpB = ptr(gzB);
X86Mem anyptr_gpC = ptr(gzC);
X86Mem anyptr_gpD = ptr(gzD);
X86Mem intptr_gpA = a.intptr_ptr(gzA);
X86Mem intptr_gpB = a.intptr_ptr(gzB);
X86Fp fpA = fp0;
X86Fp fpB = fp7;
X86Mm mmA = mm0;
X86Mm mmB = mm1;
X86Xmm xmmA = useRex1 ? xmm8 : xmm0;
X86Xmm xmmB = useRex2 ? xmm9 : xmm1;
X86Xmm xmmC = useRex2 ? xmm10 : xmm2;
X86Xmm xmmD = useRex2 ? xmm11 : xmm3;
X86Ymm ymmA = useRex1 ? ymm8 : ymm0;
X86Ymm ymmB = useRex2 ? ymm9 : ymm1;
X86Ymm ymmC = useRex2 ? ymm10 : ymm2;
X86Ymm ymmD = useRex2 ? ymm11 : ymm3;
X86Zmm zmmA = useRex1 ? zmm8 : zmm0;
X86Zmm zmmB = useRex2 ? zmm9 : zmm1;
X86Zmm zmmC = useRex2 ? zmm10 : zmm2;
X86Zmm zmmD = useRex2 ? zmm11 : zmm3;
X86Mem vx_ptr = ptr(gzB, xmmB);
X86Mem vy_ptr = ptr(gzB, ymmB);
X86Mem vz_ptr = ptr(gzB, zmmB);
Label L;
// Base.
a.adc(gLoA, 1);
a.adc(gLoB, 1);
a.adc(gHiA, 1);
a.adc(gHiB, 1);
a.adc(gwA, 1);
a.adc(gwB, 1);
a.adc(gdA, 1);
a.adc(gdB, 1);
a.adc(gzA, 1);
a.adc(gzA, gzB);
a.adc(gzA, intptr_gpB);
a.adc(intptr_gpA, 1);
a.adc(intptr_gpA, gzB);
a.add(gLoA, 1);
a.add(gLoB, 1);
a.add(gHiA, 1);
a.add(gHiB, 1);
a.add(gwA, 1);
a.add(gwB, 1);
a.add(gdA, 1);
a.add(gdB, 1);
a.add(gzA, 1);
a.add(gzA, gzB);
a.add(gzA, intptr_gpB);
a.add(intptr_gpA, 1);
a.add(intptr_gpA, gzB);
a.and_(gLoA, 1);
a.and_(gLoB, 1);
a.and_(gHiA, 1);
a.and_(gHiB, 1);
a.and_(gwA, 1);
a.and_(gwB, 1);
a.and_(gdA, 1);
a.and_(gdB, 1);
a.and_(gzA, 1);
a.and_(gzA, gzB);
a.and_(gzA, intptr_gpB);
a.and_(intptr_gpA, 1);
a.and_(intptr_gpA, gzB);
a.bswap(gzA);
a.bt(gdA, 1);
a.bt(gzA, 1);
a.bt(gdA, gdB);
a.bt(gzA, gzB);
a.bt(intptr_gpA, 1);
a.bt(anyptr_gpA, gdB);
a.bt(intptr_gpA, gzB);
a.btc(gdA, 1);
a.btc(gzA, 1);
a.btc(gdA, gdB);
a.btc(gzA, gzB);
a.btc(intptr_gpA, 1);
a.btc(anyptr_gpA, gdB);
a.btc(intptr_gpA, gzB);
a.btr(gdA, 1);
a.btr(gzA, 1);
a.btr(gdA, gdB);
a.btr(gzA, gzB);
a.btr(intptr_gpA, 1);
a.btr(anyptr_gpA, gdB);
a.btr(intptr_gpA, gzB);
a.bts(gdA, 1);
a.bts(gzA, 1);
a.bts(gdA, gdB);
a.bts(gzA, gzB);
a.bts(intptr_gpA, 1);
a.bts(anyptr_gpA, gdB);
a.bts(intptr_gpA, gzB);
a.call(gzA);
a.call(intptr_gpA);
a.cbw(); // Implicit AX <- Sign Extend AL.
a.cbw(ax); // Explicit AX <- Sign Extend AL.
a.cdq(); // Implicit EDX:EAX <- Sign Extend EAX.
a.cdq(edx, eax); // Explicit EDX:EAX <- Sign Extend EAX.
if (isX64) a.cdqe(); // Implicit RAX <- Sign Extend EAX.
if (isX64) a.cdqe(eax); // Explicit RAX <- Sign Extend EAX.
a.cwd(); // Implicit DX:AX <- Sign Extend AX.
a.cwd(dx, ax); // Explicit DX:AX <- Sign Extend AX.
a.cwde(); // Implicit EAX <- Sign Extend AX.
a.cwde(eax); // Explicit EAX <- Sign Extend AX.
if (isX64) a.cqo(); // Implicit RDX:RAX <- Sign Extend RAX.
if (isX64) a.cqo(rdx, rax); // Explicit RDX:RAX <- Sign Extend RAX.
a.clc();
a.cld();
a.cmc();
a.cmp(gLoA, 1);
a.cmp(gLoB, 1);
a.cmp(gHiA, 1);
a.cmp(gHiB, 1);
a.cmp(gwA, 1);
a.cmp(gwB, 1);
a.cmp(gdA, 1);
a.cmp(gdB, 1);
a.cmp(gzA, 1);
a.cmp(gLoA, gLoB);
a.cmp(gHiA, gHiB);
a.cmp(gwA, gwB);
a.cmp(gdA, gdB);
a.cmp(gzA, gzB);
a.cmp(gdA, anyptr_gpB);
a.cmp(gzA, intptr_gpB);
a.cmp(intptr_gpA, 1);
a.cmp(anyptr_gpA, gdB);
a.cmp(intptr_gpA, gzB);
a.cmpxchg(gdA, gdB); // Implicit regA, regB, <EAX>
a.cmpxchg(gzA, gzB); // Implicit regA, regB, <ZAX>
a.cmpxchg(gdA, gdB, eax); // Explicit regA, regB, <EAX>
a.cmpxchg(gzA, gzB, a.zax()); // Explicit regA, regB, <ZAX>
a.cmpxchg(anyptr_gpA, gdB); // Implicit mem , regB, <EAX>
a.cmpxchg(anyptr_gpA, gzB); // Implicit mem , regB, <ZAX>
a.cmpxchg(anyptr_gpA, gdB, eax); // Explicit mem , regB, <EAX>
a.cmpxchg(anyptr_gpA, gzB, a.zax()); // Explicit mem , regB, <ZAX>
a.cmpxchg8b(anyptr_gpA); // Implicit mem , <EDX>, <EAX>, <ECX>, <EBX>
if (isX64) a.cmpxchg16b(anyptr_gpA); // Implicit mem , <RDX>, <RAX>, <RCX>, <RBX>
a.cpuid(); // Implicit <EAX>, <EBX>, <ECX>, <EDX>
a.cpuid(eax, ebx, ecx, edx); // Explicit <EAX>, <EBX>, <ECX>, <EDX>
a.crc32(gdA, byte_ptr(gzB));
a.crc32(gdA, word_ptr(gzB));
a.crc32(gdA, dword_ptr(gzB));
if (isX64) a.crc32(gdA, qword_ptr(gzB));
if (isX64) a.crc32(gzA, qword_ptr(gzB));
a.dec(gLoA);
a.dec(gHiA);
a.dec(gwA);
a.dec(gdA);
a.dec(gzA);
a.dec(intptr_gpA);
a.inc(gLoA);
a.inc(gwA);
a.inc(gdA);
a.inc(gzA);
a.inc(intptr_gpA);
a.int_(13);
a.int3();
a.into();
a.lea(gzA, intptr_gpB);
a.mov(gLoA, 1);
a.mov(gHiA, 1);
a.mov(gwA, 1);
a.mov(gdA, 1);
a.mov(gzA, 1);
a.mov(gLoA, gLoB);
a.mov(gHiA, gHiB);
a.mov(gwA, gwB);
a.mov(gdA, gdB);
a.mov(gzA, gzB);
a.mov(gLoA, anyptr_gpB);
a.mov(gwA, anyptr_gpB);
a.mov(gdA, anyptr_gpB);
a.mov(gzA, intptr_gpB);
a.mov(anyptr_gpA, gLoB);
a.mov(anyptr_gpA, gwB);
a.mov(anyptr_gpA, gdB);
a.mov(intptr_gpA, 1);
a.mov(intptr_gpA, gzB);
a.movsx(gzA, gLoB);
a.movsx(gzA, byte_ptr(gzB));
a.movzx(gzA, gLoB);
a.movzx(gzA, byte_ptr(gzB));
a.movbe(gzA, anyptr_gpB);
a.movbe(anyptr_gpA, gzB);
a.neg(gzA);
a.neg(intptr_gpA);
a.nop();
a.not_(gzA);
a.not_(intptr_gpA);
a.or_(gLoA, 1);
a.or_(gLoB, 1);
a.or_(gHiA, 1);
a.or_(gHiB, 1);
a.or_(gwA, 1);
a.or_(gwB, 1);
a.or_(gdA, 1);
a.or_(gdB, 1);
a.or_(gzA, 1);
a.or_(gzA, gzB);
a.or_(gzA, intptr_gpB);
a.or_(intptr_gpA, 1);
a.or_(intptr_gpA, gzB);
a.pop(gzA);
a.pop(intptr_gpA);
if (!isX64) a.popa();
if (!isX64) a.popad();
a.popf();
if (!isX64) a.popfd();
if ( isX64) a.popfq();
a.push(gzA);
a.push(intptr_gpA);
a.push(0);
if (!isX64) a.pusha();
if (!isX64) a.pushad();
a.pushf();
if (!isX64) a.pushfd();
if ( isX64) a.pushfq();
a.rcl(gdA, 0);
a.rcl(gzA, 0);
a.rcl(gdA, 1);
a.rcl(gzA, 1);
a.rcl(gdA, cl);
a.rcl(gzA, cl);
a.rcl(intptr_gpA, 0);
a.rcl(intptr_gpA, 1);
a.rcl(intptr_gpA, cl);
a.rcr(gdA, 0);
a.rcr(gzA, 0);
a.rcr(gdA, 1);
a.rcr(gzA, 1);
a.rcr(gdA, cl);
a.rcr(gzA, cl);
a.rcr(intptr_gpA, 0);
a.rcr(intptr_gpA, 1);
a.rcr(intptr_gpA, cl);
a.rdtsc(); // Implicit <EDX:EAX>
a.rdtsc(edx, eax); // Explicit <EDX:EAX>
a.rdtscp(); // Implicit <EDX:EAX>, <ECX>
a.rdtscp(edx, eax, ecx); // Explicit <EDX:EAX>, <ECX>
a.ret();
a.ret(0);
a.rol(gdA, 0);
a.rol(gzA, 0);
a.rol(gdA, 1);
a.rol(gzA, 1);
a.rol(gdA, cl);
a.rol(gzA, cl);
a.rol(intptr_gpA, 0);
a.rol(intptr_gpA, 1);
a.rol(intptr_gpA, cl);
a.ror(gdA, 0);
a.ror(gzA, 0);
a.ror(gdA, 1);
a.ror(gzA, 1);
a.ror(gdA, cl);
a.ror(gzA, cl);
a.ror(intptr_gpA, 0);
a.ror(intptr_gpA, 1);
a.ror(intptr_gpA, cl);
a.sbb(gLoA, 1);
a.sbb(gLoB, 1);
a.sbb(gHiA, 1);
a.sbb(gHiB, 1);
a.sbb(gwA, 1);
a.sbb(gwB, 1);
a.sbb(gdA, 1);
a.sbb(gdB, 1);
a.sbb(gzA, 1);
a.sbb(gzA, gzB);
a.sbb(gzA, intptr_gpB);
a.sbb(intptr_gpA, 1);
a.sbb(intptr_gpA, gzB);
a.sal(gdA, 0);
a.sal(gzA, 0);
a.sal(gdA, 1);
a.sal(gzA, 1);
a.sal(gdA, cl);
a.sal(gzA, cl);
a.sal(intptr_gpA, 0);
a.sal(intptr_gpA, 1);
a.sal(intptr_gpA, cl);
a.sar(gdA, 0);
a.sar(gzA, 0);
a.sar(gdA, 1);
a.sar(gzA, 1);
a.sar(gdA, cl);
a.sar(gzA, cl);
a.sar(intptr_gpA, 0);
a.sar(intptr_gpA, 1);
a.sar(intptr_gpA, cl);
a.shl(gdA, 0);
a.shl(gzA, 0);
a.shl(gdA, 1);
a.shl(gzA, 1);
a.shl(gdA, cl);
a.shl(gzA, cl);
a.shl(intptr_gpA, 0);
a.shl(intptr_gpA, 1);
a.shl(intptr_gpA, cl);
a.shr(gdA, 0);
a.shr(gzA, 0);
a.shr(gdA, 1);
a.shr(gzA, 1);
a.shr(gdA, cl);
a.shr(gzA, cl);
a.shr(intptr_gpA, 0);
a.shr(intptr_gpA, 1);
a.shr(intptr_gpA, cl);
a.shld(gdA, gdB, 0);
a.shld(gzA, gzB, 0);
a.shld(gdA, gdB, cl);
a.shld(gzA, gzB, cl);
a.shld(anyptr_gpA, gdB, 0);
a.shld(intptr_gpA, gzB, 0);
a.shld(anyptr_gpA, gdB, cl);
a.shld(intptr_gpA, gzB, cl);
a.shrd(gdA, gdB, 0);
a.shrd(gzA, gzB, 0);
a.shrd(gdA, gdB, cl);
a.shrd(gzA, gzB, cl);
a.shrd(anyptr_gpA, gdB, 0);
a.shrd(intptr_gpA, gzB, 0);
a.shrd(anyptr_gpA, gdB, cl);
a.shrd(intptr_gpA, gzB, cl);
a.stc();
a.std();
a.sti();
a.sub(gLoA, 1);
a.sub(gLoB, 1);
a.sub(gHiA, 1);
a.sub(gHiB, 1);
a.sub(gwA, 1);
a.sub(gwB, 1);
a.sub(gdA, 1);
a.sub(gdB, 1);
a.sub(gzA, 1);
a.sub(gzA, gzB);
a.sub(gzA, intptr_gpB);
a.sub(intptr_gpA, 1);
a.sub(intptr_gpA, gzB);
a.swapgs();
a.test(gzA, 1);
a.test(gzA, gzB);
a.test(intptr_gpA, 1);
a.test(intptr_gpA, gzB);
a.ud2();
a.xadd(gzA, gzB);
a.xadd(intptr_gpA, gzB);
a.xchg(gzA, gzB);
a.xchg(intptr_gpA, gzB);
a.xchg(gzA, intptr_gpB);
a.xor_(gLoA, 1);
a.xor_(gLoB, 1);
a.xor_(gHiA, 1);
a.xor_(gHiB, 1);
a.xor_(gwA, 1);
a.xor_(gwB, 1);
a.xor_(gdA, 1);
a.xor_(gdB, 1);
a.xor_(gzA, 1);
a.xor_(gzA, gzB);
a.xor_(gzA, intptr_gpB);
a.xor_(intptr_gpA, 1);
a.xor_(intptr_gpA, gzB);
// Special case - div|mul.
a.div(cl); // Implicit AH:AL <- AX / r8
a.div(byte_ptr(gzA)); // Implicit AH:AL <- AX / m8
a.div(ax, cl); // Explicit AH:AL <- AX / r8
a.div(ax, anyptr_gpA); // Explicit AH:AL <- AX / m8
a.div(cx); // Implicit DX:AX <- DX:AX / r16
a.div(word_ptr(gzA)); // Implicit DX:AX <- DX:AX / m16
a.div(dx, ax, cx); // Explicit DX:AX <- DX:AX / r16
a.div(dx, ax, anyptr_gpA); // Explicit DX:AX <- DX:AX / m16
a.div(ecx); // Implicit EDX:EAX <- EDX:EAX / r32
a.div(dword_ptr(gzA)); // Implicit EDX:EAX <- EDX:EAX / m32
a.div(edx, eax, ecx); // Explicit EDX:EAX <- EDX:EAX / r32
a.div(edx, eax, anyptr_gpA); // Explicit EDX:EAX <- EDX:EAX / m32
if (isX64) a.div(rcx); // Implicit RDX|RAX <- RDX:RAX / r64
if (isX64) a.div(qword_ptr(gzA)); // Implicit RDX|RAX <- RDX:RAX / m64
if (isX64) a.div(rdx, rax, rcx); // Explicit RDX|RAX <- RDX:RAX / r64
if (isX64) a.div(rdx, rax, anyptr_gpA); // Explicit RDX|RAX <- RDX:RAX / m64
a.idiv(cl); // Implicit AH:AL <- AX / r8
a.idiv(byte_ptr(gzA)); // Implicit AH:AL <- AX / m8
a.idiv(ax, cl); // Explicit AH:AL <- AX / r8
a.idiv(ax, anyptr_gpA); // Explicit AH:AL <- AX / m8
a.idiv(cx); // Implicit DX:AX <- DX:AX / r16
a.idiv(word_ptr(gzA)); // Implicit DX:AX <- DX:AX / m16
a.idiv(dx, ax, cx); // Explicit DX:AX <- DX:AX / r16
a.idiv(dx, ax, anyptr_gpA); // Explicit DX:AX <- DX:AX / m16
a.idiv(ecx); // Implicit EDX:EAX <- EDX:EAX / r32
a.idiv(dword_ptr(gzA)); // Implicit EDX:EAX <- EDX:EAX / m32
a.idiv(edx, eax, ecx); // Explicit EDX:EAX <- EDX:EAX / r32
a.idiv(edx, eax, anyptr_gpA); // Explicit EDX:EAX <- EDX:EAX / m32
if (isX64) a.idiv(rcx); // Implicit RDX|RAX <- RDX:RAX / r64
if (isX64) a.idiv(qword_ptr(gzA)); // Implicit RDX|RAX <- RDX:RAX / m64
if (isX64) a.idiv(rdx, rax, rcx); // Explicit RDX|RAX <- RDX:RAX / r64
if (isX64) a.idiv(rdx, rax, anyptr_gpA); // Explicit RDX|RAX <- RDX:RAX / m64
a.mul(cl); // Implicit AX <- AL * r8
a.mul(byte_ptr(gzA)); // Implicit AX <- AL * m8
a.mul(ax, cl); // Explicit AX <- AL * r8
a.mul(ax, anyptr_gpA); // Explicit AX <- AL * m8
a.mul(cx); // Implicit DX:AX <- AX * r16
a.mul(word_ptr(gzA)); // Implicit DX:AX <- AX * m16
a.mul(dx, ax, cx); // Explicit DX:AX <- AX * r16
a.mul(dx, ax, anyptr_gpA); // Explicit DX:AX <- AX * m16
a.mul(ecx); // Implicit EDX:EAX <- EAX * r32
a.mul(dword_ptr(gzA)); // Implicit EDX:EAX <- EAX * m32
a.mul(edx, eax, ecx); // Explicit EDX:EAX <- EAX * r32
a.mul(edx, eax, anyptr_gpA); // Explicit EDX:EAX <- EAX * m32
if (isX64) a.mul(rcx); // Implicit RDX|RAX <- RAX * r64
if (isX64) a.mul(qword_ptr(gzA)); // Implicit RDX|RAX <- RAX * m64
if (isX64) a.mul(rdx, rax, rcx); // Explicit RDX|RAX <- RAX * r64
if (isX64) a.mul(rdx, rax, anyptr_gpA); // Explicit RDX|RAX <- RAX * m64
a.imul(gdA);
a.imul(gzA);
a.imul(intptr_gpA);
a.imul(gdA, 1);
a.imul(gzA, 1);
a.imul(gdA, gdB);
a.imul(gzA, gzB);
a.imul(gdA, gdB, 1);
a.imul(gzA, gzB, 1);
a.imul(gdA, anyptr_gpB);
a.imul(gzA, intptr_gpB);
a.imul(gdA, anyptr_gpB, 1);
a.imul(gzA, intptr_gpB, 1);
// Special case - zero-extend 32-bit immediate instead of sign-extend:
if (isX64) a.mov(gzA, static_cast<uint32_t>(0xFEEDFEED));
if (isX64) a.and_(gzA, static_cast<uint32_t>(0xFEEDFEED));
// Special case - mov with absolute 32-bit address (X86|X64).
a.mov(al , ptr(uint64_t(0x01020304)));
a.mov(ax , ptr(uint64_t(0x01020304)));
a.mov(eax, ptr(uint64_t(0x01020304)));
a.mov(ptr(uint64_t(0x01020304)), al );
a.mov(ptr(uint64_t(0x01020304)), ax );
a.mov(ptr(uint64_t(0x01020304)), eax);
// Special case - mov with absolute 64-bit address (X64).
if (isX64) a.mov(al , ptr(uint64_t(0x0102030405060708ull)));
if (isX64) a.mov(ax , ptr(uint64_t(0x0102030405060708ull)));
if (isX64) a.mov(eax, ptr(uint64_t(0x0102030405060708ull)));
if (isX64) a.mov(rax, ptr(uint64_t(0x0102030405060708ull)));
if (isX64) a.mov(ptr(uint64_t(0x0102030405060708ull)), al );
if (isX64) a.mov(ptr(uint64_t(0x0102030405060708ull)), ax );
if (isX64) a.mov(ptr(uint64_t(0x0102030405060708ull)), eax);
if (isX64) a.mov(ptr(uint64_t(0x0102030405060708ull)), rax);
// Control registers.
a.nop();
a.mov(gzA, cr0);
a.mov(cr0, gzA);
if (isX64) a.mov(gzA, cr8);
if (isX64) a.mov(cr8, gzA);
// Debug registers.
a.nop();
a.mov(gzA, dr0);
a.mov(dr0, gzA);
// Segment registers.
a.nop();
if (!isX64) a.mov(es, ax);
if (!isX64) a.mov(es, bx);
if (!isX64) a.mov(ax, es);
if (!isX64) a.mov(bx, es);
if (!isX64) a.mov(cs, ax);
if (!isX64) a.mov(cs, bx);
if (!isX64) a.mov(ax, cs);
if (!isX64) a.mov(bx, cs);
if (!isX64) a.mov(ss, ax);
if (!isX64) a.mov(ss, bx);
if (!isX64) a.mov(ax, ss);
if (!isX64) a.mov(bx, ss);
if (!isX64) a.mov(ds, ax);
if (!isX64) a.mov(ds, bx);
if (!isX64) a.mov(ax, ds);
if (!isX64) a.mov(bx, ds);
a.mov(fs, ax);
a.mov(fs, bx);
a.mov(ax, fs);
a.mov(bx, fs);
a.mov(gs, ax);
a.mov(gs, bx);
a.mov(ax, gs);
a.mov(bx, gs);
// Instructions using REP prefix.
a.nop();
a.in(al, 0);
a.in(al, dx);
a.in(ax, 0);
a.in(ax, dx);
a.in(eax, 0);
a.in(eax, dx);
a.rep().ins(byte_ptr(a.zdi()), dx);
a.rep().ins(word_ptr(a.zdi()), dx);
a.rep().ins(dword_ptr(a.zdi()), dx);
a.out(imm(0), al);
a.out(dx, al);
a.out(imm(0), ax);
a.out(dx, ax);
a.out(imm(0), eax);
a.out(dx, eax);
a.rep().outs(dx, byte_ptr(a.zsi()));
a.rep().outs(dx, word_ptr(a.zsi()));
a.rep().outs(dx, dword_ptr(a.zsi()));
a.lodsb();
a.lodsd();
a.lodsw();
a.rep().lodsb();
a.rep().lodsd();
a.rep().lodsw();
if (isX64) a.rep().lodsq();
a.movsb();
a.movsd();
a.movsw();
a.rep().movsb();
a.rep().movsd();
a.rep().movsw();
if (isX64) a.rep().movsq();
a.stosb();
a.stosd();
a.stosw();
a.rep().stosb();
a.rep().stosd();
a.rep().stosw();
if (isX64) a.rep().stosq();
a.cmpsb();
a.cmpsd();
a.cmpsw();
a.repz().cmpsb();
a.repz().cmpsd();
a.repz().cmpsw();
if (isX64) a.repz().cmpsq();
a.repnz().cmpsb();
a.repnz().cmpsd();
a.repnz().cmpsw();
if (isX64) a.repnz().cmpsq();
a.scasb();
a.scasd();
a.scasw();
a.repz().scasb();
a.repz().scasd();
a.repz().scasw();
if (isX64) a.repz().scasq();
a.repnz().scasb();
a.repnz().scasd();
a.repnz().scasw();
if (isX64) a.repnz().scasq();
// Label...Jcc/Jecxz/Jmp (the label is bound before the jumps that target it).
a.nop();
L = a.newLabel();
a.bind(L);
a.ja(L);
a.jae(L);
a.jb(L);
a.jbe(L);
a.jc(L);
a.je(L);
a.jg(L);
a.jge(L);
a.jl(L);
a.jle(L);
a.jna(L);
a.jnae(L);
a.jnb(L);
a.jnbe(L);
a.jnc(L);
a.jne(L);
a.jng(L);
a.jnge(L);
a.jnl(L);
a.jnle(L);
a.jno(L);
a.jnp(L);
a.jns(L);
a.jnz(L);
a.jo(L);
a.jp(L);
a.jpe(L);
a.jpo(L);
a.js(L);
a.jz(L);
a.jecxz(ecx, L);
a.jmp(L);
// Jcc/Jecxz/Jmp...Label (the label is bound after the jumps that target it).
a.nop();
L = a.newLabel();
a.ja(L);
a.jae(L);
a.jb(L);
a.jbe(L);
a.jc(L);
a.je(L);
a.jg(L);
a.jge(L);
a.jl(L);
a.jle(L);
a.jna(L);
a.jnae(L);
a.jnb(L);
a.jnbe(L);
a.jnc(L);
a.jne(L);
a.jng(L);
a.jnge(L);
a.jnl(L);
a.jnle(L);
a.jno(L);
a.jnp(L);
a.jns(L);
a.jnz(L);
a.jo(L);
a.jp(L);
a.jpe(L);
a.jpo(L);
a.js(L);
a.jz(L);
a.jecxz(ecx, L);
a.jmp(L);
a.bind(L);
// FPU.
a.nop();
a.f2xm1();
a.fabs();
a.fadd(fpA, fpB);
a.fadd(fpB, fpA);
a.fadd(dword_ptr(gzA));
a.fadd(qword_ptr(gzA));
a.faddp(fpB);
a.faddp();
a.fbld(dword_ptr(gzA));
a.fbstp(dword_ptr(gzA));
a.fchs();
a.fclex();
a.fcom(fpB);
a.fcom();
a.fcom(dword_ptr(gzA));
a.fcom(qword_ptr(gzA));
a.fcomp(fpB);
a.fcomp();
a.fcomp(dword_ptr(gzA));
a.fcomp(qword_ptr(gzA));
a.fcompp();
a.fcos();
a.fdecstp();
a.fdiv(fpA, fpB);
a.fdiv(fpB, fpA);
a.fdiv(dword_ptr(gzA));
a.fdiv(qword_ptr(gzA));
a.fdivp(fpB);
a.fdivp();
a.fdivr(fpA, fpB);
a.fdivr(fpB, fpA);
a.fdivr(dword_ptr(gzA));
a.fdivr(qword_ptr(gzA));
a.fdivrp(fpB);
a.fdivrp();
a.fiadd(dword_ptr(gzA));
a.ficom(word_ptr(gzA));
a.ficom(dword_ptr(gzA));
a.ficomp(word_ptr(gzA));
a.ficomp(dword_ptr(gzA));
a.fidiv(word_ptr(gzA));
a.fidiv(dword_ptr(gzA));
a.fidivr(word_ptr(gzA));
a.fidivr(dword_ptr(gzA));
a.fild(word_ptr(gzA));
a.fild(dword_ptr(gzA));
a.fild(qword_ptr(gzA));
a.fimul(word_ptr(gzA));
a.fimul(dword_ptr(gzA));
a.fincstp();
a.finit();
a.fninit();
a.fisub(word_ptr(gzA));
a.fisub(dword_ptr(gzA));
a.fisubr(word_ptr(gzA));
a.fisubr(dword_ptr(gzA));
a.fist(word_ptr(gzA));
a.fist(dword_ptr(gzA));
a.fistp(word_ptr(gzA));
a.fistp(dword_ptr(gzA));
a.fistp(qword_ptr(gzA));
a.fld(dword_ptr(gzA));
a.fld(qword_ptr(gzA));
a.fld(tword_ptr(gzA));
a.fld1();
a.fldl2t();
a.fldl2e();
a.fldpi();
a.fldlg2();
a.fldln2();
a.fldz();
a.fldcw(anyptr_gpA);
a.fldenv(anyptr_gpA);
a.fmul(fpA, fpB);
a.fmul(fpB, fpA);
a.fmul(dword_ptr(gzA));
a.fmul(qword_ptr(gzA));
a.fmulp(fpB);
a.fmulp();
a.fnclex();
a.fnop();
a.fnsave(anyptr_gpA);
a.fnstenv(anyptr_gpA);
a.fnstcw(anyptr_gpA);
a.fpatan();
a.fprem();
a.fprem1();
a.fptan();
a.frndint();
a.frstor(anyptr_gpA);
a.fsave(anyptr_gpA);
a.fscale();
a.fsin();
a.fsincos();
a.fsqrt();
a.fst(dword_ptr(gzA));
a.fst(qword_ptr(gzA));
a.fstp(dword_ptr(gzA));
a.fstp(qword_ptr(gzA));
a.fstp(tword_ptr(gzA));
a.fstcw(anyptr_gpA);
a.fstenv(anyptr_gpA);
a.fsub(fpA, fpB);
a.fsub(fpB, fpA);
a.fsub(dword_ptr(gzA));
a.fsub(qword_ptr(gzA));
a.fsubp(fpB);
a.fsubp();
a.fsubr(fpA, fpB);
a.fsubr(fpB, fpA);
a.fsubr(dword_ptr(gzA));
a.fsubr(qword_ptr(gzA));
a.fsubrp(fpB);
a.fsubrp();
a.ftst();
a.fucom(fpB);
a.fucom();
a.fucom(fpB);
a.fucomi(fpB);
a.fucomip(fpB);
a.fucomp(fpB);
a.fucompp();
a.fxam();
a.fxtract();
a.fyl2x();
a.fyl2xp1();
// LAHF/SAHF
a.lahf(); // Implicit <AH>
a.lahf(ah); // Explicit <AH>
a.sahf(); // Implicit <AH>
a.sahf(ah); // Explicit <AH>
// FXSR.
a.fxrstor(anyptr_gpA);
a.fxsave(anyptr_gpA);
// XSAVE.
a.nop();
a.xgetbv(); // Implicit <EDX:EAX>, <ECX>
a.xgetbv(edx, eax, ecx); // Explicit <EDX:EAX>, <ECX>
a.xsetbv(); // Implicit <EDX:EAX>, <ECX>
a.xsetbv(edx, eax, ecx); // Explicit <EDX:EAX>, <ECX>
a.xrstor(anyptr_gpA); // Implicit <EDX:EAX>
a.xrstors(anyptr_gpA); // Implicit <EDX:EAX>
a.xsave(anyptr_gpA); // Implicit <EDX:EAX>
a.xsavec(anyptr_gpA); // Implicit <EDX:EAX>
a.xsaveopt(anyptr_gpA); // Implicit <EDX:EAX>
a.xsaves(anyptr_gpA); // Implicit <EDX:EAX>
if (isX64) a.xrstor64(anyptr_gpA); // Implicit <EDX:EAX>
if (isX64) a.xrstors64(anyptr_gpA); // Implicit <EDX:EAX>
if (isX64) a.xsave64(anyptr_gpA); // Implicit <EDX:EAX>
if (isX64) a.xsavec64(anyptr_gpA); // Implicit <EDX:EAX>
if (isX64) a.xsaveopt64(anyptr_gpA); // Implicit <EDX:EAX>
if (isX64) a.xsaves64(anyptr_gpA); // Implicit <EDX:EAX>
// POPCNT.
a.nop();
a.popcnt(gdA, gdB);
a.popcnt(gzA, gzB);
a.popcnt(gdA, anyptr_gpB);
a.popcnt(gzA, anyptr_gpB);
// LZCNT.
a.nop();
a.lzcnt(gdA, gdB);
a.lzcnt(gzA, gzB);
a.lzcnt(gdA, anyptr_gpB);
a.lzcnt(gzA, anyptr_gpB);
// BMI.
a.nop();
a.andn(gdA, gdB, gdC);
a.andn(gzA, gzB, gzC);
a.andn(gdA, gdB, anyptr_gpC);
a.andn(gzA, gzB, anyptr_gpC);
a.bextr(gdA, gdB, gdC);
a.bextr(gzA, gzB, gzC);
a.bextr(gdA, anyptr_gpB, gdC);
a.bextr(gzA, anyptr_gpB, gzC);
a.blsi(gdA, gdB);
a.blsi(gzA, gzB);
a.blsi(gdA, anyptr_gpB);
a.blsi(gzA, anyptr_gpB);
a.blsmsk(gdA, gdB);
a.blsmsk(gzA, gzB);
a.blsmsk(gdA, anyptr_gpB);
a.blsmsk(gzA, anyptr_gpB);
a.blsr(gdA, gdB);
a.blsr(gzA, gzB);
a.blsr(gdA, anyptr_gpB);
a.blsr(gzA, anyptr_gpB);
a.tzcnt(gdA, gdB);
a.tzcnt(gzA, gzB);
a.tzcnt(gdA, anyptr_gpB);
a.tzcnt(gzA, anyptr_gpB);
// BMI2.
a.nop();
a.bzhi(gdA, gdB, gdC);
a.bzhi(gzA, gzB, gzC);
a.bzhi(gdA, anyptr_gpB, gdC);
a.bzhi(gzA, anyptr_gpB, gzC);
a.mulx(gdA, gdB, gdC); // Implicit gpA, gpB, gpC, <EDX>
a.mulx(gdA, gdB, gdC, edx); // Explicit gpA, gpB, gpC, <EDX>
a.mulx(gzA, gzB, gzC); // Implicit gpA, gpB, gpC, <EDX|RDX>
a.mulx(gzA, gzB, gzC, a.zdx()); // Explicit gpA, gpB, gpC, <EDX|RDX>
a.mulx(gdA, gdB, anyptr_gpC); // Implicit gpA, gpB, mem, <EDX>
a.mulx(gdA, gdB, anyptr_gpC, edx); // Explicit gpA, gpB, mem, <EDX>
a.mulx(gzA, gzB, anyptr_gpC); // Implicit gpA, gpB, mem, <EDX|RDX>
a.mulx(gzA, gzB, anyptr_gpC, a.zdx()); // Explicit gpA, gpB, mem, <EDX|RDX>
a.pdep(gdA, gdB, gdC);
a.pdep(gzA, gzB, gzC);
a.pdep(gdA, gdB, anyptr_gpC);
a.pdep(gzA, gzB, anyptr_gpC);
a.pext(gdA, gdB, gdC);
a.pext(gzA, gzB, gzC);
a.pext(gdA, gdB, anyptr_gpC);
a.pext(gzA, gzB, anyptr_gpC);
a.rorx(gdA, gdB, 0);
a.rorx(gzA, gzB, 0);
a.rorx(gdA, anyptr_gpB, 0);
a.rorx(gzA, anyptr_gpB, 0);
a.sarx(gdA, gdB, gdC);
a.sarx(gzA, gzB, gzC);
a.sarx(gdA, anyptr_gpB, gdC);
a.sarx(gzA, anyptr_gpB, gzC);
a.shlx(gdA, gdB, gdC);
a.shlx(gzA, gzB, gzC);
a.shlx(gdA, anyptr_gpB, gdC);
a.shlx(gzA, anyptr_gpB, gzC);
a.shrx(gdA, gdB, gdC);
a.shrx(gzA, gzB, gzC);
a.shrx(gdA, anyptr_gpB, gdC);
a.shrx(gzA, anyptr_gpB, gzC);
// ADX.
a.nop();
a.adcx(gdA, gdB);
a.adcx(gzA, gzB);
a.adcx(gdA, anyptr_gpB);
a.adcx(gzA, anyptr_gpB);
a.adox(gdA, gdB);
a.adox(gzA, gzB);
a.adox(gdA, anyptr_gpB);
a.adox(gzA, anyptr_gpB);
// TBM.
a.nop();
a.blcfill(gdA, gdB);
a.blcfill(gzA, gzB);
a.blcfill(gdA, anyptr_gpB);
a.blcfill(gzA, anyptr_gpB);
a.blci(gdA, gdB);
a.blci(gzA, gzB);
a.blci(gdA, anyptr_gpB);
a.blci(gzA, anyptr_gpB);
a.blcic(gdA, gdB);
a.blcic(gzA, gzB);
a.blcic(gdA, anyptr_gpB);
a.blcic(gzA, anyptr_gpB);
a.blcmsk(gdA, gdB);
a.blcmsk(gzA, gzB);
a.blcmsk(gdA, anyptr_gpB);
a.blcmsk(gzA, anyptr_gpB);
a.blcs(gdA, gdB);
a.blcs(gzA, gzB);
a.blcs(gdA, anyptr_gpB);
a.blcs(gzA, anyptr_gpB);
a.blsfill(gdA, gdB);
a.blsfill(gzA, gzB);
a.blsfill(gdA, anyptr_gpB);
a.blsfill(gzA, anyptr_gpB);
a.blsic(gdA, gdB);
a.blsic(gzA, gzB);
a.blsic(gdA, anyptr_gpB);
a.blsic(gzA, anyptr_gpB);
a.t1mskc(gdA, gdB);
a.t1mskc(gzA, gzB);
a.t1mskc(gdA, anyptr_gpB);
a.t1mskc(gzA, anyptr_gpB);
a.tzmsk(gdA, gdB);
a.tzmsk(gzA, gzB);
a.tzmsk(gdA, anyptr_gpB);
a.tzmsk(gzA, anyptr_gpB);
// CLFLUSH / CLFLUSH_OPT.
a.nop();
a.clflush(anyptr_gpA);
a.clflushopt(anyptr_gpA);
// CLWB.
a.nop();
a.clwb(anyptr_gpA);
// CLZERO.
a.nop();
a.clzero(); // Implicit <ds:[EAX|RAX]>
a.clzero(ptr(a.zax())); // Explicit <ds:[EAX|RAX]>
// PCOMMIT.
a.nop();
a.pcommit();
// PREFETCH / PREFETCHW / PREFETCHWT1.
a.nop();
a.prefetch(anyptr_gpA); // 3DNOW.
a.prefetchnta(anyptr_gpA); // MMX+SSE.
a.prefetcht0(anyptr_gpA); // MMX+SSE.
a.prefetcht1(anyptr_gpA); // MMX+SSE.
a.prefetcht2(anyptr_gpA); // MMX+SSE.
a.prefetchw(anyptr_gpA); // PREFETCHW.
a.prefetchwt1(anyptr_gpA); // PREFETCHWT1.
// RDRAND / RDSEED.
a.nop();
a.rdrand(gdA);
a.rdrand(gzA);
a.rdseed(gdA);
a.rdseed(gzA);
// MMX/MMX-EXT.
a.nop();
a.movd(anyptr_gpA, mmB);
a.movd(gdA, mmB);
a.movd(mmA, anyptr_gpB);
a.movd(mmA, gdB);
a.movq(mmA, mmB);
a.movq(anyptr_gpA, mmB);
a.movq(mmA, anyptr_gpB);
a.packuswb(mmA, mmB);
a.packuswb(mmA, anyptr_gpB);
a.paddb(mmA, mmB);
a.paddb(mmA, anyptr_gpB);
a.paddw(mmA, mmB);
a.paddw(mmA, anyptr_gpB);
a.paddd(mmA, mmB);
a.paddd(mmA, anyptr_gpB);
a.paddsb(mmA, mmB);
a.paddsb(mmA, anyptr_gpB);
a.paddsw(mmA, mmB);
a.paddsw(mmA, anyptr_gpB);
a.paddusb(mmA, mmB);
a.paddusb(mmA, anyptr_gpB);
a.paddusw(mmA, mmB);
a.paddusw(mmA, anyptr_gpB);
a.pand(mmA, mmB);
a.pand(mmA, anyptr_gpB);
a.pandn(mmA, mmB);
a.pandn(mmA, anyptr_gpB);
a.pcmpeqb(mmA, mmB);
a.pcmpeqb(mmA, anyptr_gpB);
a.pcmpeqw(mmA, mmB);
a.pcmpeqw(mmA, anyptr_gpB);
a.pcmpeqd(mmA, mmB);
a.pcmpeqd(mmA, anyptr_gpB);
a.pcmpgtb(mmA, mmB);
a.pcmpgtb(mmA, anyptr_gpB);
a.pcmpgtw(mmA, mmB);
a.pcmpgtw(mmA, anyptr_gpB);
a.pcmpgtd(mmA, mmB);
a.pcmpgtd(mmA, anyptr_gpB);
a.pmulhw(mmA, mmB);
a.pmulhw(mmA, anyptr_gpB);
a.pmullw(mmA, mmB);
a.pmullw(mmA, anyptr_gpB);
a.por(mmA, mmB);
a.por(mmA, anyptr_gpB);
a.pmaddwd(mmA, mmB);
a.pmaddwd(mmA, anyptr_gpB);
a.pslld(mmA, mmB);
a.pslld(mmA, anyptr_gpB);
a.pslld(mmA, 0);
a.psllq(mmA, mmB);
a.psllq(mmA, anyptr_gpB);
a.psllq(mmA, 0);
a.psllw(mmA, mmB);
a.psllw(mmA, anyptr_gpB);
a.psllw(mmA, 0);
a.psrad(mmA, mmB);
a.psrad(mmA, anyptr_gpB);
a.psrad(mmA, 0);
a.psraw(mmA, mmB);
a.psraw(mmA, anyptr_gpB);
a.psraw(mmA, 0);
a.psrld(mmA, mmB);
a.psrld(mmA, anyptr_gpB);
a.psrld(mmA, 0);
a.psrlq(mmA, mmB);
a.psrlq(mmA, anyptr_gpB);
a.psrlq(mmA, 0);
a.psrlw(mmA, mmB);
a.psrlw(mmA, anyptr_gpB);
a.psrlw(mmA, 0);
a.psubb(mmA, mmB);
a.psubb(mmA, anyptr_gpB);
a.psubw(mmA, mmB);
a.psubw(mmA, anyptr_gpB);
a.psubd(mmA, mmB);
a.psubd(mmA, anyptr_gpB);
a.psubsb(mmA, mmB);
a.psubsb(mmA, anyptr_gpB);
a.psubsw(mmA, mmB);
a.psubsw(mmA, anyptr_gpB);
a.psubusb(mmA, mmB);
a.psubusb(mmA, anyptr_gpB);
a.psubusw(mmA, mmB);
a.psubusw(mmA, anyptr_gpB);
a.punpckhbw(mmA, mmB);
a.punpckhbw(mmA, anyptr_gpB);
a.punpckhwd(mmA, mmB);
a.punpckhwd(mmA, anyptr_gpB);
a.punpckhdq(mmA, mmB);
a.punpckhdq(mmA, anyptr_gpB);
a.punpcklbw(mmA, mmB);
a.punpcklbw(mmA, anyptr_gpB);
a.punpcklwd(mmA, mmB);
a.punpcklwd(mmA, anyptr_gpB);
a.punpckldq(mmA, mmB);
a.punpckldq(mmA, anyptr_gpB);
a.pxor(mmA, mmB);
a.pxor(mmA, anyptr_gpB);
a.emms();
// 3DNOW.
a.nop();
a.pavgusb(mmA, mmB);
a.pavgusb(mmA, anyptr_gpB);
a.pf2id(mmA, mmB);
a.pf2id(mmA, anyptr_gpB);
a.pf2iw(mmA, mmB);
a.pf2iw(mmA, anyptr_gpB);
a.pfacc(mmA, mmB);
a.pfacc(mmA, anyptr_gpB);
a.pfadd(mmA, mmB);
a.pfadd(mmA, anyptr_gpB);
a.pfcmpeq(mmA, mmB);
a.pfcmpeq(mmA, anyptr_gpB);
a.pfcmpge(mmA, mmB);
a.pfcmpge(mmA, anyptr_gpB);
a.pfcmpgt(mmA, mmB);
a.pfcmpgt(mmA, anyptr_gpB);
a.pfmax(mmA, mmB);
a.pfmax(mmA, anyptr_gpB);
a.pfmin(mmA, mmB);
a.pfmin(mmA, anyptr_gpB);
a.pfmul(mmA, mmB);
a.pfmul(mmA, anyptr_gpB);
a.pfnacc(mmA, mmB);
a.pfnacc(mmA, anyptr_gpB);
a.pfpnacc(mmA, mmB);
a.pfpnacc(mmA, anyptr_gpB);
a.pfrcp(mmA, mmB);
a.pfrcp(mmA, anyptr_gpB);
a.pfrcpit1(mmA, mmB);
a.pfrcpit1(mmA, anyptr_gpB);
a.pfrcpit2(mmA, mmB);
a.pfrcpit2(mmA, anyptr_gpB);
a.pfrcpv(mmA, mmB);
a.pfrcpv(mmA, anyptr_gpB);
a.pfrsqit1(mmA, mmB);
a.pfrsqit1(mmA, anyptr_gpB);
a.pfrsqrt(mmA, mmB);
a.pfrsqrt(mmA, anyptr_gpB);
a.pfrsqrtv(mmA, mmB);
a.pfrsqrtv(mmA, anyptr_gpB);
a.pfsub(mmA, mmB);
a.pfsub(mmA, anyptr_gpB);
a.pfsubr(mmA, mmB);
a.pfsubr(mmA, anyptr_gpB);
a.pi2fd(mmA, mmB);
a.pi2fd(mmA, anyptr_gpB);
a.pi2fw(mmA, mmB);
a.pi2fw(mmA, anyptr_gpB);
a.pmulhrw(mmA, mmB);
a.pmulhrw(mmA, anyptr_gpB);
a.pswapd(mmA, mmB);
a.pswapd(mmA, anyptr_gpB);
a.femms();
// SSE.
a.nop();
a.addps(xmmA, xmmB);
a.addps(xmmA, anyptr_gpB);
a.addss(xmmA, xmmB);
a.addss(xmmA, anyptr_gpB);
a.andnps(xmmA, xmmB);
a.andnps(xmmA, anyptr_gpB);
a.andps(xmmA, xmmB);
a.andps(xmmA, anyptr_gpB);
a.cmpps(xmmA, xmmB, 0);
a.cmpps(xmmA, anyptr_gpB, 0);
a.cmpss(xmmA, xmmB, 0);
a.cmpss(xmmA, anyptr_gpB, 0);
a.comiss(xmmA, xmmB);
a.comiss(xmmA, anyptr_gpB);
a.cvtpi2ps(xmmA, mmB);
a.cvtpi2ps(xmmA, anyptr_gpB);
a.cvtps2pi(mmA, xmmB);
a.cvtps2pi(mmA, anyptr_gpB);
a.cvtsi2ss(xmmA, gdB);
a.cvtsi2ss(xmmA, gzB);
a.cvtsi2ss(xmmA, anyptr_gpB);
a.cvtss2si(gdA, xmmB);
a.cvtss2si(gzA, xmmB);
a.cvtss2si(gdA, anyptr_gpB);
a.cvtss2si(gzA, anyptr_gpB);
a.cvttps2pi(mmA, xmmB);
a.cvttps2pi(mmA, anyptr_gpB);
a.cvttss2si(gdA, xmmB);
a.cvttss2si(gzA, xmmB);
a.cvttss2si(gdA, anyptr_gpB);
a.cvttss2si(gzA, anyptr_gpB);
a.divps(xmmA, xmmB);
a.divps(xmmA, anyptr_gpB);
a.divss(xmmA, xmmB);
a.divss(xmmA, anyptr_gpB);
a.ldmxcsr(anyptr_gpA);
a.maskmovq(mmA, mmB); // Implicit mmA, mmB, <ds:[EDI|RDI]>
a.maskmovq(mmA, mmB, ptr(a.zdi())); // Explicit mmA, mmB, <ds:[EDI|RDI]>
a.maxps(xmmA, xmmB);
a.maxps(xmmA, anyptr_gpB);
a.maxss(xmmA, xmmB);
a.maxss(xmmA, anyptr_gpB);
a.minps(xmmA, xmmB);
a.minps(xmmA, anyptr_gpB);
a.minss(xmmA, xmmB);
a.minss(xmmA, anyptr_gpB);
a.movaps(xmmA, xmmB);
a.movaps(xmmA, anyptr_gpB);
a.movaps(anyptr_gpA, xmmB);
a.movd(anyptr_gpA, xmmB);
a.movd(gdA, xmmB);
a.movd(gzA, xmmB);
a.movd(xmmA, anyptr_gpB);
a.movd(xmmA, gdB);
a.movd(xmmA, gzB);
a.movq(mmA, mmB);
a.movq(xmmA, xmmB);
a.movq(anyptr_gpA, xmmB);
a.movq(xmmA, anyptr_gpB);
a.movntq(anyptr_gpA, mmB);
a.movhlps(xmmA, xmmB);
a.movhps(xmmA, anyptr_gpB);
a.movhps(anyptr_gpA, xmmB);
a.movlhps(xmmA, xmmB);
a.movlps(xmmA, anyptr_gpB);
a.movlps(anyptr_gpA, xmmB);
a.movntps(anyptr_gpA, xmmB);
a.movss(xmmA, anyptr_gpB);
a.movss(anyptr_gpA, xmmB);
a.movups(xmmA, xmmB);
a.movups(xmmA, anyptr_gpB);
a.movups(anyptr_gpA, xmmB);
a.mulps(xmmA, xmmB);
a.mulps(xmmA, anyptr_gpB);
a.mulss(xmmA, xmmB);
a.mulss(xmmA, anyptr_gpB);
a.orps(xmmA, xmmB);
a.orps(xmmA, anyptr_gpB);
a.pavgb(mmA, mmB);
a.pavgb(mmA, anyptr_gpB);
a.pavgw(mmA, mmB);
a.pavgw(mmA, anyptr_gpB);
a.pextrw(gdA, mmB, 0);
a.pextrw(gzA, mmB, 0);
a.pinsrw(mmA, gdB, 0);
a.pinsrw(mmA, gzB, 0);
a.pinsrw(mmA, anyptr_gpB, 0);
a.pmaxsw(mmA, mmB);
a.pmaxsw(mmA, anyptr_gpB);
a.pmaxub(mmA, mmB);
a.pmaxub(mmA, anyptr_gpB);
a.pminsw(mmA, mmB);
a.pminsw(mmA, anyptr_gpB);
a.pminub(mmA, mmB);
a.pminub(mmA, anyptr_gpB);
a.pmovmskb(gdA, mmB);
a.pmovmskb(gzA, mmB);
a.pmulhuw(mmA, mmB);
a.pmulhuw(mmA, anyptr_gpB);
a.psadbw(mmA, mmB);
a.psadbw(mmA, anyptr_gpB);
a.pshufw(mmA, mmB, 0);
a.pshufw(mmA, anyptr_gpB, 0);
a.rcpps(xmmA, xmmB);
a.rcpps(xmmA, anyptr_gpB);
a.rcpss(xmmA, xmmB);
a.rcpss(xmmA, anyptr_gpB);
a.psadbw(xmmA, xmmB);
a.psadbw(xmmA, anyptr_gpB);
a.rsqrtps(xmmA, xmmB);
a.rsqrtps(xmmA, anyptr_gpB);
a.rsqrtss(xmmA, xmmB);
a.rsqrtss(xmmA, anyptr_gpB);
a.sfence();
a.shufps(xmmA, xmmB, 0);
a.shufps(xmmA, anyptr_gpB, 0);
a.sqrtps(xmmA, xmmB);
a.sqrtps(xmmA, anyptr_gpB);
a.sqrtss(xmmA, xmmB);
a.sqrtss(xmmA, anyptr_gpB);
a.stmxcsr(anyptr_gpA);
a.subps(xmmA, xmmB);
a.subps(xmmA, anyptr_gpB);
a.subss(xmmA, xmmB);
a.subss(xmmA, anyptr_gpB);
a.ucomiss(xmmA, xmmB);
a.ucomiss(xmmA, anyptr_gpB);
a.unpckhps(xmmA, xmmB);
a.unpckhps(xmmA, anyptr_gpB);
a.unpcklps(xmmA, xmmB);
a.unpcklps(xmmA, anyptr_gpB);
a.xorps(xmmA, xmmB);
a.xorps(xmmA, anyptr_gpB);
// SSE2.
a.nop();
a.addpd(xmmA, xmmB);
a.addpd(xmmA, anyptr_gpB);
a.addsd(xmmA, xmmB);
a.addsd(xmmA, anyptr_gpB);
a.andnpd(xmmA, xmmB);
a.andnpd(xmmA, anyptr_gpB);
a.andpd(xmmA, xmmB);
a.andpd(xmmA, anyptr_gpB);
a.cmppd(xmmA, xmmB, 0);
a.cmppd(xmmA, anyptr_gpB, 0);
a.cmpsd(xmmA, xmmB, 0);
a.cmpsd(xmmA, anyptr_gpB, 0);
a.comisd(xmmA, xmmB);
a.comisd(xmmA, anyptr_gpB);
a.cvtdq2pd(xmmA, xmmB);
a.cvtdq2pd(xmmA, anyptr_gpB);
a.cvtdq2ps(xmmA, xmmB);
a.cvtdq2ps(xmmA, anyptr_gpB);
a.cvtpd2dq(xmmA, xmmB);
a.cvtpd2dq(xmmA, anyptr_gpB);
a.cvtpd2pi(mmA, xmmB);
a.cvtpd2pi(mmA, anyptr_gpB);
a.cvtpd2ps(xmmA, xmmB);
a.cvtpd2ps(xmmA, anyptr_gpB);
a.cvtpi2pd(xmmA, mmB);
a.cvtpi2pd(xmmA, anyptr_gpB);
a.cvtps2dq(xmmA, xmmB);
a.cvtps2dq(xmmA, anyptr_gpB);
a.cvtps2pd(xmmA, xmmB);
a.cvtps2pd(xmmA, anyptr_gpB);
a.cvtsd2si(gdA, xmmB);
a.cvtsd2si(gzA, xmmB);
a.cvtsd2si(gdA, anyptr_gpB);
a.cvtsd2si(gzA, anyptr_gpB);
a.cvtsd2ss(xmmA, xmmB);
a.cvtsd2ss(xmmA, anyptr_gpB);
a.cvtsi2sd(xmmA, gdB);
a.cvtsi2sd(xmmA, gzB);
a.cvtsi2sd(xmmA, anyptr_gpB);
a.cvtss2sd(xmmA, xmmB);
a.cvtss2sd(xmmA, anyptr_gpB);
a.cvtss2si(gdA, xmmB);
a.cvtss2si(gzA, xmmB);
a.cvtss2si(gdA, anyptr_gpB);
a.cvtss2si(gzA, anyptr_gpB);
a.cvttpd2pi(mmA, xmmB);
a.cvttpd2pi(mmA, anyptr_gpB);
a.cvttpd2dq(xmmA, xmmB);
a.cvttpd2dq(xmmA, anyptr_gpB);
a.cvttps2dq(xmmA, xmmB);
a.cvttps2dq(xmmA, anyptr_gpB);
a.cvttsd2si(gdA, xmmB);
a.cvttsd2si(gzA, xmmB);
a.cvttsd2si(gdA, anyptr_gpB);
a.cvttsd2si(gzA, anyptr_gpB);
a.divpd(xmmA, xmmB);
a.divpd(xmmA, anyptr_gpB);
a.divsd(xmmA, xmmB);
a.divsd(xmmA, anyptr_gpB);
a.lfence();
a.maskmovdqu(xmmA, xmmB); // Implicit xmmA, xmmB, <ds:[EDI|RDI]>
a.maskmovdqu(xmmA, xmmB, ptr(a.zdi())); // Explicit xmmA, xmmB, <ds:[EDI|RDI]>
a.maxpd(xmmA, xmmB);
a.maxpd(xmmA, anyptr_gpB);
a.maxsd(xmmA, xmmB);
a.maxsd(xmmA, anyptr_gpB);
a.mfence();
a.minpd(xmmA, xmmB);
a.minpd(xmmA, anyptr_gpB);
a.minsd(xmmA, xmmB);
a.minsd(xmmA, anyptr_gpB);
a.movdqa(xmmA, xmmB);
a.movdqa(xmmA, anyptr_gpB);
a.movdqa(anyptr_gpA, xmmB);
a.movdqu(xmmA, xmmB);
a.movdqu(xmmA, anyptr_gpB);
a.movdqu(anyptr_gpA, xmmB);
a.movmskps(gdA, xmmB);
a.movmskps(gzA, xmmB);
a.movmskpd(gdA, xmmB);
a.movmskpd(gzA, xmmB);
a.movsd(xmmA, xmmB);
a.movsd(xmmA, anyptr_gpB);
a.movsd(anyptr_gpA, xmmB);
a.movapd(xmmA, anyptr_gpB);
a.movapd(anyptr_gpA, xmmB);
a.movdq2q(mmA, xmmB);
a.movq2dq(xmmA, mmB);
a.movhpd(xmmA, anyptr_gpB);
a.movhpd(anyptr_gpA, xmmB);
a.movlpd(xmmA, anyptr_gpB);
a.movlpd(anyptr_gpA, xmmB);
a.movntdq(anyptr_gpA, xmmB);
a.movnti(anyptr_gpA, gdB);
a.movnti(anyptr_gpA, gzB);
a.movntpd(anyptr_gpA, xmmB);
a.movupd(xmmA, anyptr_gpB);
a.movupd(anyptr_gpA, xmmB);
a.mulpd(xmmA, xmmB);
a.mulpd(xmmA, anyptr_gpB);
a.mulsd(xmmA, xmmB);
a.mulsd(xmmA, anyptr_gpB);
a.orpd(xmmA, xmmB);
a.orpd(xmmA, anyptr_gpB);
a.packsswb(xmmA, xmmB);
a.packsswb(xmmA, anyptr_gpB);
a.packssdw(xmmA, xmmB);
a.packssdw(xmmA, anyptr_gpB);
a.packuswb(xmmA, xmmB);
a.packuswb(xmmA, anyptr_gpB);
a.paddb(xmmA, xmmB);
a.paddb(xmmA, anyptr_gpB);
a.paddw(xmmA, xmmB);
a.paddw(xmmA, anyptr_gpB);
a.paddd(xmmA, xmmB);
a.paddd(xmmA, anyptr_gpB);
a.paddq(mmA, mmB);
a.paddq(mmA, anyptr_gpB);
a.paddq(xmmA, xmmB);
a.paddq(xmmA, anyptr_gpB);
a.paddsb(xmmA, xmmB);
a.paddsb(xmmA, anyptr_gpB);
a.paddsw(xmmA, xmmB);
a.paddsw(xmmA, anyptr_gpB);
a.paddusb(xmmA, xmmB);
a.paddusb(xmmA, anyptr_gpB);
a.paddusw(xmmA, xmmB);
a.paddusw(xmmA, anyptr_gpB);
a.pand(xmmA, xmmB);
a.pand(xmmA, anyptr_gpB);
a.pandn(xmmA, xmmB);
a.pandn(xmmA, anyptr_gpB);
a.pause();
a.pavgb(xmmA, xmmB);
a.pavgb(xmmA, anyptr_gpB);
a.pavgw(xmmA, xmmB);
a.pavgw(xmmA, anyptr_gpB);
a.pcmpeqb(xmmA, xmmB);
a.pcmpeqb(xmmA, anyptr_gpB);
a.pcmpeqw(xmmA, xmmB);
a.pcmpeqw(xmmA, anyptr_gpB);
a.pcmpeqd(xmmA, xmmB);
a.pcmpeqd(xmmA, anyptr_gpB);
a.pcmpgtb(xmmA, xmmB);
a.pcmpgtb(xmmA, anyptr_gpB);
a.pcmpgtw(xmmA, xmmB);
a.pcmpgtw(xmmA, anyptr_gpB);
a.pcmpgtd(xmmA, xmmB);
a.pcmpgtd(xmmA, anyptr_gpB);
a.pmaxsw(xmmA, xmmB);
a.pmaxsw(xmmA, anyptr_gpB);
a.pmaxub(xmmA, xmmB);
a.pmaxub(xmmA, anyptr_gpB);
a.pminsw(xmmA, xmmB);
a.pminsw(xmmA, anyptr_gpB);
a.pminub(xmmA, xmmB);
a.pminub(xmmA, anyptr_gpB);
a.pmovmskb(gdA, xmmB);
a.pmovmskb(gzA, xmmB);
a.pmulhw(xmmA, xmmB);
a.pmulhw(xmmA, anyptr_gpB);
a.pmulhuw(xmmA, xmmB);
a.pmulhuw(xmmA, anyptr_gpB);
a.pmullw(xmmA, xmmB);
a.pmullw(xmmA, anyptr_gpB);
a.pmuludq(mmA, mmB);
a.pmuludq(mmA, anyptr_gpB);
a.pmuludq(xmmA, xmmB);
a.pmuludq(xmmA, anyptr_gpB);
a.por(xmmA, xmmB);
a.por(xmmA, anyptr_gpB);
a.pslld(xmmA, xmmB);
a.pslld(xmmA, anyptr_gpB);
a.pslld(xmmA, 0);
a.psllq(xmmA, xmmB);
a.psllq(xmmA, anyptr_gpB);
a.psllq(xmmA, 0);
a.psllw(xmmA, xmmB);
a.psllw(xmmA, anyptr_gpB);
a.psllw(xmmA, 0);
a.pslldq(xmmA, 0);
a.psrad(xmmA, xmmB);
a.psrad(xmmA, anyptr_gpB);
a.psrad(xmmA, 0);
a.psraw(xmmA, xmmB);
a.psraw(xmmA, anyptr_gpB);
a.psraw(xmmA, 0);
a.psubb(xmmA, xmmB);
a.psubb(xmmA, anyptr_gpB);
a.psubw(xmmA, xmmB);
a.psubw(xmmA, anyptr_gpB);
a.psubd(xmmA, xmmB);
a.psubd(xmmA, anyptr_gpB);
a.psubq(mmA, mmB);
a.psubq(mmA, anyptr_gpB);
a.psubq(xmmA, xmmB);
a.psubq(xmmA, anyptr_gpB);
a.pmaddwd(xmmA, xmmB);
a.pmaddwd(xmmA, anyptr_gpB);
a.pshufd(xmmA, xmmB, 0);
a.pshufd(xmmA, anyptr_gpB, 0);
a.pshufhw(xmmA, xmmB, 0);
a.pshufhw(xmmA, anyptr_gpB, 0);
a.pshuflw(xmmA, xmmB, 0);
a.pshuflw(xmmA, anyptr_gpB, 0);
a.psrld(xmmA, xmmB);
a.psrld(xmmA, anyptr_gpB);
a.psrld(xmmA, 0);
a.psrlq(xmmA, xmmB);
a.psrlq(xmmA, anyptr_gpB);
a.psrlq(xmmA, 0);
a.psrldq(xmmA, 0);
a.psrlw(xmmA, xmmB);
a.psrlw(xmmA, anyptr_gpB);
a.psrlw(xmmA, 0);
a.psubsb(xmmA, xmmB);
a.psubsb(xmmA, anyptr_gpB);
a.psubsw(xmmA, xmmB);
a.psubsw(xmmA, anyptr_gpB);
a.psubusb(xmmA, xmmB);
a.psubusb(xmmA, anyptr_gpB);
a.psubusw(xmmA, xmmB);
a.psubusw(xmmA, anyptr_gpB);
a.punpckhbw(xmmA, xmmB);
a.punpckhbw(xmmA, anyptr_gpB);
a.punpckhwd(xmmA, xmmB);
a.punpckhwd(xmmA, anyptr_gpB);
a.punpckhdq(xmmA, xmmB);
a.punpckhdq(xmmA, anyptr_gpB);
a.punpckhqdq(xmmA, xmmB);
a.punpckhqdq(xmmA, anyptr_gpB);
a.punpcklbw(xmmA, xmmB);
a.punpcklbw(xmmA, anyptr_gpB);
a.punpcklwd(xmmA, xmmB);
a.punpcklwd(xmmA, anyptr_gpB);
a.punpckldq(xmmA, xmmB);
a.punpckldq(xmmA, anyptr_gpB);
a.punpcklqdq(xmmA, xmmB);
a.punpcklqdq(xmmA, anyptr_gpB);
a.pxor(xmmA, xmmB);
a.pxor(xmmA, anyptr_gpB);
a.sqrtpd(xmmA, xmmB);
a.sqrtpd(xmmA, anyptr_gpB);
a.sqrtsd(xmmA, xmmB);
a.sqrtsd(xmmA, anyptr_gpB);
a.subpd(xmmA, xmmB);
a.subpd(xmmA, anyptr_gpB);
a.subsd(xmmA, xmmB);
a.subsd(xmmA, anyptr_gpB);
a.ucomisd(xmmA, xmmB);
a.ucomisd(xmmA, anyptr_gpB);
a.unpckhpd(xmmA, xmmB);
a.unpckhpd(xmmA, anyptr_gpB);
a.unpcklpd(xmmA, xmmB);
a.unpcklpd(xmmA, anyptr_gpB);
a.xorpd(xmmA, xmmB);
a.xorpd(xmmA, anyptr_gpB);
// SSE3.
a.nop();
a.addsubpd(xmmA, xmmB);
a.addsubpd(xmmA, anyptr_gpB);
a.addsubps(xmmA, xmmB);
a.addsubps(xmmA, anyptr_gpB);
a.fisttp(dword_ptr(gzA));
a.haddpd(xmmA, xmmB);
a.haddpd(xmmA, anyptr_gpB);
a.haddps(xmmA, xmmB);
a.haddps(xmmA, anyptr_gpB);
a.hsubpd(xmmA, xmmB);
a.hsubpd(xmmA, anyptr_gpB);
a.hsubps(xmmA, xmmB);
a.hsubps(xmmA, anyptr_gpB);
a.lddqu(xmmA, anyptr_gpB);
a.monitor();
a.movddup(xmmA, xmmB);
a.movddup(xmmA, anyptr_gpB);
a.movshdup(xmmA, xmmB);
a.movshdup(xmmA, anyptr_gpB);
a.movsldup(xmmA, xmmB);
a.movsldup(xmmA, anyptr_gpB);
a.mwait();
// SSSE3.
a.nop();
a.psignb(mmA, mmB);
a.psignb(mmA, anyptr_gpB);
a.psignb(xmmA, xmmB);
a.psignb(xmmA, anyptr_gpB);
a.psignw(mmA, mmB);
a.psignw(mmA, anyptr_gpB);
a.psignw(xmmA, xmmB);
a.psignw(xmmA, anyptr_gpB);
a.psignd(mmA, mmB);
a.psignd(mmA, anyptr_gpB);
a.psignd(xmmA, xmmB);
a.psignd(xmmA, anyptr_gpB);
a.phaddw(mmA, mmB);
a.phaddw(mmA, anyptr_gpB);
a.phaddw(xmmA, xmmB);
a.phaddw(xmmA, anyptr_gpB);
a.phaddd(mmA, mmB);
a.phaddd(mmA, anyptr_gpB);
a.phaddd(xmmA, xmmB);
a.phaddd(xmmA, anyptr_gpB);
a.phaddsw(mmA, mmB);
a.phaddsw(mmA, anyptr_gpB);
a.phaddsw(xmmA, xmmB);
a.phaddsw(xmmA, anyptr_gpB);
a.phsubw(mmA, mmB);
a.phsubw(mmA, anyptr_gpB);
a.phsubw(xmmA, xmmB);
a.phsubw(xmmA, anyptr_gpB);
a.phsubd(mmA, mmB);
a.phsubd(mmA, anyptr_gpB);
a.phsubd(xmmA, xmmB);
a.phsubd(xmmA, anyptr_gpB);
a.phsubsw(mmA, mmB);
a.phsubsw(mmA, anyptr_gpB);
a.phsubsw(xmmA, xmmB);
a.phsubsw(xmmA, anyptr_gpB);
a.pmaddubsw(mmA, mmB);
a.pmaddubsw(mmA, anyptr_gpB);
a.pmaddubsw(xmmA, xmmB);
a.pmaddubsw(xmmA, anyptr_gpB);
a.pabsb(mmA, mmB);
a.pabsb(mmA, anyptr_gpB);
a.pabsb(xmmA, xmmB);
a.pabsb(xmmA, anyptr_gpB);
a.pabsw(mmA, mmB);
a.pabsw(mmA, anyptr_gpB);
a.pabsw(xmmA, xmmB);
a.pabsw(xmmA, anyptr_gpB);
a.pabsd(mmA, mmB);
a.pabsd(mmA, anyptr_gpB);
a.pabsd(xmmA, xmmB);
a.pabsd(xmmA, anyptr_gpB);
a.pmulhrsw(mmA, mmB);
a.pmulhrsw(mmA, anyptr_gpB);
a.pmulhrsw(xmmA, xmmB);
a.pmulhrsw(xmmA, anyptr_gpB);
a.pshufb(mmA, mmB);
a.pshufb(mmA, anyptr_gpB);
a.pshufb(xmmA, xmmB);
a.pshufb(xmmA, anyptr_gpB);
a.palignr(mmA, mmB, 0);
a.palignr(mmA, anyptr_gpB, 0);
a.palignr(xmmA, xmmB, 0);
a.palignr(xmmA, anyptr_gpB, 0);
// SSE4.1.
a.nop();
a.blendpd(xmmA, xmmB, 0);
a.blendpd(xmmA, anyptr_gpB, 0);
a.blendps(xmmA, xmmB, 0);
a.blendps(xmmA, anyptr_gpB, 0);
a.blendvpd(xmmA, xmmB); // Implicit xmmA, xmmB, <XMM0>
a.blendvpd(xmmA, xmmB, xmm0); // Explicit xmmA, xmmB, <XMM0>
a.blendvpd(xmmA, anyptr_gpB); // Implicit xmmA, mem , <XMM0>
a.blendvpd(xmmA, anyptr_gpB, xmm0); // Explicit xmmA, mem , <XMM0>
a.blendvps(xmmA, xmmB); // Implicit xmmA, xmmB, <XMM0>
a.blendvps(xmmA, xmmB, xmm0); // Explicit xmmA, xmmB, <XMM0>
a.blendvps(xmmA, anyptr_gpB); // Implicit xmmA, mem , <XMM0>
a.blendvps(xmmA, anyptr_gpB, xmm0); // Explicit xmmA, mem , <XMM0>
a.dppd(xmmA, xmmB, 0);
a.dppd(xmmA, anyptr_gpB, 0);
a.dpps(xmmA, xmmB, 0);
a.dpps(xmmA, anyptr_gpB, 0);
a.extractps(gdA, xmmB, 0);
a.extractps(gzA, xmmB, 0);
a.extractps(anyptr_gpA, xmmB, 0);
a.insertps(xmmA, xmmB, 0);
a.insertps(xmmA, anyptr_gpB, 0);
a.movntdqa(xmmA, anyptr_gpB);
a.mpsadbw(xmmA, xmmB, 0);
a.mpsadbw(xmmA, anyptr_gpB, 0);
a.packusdw(xmmA, xmmB);
a.packusdw(xmmA, anyptr_gpB);
a.pblendvb(xmmA, xmmB); // Implicit xmmA, xmmB, <XMM0>
a.pblendvb(xmmA, xmmB, xmm0); // Explicit xmmA, xmmB, <XMM0>
a.pblendvb(xmmA, anyptr_gpB); // Implicit xmmA, mem, <XMM0>
a.pblendvb(xmmA, anyptr_gpB, xmm0); // Explicit xmmA, mem, <XMM0>
a.pblendw(xmmA, xmmB, 0);
a.pblendw(xmmA, anyptr_gpB, 0);
a.pcmpeqq(xmmA, xmmB);
a.pcmpeqq(xmmA, anyptr_gpB);
a.pextrb(gdA, xmmB, 0);
a.pextrb(gzA, xmmB, 0);
a.pextrb(anyptr_gpA, xmmB, 0);
a.pextrd(gdA, xmmB, 0);
a.pextrd(gzA, xmmB, 0);
a.pextrd(anyptr_gpA, xmmB, 0);
if (isX64) a.pextrq(gzA, xmmB, 0);
if (isX64) a.pextrq(anyptr_gpA, xmmB, 0);
a.pextrw(gdA, xmmB, 0);
a.pextrw(gzA, xmmB, 0);
a.pextrw(anyptr_gpA, xmmB, 0);
a.phminposuw(xmmA, xmmB);
a.phminposuw(xmmA, anyptr_gpB);
a.pinsrb(xmmA, gdB, 0);
a.pinsrb(xmmA, gzB, 0);
a.pinsrb(xmmA, anyptr_gpB, 0);
a.pinsrd(xmmA, gdB, 0);
a.pinsrd(xmmA, gzB, 0);
a.pinsrd(xmmA, anyptr_gpB, 0);
a.pinsrw(xmmA, gdB, 0);
a.pinsrw(xmmA, gzB, 0);
a.pinsrw(xmmA, anyptr_gpB, 0);
a.pmaxuw(xmmA, xmmB);
a.pmaxuw(xmmA, anyptr_gpB);
a.pmaxsb(xmmA, xmmB);
a.pmaxsb(xmmA, anyptr_gpB);
a.pmaxsd(xmmA, xmmB);
a.pmaxsd(xmmA, anyptr_gpB);
a.pmaxud(xmmA, xmmB);
a.pmaxud(xmmA, anyptr_gpB);
a.pminsb(xmmA, xmmB);
a.pminsb(xmmA, anyptr_gpB);
a.pminuw(xmmA, xmmB);
a.pminuw(xmmA, anyptr_gpB);
a.pminud(xmmA, xmmB);
a.pminud(xmmA, anyptr_gpB);
a.pminsd(xmmA, xmmB);
a.pminsd(xmmA, anyptr_gpB);
a.pmovsxbw(xmmA, xmmB);
a.pmovsxbw(xmmA, anyptr_gpB);
a.pmovsxbd(xmmA, xmmB);
a.pmovsxbd(xmmA, anyptr_gpB);
a.pmovsxbq(xmmA, xmmB);
a.pmovsxbq(xmmA, anyptr_gpB);
a.pmovsxwd(xmmA, xmmB);
a.pmovsxwd(xmmA, anyptr_gpB);
a.pmovsxwq(xmmA, xmmB);
a.pmovsxwq(xmmA, anyptr_gpB);
a.pmovsxdq(xmmA, xmmB);
a.pmovsxdq(xmmA, anyptr_gpB);
a.pmovzxbw(xmmA, xmmB);
a.pmovzxbw(xmmA, anyptr_gpB);
a.pmovzxbd(xmmA, xmmB);
a.pmovzxbd(xmmA, anyptr_gpB);
a.pmovzxbq(xmmA, xmmB);
a.pmovzxbq(xmmA, anyptr_gpB);
a.pmovzxwd(xmmA, xmmB);
a.pmovzxwd(xmmA, anyptr_gpB);
a.pmovzxwq(xmmA, xmmB);
a.pmovzxwq(xmmA, anyptr_gpB);
a.pmovzxdq(xmmA, xmmB);
a.pmovzxdq(xmmA, anyptr_gpB);
a.pmuldq(xmmA, xmmB);
a.pmuldq(xmmA, anyptr_gpB);
a.pmulld(xmmA, xmmB);
a.pmulld(xmmA, anyptr_gpB);
a.ptest(xmmA, xmmB);
a.ptest(xmmA, anyptr_gpB);
a.roundps(xmmA, xmmB, 0);
a.roundps(xmmA, anyptr_gpB, 0);
a.roundss(xmmA, xmmB, 0);
a.roundss(xmmA, anyptr_gpB, 0);
a.roundpd(xmmA, xmmB, 0);
a.roundpd(xmmA, anyptr_gpB, 0);
a.roundsd(xmmA, xmmB, 0);
a.roundsd(xmmA, anyptr_gpB, 0);
// SSE4.2.
a.nop();
a.pcmpestri(xmmA, xmmB , imm(0)); // Implicit xmmA, xmmB, imm, <ECX>, <EAX>, <EDX>
a.pcmpestri(xmmA, xmmB , imm(0), ecx, eax, edx); // Explicit xmmA, xmmB, imm, <ECX>, <EAX>, <EDX>
a.pcmpestri(xmmA, anyptr_gpB, imm(0)); // Implicit xmmA, mem , imm, <ECX>, <EAX>, <EDX>
a.pcmpestri(xmmA, anyptr_gpB, imm(0), ecx, eax, edx); // Explicit xmmA, mem , imm, <ECX>, <EAX>, <EDX>
a.pcmpestrm(xmmA, xmmB , imm(0)); // Implicit xmmA, xmmB, imm, <XMM0>, <EAX>, <EDX>
a.pcmpestrm(xmmA, xmmB , imm(0), xmm0, eax, edx); // Explicit xmmA, xmmB, imm, <XMM0>, <EAX>, <EDX>
a.pcmpestrm(xmmA, anyptr_gpB, imm(0)); // Implicit xmmA, mem , imm, <XMM0>, <EAX>, <EDX>
a.pcmpestrm(xmmA, anyptr_gpB, imm(0), xmm0, eax, edx); // Explicit xmmA, mem , imm, <XMM0>, <EAX>, <EDX>
a.pcmpistri(xmmA, xmmB , imm(0)); // Implicit xmmA, xmmB, imm, <ECX>
a.pcmpistri(xmmA, xmmB , imm(0), ecx); // Explicit xmmA, xmmB, imm, <ECX>
a.pcmpistri(xmmA, anyptr_gpB, imm(0)); // Implicit xmmA, mem , imm, <ECX>
a.pcmpistri(xmmA, anyptr_gpB, imm(0), ecx); // Explicit xmmA, mem , imm, <ECX>
a.pcmpistrm(xmmA, xmmB , imm(0)); // Implicit xmmA, xmmB, imm, <XMM0>
a.pcmpistrm(xmmA, xmmB , imm(0), xmm0); // Explicit xmmA, xmmB, imm, <XMM0>
a.pcmpistrm(xmmA, anyptr_gpB, imm(0)); // Implicit xmmA, mem , imm, <XMM0>
a.pcmpistrm(xmmA, anyptr_gpB, imm(0), xmm0); // Explicit xmmA, mem , imm, <XMM0>
a.pcmpgtq(xmmA, xmmB);
a.pcmpgtq(xmmA, anyptr_gpB);
// SSE4A.
a.nop();
a.extrq(xmmA, xmmB);
a.extrq(xmmA, 0x1, 0x2);
a.extrq(xmmB, 0x1, 0x2);
a.insertq(xmmA, xmmB);
a.insertq(xmmA, xmmB, 0x1, 0x2);
a.movntsd(anyptr_gpA, xmmB);
a.movntss(anyptr_gpA, xmmB);
// AESNI.
a.nop();
a.aesdec(xmmA, xmmB);
a.aesdec(xmmA, anyptr_gpB);
a.aesdeclast(xmmA, xmmB);
a.aesdeclast(xmmA, anyptr_gpB);
a.aesenc(xmmA, xmmB);
a.aesenc(xmmA, anyptr_gpB);
a.aesenclast(xmmA, xmmB);
a.aesenclast(xmmA, anyptr_gpB);
a.aesimc(xmmA, xmmB);
a.aesimc(xmmA, anyptr_gpB);
a.aeskeygenassist(xmmA, xmmB, 0);
a.aeskeygenassist(xmmA, anyptr_gpB, 0);
// SHA.
a.nop();
a.sha1msg1(xmmA, xmmB);
a.sha1msg1(xmmA, anyptr_gpB);
a.sha1msg2(xmmA, xmmB);
a.sha1msg2(xmmA, anyptr_gpB);
a.sha1nexte(xmmA, xmmB);
a.sha1nexte(xmmA, anyptr_gpB);
a.sha1rnds4(xmmA, xmmB, 0);
a.sha1rnds4(xmmA, anyptr_gpB, 0);
a.sha256msg1(xmmA, xmmB);
a.sha256msg1(xmmA, anyptr_gpB);
a.sha256msg2(xmmA, xmmB);
a.sha256msg2(xmmA, anyptr_gpB);
a.sha256rnds2(xmmA, xmmB); // Implicit xmmA, xmmB, <XMM0>
a.sha256rnds2(xmmA, xmmB, xmm0); // Explicit xmmA, xmmB, <XMM0>
a.sha256rnds2(xmmA, anyptr_gpB); // Implicit xmmA, mem, <XMM0>
a.sha256rnds2(xmmA, anyptr_gpB, xmm0); // Explicit xmmA, mem, <XMM0>
// PCLMULQDQ.
a.nop();
a.pclmulqdq(xmmA, xmmB, 0);
a.pclmulqdq(xmmA, anyptr_gpB, 0);
// AVX.
a.nop();
a.vaddpd(xmmA, xmmB, xmmC);
a.vaddpd(xmmA, xmmB, anyptr_gpC);
a.vaddpd(ymmA, ymmB, ymmC);
a.vaddpd(ymmA, ymmB, anyptr_gpC);
a.vaddps(xmmA, xmmB, xmmC);
a.vaddps(xmmA, xmmB, anyptr_gpC);
a.vaddps(ymmA, ymmB, ymmC);
a.vaddps(ymmA, ymmB, anyptr_gpC);
a.vaddsd(xmmA, xmmB, xmmC);
a.vaddsd(xmmA, xmmB, anyptr_gpC);
a.vaddss(xmmA, xmmB, xmmC);
a.vaddss(xmmA, xmmB, anyptr_gpC);
a.vaddsubpd(xmmA, xmmB, xmmC);
a.vaddsubpd(xmmA, xmmB, anyptr_gpC);
a.vaddsubpd(ymmA, ymmB, ymmC);
a.vaddsubpd(ymmA, ymmB, anyptr_gpC);
a.vaddsubps(xmmA, xmmB, xmmC);
a.vaddsubps(xmmA, xmmB, anyptr_gpC);
a.vaddsubps(ymmA, ymmB, ymmC);
a.vaddsubps(ymmA, ymmB, anyptr_gpC);
a.vandpd(xmmA, xmmB, xmmC);
a.vandpd(xmmA, xmmB, anyptr_gpC);
a.vandpd(ymmA, ymmB, ymmC);
a.vandpd(ymmA, ymmB, anyptr_gpC);
a.vandps(xmmA, xmmB, xmmC);
a.vandps(xmmA, xmmB, anyptr_gpC);
a.vandps(ymmA, ymmB, ymmC);
a.vandps(ymmA, ymmB, anyptr_gpC);
a.vandnpd(xmmA, xmmB, xmmC);
a.vandnpd(xmmA, xmmB, anyptr_gpC);
a.vandnpd(ymmA, ymmB, ymmC);
a.vandnpd(ymmA, ymmB, anyptr_gpC);
a.vandnps(xmmA, xmmB, xmmC);
a.vandnps(xmmA, xmmB, anyptr_gpC);
a.vandnps(ymmA, ymmB, ymmC);
a.vandnps(ymmA, ymmB, anyptr_gpC);
a.vblendpd(xmmA, xmmB, xmmC, 0);
a.vblendpd(xmmA, xmmB, anyptr_gpC, 0);
a.vblendpd(ymmA, ymmB, ymmC, 0);
a.vblendpd(ymmA, ymmB, anyptr_gpC, 0);
a.vblendps(xmmA, xmmB, xmmC, 0);
a.vblendps(xmmA, xmmB, anyptr_gpC, 0);
a.vblendps(ymmA, ymmB, ymmC, 0);
a.vblendps(ymmA, ymmB, anyptr_gpC, 0);
a.vblendvpd(xmmA, xmmB, xmmC, xmmD);
a.vblendvpd(xmmA, xmmB, anyptr_gpC, xmmD);
a.vblendvpd(ymmA, ymmB, ymmC, ymmD);
a.vblendvpd(ymmA, ymmB, anyptr_gpC, ymmD);
a.vbroadcastf128(ymmA, anyptr_gpB);
a.vbroadcastsd(ymmA, anyptr_gpB);
a.vbroadcastss(xmmA, anyptr_gpB);
a.vbroadcastss(ymmA, anyptr_gpB);
a.vcmppd(xmmA, xmmB, xmmC, 0);
a.vcmppd(xmmA, xmmB, anyptr_gpC, 0);
a.vcmppd(ymmA, ymmB, ymmC, 0);
a.vcmppd(ymmA, ymmB, anyptr_gpC, 0);
a.vcmpps(xmmA, xmmB, xmmC, 0);
a.vcmpps(xmmA, xmmB, anyptr_gpC, 0);
a.vcmpps(ymmA, ymmB, ymmC, 0);
a.vcmpps(ymmA, ymmB, anyptr_gpC, 0);
a.vcmpsd(xmmA, xmmB, xmmC, 0);
a.vcmpsd(xmmA, xmmB, anyptr_gpC, 0);
a.vcmpss(xmmA, xmmB, xmmC, 0);
a.vcmpss(xmmA, xmmB, anyptr_gpC, 0);
a.vcomisd(xmmA, xmmB);
a.vcomisd(xmmA, anyptr_gpB);
a.vcomiss(xmmA, xmmB);
a.vcomiss(xmmA, anyptr_gpB);
a.vcvtdq2pd(xmmA, xmmB);
a.vcvtdq2pd(xmmA, anyptr_gpB);
a.vcvtdq2pd(ymmA, xmmB);
a.vcvtdq2pd(ymmA, anyptr_gpB);
a.vcvtdq2ps(xmmA, xmmB);
a.vcvtdq2ps(xmmA, anyptr_gpB);
a.vcvtdq2ps(ymmA, ymmB);
a.vcvtdq2ps(ymmA, anyptr_gpB);
a.vcvtpd2dq(xmmA, xmmB);
a.vcvtpd2dq(xmmA, ymmB);
a.vcvtpd2dq(xmmA, anyptr_gpB);
a.vcvtpd2ps(xmmA, xmmB);
a.vcvtpd2ps(xmmA, ymmB);
a.vcvtpd2ps(xmmA, anyptr_gpB);
a.vcvtps2dq(xmmA, xmmB);
a.vcvtps2dq(xmmA, anyptr_gpB);
a.vcvtps2dq(ymmA, ymmB);
a.vcvtps2dq(ymmA, anyptr_gpB);
a.vcvtps2pd(xmmA, xmmB);
a.vcvtps2pd(xmmA, anyptr_gpB);
a.vcvtps2pd(ymmA, xmmB);
a.vcvtps2pd(ymmA, anyptr_gpB);
a.vcvtsd2si(gzA, xmmB);
a.vcvtsd2si(gzA, anyptr_gpB);
a.vcvtsd2ss(xmmA, xmmB, xmmC);
a.vcvtsd2ss(xmmA, xmmB, anyptr_gpC);
a.vcvtsi2sd(xmmA, xmmB, gzC);
a.vcvtsi2sd(xmmA, xmmB, anyptr_gpC);
a.vcvtsi2ss(xmmA, xmmB, gzC);
a.vcvtsi2ss(xmmA, xmmB, anyptr_gpC);
a.vcvtss2sd(xmmA, xmmB, xmmC);
a.vcvtss2sd(xmmA, xmmB, anyptr_gpC);
a.vcvtss2si(gzA, xmmB);
a.vcvtss2si(gzA, anyptr_gpB);
a.vcvttpd2dq(xmmA, xmmB);
a.vcvttpd2dq(xmmA, ymmB);
a.vcvttpd2dq(xmmA, anyptr_gpB);
a.vcvttps2dq(xmmA, xmmB);
a.vcvttps2dq(xmmA, anyptr_gpB);
a.vcvttps2dq(ymmA, ymmB);
a.vcvttps2dq(ymmA, anyptr_gpB);
a.vcvttsd2si(gzA, xmmB);
a.vcvttsd2si(gzA, anyptr_gpB);
a.vcvttss2si(gzA, xmmB);
a.vcvttss2si(gzA, anyptr_gpB);
a.vdivpd(xmmA, xmmB, xmmC);
a.vdivpd(xmmA, xmmB, anyptr_gpC);
a.vdivpd(ymmA, ymmB, ymmC);
a.vdivpd(ymmA, ymmB, anyptr_gpC);
a.vdivps(xmmA, xmmB, xmmC);
a.vdivps(xmmA, xmmB, anyptr_gpC);
a.vdivps(ymmA, ymmB, ymmC);
a.vdivps(ymmA, ymmB, anyptr_gpC);
a.vdivsd(xmmA, xmmB, xmmC);
a.vdivsd(xmmA, xmmB, anyptr_gpC);
a.vdivss(xmmA, xmmB, xmmC);
a.vdivss(xmmA, xmmB, anyptr_gpC);
a.vdppd(xmmA, xmmB, xmmC, 0);
a.vdppd(xmmA, xmmB, anyptr_gpC, 0);
a.vdpps(xmmA, xmmB, xmmC, 0);
a.vdpps(xmmA, xmmB, anyptr_gpC, 0);
a.vdpps(ymmA, ymmB, ymmC, 0);
a.vdpps(ymmA, ymmB, anyptr_gpC, 0);
a.vextractf128(xmmA, ymmB, 0);
a.vextractf128(anyptr_gpA, ymmB, 0);
a.vextractps(gzA, xmmB, 0);
a.vextractps(anyptr_gpA, xmmB, 0);
a.vhaddpd(xmmA, xmmB, xmmC);
a.vhaddpd(xmmA, xmmB, anyptr_gpC);
a.vhaddpd(ymmA, ymmB, ymmC);
a.vhaddpd(ymmA, ymmB, anyptr_gpC);
a.vhaddps(xmmA, xmmB, xmmC);
a.vhaddps(xmmA, xmmB, anyptr_gpC);
a.vhaddps(ymmA, ymmB, ymmC);
a.vhaddps(ymmA, ymmB, anyptr_gpC);
a.vhsubpd(xmmA, xmmB, xmmC);
a.vhsubpd(xmmA, xmmB, anyptr_gpC);
a.vhsubpd(ymmA, ymmB, ymmC);
a.vhsubpd(ymmA, ymmB, anyptr_gpC);
a.vhsubps(xmmA, xmmB, xmmC);
a.vhsubps(xmmA, xmmB, anyptr_gpC);
a.vhsubps(ymmA, ymmB, ymmC);
a.vhsubps(ymmA, ymmB, anyptr_gpC);
a.vinsertf128(ymmA, ymmB, xmmC, 0);
a.vinsertf128(ymmA, ymmB, anyptr_gpC, 0);
a.vinsertps(xmmA, xmmB, xmmC, 0);
a.vinsertps(xmmA, xmmB, anyptr_gpC, 0);
a.vlddqu(xmmA, anyptr_gpB);
a.vlddqu(ymmA, anyptr_gpB);
a.vldmxcsr(anyptr_gpA);
a.vmaskmovdqu(xmmA, xmmB); // Implicit xmmA, xmmB, <ds:[EDI|RDI]>
a.vmaskmovdqu(xmmA, xmmB, ptr(a.zdi())); // Explicit xmmA, xmmB, <ds:[EDI|RDI]>
a.vmaskmovps(xmmA, xmmB, anyptr_gpC);
a.vmaskmovps(ymmA, ymmB, anyptr_gpC);
a.vmaskmovps(anyptr_gpA, xmmB, xmmC);
a.vmaskmovps(anyptr_gpA, ymmB, ymmC);
a.vmaskmovpd(xmmA, xmmB, anyptr_gpC);
a.vmaskmovpd(ymmA, ymmB, anyptr_gpC);
a.vmaskmovpd(anyptr_gpA, xmmB, xmmC);
a.vmaskmovpd(anyptr_gpA, ymmB, ymmC);
a.vmaxpd(xmmA, xmmB, xmmC);
a.vmaxpd(xmmA, xmmB, anyptr_gpC);
a.vmaxpd(ymmA, ymmB, ymmC);
a.vmaxpd(ymmA, ymmB, anyptr_gpC);
a.vmaxps(xmmA, xmmB, xmmC);
a.vmaxps(xmmA, xmmB, anyptr_gpC);
a.vmaxps(ymmA, ymmB, ymmC);
a.vmaxps(ymmA, ymmB, anyptr_gpC);
a.vmaxsd(xmmA, xmmB, xmmC);
a.vmaxsd(xmmA, xmmB, anyptr_gpC);
a.vmaxss(xmmA, xmmB, xmmC);
a.vmaxss(xmmA, xmmB, anyptr_gpC);
a.vminpd(xmmA, xmmB, xmmC);
a.vminpd(xmmA, xmmB, anyptr_gpC);
a.vminpd(ymmA, ymmB, ymmC);
a.vminpd(ymmA, ymmB, anyptr_gpC);
a.vminps(xmmA, xmmB, xmmC);
a.vminps(xmmA, xmmB, anyptr_gpC);
a.vminps(ymmA, ymmB, ymmC);
a.vminps(ymmA, ymmB, anyptr_gpC);
a.vminsd(xmmA, xmmB, xmmC);
a.vminsd(xmmA, xmmB, anyptr_gpC);
a.vminss(xmmA, xmmB, xmmC);
a.vminss(xmmA, xmmB, anyptr_gpC);
a.vmovapd(xmmA, xmmB);
a.vmovapd(xmmA, anyptr_gpB);
a.vmovapd(anyptr_gpA, xmmB);
a.vmovapd(ymmA, ymmB);
a.vmovapd(ymmA, anyptr_gpB);
a.vmovapd(anyptr_gpA, ymmB);
a.vmovaps(xmmA, xmmB);
a.vmovaps(xmmA, anyptr_gpB);
a.vmovaps(anyptr_gpA, xmmB);
a.vmovaps(ymmA, ymmB);
a.vmovaps(ymmA, anyptr_gpB);
a.vmovaps(anyptr_gpA, ymmB);
a.vmovd(xmmA, gzB);
a.vmovd(xmmA, anyptr_gpB);
a.vmovd(gzA, xmmB);
a.vmovd(anyptr_gpA, xmmB);
a.vmovddup(xmmA, xmmB);
a.vmovddup(xmmA, anyptr_gpB);
a.vmovddup(ymmA, ymmB);
a.vmovddup(ymmA, anyptr_gpB);
a.vmovdqa(xmmA, xmmB);
a.vmovdqa(xmmA, anyptr_gpB);
a.vmovdqa(anyptr_gpA, xmmB);
a.vmovdqa(ymmA, ymmB);
a.vmovdqa(ymmA, anyptr_gpB);
a.vmovdqa(anyptr_gpA, ymmB);
a.vmovdqu(xmmA, xmmB);
a.vmovdqu(xmmA, anyptr_gpB);
a.vmovdqu(anyptr_gpA, xmmB);
a.vmovdqu(ymmA, ymmB);
a.vmovdqu(ymmA, anyptr_gpB);
a.vmovdqu(anyptr_gpA, ymmB);
a.vmovhlps(xmmA, xmmB, xmmC);
a.vmovhpd(xmmA, xmmB, anyptr_gpC);
a.vmovhpd(anyptr_gpA, xmmB);
a.vmovhps(xmmA, xmmB, anyptr_gpC);
a.vmovhps(anyptr_gpA, xmmB);
a.vmovlhps(xmmA, xmmB, xmmC);
a.vmovlpd(xmmA, xmmB, anyptr_gpC);
a.vmovlpd(anyptr_gpA, xmmB);
a.vmovlps(xmmA, xmmB, anyptr_gpC);
a.vmovlps(anyptr_gpA, xmmB);
a.vmovmskpd(gzA, xmmB);
a.vmovmskpd(gzA, ymmB);
a.vmovmskps(gzA, xmmB);
a.vmovmskps(gzA, ymmB);
a.vmovntdq(anyptr_gpA, xmmB);
a.vmovntdq(anyptr_gpA, ymmB);
a.vmovntdqa(xmmA, anyptr_gpB);
a.vmovntpd(anyptr_gpA, xmmB);
a.vmovntpd(anyptr_gpA, ymmB);
a.vmovntps(anyptr_gpA, xmmB);
a.vmovntps(anyptr_gpA, ymmB);
a.vmovsd(xmmA, xmmB, xmmC);
a.vmovsd(xmmA, anyptr_gpB);
a.vmovsd(anyptr_gpA, xmmB);
a.vmovshdup(xmmA, xmmB);
a.vmovshdup(xmmA, anyptr_gpB);
a.vmovshdup(ymmA, ymmB);
a.vmovshdup(ymmA, anyptr_gpB);
a.vmovsldup(xmmA, xmmB);
a.vmovsldup(xmmA, anyptr_gpB);
a.vmovsldup(ymmA, ymmB);
a.vmovsldup(ymmA, anyptr_gpB);
a.vmovss(xmmA, xmmB, xmmC);
a.vmovss(xmmA, anyptr_gpB);
a.vmovss(anyptr_gpA, xmmB);
a.vmovupd(xmmA, xmmB);
a.vmovupd(xmmA, anyptr_gpB);
a.vmovupd(anyptr_gpA, xmmB);
a.vmovupd(ymmA, ymmB);
a.vmovupd(ymmA, anyptr_gpB);
a.vmovupd(anyptr_gpA, ymmB);
a.vmovups(xmmA, xmmB);
a.vmovups(xmmA, anyptr_gpB);
a.vmovups(anyptr_gpA, xmmB);
a.vmovups(ymmA, ymmB);
a.vmovups(ymmA, anyptr_gpB);
a.vmovups(anyptr_gpA, ymmB);
a.vmpsadbw(xmmA, xmmB, xmmC, 0);
a.vmpsadbw(xmmA, xmmB, anyptr_gpC, 0);
a.vmulpd(xmmA, xmmB, xmmC);
a.vmulpd(xmmA, xmmB, anyptr_gpC);
a.vmulpd(ymmA, ymmB, ymmC);
a.vmulpd(ymmA, ymmB, anyptr_gpC);
a.vmulps(xmmA, xmmB, xmmC);
a.vmulps(xmmA, xmmB, anyptr_gpC);
a.vmulps(ymmA, ymmB, ymmC);
a.vmulps(ymmA, ymmB, anyptr_gpC);
a.vmulsd(xmmA, xmmB, xmmC);
a.vmulsd(xmmA, xmmB, anyptr_gpC);
a.vmulss(xmmA, xmmB, xmmC);
a.vmulss(xmmA, xmmB, anyptr_gpC);
a.vorpd(xmmA, xmmB, xmmC);
a.vorpd(xmmA, xmmB, anyptr_gpC);
a.vorpd(ymmA, ymmB, ymmC);
a.vorpd(ymmA, ymmB, anyptr_gpC);
a.vorps(xmmA, xmmB, xmmC);
a.vorps(xmmA, xmmB, anyptr_gpC);
a.vorps(ymmA, ymmB, ymmC);
a.vorps(ymmA, ymmB, anyptr_gpC);
a.vpabsb(xmmA, xmmB);
a.vpabsb(xmmA, anyptr_gpB);
a.vpabsd(xmmA, xmmB);
a.vpabsd(xmmA, anyptr_gpB);
a.vpabsw(xmmA, xmmB);
a.vpabsw(xmmA, anyptr_gpB);
a.vpackssdw(xmmA, xmmB, xmmC);
a.vpackssdw(xmmA, xmmB, anyptr_gpC);
a.vpacksswb(xmmA, xmmB, xmmC);
a.vpacksswb(xmmA, xmmB, anyptr_gpC);
a.vpackusdw(xmmA, xmmB, xmmC);
a.vpackusdw(xmmA, xmmB, anyptr_gpC);
a.vpackuswb(xmmA, xmmB, xmmC);
a.vpackuswb(xmmA, xmmB, anyptr_gpC);
a.vpaddb(xmmA, xmmB, xmmC);
a.vpaddb(xmmA, xmmB, anyptr_gpC);
a.vpaddd(xmmA, xmmB, xmmC);
a.vpaddd(xmmA, xmmB, anyptr_gpC);
a.vpaddq(xmmA, xmmB, xmmC);
a.vpaddq(xmmA, xmmB, anyptr_gpC);
a.vpaddw(xmmA, xmmB, xmmC);
a.vpaddw(xmmA, xmmB, anyptr_gpC);
a.vpaddsb(xmmA, xmmB, xmmC);
a.vpaddsb(xmmA, xmmB, anyptr_gpC);
a.vpaddsw(xmmA, xmmB, xmmC);
a.vpaddsw(xmmA, xmmB, anyptr_gpC);
a.vpaddusb(xmmA, xmmB, xmmC);
a.vpaddusb(xmmA, xmmB, anyptr_gpC);
a.vpaddusw(xmmA, xmmB, xmmC);
a.vpaddusw(xmmA, xmmB, anyptr_gpC);
a.vpalignr(xmmA, xmmB, xmmC, 0);
a.vpalignr(xmmA, xmmB, anyptr_gpC, 0);
a.vpand(xmmA, xmmB, xmmC);
a.vpand(xmmA, xmmB, anyptr_gpC);
a.vpandn(xmmA, xmmB, xmmC);
a.vpandn(xmmA, xmmB, anyptr_gpC);
a.vpavgb(xmmA, xmmB, xmmC);
a.vpavgb(xmmA, xmmB, anyptr_gpC);
a.vpavgw(xmmA, xmmB, xmmC);
a.vpavgw(xmmA, xmmB, anyptr_gpC);
a.vpblendvb(xmmA, xmmB, xmmC, xmmD);
a.vpblendvb(xmmA, xmmB, anyptr_gpC, xmmD);
a.vpblendw(xmmA, xmmB, xmmC, 0);
a.vpblendw(xmmA, xmmB, anyptr_gpC, 0);
a.vpcmpeqb(xmmA, xmmB, xmmC);
a.vpcmpeqb(xmmA, xmmB, anyptr_gpC);
a.vpcmpeqd(xmmA, xmmB, xmmC);
a.vpcmpeqd(xmmA, xmmB, anyptr_gpC);
a.vpcmpeqq(xmmA, xmmB, xmmC);
a.vpcmpeqq(xmmA, xmmB, anyptr_gpC);
a.vpcmpeqw(xmmA, xmmB, xmmC);
a.vpcmpeqw(xmmA, xmmB, anyptr_gpC);
a.vpcmpgtb(xmmA, xmmB, xmmC);
a.vpcmpgtb(xmmA, xmmB, anyptr_gpC);
a.vpcmpgtd(xmmA, xmmB, xmmC);
a.vpcmpgtd(xmmA, xmmB, anyptr_gpC);
a.vpcmpgtq(xmmA, xmmB, xmmC);
a.vpcmpgtq(xmmA, xmmB, anyptr_gpC);
a.vpcmpgtw(xmmA, xmmB, xmmC);
a.vpcmpgtw(xmmA, xmmB, anyptr_gpC);
a.vpcmpestri(xmmA, xmmB, 0);
a.vpcmpestri(xmmA, anyptr_gpB, 0);
a.vpcmpestrm(xmmA, xmmB, 0);
a.vpcmpestrm(xmmA, anyptr_gpB, 0);
a.vpcmpistri(xmmA, xmmB, 0);
a.vpcmpistri(xmmA, anyptr_gpB, 0);
a.vpcmpistrm(xmmA, xmmB, 0);
a.vpcmpistrm(xmmA, anyptr_gpB, 0);
a.vpermilpd(xmmA, xmmB, xmmC);
a.vpermilpd(xmmA, xmmB, anyptr_gpC);
a.vpermilpd(ymmA, ymmB, ymmC);
a.vpermilpd(ymmA, ymmB, anyptr_gpC);
a.vpermilpd(xmmA, xmmB, 0);
a.vpermilpd(xmmA, anyptr_gpB, 0);
a.vpermilpd(ymmA, ymmB, 0);
a.vpermilpd(ymmA, anyptr_gpB, 0);
a.vpermilps(xmmA, xmmB, xmmC);
a.vpermilps(xmmA, xmmB, anyptr_gpC);
a.vpermilps(ymmA, ymmB, ymmC);
a.vpermilps(ymmA, ymmB, anyptr_gpC);
a.vpermilps(xmmA, xmmB, 0);
a.vpermilps(xmmA, anyptr_gpB, 0);
a.vpermilps(ymmA, ymmB, 0);
a.vpermilps(ymmA, anyptr_gpB, 0);
a.vperm2f128(ymmA, ymmB, ymmC, 0);
a.vperm2f128(ymmA, ymmB, anyptr_gpC, 0);
a.vpextrb(gzA, xmmB, 0);
a.vpextrb(anyptr_gpA, xmmB, 0);
a.vpextrd(gzA, xmmB, 0);
a.vpextrd(anyptr_gpA, xmmB, 0);
if (isX64) a.vpextrq(gzA, xmmB, 0);
if (isX64) a.vpextrq(anyptr_gpA, xmmB, 0);
a.vpextrw(gzA, xmmB, 0);
a.vpextrw(anyptr_gpA, xmmB, 0);
a.vphaddd(xmmA, xmmB, xmmC);
a.vphaddd(xmmA, xmmB, anyptr_gpC);
a.vphaddsw(xmmA, xmmB, xmmC);
a.vphaddsw(xmmA, xmmB, anyptr_gpC);
a.vphaddw(xmmA, xmmB, xmmC);
a.vphaddw(xmmA, xmmB, anyptr_gpC);
a.vphminposuw(xmmA, xmmB);
a.vphminposuw(xmmA, anyptr_gpB);
a.vphsubd(xmmA, xmmB, xmmC);
a.vphsubd(xmmA, xmmB, anyptr_gpC);
a.vphsubsw(xmmA, xmmB, xmmC);
a.vphsubsw(xmmA, xmmB, anyptr_gpC);
a.vphsubw(xmmA, xmmB, xmmC);
a.vphsubw(xmmA, xmmB, anyptr_gpC);
a.vpinsrb(xmmA, xmmB, gzC, 0);
a.vpinsrb(xmmA, xmmB, anyptr_gpC, 0);
a.vpinsrd(xmmA, xmmB, gzC, 0);
a.vpinsrd(xmmA, xmmB, anyptr_gpC, 0);
a.vpinsrw(xmmA, xmmB, gzC, 0);
a.vpinsrw(xmmA, xmmB, anyptr_gpC, 0);
a.vpmaddubsw(xmmA, xmmB, xmmC);
a.vpmaddubsw(xmmA, xmmB, anyptr_gpC);
a.vpmaddwd(xmmA, xmmB, xmmC);
a.vpmaddwd(xmmA, xmmB, anyptr_gpC);
a.vpmaxsb(xmmA, xmmB, xmmC);
a.vpmaxsb(xmmA, xmmB, anyptr_gpC);
a.vpmaxsd(xmmA, xmmB, xmmC);
a.vpmaxsd(xmmA, xmmB, anyptr_gpC);
a.vpmaxsw(xmmA, xmmB, xmmC);
a.vpmaxsw(xmmA, xmmB, anyptr_gpC);
a.vpmaxub(xmmA, xmmB, xmmC);
a.vpmaxub(xmmA, xmmB, anyptr_gpC);
a.vpmaxud(xmmA, xmmB, xmmC);
a.vpmaxud(xmmA, xmmB, anyptr_gpC);
a.vpmaxuw(xmmA, xmmB, xmmC);
a.vpmaxuw(xmmA, xmmB, anyptr_gpC);
a.vpminsb(xmmA, xmmB, xmmC);
a.vpminsb(xmmA, xmmB, anyptr_gpC);
a.vpminsd(xmmA, xmmB, xmmC);
a.vpminsd(xmmA, xmmB, anyptr_gpC);
a.vpminsw(xmmA, xmmB, xmmC);
a.vpminsw(xmmA, xmmB, anyptr_gpC);
a.vpminub(xmmA, xmmB, xmmC);
a.vpminub(xmmA, xmmB, anyptr_gpC);
a.vpminud(xmmA, xmmB, xmmC);
a.vpminud(xmmA, xmmB, anyptr_gpC);
a.vpminuw(xmmA, xmmB, xmmC);
a.vpminuw(xmmA, xmmB, anyptr_gpC);
a.vpmovmskb(gzA, xmmB);
a.vpmovsxbd(xmmA, xmmB);
a.vpmovsxbd(xmmA, anyptr_gpB);
a.vpmovsxbq(xmmA, xmmB);
a.vpmovsxbq(xmmA, anyptr_gpB);
a.vpmovsxbw(xmmA, xmmB);
a.vpmovsxbw(xmmA, anyptr_gpB);
a.vpmovsxdq(xmmA, xmmB);
a.vpmovsxdq(xmmA, anyptr_gpB);
a.vpmovsxwd(xmmA, xmmB);
a.vpmovsxwd(xmmA, anyptr_gpB);
a.vpmovsxwq(xmmA, xmmB);
a.vpmovsxwq(xmmA, anyptr_gpB);
a.vpmovzxbd(xmmA, xmmB);
a.vpmovzxbd(xmmA, anyptr_gpB);
a.vpmovzxbq(xmmA, xmmB);
a.vpmovzxbq(xmmA, anyptr_gpB);
a.vpmovzxbw(xmmA, xmmB);
a.vpmovzxbw(xmmA, anyptr_gpB);
a.vpmovzxdq(xmmA, xmmB);
a.vpmovzxdq(xmmA, anyptr_gpB);
a.vpmovzxwd(xmmA, xmmB);
a.vpmovzxwd(xmmA, anyptr_gpB);
a.vpmovzxwq(xmmA, xmmB);
a.vpmovzxwq(xmmA, anyptr_gpB);
a.vpmuldq(xmmA, xmmB, xmmC);
a.vpmuldq(xmmA, xmmB, anyptr_gpC);
a.vpmulhrsw(xmmA, xmmB, xmmC);
a.vpmulhrsw(xmmA, xmmB, anyptr_gpC);
a.vpmulhuw(xmmA, xmmB, xmmC);
a.vpmulhuw(xmmA, xmmB, anyptr_gpC);
a.vpmulhw(xmmA, xmmB, xmmC);
a.vpmulhw(xmmA, xmmB, anyptr_gpC);
a.vpmulld(xmmA, xmmB, xmmC);
a.vpmulld(xmmA, xmmB, anyptr_gpC);
a.vpmullw(xmmA, xmmB, xmmC);
a.vpmullw(xmmA, xmmB, anyptr_gpC);
a.vpmuludq(xmmA, xmmB, xmmC);
a.vpmuludq(xmmA, xmmB, anyptr_gpC);
a.vpor(xmmA, xmmB, xmmC);
a.vpor(xmmA, xmmB, anyptr_gpC);
a.vpsadbw(xmmA, xmmB, xmmC);
a.vpsadbw(xmmA, xmmB, anyptr_gpC);
a.vpshufb(xmmA, xmmB, xmmC);
a.vpshufb(xmmA, xmmB, anyptr_gpC);
a.vpshufd(xmmA, xmmB, 0);
a.vpshufd(xmmA, anyptr_gpB, 0);
a.vpshufhw(xmmA, xmmB, 0);
a.vpshufhw(xmmA, anyptr_gpB, 0);
a.vpshuflw(xmmA, xmmB, 0);
a.vpshuflw(xmmA, anyptr_gpB, 0);
a.vpsignb(xmmA, xmmB, xmmC);
a.vpsignb(xmmA, xmmB, anyptr_gpC);
a.vpsignd(xmmA, xmmB, xmmC);
a.vpsignd(xmmA, xmmB, anyptr_gpC);
a.vpsignw(xmmA, xmmB, xmmC);
a.vpsignw(xmmA, xmmB, anyptr_gpC);
a.vpslld(xmmA, xmmB, xmmC);
a.vpslld(xmmA, xmmB, anyptr_gpC);
a.vpslld(xmmA, xmmB, 0);
a.vpslldq(xmmA, xmmB, 0);
a.vpsllq(xmmA, xmmB, xmmC);
a.vpsllq(xmmA, xmmB, anyptr_gpC);
a.vpsllq(xmmA, xmmB, 0);
a.vpsllw(xmmA, xmmB, xmmC);
a.vpsllw(xmmA, xmmB, anyptr_gpC);
a.vpsllw(xmmA, xmmB, 0);
a.vpsrad(xmmA, xmmB, xmmC);
a.vpsrad(xmmA, xmmB, anyptr_gpC);
a.vpsrad(xmmA, xmmB, 0);
a.vpsraw(xmmA, xmmB, xmmC);
a.vpsraw(xmmA, xmmB, anyptr_gpC);
a.vpsraw(xmmA, xmmB, 0);
a.vpsrld(xmmA, xmmB, xmmC);
a.vpsrld(xmmA, xmmB, anyptr_gpC);
a.vpsrld(xmmA, xmmB, 0);
a.vpsrldq(xmmA, xmmB, 0);
a.vpsrlq(xmmA, xmmB, xmmC);
a.vpsrlq(xmmA, xmmB, anyptr_gpC);
a.vpsrlq(xmmA, xmmB, 0);
a.vpsrlw(xmmA, xmmB, xmmC);
a.vpsrlw(xmmA, xmmB, anyptr_gpC);
a.vpsrlw(xmmA, xmmB, 0);
a.vpsubb(xmmA, xmmB, xmmC);
a.vpsubb(xmmA, xmmB, anyptr_gpC);
a.vpsubd(xmmA, xmmB, xmmC);
a.vpsubd(xmmA, xmmB, anyptr_gpC);
a.vpsubq(xmmA, xmmB, xmmC);
a.vpsubq(xmmA, xmmB, anyptr_gpC);
a.vpsubw(xmmA, xmmB, xmmC);
a.vpsubw(xmmA, xmmB, anyptr_gpC);
a.vpsubsb(xmmA, xmmB, xmmC);
a.vpsubsb(xmmA, xmmB, anyptr_gpC);
a.vpsubsw(xmmA, xmmB, xmmC);
a.vpsubsw(xmmA, xmmB, anyptr_gpC);
a.vpsubusb(xmmA, xmmB, xmmC);
a.vpsubusb(xmmA, xmmB, anyptr_gpC);
a.vpsubusw(xmmA, xmmB, xmmC);
a.vpsubusw(xmmA, xmmB, anyptr_gpC);
a.vptest(xmmA, xmmB);
a.vptest(xmmA, anyptr_gpB);
a.vptest(ymmA, ymmB);
a.vptest(ymmA, anyptr_gpB);
a.vpunpckhbw(xmmA, xmmB, xmmC);
a.vpunpckhbw(xmmA, xmmB, anyptr_gpC);
a.vpunpckhdq(xmmA, xmmB, xmmC);
a.vpunpckhdq(xmmA, xmmB, anyptr_gpC);
a.vpunpckhqdq(xmmA, xmmB, xmmC);
a.vpunpckhqdq(xmmA, xmmB, anyptr_gpC);
a.vpunpckhwd(xmmA, xmmB, xmmC);
a.vpunpckhwd(xmmA, xmmB, anyptr_gpC);
a.vpunpcklbw(xmmA, xmmB, xmmC);
a.vpunpcklbw(xmmA, xmmB, anyptr_gpC);
a.vpunpckldq(xmmA, xmmB, xmmC);
a.vpunpckldq(xmmA, xmmB, anyptr_gpC);
a.vpunpcklqdq(xmmA, xmmB, xmmC);
a.vpunpcklqdq(xmmA, xmmB, anyptr_gpC);
a.vpunpcklwd(xmmA, xmmB, xmmC);
a.vpunpcklwd(xmmA, xmmB, anyptr_gpC);
a.vpxor(xmmA, xmmB, xmmC);
a.vpxor(xmmA, xmmB, anyptr_gpC);
a.vrcpps(xmmA, xmmB);
a.vrcpps(xmmA, anyptr_gpB);
a.vrcpps(ymmA, ymmB);
a.vrcpps(ymmA, anyptr_gpB);
a.vrcpss(xmmA, xmmB, xmmC);
a.vrcpss(xmmA, xmmB, anyptr_gpC);
a.vrsqrtps(xmmA, xmmB);
a.vrsqrtps(xmmA, anyptr_gpB);
a.vrsqrtps(ymmA, ymmB);
a.vrsqrtps(ymmA, anyptr_gpB);
a.vrsqrtss(xmmA, xmmB, xmmC);
a.vrsqrtss(xmmA, xmmB, anyptr_gpC);
a.vroundpd(xmmA, xmmB, 0);
a.vroundpd(xmmA, anyptr_gpB, 0);
a.vroundpd(ymmA, ymmB, 0);
a.vroundpd(ymmA, anyptr_gpB, 0);
a.vroundps(xmmA, xmmB, 0);
a.vroundps(xmmA, anyptr_gpB, 0);
a.vroundps(ymmA, ymmB, 0);
a.vroundps(ymmA, anyptr_gpB, 0);
a.vroundsd(xmmA, xmmB, xmmC, 0);
a.vroundsd(xmmA, xmmB, anyptr_gpC, 0);
a.vroundss(xmmA, xmmB, xmmC, 0);
a.vroundss(xmmA, xmmB, anyptr_gpC, 0);
a.vshufpd(xmmA, xmmB, xmmC, 0);
a.vshufpd(xmmA, xmmB, anyptr_gpC, 0);
a.vshufpd(ymmA, ymmB, ymmC, 0);
a.vshufpd(ymmA, ymmB, anyptr_gpC, 0);
a.vshufps(xmmA, xmmB, xmmC, 0);
a.vshufps(xmmA, xmmB, anyptr_gpC, 0);
a.vshufps(ymmA, ymmB, ymmC, 0);
a.vshufps(ymmA, ymmB, anyptr_gpC, 0);
a.vsqrtpd(xmmA, xmmB);
a.vsqrtpd(xmmA, anyptr_gpB);
a.vsqrtpd(ymmA, ymmB);
a.vsqrtpd(ymmA, anyptr_gpB);
a.vsqrtps(xmmA, xmmB);
a.vsqrtps(xmmA, anyptr_gpB);
a.vsqrtps(ymmA, ymmB);
a.vsqrtps(ymmA, anyptr_gpB);
a.vsqrtsd(xmmA, xmmB, xmmC);
a.vsqrtsd(xmmA, xmmB, anyptr_gpC);
a.vsqrtss(xmmA, xmmB, xmmC);
a.vsqrtss(xmmA, xmmB, anyptr_gpC);
a.vstmxcsr(anyptr_gpA);
a.vsubpd(xmmA, xmmB, xmmC);
a.vsubpd(xmmA, xmmB, anyptr_gpC);
a.vsubpd(ymmA, ymmB, ymmC);
a.vsubpd(ymmA, ymmB, anyptr_gpC);
a.vsubps(xmmA, xmmB, xmmC);
a.vsubps(xmmA, xmmB, anyptr_gpC);
a.vsubps(ymmA, ymmB, ymmC);
a.vsubps(ymmA, ymmB, anyptr_gpC);
a.vsubsd(xmmA, xmmB, xmmC);
a.vsubsd(xmmA, xmmB, anyptr_gpC);
a.vsubss(xmmA, xmmB, xmmC);
a.vsubss(xmmA, xmmB, anyptr_gpC);
a.vtestps(xmmA, xmmB);
a.vtestps(xmmA, anyptr_gpB);
a.vtestps(ymmA, ymmB);
a.vtestps(ymmA, anyptr_gpB);
a.vtestpd(xmmA, xmmB);
a.vtestpd(xmmA, anyptr_gpB);
a.vtestpd(ymmA, ymmB);
a.vtestpd(ymmA, anyptr_gpB);
a.vucomisd(xmmA, xmmB);
a.vucomisd(xmmA, anyptr_gpB);
a.vucomiss(xmmA, xmmB);
a.vucomiss(xmmA, anyptr_gpB);
a.vunpckhpd(xmmA, xmmB, xmmC);
a.vunpckhpd(xmmA, xmmB, anyptr_gpC);
a.vunpckhpd(ymmA, ymmB, ymmC);
a.vunpckhpd(ymmA, ymmB, anyptr_gpC);
a.vunpckhps(xmmA, xmmB, xmmC);
a.vunpckhps(xmmA, xmmB, anyptr_gpC);
a.vunpckhps(ymmA, ymmB, ymmC);
a.vunpckhps(ymmA, ymmB, anyptr_gpC);
a.vunpcklpd(xmmA, xmmB, xmmC);
a.vunpcklpd(xmmA, xmmB, anyptr_gpC);
a.vunpcklpd(ymmA, ymmB, ymmC);
a.vunpcklpd(ymmA, ymmB, anyptr_gpC);
a.vunpcklps(xmmA, xmmB, xmmC);
a.vunpcklps(xmmA, xmmB, anyptr_gpC);
a.vunpcklps(ymmA, ymmB, ymmC);
a.vunpcklps(ymmA, ymmB, anyptr_gpC);
a.vxorpd(xmmA, xmmB, xmmC);
a.vxorpd(xmmA, xmmB, anyptr_gpC);
a.vxorpd(ymmA, ymmB, ymmC);
a.vxorpd(ymmA, ymmB, anyptr_gpC);
a.vxorps(xmmA, xmmB, xmmC);
a.vxorps(xmmA, xmmB, anyptr_gpC);
a.vxorps(ymmA, ymmB, ymmC);
a.vxorps(ymmA, ymmB, anyptr_gpC);
a.vzeroall();
a.vex3().vzeroall();
a.vzeroupper();
a.vex3().vzeroupper();
// AVX+AESNI.
a.nop();
a.vaesdec(xmmA, xmmB, xmmC);
a.vaesdec(xmmA, xmmB, anyptr_gpC);
a.vaesdeclast(xmmA, xmmB, xmmC);
a.vaesdeclast(xmmA, xmmB, anyptr_gpC);
a.vaesenc(xmmA, xmmB, xmmC);
a.vaesenc(xmmA, xmmB, anyptr_gpC);
a.vaesenclast(xmmA, xmmB, xmmC);
a.vaesenclast(xmmA, xmmB, anyptr_gpC);
a.vaesimc(xmmA, xmmB);
a.vaesimc(xmmA, anyptr_gpB);
a.vaeskeygenassist(xmmA, xmmB, 0);
a.vaeskeygenassist(xmmA, anyptr_gpB, 0);
// AVX+PCLMULQDQ.
a.nop();
a.vpclmulqdq(xmmA, xmmB, xmmC, 0);
a.vpclmulqdq(xmmA, xmmB, anyptr_gpC, 0);
// AVX2.
a.nop();
a.vbroadcasti128(ymmA, anyptr_gpB);
a.vbroadcastsd(ymmA, xmmB);
a.vbroadcastss(xmmA, xmmB);
a.vbroadcastss(ymmA, xmmB);
a.vextracti128(xmmA, ymmB, 0);
a.vextracti128(anyptr_gpA, ymmB, 0);
a.vgatherdpd(xmmA, vx_ptr, xmmC);
a.vgatherdpd(ymmA, vx_ptr, ymmC);
a.vgatherdps(xmmA, vx_ptr, xmmC);
a.vgatherdps(ymmA, vy_ptr, ymmC);
a.vgatherqpd(xmmA, vx_ptr, xmmC);
a.vgatherqpd(ymmA, vy_ptr, ymmC);
a.vgatherqps(xmmA, vx_ptr, xmmC);
a.vgatherqps(xmmA, vy_ptr, xmmC);
a.vinserti128(ymmA, ymmB, xmmC, 0);
a.vinserti128(ymmA, ymmB, anyptr_gpC, 0);
a.vmovntdqa(ymmA, anyptr_gpB);
a.vmpsadbw(ymmA, ymmB, ymmC, 0);
a.vmpsadbw(ymmA, ymmB, anyptr_gpC, 0);
a.vpabsb(ymmA, ymmB);
a.vpabsb(ymmA, anyptr_gpB);
a.vpabsd(ymmA, ymmB);
a.vpabsd(ymmA, anyptr_gpB);
a.vpabsw(ymmA, ymmB);
a.vpabsw(ymmA, anyptr_gpB);
a.vpackssdw(ymmA, ymmB, ymmC);
a.vpackssdw(ymmA, ymmB, anyptr_gpC);
a.vpacksswb(ymmA, ymmB, ymmC);
a.vpacksswb(ymmA, ymmB, anyptr_gpC);
a.vpackusdw(ymmA, ymmB, ymmC);
a.vpackusdw(ymmA, ymmB, anyptr_gpC);
a.vpackuswb(ymmA, ymmB, ymmC);
a.vpackuswb(ymmA, ymmB, anyptr_gpC);
a.vpaddb(ymmA, ymmB, ymmC);
a.vpaddb(ymmA, ymmB, anyptr_gpC);
a.vpaddd(ymmA, ymmB, ymmC);
a.vpaddd(ymmA, ymmB, anyptr_gpC);
a.vpaddq(ymmA, ymmB, ymmC);
a.vpaddq(ymmA, ymmB, anyptr_gpC);
a.vpaddw(ymmA, ymmB, ymmC);
a.vpaddw(ymmA, ymmB, anyptr_gpC);
a.vpaddsb(ymmA, ymmB, ymmC);
a.vpaddsb(ymmA, ymmB, anyptr_gpC);
a.vpaddsw(ymmA, ymmB, ymmC);
a.vpaddsw(ymmA, ymmB, anyptr_gpC);
a.vpaddusb(ymmA, ymmB, ymmC);
a.vpaddusb(ymmA, ymmB, anyptr_gpC);
a.vpaddusw(ymmA, ymmB, ymmC);
a.vpaddusw(ymmA, ymmB, anyptr_gpC);
a.vpalignr(ymmA, ymmB, ymmC, 0);
a.vpalignr(ymmA, ymmB, anyptr_gpC, 0);
a.vpand(ymmA, ymmB, ymmC);
a.vpand(ymmA, ymmB, anyptr_gpC);
a.vpandn(ymmA, ymmB, ymmC);
a.vpandn(ymmA, ymmB, anyptr_gpC);
a.vpavgb(ymmA, ymmB, ymmC);
a.vpavgb(ymmA, ymmB, anyptr_gpC);
a.vpavgw(ymmA, ymmB, ymmC);
a.vpavgw(ymmA, ymmB, anyptr_gpC);
a.vpblendd(xmmA, xmmB, xmmC, 0);
a.vpblendd(xmmA, xmmB, anyptr_gpC, 0);
a.vpblendd(ymmA, ymmB, ymmC, 0);
a.vpblendd(ymmA, ymmB, anyptr_gpC, 0);
a.vpblendvb(ymmA, ymmB, ymmC, ymmD);
a.vpblendvb(ymmA, ymmB, anyptr_gpC, ymmD);
a.vpblendw(ymmA, ymmB, ymmC, 0);
a.vpblendw(ymmA, ymmB, anyptr_gpC, 0);
a.vpbroadcastb(xmmA, xmmB);
a.vpbroadcastb(xmmA, anyptr_gpB);
a.vpbroadcastb(ymmA, xmmB);
a.vpbroadcastb(ymmA, anyptr_gpB);
a.vpbroadcastd(xmmA, xmmB);
a.vpbroadcastd(xmmA, anyptr_gpB);
a.vpbroadcastd(ymmA, xmmB);
a.vpbroadcastd(ymmA, anyptr_gpB);
a.vpbroadcastq(xmmA, xmmB);
a.vpbroadcastq(xmmA, anyptr_gpB);
a.vpbroadcastq(ymmA, xmmB);
a.vpbroadcastq(ymmA, anyptr_gpB);
a.vpbroadcastw(xmmA, xmmB);
a.vpbroadcastw(xmmA, anyptr_gpB);
a.vpbroadcastw(ymmA, xmmB);
a.vpbroadcastw(ymmA, anyptr_gpB);
a.vpcmpeqb(ymmA, ymmB, ymmC);
a.vpcmpeqb(ymmA, ymmB, anyptr_gpC);
a.vpcmpeqd(ymmA, ymmB, ymmC);
a.vpcmpeqd(ymmA, ymmB, anyptr_gpC);
a.vpcmpeqq(ymmA, ymmB, ymmC);
a.vpcmpeqq(ymmA, ymmB, anyptr_gpC);
a.vpcmpeqw(ymmA, ymmB, ymmC);
a.vpcmpeqw(ymmA, ymmB, anyptr_gpC);
a.vpcmpgtb(ymmA, ymmB, ymmC);
a.vpcmpgtb(ymmA, ymmB, anyptr_gpC);
a.vpcmpgtd(ymmA, ymmB, ymmC);
a.vpcmpgtd(ymmA, ymmB, anyptr_gpC);
a.vpcmpgtq(ymmA, ymmB, ymmC);
a.vpcmpgtq(ymmA, ymmB, anyptr_gpC);
a.vpcmpgtw(ymmA, ymmB, ymmC);
a.vpcmpgtw(ymmA, ymmB, anyptr_gpC);
a.vperm2i128(ymmA, ymmB, ymmC, 0);
a.vperm2i128(ymmA, ymmB, anyptr_gpC, 0);
a.vpermd(ymmA, ymmB, ymmC);
a.vpermd(ymmA, ymmB, anyptr_gpC);
a.vpermps(ymmA, ymmB, ymmC);
a.vpermps(ymmA, ymmB, anyptr_gpC);
a.vpermpd(ymmA, ymmB, 0);
a.vpermpd(ymmA, anyptr_gpB, 0);
a.vpermq(ymmA, ymmB, 0);
a.vpermq(ymmA, anyptr_gpB, 0);
a.vpgatherdd(xmmA, vx_ptr, xmmC);
a.vpgatherdd(ymmA, vy_ptr, ymmC);
a.vpgatherdq(xmmA, vx_ptr, xmmC);
a.vpgatherdq(ymmA, vx_ptr, ymmC);
a.vpgatherqd(xmmA, vx_ptr, xmmC);
a.vpgatherqd(xmmA, vy_ptr, xmmC);
a.vpgatherqq(xmmA, vx_ptr, xmmC);
a.vpgatherqq(ymmA, vy_ptr, ymmC);
a.vpmovmskb(gzA, ymmB);
a.vpmovsxbd(ymmA, anyptr_gpB);
a.vpmovsxbd(ymmA, xmmB);
a.vpmovsxbq(ymmA, anyptr_gpB);
a.vpmovsxbq(ymmA, xmmB);
a.vpmovsxbw(ymmA, anyptr_gpB);
a.vpmovsxbw(ymmA, xmmB);
a.vpmovsxdq(ymmA, anyptr_gpB);
a.vpmovsxdq(ymmA, xmmB);
a.vpmovsxwd(ymmA, anyptr_gpB);
a.vpmovsxwd(ymmA, xmmB);
a.vpmovsxwq(ymmA, anyptr_gpB);
a.vpmovsxwq(ymmA, xmmB);
a.vpmovzxbd(ymmA, anyptr_gpB);
a.vpmovzxbd(ymmA, xmmB);
a.vpmovzxbq(ymmA, anyptr_gpB);
a.vpmovzxbq(ymmA, xmmB);
a.vpmovzxbw(ymmA, anyptr_gpB);
a.vpmovzxbw(ymmA, xmmB);
a.vpmovzxdq(ymmA, anyptr_gpB);
a.vpmovzxdq(ymmA, xmmB);
a.vpmovzxwd(ymmA, anyptr_gpB);
a.vpmovzxwd(ymmA, xmmB);
a.vpmovzxwq(ymmA, anyptr_gpB);
a.vpmovzxwq(ymmA, xmmB);
a.vpshufd(ymmA, anyptr_gpB, 0);
a.vpshufd(ymmA, ymmB, 0);
a.vpshufhw(ymmA, anyptr_gpB, 0);
a.vpshufhw(ymmA, ymmB, 0);
a.vpshuflw(ymmA, anyptr_gpB, 0);
a.vpshuflw(ymmA, ymmB, 0);
a.vpslld(ymmA, ymmB, 0);
a.vpslldq(ymmA, ymmB, 0);
a.vpsllq(ymmA, ymmB, 0);
a.vpsllw(ymmA, ymmB, 0);
a.vpsrad(ymmA, ymmB, 0);
a.vpsraw(ymmA, ymmB, 0);
a.vpsrld(ymmA, ymmB, 0);
a.vpsrldq(ymmA, ymmB, 0);
a.vpsrlq(ymmA, ymmB, 0);
a.vpsrlw(ymmA, ymmB, 0);
a.vphaddd(ymmA, ymmB, anyptr_gpC);
a.vphaddd(ymmA, ymmB, ymmC);
a.vphaddsw(ymmA, ymmB, anyptr_gpC);
a.vphaddsw(ymmA, ymmB, ymmC);
a.vphaddw(ymmA, ymmB, anyptr_gpC);
a.vphaddw(ymmA, ymmB, ymmC);
a.vphsubd(ymmA, ymmB, anyptr_gpC);
a.vphsubd(ymmA, ymmB, ymmC);
a.vphsubsw(ymmA, ymmB, anyptr_gpC);
a.vphsubsw(ymmA, ymmB, ymmC);
a.vphsubw(ymmA, ymmB, anyptr_gpC);
a.vphsubw(ymmA, ymmB, ymmC);
a.vpmaddubsw(ymmA, ymmB, anyptr_gpC);
a.vpmaddubsw(ymmA, ymmB, ymmC);
a.vpmaddwd(ymmA, ymmB, anyptr_gpC);
a.vpmaddwd(ymmA, ymmB, ymmC);
a.vpmaskmovd(anyptr_gpA, xmmB, xmmC);
a.vpmaskmovd(anyptr_gpA, ymmB, ymmC);
a.vpmaskmovd(xmmA, xmmB, anyptr_gpC);
a.vpmaskmovd(ymmA, ymmB, anyptr_gpC);
a.vpmaskmovq(anyptr_gpA, xmmB, xmmC);
a.vpmaskmovq(anyptr_gpA, ymmB, ymmC);
a.vpmaskmovq(xmmA, xmmB, anyptr_gpC);
a.vpmaskmovq(ymmA, ymmB, anyptr_gpC);
a.vpmaxsb(ymmA, ymmB, anyptr_gpC);
a.vpmaxsb(ymmA, ymmB, ymmC);
a.vpmaxsd(ymmA, ymmB, anyptr_gpC);
a.vpmaxsd(ymmA, ymmB, ymmC);
a.vpmaxsw(ymmA, ymmB, anyptr_gpC);
a.vpmaxsw(ymmA, ymmB, ymmC);
a.vpmaxub(ymmA, ymmB, anyptr_gpC);
a.vpmaxub(ymmA, ymmB, ymmC);
a.vpmaxud(ymmA, ymmB, anyptr_gpC);
a.vpmaxud(ymmA, ymmB, ymmC);
a.vpmaxuw(ymmA, ymmB, anyptr_gpC);
a.vpmaxuw(ymmA, ymmB, ymmC);
a.vpminsb(ymmA, ymmB, anyptr_gpC);
a.vpminsb(ymmA, ymmB, ymmC);
a.vpminsd(ymmA, ymmB, anyptr_gpC);
a.vpminsd(ymmA, ymmB, ymmC);
a.vpminsw(ymmA, ymmB, anyptr_gpC);
a.vpminsw(ymmA, ymmB, ymmC);
a.vpminub(ymmA, ymmB, anyptr_gpC);
a.vpminub(ymmA, ymmB, ymmC);
a.vpminud(ymmA, ymmB, anyptr_gpC);
a.vpminud(ymmA, ymmB, ymmC);
a.vpminuw(ymmA, ymmB, anyptr_gpC);
a.vpminuw(ymmA, ymmB, ymmC);
a.vpmuldq(ymmA, ymmB, anyptr_gpC);
a.vpmuldq(ymmA, ymmB, ymmC);
a.vpmulhrsw(ymmA, ymmB, anyptr_gpC);
a.vpmulhrsw(ymmA, ymmB, ymmC);
a.vpmulhuw(ymmA, ymmB, anyptr_gpC);
a.vpmulhuw(ymmA, ymmB, ymmC);
a.vpmulhw(ymmA, ymmB, anyptr_gpC);
a.vpmulhw(ymmA, ymmB, ymmC);
a.vpmulld(ymmA, ymmB, anyptr_gpC);
a.vpmulld(ymmA, ymmB, ymmC);
a.vpmullw(ymmA, ymmB, anyptr_gpC);
a.vpmullw(ymmA, ymmB, ymmC);
a.vpmuludq(ymmA, ymmB, anyptr_gpC);
a.vpmuludq(ymmA, ymmB, ymmC);
a.vpor(ymmA, ymmB, anyptr_gpC);
a.vpor(ymmA, ymmB, ymmC);
a.vpsadbw(ymmA, ymmB, anyptr_gpC);
a.vpsadbw(ymmA, ymmB, ymmC);
a.vpshufb(ymmA, ymmB, anyptr_gpC);
a.vpshufb(ymmA, ymmB, ymmC);
a.vpsignb(ymmA, ymmB, anyptr_gpC);
a.vpsignb(ymmA, ymmB, ymmC);
a.vpsignd(ymmA, ymmB, anyptr_gpC);
a.vpsignd(ymmA, ymmB, ymmC);
a.vpsignw(ymmA, ymmB, anyptr_gpC);
a.vpsignw(ymmA, ymmB, ymmC);
a.vpslld(ymmA, ymmB, anyptr_gpC);
a.vpslld(ymmA, ymmB, xmmC);
a.vpsllq(ymmA, ymmB, anyptr_gpC);
a.vpsllq(ymmA, ymmB, xmmC);
a.vpsllvd(xmmA, xmmB, anyptr_gpC);
a.vpsllvd(xmmA, xmmB, xmmC);
a.vpsllvd(ymmA, ymmB, anyptr_gpC);
a.vpsllvd(ymmA, ymmB, ymmC);
a.vpsllvq(xmmA, xmmB, anyptr_gpC);
a.vpsllvq(xmmA, xmmB, xmmC);
a.vpsllvq(ymmA, ymmB, anyptr_gpC);
a.vpsllvq(ymmA, ymmB, ymmC);
a.vpsllw(ymmA, ymmB, anyptr_gpC);
a.vpsllw(ymmA, ymmB, xmmC);
a.vpsrad(ymmA, ymmB, anyptr_gpC);
a.vpsrad(ymmA, ymmB, xmmC);
a.vpsravd(xmmA, xmmB, anyptr_gpC);
a.vpsravd(xmmA, xmmB, xmmC);
a.vpsravd(ymmA, ymmB, anyptr_gpC);
a.vpsravd(ymmA, ymmB, ymmC);
a.vpsraw(ymmA, ymmB, anyptr_gpC);
a.vpsraw(ymmA, ymmB, xmmC);
a.vpsrld(ymmA, ymmB, anyptr_gpC);
a.vpsrld(ymmA, ymmB, xmmC);
a.vpsrlq(ymmA, ymmB, anyptr_gpC);
a.vpsrlq(ymmA, ymmB, xmmC);
a.vpsrlvd(xmmA, xmmB, anyptr_gpC);
a.vpsrlvd(xmmA, xmmB, xmmC);
a.vpsrlvd(ymmA, ymmB, anyptr_gpC);
a.vpsrlvd(ymmA, ymmB, ymmC);
a.vpsrlvq(xmmA, xmmB, anyptr_gpC);
a.vpsrlvq(xmmA, xmmB, xmmC);
a.vpsrlvq(ymmA, ymmB, anyptr_gpC);
a.vpsrlvq(ymmA, ymmB, ymmC);
a.vpsrlw(ymmA, ymmB, anyptr_gpC);
a.vpsrlw(ymmA, ymmB, xmmC);
a.vpsubb(ymmA, ymmB, anyptr_gpC);
a.vpsubb(ymmA, ymmB, ymmC);
a.vpsubd(ymmA, ymmB, anyptr_gpC);
a.vpsubd(ymmA, ymmB, ymmC);
a.vpsubq(ymmA, ymmB, anyptr_gpC);
a.vpsubq(ymmA, ymmB, ymmC);
a.vpsubsb(ymmA, ymmB, anyptr_gpC);
a.vpsubsb(ymmA, ymmB, ymmC);
a.vpsubsw(ymmA, ymmB, anyptr_gpC);
a.vpsubsw(ymmA, ymmB, ymmC);
a.vpsubusb(ymmA, ymmB, anyptr_gpC);
a.vpsubusb(ymmA, ymmB, ymmC);
a.vpsubusw(ymmA, ymmB, anyptr_gpC);
a.vpsubusw(ymmA, ymmB, ymmC);
a.vpsubw(ymmA, ymmB, anyptr_gpC);
a.vpsubw(ymmA, ymmB, ymmC);
a.vpunpckhbw(ymmA, ymmB, anyptr_gpC);
a.vpunpckhbw(ymmA, ymmB, ymmC);
a.vpunpckhdq(ymmA, ymmB, anyptr_gpC);
a.vpunpckhdq(ymmA, ymmB, ymmC);
a.vpunpckhqdq(ymmA, ymmB, anyptr_gpC);
a.vpunpckhqdq(ymmA, ymmB, ymmC);
a.vpunpckhwd(ymmA, ymmB, anyptr_gpC);
a.vpunpckhwd(ymmA, ymmB, ymmC);
a.vpunpcklbw(ymmA, ymmB, anyptr_gpC);
a.vpunpcklbw(ymmA, ymmB, ymmC);
a.vpunpckldq(ymmA, ymmB, anyptr_gpC);
a.vpunpckldq(ymmA, ymmB, ymmC);
a.vpunpcklqdq(ymmA, ymmB, anyptr_gpC);
a.vpunpcklqdq(ymmA, ymmB, ymmC);
a.vpunpcklwd(ymmA, ymmB, anyptr_gpC);
a.vpunpcklwd(ymmA, ymmB, ymmC);
a.vpxor(ymmA, ymmB, anyptr_gpC);
a.vpxor(ymmA, ymmB, ymmC);
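// FMA3 (three-operand fused multiply-add; 132/213/231 operand-order variants, register and memory source forms).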
a.nop();
a.vfmadd132pd(xmmA, xmmB, anyptr_gpC);
a.vfmadd132pd(xmmA, xmmB, xmmC);
a.vfmadd132pd(ymmA, ymmB, anyptr_gpC);
a.vfmadd132pd(ymmA, ymmB, ymmC);
a.vfmadd132ps(xmmA, xmmB, anyptr_gpC);
a.vfmadd132ps(xmmA, xmmB, xmmC);
a.vfmadd132ps(ymmA, ymmB, anyptr_gpC);
a.vfmadd132ps(ymmA, ymmB, ymmC);
a.vfmadd132sd(xmmA, xmmB, anyptr_gpC);
a.vfmadd132sd(xmmA, xmmB, xmmC);
a.vfmadd132ss(xmmA, xmmB, anyptr_gpC);
a.vfmadd132ss(xmmA, xmmB, xmmC);
a.vfmadd213pd(xmmA, xmmB, anyptr_gpC);
a.vfmadd213pd(xmmA, xmmB, xmmC);
a.vfmadd213pd(ymmA, ymmB, anyptr_gpC);
a.vfmadd213pd(ymmA, ymmB, ymmC);
a.vfmadd213ps(xmmA, xmmB, anyptr_gpC);
a.vfmadd213ps(xmmA, xmmB, xmmC);
a.vfmadd213ps(ymmA, ymmB, anyptr_gpC);
a.vfmadd213ps(ymmA, ymmB, ymmC);
a.vfmadd213sd(xmmA, xmmB, anyptr_gpC);
a.vfmadd213sd(xmmA, xmmB, xmmC);
a.vfmadd213ss(xmmA, xmmB, anyptr_gpC);
a.vfmadd213ss(xmmA, xmmB, xmmC);
a.vfmadd231pd(xmmA, xmmB, anyptr_gpC);
a.vfmadd231pd(xmmA, xmmB, xmmC);
a.vfmadd231pd(ymmA, ymmB, anyptr_gpC);
a.vfmadd231pd(ymmA, ymmB, ymmC);
a.vfmadd231ps(xmmA, xmmB, anyptr_gpC);
a.vfmadd231ps(xmmA, xmmB, xmmC);
a.vfmadd231ps(ymmA, ymmB, anyptr_gpC);
a.vfmadd231ps(ymmA, ymmB, ymmC);
a.vfmadd231sd(xmmA, xmmB, anyptr_gpC);
a.vfmadd231sd(xmmA, xmmB, xmmC);
a.vfmadd231ss(xmmA, xmmB, anyptr_gpC);
a.vfmadd231ss(xmmA, xmmB, xmmC);
a.vfmaddsub132pd(xmmA, xmmB, anyptr_gpC);
a.vfmaddsub132pd(xmmA, xmmB, xmmC);
a.vfmaddsub132pd(ymmA, ymmB, anyptr_gpC);
a.vfmaddsub132pd(ymmA, ymmB, ymmC);
a.vfmaddsub132ps(xmmA, xmmB, anyptr_gpC);
a.vfmaddsub132ps(xmmA, xmmB, xmmC);
a.vfmaddsub132ps(ymmA, ymmB, anyptr_gpC);
a.vfmaddsub132ps(ymmA, ymmB, ymmC);
a.vfmaddsub213pd(xmmA, xmmB, anyptr_gpC);
a.vfmaddsub213pd(xmmA, xmmB, xmmC);
a.vfmaddsub213pd(ymmA, ymmB, anyptr_gpC);
a.vfmaddsub213pd(ymmA, ymmB, ymmC);
a.vfmaddsub213ps(xmmA, xmmB, anyptr_gpC);
a.vfmaddsub213ps(xmmA, xmmB, xmmC);
a.vfmaddsub213ps(ymmA, ymmB, anyptr_gpC);
a.vfmaddsub213ps(ymmA, ymmB, ymmC);
a.vfmaddsub231pd(xmmA, xmmB, anyptr_gpC);
a.vfmaddsub231pd(xmmA, xmmB, xmmC);
a.vfmaddsub231pd(ymmA, ymmB, anyptr_gpC);
a.vfmaddsub231pd(ymmA, ymmB, ymmC);
a.vfmaddsub231ps(xmmA, xmmB, anyptr_gpC);
a.vfmaddsub231ps(xmmA, xmmB, xmmC);
a.vfmaddsub231ps(ymmA, ymmB, anyptr_gpC);
a.vfmaddsub231ps(ymmA, ymmB, ymmC);
a.vfmsub132pd(xmmA, xmmB, anyptr_gpC);
a.vfmsub132pd(xmmA, xmmB, xmmC);
a.vfmsub132pd(ymmA, ymmB, anyptr_gpC);
a.vfmsub132pd(ymmA, ymmB, ymmC);
a.vfmsub132ps(xmmA, xmmB, anyptr_gpC);
a.vfmsub132ps(xmmA, xmmB, xmmC);
a.vfmsub132ps(ymmA, ymmB, anyptr_gpC);
a.vfmsub132ps(ymmA, ymmB, ymmC);
a.vfmsub132sd(xmmA, xmmB, anyptr_gpC);
a.vfmsub132sd(xmmA, xmmB, xmmC);
a.vfmsub132ss(xmmA, xmmB, anyptr_gpC);
a.vfmsub132ss(xmmA, xmmB, xmmC);
a.vfmsub213pd(xmmA, xmmB, anyptr_gpC);
a.vfmsub213pd(xmmA, xmmB, xmmC);
a.vfmsub213pd(ymmA, ymmB, anyptr_gpC);
a.vfmsub213pd(ymmA, ymmB, ymmC);
a.vfmsub213ps(xmmA, xmmB, anyptr_gpC);
a.vfmsub213ps(xmmA, xmmB, xmmC);
a.vfmsub213ps(ymmA, ymmB, anyptr_gpC);
a.vfmsub213ps(ymmA, ymmB, ymmC);
a.vfmsub213sd(xmmA, xmmB, anyptr_gpC);
a.vfmsub213sd(xmmA, xmmB, xmmC);
a.vfmsub213ss(xmmA, xmmB, anyptr_gpC);
a.vfmsub213ss(xmmA, xmmB, xmmC);
a.vfmsub231pd(xmmA, xmmB, anyptr_gpC);
a.vfmsub231pd(xmmA, xmmB, xmmC);
a.vfmsub231pd(ymmA, ymmB, anyptr_gpC);
a.vfmsub231pd(ymmA, ymmB, ymmC);
a.vfmsub231ps(xmmA, xmmB, anyptr_gpC);
a.vfmsub231ps(xmmA, xmmB, xmmC);
a.vfmsub231ps(ymmA, ymmB, anyptr_gpC);
a.vfmsub231ps(ymmA, ymmB, ymmC);
a.vfmsub231sd(xmmA, xmmB, anyptr_gpC);
a.vfmsub231sd(xmmA, xmmB, xmmC);
a.vfmsub231ss(xmmA, xmmB, anyptr_gpC);
a.vfmsub231ss(xmmA, xmmB, xmmC);
a.vfmsubadd132pd(xmmA, xmmB, anyptr_gpC);
a.vfmsubadd132pd(xmmA, xmmB, xmmC);
a.vfmsubadd132pd(ymmA, ymmB, anyptr_gpC);
a.vfmsubadd132pd(ymmA, ymmB, ymmC);
a.vfmsubadd132ps(xmmA, xmmB, anyptr_gpC);
a.vfmsubadd132ps(xmmA, xmmB, xmmC);
a.vfmsubadd132ps(ymmA, ymmB, anyptr_gpC);
a.vfmsubadd132ps(ymmA, ymmB, ymmC);
a.vfmsubadd213pd(xmmA, xmmB, anyptr_gpC);
a.vfmsubadd213pd(xmmA, xmmB, xmmC);
a.vfmsubadd213pd(ymmA, ymmB, anyptr_gpC);
a.vfmsubadd213pd(ymmA, ymmB, ymmC);
a.vfmsubadd213ps(xmmA, xmmB, anyptr_gpC);
a.vfmsubadd213ps(xmmA, xmmB, xmmC);
a.vfmsubadd213ps(ymmA, ymmB, anyptr_gpC);
a.vfmsubadd213ps(ymmA, ymmB, ymmC);
a.vfmsubadd231pd(xmmA, xmmB, anyptr_gpC);
a.vfmsubadd231pd(xmmA, xmmB, xmmC);
a.vfmsubadd231pd(ymmA, ymmB, anyptr_gpC);
a.vfmsubadd231pd(ymmA, ymmB, ymmC);
a.vfmsubadd231ps(xmmA, xmmB, anyptr_gpC);
a.vfmsubadd231ps(xmmA, xmmB, xmmC);
a.vfmsubadd231ps(ymmA, ymmB, anyptr_gpC);
a.vfmsubadd231ps(ymmA, ymmB, ymmC);
a.vfnmadd132pd(xmmA, xmmB, anyptr_gpC);
a.vfnmadd132pd(xmmA, xmmB, xmmC);
a.vfnmadd132pd(ymmA, ymmB, anyptr_gpC);
a.vfnmadd132pd(ymmA, ymmB, ymmC);
a.vfnmadd132ps(xmmA, xmmB, anyptr_gpC);
a.vfnmadd132ps(xmmA, xmmB, xmmC);
a.vfnmadd132ps(ymmA, ymmB, anyptr_gpC);
a.vfnmadd132ps(ymmA, ymmB, ymmC);
a.vfnmadd132sd(xmmA, xmmB, anyptr_gpC);
a.vfnmadd132sd(xmmA, xmmB, xmmC);
a.vfnmadd132ss(xmmA, xmmB, anyptr_gpC);
a.vfnmadd132ss(xmmA, xmmB, xmmC);
a.vfnmadd213pd(xmmA, xmmB, anyptr_gpC);
a.vfnmadd213pd(xmmA, xmmB, xmmC);
a.vfnmadd213pd(ymmA, ymmB, anyptr_gpC);
a.vfnmadd213pd(ymmA, ymmB, ymmC);
a.vfnmadd213ps(xmmA, xmmB, anyptr_gpC);
a.vfnmadd213ps(xmmA, xmmB, xmmC);
a.vfnmadd213ps(ymmA, ymmB, anyptr_gpC);
a.vfnmadd213ps(ymmA, ymmB, ymmC);
a.vfnmadd213sd(xmmA, xmmB, anyptr_gpC);
a.vfnmadd213sd(xmmA, xmmB, xmmC);
a.vfnmadd213ss(xmmA, xmmB, anyptr_gpC);
a.vfnmadd213ss(xmmA, xmmB, xmmC);
a.vfnmadd231pd(xmmA, xmmB, anyptr_gpC);
a.vfnmadd231pd(xmmA, xmmB, xmmC);
a.vfnmadd231pd(ymmA, ymmB, anyptr_gpC);
a.vfnmadd231pd(ymmA, ymmB, ymmC);
a.vfnmadd231ps(xmmA, xmmB, anyptr_gpC);
a.vfnmadd231ps(xmmA, xmmB, xmmC);
a.vfnmadd231ps(ymmA, ymmB, anyptr_gpC);
a.vfnmadd231ps(ymmA, ymmB, ymmC);
a.vfnmadd231sd(xmmA, xmmB, anyptr_gpC);
a.vfnmadd231sd(xmmA, xmmB, xmmC);
a.vfnmadd231ss(xmmA, xmmB, anyptr_gpC);
a.vfnmadd231ss(xmmA, xmmB, xmmC);
a.vfnmsub132pd(xmmA, xmmB, anyptr_gpC);
a.vfnmsub132pd(xmmA, xmmB, xmmC);
a.vfnmsub132pd(ymmA, ymmB, anyptr_gpC);
a.vfnmsub132pd(ymmA, ymmB, ymmC);
a.vfnmsub132ps(xmmA, xmmB, anyptr_gpC);
a.vfnmsub132ps(xmmA, xmmB, xmmC);
a.vfnmsub132ps(ymmA, ymmB, anyptr_gpC);
a.vfnmsub132ps(ymmA, ymmB, ymmC);
a.vfnmsub132sd(xmmA, xmmB, anyptr_gpC);
a.vfnmsub132sd(xmmA, xmmB, xmmC);
a.vfnmsub132ss(xmmA, xmmB, anyptr_gpC);
a.vfnmsub132ss(xmmA, xmmB, xmmC);
a.vfnmsub213pd(xmmA, xmmB, anyptr_gpC);
a.vfnmsub213pd(xmmA, xmmB, xmmC);
a.vfnmsub213pd(ymmA, ymmB, anyptr_gpC);
a.vfnmsub213pd(ymmA, ymmB, ymmC);
a.vfnmsub213ps(xmmA, xmmB, anyptr_gpC);
a.vfnmsub213ps(xmmA, xmmB, xmmC);
a.vfnmsub213ps(ymmA, ymmB, anyptr_gpC);
a.vfnmsub213ps(ymmA, ymmB, ymmC);
a.vfnmsub213sd(xmmA, xmmB, anyptr_gpC);
a.vfnmsub213sd(xmmA, xmmB, xmmC);
a.vfnmsub213ss(xmmA, xmmB, anyptr_gpC);
a.vfnmsub213ss(xmmA, xmmB, xmmC);
a.vfnmsub231pd(xmmA, xmmB, anyptr_gpC);
a.vfnmsub231pd(xmmA, xmmB, xmmC);
a.vfnmsub231pd(ymmA, ymmB, anyptr_gpC);
a.vfnmsub231pd(ymmA, ymmB, ymmC);
a.vfnmsub231ps(xmmA, xmmB, anyptr_gpC);
a.vfnmsub231ps(xmmA, xmmB, xmmC);
a.vfnmsub231ps(ymmA, ymmB, anyptr_gpC);
a.vfnmsub231ps(ymmA, ymmB, ymmC);
a.vfnmsub231sd(xmmA, xmmB, anyptr_gpC);
a.vfnmsub231sd(xmmA, xmmB, xmmC);
a.vfnmsub231ss(xmmA, xmmB, anyptr_gpC);
a.vfnmsub231ss(xmmA, xmmB, xmmC);
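// FMA4 (four-operand fused multiply-add; the memory operand is exercised as both the third and the fourth operand).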
a.nop();
a.vfmaddpd(xmmA, xmmB, xmmC, xmmD);
a.vfmaddpd(xmmA, xmmB, anyptr_gpC, xmmD);
a.vfmaddpd(xmmA, xmmB, xmmC, anyptr_gpD);
a.vfmaddpd(ymmA, ymmB, ymmC, ymmD);
a.vfmaddpd(ymmA, ymmB, anyptr_gpC, ymmD);
a.vfmaddpd(ymmA, ymmB, ymmC, anyptr_gpD);
a.vfmaddps(xmmA, xmmB, xmmC, xmmD);
a.vfmaddps(xmmA, xmmB, anyptr_gpC, xmmD);
a.vfmaddps(xmmA, xmmB, xmmC, anyptr_gpD);
a.vfmaddps(ymmA, ymmB, ymmC, ymmD);
a.vfmaddps(ymmA, ymmB, anyptr_gpC, ymmD);
a.vfmaddps(ymmA, ymmB, ymmC, anyptr_gpD);
a.vfmaddsd(xmmA, xmmB, xmmC, xmmD);
a.vfmaddsd(xmmA, xmmB, anyptr_gpC, xmmD);
a.vfmaddsd(xmmA, xmmB, xmmC, anyptr_gpD);
a.vfmaddss(xmmA, xmmB, xmmC, xmmD);
a.vfmaddss(xmmA, xmmB, anyptr_gpC, xmmD);
a.vfmaddss(xmmA, xmmB, xmmC, anyptr_gpD);
a.vfmaddsubpd(xmmA, xmmB, xmmC, xmmD);
a.vfmaddsubpd(xmmA, xmmB, anyptr_gpC, xmmD);
a.vfmaddsubpd(xmmA, xmmB, xmmC, anyptr_gpD);
a.vfmaddsubpd(ymmA, ymmB, ymmC, ymmD);
a.vfmaddsubpd(ymmA, ymmB, anyptr_gpC, ymmD);
a.vfmaddsubpd(ymmA, ymmB, ymmC, anyptr_gpD);
a.vfmaddsubps(xmmA, xmmB, xmmC, xmmD);
a.vfmaddsubps(xmmA, xmmB, anyptr_gpC, xmmD);
a.vfmaddsubps(xmmA, xmmB, xmmC, anyptr_gpD);
a.vfmaddsubps(ymmA, ymmB, ymmC, ymmD);
a.vfmaddsubps(ymmA, ymmB, anyptr_gpC, ymmD);
a.vfmaddsubps(ymmA, ymmB, ymmC, anyptr_gpD);
a.vfmsubaddpd(xmmA, xmmB, xmmC, xmmD);
a.vfmsubaddpd(xmmA, xmmB, anyptr_gpC, xmmD);
a.vfmsubaddpd(xmmA, xmmB, xmmC, anyptr_gpD);
a.vfmsubaddpd(ymmA, ymmB, ymmC, ymmD);
a.vfmsubaddpd(ymmA, ymmB, anyptr_gpC, ymmD);
a.vfmsubaddpd(ymmA, ymmB, ymmC, anyptr_gpD);
a.vfmsubaddps(xmmA, xmmB, xmmC, xmmD);
a.vfmsubaddps(xmmA, xmmB, anyptr_gpC, xmmD);
a.vfmsubaddps(xmmA, xmmB, xmmC, anyptr_gpD);
a.vfmsubaddps(ymmA, ymmB, ymmC, ymmD);
a.vfmsubaddps(ymmA, ymmB, anyptr_gpC, ymmD);
a.vfmsubaddps(ymmA, ymmB, ymmC, anyptr_gpD);
a.vfmsubpd(xmmA, xmmB, xmmC, xmmD);
a.vfmsubpd(xmmA, xmmB, anyptr_gpC, xmmD);
a.vfmsubpd(xmmA, xmmB, xmmC, anyptr_gpD);
a.vfmsubpd(ymmA, ymmB, ymmC, ymmD);
a.vfmsubpd(ymmA, ymmB, anyptr_gpC, ymmD);
a.vfmsubpd(ymmA, ymmB, ymmC, anyptr_gpD);
a.vfmsubps(xmmA, xmmB, xmmC, xmmD);
a.vfmsubps(xmmA, xmmB, anyptr_gpC, xmmD);
a.vfmsubps(xmmA, xmmB, xmmC, anyptr_gpD);
a.vfmsubps(ymmA, ymmB, ymmC, ymmD);
a.vfmsubps(ymmA, ymmB, anyptr_gpC, ymmD);
a.vfmsubps(ymmA, ymmB, ymmC, anyptr_gpD);
a.vfmsubsd(xmmA, xmmB, xmmC, xmmD);
a.vfmsubsd(xmmA, xmmB, anyptr_gpC, xmmD);
a.vfmsubsd(xmmA, xmmB, xmmC, anyptr_gpD);
a.vfmsubss(xmmA, xmmB, xmmC, xmmD);
a.vfmsubss(xmmA, xmmB, anyptr_gpC, xmmD);
a.vfmsubss(xmmA, xmmB, xmmC, anyptr_gpD);
a.vfnmaddpd(xmmA, xmmB, xmmC, xmmD);
a.vfnmaddpd(xmmA, xmmB, anyptr_gpC, xmmD);
a.vfnmaddpd(xmmA, xmmB, xmmC, anyptr_gpD);
a.vfnmaddpd(ymmA, ymmB, ymmC, ymmD);
a.vfnmaddpd(ymmA, ymmB, anyptr_gpC, ymmD);
a.vfnmaddpd(ymmA, ymmB, ymmC, anyptr_gpD);
a.vfnmaddps(xmmA, xmmB, xmmC, xmmD);
a.vfnmaddps(xmmA, xmmB, anyptr_gpC, xmmD);
a.vfnmaddps(xmmA, xmmB, xmmC, anyptr_gpD);
a.vfnmaddps(ymmA, ymmB, ymmC, ymmD);
a.vfnmaddps(ymmA, ymmB, anyptr_gpC, ymmD);
a.vfnmaddps(ymmA, ymmB, ymmC, anyptr_gpD);
a.vfnmaddsd(xmmA, xmmB, xmmC, xmmD);
a.vfnmaddsd(xmmA, xmmB, anyptr_gpC, xmmD);
a.vfnmaddsd(xmmA, xmmB, xmmC, anyptr_gpD);
a.vfnmaddss(xmmA, xmmB, xmmC, xmmD);
a.vfnmaddss(xmmA, xmmB, anyptr_gpC, xmmD);
a.vfnmaddss(xmmA, xmmB, xmmC, anyptr_gpD);
a.vfnmsubpd(xmmA, xmmB, xmmC, xmmD);
a.vfnmsubpd(xmmA, xmmB, anyptr_gpC, xmmD);
a.vfnmsubpd(xmmA, xmmB, xmmC, anyptr_gpD);
a.vfnmsubpd(ymmA, ymmB, ymmC, ymmD);
a.vfnmsubpd(ymmA, ymmB, anyptr_gpC, ymmD);
a.vfnmsubpd(ymmA, ymmB, ymmC, anyptr_gpD);
a.vfnmsubps(xmmA, xmmB, xmmC, xmmD);
a.vfnmsubps(xmmA, xmmB, anyptr_gpC, xmmD);
a.vfnmsubps(xmmA, xmmB, xmmC, anyptr_gpD);
a.vfnmsubps(ymmA, ymmB, ymmC, ymmD);
a.vfnmsubps(ymmA, ymmB, anyptr_gpC, ymmD);
a.vfnmsubps(ymmA, ymmB, ymmC, anyptr_gpD);
a.vfnmsubsd(xmmA, xmmB, xmmC, xmmD);
a.vfnmsubsd(xmmA, xmmB, anyptr_gpC, xmmD);
a.vfnmsubsd(xmmA, xmmB, xmmC, anyptr_gpD);
a.vfnmsubss(xmmA, xmmB, xmmC, xmmD);
a.vfnmsubss(xmmA, xmmB, anyptr_gpC, xmmD);
a.vfnmsubss(xmmA, xmmB, xmmC, anyptr_gpD);
// XOP.
a.nop();
a.vfrczpd(xmmA, xmmB);
a.vfrczpd(xmmA, anyptr_gpB);
a.vfrczpd(ymmA, ymmB);
a.vfrczpd(ymmA, anyptr_gpB);
a.vfrczps(xmmA, xmmB);
a.vfrczps(xmmA, anyptr_gpB);
a.vfrczps(ymmA, ymmB);
a.vfrczps(ymmA, anyptr_gpB);
a.vfrczsd(xmmA, xmmB);
a.vfrczsd(xmmA, anyptr_gpB);
a.vfrczss(xmmA, xmmB);
a.vfrczss(xmmA, anyptr_gpB);
a.vpcmov(xmmA, xmmB, xmmC, xmmD);
a.vpcmov(xmmA, xmmB, anyptr_gpC, xmmD);
a.vpcmov(xmmA, xmmB, xmmC, anyptr_gpD);
a.vpcmov(ymmA, ymmB, ymmC, ymmD);
a.vpcmov(ymmA, ymmB, anyptr_gpC, ymmD);
a.vpcmov(ymmA, ymmB, ymmC, anyptr_gpD);
a.vpcomb(xmmA, xmmB, xmmC, 0);
a.vpcomb(xmmA, xmmB, anyptr_gpC, 0);
a.vpcomd(xmmA, xmmB, xmmC, 0);
a.vpcomd(xmmA, xmmB, anyptr_gpC, 0);
a.vpcomq(xmmA, xmmB, xmmC, 0);
a.vpcomq(xmmA, xmmB, anyptr_gpC, 0);
a.vpcomw(xmmA, xmmB, xmmC, 0);
a.vpcomw(xmmA, xmmB, anyptr_gpC, 0);
a.vpcomub(xmmA, xmmB, xmmC, 0);
a.vpcomub(xmmA, xmmB, anyptr_gpC, 0);
a.vpcomud(xmmA, xmmB, xmmC, 0);
a.vpcomud(xmmA, xmmB, anyptr_gpC, 0);
a.vpcomuq(xmmA, xmmB, xmmC, 0);
a.vpcomuq(xmmA, xmmB, anyptr_gpC, 0);
a.vpcomuw(xmmA, xmmB, xmmC, 0);
a.vpcomuw(xmmA, xmmB, anyptr_gpC, 0);
a.vpermil2pd(xmmA, xmmB, xmmC, xmmD, 0);
a.vpermil2pd(xmmA, xmmB, anyptr_gpC, xmmD, 0);
a.vpermil2pd(xmmA, xmmB, xmmC, anyptr_gpD, 0);
a.vpermil2pd(ymmA, ymmB, ymmC, ymmD, 0);
a.vpermil2pd(ymmA, ymmB, anyptr_gpC, ymmD, 0);
a.vpermil2pd(ymmA, ymmB, ymmC, anyptr_gpD, 0);
a.vpermil2ps(xmmA, xmmB, xmmC, xmmD, 0);
a.vpermil2ps(xmmA, xmmB, anyptr_gpC, xmmD, 0);
a.vpermil2ps(xmmA, xmmB, xmmC, anyptr_gpD, 0);
a.vpermil2ps(ymmA, ymmB, ymmC, ymmD, 0);
a.vpermil2ps(ymmA, ymmB, anyptr_gpC, ymmD, 0);
a.vpermil2ps(ymmA, ymmB, ymmC, anyptr_gpD, 0);
a.vphaddbd(xmmA, xmmB);
a.vphaddbd(xmmA, anyptr_gpB);
a.vphaddbq(xmmA, xmmB);
a.vphaddbq(xmmA, anyptr_gpB);
a.vphaddbw(xmmA, xmmB);
a.vphaddbw(xmmA, anyptr_gpB);
a.vphadddq(xmmA, xmmB);
a.vphadddq(xmmA, anyptr_gpB);
a.vphaddwd(xmmA, xmmB);
a.vphaddwd(xmmA, anyptr_gpB);
a.vphaddwq(xmmA, xmmB);
a.vphaddwq(xmmA, anyptr_gpB);
a.vphaddubd(xmmA, xmmB);
a.vphaddubd(xmmA, anyptr_gpB);
a.vphaddubq(xmmA, xmmB);
a.vphaddubq(xmmA, anyptr_gpB);
a.vphaddubw(xmmA, xmmB);
a.vphaddubw(xmmA, anyptr_gpB);
a.vphaddudq(xmmA, xmmB);
a.vphaddudq(xmmA, anyptr_gpB);
a.vphadduwd(xmmA, xmmB);
a.vphadduwd(xmmA, anyptr_gpB);
a.vphadduwq(xmmA, xmmB);
a.vphadduwq(xmmA, anyptr_gpB);
a.vphsubbw(xmmA, xmmB);
a.vphsubbw(xmmA, anyptr_gpB);
a.vphsubdq(xmmA, xmmB);
a.vphsubdq(xmmA, anyptr_gpB);
a.vphsubwd(xmmA, xmmB);
a.vphsubwd(xmmA, anyptr_gpB);
a.vpmacsdd(xmmA, xmmB, xmmC, xmmD);
a.vpmacsdd(xmmA, xmmB, anyptr_gpC, xmmD);
a.vpmacsdqh(xmmA, xmmB, xmmC, xmmD);
a.vpmacsdqh(xmmA, xmmB, anyptr_gpC, xmmD);
a.vpmacsdql(xmmA, xmmB, xmmC, xmmD);
a.vpmacsdql(xmmA, xmmB, anyptr_gpC, xmmD);
a.vpmacswd(xmmA, xmmB, xmmC, xmmD);
a.vpmacswd(xmmA, xmmB, anyptr_gpC, xmmD);
a.vpmacsww(xmmA, xmmB, xmmC, xmmD);
a.vpmacsww(xmmA, xmmB, anyptr_gpC, xmmD);
a.vpmacssdd(xmmA, xmmB, xmmC, xmmD);
a.vpmacssdd(xmmA, xmmB, anyptr_gpC, xmmD);
a.vpmacssdqh(xmmA, xmmB, xmmC, xmmD);
a.vpmacssdqh(xmmA, xmmB, anyptr_gpC, xmmD);
a.vpmacssdql(xmmA, xmmB, xmmC, xmmD);
a.vpmacssdql(xmmA, xmmB, anyptr_gpC, xmmD);
a.vpmacsswd(xmmA, xmmB, xmmC, xmmD);
a.vpmacsswd(xmmA, xmmB, anyptr_gpC, xmmD);
a.vpmacssww(xmmA, xmmB, xmmC, xmmD);
a.vpmacssww(xmmA, xmmB, anyptr_gpC, xmmD);
a.vpmadcsswd(xmmA, xmmB, xmmC, xmmD);
a.vpmadcsswd(xmmA, xmmB, anyptr_gpC, xmmD);
a.vpmadcswd(xmmA, xmmB, xmmC, xmmD);
a.vpmadcswd(xmmA, xmmB, anyptr_gpC, xmmD);
a.vpperm(xmmA, xmmB, xmmC, xmmD);
a.vpperm(xmmA, xmmB, anyptr_gpC, xmmD);
a.vpperm(xmmA, xmmB, xmmC, anyptr_gpD);
a.vprotb(xmmA, xmmB, xmmC);
a.vprotb(xmmA, anyptr_gpB, xmmC);
a.vprotb(xmmA, xmmB, anyptr_gpC);
a.vprotb(xmmA, xmmB, 0);
a.vprotb(xmmA, anyptr_gpB, 0);
a.vprotd(xmmA, xmmB, xmmC);
a.vprotd(xmmA, anyptr_gpB, xmmC);
a.vprotd(xmmA, xmmB, anyptr_gpC);
a.vprotd(xmmA, xmmB, 0);
a.vprotd(xmmA, anyptr_gpB, 0);
a.vprotq(xmmA, xmmB, xmmC);
a.vprotq(xmmA, anyptr_gpB, xmmC);
a.vprotq(xmmA, xmmB, anyptr_gpC);
a.vprotq(xmmA, xmmB, 0);
a.vprotq(xmmA, anyptr_gpB, 0);
a.vprotw(xmmA, xmmB, xmmC);
a.vprotw(xmmA, anyptr_gpB, xmmC);
a.vprotw(xmmA, xmmB, anyptr_gpC);
a.vprotw(xmmA, xmmB, 0);
a.vprotw(xmmA, anyptr_gpB, 0);
a.vpshab(xmmA, xmmB, xmmC);
a.vpshab(xmmA, anyptr_gpB, xmmC);
a.vpshab(xmmA, xmmB, anyptr_gpC);
a.vpshad(xmmA, xmmB, xmmC);
a.vpshad(xmmA, anyptr_gpB, xmmC);
a.vpshad(xmmA, xmmB, anyptr_gpC);
a.vpshaq(xmmA, xmmB, xmmC);
a.vpshaq(xmmA, anyptr_gpB, xmmC);
a.vpshaq(xmmA, xmmB, anyptr_gpC);
a.vpshaw(xmmA, xmmB, xmmC);
a.vpshaw(xmmA, anyptr_gpB, xmmC);
a.vpshaw(xmmA, xmmB, anyptr_gpC);
a.vpshlb(xmmA, xmmB, xmmC);
a.vpshlb(xmmA, anyptr_gpB, xmmC);
a.vpshlb(xmmA, xmmB, anyptr_gpC);
a.vpshld(xmmA, xmmB, xmmC);
a.vpshld(xmmA, anyptr_gpB, xmmC);
a.vpshld(xmmA, xmmB, anyptr_gpC);
a.vpshlq(xmmA, xmmB, xmmC);
a.vpshlq(xmmA, anyptr_gpB, xmmC);
a.vpshlq(xmmA, xmmB, anyptr_gpC);
a.vpshlw(xmmA, xmmB, xmmC);
a.vpshlw(xmmA, anyptr_gpB, xmmC);
a.vpshlw(xmmA, xmmB, anyptr_gpC);
// F16C (half-precision conversions: vcvtph2ps / vcvtps2ph, register and memory forms).
a.nop();
a.vcvtph2ps(xmmA, xmmB);
a.vcvtph2ps(xmmA, anyptr_gpB);
a.vcvtph2ps(ymmA, xmmB);
a.vcvtph2ps(ymmA, anyptr_gpB);
a.vcvtps2ph(xmmA, xmmB, 0);
a.vcvtps2ph(anyptr_gpA, xmmB, 0);
a.vcvtps2ph(xmmA, ymmB, 0);
a.vcvtps2ph(anyptr_gpA, ymmB, 0);
// AVX512.
a.nop();
a.kaddb(kA, kB, kC);
a.kaddd(kA, kB, kC);
a.kaddq(kA, kB, kC);
a.kaddw(kA, kB, kC);
a.kandb(kA, kB, kC);
a.kandd(kA, kB, kC);
a.kandnb(kA, kB, kC);
a.kandnd(kA, kB, kC);
a.kandnq(kA, kB, kC);
a.kandnw(kA, kB, kC);
a.kandq(kA, kB, kC);
a.kandw(kA, kB, kC);
a.kmovb(kA, kB);
a.kmovb(kA, anyptr_gpB);
a.kmovb(kA, gdB);
if (isX64) a.kmovb(kA, gzB);
a.kmovb(anyptr_gpA, kB);
a.kmovb(gdA, kB);
if (isX64) a.kmovb(gzA, kB);
a.kmovd(kA, kB);
a.kmovd(kA, anyptr_gpB);
a.kmovd(kA, gdB);
if (isX64) a.kmovd(kA, gzB);
a.kmovd(anyptr_gpA, kB);
a.kmovd(gdA, kB);
if (isX64) a.kmovd(gzA, kB);
a.kmovq(kA, kB);
a.kmovq(kA, anyptr_gpB);
if (isX64) a.kmovq(kA, gzB);
a.kmovq(anyptr_gpA, kB);
if (isX64) a.kmovq(gzA, kB);
a.kmovw(kA, kB);
a.kmovw(kA, anyptr_gpB);
a.kmovw(kA, gdB);
if (isX64) a.kmovw(kA, gzB);
a.kmovw(anyptr_gpA, kB);
a.kmovw(gdA, kB);
if (isX64) a.kmovw(gzA, kB);
a.knotb(kA, kB);
a.knotd(kA, kB);
a.knotq(kA, kB);
a.knotw(kA, kB);
a.korb(kA, kB, kC);
a.kord(kA, kB, kC);
a.korq(kA, kB, kC);
a.kortestb(kA, kB);
a.kortestd(kA, kB);
a.kortestq(kA, kB);
a.kortestw(kA, kB);
a.korw(kA, kB, kC);
a.kshiftlb(kA, kB, 0);
a.kshiftld(kA, kB, 0);
a.kshiftlq(kA, kB, 0);
a.kshiftlw(kA, kB, 0);
a.kshiftrb(kA, kB, 0);
a.kshiftrd(kA, kB, 0);
a.kshiftrq(kA, kB, 0);
a.kshiftrw(kA, kB, 0);
a.ktestb(kA, kB);
a.ktestd(kA, kB);
a.ktestq(kA, kB);
a.ktestw(kA, kB);
a.kunpckbw(kA, kB, kC);
a.kunpckdq(kA, kB, kC);
a.kunpckwd(kA, kB, kC);
a.kxnorb(kA, kB, kC);
a.kxnord(kA, kB, kC);
a.kxnorq(kA, kB, kC);
a.kxnorw(kA, kB, kC);
a.kxorb(kA, kB, kC);
a.kxord(kA, kB, kC);
a.kxorq(kA, kB, kC);
a.kxorw(kA, kB, kC);
a.nop();
a.vaddpd(xmmA, xmmB, xmmC);
a.vaddpd(xmmA, xmmB, anyptr_gpC);
a.vaddpd(ymmA, ymmB, ymmC);
a.vaddpd(ymmA, ymmB, anyptr_gpC);
a.vaddpd(zmmA, zmmB, zmmC);
a.vaddpd(zmmA, zmmB, anyptr_gpC);
a.vaddps(xmmA, xmmB, xmmC);
a.vaddps(xmmA, xmmB, anyptr_gpC);
a.vaddps(ymmA, ymmB, ymmC);
a.vaddps(ymmA, ymmB, anyptr_gpC);
a.vaddps(zmmA, zmmB, zmmC);
a.vaddps(zmmA, zmmB, anyptr_gpC);
a.vaddsd(xmmA, xmmB, xmmC);
a.vaddsd(xmmA, xmmB, anyptr_gpC);
a.vaddss(xmmA, xmmB, xmmC);
a.vaddss(xmmA, xmmB, anyptr_gpC);
a.valignd(xmmA, xmmB, xmmC, 0);
a.valignd(xmmA, xmmB, anyptr_gpC, 0);
a.valignd(ymmA, ymmB, ymmC, 0);
a.valignd(ymmA, ymmB, anyptr_gpC, 0);
a.valignd(zmmA, zmmB, zmmC, 0);
a.valignd(zmmA, zmmB, anyptr_gpC, 0);
a.valignq(xmmA, xmmB, xmmC, 0);
a.valignq(xmmA, xmmB, anyptr_gpC, 0);
a.valignq(ymmA, ymmB, ymmC, 0);
a.valignq(ymmA, ymmB, anyptr_gpC, 0);
a.valignq(zmmA, zmmB, zmmC, 0);
a.valignq(zmmA, zmmB, anyptr_gpC, 0);
a.vandnpd(xmmA, xmmB, xmmC);
a.vandnpd(xmmA, xmmB, anyptr_gpC);
a.vandnpd(ymmA, ymmB, ymmC);
a.vandnpd(ymmA, ymmB, anyptr_gpC);
a.vandnpd(zmmA, zmmB, zmmC);
a.vandnpd(zmmA, zmmB, anyptr_gpC);
a.vandnps(xmmA, xmmB, xmmC);
a.vandnps(xmmA, xmmB, anyptr_gpC);
a.vandnps(ymmA, ymmB, ymmC);
a.vandnps(ymmA, ymmB, anyptr_gpC);
a.vandnps(zmmA, zmmB, zmmC);
a.vandnps(zmmA, zmmB, anyptr_gpC);
a.vandpd(xmmA, xmmB, xmmC);
a.vandpd(xmmA, xmmB, anyptr_gpC);
a.vandpd(ymmA, ymmB, ymmC);
a.vandpd(ymmA, ymmB, anyptr_gpC);
a.vandpd(zmmA, zmmB, zmmC);
a.vandpd(zmmA, zmmB, anyptr_gpC);
a.vandps(xmmA, xmmB, xmmC);
a.vandps(xmmA, xmmB, anyptr_gpC);
a.vandps(ymmA, ymmB, ymmC);
a.vandps(ymmA, ymmB, anyptr_gpC);
a.vandps(zmmA, zmmB, zmmC);
a.vandps(zmmA, zmmB, anyptr_gpC);
a.vblendmb(xmmA, xmmB, xmmC);
a.vblendmb(xmmA, xmmB, anyptr_gpC);
a.vblendmb(ymmA, ymmB, ymmC);
a.vblendmb(ymmA, ymmB, anyptr_gpC);
a.vblendmb(zmmA, zmmB, zmmC);
a.vblendmb(zmmA, zmmB, anyptr_gpC);
a.vblendmd(xmmA, xmmB, xmmC);
a.vblendmd(xmmA, xmmB, anyptr_gpC);
a.vblendmd(ymmA, ymmB, ymmC);
a.vblendmd(ymmA, ymmB, anyptr_gpC);
a.vblendmd(zmmA, zmmB, zmmC);
a.vblendmd(zmmA, zmmB, anyptr_gpC);
a.vblendmpd(xmmA, xmmB, xmmC);
a.vblendmpd(xmmA, xmmB, anyptr_gpC);
a.vblendmpd(ymmA, ymmB, ymmC);
a.vblendmpd(ymmA, ymmB, anyptr_gpC);
a.vblendmpd(zmmA, zmmB, zmmC);
a.vblendmpd(zmmA, zmmB, anyptr_gpC);
a.vblendmps(xmmA, xmmB, xmmC);
a.vblendmps(xmmA, xmmB, anyptr_gpC);
a.vblendmps(ymmA, ymmB, ymmC);
a.vblendmps(ymmA, ymmB, anyptr_gpC);
a.vblendmps(zmmA, zmmB, zmmC);
a.vblendmps(zmmA, zmmB, anyptr_gpC);
a.vblendmq(xmmA, xmmB, xmmC);
a.vblendmq(xmmA, xmmB, anyptr_gpC);
a.vblendmq(ymmA, ymmB, ymmC);
a.vblendmq(ymmA, ymmB, anyptr_gpC);
a.vblendmq(zmmA, zmmB, zmmC);
a.vblendmq(zmmA, zmmB, anyptr_gpC);
a.vblendmw(xmmA, xmmB, xmmC);
a.vblendmw(xmmA, xmmB, anyptr_gpC);
a.vblendmw(ymmA, ymmB, ymmC);
a.vblendmw(ymmA, ymmB, anyptr_gpC);
a.vblendmw(zmmA, zmmB, zmmC);
a.vblendmw(zmmA, zmmB, anyptr_gpC);
a.vbroadcastf32x2(ymmA, xmmB);
a.vbroadcastf32x2(ymmA, anyptr_gpB);
a.vbroadcastf32x2(zmmA, xmmB);
a.vbroadcastf32x2(zmmA, anyptr_gpB);
a.vbroadcastf32x4(ymmA, anyptr_gpB);
a.vbroadcastf32x4(zmmA, anyptr_gpB);
a.vbroadcastf32x8(zmmA, anyptr_gpB);
a.vbroadcastf64x2(ymmA, anyptr_gpB);
a.vbroadcastf64x2(zmmA, anyptr_gpB);
a.vbroadcastf64x4(zmmA, anyptr_gpB);
a.vbroadcasti32x2(xmmA, xmmB);
a.vbroadcasti32x2(xmmA, anyptr_gpB);
a.vbroadcasti32x2(ymmA, xmmB);
a.vbroadcasti32x2(ymmA, anyptr_gpB);
a.vbroadcasti32x2(zmmA, xmmB);
a.vbroadcasti32x2(zmmA, anyptr_gpB);
a.vbroadcasti32x4(ymmA, xmmB);
a.vbroadcasti32x4(ymmA, anyptr_gpB);
a.vbroadcasti32x4(zmmA, xmmB);
a.vbroadcasti32x4(zmmA, anyptr_gpB);
a.vbroadcasti32x8(zmmA, xmmB);
a.vbroadcasti32x8(zmmA, anyptr_gpB);
a.vbroadcasti64x2(ymmA, xmmB);
a.vbroadcasti64x2(ymmA, anyptr_gpB);
a.vbroadcasti64x2(zmmA, xmmB);
a.vbroadcasti64x2(zmmA, anyptr_gpB);
a.vbroadcasti64x4(zmmA, xmmB);
a.vbroadcasti64x4(zmmA, anyptr_gpB);
a.vbroadcastsd(ymmA, xmmB);
a.vbroadcastsd(ymmA, anyptr_gpB);
a.vbroadcastsd(zmmA, xmmB);
a.vbroadcastsd(zmmA, anyptr_gpB);
a.vbroadcastss(xmmA, xmmB);
a.vbroadcastss(xmmA, anyptr_gpB);
a.vbroadcastss(ymmA, xmmB);
a.vbroadcastss(ymmA, anyptr_gpB);
a.vbroadcastss(zmmA, xmmB);
a.vbroadcastss(zmmA, anyptr_gpB);
a.vcmppd(kA, xmmB, xmmC, 0);
a.vcmppd(kA, xmmB, anyptr_gpC, 0);
a.vcmppd(kA, ymmB, ymmC, 0);
a.vcmppd(kA, ymmB, anyptr_gpC, 0);
a.vcmppd(kA, zmmB, zmmC, 0);
a.vcmppd(kA, zmmB, anyptr_gpC, 0);
a.vcmpps(kA, xmmB, xmmC, 0);
a.vcmpps(kA, xmmB, anyptr_gpC, 0);
a.vcmpps(kA, ymmB, ymmC, 0);
a.vcmpps(kA, ymmB, anyptr_gpC, 0);
a.vcmpps(kA, zmmB, zmmC, 0);
a.vcmpps(kA, zmmB, anyptr_gpC, 0);
a.vcmpsd(kA, xmmB, xmmC, 0);
a.vcmpsd(kA, xmmB, anyptr_gpC, 0);
a.vcmpss(kA, xmmB, xmmC, 0);
a.vcmpss(kA, xmmB, anyptr_gpC, 0);
a.vcomisd(xmmA, xmmB);
a.vcomisd(xmmA, anyptr_gpB);
a.vcomiss(xmmA, xmmB);
a.vcomiss(xmmA, anyptr_gpB);
a.vcompresspd(xmmA, xmmB);
a.vcompresspd(anyptr_gpA, xmmB);
a.vcompresspd(ymmA, ymmB);
a.vcompresspd(anyptr_gpA, ymmB);
a.vcompresspd(zmmA, zmmB);
a.vcompresspd(anyptr_gpA, zmmB);
a.vcompressps(xmmA, xmmB);
a.vcompressps(anyptr_gpA, xmmB);
a.vcompressps(ymmA, ymmB);
a.vcompressps(anyptr_gpA, ymmB);
a.vcompressps(zmmA, zmmB);
a.vcompressps(anyptr_gpA, zmmB);
a.vcvtdq2pd(xmmA, xmmB);
a.vcvtdq2pd(xmmA, anyptr_gpB);
a.vcvtdq2pd(ymmA, xmmB);
a.vcvtdq2pd(ymmA, anyptr_gpB);
a.vcvtdq2pd(zmmA, ymmB);
a.vcvtdq2pd(zmmA, anyptr_gpB);
a.vcvtdq2ps(xmmA, xmmB);
a.vcvtdq2ps(xmmA, anyptr_gpB);
a.vcvtdq2ps(ymmA, ymmB);
a.vcvtdq2ps(ymmA, anyptr_gpB);
a.vcvtdq2ps(zmmA, zmmB);
a.vcvtdq2ps(zmmA, anyptr_gpB);
a.vcvtpd2dq(xmmA, xmmB);
a.vcvtpd2dq(xmmA, anyptr_gpB);
a.vcvtpd2dq(xmmA, ymmB);
a.vcvtpd2dq(xmmA, anyptr_gpB);
a.vcvtpd2dq(ymmA, zmmB);
a.vcvtpd2dq(ymmA, anyptr_gpB);
a.vcvtpd2qq(xmmA, xmmB);
a.vcvtpd2qq(xmmA, anyptr_gpB);
a.vcvtpd2qq(ymmA, ymmB);
a.vcvtpd2qq(ymmA, anyptr_gpB);
a.vcvtpd2qq(zmmA, zmmB);
a.vcvtpd2qq(zmmA, anyptr_gpB);
a.vcvtpd2udq(xmmA, xmmB);
a.vcvtpd2udq(xmmA, anyptr_gpB);
a.vcvtpd2udq(xmmA, ymmB);
a.vcvtpd2udq(xmmA, anyptr_gpB);
a.vcvtpd2udq(ymmA, zmmB);
a.vcvtpd2udq(ymmA, anyptr_gpB);
a.vcvtpd2uqq(xmmA, xmmB);
a.vcvtpd2uqq(xmmA, anyptr_gpB);
a.vcvtpd2uqq(ymmA, ymmB);
a.vcvtpd2uqq(ymmA, anyptr_gpB);
a.vcvtpd2uqq(zmmA, zmmB);
a.vcvtpd2uqq(zmmA, anyptr_gpB);
a.vcvtph2ps(xmmA, xmmB);
a.vcvtph2ps(xmmA, anyptr_gpB);
a.vcvtph2ps(ymmA, xmmB);
a.vcvtph2ps(ymmA, anyptr_gpB);
a.vcvtph2ps(zmmA, ymmB);
a.vcvtph2ps(zmmA, anyptr_gpB);
a.vcvtps2dq(xmmA, xmmB);
a.vcvtps2dq(xmmA, anyptr_gpB);
a.vcvtps2dq(ymmA, ymmB);
a.vcvtps2dq(ymmA, anyptr_gpB);
a.vcvtps2dq(zmmA, zmmB);
a.vcvtps2dq(zmmA, anyptr_gpB);
a.vcvtps2pd(xmmA, xmmB);
a.vcvtps2pd(xmmA, anyptr_gpB);
a.vcvtps2pd(ymmA, xmmB);
a.vcvtps2pd(ymmA, anyptr_gpB);
a.vcvtps2pd(zmmA, ymmB);
a.vcvtps2pd(zmmA, anyptr_gpB);
a.vcvtps2ph(xmmA, xmmB, 0);
a.vcvtps2ph(anyptr_gpA, xmmB, 0);
a.vcvtps2ph(xmmA, ymmB, 0);
a.vcvtps2ph(anyptr_gpA, ymmB, 0);
a.vcvtps2ph(ymmA, zmmB, 0);
a.vcvtps2ph(anyptr_gpA, zmmB, 0);
a.vcvtps2qq(xmmA, xmmB);
a.vcvtps2qq(xmmA, anyptr_gpB);
a.vcvtps2qq(ymmA, xmmB);
a.vcvtps2qq(ymmA, anyptr_gpB);
a.vcvtps2qq(zmmA, ymmB);
a.vcvtps2qq(zmmA, anyptr_gpB);
a.vcvtps2udq(xmmA, xmmB);
a.vcvtps2udq(xmmA, anyptr_gpB);
a.vcvtps2udq(ymmA, ymmB);
a.vcvtps2udq(ymmA, anyptr_gpB);
a.vcvtps2udq(zmmA, zmmB);
a.vcvtps2udq(zmmA, anyptr_gpB);
a.vcvtps2uqq(xmmA, xmmB);
a.vcvtps2uqq(xmmA, anyptr_gpB);
a.vcvtps2uqq(ymmA, xmmB);
a.vcvtps2uqq(ymmA, anyptr_gpB);
a.vcvtps2uqq(zmmA, ymmB);
a.vcvtps2uqq(zmmA, anyptr_gpB);
a.vcvtqq2pd(xmmA, xmmB);
a.vcvtqq2pd(xmmA, anyptr_gpB);
a.vcvtqq2pd(ymmA, ymmB);
a.vcvtqq2pd(ymmA, anyptr_gpB);
a.vcvtqq2pd(zmmA, zmmB);
a.vcvtqq2pd(zmmA, anyptr_gpB);
a.vcvtqq2ps(xmmA, xmmB);
a.vcvtqq2ps(xmmA, anyptr_gpB);
a.vcvtqq2ps(xmmA, ymmB);
a.vcvtqq2ps(xmmA, anyptr_gpB);
a.vcvtqq2ps(ymmA, zmmB);
a.vcvtqq2ps(ymmA, anyptr_gpB);
a.vcvtsd2si(gdA, xmmB);
a.vcvtsd2si(gdA, anyptr_gpB);
if (isX64) a.vcvtsd2si(gzA, xmmB);
if (isX64) a.vcvtsd2si(gzA, anyptr_gpB);
a.vcvtsd2ss(xmmA, xmmB, xmmC);
a.vcvtsd2ss(xmmA, xmmB, anyptr_gpC);
a.vcvtsd2usi(gdA, xmmB);
a.vcvtsd2usi(gdA, anyptr_gpB);
if (isX64) a.vcvtsd2usi(gzA, xmmB);
if (isX64) a.vcvtsd2usi(gzA, anyptr_gpB);
a.vcvtsi2sd(xmmA, xmmB, gdC);
a.vcvtsi2sd(xmmA, xmmB, dword_ptr(gzC));
if (isX64) a.vcvtsi2sd(xmmA, xmmB, gzC);
if (isX64) a.vcvtsi2sd(xmmA, xmmB, qword_ptr(gzC));
a.vcvtsi2ss(xmmA, xmmB, gdC);
a.vcvtsi2ss(xmmA, xmmB, dword_ptr(gzC));
if (isX64) a.vcvtsi2ss(xmmA, xmmB, gzC);
if (isX64) a.vcvtsi2ss(xmmA, xmmB, qword_ptr(gzC));
a.vcvtss2sd(xmmA, xmmB, xmmC);
a.vcvtss2sd(xmmA, xmmB, anyptr_gpC);
a.vcvtss2si(gdA, xmmB);
a.vcvtss2si(gdA, anyptr_gpB);
if (isX64) a.vcvtss2si(gzA, xmmB);
if (isX64) a.vcvtss2si(gzA, anyptr_gpB);
a.vcvtss2usi(gdA, xmmB);
a.vcvtss2usi(gdA, anyptr_gpB);
if (isX64) a.vcvtss2usi(gzA, xmmB);
if (isX64) a.vcvtss2usi(gzA, anyptr_gpB);
a.vcvttpd2dq(xmmA, xmmB);
a.vcvttpd2dq(xmmA, anyptr_gpB);
a.vcvttpd2dq(xmmA, ymmB);
a.vcvttpd2dq(xmmA, anyptr_gpB);
a.vcvttpd2dq(ymmA, zmmB);
a.vcvttpd2dq(ymmA, anyptr_gpB);
a.vcvttpd2qq(xmmA, xmmB);
a.vcvttpd2qq(xmmA, anyptr_gpB);
a.vcvttpd2qq(ymmA, ymmB);
a.vcvttpd2qq(ymmA, anyptr_gpB);
a.vcvttpd2qq(zmmA, zmmB);
a.vcvttpd2qq(zmmA, anyptr_gpB);
a.vcvttpd2udq(xmmA, xmmB);
a.vcvttpd2udq(xmmA, anyptr_gpB);
a.vcvttpd2udq(xmmA, ymmB);
a.vcvttpd2udq(xmmA, anyptr_gpB);
a.vcvttpd2udq(ymmA, zmmB);
a.vcvttpd2udq(ymmA, anyptr_gpB);
a.vcvttpd2uqq(xmmA, xmmB);
a.vcvttpd2uqq(xmmA, anyptr_gpB);
a.vcvttpd2uqq(ymmA, ymmB);
a.vcvttpd2uqq(ymmA, anyptr_gpB);
a.vcvttpd2uqq(zmmA, zmmB);
a.vcvttpd2uqq(zmmA, anyptr_gpB);
a.vcvttps2dq(xmmA, xmmB);
a.vcvttps2dq(xmmA, anyptr_gpB);
a.vcvttps2dq(ymmA, ymmB);
a.vcvttps2dq(ymmA, anyptr_gpB);
a.vcvttps2dq(zmmA, zmmB);
a.vcvttps2dq(zmmA, anyptr_gpB);
a.vcvttps2qq(xmmA, xmmB);
a.vcvttps2qq(xmmA, anyptr_gpB);
a.vcvttps2qq(ymmA, xmmB);
a.vcvttps2qq(ymmA, anyptr_gpB);
a.vcvttps2qq(zmmA, ymmB);
a.vcvttps2qq(zmmA, anyptr_gpB);
a.vcvttps2udq(xmmA, xmmB);
a.vcvttps2udq(xmmA, anyptr_gpB);
a.vcvttps2udq(ymmA, ymmB);
a.vcvttps2udq(ymmA, anyptr_gpB);
a.vcvttps2udq(zmmA, zmmB);
a.vcvttps2udq(zmmA, anyptr_gpB);
a.vcvttps2uqq(xmmA, xmmB);
a.vcvttps2uqq(xmmA, anyptr_gpB);
a.vcvttps2uqq(ymmA, xmmB);
a.vcvttps2uqq(ymmA, anyptr_gpB);
a.vcvttps2uqq(zmmA, ymmB);
a.vcvttps2uqq(zmmA, anyptr_gpB);
a.vcvttsd2si(gdA, xmmB);
a.vcvttsd2si(gdA, anyptr_gpB);
if (isX64) a.vcvttsd2si(gzA, xmmB);
if (isX64) a.vcvttsd2si(gzA, anyptr_gpB);
a.vcvttsd2usi(gdA, xmmB);
a.vcvttsd2usi(gdA, anyptr_gpB);
if (isX64) a.vcvttsd2usi(gzA, xmmB);
if (isX64) a.vcvttsd2usi(gzA, anyptr_gpB);
a.vcvttss2si(gdA, xmmB);
a.vcvttss2si(gdA, anyptr_gpB);
if (isX64) a.vcvttss2si(gzA, xmmB);
if (isX64) a.vcvttss2si(gzA, anyptr_gpB);
a.vcvttss2usi(gdA, xmmB);
a.vcvttss2usi(gdA, anyptr_gpB);
if (isX64) a.vcvttss2usi(gzA, xmmB);
if (isX64) a.vcvttss2usi(gzA, anyptr_gpB);
a.vcvtudq2pd(xmmA, xmmB);
a.vcvtudq2pd(xmmA, anyptr_gpB);
a.vcvtudq2pd(ymmA, xmmB);
a.vcvtudq2pd(ymmA, anyptr_gpB);
a.vcvtudq2pd(zmmA, ymmB);
a.vcvtudq2pd(zmmA, anyptr_gpB);
a.vcvtudq2ps(xmmA, xmmB);
a.vcvtudq2ps(xmmA, anyptr_gpB);
a.vcvtudq2ps(ymmA, ymmB);
a.vcvtudq2ps(ymmA, anyptr_gpB);
a.vcvtudq2ps(zmmA, zmmB);
a.vcvtudq2ps(zmmA, anyptr_gpB);
a.vcvtuqq2pd(xmmA, xmmB);
a.vcvtuqq2pd(xmmA, anyptr_gpB);
a.vcvtuqq2pd(ymmA, ymmB);
a.vcvtuqq2pd(ymmA, anyptr_gpB);
a.vcvtuqq2pd(zmmA, zmmB);
a.vcvtuqq2pd(zmmA, anyptr_gpB);
a.vcvtuqq2ps(xmmA, xmmB);
a.vcvtuqq2ps(xmmA, anyptr_gpB);
a.vcvtuqq2ps(xmmA, ymmB);
a.vcvtuqq2ps(xmmA, anyptr_gpB);
a.vcvtuqq2ps(ymmA, zmmB);
a.vcvtuqq2ps(ymmA, anyptr_gpB);
a.vcvtusi2sd(xmmA, xmmB, gdC);
a.vcvtusi2sd(xmmA, xmmB, dword_ptr(gzC));
if (isX64) a.vcvtusi2sd(xmmA, xmmB, gzC);
if (isX64) a.vcvtusi2sd(xmmA, xmmB, qword_ptr(gzC));
a.vcvtusi2ss(xmmA, xmmB, gdC);
a.vcvtusi2ss(xmmA, xmmB, dword_ptr(gzC));
if (isX64) a.vcvtusi2ss(xmmA, xmmB, gzC);
if (isX64) a.vcvtusi2ss(xmmA, xmmB, qword_ptr(gzC));
a.vdbpsadbw(xmmA, xmmB, xmmC, 0);
a.vdbpsadbw(xmmA, xmmB, anyptr_gpC, 0);
a.vdbpsadbw(ymmA, ymmB, ymmC, 0);
a.vdbpsadbw(ymmA, ymmB, anyptr_gpC, 0);
a.vdbpsadbw(zmmA, zmmB, zmmC, 0);
a.vdbpsadbw(zmmA, zmmB, anyptr_gpC, 0);
a.vdivpd(xmmA, xmmB, xmmC);
a.vdivpd(xmmA, xmmB, anyptr_gpC);
a.vdivpd(ymmA, ymmB, ymmC);
a.vdivpd(ymmA, ymmB, anyptr_gpC);
a.vdivpd(zmmA, zmmB, zmmC);
a.vdivpd(zmmA, zmmB, anyptr_gpC);
a.vdivps(xmmA, xmmB, xmmC);
a.vdivps(xmmA, xmmB, anyptr_gpC);
a.vdivps(ymmA, ymmB, ymmC);
a.vdivps(ymmA, ymmB, anyptr_gpC);
a.vdivps(zmmA, zmmB, zmmC);
a.vdivps(zmmA, zmmB, anyptr_gpC);
a.vdivsd(xmmA, xmmB, xmmC);
a.vdivsd(xmmA, xmmB, anyptr_gpC);
a.vdivss(xmmA, xmmB, xmmC);
a.vdivss(xmmA, xmmB, anyptr_gpC);
a.vexp2pd(zmmA, zmmB);
a.vexp2pd(zmmA, anyptr_gpB);
a.vexp2ps(zmmA, zmmB);
a.vexp2ps(zmmA, anyptr_gpB);
a.vexpandpd(xmmA, xmmB);
a.vexpandpd(xmmA, anyptr_gpB);
a.vexpandpd(ymmA, ymmB);
a.vexpandpd(ymmA, anyptr_gpB);
a.vexpandpd(zmmA, zmmB);
a.vexpandpd(zmmA, anyptr_gpB);
a.vexpandps(xmmA, xmmB);
a.vexpandps(xmmA, anyptr_gpB);
a.vexpandps(ymmA, ymmB);
a.vexpandps(ymmA, anyptr_gpB);
a.vexpandps(zmmA, zmmB);
a.vexpandps(zmmA, anyptr_gpB);
a.vextractf32x4(xmmA, ymmB, 0);
a.vextractf32x4(anyptr_gpA, ymmB, 0);
a.vextractf32x4(xmmA, zmmB, 0);
a.vextractf32x4(anyptr_gpA, zmmB, 0);
a.vextractf32x8(ymmA, zmmB, 0);
a.vextractf32x8(anyptr_gpA, zmmB, 0);
a.vextractf64x2(xmmA, ymmB, 0);
a.vextractf64x2(anyptr_gpA, ymmB, 0);
a.vextractf64x2(xmmA, zmmB, 0);
a.vextractf64x2(anyptr_gpA, zmmB, 0);
a.vextractf64x4(ymmA, zmmB, 0);
a.vextractf64x4(anyptr_gpA, zmmB, 0);
a.vextracti32x4(xmmA, ymmB, 0);
a.vextracti32x4(anyptr_gpA, ymmB, 0);
a.vextracti32x4(xmmA, zmmB, 0);
a.vextracti32x4(anyptr_gpA, zmmB, 0);
a.vextracti32x8(ymmA, zmmB, 0);
a.vextracti32x8(anyptr_gpA, zmmB, 0);
a.vextracti64x2(xmmA, ymmB, 0);
a.vextracti64x2(anyptr_gpA, ymmB, 0);
a.vextracti64x2(xmmA, zmmB, 0);
a.vextracti64x2(anyptr_gpA, zmmB, 0);
a.vextracti64x4(ymmA, zmmB, 0);
a.vextracti64x4(anyptr_gpA, zmmB, 0);
a.vextractps(gdA, xmmB, 0);
a.vextractps(gzA, xmmB, 0);
a.vextractps(anyptr_gpA, xmmB, 0);
a.vfixupimmpd(xmmA, xmmB, xmmC, 0);
a.vfixupimmpd(xmmA, xmmB, anyptr_gpC, 0);
a.vfixupimmpd(ymmA, ymmB, ymmC, 0);
a.vfixupimmpd(ymmA, ymmB, anyptr_gpC, 0);
a.vfixupimmpd(zmmA, zmmB, zmmC, 0);
a.vfixupimmpd(zmmA, zmmB, anyptr_gpC, 0);
a.vfixupimmps(xmmA, xmmB, xmmC, 0);
a.vfixupimmps(xmmA, xmmB, anyptr_gpC, 0);
a.vfixupimmps(ymmA, ymmB, ymmC, 0);
a.vfixupimmps(ymmA, ymmB, anyptr_gpC, 0);
a.vfixupimmps(zmmA, zmmB, zmmC, 0);
a.vfixupimmps(zmmA, zmmB, anyptr_gpC, 0);
a.vfixupimmsd(xmmA, xmmB, xmmC, 0);
a.vfixupimmsd(xmmA, xmmB, anyptr_gpC, 0);
a.vfixupimmss(xmmA, xmmB, xmmC, 0);
a.vfixupimmss(xmmA, xmmB, anyptr_gpC, 0);
a.vfmadd132pd(xmmA, xmmB, xmmC);
a.vfmadd132pd(xmmA, xmmB, anyptr_gpC);
a.vfmadd132pd(ymmA, ymmB, ymmC);
a.vfmadd132pd(ymmA, ymmB, anyptr_gpC);
a.vfmadd132pd(zmmA, zmmB, zmmC);
a.vfmadd132pd(zmmA, zmmB, anyptr_gpC);
a.vfmadd132ps(xmmA, xmmB, xmmC);
a.vfmadd132ps(xmmA, xmmB, anyptr_gpC);
a.vfmadd132ps(ymmA, ymmB, ymmC);
a.vfmadd132ps(ymmA, ymmB, anyptr_gpC);
a.vfmadd132ps(zmmA, zmmB, zmmC);
a.vfmadd132ps(zmmA, zmmB, anyptr_gpC);
a.vfmadd132sd(xmmA, xmmB, xmmC);
a.vfmadd132sd(xmmA, xmmB, anyptr_gpC);
a.vfmadd132ss(xmmA, xmmB, xmmC);
a.vfmadd132ss(xmmA, xmmB, anyptr_gpC);
a.vfmadd213pd(xmmA, xmmB, xmmC);
a.vfmadd213pd(xmmA, xmmB, anyptr_gpC);
a.vfmadd213pd(ymmA, ymmB, ymmC);
a.vfmadd213pd(ymmA, ymmB, anyptr_gpC);
a.vfmadd213pd(zmmA, zmmB, zmmC);
a.vfmadd213pd(zmmA, zmmB, anyptr_gpC);
a.vfmadd213ps(xmmA, xmmB, xmmC);
a.vfmadd213ps(xmmA, xmmB, anyptr_gpC);
a.vfmadd213ps(ymmA, ymmB, ymmC);
a.vfmadd213ps(ymmA, ymmB, anyptr_gpC);
a.vfmadd213ps(zmmA, zmmB, zmmC);
a.vfmadd213ps(zmmA, zmmB, anyptr_gpC);
a.vfmadd213sd(xmmA, xmmB, xmmC);
a.vfmadd213sd(xmmA, xmmB, anyptr_gpC);
a.vfmadd213ss(xmmA, xmmB, xmmC);
a.vfmadd213ss(xmmA, xmmB, anyptr_gpC);
a.vfmadd231pd(xmmA, xmmB, xmmC);
a.vfmadd231pd(xmmA, xmmB, anyptr_gpC);
a.vfmadd231pd(ymmA, ymmB, ymmC);
a.vfmadd231pd(ymmA, ymmB, anyptr_gpC);
a.vfmadd231pd(zmmA, zmmB, zmmC);
a.vfmadd231pd(zmmA, zmmB, anyptr_gpC);
a.vfmadd231ps(xmmA, xmmB, xmmC);
a.vfmadd231ps(xmmA, xmmB, anyptr_gpC);
a.vfmadd231ps(ymmA, ymmB, ymmC);
a.vfmadd231ps(ymmA, ymmB, anyptr_gpC);
a.vfmadd231ps(zmmA, zmmB, zmmC);
a.vfmadd231ps(zmmA, zmmB, anyptr_gpC);
a.vfmadd231sd(xmmA, xmmB, xmmC);
a.vfmadd231sd(xmmA, xmmB, anyptr_gpC);
a.vfmadd231ss(xmmA, xmmB, xmmC);
a.vfmadd231ss(xmmA, xmmB, anyptr_gpC);
a.vfmaddsub132pd(xmmA, xmmB, xmmC);
a.vfmaddsub132pd(xmmA, xmmB, anyptr_gpC);
a.vfmaddsub132pd(ymmA, ymmB, ymmC);
a.vfmaddsub132pd(ymmA, ymmB, anyptr_gpC);
a.vfmaddsub132pd(zmmA, zmmB, zmmC);
a.vfmaddsub132pd(zmmA, zmmB, anyptr_gpC);
a.vfmaddsub132ps(xmmA, xmmB, xmmC);
a.vfmaddsub132ps(xmmA, xmmB, anyptr_gpC);
a.vfmaddsub132ps(ymmA, ymmB, ymmC);
a.vfmaddsub132ps(ymmA, ymmB, anyptr_gpC);
a.vfmaddsub132ps(zmmA, zmmB, zmmC);
a.vfmaddsub132ps(zmmA, zmmB, anyptr_gpC);
a.vfmaddsub213pd(xmmA, xmmB, xmmC);
a.vfmaddsub213pd(xmmA, xmmB, anyptr_gpC);
a.vfmaddsub213pd(ymmA, ymmB, ymmC);
a.vfmaddsub213pd(ymmA, ymmB, anyptr_gpC);
a.vfmaddsub213pd(zmmA, zmmB, zmmC);
a.vfmaddsub213pd(zmmA, zmmB, anyptr_gpC);
a.vfmaddsub213ps(xmmA, xmmB, xmmC);
a.vfmaddsub213ps(xmmA, xmmB, anyptr_gpC);
a.vfmaddsub213ps(ymmA, ymmB, ymmC);
a.vfmaddsub213ps(ymmA, ymmB, anyptr_gpC);
a.vfmaddsub213ps(zmmA, zmmB, zmmC);
a.vfmaddsub213ps(zmmA, zmmB, anyptr_gpC);
a.vfmaddsub231pd(xmmA, xmmB, xmmC);
a.vfmaddsub231pd(xmmA, xmmB, anyptr_gpC);
a.vfmaddsub231pd(ymmA, ymmB, ymmC);
a.vfmaddsub231pd(ymmA, ymmB, anyptr_gpC);
a.vfmaddsub231pd(zmmA, zmmB, zmmC);
a.vfmaddsub231pd(zmmA, zmmB, anyptr_gpC);
a.vfmaddsub231ps(xmmA, xmmB, xmmC);
a.vfmaddsub231ps(xmmA, xmmB, anyptr_gpC);
a.vfmaddsub231ps(ymmA, ymmB, ymmC);
a.vfmaddsub231ps(ymmA, ymmB, anyptr_gpC);
a.vfmaddsub231ps(zmmA, zmmB, zmmC);
a.vfmaddsub231ps(zmmA, zmmB, anyptr_gpC);
a.vfmsub132pd(xmmA, xmmB, xmmC);
a.vfmsub132pd(xmmA, xmmB, anyptr_gpC);
a.vfmsub132pd(ymmA, ymmB, ymmC);
a.vfmsub132pd(ymmA, ymmB, anyptr_gpC);
a.vfmsub132pd(zmmA, zmmB, zmmC);
a.vfmsub132pd(zmmA, zmmB, anyptr_gpC);
a.vfmsub132ps(xmmA, xmmB, xmmC);
a.vfmsub132ps(xmmA, xmmB, anyptr_gpC);
a.vfmsub132ps(ymmA, ymmB, ymmC);
a.vfmsub132ps(ymmA, ymmB, anyptr_gpC);
a.vfmsub132ps(zmmA, zmmB, zmmC);
a.vfmsub132ps(zmmA, zmmB, anyptr_gpC);
a.vfmsub132sd(xmmA, xmmB, xmmC);
a.vfmsub132sd(xmmA, xmmB, anyptr_gpC);
a.vfmsub132ss(xmmA, xmmB, xmmC);
a.vfmsub132ss(xmmA, xmmB, anyptr_gpC);
a.vfmsub213pd(xmmA, xmmB, xmmC);
a.vfmsub213pd(xmmA, xmmB, anyptr_gpC);
a.vfmsub213pd(ymmA, ymmB, ymmC);
a.vfmsub213pd(ymmA, ymmB, anyptr_gpC);
a.vfmsub213pd(zmmA, zmmB, zmmC);
a.vfmsub213pd(zmmA, zmmB, anyptr_gpC);
a.vfmsub213ps(xmmA, xmmB, xmmC);
a.vfmsub213ps(xmmA, xmmB, anyptr_gpC);
a.vfmsub213ps(ymmA, ymmB, ymmC);
a.vfmsub213ps(ymmA, ymmB, anyptr_gpC);
a.vfmsub213ps(zmmA, zmmB, zmmC);
a.vfmsub213ps(zmmA, zmmB, anyptr_gpC);
a.vfmsub213sd(xmmA, xmmB, xmmC);
a.vfmsub213sd(xmmA, xmmB, anyptr_gpC);
a.vfmsub213ss(xmmA, xmmB, xmmC);
a.vfmsub213ss(xmmA, xmmB, anyptr_gpC);
a.vfmsub231pd(xmmA, xmmB, xmmC);
a.vfmsub231pd(xmmA, xmmB, anyptr_gpC);
a.vfmsub231pd(ymmA, ymmB, ymmC);
a.vfmsub231pd(ymmA, ymmB, anyptr_gpC);
a.vfmsub231pd(zmmA, zmmB, zmmC);
a.vfmsub231pd(zmmA, zmmB, anyptr_gpC);
a.vfmsub231ps(xmmA, xmmB, xmmC);
a.vfmsub231ps(xmmA, xmmB, anyptr_gpC);
a.vfmsub231ps(ymmA, ymmB, ymmC);
a.vfmsub231ps(ymmA, ymmB, anyptr_gpC);
a.vfmsub231ps(zmmA, zmmB, zmmC);
a.vfmsub231ps(zmmA, zmmB, anyptr_gpC);
a.vfmsub231sd(xmmA, xmmB, xmmC);
a.vfmsub231sd(xmmA, xmmB, anyptr_gpC);
a.vfmsub231ss(xmmA, xmmB, xmmC);
a.vfmsub231ss(xmmA, xmmB, anyptr_gpC);
a.vfmsubadd132pd(xmmA, xmmB, xmmC);
a.vfmsubadd132pd(xmmA, xmmB, anyptr_gpC);
a.vfmsubadd132pd(ymmA, ymmB, ymmC);
a.vfmsubadd132pd(ymmA, ymmB, anyptr_gpC);
a.vfmsubadd132pd(zmmA, zmmB, zmmC);
a.vfmsubadd132pd(zmmA, zmmB, anyptr_gpC);
a.vfmsubadd132ps(xmmA, xmmB, xmmC);
a.vfmsubadd132ps(xmmA, xmmB, anyptr_gpC);
a.vfmsubadd132ps(ymmA, ymmB, ymmC);
a.vfmsubadd132ps(ymmA, ymmB, anyptr_gpC);
a.vfmsubadd132ps(zmmA, zmmB, zmmC);
a.vfmsubadd132ps(zmmA, zmmB, anyptr_gpC);
a.vfmsubadd213pd(xmmA, xmmB, xmmC);
a.vfmsubadd213pd(xmmA, xmmB, anyptr_gpC);
a.vfmsubadd213pd(ymmA, ymmB, ymmC);
a.vfmsubadd213pd(ymmA, ymmB, anyptr_gpC);
a.vfmsubadd213pd(zmmA, zmmB, zmmC);
a.vfmsubadd213pd(zmmA, zmmB, anyptr_gpC);
a.vfmsubadd213ps(xmmA, xmmB, xmmC);
a.vfmsubadd213ps(xmmA, xmmB, anyptr_gpC);
a.vfmsubadd213ps(ymmA, ymmB, ymmC);
a.vfmsubadd213ps(ymmA, ymmB, anyptr_gpC);
a.vfmsubadd213ps(zmmA, zmmB, zmmC);
a.vfmsubadd213ps(zmmA, zmmB, anyptr_gpC);
a.vfmsubadd231pd(xmmA, xmmB, xmmC);
a.vfmsubadd231pd(xmmA, xmmB, anyptr_gpC);
a.vfmsubadd231pd(ymmA, ymmB, ymmC);
a.vfmsubadd231pd(ymmA, ymmB, anyptr_gpC);
a.vfmsubadd231pd(zmmA, zmmB, zmmC);
a.vfmsubadd231pd(zmmA, zmmB, anyptr_gpC);
a.vfmsubadd231ps(xmmA, xmmB, xmmC);
a.vfmsubadd231ps(xmmA, xmmB, anyptr_gpC);
a.vfmsubadd231ps(ymmA, ymmB, ymmC);
a.vfmsubadd231ps(ymmA, ymmB, anyptr_gpC);
a.vfmsubadd231ps(zmmA, zmmB, zmmC);
a.vfmsubadd231ps(zmmA, zmmB, anyptr_gpC);
a.vfnmadd132pd(xmmA, xmmB, xmmC);
a.vfnmadd132pd(xmmA, xmmB, anyptr_gpC);
a.vfnmadd132pd(ymmA, ymmB, ymmC);
a.vfnmadd132pd(ymmA, ymmB, anyptr_gpC);
a.vfnmadd132pd(zmmA, zmmB, zmmC);
a.vfnmadd132pd(zmmA, zmmB, anyptr_gpC);
a.vfnmadd132ps(xmmA, xmmB, xmmC);
a.vfnmadd132ps(xmmA, xmmB, anyptr_gpC);
a.vfnmadd132ps(ymmA, ymmB, ymmC);
a.vfnmadd132ps(ymmA, ymmB, anyptr_gpC);
a.vfnmadd132ps(zmmA, zmmB, zmmC);
a.vfnmadd132ps(zmmA, zmmB, anyptr_gpC);
a.vfnmadd132sd(xmmA, xmmB, xmmC);
a.vfnmadd132sd(xmmA, xmmB, anyptr_gpC);
a.vfnmadd132ss(xmmA, xmmB, xmmC);
a.vfnmadd132ss(xmmA, xmmB, anyptr_gpC);
a.vfnmadd213pd(xmmA, xmmB, xmmC);
a.vfnmadd213pd(xmmA, xmmB, anyptr_gpC);
a.vfnmadd213pd(ymmA, ymmB, ymmC);
a.vfnmadd213pd(ymmA, ymmB, anyptr_gpC);
a.vfnmadd213pd(zmmA, zmmB, zmmC);
a.vfnmadd213pd(zmmA, zmmB, anyptr_gpC);
a.vfnmadd213ps(xmmA, xmmB, xmmC);
a.vfnmadd213ps(xmmA, xmmB, anyptr_gpC);
a.vfnmadd213ps(ymmA, ymmB, ymmC);
a.vfnmadd213ps(ymmA, ymmB, anyptr_gpC);
a.vfnmadd213ps(zmmA, zmmB, zmmC);
a.vfnmadd213ps(zmmA, zmmB, anyptr_gpC);
a.vfnmadd213sd(xmmA, xmmB, xmmC);
a.vfnmadd213sd(xmmA, xmmB, anyptr_gpC);
a.vfnmadd213ss(xmmA, xmmB, xmmC);
a.vfnmadd213ss(xmmA, xmmB, anyptr_gpC);
a.vfnmadd231pd(xmmA, xmmB, xmmC);
a.vfnmadd231pd(xmmA, xmmB, anyptr_gpC);
a.vfnmadd231pd(ymmA, ymmB, ymmC);
a.vfnmadd231pd(ymmA, ymmB, anyptr_gpC);
a.vfnmadd231pd(zmmA, zmmB, zmmC);
a.vfnmadd231pd(zmmA, zmmB, anyptr_gpC);
a.vfnmadd231ps(xmmA, xmmB, xmmC);
a.vfnmadd231ps(xmmA, xmmB, anyptr_gpC);
a.vfnmadd231ps(ymmA, ymmB, ymmC);
a.vfnmadd231ps(ymmA, ymmB, anyptr_gpC);
a.vfnmadd231ps(zmmA, zmmB, zmmC);
a.vfnmadd231ps(zmmA, zmmB, anyptr_gpC);
a.vfnmadd231sd(xmmA, xmmB, xmmC);
a.vfnmadd231sd(xmmA, xmmB, anyptr_gpC);
a.vfnmadd231ss(xmmA, xmmB, xmmC);
a.vfnmadd231ss(xmmA, xmmB, anyptr_gpC);
a.vfnmsub132pd(xmmA, xmmB, xmmC);
a.vfnmsub132pd(xmmA, xmmB, anyptr_gpC);
a.vfnmsub132pd(ymmA, ymmB, ymmC);
a.vfnmsub132pd(ymmA, ymmB, anyptr_gpC);
a.vfnmsub132pd(zmmA, zmmB, zmmC);
a.vfnmsub132pd(zmmA, zmmB, anyptr_gpC);
a.vfnmsub132ps(xmmA, xmmB, xmmC);
a.vfnmsub132ps(xmmA, xmmB, anyptr_gpC);
a.vfnmsub132ps(ymmA, ymmB, ymmC);
a.vfnmsub132ps(ymmA, ymmB, anyptr_gpC);
a.vfnmsub132ps(zmmA, zmmB, zmmC);
a.vfnmsub132ps(zmmA, zmmB, anyptr_gpC);
a.vfnmsub132sd(xmmA, xmmB, xmmC);
a.vfnmsub132sd(xmmA, xmmB, anyptr_gpC);
a.vfnmsub132ss(xmmA, xmmB, xmmC);
a.vfnmsub132ss(xmmA, xmmB, anyptr_gpC);
a.vfnmsub213pd(xmmA, xmmB, xmmC);
a.vfnmsub213pd(xmmA, xmmB, anyptr_gpC);
a.vfnmsub213pd(ymmA, ymmB, ymmC);
a.vfnmsub213pd(ymmA, ymmB, anyptr_gpC);
a.vfnmsub213pd(zmmA, zmmB, zmmC);
a.vfnmsub213pd(zmmA, zmmB, anyptr_gpC);
a.vfnmsub213ps(xmmA, xmmB, xmmC);
a.vfnmsub213ps(xmmA, xmmB, anyptr_gpC);
a.vfnmsub213ps(ymmA, ymmB, ymmC);
a.vfnmsub213ps(ymmA, ymmB, anyptr_gpC);
a.vfnmsub213ps(zmmA, zmmB, zmmC);
a.vfnmsub213ps(zmmA, zmmB, anyptr_gpC);
a.vfnmsub213sd(xmmA, xmmB, xmmC);
a.vfnmsub213sd(xmmA, xmmB, anyptr_gpC);
a.vfnmsub213ss(xmmA, xmmB, xmmC);
a.vfnmsub213ss(xmmA, xmmB, anyptr_gpC);
a.vfnmsub231pd(xmmA, xmmB, xmmC);
a.vfnmsub231pd(xmmA, xmmB, anyptr_gpC);
a.vfnmsub231pd(ymmA, ymmB, ymmC);
a.vfnmsub231pd(ymmA, ymmB, anyptr_gpC);
a.vfnmsub231pd(zmmA, zmmB, zmmC);
a.vfnmsub231pd(zmmA, zmmB, anyptr_gpC);
a.vfnmsub231ps(xmmA, xmmB, xmmC);
a.vfnmsub231ps(xmmA, xmmB, anyptr_gpC);
a.vfnmsub231ps(ymmA, ymmB, ymmC);
a.vfnmsub231ps(ymmA, ymmB, anyptr_gpC);
a.vfnmsub231ps(zmmA, zmmB, zmmC);
a.vfnmsub231ps(zmmA, zmmB, anyptr_gpC);
a.vfnmsub231sd(xmmA, xmmB, xmmC);
a.vfnmsub231sd(xmmA, xmmB, anyptr_gpC);
a.vfnmsub231ss(xmmA, xmmB, xmmC);
a.vfnmsub231ss(xmmA, xmmB, anyptr_gpC);
a.vfpclasspd(kA, xmmB, 0);
a.vfpclasspd(kA, anyptr_gpB, 0);
a.vfpclasspd(kA, ymmB, 0);
a.vfpclasspd(kA, anyptr_gpB, 0);
a.vfpclasspd(kA, zmmB, 0);
a.vfpclasspd(kA, anyptr_gpB, 0);
a.vfpclassps(kA, xmmB, 0);
a.vfpclassps(kA, anyptr_gpB, 0);
a.vfpclassps(kA, ymmB, 0);
a.vfpclassps(kA, anyptr_gpB, 0);
a.vfpclassps(kA, zmmB, 0);
a.vfpclassps(kA, anyptr_gpB, 0);
a.vfpclasssd(kA, xmmB, 0);
a.vfpclasssd(kA, anyptr_gpB, 0);
a.vfpclassss(kA, xmmB, 0);
a.vfpclassss(kA, anyptr_gpB, 0);
a.vgatherdpd(xmmA, vx_ptr);
a.vgatherdpd(ymmA, vy_ptr);
a.vgatherdpd(zmmA, vz_ptr);
a.vgatherdps(xmmA, vx_ptr);
a.vgatherdps(ymmA, vy_ptr);
a.vgatherdps(zmmA, vz_ptr);
a.vgatherpf0dpd(vy_ptr);
a.vgatherpf0dps(vz_ptr);
a.vgatherpf0qpd(vz_ptr);
a.vgatherpf0qps(vz_ptr);
a.vgatherpf1dpd(vy_ptr);
a.vgatherpf1dps(vz_ptr);
a.vgatherpf1qpd(vz_ptr);
a.vgatherpf1qps(vz_ptr);
a.vgatherqpd(xmmA, vx_ptr);
a.vgatherqpd(ymmA, vy_ptr);
a.vgatherqpd(zmmA, vz_ptr);
a.vgatherqps(xmmA, vx_ptr);
a.vgatherqps(ymmA, vy_ptr);
a.vgatherqps(zmmA, vz_ptr);
a.vgetexppd(xmmA, xmmB);
a.vgetexppd(xmmA, anyptr_gpB);
a.vgetexppd(ymmA, ymmB);
a.vgetexppd(ymmA, anyptr_gpB);
a.vgetexppd(zmmA, zmmB);
a.vgetexppd(zmmA, anyptr_gpB);
a.vgetexpps(xmmA, xmmB);
a.vgetexpps(xmmA, anyptr_gpB);
a.vgetexpps(ymmA, ymmB);
a.vgetexpps(ymmA, anyptr_gpB);
a.vgetexpps(zmmA, zmmB);
a.vgetexpps(zmmA, anyptr_gpB);
a.vgetexpsd(xmmA, xmmB);
a.vgetexpsd(xmmA, anyptr_gpB);
a.vgetexpss(xmmA, xmmB);
a.vgetexpss(xmmA, anyptr_gpB);
a.vgetmantpd(xmmA, xmmB, 0);
a.vgetmantpd(xmmA, anyptr_gpB, 0);
a.vgetmantpd(ymmA, ymmB, 0);
a.vgetmantpd(ymmA, anyptr_gpB, 0);
a.vgetmantpd(zmmA, zmmB, 0);
a.vgetmantpd(zmmA, anyptr_gpB, 0);
a.vgetmantps(xmmA, xmmB, 0);
a.vgetmantps(xmmA, anyptr_gpB, 0);
a.vgetmantps(ymmA, ymmB, 0);
a.vgetmantps(ymmA, anyptr_gpB, 0);
a.vgetmantps(zmmA, zmmB, 0);
a.vgetmantps(zmmA, anyptr_gpB, 0);
a.vgetmantsd(xmmA, xmmB, 0);
a.vgetmantsd(xmmA, anyptr_gpB, 0);
a.vgetmantss(xmmA, xmmB, 0);
a.vgetmantss(xmmA, anyptr_gpB, 0);
a.vinsertf32x4(ymmA, ymmB, xmmC, 0);
a.vinsertf32x4(ymmA, ymmB, anyptr_gpC, 0);
a.vinsertf32x4(zmmA, zmmB, xmmC, 0);
a.vinsertf32x4(zmmA, zmmB, anyptr_gpC, 0);
a.vinsertf32x8(zmmA, zmmB, ymmC, 0);
a.vinsertf32x8(zmmA, zmmB, anyptr_gpC, 0);
a.vinsertf64x2(ymmA, ymmB, xmmC, 0);
a.vinsertf64x2(ymmA, ymmB, anyptr_gpC, 0);
a.vinsertf64x2(zmmA, zmmB, xmmC, 0);
a.vinsertf64x2(zmmA, zmmB, anyptr_gpC, 0);
a.vinsertf64x4(zmmA, zmmB, ymmC, 0);
a.vinsertf64x4(zmmA, zmmB, anyptr_gpC, 0);
a.vinserti32x4(ymmA, ymmB, xmmC, 0);
a.vinserti32x4(ymmA, ymmB, anyptr_gpC, 0);
a.vinserti32x4(zmmA, zmmB, xmmC, 0);
a.vinserti32x4(zmmA, zmmB, anyptr_gpC, 0);
a.vinserti32x8(zmmA, zmmB, ymmC, 0);
a.vinserti32x8(zmmA, zmmB, anyptr_gpC, 0);
a.vinserti64x2(ymmA, ymmB, xmmC, 0);
a.vinserti64x2(ymmA, ymmB, anyptr_gpC, 0);
a.vinserti64x2(zmmA, zmmB, xmmC, 0);
a.vinserti64x2(zmmA, zmmB, anyptr_gpC, 0);
a.vinserti64x4(zmmA, zmmB, ymmC, 0);
a.vinserti64x4(zmmA, zmmB, anyptr_gpC, 0);
a.vinsertps(xmmA, xmmB, xmmC, 0);
a.vinsertps(xmmA, xmmB, anyptr_gpC, 0);
a.vmaxpd(xmmA, xmmB, xmmC);
a.vmaxpd(xmmA, xmmB, anyptr_gpC);
a.vmaxpd(ymmA, ymmB, ymmC);
a.vmaxpd(ymmA, ymmB, anyptr_gpC);
a.vmaxpd(zmmA, zmmB, zmmC);
a.vmaxpd(zmmA, zmmB, anyptr_gpC);
a.vmaxps(xmmA, xmmB, xmmC);
a.vmaxps(xmmA, xmmB, anyptr_gpC);
a.vmaxps(ymmA, ymmB, ymmC);
a.vmaxps(ymmA, ymmB, anyptr_gpC);
a.vmaxps(zmmA, zmmB, zmmC);
a.vmaxps(zmmA, zmmB, anyptr_gpC);
a.vmaxsd(xmmA, xmmB, xmmC);
a.vmaxsd(xmmA, xmmB, anyptr_gpC);
a.vmaxss(xmmA, xmmB, xmmC);
a.vmaxss(xmmA, xmmB, anyptr_gpC);
a.vminpd(xmmA, xmmB, xmmC);
a.vminpd(xmmA, xmmB, anyptr_gpC);
a.vminpd(ymmA, ymmB, ymmC);
a.vminpd(ymmA, ymmB, anyptr_gpC);
a.vminpd(zmmA, zmmB, zmmC);
a.vminpd(zmmA, zmmB, anyptr_gpC);
a.vminps(xmmA, xmmB, xmmC);
a.vminps(xmmA, xmmB, anyptr_gpC);
a.vminps(ymmA, ymmB, ymmC);
a.vminps(ymmA, ymmB, anyptr_gpC);
a.vminps(zmmA, zmmB, zmmC);
a.vminps(zmmA, zmmB, anyptr_gpC);
a.vminsd(xmmA, xmmB, xmmC);
a.vminsd(xmmA, xmmB, anyptr_gpC);
a.vminss(xmmA, xmmB, xmmC);
a.vminss(xmmA, xmmB, anyptr_gpC);
a.vmovapd(xmmA, xmmB);
a.vmovapd(xmmA, anyptr_gpB);
a.vmovapd(xmmA, xmmB);
a.vmovapd(anyptr_gpA, xmmB);
a.vmovapd(ymmA, ymmB);
a.vmovapd(ymmA, anyptr_gpB);
a.vmovapd(ymmA, ymmB);
a.vmovapd(anyptr_gpA, ymmB);
a.vmovapd(zmmA, zmmB);
a.vmovapd(zmmA, anyptr_gpB);
a.vmovapd(zmmA, zmmB);
a.vmovapd(anyptr_gpA, zmmB);
a.vmovaps(xmmA, xmmB);
a.vmovaps(xmmA, anyptr_gpB);
a.vmovaps(xmmA, xmmB);
a.vmovaps(anyptr_gpA, xmmB);
a.vmovaps(ymmA, ymmB);
a.vmovaps(ymmA, anyptr_gpB);
a.vmovaps(ymmA, ymmB);
a.vmovaps(anyptr_gpA, ymmB);
a.vmovaps(zmmA, zmmB);
a.vmovaps(zmmA, anyptr_gpB);
a.vmovaps(zmmA, zmmB);
a.vmovaps(anyptr_gpA, zmmB);
a.vmovd(gdA, xmmB);
a.vmovd(gzA, xmmB);
a.vmovd(anyptr_gpA, xmmB);
a.vmovd(xmmA, gdB);
a.vmovd(xmmA, gzB);
a.vmovd(xmmA, anyptr_gpB);
a.vmovddup(xmmA, xmmB);
a.vmovddup(xmmA, anyptr_gpB);
a.vmovddup(ymmA, ymmB);
a.vmovddup(ymmA, anyptr_gpB);
a.vmovddup(zmmA, zmmB);
a.vmovddup(zmmA, anyptr_gpB);
a.vmovdqa32(xmmA, xmmB);
a.vmovdqa32(xmmA, anyptr_gpB);
a.vmovdqa32(xmmA, xmmB);
a.vmovdqa32(anyptr_gpA, xmmB);
a.vmovdqa32(ymmA, ymmB);
a.vmovdqa32(ymmA, anyptr_gpB);
a.vmovdqa32(ymmA, ymmB);
a.vmovdqa32(anyptr_gpA, ymmB);
a.vmovdqa32(zmmA, zmmB);
a.vmovdqa32(zmmA, anyptr_gpB);
a.vmovdqa32(zmmA, zmmB);
a.vmovdqa32(anyptr_gpA, zmmB);
a.vmovdqa64(xmmA, xmmB);
a.vmovdqa64(xmmA, anyptr_gpB);
a.vmovdqa64(xmmA, xmmB);
a.vmovdqa64(anyptr_gpA, xmmB);
a.vmovdqa64(ymmA, ymmB);
a.vmovdqa64(ymmA, anyptr_gpB);
a.vmovdqa64(ymmA, ymmB);
a.vmovdqa64(anyptr_gpA, ymmB);
a.vmovdqa64(zmmA, zmmB);
a.vmovdqa64(zmmA, anyptr_gpB);
a.vmovdqa64(zmmA, zmmB);
a.vmovdqa64(anyptr_gpA, zmmB);
a.vmovdqu16(xmmA, xmmB);
a.vmovdqu16(xmmA, anyptr_gpB);
a.vmovdqu16(xmmA, xmmB);
a.vmovdqu16(anyptr_gpA, xmmB);
a.vmovdqu16(ymmA, ymmB);
a.vmovdqu16(ymmA, anyptr_gpB);
a.vmovdqu16(ymmA, ymmB);
a.vmovdqu16(anyptr_gpA, ymmB);
a.vmovdqu16(zmmA, zmmB);
a.vmovdqu16(zmmA, anyptr_gpB);
a.vmovdqu16(zmmA, zmmB);
a.vmovdqu16(anyptr_gpA, zmmB);
a.vmovdqu32(xmmA, xmmB);
a.vmovdqu32(xmmA, anyptr_gpB);
a.vmovdqu32(xmmA, xmmB);
a.vmovdqu32(anyptr_gpA, xmmB);
a.vmovdqu32(ymmA, ymmB);
a.vmovdqu32(ymmA, anyptr_gpB);
a.vmovdqu32(ymmA, ymmB);
a.vmovdqu32(anyptr_gpA, ymmB);
a.vmovdqu32(zmmA, zmmB);
a.vmovdqu32(zmmA, anyptr_gpB);
a.vmovdqu32(zmmA, zmmB);
a.vmovdqu32(anyptr_gpA, zmmB);
a.vmovdqu64(xmmA, xmmB);
a.vmovdqu64(xmmA, anyptr_gpB);
a.vmovdqu64(xmmA, xmmB);
a.vmovdqu64(anyptr_gpA, xmmB);
a.vmovdqu64(ymmA, ymmB);
a.vmovdqu64(ymmA, anyptr_gpB);
a.vmovdqu64(ymmA, ymmB);
a.vmovdqu64(anyptr_gpA, ymmB);
a.vmovdqu64(zmmA, zmmB);
a.vmovdqu64(zmmA, anyptr_gpB);
a.vmovdqu64(zmmA, zmmB);
a.vmovdqu64(anyptr_gpA, zmmB);
a.vmovdqu8(xmmA, xmmB);
a.vmovdqu8(xmmA, anyptr_gpB);
a.vmovdqu8(xmmA, xmmB);
a.vmovdqu8(anyptr_gpA, xmmB);
a.vmovdqu8(ymmA, ymmB);
a.vmovdqu8(ymmA, anyptr_gpB);
a.vmovdqu8(ymmA, ymmB);
a.vmovdqu8(anyptr_gpA, ymmB);
a.vmovdqu8(zmmA, zmmB);
a.vmovdqu8(zmmA, anyptr_gpB);
a.vmovdqu8(zmmA, zmmB);
a.vmovdqu8(anyptr_gpA, zmmB);
a.vmovhlps(xmmA, xmmB, xmmC);
a.vmovhpd(anyptr_gpA, xmmB);
a.vmovhpd(xmmA, xmmB, anyptr_gpC);
a.vmovhps(anyptr_gpA, xmmB);
a.vmovhps(xmmA, xmmB, anyptr_gpC);
a.vmovlhps(xmmA, xmmB, xmmC);
a.vmovlpd(anyptr_gpA, xmmB);
a.vmovlpd(xmmA, xmmB, anyptr_gpC);
a.vmovlps(anyptr_gpA, xmmB);
a.vmovlps(xmmA, xmmB, anyptr_gpC);
a.vmovntdq(anyptr_gpA, xmmB);
a.vmovntdq(anyptr_gpA, ymmB);
a.vmovntdq(anyptr_gpA, zmmB);
a.vmovntdqa(xmmA, anyptr_gpB);
a.vmovntdqa(ymmA, anyptr_gpB);
a.vmovntdqa(zmmA, anyptr_gpB);
a.vmovntpd(anyptr_gpA, xmmB);
a.vmovntpd(anyptr_gpA, ymmB);
a.vmovntpd(anyptr_gpA, zmmB);
a.vmovntps(anyptr_gpA, xmmB);
a.vmovntps(anyptr_gpA, ymmB);
a.vmovntps(anyptr_gpA, zmmB);
if (isX64) a.vmovq(gzA, xmmB);
if (isX64) a.vmovq(xmmA, gzB);
a.vmovq(anyptr_gpA, xmmB);
a.vmovq(xmmA, anyptr_gpB);
a.vmovq(xmmA, xmmB);
a.vmovq(xmmA, anyptr_gpB);
a.vmovq(xmmA, xmmB);
a.vmovq(anyptr_gpA, xmmB);
a.vmovsd(anyptr_gpA, xmmB);
a.vmovsd(xmmA, anyptr_gpB);
a.vmovsd(xmmA, xmmB, xmmC);
a.vmovsd(xmmA, xmmB, xmmC);
a.vmovshdup(xmmA, xmmB);
a.vmovshdup(xmmA, anyptr_gpB);
a.vmovshdup(ymmA, ymmB);
a.vmovshdup(ymmA, anyptr_gpB);
a.vmovshdup(zmmA, zmmB);
a.vmovshdup(zmmA, anyptr_gpB);
a.vmovsldup(xmmA, xmmB);
a.vmovsldup(xmmA, anyptr_gpB);
a.vmovsldup(ymmA, ymmB);
a.vmovsldup(ymmA, anyptr_gpB);
a.vmovsldup(zmmA, zmmB);
a.vmovsldup(zmmA, anyptr_gpB);
a.vmovss(anyptr_gpA, xmmB);
a.vmovss(xmmA, anyptr_gpB);
a.vmovss(xmmA, xmmB, xmmC);
a.vmovss(xmmA, xmmB, xmmC);
a.vmovupd(xmmA, xmmB);
a.vmovupd(xmmA, anyptr_gpB);
a.vmovupd(xmmA, xmmB);
a.vmovupd(anyptr_gpA, xmmB);
a.vmovupd(ymmA, ymmB);
a.vmovupd(ymmA, anyptr_gpB);
a.vmovupd(ymmA, ymmB);
a.vmovupd(anyptr_gpA, ymmB);
a.vmovupd(zmmA, zmmB);
a.vmovupd(zmmA, anyptr_gpB);
a.vmovupd(zmmA, zmmB);
a.vmovupd(anyptr_gpA, zmmB);
a.vmovups(xmmA, xmmB);
a.vmovups(xmmA, anyptr_gpB);
a.vmovups(xmmA, xmmB);
a.vmovups(anyptr_gpA, xmmB);
a.vmovups(ymmA, ymmB);
a.vmovups(ymmA, anyptr_gpB);
a.vmovups(ymmA, ymmB);
a.vmovups(anyptr_gpA, ymmB);
a.vmovups(zmmA, zmmB);
a.vmovups(zmmA, anyptr_gpB);
a.vmovups(zmmA, zmmB);
a.vmovups(anyptr_gpA, zmmB);
a.vmulpd(xmmA, xmmB, xmmC);
a.vmulpd(xmmA, xmmB, anyptr_gpC);
a.vmulpd(ymmA, ymmB, ymmC);
a.vmulpd(ymmA, ymmB, anyptr_gpC);
a.vmulpd(zmmA, zmmB, zmmC);
a.vmulpd(zmmA, zmmB, anyptr_gpC);
a.vmulps(xmmA, xmmB, xmmC);
a.vmulps(xmmA, xmmB, anyptr_gpC);
a.vmulps(ymmA, ymmB, ymmC);
a.vmulps(ymmA, ymmB, anyptr_gpC);
a.vmulps(zmmA, zmmB, zmmC);
a.vmulps(zmmA, zmmB, anyptr_gpC);
a.vmulsd(xmmA, xmmB, xmmC);
a.vmulsd(xmmA, xmmB, anyptr_gpC);
a.vmulss(xmmA, xmmB, xmmC);
a.vmulss(xmmA, xmmB, anyptr_gpC);
a.vorpd(xmmA, xmmB, xmmC);
a.vorpd(xmmA, xmmB, anyptr_gpC);
a.vorpd(ymmA, ymmB, ymmC);
a.vorpd(ymmA, ymmB, anyptr_gpC);
a.vorpd(zmmA, zmmB, zmmC);
a.vorpd(zmmA, zmmB, anyptr_gpC);
a.vorps(xmmA, xmmB, xmmC);
a.vorps(xmmA, xmmB, anyptr_gpC);
a.vorps(ymmA, ymmB, ymmC);
a.vorps(ymmA, ymmB, anyptr_gpC);
a.vorps(zmmA, zmmB, zmmC);
a.vorps(zmmA, zmmB, anyptr_gpC);
a.vpabsb(xmmA, xmmB);
a.vpabsb(xmmA, anyptr_gpB);
a.vpabsb(ymmA, ymmB);
a.vpabsb(ymmA, anyptr_gpB);
a.vpabsb(zmmA, zmmB);
a.vpabsb(zmmA, anyptr_gpB);
a.vpabsd(xmmA, xmmB);
a.vpabsd(xmmA, anyptr_gpB);
a.vpabsd(ymmA, ymmB);
a.vpabsd(ymmA, anyptr_gpB);
a.vpabsd(zmmA, zmmB);
a.vpabsd(zmmA, anyptr_gpB);
a.vpabsq(xmmA, xmmB);
a.vpabsq(xmmA, anyptr_gpB);
a.vpabsq(ymmA, ymmB);
a.vpabsq(ymmA, anyptr_gpB);
a.vpabsq(zmmA, zmmB);
a.vpabsq(zmmA, anyptr_gpB);
a.vpabsw(xmmA, xmmB);
a.vpabsw(xmmA, anyptr_gpB);
a.vpabsw(ymmA, ymmB);
a.vpabsw(ymmA, anyptr_gpB);
a.vpabsw(zmmA, zmmB);
a.vpabsw(zmmA, anyptr_gpB);
a.vpackssdw(xmmA, xmmB, xmmC);
a.vpackssdw(xmmA, xmmB, anyptr_gpC);
a.vpackssdw(ymmA, ymmB, ymmC);
a.vpackssdw(ymmA, ymmB, anyptr_gpC);
a.vpackssdw(zmmA, zmmB, zmmC);
a.vpackssdw(zmmA, zmmB, anyptr_gpC);
a.vpacksswb(xmmA, xmmB, xmmC);
a.vpacksswb(xmmA, xmmB, anyptr_gpC);
a.vpacksswb(ymmA, ymmB, ymmC);
a.vpacksswb(ymmA, ymmB, anyptr_gpC);
a.vpacksswb(zmmA, zmmB, zmmC);
a.vpacksswb(zmmA, zmmB, anyptr_gpC);
a.vpackusdw(xmmA, xmmB, xmmC);
a.vpackusdw(xmmA, xmmB, anyptr_gpC);
a.vpackusdw(ymmA, ymmB, ymmC);
a.vpackusdw(ymmA, ymmB, anyptr_gpC);
a.vpackusdw(zmmA, zmmB, zmmC);
a.vpackusdw(zmmA, zmmB, anyptr_gpC);
a.vpackuswb(xmmA, xmmB, xmmC);
a.vpackuswb(xmmA, xmmB, anyptr_gpC);
a.vpackuswb(ymmA, ymmB, ymmC);
a.vpackuswb(ymmA, ymmB, anyptr_gpC);
a.vpackuswb(zmmA, zmmB, zmmC);
a.vpackuswb(zmmA, zmmB, anyptr_gpC);
a.vpaddb(xmmA, xmmB, xmmC);
a.vpaddb(xmmA, xmmB, anyptr_gpC);
a.vpaddb(ymmA, ymmB, ymmC);
a.vpaddb(ymmA, ymmB, anyptr_gpC);
a.vpaddb(zmmA, zmmB, zmmC);
a.vpaddb(zmmA, zmmB, anyptr_gpC);
a.vpaddd(xmmA, xmmB, xmmC);
a.vpaddd(xmmA, xmmB, anyptr_gpC);
a.vpaddd(ymmA, ymmB, ymmC);
a.vpaddd(ymmA, ymmB, anyptr_gpC);
a.vpaddd(zmmA, zmmB, zmmC);
a.vpaddd(zmmA, zmmB, anyptr_gpC);
a.vpaddq(xmmA, xmmB, xmmC);
a.vpaddq(xmmA, xmmB, anyptr_gpC);
a.vpaddq(ymmA, ymmB, ymmC);
a.vpaddq(ymmA, ymmB, anyptr_gpC);
a.vpaddq(zmmA, zmmB, zmmC);
a.vpaddq(zmmA, zmmB, anyptr_gpC);
a.vpaddsb(xmmA, xmmB, xmmC);
a.vpaddsb(xmmA, xmmB, anyptr_gpC);
a.vpaddsb(ymmA, ymmB, ymmC);
a.vpaddsb(ymmA, ymmB, anyptr_gpC);
a.vpaddsb(zmmA, zmmB, zmmC);
a.vpaddsb(zmmA, zmmB, anyptr_gpC);
a.vpaddsw(xmmA, xmmB, xmmC);
a.vpaddsw(xmmA, xmmB, anyptr_gpC);
a.vpaddsw(ymmA, ymmB, ymmC);
a.vpaddsw(ymmA, ymmB, anyptr_gpC);
a.vpaddsw(zmmA, zmmB, zmmC);
a.vpaddsw(zmmA, zmmB, anyptr_gpC);
a.vpaddusb(xmmA, xmmB, xmmC);
a.vpaddusb(xmmA, xmmB, anyptr_gpC);
a.vpaddusb(ymmA, ymmB, ymmC);
a.vpaddusb(ymmA, ymmB, anyptr_gpC);
a.vpaddusb(zmmA, zmmB, zmmC);
a.vpaddusb(zmmA, zmmB, anyptr_gpC);
a.vpaddusw(xmmA, xmmB, xmmC);
a.vpaddusw(xmmA, xmmB, anyptr_gpC);
a.vpaddusw(ymmA, ymmB, ymmC);
a.vpaddusw(ymmA, ymmB, anyptr_gpC);
a.vpaddusw(zmmA, zmmB, zmmC);
a.vpaddusw(zmmA, zmmB, anyptr_gpC);
a.vpaddw(xmmA, xmmB, xmmC);
a.vpaddw(xmmA, xmmB, anyptr_gpC);
a.vpaddw(ymmA, ymmB, ymmC);
a.vpaddw(ymmA, ymmB, anyptr_gpC);
a.vpaddw(zmmA, zmmB, zmmC);
a.vpaddw(zmmA, zmmB, anyptr_gpC);
a.vpalignr(xmmA, xmmB, xmmC, 0);
a.vpalignr(xmmA, xmmB, anyptr_gpC, 0);
a.vpalignr(ymmA, ymmB, ymmC, 0);
a.vpalignr(ymmA, ymmB, anyptr_gpC, 0);
a.vpalignr(zmmA, zmmB, zmmC, 0);
a.vpalignr(zmmA, zmmB, anyptr_gpC, 0);
a.vpandd(xmmA, xmmB, xmmC);
a.vpandd(xmmA, xmmB, anyptr_gpC);
a.vpandd(ymmA, ymmB, ymmC);
a.vpandd(ymmA, ymmB, anyptr_gpC);
a.vpandd(zmmA, zmmB, zmmC);
a.vpandd(zmmA, zmmB, anyptr_gpC);
a.vpandnd(xmmA, xmmB, xmmC);
a.vpandnd(xmmA, xmmB, anyptr_gpC);
a.vpandnd(ymmA, ymmB, ymmC);
a.vpandnd(ymmA, ymmB, anyptr_gpC);
a.vpandnd(zmmA, zmmB, zmmC);
a.vpandnd(zmmA, zmmB, anyptr_gpC);
a.vpandnq(xmmA, xmmB, xmmC);
a.vpandnq(xmmA, xmmB, anyptr_gpC);
a.vpandnq(ymmA, ymmB, ymmC);
a.vpandnq(ymmA, ymmB, anyptr_gpC);
a.vpandnq(zmmA, zmmB, zmmC);
a.vpandnq(zmmA, zmmB, anyptr_gpC);
a.vpandq(xmmA, xmmB, xmmC);
a.vpandq(xmmA, xmmB, anyptr_gpC);
a.vpandq(ymmA, ymmB, ymmC);
a.vpandq(ymmA, ymmB, anyptr_gpC);
a.vpandq(zmmA, zmmB, zmmC);
a.vpandq(zmmA, zmmB, anyptr_gpC);
a.vpavgb(xmmA, xmmB, xmmC);
a.vpavgb(xmmA, xmmB, anyptr_gpC);
a.vpavgb(ymmA, ymmB, ymmC);
a.vpavgb(ymmA, ymmB, anyptr_gpC);
a.vpavgb(zmmA, zmmB, zmmC);
a.vpavgb(zmmA, zmmB, anyptr_gpC);
a.vpavgw(xmmA, xmmB, xmmC);
a.vpavgw(xmmA, xmmB, anyptr_gpC);
a.vpavgw(ymmA, ymmB, ymmC);
a.vpavgw(ymmA, ymmB, anyptr_gpC);
a.vpavgw(zmmA, zmmB, zmmC);
a.vpavgw(zmmA, zmmB, anyptr_gpC);
a.vpbroadcastb(xmmA, gdB);
a.vpbroadcastb(xmmA, gzB);
a.vpbroadcastb(xmmA, xmmB);
a.vpbroadcastb(xmmA, anyptr_gpB);
a.vpbroadcastb(ymmA, gdB);
a.vpbroadcastb(ymmA, gzB);
a.vpbroadcastb(ymmA, xmmB);
a.vpbroadcastb(ymmA, anyptr_gpB);
a.vpbroadcastb(zmmA, gdB);
a.vpbroadcastb(zmmA, gzB);
a.vpbroadcastb(zmmA, xmmB);
a.vpbroadcastb(zmmA, anyptr_gpB);
a.vpbroadcastd(xmmA, gdB);
a.vpbroadcastd(xmmA, gzB);
a.vpbroadcastd(xmmA, xmmB);
a.vpbroadcastd(xmmA, anyptr_gpB);
a.vpbroadcastd(ymmA, gdB);
a.vpbroadcastd(ymmA, gzB);
a.vpbroadcastd(ymmA, xmmB);
a.vpbroadcastd(ymmA, anyptr_gpB);
a.vpbroadcastd(zmmA, gdB);
a.vpbroadcastd(zmmA, gzB);
a.vpbroadcastd(zmmA, xmmB);
a.vpbroadcastd(zmmA, anyptr_gpB);
a.vpbroadcastmb2d(xmmA, kB);
a.vpbroadcastmb2d(ymmA, kB);
a.vpbroadcastmb2d(zmmA, kB);
a.vpbroadcastmb2q(xmmA, kB);
a.vpbroadcastmb2q(ymmA, kB);
a.vpbroadcastmb2q(zmmA, kB);
if (isX64) a.vpbroadcastq(xmmA, gzB);
a.vpbroadcastq(xmmA, xmmB);
a.vpbroadcastq(xmmA, anyptr_gpB);
if (isX64) a.vpbroadcastq(ymmA, gzB);
a.vpbroadcastq(ymmA, xmmB);
a.vpbroadcastq(ymmA, anyptr_gpB);
if (isX64) a.vpbroadcastq(zmmA, gzB);
a.vpbroadcastq(zmmA, xmmB);
a.vpbroadcastq(zmmA, anyptr_gpB);
a.vpbroadcastw(xmmA, gdB);
a.vpbroadcastw(xmmA, gzB);
a.vpbroadcastw(xmmA, xmmB);
a.vpbroadcastw(xmmA, anyptr_gpB);
a.vpbroadcastw(ymmA, gdB);
a.vpbroadcastw(ymmA, gzB);
a.vpbroadcastw(ymmA, xmmB);
a.vpbroadcastw(ymmA, anyptr_gpB);
a.vpbroadcastw(zmmA, gdB);
a.vpbroadcastw(zmmA, gzB);
a.vpbroadcastw(zmmA, xmmB);
a.vpbroadcastw(zmmA, anyptr_gpB);
a.vpcmpb(kA, xmmB, xmmC, 0);
a.vpcmpb(kA, xmmB, anyptr_gpC, 0);
a.vpcmpb(kA, ymmB, ymmC, 0);
a.vpcmpb(kA, ymmB, anyptr_gpC, 0);
a.vpcmpb(kA, zmmB, zmmC, 0);
a.vpcmpb(kA, zmmB, anyptr_gpC, 0);
a.vpcmpd(kA, xmmB, xmmC, 0);
a.vpcmpd(kA, xmmB, anyptr_gpC, 0);
a.vpcmpd(kA, ymmB, ymmC, 0);
a.vpcmpd(kA, ymmB, anyptr_gpC, 0);
a.vpcmpd(kA, zmmB, zmmC, 0);
a.vpcmpd(kA, zmmB, anyptr_gpC, 0);
a.vpcmpeqb(kA, xmmB, xmmC);
a.vpcmpeqb(kA, xmmB, anyptr_gpC);
a.vpcmpeqb(kA, ymmB, ymmC);
a.vpcmpeqb(kA, ymmB, anyptr_gpC);
a.vpcmpeqb(kA, zmmB, zmmC);
a.vpcmpeqb(kA, zmmB, anyptr_gpC);
a.vpcmpeqd(kA, xmmB, xmmC);
a.vpcmpeqd(kA, xmmB, anyptr_gpC);
a.vpcmpeqd(kA, ymmB, ymmC);
a.vpcmpeqd(kA, ymmB, anyptr_gpC);
a.vpcmpeqd(kA, zmmB, zmmC);
a.vpcmpeqd(kA, zmmB, anyptr_gpC);
a.vpcmpeqq(kA, xmmB, xmmC);
a.vpcmpeqq(kA, xmmB, anyptr_gpC);
a.vpcmpeqq(kA, ymmB, ymmC);
a.vpcmpeqq(kA, ymmB, anyptr_gpC);
a.vpcmpeqq(kA, zmmB, zmmC);
a.vpcmpeqq(kA, zmmB, anyptr_gpC);
a.vpcmpeqw(kA, xmmB, xmmC);
a.vpcmpeqw(kA, xmmB, anyptr_gpC);
a.vpcmpeqw(kA, ymmB, ymmC);
a.vpcmpeqw(kA, ymmB, anyptr_gpC);
a.vpcmpeqw(kA, zmmB, zmmC);
a.vpcmpeqw(kA, zmmB, anyptr_gpC);
a.vpcmpgtb(kA, xmmB, xmmC);
a.vpcmpgtb(kA, xmmB, anyptr_gpC);
a.vpcmpgtb(kA, ymmB, ymmC);
a.vpcmpgtb(kA, ymmB, anyptr_gpC);
a.vpcmpgtb(kA, zmmB, zmmC);
a.vpcmpgtb(kA, zmmB, anyptr_gpC);
a.vpcmpgtd(kA, xmmB, xmmC);
a.vpcmpgtd(kA, xmmB, anyptr_gpC);
a.vpcmpgtd(kA, ymmB, ymmC);
a.vpcmpgtd(kA, ymmB, anyptr_gpC);
a.vpcmpgtd(kA, zmmB, zmmC);
a.vpcmpgtd(kA, zmmB, anyptr_gpC);
a.vpcmpgtq(kA, xmmB, xmmC);
a.vpcmpgtq(kA, xmmB, anyptr_gpC);
a.vpcmpgtq(kA, ymmB, ymmC);
a.vpcmpgtq(kA, ymmB, anyptr_gpC);
a.vpcmpgtq(kA, zmmB, zmmC);
a.vpcmpgtq(kA, zmmB, anyptr_gpC);
a.vpcmpgtw(kA, xmmB, xmmC);
a.vpcmpgtw(kA, xmmB, anyptr_gpC);
a.vpcmpgtw(kA, ymmB, ymmC);
a.vpcmpgtw(kA, ymmB, anyptr_gpC);
a.vpcmpgtw(kA, zmmB, zmmC);
a.vpcmpgtw(kA, zmmB, anyptr_gpC);
a.vpcmpq(kA, xmmB, xmmC, 0);
a.vpcmpq(kA, xmmB, anyptr_gpC, 0);
a.vpcmpq(kA, ymmB, ymmC, 0);
a.vpcmpq(kA, ymmB, anyptr_gpC, 0);
a.vpcmpq(kA, zmmB, zmmC, 0);
a.vpcmpq(kA, zmmB, anyptr_gpC, 0);
a.vpcmpub(kA, xmmB, xmmC, 0);
a.vpcmpub(kA, xmmB, anyptr_gpC, 0);
a.vpcmpub(kA, ymmB, ymmC, 0);
a.vpcmpub(kA, ymmB, anyptr_gpC, 0);
a.vpcmpub(kA, zmmB, zmmC, 0);
a.vpcmpub(kA, zmmB, anyptr_gpC, 0);
a.vpcmpud(kA, xmmB, xmmC, 0);
a.vpcmpud(kA, xmmB, anyptr_gpC, 0);
a.vpcmpud(kA, ymmB, ymmC, 0);
a.vpcmpud(kA, ymmB, anyptr_gpC, 0);
a.vpcmpud(kA, zmmB, zmmC, 0);
a.vpcmpud(kA, zmmB, anyptr_gpC, 0);
a.vpcmpuq(kA, xmmB, xmmC, 0);
a.vpcmpuq(kA, xmmB, anyptr_gpC, 0);
a.vpcmpuq(kA, ymmB, ymmC, 0);
a.vpcmpuq(kA, ymmB, anyptr_gpC, 0);
a.vpcmpuq(kA, zmmB, zmmC, 0);
a.vpcmpuq(kA, zmmB, anyptr_gpC, 0);
a.vpcmpuw(kA, xmmB, xmmC, 0);
a.vpcmpuw(kA, xmmB, anyptr_gpC, 0);
a.vpcmpuw(kA, ymmB, ymmC, 0);
a.vpcmpuw(kA, ymmB, anyptr_gpC, 0);
a.vpcmpuw(kA, zmmB, zmmC, 0);
a.vpcmpuw(kA, zmmB, anyptr_gpC, 0);
a.vpcmpw(kA, xmmB, xmmC, 0);
a.vpcmpw(kA, xmmB, anyptr_gpC, 0);
a.vpcmpw(kA, ymmB, ymmC, 0);
a.vpcmpw(kA, ymmB, anyptr_gpC, 0);
a.vpcmpw(kA, zmmB, zmmC, 0);
a.vpcmpw(kA, zmmB, anyptr_gpC, 0);
a.vpcompressd(xmmA, xmmB);
a.vpcompressd(anyptr_gpA, xmmB);
a.vpcompressd(ymmA, ymmB);
a.vpcompressd(anyptr_gpA, ymmB);
a.vpcompressd(zmmA, zmmB);
a.vpcompressd(anyptr_gpA, zmmB);
a.vpcompressq(xmmA, xmmB);
a.vpcompressq(anyptr_gpA, xmmB);
a.vpcompressq(ymmA, ymmB);
a.vpcompressq(anyptr_gpA, ymmB);
a.vpcompressq(zmmA, zmmB);
a.vpcompressq(anyptr_gpA, zmmB);
a.vpconflictd(xmmA, xmmB);
a.vpconflictd(xmmA, anyptr_gpB);
a.vpconflictd(ymmA, ymmB);
a.vpconflictd(ymmA, anyptr_gpB);
a.vpconflictd(zmmA, zmmB);
a.vpconflictd(zmmA, anyptr_gpB);
a.vpconflictq(xmmA, xmmB);
a.vpconflictq(xmmA, anyptr_gpB);
a.vpconflictq(ymmA, ymmB);
a.vpconflictq(ymmA, anyptr_gpB);
a.vpconflictq(zmmA, zmmB);
a.vpconflictq(zmmA, anyptr_gpB);
a.vpermb(xmmA, xmmB, xmmC);
a.vpermb(xmmA, xmmB, anyptr_gpC);
a.vpermb(ymmA, ymmB, ymmC);
a.vpermb(ymmA, ymmB, anyptr_gpC);
a.vpermb(zmmA, zmmB, zmmC);
a.vpermb(zmmA, zmmB, anyptr_gpC);
a.vpermd(ymmA, ymmB, ymmC);
a.vpermd(ymmA, ymmB, anyptr_gpC);
a.vpermd(zmmA, zmmB, zmmC);
a.vpermd(zmmA, zmmB, anyptr_gpC);
a.vpermi2b(xmmA, xmmB, xmmC);
a.vpermi2b(xmmA, xmmB, anyptr_gpC);
a.vpermi2b(ymmA, ymmB, ymmC);
a.vpermi2b(ymmA, ymmB, anyptr_gpC);
a.vpermi2b(zmmA, zmmB, zmmC);
a.vpermi2b(zmmA, zmmB, anyptr_gpC);
a.vpermi2d(xmmA, xmmB, xmmC);
a.vpermi2d(xmmA, xmmB, anyptr_gpC);
a.vpermi2d(ymmA, ymmB, ymmC);
a.vpermi2d(ymmA, ymmB, anyptr_gpC);
a.vpermi2d(zmmA, zmmB, zmmC);
a.vpermi2d(zmmA, zmmB, anyptr_gpC);
a.vpermi2pd(xmmA, xmmB, xmmC);
a.vpermi2pd(xmmA, xmmB, anyptr_gpC);
a.vpermi2pd(ymmA, ymmB, ymmC);
a.vpermi2pd(ymmA, ymmB, anyptr_gpC);
a.vpermi2pd(zmmA, zmmB, zmmC);
a.vpermi2pd(zmmA, zmmB, anyptr_gpC);
a.vpermi2ps(xmmA, xmmB, xmmC);
a.vpermi2ps(xmmA, xmmB, anyptr_gpC);
a.vpermi2ps(ymmA, ymmB, ymmC);
a.vpermi2ps(ymmA, ymmB, anyptr_gpC);
a.vpermi2ps(zmmA, zmmB, zmmC);
a.vpermi2ps(zmmA, zmmB, anyptr_gpC);
a.vpermi2q(xmmA, xmmB, xmmC);
a.vpermi2q(xmmA, xmmB, anyptr_gpC);
a.vpermi2q(ymmA, ymmB, ymmC);
a.vpermi2q(ymmA, ymmB, anyptr_gpC);
a.vpermi2q(zmmA, zmmB, zmmC);
a.vpermi2q(zmmA, zmmB, anyptr_gpC);
a.vpermi2w(xmmA, xmmB, xmmC);
a.vpermi2w(xmmA, xmmB, anyptr_gpC);
a.vpermi2w(ymmA, ymmB, ymmC);
a.vpermi2w(ymmA, ymmB, anyptr_gpC);
a.vpermi2w(zmmA, zmmB, zmmC);
a.vpermi2w(zmmA, zmmB, anyptr_gpC);
a.vpermilpd(xmmA, xmmB, xmmC);
a.vpermilpd(xmmA, xmmB, anyptr_gpC);
a.vpermilpd(ymmA, ymmB, ymmC);
a.vpermilpd(ymmA, ymmB, anyptr_gpC);
a.vpermilpd(zmmA, zmmB, zmmC);
a.vpermilpd(zmmA, zmmB, anyptr_gpC);
a.vpermilpd(xmmA, xmmB, 0);
a.vpermilpd(xmmA, anyptr_gpB, 0);
a.vpermilpd(ymmA, ymmB, 0);
a.vpermilpd(ymmA, anyptr_gpB, 0);
a.vpermilpd(zmmA, zmmB, 0);
a.vpermilpd(zmmA, anyptr_gpB, 0);
a.vpermilps(xmmA, xmmB, xmmC);
a.vpermilps(xmmA, xmmB, anyptr_gpC);
a.vpermilps(ymmA, ymmB, ymmC);
a.vpermilps(ymmA, ymmB, anyptr_gpC);
a.vpermilps(zmmA, zmmB, zmmC);
a.vpermilps(zmmA, zmmB, anyptr_gpC);
a.vpermilps(xmmA, xmmB, 0);
a.vpermilps(xmmA, anyptr_gpB, 0);
a.vpermilps(ymmA, ymmB, 0);
a.vpermilps(ymmA, anyptr_gpB, 0);
a.vpermilps(zmmA, zmmB, 0);
a.vpermilps(zmmA, anyptr_gpB, 0);
a.vpermq(ymmA, ymmB, ymmC);
a.vpermq(ymmA, ymmB, anyptr_gpC);
a.vpermq(zmmA, zmmB, zmmC);
a.vpermq(zmmA, zmmB, anyptr_gpC);
a.vpermq(ymmA, ymmB, 0);
a.vpermq(ymmA, anyptr_gpB, 0);
a.vpermq(zmmA, zmmB, 0);
a.vpermq(zmmA, anyptr_gpB, 0);
a.vpermt2b(xmmA, xmmB, xmmC);
a.vpermt2b(xmmA, xmmB, anyptr_gpC);
a.vpermt2b(ymmA, ymmB, ymmC);
a.vpermt2b(ymmA, ymmB, anyptr_gpC);
a.vpermt2b(zmmA, zmmB, zmmC);
a.vpermt2b(zmmA, zmmB, anyptr_gpC);
a.vpermt2d(xmmA, xmmB, xmmC);
a.vpermt2d(xmmA, xmmB, anyptr_gpC);
a.vpermt2d(ymmA, ymmB, ymmC);
a.vpermt2d(ymmA, ymmB, anyptr_gpC);
a.vpermt2d(zmmA, zmmB, zmmC);
a.vpermt2d(zmmA, zmmB, anyptr_gpC);
a.vpermt2pd(xmmA, xmmB, xmmC);
a.vpermt2pd(xmmA, xmmB, anyptr_gpC);
a.vpermt2pd(ymmA, ymmB, ymmC);
a.vpermt2pd(ymmA, ymmB, anyptr_gpC);
a.vpermt2pd(zmmA, zmmB, zmmC);
a.vpermt2pd(zmmA, zmmB, anyptr_gpC);
a.vpermt2ps(xmmA, xmmB, xmmC);
a.vpermt2ps(xmmA, xmmB, anyptr_gpC);
a.vpermt2ps(ymmA, ymmB, ymmC);
a.vpermt2ps(ymmA, ymmB, anyptr_gpC);
a.vpermt2ps(zmmA, zmmB, zmmC);
a.vpermt2ps(zmmA, zmmB, anyptr_gpC);
a.vpermt2q(xmmA, xmmB, xmmC);
a.vpermt2q(xmmA, xmmB, anyptr_gpC);
a.vpermt2q(ymmA, ymmB, ymmC);
a.vpermt2q(ymmA, ymmB, anyptr_gpC);
a.vpermt2q(zmmA, zmmB, zmmC);
a.vpermt2q(zmmA, zmmB, anyptr_gpC);
a.vpermt2w(xmmA, xmmB, xmmC);
a.vpermt2w(xmmA, xmmB, anyptr_gpC);
a.vpermt2w(ymmA, ymmB, ymmC);
a.vpermt2w(ymmA, ymmB, anyptr_gpC);
a.vpermt2w(zmmA, zmmB, zmmC);
a.vpermt2w(zmmA, zmmB, anyptr_gpC);
a.vpermw(xmmA, xmmB, xmmC);
a.vpermw(xmmA, xmmB, anyptr_gpC);
a.vpermw(ymmA, ymmB, ymmC);
a.vpermw(ymmA, ymmB, anyptr_gpC);
a.vpermw(zmmA, zmmB, zmmC);
a.vpermw(zmmA, zmmB, anyptr_gpC);
a.vpexpandd(xmmA, xmmB);
a.vpexpandd(xmmA, anyptr_gpB);
a.vpexpandd(ymmA, ymmB);
a.vpexpandd(ymmA, anyptr_gpB);
a.vpexpandd(zmmA, zmmB);
a.vpexpandd(zmmA, anyptr_gpB);
a.vpexpandq(xmmA, xmmB);
a.vpexpandq(xmmA, anyptr_gpB);
a.vpexpandq(ymmA, ymmB);
a.vpexpandq(ymmA, anyptr_gpB);
a.vpexpandq(zmmA, zmmB);
a.vpexpandq(zmmA, anyptr_gpB);
a.vpextrb(gdA, xmmB, 0);
a.vpextrb(anyptr_gpA, xmmB, 0);
a.vpextrb(gzA, xmmB, 0);
a.vpextrd(gdA, xmmB, 0);
a.vpextrd(anyptr_gpA, xmmB, 0);
if (isX64) a.vpextrd(gzA, xmmB, 0);
if (isX64) a.vpextrq(gzA, xmmB, 0);
a.vpextrq(anyptr_gpA, xmmB, 0);
a.vpextrw(gdA, xmmB, 0);
a.vpextrw(gzA, xmmB, 0);
a.vpextrw(gdA, xmmB, 0);
a.vpextrw(anyptr_gpA, xmmB, 0);
a.vpextrw(gzA, xmmB, 0);
a.vpgatherdd(xmmA, vx_ptr);
a.vpgatherdd(ymmA, vy_ptr);
a.vpgatherdd(zmmA, vz_ptr);
a.vpgatherdq(xmmA, vx_ptr);
a.vpgatherdq(ymmA, vy_ptr);
a.vpgatherdq(zmmA, vz_ptr);
a.vpgatherqd(xmmA, vx_ptr);
a.vpgatherqd(ymmA, vy_ptr);
a.vpgatherqd(zmmA, vz_ptr);
a.vpgatherqq(xmmA, vx_ptr);
a.vpgatherqq(ymmA, vy_ptr);
a.vpgatherqq(zmmA, vz_ptr);
a.vpinsrb(xmmA, xmmB, gdC, 0);
a.vpinsrb(xmmA, xmmB, anyptr_gpC, 0);
a.vpinsrb(xmmA, xmmB, gzC, 0);
a.vpinsrd(xmmA, xmmB, gdC, 0);
a.vpinsrd(xmmA, xmmB, anyptr_gpC, 0);
a.vpinsrd(xmmA, xmmB, gzC, 0);
if (isX64) a.vpinsrq(xmmA, xmmB, gzC, 0);
a.vpinsrq(xmmA, xmmB, anyptr_gpC, 0);
a.vpinsrw(xmmA, xmmB, gdC, 0);
a.vpinsrw(xmmA, xmmB, anyptr_gpC, 0);
a.vpinsrw(xmmA, xmmB, gzC, 0);
a.vplzcntd(xmmA, xmmB);
a.vplzcntd(xmmA, anyptr_gpB);
a.vplzcntd(ymmA, ymmB);
a.vplzcntd(ymmA, anyptr_gpB);
a.vplzcntd(zmmA, zmmB);
a.vplzcntd(zmmA, anyptr_gpB);
a.vplzcntq(xmmA, xmmB);
a.vplzcntq(xmmA, anyptr_gpB);
a.vplzcntq(ymmA, ymmB);
a.vplzcntq(ymmA, anyptr_gpB);
a.vplzcntq(zmmA, zmmB);
a.vplzcntq(zmmA, anyptr_gpB);
a.vpmadd52huq(xmmA, xmmB, xmmC);
a.vpmadd52huq(xmmA, xmmB, anyptr_gpC);
a.vpmadd52huq(ymmA, ymmB, ymmC);
a.vpmadd52huq(ymmA, ymmB, anyptr_gpC);
a.vpmadd52huq(zmmA, zmmB, zmmC);
a.vpmadd52huq(zmmA, zmmB, anyptr_gpC);
a.vpmadd52luq(xmmA, xmmB, xmmC);
a.vpmadd52luq(xmmA, xmmB, anyptr_gpC);
a.vpmadd52luq(ymmA, ymmB, ymmC);
a.vpmadd52luq(ymmA, ymmB, anyptr_gpC);
a.vpmadd52luq(zmmA, zmmB, zmmC);
a.vpmadd52luq(zmmA, zmmB, anyptr_gpC);
a.vpmaddubsw(xmmA, xmmB, xmmC);
a.vpmaddubsw(xmmA, xmmB, anyptr_gpC);
a.vpmaddubsw(ymmA, ymmB, ymmC);
a.vpmaddubsw(ymmA, ymmB, anyptr_gpC);
a.vpmaddubsw(zmmA, zmmB, zmmC);
a.vpmaddubsw(zmmA, zmmB, anyptr_gpC);
a.vpmaddwd(xmmA, xmmB, xmmC);
a.vpmaddwd(xmmA, xmmB, anyptr_gpC);
a.vpmaddwd(ymmA, ymmB, ymmC);
a.vpmaddwd(ymmA, ymmB, anyptr_gpC);
a.vpmaddwd(zmmA, zmmB, zmmC);
a.vpmaddwd(zmmA, zmmB, anyptr_gpC);
a.vpmaxsb(xmmA, xmmB, xmmC);
a.vpmaxsb(xmmA, xmmB, anyptr_gpC);
a.vpmaxsb(ymmA, ymmB, ymmC);
a.vpmaxsb(ymmA, ymmB, anyptr_gpC);
a.vpmaxsb(zmmA, zmmB, zmmC);
a.vpmaxsb(zmmA, zmmB, anyptr_gpC);
a.vpmaxsd(xmmA, xmmB, xmmC);
a.vpmaxsd(xmmA, xmmB, anyptr_gpC);
a.vpmaxsd(ymmA, ymmB, ymmC);
a.vpmaxsd(ymmA, ymmB, anyptr_gpC);
a.vpmaxsd(zmmA, zmmB, zmmC);
a.vpmaxsd(zmmA, zmmB, anyptr_gpC);
a.vpmaxsq(xmmA, xmmB, xmmC);
a.vpmaxsq(xmmA, xmmB, anyptr_gpC);
a.vpmaxsq(ymmA, ymmB, ymmC);
a.vpmaxsq(ymmA, ymmB, anyptr_gpC);
a.vpmaxsq(zmmA, zmmB, zmmC);
a.vpmaxsq(zmmA, zmmB, anyptr_gpC);
a.vpmaxsw(xmmA, xmmB, xmmC);
a.vpmaxsw(xmmA, xmmB, anyptr_gpC);
a.vpmaxsw(ymmA, ymmB, ymmC);
a.vpmaxsw(ymmA, ymmB, anyptr_gpC);
a.vpmaxsw(zmmA, zmmB, zmmC);
a.vpmaxsw(zmmA, zmmB, anyptr_gpC);
a.vpmaxub(xmmA, xmmB, xmmC);
a.vpmaxub(xmmA, xmmB, anyptr_gpC);
a.vpmaxub(ymmA, ymmB, ymmC);
a.vpmaxub(ymmA, ymmB, anyptr_gpC);
a.vpmaxub(zmmA, zmmB, zmmC);
a.vpmaxub(zmmA, zmmB, anyptr_gpC);
a.vpmaxud(xmmA, xmmB, xmmC);
a.vpmaxud(xmmA, xmmB, anyptr_gpC);
a.vpmaxud(ymmA, ymmB, ymmC);
a.vpmaxud(ymmA, ymmB, anyptr_gpC);
a.vpmaxud(zmmA, zmmB, zmmC);
a.vpmaxud(zmmA, zmmB, anyptr_gpC);
a.vpmaxuq(xmmA, xmmB, xmmC);
a.vpmaxuq(xmmA, xmmB, anyptr_gpC);
a.vpmaxuq(ymmA, ymmB, ymmC);
a.vpmaxuq(ymmA, ymmB, anyptr_gpC);
a.vpmaxuq(zmmA, zmmB, zmmC);
a.vpmaxuq(zmmA, zmmB, anyptr_gpC);
a.vpmaxuw(xmmA, xmmB, xmmC);
a.vpmaxuw(xmmA, xmmB, anyptr_gpC);
a.vpmaxuw(ymmA, ymmB, ymmC);
a.vpmaxuw(ymmA, ymmB, anyptr_gpC);
a.vpmaxuw(zmmA, zmmB, zmmC);
a.vpmaxuw(zmmA, zmmB, anyptr_gpC);
a.vpminsb(xmmA, xmmB, xmmC);
a.vpminsb(xmmA, xmmB, anyptr_gpC);
a.vpminsb(ymmA, ymmB, ymmC);
a.vpminsb(ymmA, ymmB, anyptr_gpC);
a.vpminsb(zmmA, zmmB, zmmC);
a.vpminsb(zmmA, zmmB, anyptr_gpC);
a.vpminsd(xmmA, xmmB, xmmC);
a.vpminsd(xmmA, xmmB, anyptr_gpC);
a.vpminsd(ymmA, ymmB, ymmC);
a.vpminsd(ymmA, ymmB, anyptr_gpC);
a.vpminsd(zmmA, zmmB, zmmC);
a.vpminsd(zmmA, zmmB, anyptr_gpC);
a.vpminsq(xmmA, xmmB, xmmC);
a.vpminsq(xmmA, xmmB, anyptr_gpC);
a.vpminsq(ymmA, ymmB, ymmC);
a.vpminsq(ymmA, ymmB, anyptr_gpC);
a.vpminsq(zmmA, zmmB, zmmC);
a.vpminsq(zmmA, zmmB, anyptr_gpC);
a.vpminsw(xmmA, xmmB, xmmC);
a.vpminsw(xmmA, xmmB, anyptr_gpC);
a.vpminsw(ymmA, ymmB, ymmC);
a.vpminsw(ymmA, ymmB, anyptr_gpC);
a.vpminsw(zmmA, zmmB, zmmC);
a.vpminsw(zmmA, zmmB, anyptr_gpC);
a.vpminub(xmmA, xmmB, xmmC);
a.vpminub(xmmA, xmmB, anyptr_gpC);
a.vpminub(ymmA, ymmB, ymmC);
a.vpminub(ymmA, ymmB, anyptr_gpC);
a.vpminub(zmmA, zmmB, zmmC);
a.vpminub(zmmA, zmmB, anyptr_gpC);
a.vpminud(xmmA, xmmB, xmmC);
a.vpminud(xmmA, xmmB, anyptr_gpC);
a.vpminud(ymmA, ymmB, ymmC);
a.vpminud(ymmA, ymmB, anyptr_gpC);
a.vpminud(zmmA, zmmB, zmmC);
a.vpminud(zmmA, zmmB, anyptr_gpC);
a.vpminuq(xmmA, xmmB, xmmC);
a.vpminuq(xmmA, xmmB, anyptr_gpC);
a.vpminuq(ymmA, ymmB, ymmC);
a.vpminuq(ymmA, ymmB, anyptr_gpC);
a.vpminuq(zmmA, zmmB, zmmC);
a.vpminuq(zmmA, zmmB, anyptr_gpC);
a.vpminuw(xmmA, xmmB, xmmC);
a.vpminuw(xmmA, xmmB, anyptr_gpC);
a.vpminuw(ymmA, ymmB, ymmC);
a.vpminuw(ymmA, ymmB, anyptr_gpC);
a.vpminuw(zmmA, zmmB, zmmC);
a.vpminuw(zmmA, zmmB, anyptr_gpC);
a.vpmovb2m(kA, xmmB);
a.vpmovb2m(kA, ymmB);
a.vpmovb2m(kA, zmmB);
a.vpmovd2m(kA, xmmB);
a.vpmovd2m(kA, ymmB);
a.vpmovd2m(kA, zmmB);
a.vpmovdb(xmmA, xmmB);
a.vpmovdb(anyptr_gpA, xmmB);
a.vpmovdb(xmmA, ymmB);
a.vpmovdb(anyptr_gpA, ymmB);
a.vpmovdb(xmmA, zmmB);
a.vpmovdb(anyptr_gpA, zmmB);
a.vpmovdw(xmmA, xmmB);
a.vpmovdw(anyptr_gpA, xmmB);
a.vpmovdw(xmmA, ymmB);
a.vpmovdw(anyptr_gpA, ymmB);
a.vpmovdw(ymmA, zmmB);
a.vpmovdw(anyptr_gpA, zmmB);
a.vpmovm2b(xmmA, kB);
a.vpmovm2b(ymmA, kB);
a.vpmovm2b(zmmA, kB);
a.vpmovm2d(xmmA, kB);
a.vpmovm2d(ymmA, kB);
a.vpmovm2d(zmmA, kB);
a.vpmovm2q(xmmA, kB);
a.vpmovm2q(ymmA, kB);
a.vpmovm2q(zmmA, kB);
a.vpmovm2w(xmmA, kB);
a.vpmovm2w(ymmA, kB);
a.vpmovm2w(zmmA, kB);
a.vpmovq2m(kA, xmmB);
a.vpmovq2m(kA, ymmB);
a.vpmovq2m(kA, zmmB);
a.vpmovqb(xmmA, xmmB);
a.vpmovqb(anyptr_gpA, xmmB);
a.vpmovqb(xmmA, ymmB);
a.vpmovqb(anyptr_gpA, ymmB);
a.vpmovqb(xmmA, zmmB);
a.vpmovqb(anyptr_gpA, zmmB);
a.vpmovqd(xmmA, xmmB);
a.vpmovqd(anyptr_gpA, xmmB);
a.vpmovqd(xmmA, ymmB);
a.vpmovqd(anyptr_gpA, ymmB);
a.vpmovqd(ymmA, zmmB);
a.vpmovqd(anyptr_gpA, zmmB);
a.vpmovqw(xmmA, xmmB);
a.vpmovqw(anyptr_gpA, xmmB);
a.vpmovqw(xmmA, ymmB);
a.vpmovqw(anyptr_gpA, ymmB);
a.vpmovqw(xmmA, zmmB);
a.vpmovqw(anyptr_gpA, zmmB);
a.vpmovsdb(xmmA, xmmB);
a.vpmovsdb(anyptr_gpA, xmmB);
a.vpmovsdb(xmmA, ymmB);
a.vpmovsdb(anyptr_gpA, ymmB);
a.vpmovsdb(xmmA, zmmB);
a.vpmovsdb(anyptr_gpA, zmmB);
a.vpmovsdw(xmmA, xmmB);
a.vpmovsdw(anyptr_gpA, xmmB);
a.vpmovsdw(xmmA, ymmB);
a.vpmovsdw(anyptr_gpA, ymmB);
a.vpmovsdw(ymmA, zmmB);
a.vpmovsdw(anyptr_gpA, zmmB);
a.vpmovsqb(xmmA, xmmB);
a.vpmovsqb(anyptr_gpA, xmmB);
a.vpmovsqb(xmmA, ymmB);
a.vpmovsqb(anyptr_gpA, ymmB);
a.vpmovsqb(xmmA, zmmB);
a.vpmovsqb(anyptr_gpA, zmmB);
a.vpmovsqd(xmmA, xmmB);
a.vpmovsqd(anyptr_gpA, xmmB);
a.vpmovsqd(xmmA, ymmB);
a.vpmovsqd(anyptr_gpA, ymmB);
a.vpmovsqd(ymmA, zmmB);
a.vpmovsqd(anyptr_gpA, zmmB);
a.vpmovsqw(xmmA, xmmB);
a.vpmovsqw(anyptr_gpA, xmmB);
a.vpmovsqw(xmmA, ymmB);
a.vpmovsqw(anyptr_gpA, ymmB);
a.vpmovsqw(xmmA, zmmB);
a.vpmovsqw(anyptr_gpA, zmmB);
a.vpmovswb(xmmA, xmmB);
a.vpmovswb(anyptr_gpA, xmmB);
a.vpmovswb(xmmA, ymmB);
a.vpmovswb(anyptr_gpA, ymmB);
a.vpmovswb(ymmA, zmmB);
a.vpmovswb(anyptr_gpA, zmmB);
a.vpmovsxbd(xmmA, xmmB);
a.vpmovsxbd(xmmA, anyptr_gpB);
a.vpmovsxbd(ymmA, xmmB);
a.vpmovsxbd(ymmA, anyptr_gpB);
a.vpmovsxbd(zmmA, xmmB);
a.vpmovsxbd(zmmA, anyptr_gpB);
a.vpmovsxbq(xmmA, xmmB);
a.vpmovsxbq(xmmA, anyptr_gpB);
a.vpmovsxbq(ymmA, xmmB);
a.vpmovsxbq(ymmA, anyptr_gpB);
a.vpmovsxbq(zmmA, xmmB);
a.vpmovsxbq(zmmA, anyptr_gpB);
a.vpmovsxbw(xmmA, xmmB);
a.vpmovsxbw(xmmA, anyptr_gpB);
a.vpmovsxbw(ymmA, xmmB);
a.vpmovsxbw(ymmA, anyptr_gpB);
a.vpmovsxbw(zmmA, ymmB);
a.vpmovsxbw(zmmA, anyptr_gpB);
a.vpmovsxdq(xmmA, xmmB);
a.vpmovsxdq(xmmA, anyptr_gpB);
a.vpmovsxdq(ymmA, xmmB);
a.vpmovsxdq(ymmA, anyptr_gpB);
a.vpmovsxdq(zmmA, xmmB);
a.vpmovsxdq(zmmA, anyptr_gpB);
a.vpmovsxwd(xmmA, xmmB);
a.vpmovsxwd(xmmA, anyptr_gpB);
a.vpmovsxwd(ymmA, xmmB);
a.vpmovsxwd(ymmA, anyptr_gpB);
a.vpmovsxwd(zmmA, ymmB);
a.vpmovsxwd(zmmA, anyptr_gpB);
a.vpmovsxwq(xmmA, xmmB);
a.vpmovsxwq(xmmA, anyptr_gpB);
a.vpmovsxwq(ymmA, xmmB);
a.vpmovsxwq(ymmA, anyptr_gpB);
a.vpmovsxwq(zmmA, xmmB);
a.vpmovsxwq(zmmA, anyptr_gpB);
a.vpmovusdb(xmmA, xmmB);
a.vpmovusdb(anyptr_gpA, xmmB);
a.vpmovusdb(xmmA, ymmB);
a.vpmovusdb(anyptr_gpA, ymmB);
a.vpmovusdb(xmmA, zmmB);
a.vpmovusdb(anyptr_gpA, zmmB);
a.vpmovusdw(xmmA, xmmB);
a.vpmovusdw(anyptr_gpA, xmmB);
a.vpmovusdw(xmmA, ymmB);
a.vpmovusdw(anyptr_gpA, ymmB);
a.vpmovusdw(ymmA, zmmB);
a.vpmovusdw(anyptr_gpA, zmmB);
a.vpmovusqb(xmmA, xmmB);
a.vpmovusqb(anyptr_gpA, xmmB);
a.vpmovusqb(xmmA, ymmB);
a.vpmovusqb(anyptr_gpA, ymmB);
a.vpmovusqb(xmmA, zmmB);
a.vpmovusqb(anyptr_gpA, zmmB);
a.vpmovusqd(xmmA, xmmB);
a.vpmovusqd(anyptr_gpA, xmmB);
a.vpmovusqd(xmmA, ymmB);
a.vpmovusqd(anyptr_gpA, ymmB);
a.vpmovusqd(ymmA, zmmB);
a.vpmovusqd(anyptr_gpA, zmmB);
a.vpmovusqw(xmmA, xmmB);
a.vpmovusqw(anyptr_gpA, xmmB);
a.vpmovusqw(xmmA, ymmB);
a.vpmovusqw(anyptr_gpA, ymmB);
a.vpmovusqw(xmmA, zmmB);
a.vpmovusqw(anyptr_gpA, zmmB);
a.vpmovuswb(xmmA, xmmB);
a.vpmovuswb(anyptr_gpA, xmmB);
a.vpmovuswb(xmmA, ymmB);
a.vpmovuswb(anyptr_gpA, ymmB);
a.vpmovuswb(ymmA, zmmB);
a.vpmovuswb(anyptr_gpA, zmmB);
a.vpmovw2m(kA, xmmB);
a.vpmovw2m(kA, ymmB);
a.vpmovw2m(kA, zmmB);
a.vpmovwb(xmmA, xmmB);
a.vpmovwb(anyptr_gpA, xmmB);
a.vpmovwb(xmmA, ymmB);
a.vpmovwb(anyptr_gpA, ymmB);
a.vpmovwb(ymmA, zmmB);
a.vpmovwb(anyptr_gpA, zmmB);
a.vpmovzxbd(xmmA, xmmB);
a.vpmovzxbd(xmmA, anyptr_gpB);
a.vpmovzxbd(ymmA, xmmB);
a.vpmovzxbd(ymmA, anyptr_gpB);
a.vpmovzxbd(zmmA, xmmB);
a.vpmovzxbd(zmmA, anyptr_gpB);
a.vpmovzxbq(xmmA, xmmB);
a.vpmovzxbq(xmmA, anyptr_gpB);
a.vpmovzxbq(ymmA, xmmB);
a.vpmovzxbq(ymmA, anyptr_gpB);
a.vpmovzxbq(zmmA, xmmB);
a.vpmovzxbq(zmmA, anyptr_gpB);
a.vpmovzxbw(xmmA, xmmB);
a.vpmovzxbw(xmmA, anyptr_gpB);
a.vpmovzxbw(ymmA, xmmB);
a.vpmovzxbw(ymmA, anyptr_gpB);
a.vpmovzxbw(zmmA, ymmB);
a.vpmovzxbw(zmmA, anyptr_gpB);
a.vpmovzxdq(xmmA, xmmB);
a.vpmovzxdq(xmmA, anyptr_gpB);
a.vpmovzxdq(ymmA, xmmB);
a.vpmovzxdq(ymmA, anyptr_gpB);
a.vpmovzxdq(zmmA, xmmB);
a.vpmovzxdq(zmmA, anyptr_gpB);
a.vpmovzxwd(xmmA, xmmB);
a.vpmovzxwd(xmmA, anyptr_gpB);
a.vpmovzxwd(ymmA, xmmB);
a.vpmovzxwd(ymmA, anyptr_gpB);
a.vpmovzxwd(zmmA, ymmB);
a.vpmovzxwd(zmmA, anyptr_gpB);
a.vpmovzxwq(xmmA, xmmB);
a.vpmovzxwq(xmmA, anyptr_gpB);
a.vpmovzxwq(ymmA, xmmB);
a.vpmovzxwq(ymmA, anyptr_gpB);
a.vpmovzxwq(zmmA, xmmB);
a.vpmovzxwq(zmmA, anyptr_gpB);
a.vpmuldq(xmmA, xmmB, xmmC);
a.vpmuldq(xmmA, xmmB, anyptr_gpC);
a.vpmuldq(ymmA, ymmB, ymmC);
a.vpmuldq(ymmA, ymmB, anyptr_gpC);
a.vpmuldq(zmmA, zmmB, zmmC);
a.vpmuldq(zmmA, zmmB, anyptr_gpC);
a.vpmulhrsw(xmmA, xmmB, xmmC);
a.vpmulhrsw(xmmA, xmmB, anyptr_gpC);
a.vpmulhrsw(ymmA, ymmB, ymmC);
a.vpmulhrsw(ymmA, ymmB, anyptr_gpC);
a.vpmulhrsw(zmmA, zmmB, zmmC);
a.vpmulhrsw(zmmA, zmmB, anyptr_gpC);
a.vpmulhuw(xmmA, xmmB, xmmC);
a.vpmulhuw(xmmA, xmmB, anyptr_gpC);
a.vpmulhuw(ymmA, ymmB, ymmC);
a.vpmulhuw(ymmA, ymmB, anyptr_gpC);
a.vpmulhuw(zmmA, zmmB, zmmC);
a.vpmulhuw(zmmA, zmmB, anyptr_gpC);
a.vpmulhw(xmmA, xmmB, xmmC);
a.vpmulhw(xmmA, xmmB, anyptr_gpC);
a.vpmulhw(ymmA, ymmB, ymmC);
a.vpmulhw(ymmA, ymmB, anyptr_gpC);
a.vpmulhw(zmmA, zmmB, zmmC);
a.vpmulhw(zmmA, zmmB, anyptr_gpC);
a.vpmulld(xmmA, xmmB, xmmC);
a.vpmulld(xmmA, xmmB, anyptr_gpC);
a.vpmulld(ymmA, ymmB, ymmC);
a.vpmulld(ymmA, ymmB, anyptr_gpC);
a.vpmulld(zmmA, zmmB, zmmC);
a.vpmulld(zmmA, zmmB, anyptr_gpC);
a.vpmullq(xmmA, xmmB, xmmC);
a.vpmullq(xmmA, xmmB, anyptr_gpC);
a.vpmullq(ymmA, ymmB, ymmC);
a.vpmullq(ymmA, ymmB, anyptr_gpC);
a.vpmullq(zmmA, zmmB, zmmC);
a.vpmullq(zmmA, zmmB, anyptr_gpC);
a.vpmullw(xmmA, xmmB, xmmC);
a.vpmullw(xmmA, xmmB, anyptr_gpC);
a.vpmullw(ymmA, ymmB, ymmC);
a.vpmullw(ymmA, ymmB, anyptr_gpC);
a.vpmullw(zmmA, zmmB, zmmC);
a.vpmullw(zmmA, zmmB, anyptr_gpC);
a.vpmultishiftqb(xmmA, xmmB, xmmC);
a.vpmultishiftqb(xmmA, xmmB, anyptr_gpC);
a.vpmultishiftqb(ymmA, ymmB, ymmC);
a.vpmultishiftqb(ymmA, ymmB, anyptr_gpC);
a.vpmultishiftqb(zmmA, zmmB, zmmC);
a.vpmultishiftqb(zmmA, zmmB, anyptr_gpC);
a.vpmuludq(xmmA, xmmB, xmmC);
a.vpmuludq(xmmA, xmmB, anyptr_gpC);
a.vpmuludq(ymmA, ymmB, ymmC);
a.vpmuludq(ymmA, ymmB, anyptr_gpC);
a.vpmuludq(zmmA, zmmB, zmmC);
a.vpmuludq(zmmA, zmmB, anyptr_gpC);
a.vpopcntd(zmmA, zmmB);
a.vpopcntd(zmmA, anyptr_gpB);
a.vpopcntq(zmmA, zmmB);
a.vpopcntq(zmmA, anyptr_gpB);
a.vpord(xmmA, xmmB, xmmC);
a.vpord(xmmA, xmmB, anyptr_gpC);
a.vpord(ymmA, ymmB, ymmC);
a.vpord(ymmA, ymmB, anyptr_gpC);
a.vpord(zmmA, zmmB, zmmC);
a.vpord(zmmA, zmmB, anyptr_gpC);
a.vporq(xmmA, xmmB, xmmC);
a.vporq(xmmA, xmmB, anyptr_gpC);
a.vporq(ymmA, ymmB, ymmC);
a.vporq(ymmA, ymmB, anyptr_gpC);
a.vporq(zmmA, zmmB, zmmC);
a.vporq(zmmA, zmmB, anyptr_gpC);
a.vprold(xmmA, xmmB, 0);
a.vprold(xmmA, anyptr_gpB, 0);
a.vprold(ymmA, ymmB, 0);
a.vprold(ymmA, anyptr_gpB, 0);
a.vprold(zmmA, zmmB, 0);
a.vprold(zmmA, anyptr_gpB, 0);
a.vprolq(xmmA, xmmB, 0);
a.vprolq(xmmA, anyptr_gpB, 0);
a.vprolq(ymmA, ymmB, 0);
a.vprolq(ymmA, anyptr_gpB, 0);
a.vprolq(zmmA, zmmB, 0);
a.vprolq(zmmA, anyptr_gpB, 0);
a.vprolvd(xmmA, xmmB, xmmC);
a.vprolvd(xmmA, xmmB, anyptr_gpC);
a.vprolvd(ymmA, ymmB, ymmC);
a.vprolvd(ymmA, ymmB, anyptr_gpC);
a.vprolvd(zmmA, zmmB, zmmC);
a.vprolvd(zmmA, zmmB, anyptr_gpC);
a.vprolvq(xmmA, xmmB, xmmC);
a.vprolvq(xmmA, xmmB, anyptr_gpC);
a.vprolvq(ymmA, ymmB, ymmC);
a.vprolvq(ymmA, ymmB, anyptr_gpC);
a.vprolvq(zmmA, zmmB, zmmC);
a.vprolvq(zmmA, zmmB, anyptr_gpC);
a.vprord(xmmA, xmmB, 0);
a.vprord(xmmA, anyptr_gpB, 0);
a.vprord(ymmA, ymmB, 0);
a.vprord(ymmA, anyptr_gpB, 0);
a.vprord(zmmA, zmmB, 0);
a.vprord(zmmA, anyptr_gpB, 0);
a.vprorq(xmmA, xmmB, 0);
a.vprorq(xmmA, anyptr_gpB, 0);
a.vprorq(ymmA, ymmB, 0);
a.vprorq(ymmA, anyptr_gpB, 0);
a.vprorq(zmmA, zmmB, 0);
a.vprorq(zmmA, anyptr_gpB, 0);
a.vprorvd(xmmA, xmmB, xmmC);
a.vprorvd(xmmA, xmmB, anyptr_gpC);
a.vprorvd(ymmA, ymmB, ymmC);
a.vprorvd(ymmA, ymmB, anyptr_gpC);
a.vprorvd(zmmA, zmmB, zmmC);
a.vprorvd(zmmA, zmmB, anyptr_gpC);
a.vprorvq(xmmA, xmmB, xmmC);
a.vprorvq(xmmA, xmmB, anyptr_gpC);
a.vprorvq(ymmA, ymmB, ymmC);
a.vprorvq(ymmA, ymmB, anyptr_gpC);
a.vprorvq(zmmA, zmmB, zmmC);
a.vprorvq(zmmA, zmmB, anyptr_gpC);
a.vpsadbw(xmmA, xmmB, xmmC);
a.vpsadbw(xmmA, xmmB, anyptr_gpC);
a.vpsadbw(ymmA, ymmB, ymmC);
a.vpsadbw(ymmA, ymmB, anyptr_gpC);
a.vpsadbw(zmmA, zmmB, zmmC);
a.vpsadbw(zmmA, zmmB, anyptr_gpC);
a.vpscatterdd(vx_ptr, xmmB);
a.vpscatterdd(vy_ptr, ymmB);
a.vpscatterdd(vz_ptr, zmmB);
// VPSCATTERDQ stores 64-bit elements addressed by 32-bit indices, so the
// index vector holds twice as many lanes per byte as the data vector. The
// valid forms are therefore (vm32x, xmm), (vm32x, ymm) and (vm32y, zmm) -
// the same index/data pairing used by `vscatterdpd`.
a.vpscatterdq(vx_ptr, xmmB);
a.vpscatterdq(vx_ptr, ymmB);
a.vpscatterdq(vy_ptr, zmmB);
a.vpscatterqd(vx_ptr, xmmB);
a.vpscatterqd(vy_ptr, xmmB);
a.vpscatterqd(vz_ptr, ymmB);
a.vpscatterqq(vx_ptr, xmmB);
a.vpscatterqq(vy_ptr, ymmB);
a.vpscatterqq(vz_ptr, zmmB);
a.vpshufb(xmmA, xmmB, xmmC);
a.vpshufb(xmmA, xmmB, anyptr_gpC);
a.vpshufb(ymmA, ymmB, ymmC);
a.vpshufb(ymmA, ymmB, anyptr_gpC);
a.vpshufb(zmmA, zmmB, zmmC);
a.vpshufb(zmmA, zmmB, anyptr_gpC);
a.vpshufd(xmmA, xmmB, 0);
a.vpshufd(xmmA, anyptr_gpB, 0);
a.vpshufd(ymmA, ymmB, 0);
a.vpshufd(ymmA, anyptr_gpB, 0);
a.vpshufd(zmmA, zmmB, 0);
a.vpshufd(zmmA, anyptr_gpB, 0);
a.vpshufhw(xmmA, xmmB, 0);
a.vpshufhw(xmmA, anyptr_gpB, 0);
a.vpshufhw(ymmA, ymmB, 0);
a.vpshufhw(ymmA, anyptr_gpB, 0);
a.vpshufhw(zmmA, zmmB, 0);
a.vpshufhw(zmmA, anyptr_gpB, 0);
a.vpshuflw(xmmA, xmmB, 0);
a.vpshuflw(xmmA, anyptr_gpB, 0);
a.vpshuflw(ymmA, ymmB, 0);
a.vpshuflw(ymmA, anyptr_gpB, 0);
a.vpshuflw(zmmA, zmmB, 0);
a.vpshuflw(zmmA, anyptr_gpB, 0);
a.vpslld(xmmA, xmmB, xmmC);
a.vpslld(xmmA, xmmB, anyptr_gpC);
a.vpslld(xmmA, xmmB, 0);
a.vpslld(xmmA, anyptr_gpB, 0);
a.vpslld(ymmA, ymmB, xmmC);
a.vpslld(ymmA, ymmB, anyptr_gpC);
a.vpslld(ymmA, ymmB, 0);
a.vpslld(ymmA, anyptr_gpB, 0);
a.vpslld(zmmA, zmmB, xmmC);
a.vpslld(zmmA, zmmB, anyptr_gpC);
a.vpslld(zmmA, zmmB, 0);
a.vpslld(zmmA, anyptr_gpB, 0);
a.vpslldq(xmmA, xmmB, 0);
a.vpslldq(xmmA, anyptr_gpB, 0);
a.vpslldq(ymmA, ymmB, 0);
a.vpslldq(ymmA, anyptr_gpB, 0);
a.vpslldq(zmmA, zmmB, 0);
a.vpslldq(zmmA, anyptr_gpB, 0);
a.vpsllq(xmmA, xmmB, xmmC);
a.vpsllq(xmmA, xmmB, anyptr_gpC);
a.vpsllq(xmmA, xmmB, 0);
a.vpsllq(xmmA, anyptr_gpB, 0);
a.vpsllq(ymmA, ymmB, xmmC);
a.vpsllq(ymmA, ymmB, anyptr_gpC);
a.vpsllq(ymmA, ymmB, 0);
a.vpsllq(ymmA, anyptr_gpB, 0);
a.vpsllq(zmmA, zmmB, xmmC);
a.vpsllq(zmmA, zmmB, anyptr_gpC);
a.vpsllq(zmmA, zmmB, 0);
a.vpsllq(zmmA, anyptr_gpB, 0);
a.vpsllvd(xmmA, xmmB, xmmC);
a.vpsllvd(xmmA, xmmB, anyptr_gpC);
a.vpsllvd(ymmA, ymmB, ymmC);
a.vpsllvd(ymmA, ymmB, anyptr_gpC);
a.vpsllvd(zmmA, zmmB, zmmC);
a.vpsllvd(zmmA, zmmB, anyptr_gpC);
a.vpsllvq(xmmA, xmmB, xmmC);
a.vpsllvq(xmmA, xmmB, anyptr_gpC);
a.vpsllvq(ymmA, ymmB, ymmC);
a.vpsllvq(ymmA, ymmB, anyptr_gpC);
a.vpsllvq(zmmA, zmmB, zmmC);
a.vpsllvq(zmmA, zmmB, anyptr_gpC);
a.vpsllvw(xmmA, xmmB, xmmC);
a.vpsllvw(xmmA, xmmB, anyptr_gpC);
a.vpsllvw(ymmA, ymmB, ymmC);
a.vpsllvw(ymmA, ymmB, anyptr_gpC);
a.vpsllvw(zmmA, zmmB, zmmC);
a.vpsllvw(zmmA, zmmB, anyptr_gpC);
a.vpsllw(xmmA, xmmB, xmmC);
a.vpsllw(xmmA, xmmB, anyptr_gpC);
a.vpsllw(xmmA, xmmB, 0);
a.vpsllw(xmmA, anyptr_gpB, 0);
a.vpsllw(ymmA, ymmB, xmmC);
a.vpsllw(ymmA, ymmB, anyptr_gpC);
a.vpsllw(ymmA, ymmB, 0);
a.vpsllw(ymmA, anyptr_gpB, 0);
a.vpsllw(zmmA, zmmB, xmmC);
a.vpsllw(zmmA, zmmB, anyptr_gpC);
a.vpsllw(zmmA, zmmB, 0);
a.vpsllw(zmmA, anyptr_gpB, 0);
a.vpsrad(xmmA, xmmB, xmmC);
a.vpsrad(xmmA, xmmB, anyptr_gpC);
a.vpsrad(xmmA, xmmB, 0);
a.vpsrad(xmmA, anyptr_gpB, 0);
a.vpsrad(ymmA, ymmB, xmmC);
a.vpsrad(ymmA, ymmB, anyptr_gpC);
a.vpsrad(ymmA, ymmB, 0);
a.vpsrad(ymmA, anyptr_gpB, 0);
a.vpsrad(zmmA, zmmB, xmmC);
a.vpsrad(zmmA, zmmB, anyptr_gpC);
a.vpsrad(zmmA, zmmB, 0);
a.vpsrad(zmmA, anyptr_gpB, 0);
a.vpsraq(xmmA, xmmB, xmmC);
a.vpsraq(xmmA, xmmB, anyptr_gpC);
a.vpsraq(xmmA, xmmB, 0);
a.vpsraq(xmmA, anyptr_gpB, 0);
a.vpsraq(ymmA, ymmB, xmmC);
a.vpsraq(ymmA, ymmB, anyptr_gpC);
a.vpsraq(ymmA, ymmB, 0);
a.vpsraq(ymmA, anyptr_gpB, 0);
a.vpsraq(zmmA, zmmB, xmmC);
a.vpsraq(zmmA, zmmB, anyptr_gpC);
a.vpsraq(zmmA, zmmB, 0);
a.vpsraq(zmmA, anyptr_gpB, 0);
a.vpsravd(xmmA, xmmB, xmmC);
a.vpsravd(xmmA, xmmB, anyptr_gpC);
a.vpsravd(ymmA, ymmB, ymmC);
a.vpsravd(ymmA, ymmB, anyptr_gpC);
a.vpsravd(zmmA, zmmB, zmmC);
a.vpsravd(zmmA, zmmB, anyptr_gpC);
a.vpsravq(xmmA, xmmB, xmmC);
a.vpsravq(xmmA, xmmB, anyptr_gpC);
a.vpsravq(ymmA, ymmB, ymmC);
a.vpsravq(ymmA, ymmB, anyptr_gpC);
a.vpsravq(zmmA, zmmB, zmmC);
a.vpsravq(zmmA, zmmB, anyptr_gpC);
a.vpsravw(xmmA, xmmB, xmmC);
a.vpsravw(xmmA, xmmB, anyptr_gpC);
a.vpsravw(ymmA, ymmB, ymmC);
a.vpsravw(ymmA, ymmB, anyptr_gpC);
a.vpsravw(zmmA, zmmB, zmmC);
a.vpsravw(zmmA, zmmB, anyptr_gpC);
a.vpsraw(xmmA, xmmB, xmmC);
a.vpsraw(xmmA, xmmB, anyptr_gpC);
a.vpsraw(xmmA, xmmB, 0);
a.vpsraw(xmmA, anyptr_gpB, 0);
a.vpsraw(ymmA, ymmB, xmmC);
a.vpsraw(ymmA, ymmB, anyptr_gpC);
a.vpsraw(ymmA, ymmB, 0);
a.vpsraw(ymmA, anyptr_gpB, 0);
a.vpsraw(zmmA, zmmB, xmmC);
a.vpsraw(zmmA, zmmB, anyptr_gpC);
a.vpsraw(zmmA, zmmB, 0);
a.vpsraw(zmmA, anyptr_gpB, 0);
a.vpsrld(xmmA, xmmB, xmmC);
a.vpsrld(xmmA, xmmB, anyptr_gpC);
a.vpsrld(xmmA, xmmB, 0);
a.vpsrld(xmmA, anyptr_gpB, 0);
a.vpsrld(ymmA, ymmB, xmmC);
a.vpsrld(ymmA, ymmB, anyptr_gpC);
a.vpsrld(ymmA, ymmB, 0);
a.vpsrld(ymmA, anyptr_gpB, 0);
a.vpsrld(zmmA, zmmB, xmmC);
a.vpsrld(zmmA, zmmB, anyptr_gpC);
a.vpsrld(zmmA, zmmB, 0);
a.vpsrld(zmmA, anyptr_gpB, 0);
a.vpsrldq(xmmA, xmmB, 0);
a.vpsrldq(xmmA, anyptr_gpB, 0);
a.vpsrldq(ymmA, ymmB, 0);
a.vpsrldq(ymmA, anyptr_gpB, 0);
a.vpsrldq(zmmA, zmmB, 0);
a.vpsrldq(zmmA, anyptr_gpB, 0);
a.vpsrlq(xmmA, xmmB, xmmC);
a.vpsrlq(xmmA, xmmB, anyptr_gpC);
a.vpsrlq(xmmA, xmmB, 0);
a.vpsrlq(xmmA, anyptr_gpB, 0);
a.vpsrlq(ymmA, ymmB, xmmC);
a.vpsrlq(ymmA, ymmB, anyptr_gpC);
a.vpsrlq(ymmA, ymmB, 0);
a.vpsrlq(ymmA, anyptr_gpB, 0);
a.vpsrlq(zmmA, zmmB, xmmC);
a.vpsrlq(zmmA, zmmB, anyptr_gpC);
a.vpsrlq(zmmA, zmmB, 0);
a.vpsrlq(zmmA, anyptr_gpB, 0);
a.vpsrlvd(xmmA, xmmB, xmmC);
a.vpsrlvd(xmmA, xmmB, anyptr_gpC);
a.vpsrlvd(ymmA, ymmB, ymmC);
a.vpsrlvd(ymmA, ymmB, anyptr_gpC);
a.vpsrlvd(zmmA, zmmB, zmmC);
a.vpsrlvd(zmmA, zmmB, anyptr_gpC);
a.vpsrlvq(xmmA, xmmB, xmmC);
a.vpsrlvq(xmmA, xmmB, anyptr_gpC);
a.vpsrlvq(ymmA, ymmB, ymmC);
a.vpsrlvq(ymmA, ymmB, anyptr_gpC);
a.vpsrlvq(zmmA, zmmB, zmmC);
a.vpsrlvq(zmmA, zmmB, anyptr_gpC);
a.vpsrlvw(xmmA, xmmB, xmmC);
a.vpsrlvw(xmmA, xmmB, anyptr_gpC);
a.vpsrlvw(ymmA, ymmB, ymmC);
a.vpsrlvw(ymmA, ymmB, anyptr_gpC);
a.vpsrlvw(zmmA, zmmB, zmmC);
a.vpsrlvw(zmmA, zmmB, anyptr_gpC);
a.vpsrlw(xmmA, xmmB, xmmC);
a.vpsrlw(xmmA, xmmB, anyptr_gpC);
a.vpsrlw(xmmA, xmmB, 0);
a.vpsrlw(xmmA, anyptr_gpB, 0);
a.vpsrlw(ymmA, ymmB, xmmC);
a.vpsrlw(ymmA, ymmB, anyptr_gpC);
a.vpsrlw(ymmA, ymmB, 0);
a.vpsrlw(ymmA, anyptr_gpB, 0);
a.vpsrlw(zmmA, zmmB, xmmC);
a.vpsrlw(zmmA, zmmB, anyptr_gpC);
a.vpsrlw(zmmA, zmmB, 0);
a.vpsrlw(zmmA, anyptr_gpB, 0);
a.vpsubb(xmmA, xmmB, xmmC);
a.vpsubb(xmmA, xmmB, anyptr_gpC);
a.vpsubb(ymmA, ymmB, ymmC);
a.vpsubb(ymmA, ymmB, anyptr_gpC);
a.vpsubb(zmmA, zmmB, zmmC);
a.vpsubb(zmmA, zmmB, anyptr_gpC);
a.vpsubd(xmmA, xmmB, xmmC);
a.vpsubd(xmmA, xmmB, anyptr_gpC);
a.vpsubd(ymmA, ymmB, ymmC);
a.vpsubd(ymmA, ymmB, anyptr_gpC);
a.vpsubd(zmmA, zmmB, zmmC);
a.vpsubd(zmmA, zmmB, anyptr_gpC);
a.vpsubq(xmmA, xmmB, xmmC);
a.vpsubq(xmmA, xmmB, anyptr_gpC);
a.vpsubq(ymmA, ymmB, ymmC);
a.vpsubq(ymmA, ymmB, anyptr_gpC);
a.vpsubq(zmmA, zmmB, zmmC);
a.vpsubq(zmmA, zmmB, anyptr_gpC);
a.vpsubsb(xmmA, xmmB, xmmC);
a.vpsubsb(xmmA, xmmB, anyptr_gpC);
a.vpsubsb(ymmA, ymmB, ymmC);
a.vpsubsb(ymmA, ymmB, anyptr_gpC);
a.vpsubsb(zmmA, zmmB, zmmC);
a.vpsubsb(zmmA, zmmB, anyptr_gpC);
a.vpsubsw(xmmA, xmmB, xmmC);
a.vpsubsw(xmmA, xmmB, anyptr_gpC);
a.vpsubsw(ymmA, ymmB, ymmC);
a.vpsubsw(ymmA, ymmB, anyptr_gpC);
a.vpsubsw(zmmA, zmmB, zmmC);
a.vpsubsw(zmmA, zmmB, anyptr_gpC);
a.vpsubusb(xmmA, xmmB, xmmC);
a.vpsubusb(xmmA, xmmB, anyptr_gpC);
a.vpsubusb(ymmA, ymmB, ymmC);
a.vpsubusb(ymmA, ymmB, anyptr_gpC);
a.vpsubusb(zmmA, zmmB, zmmC);
a.vpsubusb(zmmA, zmmB, anyptr_gpC);
a.vpsubusw(xmmA, xmmB, xmmC);
a.vpsubusw(xmmA, xmmB, anyptr_gpC);
a.vpsubusw(ymmA, ymmB, ymmC);
a.vpsubusw(ymmA, ymmB, anyptr_gpC);
a.vpsubusw(zmmA, zmmB, zmmC);
a.vpsubusw(zmmA, zmmB, anyptr_gpC);
a.vpsubw(xmmA, xmmB, xmmC);
a.vpsubw(xmmA, xmmB, anyptr_gpC);
a.vpsubw(ymmA, ymmB, ymmC);
a.vpsubw(ymmA, ymmB, anyptr_gpC);
a.vpsubw(zmmA, zmmB, zmmC);
a.vpsubw(zmmA, zmmB, anyptr_gpC);
a.vpternlogd(xmmA, xmmB, xmmC, 0);
a.vpternlogd(xmmA, xmmB, anyptr_gpC, 0);
a.vpternlogd(ymmA, ymmB, ymmC, 0);
a.vpternlogd(ymmA, ymmB, anyptr_gpC, 0);
a.vpternlogd(zmmA, zmmB, zmmC, 0);
a.vpternlogd(zmmA, zmmB, anyptr_gpC, 0);
a.vpternlogq(xmmA, xmmB, xmmC, 0);
a.vpternlogq(xmmA, xmmB, anyptr_gpC, 0);
a.vpternlogq(ymmA, ymmB, ymmC, 0);
a.vpternlogq(ymmA, ymmB, anyptr_gpC, 0);
a.vpternlogq(zmmA, zmmB, zmmC, 0);
a.vpternlogq(zmmA, zmmB, anyptr_gpC, 0);
a.vptestmb(kA, xmmB, xmmC);
a.vptestmb(kA, xmmB, anyptr_gpC);
a.vptestmb(kA, ymmB, ymmC);
a.vptestmb(kA, ymmB, anyptr_gpC);
a.vptestmb(kA, zmmB, zmmC);
a.vptestmb(kA, zmmB, anyptr_gpC);
a.vptestmd(kA, xmmB, xmmC);
a.vptestmd(kA, xmmB, anyptr_gpC);
a.vptestmd(kA, ymmB, ymmC);
a.vptestmd(kA, ymmB, anyptr_gpC);
a.vptestmd(kA, zmmB, zmmC);
a.vptestmd(kA, zmmB, anyptr_gpC);
a.vptestmq(kA, xmmB, xmmC);
a.vptestmq(kA, xmmB, anyptr_gpC);
a.vptestmq(kA, ymmB, ymmC);
a.vptestmq(kA, ymmB, anyptr_gpC);
a.vptestmq(kA, zmmB, zmmC);
a.vptestmq(kA, zmmB, anyptr_gpC);
a.vptestmw(kA, xmmB, xmmC);
a.vptestmw(kA, xmmB, anyptr_gpC);
a.vptestmw(kA, ymmB, ymmC);
a.vptestmw(kA, ymmB, anyptr_gpC);
a.vptestmw(kA, zmmB, zmmC);
a.vptestmw(kA, zmmB, anyptr_gpC);
a.vptestnmb(kA, xmmB, xmmC);
a.vptestnmb(kA, xmmB, anyptr_gpC);
a.vptestnmb(kA, ymmB, ymmC);
a.vptestnmb(kA, ymmB, anyptr_gpC);
a.vptestnmb(kA, zmmB, zmmC);
a.vptestnmb(kA, zmmB, anyptr_gpC);
a.vptestnmd(kA, xmmB, xmmC);
a.vptestnmd(kA, xmmB, anyptr_gpC);
a.vptestnmd(kA, ymmB, ymmC);
a.vptestnmd(kA, ymmB, anyptr_gpC);
a.vptestnmd(kA, zmmB, zmmC);
a.vptestnmd(kA, zmmB, anyptr_gpC);
a.vptestnmq(kA, xmmB, xmmC);
a.vptestnmq(kA, xmmB, anyptr_gpC);
a.vptestnmq(kA, ymmB, ymmC);
a.vptestnmq(kA, ymmB, anyptr_gpC);
a.vptestnmq(kA, zmmB, zmmC);
a.vptestnmq(kA, zmmB, anyptr_gpC);
a.vptestnmw(kA, xmmB, xmmC);
a.vptestnmw(kA, xmmB, anyptr_gpC);
a.vptestnmw(kA, ymmB, ymmC);
a.vptestnmw(kA, ymmB, anyptr_gpC);
a.vptestnmw(kA, zmmB, zmmC);
a.vptestnmw(kA, zmmB, anyptr_gpC);
a.vpunpckhbw(xmmA, xmmB, xmmC);
a.vpunpckhbw(xmmA, xmmB, anyptr_gpC);
a.vpunpckhbw(ymmA, ymmB, ymmC);
a.vpunpckhbw(ymmA, ymmB, anyptr_gpC);
a.vpunpckhbw(zmmA, zmmB, zmmC);
a.vpunpckhbw(zmmA, zmmB, anyptr_gpC);
a.vpunpckhdq(xmmA, xmmB, xmmC);
a.vpunpckhdq(xmmA, xmmB, anyptr_gpC);
a.vpunpckhdq(ymmA, ymmB, ymmC);
a.vpunpckhdq(ymmA, ymmB, anyptr_gpC);
a.vpunpckhdq(zmmA, zmmB, zmmC);
a.vpunpckhdq(zmmA, zmmB, anyptr_gpC);
a.vpunpckhqdq(xmmA, xmmB, xmmC);
a.vpunpckhqdq(xmmA, xmmB, anyptr_gpC);
a.vpunpckhqdq(ymmA, ymmB, ymmC);
a.vpunpckhqdq(ymmA, ymmB, anyptr_gpC);
a.vpunpckhqdq(zmmA, zmmB, zmmC);
a.vpunpckhqdq(zmmA, zmmB, anyptr_gpC);
a.vpunpckhwd(xmmA, xmmB, xmmC);
a.vpunpckhwd(xmmA, xmmB, anyptr_gpC);
a.vpunpckhwd(ymmA, ymmB, ymmC);
a.vpunpckhwd(ymmA, ymmB, anyptr_gpC);
a.vpunpckhwd(zmmA, zmmB, zmmC);
a.vpunpckhwd(zmmA, zmmB, anyptr_gpC);
a.vpunpcklbw(xmmA, xmmB, xmmC);
a.vpunpcklbw(xmmA, xmmB, anyptr_gpC);
a.vpunpcklbw(ymmA, ymmB, ymmC);
a.vpunpcklbw(ymmA, ymmB, anyptr_gpC);
a.vpunpcklbw(zmmA, zmmB, zmmC);
a.vpunpcklbw(zmmA, zmmB, anyptr_gpC);
a.vpunpckldq(xmmA, xmmB, xmmC);
a.vpunpckldq(xmmA, xmmB, anyptr_gpC);
a.vpunpckldq(ymmA, ymmB, ymmC);
a.vpunpckldq(ymmA, ymmB, anyptr_gpC);
a.vpunpckldq(zmmA, zmmB, zmmC);
a.vpunpckldq(zmmA, zmmB, anyptr_gpC);
a.vpunpcklqdq(xmmA, xmmB, xmmC);
a.vpunpcklqdq(xmmA, xmmB, anyptr_gpC);
a.vpunpcklqdq(ymmA, ymmB, ymmC);
a.vpunpcklqdq(ymmA, ymmB, anyptr_gpC);
a.vpunpcklqdq(zmmA, zmmB, zmmC);
a.vpunpcklqdq(zmmA, zmmB, anyptr_gpC);
a.vpunpcklwd(xmmA, xmmB, xmmC);
a.vpunpcklwd(xmmA, xmmB, anyptr_gpC);
a.vpunpcklwd(ymmA, ymmB, ymmC);
a.vpunpcklwd(ymmA, ymmB, anyptr_gpC);
a.vpunpcklwd(zmmA, zmmB, zmmC);
a.vpunpcklwd(zmmA, zmmB, anyptr_gpC);
a.vpxord(xmmA, xmmB, xmmC);
a.vpxord(xmmA, xmmB, anyptr_gpC);
a.vpxord(ymmA, ymmB, ymmC);
a.vpxord(ymmA, ymmB, anyptr_gpC);
a.vpxord(zmmA, zmmB, zmmC);
a.vpxord(zmmA, zmmB, anyptr_gpC);
a.vpxorq(xmmA, xmmB, xmmC);
a.vpxorq(xmmA, xmmB, anyptr_gpC);
a.vpxorq(ymmA, ymmB, ymmC);
a.vpxorq(ymmA, ymmB, anyptr_gpC);
a.vpxorq(zmmA, zmmB, zmmC);
a.vpxorq(zmmA, zmmB, anyptr_gpC);
a.vrangepd(xmmA, xmmB, xmmC, 0);
a.vrangepd(xmmA, xmmB, anyptr_gpC, 0);
a.vrangepd(ymmA, ymmB, ymmC, 0);
a.vrangepd(ymmA, ymmB, anyptr_gpC, 0);
a.vrangepd(zmmA, zmmB, zmmC, 0);
a.vrangepd(zmmA, zmmB, anyptr_gpC, 0);
a.vrangeps(xmmA, xmmB, xmmC, 0);
a.vrangeps(xmmA, xmmB, anyptr_gpC, 0);
a.vrangeps(ymmA, ymmB, ymmC, 0);
a.vrangeps(ymmA, ymmB, anyptr_gpC, 0);
a.vrangeps(zmmA, zmmB, zmmC, 0);
a.vrangeps(zmmA, zmmB, anyptr_gpC, 0);
a.vrangesd(xmmA, xmmB, xmmC, 0);
a.vrangesd(xmmA, xmmB, anyptr_gpC, 0);
a.vrangess(xmmA, xmmB, xmmC, 0);
a.vrangess(xmmA, xmmB, anyptr_gpC, 0);
a.vrcp14pd(xmmA, xmmB);
a.vrcp14pd(xmmA, anyptr_gpB);
a.vrcp14pd(ymmA, ymmB);
a.vrcp14pd(ymmA, anyptr_gpB);
a.vrcp14pd(zmmA, zmmB);
a.vrcp14pd(zmmA, anyptr_gpB);
a.vrcp14ps(xmmA, xmmB);
a.vrcp14ps(xmmA, anyptr_gpB);
a.vrcp14ps(ymmA, ymmB);
a.vrcp14ps(ymmA, anyptr_gpB);
a.vrcp14ps(zmmA, zmmB);
a.vrcp14ps(zmmA, anyptr_gpB);
a.vrcp14sd(xmmA, xmmB, xmmC);
a.vrcp14sd(xmmA, xmmB, anyptr_gpC);
a.vrcp14ss(xmmA, xmmB, xmmC);
a.vrcp14ss(xmmA, xmmB, anyptr_gpC);
a.vrcp28pd(zmmA, zmmB);
a.vrcp28pd(zmmA, anyptr_gpB);
a.vrcp28ps(zmmA, zmmB);
a.vrcp28ps(zmmA, anyptr_gpB);
a.vrcp28sd(xmmA, xmmB, xmmC);
a.vrcp28sd(xmmA, xmmB, anyptr_gpC);
a.vrcp28ss(xmmA, xmmB, xmmC);
a.vrcp28ss(xmmA, xmmB, anyptr_gpC);
a.vreducepd(xmmA, xmmB, 0);
a.vreducepd(xmmA, anyptr_gpB, 0);
a.vreducepd(ymmA, ymmB, 0);
a.vreducepd(ymmA, anyptr_gpB, 0);
a.vreducepd(zmmA, zmmB, 0);
a.vreducepd(zmmA, anyptr_gpB, 0);
a.vreduceps(xmmA, xmmB, 0);
a.vreduceps(xmmA, anyptr_gpB, 0);
a.vreduceps(ymmA, ymmB, 0);
a.vreduceps(ymmA, anyptr_gpB, 0);
a.vreduceps(zmmA, zmmB, 0);
a.vreduceps(zmmA, anyptr_gpB, 0);
a.vreducesd(xmmA, xmmB, xmmC, 0);
a.vreducesd(xmmA, xmmB, anyptr_gpC, 0);
a.vreducess(xmmA, xmmB, xmmC, 0);
a.vreducess(xmmA, xmmB, anyptr_gpC, 0);
a.vrndscalepd(xmmA, xmmB, 0);
a.vrndscalepd(xmmA, anyptr_gpB, 0);
a.vrndscalepd(ymmA, ymmB, 0);
a.vrndscalepd(ymmA, anyptr_gpB, 0);
a.vrndscalepd(zmmA, zmmB, 0);
a.vrndscalepd(zmmA, anyptr_gpB, 0);
a.vrndscaleps(xmmA, xmmB, 0);
a.vrndscaleps(xmmA, anyptr_gpB, 0);
a.vrndscaleps(ymmA, ymmB, 0);
a.vrndscaleps(ymmA, anyptr_gpB, 0);
a.vrndscaleps(zmmA, zmmB, 0);
a.vrndscaleps(zmmA, anyptr_gpB, 0);
a.vrndscalesd(xmmA, xmmB, xmmC, 0);
a.vrndscalesd(xmmA, xmmB, anyptr_gpC, 0);
a.vrndscaless(xmmA, xmmB, xmmC, 0);
a.vrndscaless(xmmA, xmmB, anyptr_gpC, 0);
a.vrsqrt14pd(xmmA, xmmB);
a.vrsqrt14pd(xmmA, anyptr_gpB);
a.vrsqrt14pd(ymmA, ymmB);
a.vrsqrt14pd(ymmA, anyptr_gpB);
a.vrsqrt14pd(zmmA, zmmB);
a.vrsqrt14pd(zmmA, anyptr_gpB);
a.vrsqrt14ps(xmmA, xmmB);
a.vrsqrt14ps(xmmA, anyptr_gpB);
a.vrsqrt14ps(ymmA, ymmB);
a.vrsqrt14ps(ymmA, anyptr_gpB);
a.vrsqrt14ps(zmmA, zmmB);
a.vrsqrt14ps(zmmA, anyptr_gpB);
a.vrsqrt14sd(xmmA, xmmB, xmmC);
a.vrsqrt14sd(xmmA, xmmB, anyptr_gpC);
a.vrsqrt14ss(xmmA, xmmB, xmmC);
a.vrsqrt14ss(xmmA, xmmB, anyptr_gpC);
a.vrsqrt28pd(zmmA, zmmB);
a.vrsqrt28pd(zmmA, anyptr_gpB);
a.vrsqrt28ps(zmmA, zmmB);
a.vrsqrt28ps(zmmA, anyptr_gpB);
a.vrsqrt28sd(xmmA, xmmB, xmmC);
a.vrsqrt28sd(xmmA, xmmB, anyptr_gpC);
a.vrsqrt28ss(xmmA, xmmB, xmmC);
a.vrsqrt28ss(xmmA, xmmB, anyptr_gpC);
a.vscalefpd(xmmA, xmmB, xmmC);
a.vscalefpd(xmmA, xmmB, anyptr_gpC);
a.vscalefpd(ymmA, ymmB, ymmC);
a.vscalefpd(ymmA, ymmB, anyptr_gpC);
a.vscalefpd(zmmA, zmmB, zmmC);
a.vscalefpd(zmmA, zmmB, anyptr_gpC);
a.vscalefps(xmmA, xmmB, xmmC);
a.vscalefps(xmmA, xmmB, anyptr_gpC);
a.vscalefps(ymmA, ymmB, ymmC);
a.vscalefps(ymmA, ymmB, anyptr_gpC);
a.vscalefps(zmmA, zmmB, zmmC);
a.vscalefps(zmmA, zmmB, anyptr_gpC);
a.vscalefsd(xmmA, xmmB, xmmC);
a.vscalefsd(xmmA, xmmB, anyptr_gpC);
a.vscalefss(xmmA, xmmB, xmmC);
a.vscalefss(xmmA, xmmB, anyptr_gpC);
a.vscatterdpd(vx_ptr, xmmB);
a.vscatterdpd(vx_ptr, ymmB);
a.vscatterdpd(vy_ptr, zmmB);
a.vscatterdps(vx_ptr, xmmB);
a.vscatterdps(vy_ptr, ymmB);
a.vscatterdps(vz_ptr, zmmB);
a.vscatterpf0dpd(vy_ptr);
a.vscatterpf0dps(vz_ptr);
a.vscatterpf0qpd(vz_ptr);
a.vscatterpf0qps(vz_ptr);
a.vscatterpf1dpd(vy_ptr);
a.vscatterpf1dps(vz_ptr);
a.vscatterpf1qpd(vz_ptr);
a.vscatterpf1qps(vz_ptr);
a.vscatterqpd(vx_ptr, xmmB);
a.vscatterqpd(vy_ptr, ymmB);
a.vscatterqpd(vz_ptr, zmmB);
a.vscatterqps(vx_ptr, xmmB);
a.vscatterqps(vy_ptr, xmmB);
a.vscatterqps(vz_ptr, ymmB);
a.vshuff32x4(ymmA, ymmB, ymmC, 0);
a.vshuff32x4(ymmA, ymmB, anyptr_gpC, 0);
a.vshuff32x4(zmmA, zmmB, zmmC, 0);
a.vshuff32x4(zmmA, zmmB, anyptr_gpC, 0);
a.vshuff64x2(ymmA, ymmB, ymmC, 0);
a.vshuff64x2(ymmA, ymmB, anyptr_gpC, 0);
a.vshuff64x2(zmmA, zmmB, zmmC, 0);
a.vshuff64x2(zmmA, zmmB, anyptr_gpC, 0);
a.vshufi32x4(ymmA, ymmB, ymmC, 0);
a.vshufi32x4(ymmA, ymmB, anyptr_gpC, 0);
a.vshufi32x4(zmmA, zmmB, zmmC, 0);
a.vshufi32x4(zmmA, zmmB, anyptr_gpC, 0);
a.vshufi64x2(ymmA, ymmB, ymmC, 0);
a.vshufi64x2(ymmA, ymmB, anyptr_gpC, 0);
a.vshufi64x2(zmmA, zmmB, zmmC, 0);
a.vshufi64x2(zmmA, zmmB, anyptr_gpC, 0);
a.vshufpd(xmmA, xmmB, xmmC, 0);
a.vshufpd(xmmA, xmmB, anyptr_gpC, 0);
a.vshufpd(ymmA, ymmB, ymmC, 0);
a.vshufpd(ymmA, ymmB, anyptr_gpC, 0);
a.vshufpd(zmmA, zmmB, zmmC, 0);
a.vshufpd(zmmA, zmmB, anyptr_gpC, 0);
a.vshufps(xmmA, xmmB, xmmC, 0);
a.vshufps(xmmA, xmmB, anyptr_gpC, 0);
a.vshufps(ymmA, ymmB, ymmC, 0);
a.vshufps(ymmA, ymmB, anyptr_gpC, 0);
a.vshufps(zmmA, zmmB, zmmC, 0);
a.vshufps(zmmA, zmmB, anyptr_gpC, 0);
a.vsqrtpd(xmmA, xmmB);
a.vsqrtpd(xmmA, anyptr_gpB);
a.vsqrtpd(ymmA, ymmB);
a.vsqrtpd(ymmA, anyptr_gpB);
a.vsqrtpd(zmmA, zmmB);
a.vsqrtpd(zmmA, anyptr_gpB);
a.vsqrtps(xmmA, xmmB);
a.vsqrtps(xmmA, anyptr_gpB);
a.vsqrtps(ymmA, ymmB);
a.vsqrtps(ymmA, anyptr_gpB);
a.vsqrtps(zmmA, zmmB);
a.vsqrtps(zmmA, anyptr_gpB);
a.vsqrtsd(xmmA, xmmB, xmmC);
a.vsqrtsd(xmmA, xmmB, anyptr_gpC);
a.vsqrtss(xmmA, xmmB, xmmC);
a.vsqrtss(xmmA, xmmB, anyptr_gpC);
a.vsubpd(xmmA, xmmB, xmmC);
a.vsubpd(xmmA, xmmB, anyptr_gpC);
a.vsubpd(ymmA, ymmB, ymmC);
a.vsubpd(ymmA, ymmB, anyptr_gpC);
a.vsubpd(zmmA, zmmB, zmmC);
a.vsubpd(zmmA, zmmB, anyptr_gpC);
a.vsubps(xmmA, xmmB, xmmC);
a.vsubps(xmmA, xmmB, anyptr_gpC);
a.vsubps(ymmA, ymmB, ymmC);
a.vsubps(ymmA, ymmB, anyptr_gpC);
a.vsubps(zmmA, zmmB, zmmC);
a.vsubps(zmmA, zmmB, anyptr_gpC);
a.vsubsd(xmmA, xmmB, xmmC);
a.vsubsd(xmmA, xmmB, anyptr_gpC);
a.vsubss(xmmA, xmmB, xmmC);
a.vsubss(xmmA, xmmB, anyptr_gpC);
a.vucomisd(xmmA, xmmB);
a.vucomisd(xmmA, anyptr_gpB);
a.vucomiss(xmmA, xmmB);
a.vucomiss(xmmA, anyptr_gpB);
a.vunpckhpd(xmmA, xmmB, xmmC);
a.vunpckhpd(xmmA, xmmB, anyptr_gpC);
a.vunpckhpd(ymmA, ymmB, ymmC);
a.vunpckhpd(ymmA, ymmB, anyptr_gpC);
a.vunpckhpd(zmmA, zmmB, zmmC);
a.vunpckhpd(zmmA, zmmB, anyptr_gpC);
a.vunpckhps(xmmA, xmmB, xmmC);
a.vunpckhps(xmmA, xmmB, anyptr_gpC);
a.vunpckhps(ymmA, ymmB, ymmC);
a.vunpckhps(ymmA, ymmB, anyptr_gpC);
a.vunpckhps(zmmA, zmmB, zmmC);
a.vunpckhps(zmmA, zmmB, anyptr_gpC);
a.vunpcklpd(xmmA, xmmB, xmmC);
a.vunpcklpd(xmmA, xmmB, anyptr_gpC);
a.vunpcklpd(ymmA, ymmB, ymmC);
a.vunpcklpd(ymmA, ymmB, anyptr_gpC);
a.vunpcklpd(zmmA, zmmB, zmmC);
a.vunpcklpd(zmmA, zmmB, anyptr_gpC);
a.vunpcklps(xmmA, xmmB, xmmC);
a.vunpcklps(xmmA, xmmB, anyptr_gpC);
a.vunpcklps(ymmA, ymmB, ymmC);
a.vunpcklps(ymmA, ymmB, anyptr_gpC);
a.vunpcklps(zmmA, zmmB, zmmC);
a.vunpcklps(zmmA, zmmB, anyptr_gpC);
a.vxorpd(xmmA, xmmB, xmmC);
a.vxorpd(xmmA, xmmB, anyptr_gpC);
a.vxorpd(ymmA, ymmB, ymmC);
a.vxorpd(ymmA, ymmB, anyptr_gpC);
a.vxorpd(zmmA, zmmB, zmmC);
a.vxorpd(zmmA, zmmB, anyptr_gpC);
a.vxorps(xmmA, xmmB, xmmC);
a.vxorps(xmmA, xmmB, anyptr_gpC);
a.vxorps(ymmA, ymmB, ymmC);
a.vxorps(ymmA, ymmB, anyptr_gpC);
a.vxorps(zmmA, zmmB, zmmC);
a.vxorps(zmmA, zmmB, anyptr_gpC);
// Mark the end.
a.nop();
a.nop();
a.nop();
a.nop();
}
} // asmtest namespace
// [Guard]
#endif // _ASMJIT_TEST_OPCODE_H