commit 5c7123fbb3f596dd0eecd3ea150c82a4cdfa1e1a Author: kobalicekp Date: Sun Feb 2 03:17:30 2014 +0100 Initial. diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9442d2b --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +build_* diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..47ee580 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,292 @@ +# ============================================================================= +# [AsmJit - CMakeLists.txt] +# ============================================================================= + +CMake_Minimum_Required(VERSION 2.8.12) + +# ============================================================================= +# [AsmJit - Configuration] +# ============================================================================= + +# Whether to build static library (default FALSE). +# Set(ASMJIT_STATIC FALSE) + +# Whether to build samples (default FALSE). +# Set(ASMJIT_BUILD_SAMPLES FALSE) + +# ============================================================================= +# [AsmJit - Build] +# ============================================================================= + +If(NOT CMAKE_PROJECT_NAME) + Project(asmjit C CXX) + Set(ASMJIT_PROJECT_STR "Project") +Else() + # Do not create a project if this CMakeLists.txt is included by a different + # project. This allows easy static library build including debugger support. + Set(ASMJIT_PROJECT_STR "Include") +EndIf() + +If(ASMJIT_STATIC) + Set(ASMJIT_PROJECT_STR "${ASMJIT_PROJECT_STR}|Static") +Else() + Set(ASMJIT_PROJECT_STR "${ASMJIT_PROJECT_STR}|Shared") +EndIf() + +Message("") +Message("== ====================================================") +Message("== [AsmJit ${ASMJIT_PROJECT_STR}]") +Message("== ====================================================") +Message("") + +# ============================================================================= +# [AsmJit - Directories] +# ============================================================================= + +If(NOT ASMJIT_DIR) + Set(ASMJIT_DIR ${CMAKE_CURRENT_LIST_DIR}) + Message("-- Initializing ASMJIT_DIR=${ASMJIT_DIR}") +Else() + Message("-- Using Custom ASMJIT_DIR=${ASMJIT_DIR}") +EndIf() + +Set(ASMJIT_SRC_DIR "${ASMJIT_DIR}/src") +Set(ASMJIT_INC_DIR "${ASMJIT_SRC_DIR}") + +Include_Directories(${ASMJIT_SRC_DIR}) + +# ============================================================================= +# [AsmJit - Flags/Deps] +# ============================================================================= + +Set(ASMJIT_DEPS) +Set(ASMJIT_LFLAGS) + +Set(ASMJIT_CDEFS) +Set(ASMJIT_CDEFS_DBG ASMJIT_DEBUG) +Set(ASMJIT_CDEFS_REL ASMJIT_RELEASE) + +Set(ASMJIT_CFLAGS) +Set(ASMJIT_CFLAGS_DBG) +Set(ASMJIT_CFLAGS_REL) + +If(MSVC) + Set(ASMJIT_LFLAGS "/OPT:REF /OPT:ICF") + Set(ASMJIT_CFLAGS /GF) + Set(ASMJIT_CFLAGS_DBG /GS /GR-) + Set(ASMJIT_CFLAGS_REL /Oi /Oy /GS- /GR-) +EndIf() + +If(CMAKE_COMPILER_IS_GNUCXX) + Set(ASMJIT_CFLAGS -fno-exceptions) + Set(ASMJIT_CFLAGS_DBG -O0 + -fno-inline-functions) + Set(ASMJIT_CFLAGS_REL -O2 + -finline-functions + -fomit-frame-pointer + -fmerge-all-constants + -fno-keep-static-consts) +EndIf() + +If(WIN32) + List(APPEND ASMJIT_CDEFS _UNICODE) +Else() + List(APPEND ASMJIT_DEPS pthread) +EndIf() + +Set(ASMJIT_CFLAGS_DBG ${ASMJIT_CFLAGS} ${ASMJIT_CFLAGS_DBG}) +Set(ASMJIT_CFLAGS_REL ${ASMJIT_CFLAGS} ${ASMJIT_CFLAGS_REL}) + +# ============================================================================= +# [AsmJit - Macros] +# 
=============================================================================
+
+Macro(AsmJit_AddSource DST BASE_PATH)
+  Set(__list "")
+  Set(__path "${ASMJIT_SRC_DIR}/${BASE_PATH}")
+
+  ForEach(__name ${ARGN})
+    Set(__file "${__path}/${__name}")
+    Set(__cflags ${ASMJIT_CFLAGS})
+
+    If(__name MATCHES "\\.cpp|\\.h")
+      If(__cflags)
+        Set_Source_Files_Properties(${__file} PROPERTIES COMPILE_FLAGS ${__cflags})
+      EndIf()
+      List(APPEND __list ${__file})
+    EndIf()
+  EndForEach()
+
+  List(APPEND "${DST}" ${__list})
+  Source_Group(${BASE_PATH} FILES ${__list})
+EndMacro()
+
+Macro(AsmJit_AddLibrary NAME SRC DEPS CDEFS_DBG CDEFS_REL CFLAGS_DBG CFLAGS_REL)
+  If(NOT ASMJIT_STATIC)
+    Set(__type "SHARED")
+  Else()
+    Set(__type "STATIC")
+  EndIf()
+
+  Add_Library(${NAME} ${__type} ${SRC})
+
+  # Dependencies.
+  Target_Link_Libraries(${NAME} ${DEPS})
+
+  # Compiler Flags.
+  If(CMAKE_BUILD_TYPE)
+    If(${CMAKE_BUILD_TYPE} MATCHES "Debug")
+      Set_Target_Properties(${NAME} PROPERTIES COMPILE_DEFINITIONS "${CDEFS_DBG}")
+      Set_Target_Properties(${NAME} PROPERTIES COMPILE_FLAGS "${CFLAGS_DBG}")
+    Else()
+      Set_Target_Properties(${NAME} PROPERTIES COMPILE_DEFINITIONS "${CDEFS_REL}")
+      Set_Target_Properties(${NAME} PROPERTIES COMPILE_FLAGS "${CFLAGS_REL}")
+    EndIf()
+  Else()
+    Target_Compile_Definitions(${NAME} PUBLIC
+      $<$<CONFIG:Debug>:${CDEFS_DBG}>$<$<NOT:$<CONFIG:Debug>>:${CDEFS_REL}>)
+
+    Target_Compile_Options(${NAME} PUBLIC
+      $<$<CONFIG:Debug>:${CFLAGS_DBG}>$<$<NOT:$<CONFIG:Debug>>:${CFLAGS_REL}>)
+  EndIf()
+
+  # Linker Flags.
+  Set_Target_Properties(${NAME} PROPERTIES LINK_FLAGS "${ASMJIT_LFLAGS}")
+
+  # Install Instructions.
+  If(NOT ASMJIT_STATIC)
+    Install(TARGETS ${NAME} DESTINATION lib)
+  EndIf()
+
+  Unset(__type)
+EndMacro()
+
+# =============================================================================
+# [AsmJit - Source]
+# =============================================================================
+
+Set(ASMJIT_SRC "")
+
+AsmJit_AddSource(ASMJIT_SRC asmjit
+  asmjit.h
+  base.h
+  build.h
+  config.h
+  contrib.h
+  host.h
+  x86.h
+)
+
+AsmJit_AddSource(ASMJIT_SRC asmjit/base
+  apibegin.h
+  apiend.h
+
+  assembler.cpp
+  assembler.h
+  assert.cpp
+  assert.h
+  codegen.cpp
+  codegen.h
+  compiler.cpp
+  compiler.h
+  context.cpp
+  context_p.h
+  cpu.cpp
+  cpu.h
+  defs.cpp
+  defs.h
+  error.cpp
+  error.h
+  func.cpp
+  func.h
+  globals.h
+  intutil.h
+  lock.h
+  logger.cpp
+  logger.h
+  memorymanager.cpp
+  memorymanager.h
+  podlist.h
+  podvector.cpp
+  podvector.h
+  runtime.cpp
+  runtime.h
+  string.cpp
+  string.h
+  vectypes.h
+  vmem.cpp
+  vmem.h
+  zone.cpp
+  zone.h
+)
+
+AsmJit_AddSource(ASMJIT_SRC asmjit/x86
+  x86assembler.cpp
+  x86assembler.h
+  x86compiler.cpp
+  x86compiler.h
+  x86context.cpp
+  x86context_p.h
+  x86cpu.cpp
+  x86cpu.h
+  x86defs.cpp
+  x86defs.h
+  x86func.cpp
+  x86func.h
+)
+
+AsmJit_AddSource(ASMJIT_SRC asmjit/contrib
+  winremoteruntime.cpp
+  winremoteruntime.h
+)
+
+# =============================================================================
+# [AsmJit - Headers]
+# =============================================================================
+
+If(NOT ASMJIT_STATIC)
+  ForEach(i ${ASMJIT_SRC})
+    Get_Filename_Component(path ${i} PATH)
+    Get_Filename_Component(name ${i} NAME)
+    String(REGEX REPLACE "^${ASMJIT_SRC_DIR}/" "" targetpath "${path}")
+    If("${name}" MATCHES "\\.h$")
+      If(NOT "${name}" MATCHES "_p\\.h$")
+        Install(FILES ${i} DESTINATION "include/${targetpath}")
+      EndIf()
+    EndIf()
+  EndForEach()
+EndIf()
+
+# =============================================================================
+# [AsmJit - Library]
+# =============================================================================
+
+AsmJit_AddLibrary(asmjit
+  "${ASMJIT_SRC}"
+  "${ASMJIT_DEPS}"
+  "${ASMJIT_CDEFS_DBG}"
+  "${ASMJIT_CDEFS_REL}"
+  "${ASMJIT_CFLAGS_DBG}"
+  "${ASMJIT_CFLAGS_REL}"
+)
+
+# =============================================================================
+# [AsmJit - Samples]
+# =============================================================================
+
+If(ASMJIT_BUILD_SAMPLES)
+  Set(ASMJIT_SRC_SAMPLES
+    benchx86
+    testcpu
+    testdummy
+    testmem
+    testopcode
+    testsizeof
+    testx86
+  )
+
+  ForEach(file ${ASMJIT_SRC_SAMPLES})
+    Add_Executable(${file} src/app/test/${file}.cpp)
+    Target_Link_Libraries(${file} asmjit ${ASMJIT_DEPS})
+  EndForEach(file)
+EndIf()
diff --git a/LICENSE.md b/LICENSE.md
new file mode 100644
index 0000000..4a258b9
--- /dev/null
+++ b/LICENSE.md
@@ -0,0 +1,18 @@
+AsmJit - Complete x86/x64 JIT and Remote Assembler for C++
+Copyright (c) 2008-2014, Petr Kobalicek
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not
+   claim that you wrote the original software. If you use this software
+   in a product, an acknowledgment in the product documentation would be
+   appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be
+   misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..0415c79
--- /dev/null
+++ b/README.md
@@ -0,0 +1,129 @@
+AsmJit - Complete x86/x64 JIT and Remote Assembler for C++
+==========================================================
+
+Official Repository: https://github.com/kobalicekp/asmjit
+
+Support the Project: [![Donate](https://www.paypalobjects.com/en_US/i/btn/btn_donate_LG.gif)](
+  https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=QDRM6SRNG7378&lc=EN&item_name=asmjit&currency_code=EUR)
+
+Introduction
+============
+
+AsmJit is a complete JIT and remote assembler for the C++ language. It can generate native code for x86 and x64 architectures and supports the full instruction set, from legacy MMX to the newest AVX2. It has a type-safe API that allows the C++ compiler to do semantic checks at compile-time, even before the assembled code is generated or run (a minimal usage sketch follows the feature list below).
+
+AsmJit is not a virtual machine (VM). It doesn't provide the functionality to implement a VM out of the box; however, it can be used as a JIT backend for your own VM. The usage of AsmJit is not limited in any way; it's suitable for multimedia, VM backends, or remote code generation.
+
+Features
+========
+
+  - Complete x86/x64 instruction set - MMX, SSE, AVX, BMI, XOP, FMA...,
+  - Low-level and high-level code generation,
+  - Built-in CPU detection,
+  - Virtual memory management,
+  - Pretty logging and error handling,
+  - Small and embeddable, around 150kB compiled,
+  - Zero dependencies, not even STL or RTTI.
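+
+Here is a minimal sketch of the low-level 'Assembler' concept (described under "Code Generation Concepts" below). It is an illustration written for this README rather than code shipped in the package; it assumes the JitRuntime/Assembler API used by src/app/test/benchx86.cpp, and the cast of make()'s result to a function pointer is the caller's responsibility:
+
+    #include <asmjit/asmjit.h>
+
+    int main() {
+      using namespace asmjit;
+      using namespace asmjit::host;
+
+      // JitRuntime owns the virtual memory that holds the generated code.
+      JitRuntime runtime;
+      Assembler a(&runtime);
+
+      // Generate a function equivalent to `int fn() { return 42; }`.
+      a.mov(eax, 42);
+      a.ret();
+
+      // make() finalizes and relocates the code. The function-pointer type
+      // is an assumption the caller has to get right.
+      typedef int (*Func)(void);
+      Func fn = (Func)a.make();
+
+      int result = fn();
+
+      // Return the generated code back to the runtime.
+      runtime.release((void*)fn);
+      return result == 42 ? 0 : 1;
+    }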
+
+Supported Environments
+======================
+
+## Operating Systems
+
+  - BSDs
+  - Linux
+  - Mac
+  - Windows
+
+## C++ Compilers
+
+  - BorlandC++
+  - GNU (3.4.X+, 4.0+, MinGW)
+  - MSVC (VS2005, VS2008, VS2010)
+  - Other compilers require testing
+
+## Backends
+
+  - X86
+  - X64
+
+Project Organization
+====================
+
+  - project root /
+    - extras - Documentation and addons
+      - contrib - Contributions (not official, but included)
+      - doc - Documentation generator files
+      - msvs - MS Visual Studio additions
+      - scripts - Scripts to generate project files and regenerate defs
+    - src - Source code
+      - asmjit - Public header files (always include from here)
+        - base - Base files, used by AsmJit and all backends
+        - x86 - X86/X64 specific files, used only by the X86/X64 backend
+
+Code Generation Concepts
+========================
+
+AsmJit has two completely different code generation concepts; the difference is in how the code is generated. The first, also referred to as the low-level concept, is called 'Assembler', and it's the same as writing raw assembly using physical registers directly. In this case AsmJit does only instruction encoding, verification, and relocation.
+
+The second, also referred to as the high-level concept, is called 'Compiler'. Compiler lets you use a virtually unlimited number of registers (called variables), which significantly simplifies the code generation process. Compiler allocates these virtual registers to physical registers after the code generation is done. This requires some extra effort - Compiler has to generate information for each node (instruction, function declaration, function call) in the code, perform a variable liveness analysis, and translate the code that uses variables into code that uses only physical registers.
+
+In addition, Compiler understands functions and function calling conventions. It has been designed so that the generated code is always a function with a prototype, like in a programming language. Knowing the function prototype, the Compiler is able to insert a prolog and epilog into the function being generated, and it is able to call a function inside a generated one.
+
+There is no conclusion on which concept is better; Assembler gives full control over how the code is generated, while Compiler makes the generation more portable (a short Compiler sketch follows).
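+
+As an illustration of the high-level concept, here is a short sketch modeled on the Compiler code in src/app/test/genblend.h. Again, this is an illustration rather than shipped code; names such as FuncBuilder2 and kVarTypeInt32 are assumptions extrapolated from the FuncBuilder3 and kVarTypeIntPtr used in that file:
+
+    // Build `int add(int x, int y)` at runtime using virtual registers.
+    // Illustrative sketch; see the note above about assumed names.
+    static void* makeAdd(asmjit::JitRuntime& runtime) {
+      using namespace asmjit;
+      using namespace asmjit::host;
+
+      Compiler c(&runtime);
+      c.addFunc(kFuncConvHost, FuncBuilder2<int, int, int>());
+
+      // Variables are virtual registers; Compiler maps them to physical
+      // registers once code generation is done.
+      GpVar x(c, kVarTypeInt32, "x");
+      GpVar y(c, kVarTypeInt32, "y");
+
+      c.setArg(0, x);
+      c.setArg(1, y);
+
+      c.add(x, y); // x += y.
+      c.ret(x);    // Return x; prolog/epilog are inserted by Compiler.
+
+      c.endFunc();
+      return c.make(); // Allocate and relocate, like Assembler::make().
+    }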
+
+Configuring/Building
+====================
+
+AsmJit is designed to be easily embeddable in any project. However, it has some compile-time flags that can be used to build a specific version of AsmJit, including or omitting certain features:
+
+## Debugging
+
+  - *ASMJIT_DEBUG* - Define to always turn debugging on (regardless of build-mode).
+  - *ASMJIT_RELEASE* - Define to always turn debugging off (regardless of build-mode).
+
+  - By default none of these is defined; AsmJit detects the mode based on compile-time macros (useful when using an IDE that has switches for Debug/Release/etc...).
+
+## Library
+
+  - *ASMJIT_STATIC* - Define when building AsmJit as a static library. No symbols will be exported by AsmJit by default.
+  - *ASMJIT_API* - This is the AsmJit API decorator used in all functions that have to be exported. It can be redefined, but that is not recommended.
+
+  - By default the AsmJit build is configured as a shared library and *ASMJIT_API* contains compiler-specific attributes to import/export AsmJit symbols.
+
+## Backends
+
+  - *ASMJIT_BUILD_X86* - Always build the x86 backend regardless of host architecture.
+  - *ASMJIT_BUILD_X64* - Always build the x64 backend regardless of host architecture.
+  - *ASMJIT_BUILD_HOST* - Always build the host backend; if only *ASMJIT_BUILD_HOST* is used, only the host architecture detected at compile-time will be included.
+
+  - By default only *ASMJIT_BUILD_HOST* is defined.
+
+To build AsmJit, please use cmake, which will generate project files for your favorite IDE and platform. If you don't use cmake and still want to include AsmJit in your project, it's perfectly fine to just include it there, probably defining *ASMJIT_STATIC* to prevent AsmJit from trying to export its API.
+
+Examples
+========
+
+A comprehensive test suite can be found in the src/app/test directory of the AsmJit package. It can be used as a starting point before using AsmJit.
+
+License
+=======
+
+AsmJit can be distributed under the zlib license:
+
+  * See the LICENSE.md file in the package.
+
+Google Groups & Mailing Lists
+=============================
+
+AsmJit google group:
+
+  * http://groups.google.com/group/asmjit-dev
+
+AsmJit mailing list:
+
+  * asmjit-dev@googlegroups.com
+
+Contact Authors & Maintainers
+=============================
+
+  * Petr Kobalicek
diff --git a/src/app/test/benchx86.cpp b/src/app/test/benchx86.cpp
new file mode 100644
index 0000000..86c0424
--- /dev/null
+++ b/src/app/test/benchx86.cpp
@@ -0,0 +1,113 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Dependencies - AsmJit]
+#include <asmjit/asmjit.h>
+
+// [Dependencies - Test]
+#include "genblend.h"
+#include "genopcode.h"
+
+// [Dependencies - C]
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// ============================================================================
+// [Performance]
+// ============================================================================
+
+struct Performance {
+  static inline uint32_t now() {
+#if defined(ASMJIT_OS_WINDOWS)
+    return ::GetTickCount();
+#else
+    // Timing is not implemented for non-Windows targets here.
+    return 0;
+#endif
+  }
+
+  inline void reset() {
+    tick = 0;
+    best = 0xFFFFFFFF;
+  }
+
+  inline uint32_t start() {
+    return (tick = now());
+  }
+
+  inline uint32_t diff() const {
+    return now() - tick;
+  }
+
+  inline uint32_t end() {
+    tick = diff();
+    if (best > tick)
+      best = tick;
+    return tick;
+  }
+
+  uint32_t tick;
+  uint32_t best;
+};
+
+// ============================================================================
+// [Main]
+// ============================================================================
+
+int main(int argc, char* argv[]) {
+  using namespace asmjit;
+  using namespace asmjit::host;
+
+  Performance perf;
+  uint32_t kNumRepeats = 10;
+  uint32_t kNumIterations = 100000;
+
+  JitRuntime runtime;
+  Assembler a(&runtime);
+  Compiler c(&runtime);
+
+  uint32_t r, i;
+
+  // --------------------------------------------------------------------------
+  // [Bench - Opcode]
+  // --------------------------------------------------------------------------
+
+  perf.reset();
+  for (r = 0; r < kNumRepeats; r++) {
+    perf.start();
+    for (i = 0; i < kNumIterations; i++) {
+      asmgen::opcode(a);
+
+      void *p = a.make();
+      runtime.release(p);
+
+      a.clear();
+    }
+    perf.end();
+  }
+  printf("Opcode | Time: %u [ms]\n", perf.best);
+
+  // --------------------------------------------------------------------------
+  // [Bench - Blend]
+  // --------------------------------------------------------------------------
+
+  perf.reset();
+  for (r = 0; r < kNumRepeats; r++) {
+    perf.start();
+    for (i = 0; i < kNumIterations; i++) {
+      asmgen::blend(c);
+
+      void* p = c.make();
+      runtime.release(p);
+
+      c.clear();
+    }
+    perf.end();
+  }
+  printf("Blend | Time: %u [ms]\n", perf.best);
+
+  return 0;
+}
diff --git a/src/app/test/genblend.h b/src/app/test/genblend.h
new file mode 100644
index 0000000..f5be442
--- /dev/null
+++ b/src/app/test/genblend.h
@@ -0,0 +1,178 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Guard]
+#ifndef _GENBLEND_H
+#define _GENBLEND_H
+
+// [Dependencies]
+#include <asmjit/asmjit.h>
+
+namespace asmgen {
+
+// Generate a typical alpha-blend function using the SSE2 instruction set. Used
+// for benchmarking and also in testx86. The generated code should be stable
+// and can be tested.
+static void blend(asmjit::host::Compiler& c) {
+  using namespace asmjit;
+  using namespace asmjit::host;
+
+  GpVar dst(c, kVarTypeIntPtr, "dst");
+  GpVar src(c, kVarTypeIntPtr, "src");
+
+  GpVar i(c, kVarTypeIntPtr, "i");
+  GpVar j(c, kVarTypeIntPtr, "j");
+  GpVar t(c, kVarTypeIntPtr, "t");
+
+  XmmVar cZero(c, kVarTypeXmm, "cZero");
+  XmmVar cMul255A(c, kVarTypeXmm, "cMul255A");
+  XmmVar cMul255M(c, kVarTypeXmm, "cMul255M");
+
+  XmmVar x0(c, kVarTypeXmm, "x0");
+  XmmVar x1(c, kVarTypeXmm, "x1");
+  XmmVar y0(c, kVarTypeXmm, "y0");
+  XmmVar a0(c, kVarTypeXmm, "a0");
+  XmmVar a1(c, kVarTypeXmm, "a1");
+
+  Label L_SmallLoop(c);
+  Label L_SmallEnd(c);
+
+  Label L_LargeLoop(c);
+  Label L_LargeEnd(c);
+
+  Label L_Data(c);
+
+  c.addFunc(kFuncConvHost, FuncBuilder3<Void, void*, const void*, size_t>());
+
+  c.setArg(0, dst);
+  c.setArg(1, src);
+  c.setArg(2, i);
+
+  c.alloc(dst);
+  c.alloc(src);
+  c.alloc(i);
+
+  // How many pixels we have to process to align the loop.
+  c.lea(t, ptr(L_Data));
+  c.xor_(j, j);
+  c.xorps(cZero, cZero);
+
+  c.sub(j, dst);
+  c.movaps(cMul255A, ptr(t, 0));
+
+  c.and_(j, 15);
+  c.movaps(cMul255M, ptr(t, 16));
+
+  c.shr(j, 2);
+  c.jz(L_SmallEnd);
+
+  // j = min(i, j).
+  c.cmp(j, i);
+  c.cmovg(j, i);
+
+  // i -= j.
+  c.sub(i, j);
+
+  // Small loop.
+  c.bind(L_SmallLoop);
+
+  c.pcmpeqb(a0, a0);
+  c.movd(y0, ptr(src));
+
+  c.pxor(a0, y0);
+  c.movd(x0, ptr(dst));
+
+  c.psrlw(a0, 8);
+  c.punpcklbw(x0, cZero);
+
+  c.pshuflw(a0, a0, mm_shuffle(1, 1, 1, 1));
+  c.punpcklbw(y0, cZero);
+
+  c.pmullw(x0, a0);
+  c.paddsw(x0, cMul255A);
+  c.pmulhuw(x0, cMul255M);
+
+  c.paddw(x0, y0);
+  c.packuswb(x0, x0);
+
+  c.movd(ptr(dst), x0);
+
+  c.add(dst, 4);
+  c.add(src, 4);
+
+  c.dec(j);
+  c.jnz(L_SmallLoop);
+
+  // Second section, prepare for an aligned loop.
+  c.bind(L_SmallEnd);
+
+  c.test(i, i);
+  c.mov(j, i);
+  c.jz(c.getFunc()->getExitLabel());
+
+  c.and_(j, 3);
+  c.shr(i, 2);
+  c.jz(L_LargeEnd);
+
+  // Aligned loop.
+  c.bind(L_LargeLoop);
+
+  c.movups(y0, ptr(src));
+  c.pcmpeqb(a0, a0);
+  c.movaps(x0, ptr(dst));
+
+  c.xorps(a0, y0);
+  c.movaps(x1, x0);
+
+  c.psrlw(a0, 8);
+  c.punpcklbw(x0, cZero);
+
+  c.movaps(a1, a0);
+  c.punpcklwd(a0, a0);
+
+  c.punpckhbw(x1, cZero);
+  c.punpckhwd(a1, a1);
+
+  c.pshufd(a0, a0, mm_shuffle(3, 3, 1, 1));
+  c.pshufd(a1, a1, mm_shuffle(3, 3, 1, 1));
+
+  c.pmullw(x0, a0);
+  c.pmullw(x1, a1);
+
+  c.paddsw(x0, cMul255A);
+  c.paddsw(x1, cMul255A);
+
+  c.pmulhuw(x0, cMul255M);
+  c.pmulhuw(x1, cMul255M);
+
+  c.add(src, 16);
+  c.packuswb(x0, x1);
+
+  c.paddw(x0, y0);
+  c.movaps(ptr(dst), x0);
+
+  c.add(dst, 16);
+
+  c.dec(i);
+  c.jnz(L_LargeLoop);
+
+  c.bind(L_LargeEnd);
+  c.test(j, j);
+  c.jnz(L_SmallLoop);
+
+  c.endFunc();
+
+  // Data.
+ c.align(16); + c.bind(L_Data); + c.dxmm(XmmData::fromSw(0x0080)); + c.dxmm(XmmData::fromSw(0x0101)); +} + +} // asmgen namespace + +// [Guard] +#endif // _GENBLEND_H diff --git a/src/app/test/genopcode.h b/src/app/test/genopcode.h new file mode 100644 index 0000000..8548689 --- /dev/null +++ b/src/app/test/genopcode.h @@ -0,0 +1,2296 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. +// +// [License] +// Zlib - See LICENSE.md file in the package. + +// [Guard] +#ifndef _GENOPCODE_H +#define _GENOPCODE_H + +// [Dependencies] +#include + +namespace asmgen { + +// Generate all instructions asmjit can emit. +static void opcode(asmjit::host::Assembler& a) { + using namespace asmjit; + using namespace asmjit::host; + + // Prevent crashing when the generated function is called (for debugging to + // see disassembly). + a.ret(); + + // When any problem is found this section can be used to customize the index + // of the registers used. + GpReg gp0 = zax; + GpReg gp1 = zsi; + FpReg fpx = fp6; + + Mem ptr_gp0 = ptr(gp0); + Mem ptr_gp1 = ptr(gp1); + + Mem vm32x = ptr(gp0, xmm1); + Mem vm32y = ptr(gp0, ymm1); + + Mem intptr_gp0 = intptr_ptr(gp0); + Mem intptr_gp1 = intptr_ptr(gp1); + + // Base. + a.adc(gp0, gp1); + a.adc(gp0, intptr_gp1); + a.adc(gp0, 0); + a.adc(intptr_gp0, gp1); + a.adc(intptr_gp0, 0); + a.add(gp0, gp1); + a.add(gp0, intptr_gp1); + a.add(gp0, 0); + a.add(intptr_gp0, gp1); + a.add(intptr_gp0, 0); + a.and_(gp0, gp1); + a.and_(gp0, intptr_gp1); + a.and_(gp0, 0); + a.and_(intptr_gp0, gp1); + a.and_(intptr_gp0, 0); + a.bswap(gp0); + a.bt(gp0, gp1); + a.bt(intptr_gp0, gp1); + a.bt(gp0, 0); + a.bt(intptr_gp0, 0); + a.btc(gp0, gp1); + a.btc(intptr_gp0, gp1); + a.btc(gp0, 0); + a.btc(intptr_gp0, 0); + a.btr(gp0, gp1); + a.btr(intptr_gp0, gp1); + a.btr(gp0, 0); + a.btr(intptr_gp0, 0); + a.bts(gp0, gp1); + a.bts(intptr_gp0, gp1); + a.bts(gp0, 0); + a.bts(intptr_gp0, 0); + a.call(gp0); + a.call(intptr_gp0); + a.cbw(); + a.cwde(); + a.clc(); + a.cld(); + a.cmc(); + a.cmp(gp0, gp1); + a.cmp(gp0, intptr_gp1); + a.cmp(gp0, 0); + a.cmp(intptr_gp0, gp1); + a.cmp(intptr_gp0, 0); + a.cmpxchg(gp0, gp1); + a.cmpxchg(intptr_gp0, gp1); + a.cmpxchg8b(ptr_gp0); + a.cpuid(); + a.crc32(gp0, ptr_gp1); + a.dec(gp0); + a.dec(intptr_gp0); + a.div(gp0); + a.div(intptr_gp0); + a.idiv(gp0); + a.idiv(intptr_gp0); + a.imul(gp0); + a.imul(intptr_gp0); + a.imul(gp0, gp1); + a.imul(gp0, intptr_gp1); + a.imul(gp0, 0); + a.imul(gp0, gp1, 0); + a.imul(gp0, intptr_gp1, 0); + a.inc(gp0); + a.inc(intptr_gp0); + a.int3(); + a.lea(gp0, intptr_gp1); + a.mov(gp0, gp1); + a.mov(gp0, intptr_gp1); + a.mov(gp0, 0); + a.mov(intptr_gp0, gp1); + a.mov(intptr_gp0, 0); + a.movsx(gp0, al); + a.movsx(gp0, byte_ptr(gp1)); + a.movzx(gp0, al); + a.movzx(gp0, byte_ptr(gp1)); + a.movbe(gp0, ptr_gp1); + a.movbe(ptr_gp0, gp1); + a.mul(gp0); + a.mul(intptr_gp0); + a.neg(gp0); + a.neg(intptr_gp0); + a.nop(); + a.not_(gp0); + a.not_(intptr_gp0); + a.or_(gp0, gp1); + a.or_(gp0, intptr_gp1); + a.or_(gp0, 0); + a.or_(intptr_gp0, gp1); + a.or_(intptr_gp0, 0); + a.pop(gp0); + a.pop(intptr_gp0); + a.push(gp0); + a.push(intptr_gp0); + a.push(0); + a.rcl(gp0, cl); + a.rcl(gp0, 0); + a.rcl(gp0, 1); + a.rcl(intptr_gp0, cl); + a.rcl(intptr_gp0, 0); + a.rcl(intptr_gp0, 1); + a.rcr(gp0, cl); + a.rcr(gp0, 0); + a.rcr(gp0, 1); + a.rcr(intptr_gp0, cl); + a.rcr(intptr_gp0, 0); + a.rcr(intptr_gp0, 1); + a.rdtsc(); + a.rdtscp(); + a.ret(); + a.ret(0); + a.rol(gp0, cl); + a.rol(gp0, 0); + a.rol(gp0, 1); + a.rol(intptr_gp0, cl); + a.rol(intptr_gp0, 
0); + a.rol(intptr_gp0, 1); + a.ror(gp0, cl); + a.ror(gp0, 0); + a.ror(gp0, 1); + a.ror(intptr_gp0, cl); + a.ror(intptr_gp0, 0); + a.ror(intptr_gp0, 1); + a.sbb(gp0, gp1); + a.sbb(gp0, intptr_gp1); + a.sbb(gp0, 0); + a.sbb(intptr_gp0, gp1); + a.sbb(intptr_gp0, 0); + a.sal(gp0, cl); + a.sal(gp0, 0); + a.sal(gp0, 1); + a.sal(intptr_gp0, cl); + a.sal(intptr_gp0, 0); + a.sal(intptr_gp0, 1); + a.sar(gp0, cl); + a.sar(gp0, 0); + a.sar(gp0, 1); + a.sar(intptr_gp0, cl); + a.sar(intptr_gp0, 0); + a.sar(intptr_gp0, 1); + a.shl(gp0, cl); + a.shl(gp0, 0); + a.shl(gp0, 1); + a.shl(intptr_gp0, cl); + a.shl(intptr_gp0, 0); + a.shl(intptr_gp0, 1); + a.shr(gp0, cl); + a.shr(gp0, 0); + a.shr(gp0, 1); + a.shr(intptr_gp0, cl); + a.shr(intptr_gp0, 0); + a.shr(intptr_gp0, 1); + a.shld(gp0, gp1, cl); + a.shld(gp0, gp1, 0); + a.shld(intptr_gp0, gp1, cl); + a.shld(intptr_gp0, gp1, 0); + a.shrd(gp0, gp1, cl); + a.shrd(gp0, gp1, 0); + a.shrd(intptr_gp0, gp1, cl); + a.shrd(intptr_gp0, gp1, 0); + a.stc(); + a.std(); + a.sub(gp0, gp1); + a.sub(gp0, intptr_gp1); + a.sub(gp0, 0); + a.sub(intptr_gp0, gp1); + a.sub(intptr_gp0, 0); + a.test(gp0, gp1); + a.test(gp0, 0); + a.test(intptr_gp0, gp1); + a.test(intptr_gp0, 0); + a.ud2(); + a.xadd(gp0, gp1); + a.xadd(intptr_gp0, gp1); + a.xchg(gp0, gp1); + a.xchg(intptr_gp0, gp1); + a.xchg(gp0, intptr_gp1); + a.xor_(gp0, gp1); + a.xor_(gp0, intptr_gp1); + a.xor_(gp0, 0); + a.xor_(intptr_gp0, gp1); + a.xor_(intptr_gp0, 0); + + // Fpu. + a.f2xm1(); + a.fabs(); + a.fadd(fp0, fpx); + a.fadd(fpx, fp0); + a.fadd(dword_ptr(gp0)); + a.fadd(qword_ptr(gp0)); + a.faddp(fpx); + a.fbld(dword_ptr(gp0)); + a.fbstp(dword_ptr(gp0)); + a.fchs(); + a.fclex(); + a.fcom(fpx); + a.fcom(dword_ptr(gp0)); + a.fcom(qword_ptr(gp0)); + a.fcomp(fpx); + a.fcomp(dword_ptr(gp0)); + a.fcomp(qword_ptr(gp0)); + a.fcompp(); + a.fcos(); + a.fdecstp(); + a.fdiv(fp0, fpx); + a.fdiv(fpx, fp0); + a.fdiv(dword_ptr(gp0)); + a.fdiv(qword_ptr(gp0)); + a.fdivp(fpx); + a.fdivr(fp0, fpx); + a.fdivr(fpx, fp0); + a.fdivr(dword_ptr(gp0)); + a.fdivr(qword_ptr(gp0)); + a.fdivrp(fpx); + a.fiadd(dword_ptr(gp0)); + a.ficom(word_ptr(gp0)); + a.ficom(dword_ptr(gp0)); + a.ficomp(word_ptr(gp0)); + a.ficomp(dword_ptr(gp0)); + a.fidiv(word_ptr(gp0)); + a.fidiv(dword_ptr(gp0)); + a.fidivr(word_ptr(gp0)); + a.fidivr(dword_ptr(gp0)); + a.fild(word_ptr(gp0)); + a.fild(dword_ptr(gp0)); + a.fild(qword_ptr(gp0)); + a.fimul(word_ptr(gp0)); + a.fimul(dword_ptr(gp0)); + a.fincstp(); + a.finit(); + a.fninit(); + a.fisub(word_ptr(gp0)); + a.fisub(dword_ptr(gp0)); + a.fisubr(word_ptr(gp0)); + a.fisubr(dword_ptr(gp0)); + a.fist(word_ptr(gp0)); + a.fist(dword_ptr(gp0)); + a.fistp(word_ptr(gp0)); + a.fistp(dword_ptr(gp0)); + a.fistp(qword_ptr(gp0)); + a.fld(dword_ptr(gp0)); + a.fld(qword_ptr(gp0)); + a.fld(tword_ptr(gp0)); + a.fld1(); + a.fldl2t(); + a.fldl2e(); + a.fldpi(); + a.fldlg2(); + a.fldln2(); + a.fldz(); + a.fldcw(ptr_gp0); + a.fldenv(ptr_gp0); + a.fmul(fp0, fpx); + a.fmul(fpx, fp0); + a.fmul(dword_ptr(gp0)); + a.fmul(qword_ptr(gp0)); + a.fmulp(fpx); + a.fnclex(); + a.fnop(); + a.fnsave(ptr_gp0); + a.fnstenv(ptr_gp0); + a.fnstcw(ptr_gp0); + a.fpatan(); + a.fprem(); + a.fprem1(); + a.fptan(); + a.frndint(); + a.frstor(ptr_gp0); + a.fsave(ptr_gp0); + a.fscale(); + a.fsin(); + a.fsincos(); + a.fsqrt(); + a.fst(dword_ptr(gp0)); + a.fst(qword_ptr(gp0)); + a.fstp(dword_ptr(gp0)); + a.fstp(qword_ptr(gp0)); + a.fstp(tword_ptr(gp0)); + a.fstcw(ptr_gp0); + a.fstenv(ptr_gp0); + a.fsub(fp0, fpx); + a.fsub(fpx, fp0); + a.fsub(dword_ptr(gp0)); + 
a.fsub(qword_ptr(gp0)); + a.fsubp(fpx); + a.fsubr(fp0, fpx); + a.fsubr(fpx, fp0); + a.fsubr(dword_ptr(gp0)); + a.fsubr(qword_ptr(gp0)); + a.fsubrp(fpx); + a.ftst(); + a.fucompp(); + a.fxam(); + a.fxrstor(ptr_gp0); + a.fxsave(ptr_gp0); + a.fxtract(); + a.fyl2x(); + a.fyl2xp1(); + + // MMX/MMX-EXT. + a.movd(ptr_gp0, mm7); + a.movd(eax, mm7); + a.movd(mm0, ptr_gp0); + a.movd(mm0, esi); + a.movq(mm0, mm7); + a.movq(ptr_gp0, mm7); + a.movq(mm0, ptr_gp0); + a.packuswb(mm0, mm7); + a.packuswb(mm0, ptr_gp0); + a.paddb(mm0, mm7); + a.paddb(mm0, ptr_gp0); + a.paddw(mm0, mm7); + a.paddw(mm0, ptr_gp0); + a.paddd(mm0, mm7); + a.paddd(mm0, ptr_gp0); + a.paddsb(mm0, mm7); + a.paddsb(mm0, ptr_gp0); + a.paddsw(mm0, mm7); + a.paddsw(mm0, ptr_gp0); + a.paddusb(mm0, mm7); + a.paddusb(mm0, ptr_gp0); + a.paddusw(mm0, mm7); + a.paddusw(mm0, ptr_gp0); + a.pand(mm0, mm7); + a.pand(mm0, ptr_gp0); + a.pandn(mm0, mm7); + a.pandn(mm0, ptr_gp0); + a.pcmpeqb(mm0, mm7); + a.pcmpeqb(mm0, ptr_gp0); + a.pcmpeqw(mm0, mm7); + a.pcmpeqw(mm0, ptr_gp0); + a.pcmpeqd(mm0, mm7); + a.pcmpeqd(mm0, ptr_gp0); + a.pcmpgtb(mm0, mm7); + a.pcmpgtb(mm0, ptr_gp0); + a.pcmpgtw(mm0, mm7); + a.pcmpgtw(mm0, ptr_gp0); + a.pcmpgtd(mm0, mm7); + a.pcmpgtd(mm0, ptr_gp0); + a.pmulhw(mm0, mm7); + a.pmulhw(mm0, ptr_gp0); + a.pmullw(mm0, mm7); + a.pmullw(mm0, ptr_gp0); + a.por(mm0, mm7); + a.por(mm0, ptr_gp0); + a.pmaddwd(mm0, mm7); + a.pmaddwd(mm0, ptr_gp0); + a.pslld(mm0, mm7); + a.pslld(mm0, ptr_gp0); + a.pslld(mm0, 0); + a.psllq(mm0, mm7); + a.psllq(mm0, ptr_gp0); + a.psllq(mm0, 0); + a.psllw(mm0, mm7); + a.psllw(mm0, ptr_gp0); + a.psllw(mm0, 0); + a.psrad(mm0, mm7); + a.psrad(mm0, ptr_gp0); + a.psrad(mm0, 0); + a.psraw(mm0, mm7); + a.psraw(mm0, ptr_gp0); + a.psraw(mm0, 0); + a.psrld(mm0, mm7); + a.psrld(mm0, ptr_gp0); + a.psrld(mm0, 0); + a.psrlq(mm0, mm7); + a.psrlq(mm0, ptr_gp0); + a.psrlq(mm0, 0); + a.psrlw(mm0, mm7); + a.psrlw(mm0, ptr_gp0); + a.psrlw(mm0, 0); + a.psubb(mm0, mm7); + a.psubb(mm0, ptr_gp0); + a.psubw(mm0, mm7); + a.psubw(mm0, ptr_gp0); + a.psubd(mm0, mm7); + a.psubd(mm0, ptr_gp0); + a.psubsb(mm0, mm7); + a.psubsb(mm0, ptr_gp0); + a.psubsw(mm0, mm7); + a.psubsw(mm0, ptr_gp0); + a.psubusb(mm0, mm7); + a.psubusb(mm0, ptr_gp0); + a.psubusw(mm0, mm7); + a.psubusw(mm0, ptr_gp0); + a.punpckhbw(mm0, mm7); + a.punpckhbw(mm0, ptr_gp0); + a.punpckhwd(mm0, mm7); + a.punpckhwd(mm0, ptr_gp0); + a.punpckhdq(mm0, mm7); + a.punpckhdq(mm0, ptr_gp0); + a.punpcklbw(mm0, mm7); + a.punpcklbw(mm0, ptr_gp0); + a.punpcklwd(mm0, mm7); + a.punpcklwd(mm0, ptr_gp0); + a.punpckldq(mm0, mm7); + a.punpckldq(mm0, ptr_gp0); + a.pxor(mm0, mm7); + a.pxor(mm0, ptr_gp0); + a.emms(); + + // 3DNOW! 
+ a.pf2id(mm0, mm7); + a.pf2id(mm0, ptr_gp0); + a.pf2iw(mm0, mm7); + a.pf2iw(mm0, ptr_gp0); + a.pfacc(mm0, mm7); + a.pfacc(mm0, ptr_gp0); + a.pfadd(mm0, mm7); + a.pfadd(mm0, ptr_gp0); + a.pfcmpeq(mm0, mm7); + a.pfcmpeq(mm0, ptr_gp0); + a.pfcmpge(mm0, mm7); + a.pfcmpge(mm0, ptr_gp0); + a.pfcmpgt(mm0, mm7); + a.pfcmpgt(mm0, ptr_gp0); + a.pfmax(mm0, mm7); + a.pfmax(mm0, ptr_gp0); + a.pfmin(mm0, mm7); + a.pfmin(mm0, ptr_gp0); + a.pfmul(mm0, mm7); + a.pfmul(mm0, ptr_gp0); + a.pfnacc(mm0, mm7); + a.pfnacc(mm0, ptr_gp0); + a.pfpnacc(mm0, mm7); + a.pfpnacc(mm0, ptr_gp0); + a.pfrcp(mm0, mm7); + a.pfrcp(mm0, ptr_gp0); + a.pfrcpit1(mm0, mm7); + a.pfrcpit1(mm0, ptr_gp0); + a.pfrcpit2(mm0, mm7); + a.pfrcpit2(mm0, ptr_gp0); + a.pfrsqit1(mm0, mm7); + a.pfrsqit1(mm0, ptr_gp0); + a.pfrsqrt(mm0, mm7); + a.pfrsqrt(mm0, ptr_gp0); + a.pfsub(mm0, mm7); + a.pfsub(mm0, ptr_gp0); + a.pfsubr(mm0, mm7); + a.pfsubr(mm0, ptr_gp0); + a.pi2fd(mm0, mm7); + a.pi2fd(mm0, ptr_gp0); + a.pi2fw(mm0, mm7); + a.pi2fw(mm0, ptr_gp0); + a.pswapd(mm0, mm7); + a.pswapd(mm0, ptr_gp0); + a.prefetch3dnow(ptr_gp0); + a.prefetchw3dnow(ptr_gp0); + a.femms(); + + // SSE. + a.addps(xmm0, xmm7); + a.addps(xmm0, ptr_gp0); + a.addss(xmm0, xmm7); + a.addss(xmm0, ptr_gp0); + a.andnps(xmm0, xmm7); + a.andnps(xmm0, ptr_gp0); + a.andps(xmm0, xmm7); + a.andps(xmm0, ptr_gp0); + a.cmpps(xmm0, xmm0, 0); + a.cmpps(xmm0, ptr_gp0, 0); + a.cmpss(xmm0, xmm0, 0); + a.cmpss(xmm0, ptr_gp0, 0); + a.comiss(xmm0, xmm7); + a.comiss(xmm0, ptr_gp0); + a.cvtpi2ps(xmm0, mm7); + a.cvtpi2ps(xmm0, ptr_gp0); + a.cvtps2pi(mm0, xmm7); + a.cvtps2pi(mm0, ptr_gp0); + a.cvtsi2ss(xmm0, gp0); + a.cvtsi2ss(xmm0, ptr_gp0); + a.cvtss2si(gp0, xmm7); + a.cvtss2si(gp0, ptr_gp0); + a.cvttps2pi(mm0, xmm7); + a.cvttps2pi(mm0, ptr_gp0); + a.cvttss2si(gp0, xmm7); + a.cvttss2si(gp0, ptr_gp0); + a.divps(xmm0, xmm7); + a.divps(xmm0, ptr_gp0); + a.divss(xmm0, xmm7); + a.divss(xmm0, ptr_gp0); + a.ldmxcsr(ptr_gp0); + a.maskmovq(mm0, mm7); + a.maxps(xmm0, xmm7); + a.maxps(xmm0, ptr_gp0); + a.maxss(xmm0, xmm7); + a.maxss(xmm0, ptr_gp0); + a.minps(xmm0, xmm7); + a.minps(xmm0, ptr_gp0); + a.minss(xmm0, xmm7); + a.minss(xmm0, ptr_gp0); + a.movaps(xmm0, xmm7); + a.movaps(xmm0, ptr_gp0); + a.movaps(ptr_gp0, xmm7); + a.movd(ptr_gp0, xmm7); + a.movd(eax, xmm7); + a.movd(xmm0, ptr_gp0); + a.movd(xmm0, eax); + a.movq(mm0, mm7); + a.movq(xmm0, xmm7); + a.movq(ptr_gp0, xmm7); + a.movq(xmm0, ptr_gp0); + a.movntq(ptr_gp0, mm7); + a.movhlps(xmm0, xmm7); + a.movhps(xmm0, ptr_gp0); + a.movhps(ptr_gp0, xmm7); + a.movlhps(xmm0, xmm7); + a.movlps(xmm0, ptr_gp0); + a.movlps(ptr_gp0, xmm7); + a.movntps(ptr_gp0, xmm7); + a.movss(xmm0, ptr_gp0); + a.movss(ptr_gp0, xmm7); + a.movups(xmm0, xmm7); + a.movups(xmm0, ptr_gp0); + a.movups(ptr_gp0, xmm7); + a.mulps(xmm0, xmm7); + a.mulps(xmm0, ptr_gp0); + a.mulss(xmm0, xmm7); + a.mulss(xmm0, ptr_gp0); + a.orps(xmm0, xmm7); + a.orps(xmm0, ptr_gp0); + a.pavgb(mm0, mm7); + a.pavgb(mm0, ptr_gp0); + a.pavgw(mm0, mm7); + a.pavgw(mm0, ptr_gp0); + a.pextrw(gp0, mm7, 0); + a.pinsrw(mm0, eax, 0); + a.pinsrw(mm0, ptr_gp0, 0); + a.pmaxsw(mm0, mm7); + a.pmaxsw(mm0, ptr_gp0); + a.pmaxub(mm0, mm7); + a.pmaxub(mm0, ptr_gp0); + a.pminsw(mm0, mm7); + a.pminsw(mm0, ptr_gp0); + a.pminub(mm0, mm7); + a.pminub(mm0, ptr_gp0); + a.pmovmskb(gp0, mm7); + a.pmulhuw(mm0, mm7); + a.pmulhuw(mm0, ptr_gp0); + a.psadbw(mm0, mm7); + a.psadbw(mm0, ptr_gp0); + a.pshufw(mm0, mm7, 0); + a.pshufw(mm0, ptr_gp0, 0); + a.rcpps(xmm0, xmm7); + a.rcpps(xmm0, ptr_gp0); + a.rcpss(xmm0, xmm7); + a.rcpss(xmm0, ptr_gp0); + 
a.prefetch(ptr_gp0, 0); + a.psadbw(xmm0, xmm7); + a.psadbw(xmm0, ptr_gp0); + a.rsqrtps(xmm0, xmm7); + a.rsqrtps(xmm0, ptr_gp0); + a.rsqrtss(xmm0, xmm7); + a.rsqrtss(xmm0, ptr_gp0); + a.sfence(); + a.shufps(xmm0, xmm0, 0); + a.shufps(xmm0, ptr_gp0, 0); + a.sqrtps(xmm0, xmm7); + a.sqrtps(xmm0, ptr_gp0); + a.sqrtss(xmm0, xmm7); + a.sqrtss(xmm0, ptr_gp0); + a.stmxcsr(ptr_gp0); + a.subps(xmm0, xmm7); + a.subps(xmm0, ptr_gp0); + a.subss(xmm0, xmm7); + a.subss(xmm0, ptr_gp0); + a.ucomiss(xmm0, xmm7); + a.ucomiss(xmm0, ptr_gp0); + a.unpckhps(xmm0, xmm7); + a.unpckhps(xmm0, ptr_gp0); + a.unpcklps(xmm0, xmm7); + a.unpcklps(xmm0, ptr_gp0); + a.xorps(xmm0, xmm7); + a.xorps(xmm0, ptr_gp0); + + // SSE2. + a.addpd(xmm0, xmm7); + a.addpd(xmm0, ptr_gp0); + a.addsd(xmm0, xmm7); + a.addsd(xmm0, ptr_gp0); + a.andnpd(xmm0, xmm7); + a.andnpd(xmm0, ptr_gp0); + a.andpd(xmm0, xmm7); + a.andpd(xmm0, ptr_gp0); + a.clflush(ptr_gp0); + a.cmppd(xmm0, xmm0, 0); + a.cmppd(xmm0, ptr_gp0, 0); + a.cmpsd(xmm0, xmm0, 0); + a.cmpsd(xmm0, ptr_gp0, 0); + a.comisd(xmm0, xmm7); + a.comisd(xmm0, ptr_gp0); + a.cvtdq2pd(xmm0, xmm7); + a.cvtdq2pd(xmm0, ptr_gp0); + a.cvtdq2ps(xmm0, xmm7); + a.cvtdq2ps(xmm0, ptr_gp0); + a.cvtpd2dq(xmm0, xmm7); + a.cvtpd2dq(xmm0, ptr_gp0); + a.cvtpd2pi(mm0, xmm7); + a.cvtpd2pi(mm0, ptr_gp0); + a.cvtpd2ps(xmm0, xmm7); + a.cvtpd2ps(xmm0, ptr_gp0); + a.cvtpi2pd(xmm0, mm7); + a.cvtpi2pd(xmm0, ptr_gp0); + a.cvtps2dq(xmm0, xmm7); + a.cvtps2dq(xmm0, ptr_gp0); + a.cvtps2pd(xmm0, xmm7); + a.cvtps2pd(xmm0, ptr_gp0); + a.cvtsd2si(gp0, xmm7); + a.cvtsd2si(gp0, ptr_gp0); + a.cvtsd2ss(xmm0, xmm7); + a.cvtsd2ss(xmm0, ptr_gp0); + a.cvtsi2sd(xmm0, zsi); + a.cvtsi2sd(xmm0, ptr_gp0); + a.cvtss2sd(xmm0, xmm7); + a.cvtss2sd(xmm0, ptr_gp0); + a.cvtss2si(gp0, xmm7); + a.cvtss2si(gp0, ptr_gp0); + a.cvttpd2pi(mm0, xmm7); + a.cvttpd2pi(mm0, ptr_gp0); + a.cvttpd2dq(xmm0, xmm7); + a.cvttpd2dq(xmm0, ptr_gp0); + a.cvttps2dq(xmm0, xmm7); + a.cvttps2dq(xmm0, ptr_gp0); + a.cvttsd2si(gp0, xmm7); + a.cvttsd2si(gp0, ptr_gp0); + a.divpd(xmm0, xmm7); + a.divpd(xmm0, ptr_gp0); + a.divsd(xmm0, xmm7); + a.divsd(xmm0, ptr_gp0); + a.lfence(); + a.maskmovdqu(xmm0, xmm7); + a.maxpd(xmm0, xmm7); + a.maxpd(xmm0, ptr_gp0); + a.maxsd(xmm0, xmm7); + a.maxsd(xmm0, ptr_gp0); + a.mfence(); + a.minpd(xmm0, xmm7); + a.minpd(xmm0, ptr_gp0); + a.minsd(xmm0, xmm7); + a.minsd(xmm0, ptr_gp0); + a.movdqa(xmm0, xmm7); + a.movdqa(xmm0, ptr_gp0); + a.movdqa(ptr_gp0, xmm7); + a.movdqu(xmm0, xmm7); + a.movdqu(xmm0, ptr_gp0); + a.movdqu(ptr_gp0, xmm7); + a.movmskps(gp0, xmm7); + a.movmskpd(gp0, xmm7); + a.movsd(xmm0, xmm7); + a.movsd(xmm0, ptr_gp0); + a.movsd(ptr_gp0, xmm7); + a.movapd(xmm0, ptr_gp0); + a.movapd(ptr_gp0, xmm7); + a.movdq2q(mm0, xmm7); + a.movq2dq(xmm0, mm7); + a.movhpd(xmm0, ptr_gp0); + a.movhpd(ptr_gp0, xmm7); + a.movlpd(xmm0, ptr_gp0); + a.movlpd(ptr_gp0, xmm7); + a.movntdq(ptr_gp0, xmm7); + a.movnti(ptr_gp0, zsi); + a.movntpd(ptr_gp0, xmm7); + a.movupd(xmm0, ptr_gp0); + a.movupd(ptr_gp0, xmm7); + a.mulpd(xmm0, xmm7); + a.mulpd(xmm0, ptr_gp0); + a.mulsd(xmm0, xmm7); + a.mulsd(xmm0, ptr_gp0); + a.orpd(xmm0, xmm7); + a.orpd(xmm0, ptr_gp0); + a.packsswb(xmm0, xmm7); + a.packsswb(xmm0, ptr_gp0); + a.packssdw(xmm0, xmm7); + a.packssdw(xmm0, ptr_gp0); + a.packuswb(xmm0, xmm7); + a.packuswb(xmm0, ptr_gp0); + a.paddb(xmm0, xmm7); + a.paddb(xmm0, ptr_gp0); + a.paddw(xmm0, xmm7); + a.paddw(xmm0, ptr_gp0); + a.paddd(xmm0, xmm7); + a.paddd(xmm0, ptr_gp0); + a.paddq(mm0, mm7); + a.paddq(mm0, ptr_gp0); + a.paddq(xmm0, xmm7); + a.paddq(xmm0, ptr_gp0); + 
a.paddsb(xmm0, xmm7); + a.paddsb(xmm0, ptr_gp0); + a.paddsw(xmm0, xmm7); + a.paddsw(xmm0, ptr_gp0); + a.paddusb(xmm0, xmm7); + a.paddusb(xmm0, ptr_gp0); + a.paddusw(xmm0, xmm7); + a.paddusw(xmm0, ptr_gp0); + a.pand(xmm0, xmm7); + a.pand(xmm0, ptr_gp0); + a.pandn(xmm0, xmm7); + a.pandn(xmm0, ptr_gp0); + a.pause(); + a.pavgb(xmm0, xmm7); + a.pavgb(xmm0, ptr_gp0); + a.pavgw(xmm0, xmm7); + a.pavgw(xmm0, ptr_gp0); + a.pcmpeqb(xmm0, xmm7); + a.pcmpeqb(xmm0, ptr_gp0); + a.pcmpeqw(xmm0, xmm7); + a.pcmpeqw(xmm0, ptr_gp0); + a.pcmpeqd(xmm0, xmm7); + a.pcmpeqd(xmm0, ptr_gp0); + a.pcmpgtb(xmm0, xmm7); + a.pcmpgtb(xmm0, ptr_gp0); + a.pcmpgtw(xmm0, xmm7); + a.pcmpgtw(xmm0, ptr_gp0); + a.pcmpgtd(xmm0, xmm7); + a.pcmpgtd(xmm0, ptr_gp0); + a.pmaxsw(xmm0, xmm7); + a.pmaxsw(xmm0, ptr_gp0); + a.pmaxub(xmm0, xmm7); + a.pmaxub(xmm0, ptr_gp0); + a.pminsw(xmm0, xmm7); + a.pminsw(xmm0, ptr_gp0); + a.pminub(xmm0, xmm7); + a.pminub(xmm0, ptr_gp0); + a.pmovmskb(gp0, xmm7); + a.pmulhw(xmm0, xmm7); + a.pmulhw(xmm0, ptr_gp0); + a.pmulhuw(xmm0, xmm7); + a.pmulhuw(xmm0, ptr_gp0); + a.pmullw(xmm0, xmm7); + a.pmullw(xmm0, ptr_gp0); + a.pmuludq(mm0, mm7); + a.pmuludq(mm0, ptr_gp0); + a.pmuludq(xmm0, xmm7); + a.pmuludq(xmm0, ptr_gp0); + a.por(xmm0, xmm7); + a.por(xmm0, ptr_gp0); + a.pslld(xmm0, xmm7); + a.pslld(xmm0, ptr_gp0); + a.pslld(xmm0, 0); + a.psllq(xmm0, xmm7); + a.psllq(xmm0, ptr_gp0); + a.psllq(xmm0, 0); + a.psllw(xmm0, xmm7); + a.psllw(xmm0, ptr_gp0); + a.psllw(xmm0, 0); + a.pslldq(xmm0, 0); + a.psrad(xmm0, xmm7); + a.psrad(xmm0, ptr_gp0); + a.psrad(xmm0, 0); + a.psraw(xmm0, xmm7); + a.psraw(xmm0, ptr_gp0); + a.psraw(xmm0, 0); + a.psubb(xmm0, xmm7); + a.psubb(xmm0, ptr_gp0); + a.psubw(xmm0, xmm7); + a.psubw(xmm0, ptr_gp0); + a.psubd(xmm0, xmm7); + a.psubd(xmm0, ptr_gp0); + a.psubq(mm0, mm7); + a.psubq(mm0, ptr_gp0); + a.psubq(xmm0, xmm7); + a.psubq(xmm0, ptr_gp0); + a.pmaddwd(xmm0, xmm7); + a.pmaddwd(xmm0, ptr_gp0); + a.pshufd(xmm0, xmm0, 0); + a.pshufd(xmm0, ptr_gp0, 0); + a.pshufhw(xmm0, xmm0, 0); + a.pshufhw(xmm0, ptr_gp0, 0); + a.pshuflw(xmm0, xmm0, 0); + a.pshuflw(xmm0, ptr_gp0, 0); + a.psrld(xmm0, xmm7); + a.psrld(xmm0, ptr_gp0); + a.psrld(xmm0, 0); + a.psrlq(xmm0, xmm7); + a.psrlq(xmm0, ptr_gp0); + a.psrlq(xmm0, 0); + a.psrldq(xmm0, 0); + a.psrlw(xmm0, xmm7); + a.psrlw(xmm0, ptr_gp0); + a.psrlw(xmm0, 0); + a.psubsb(xmm0, xmm7); + a.psubsb(xmm0, ptr_gp0); + a.psubsw(xmm0, xmm7); + a.psubsw(xmm0, ptr_gp0); + a.psubusb(xmm0, xmm7); + a.psubusb(xmm0, ptr_gp0); + a.psubusw(xmm0, xmm7); + a.psubusw(xmm0, ptr_gp0); + a.punpckhbw(xmm0, xmm7); + a.punpckhbw(xmm0, ptr_gp0); + a.punpckhwd(xmm0, xmm7); + a.punpckhwd(xmm0, ptr_gp0); + a.punpckhdq(xmm0, xmm7); + a.punpckhdq(xmm0, ptr_gp0); + a.punpckhqdq(xmm0, xmm7); + a.punpckhqdq(xmm0, ptr_gp0); + a.punpcklbw(xmm0, xmm7); + a.punpcklbw(xmm0, ptr_gp0); + a.punpcklwd(xmm0, xmm7); + a.punpcklwd(xmm0, ptr_gp0); + a.punpckldq(xmm0, xmm7); + a.punpckldq(xmm0, ptr_gp0); + a.punpcklqdq(xmm0, xmm7); + a.punpcklqdq(xmm0, ptr_gp0); + a.pxor(xmm0, xmm7); + a.pxor(xmm0, ptr_gp0); + a.sqrtpd(xmm0, xmm7); + a.sqrtpd(xmm0, ptr_gp0); + a.sqrtsd(xmm0, xmm7); + a.sqrtsd(xmm0, ptr_gp0); + a.subpd(xmm0, xmm7); + a.subpd(xmm0, ptr_gp0); + a.subsd(xmm0, xmm7); + a.subsd(xmm0, ptr_gp0); + a.ucomisd(xmm0, xmm7); + a.ucomisd(xmm0, ptr_gp0); + a.unpckhpd(xmm0, xmm7); + a.unpckhpd(xmm0, ptr_gp0); + a.unpcklpd(xmm0, xmm7); + a.unpcklpd(xmm0, ptr_gp0); + a.xorpd(xmm0, xmm7); + a.xorpd(xmm0, ptr_gp0); + + // SSE3/SSSE3/SSE4.1/SSE4.2. 
+ a.addsubpd(xmm0, xmm7); + a.addsubpd(xmm0, ptr_gp0); + a.addsubps(xmm0, xmm7); + a.addsubps(xmm0, ptr_gp0); + a.fisttp(dword_ptr(gp0)); + a.haddpd(xmm0, xmm7); + a.haddpd(xmm0, ptr_gp0); + a.haddps(xmm0, xmm7); + a.haddps(xmm0, ptr_gp0); + a.hsubpd(xmm0, xmm7); + a.hsubpd(xmm0, ptr_gp0); + a.hsubps(xmm0, xmm7); + a.hsubps(xmm0, ptr_gp0); + a.lddqu(xmm0, ptr_gp0); + a.monitor(); + a.movddup(xmm0, xmm7); + a.movddup(xmm0, ptr_gp0); + a.movshdup(xmm0, xmm7); + a.movshdup(xmm0, ptr_gp0); + a.movsldup(xmm0, xmm7); + a.movsldup(xmm0, ptr_gp0); + a.mwait(); + a.psignb(mm0, mm7); + a.psignb(mm0, ptr_gp0); + a.psignb(xmm0, xmm7); + a.psignb(xmm0, ptr_gp0); + a.psignw(mm0, mm7); + a.psignw(mm0, ptr_gp0); + a.psignw(xmm0, xmm7); + a.psignw(xmm0, ptr_gp0); + a.psignd(mm0, mm7); + a.psignd(mm0, ptr_gp0); + a.psignd(xmm0, xmm7); + a.psignd(xmm0, ptr_gp0); + a.phaddw(mm0, mm7); + a.phaddw(mm0, ptr_gp0); + a.phaddw(xmm0, xmm7); + a.phaddw(xmm0, ptr_gp0); + a.phaddd(mm0, mm7); + a.phaddd(mm0, ptr_gp0); + a.phaddd(xmm0, xmm7); + a.phaddd(xmm0, ptr_gp0); + a.phaddsw(mm0, mm7); + a.phaddsw(mm0, ptr_gp0); + a.phaddsw(xmm0, xmm7); + a.phaddsw(xmm0, ptr_gp0); + a.phsubw(mm0, mm7); + a.phsubw(mm0, ptr_gp0); + a.phsubw(xmm0, xmm7); + a.phsubw(xmm0, ptr_gp0); + a.phsubd(mm0, mm7); + a.phsubd(mm0, ptr_gp0); + a.phsubd(xmm0, xmm7); + a.phsubd(xmm0, ptr_gp0); + a.phsubsw(mm0, mm7); + a.phsubsw(mm0, ptr_gp0); + a.phsubsw(xmm0, xmm7); + a.phsubsw(xmm0, ptr_gp0); + a.pmaddubsw(mm0, mm7); + a.pmaddubsw(mm0, ptr_gp0); + a.pmaddubsw(xmm0, xmm7); + a.pmaddubsw(xmm0, ptr_gp0); + a.pabsb(mm0, mm7); + a.pabsb(mm0, ptr_gp0); + a.pabsb(xmm0, xmm7); + a.pabsb(xmm0, ptr_gp0); + a.pabsw(mm0, mm7); + a.pabsw(mm0, ptr_gp0); + a.pabsw(xmm0, xmm7); + a.pabsw(xmm0, ptr_gp0); + a.pabsd(mm0, mm7); + a.pabsd(mm0, ptr_gp0); + a.pabsd(xmm0, xmm7); + a.pabsd(xmm0, ptr_gp0); + a.pmulhrsw(mm0, mm7); + a.pmulhrsw(mm0, ptr_gp0); + a.pmulhrsw(xmm0, xmm7); + a.pmulhrsw(xmm0, ptr_gp0); + a.pshufb(mm0, mm7); + a.pshufb(mm0, ptr_gp0); + a.pshufb(xmm0, xmm7); + a.pshufb(xmm0, ptr_gp0); + a.palignr(mm0, mm7, 0); + a.palignr(mm0, ptr_gp0, 0); + a.palignr(xmm0, xmm0, 0); + a.palignr(xmm0, ptr_gp0, 0); + a.blendpd(xmm0, xmm0, 0); + a.blendpd(xmm0, ptr_gp0, 0); + a.blendps(xmm0, xmm0, 0); + a.blendps(xmm0, ptr_gp0, 0); + a.blendvpd(xmm0, xmm7); + a.blendvpd(xmm0, ptr_gp0); + a.blendvps(xmm0, xmm7); + a.blendvps(xmm0, ptr_gp0); + a.dppd(xmm0, xmm0, 0); + a.dppd(xmm0, ptr_gp0, 0); + a.dpps(xmm0, xmm0, 0); + a.dpps(xmm0, ptr_gp0, 0); + a.extractps(gp0, xmm0, 0); + a.extractps(ptr_gp0, xmm0, 0); + a.movntdqa(xmm0, ptr_gp0); + a.mpsadbw(xmm0, xmm0, 0); + a.mpsadbw(xmm0, ptr_gp0, 0); + a.packusdw(xmm0, xmm7); + a.packusdw(xmm0, ptr_gp0); + a.pblendvb(xmm0, xmm7); + a.pblendvb(xmm0, ptr_gp0); + a.pblendw(xmm0, xmm0, 0); + a.pblendw(xmm0, ptr_gp0, 0); + a.pcmpeqq(xmm0, xmm7); + a.pcmpeqq(xmm0, ptr_gp0); + a.pextrb(gp0, xmm0, 0); + a.pextrb(ptr_gp0, xmm0, 0); + a.pextrd(gp0, xmm0, 0); + a.pextrd(ptr_gp0, xmm0, 0); + a.pextrq(gp0, xmm0, 0); + a.pextrq(ptr_gp0, xmm0, 0); + a.pextrw(gp0, xmm0, 0); + a.pextrw(ptr_gp0, xmm0, 0); + a.phminposuw(xmm0, xmm7); + a.phminposuw(xmm0, ptr_gp0); + a.pinsrb(xmm0, eax, 0); + a.pinsrb(xmm0, ptr_gp0, 0); + a.pinsrd(xmm0, eax, 0); + a.pinsrd(xmm0, ptr_gp0, 0); + a.pinsrw(xmm0, eax, 0); + a.pinsrw(xmm0, ptr_gp0, 0); + a.pmaxuw(xmm0, xmm7); + a.pmaxuw(xmm0, ptr_gp0); + a.pmaxsb(xmm0, xmm7); + a.pmaxsb(xmm0, ptr_gp0); + a.pmaxsd(xmm0, xmm7); + a.pmaxsd(xmm0, ptr_gp0); + a.pmaxud(xmm0, xmm7); + a.pmaxud(xmm0, ptr_gp0); + a.pminsb(xmm0, 
xmm7); + a.pminsb(xmm0, ptr_gp0); + a.pminuw(xmm0, xmm7); + a.pminuw(xmm0, ptr_gp0); + a.pminud(xmm0, xmm7); + a.pminud(xmm0, ptr_gp0); + a.pminsd(xmm0, xmm7); + a.pminsd(xmm0, ptr_gp0); + a.pmovsxbw(xmm0, xmm7); + a.pmovsxbw(xmm0, ptr_gp0); + a.pmovsxbd(xmm0, xmm7); + a.pmovsxbd(xmm0, ptr_gp0); + a.pmovsxbq(xmm0, xmm7); + a.pmovsxbq(xmm0, ptr_gp0); + a.pmovsxwd(xmm0, xmm7); + a.pmovsxwd(xmm0, ptr_gp0); + a.pmovsxwq(xmm0, xmm7); + a.pmovsxwq(xmm0, ptr_gp0); + a.pmovsxdq(xmm0, xmm7); + a.pmovsxdq(xmm0, ptr_gp0); + a.pmovzxbw(xmm0, xmm7); + a.pmovzxbw(xmm0, ptr_gp0); + a.pmovzxbd(xmm0, xmm7); + a.pmovzxbd(xmm0, ptr_gp0); + a.pmovzxbq(xmm0, xmm7); + a.pmovzxbq(xmm0, ptr_gp0); + a.pmovzxwd(xmm0, xmm7); + a.pmovzxwd(xmm0, ptr_gp0); + a.pmovzxwq(xmm0, xmm7); + a.pmovzxwq(xmm0, ptr_gp0); + a.pmovzxdq(xmm0, xmm7); + a.pmovzxdq(xmm0, ptr_gp0); + a.pmuldq(xmm0, xmm7); + a.pmuldq(xmm0, ptr_gp0); + a.pmulld(xmm0, xmm7); + a.pmulld(xmm0, ptr_gp0); + a.ptest(xmm0, xmm7); + a.ptest(xmm0, ptr_gp0); + a.roundps(xmm0, xmm0, 0); + a.roundps(xmm0, ptr_gp0, 0); + a.roundss(xmm0, xmm0, 0); + a.roundss(xmm0, ptr_gp0, 0); + a.roundpd(xmm0, xmm0, 0); + a.roundpd(xmm0, ptr_gp0, 0); + a.roundsd(xmm0, xmm0, 0); + a.roundsd(xmm0, ptr_gp0, 0); + a.pcmpestri(xmm0, xmm0, 0); + a.pcmpestri(xmm0, ptr_gp0, 0); + a.pcmpestrm(xmm0, xmm0, 0); + a.pcmpestrm(xmm0, ptr_gp0, 0); + a.pcmpistri(xmm0, xmm0, 0); + a.pcmpistri(xmm0, ptr_gp0, 0); + a.pcmpistrm(xmm0, xmm0, 0); + a.pcmpistrm(xmm0, ptr_gp0, 0); + a.pcmpgtq(xmm0, xmm7); + a.pcmpgtq(xmm0, ptr_gp0); + a.popcnt(gp0, ptr_gp0); + + // AESNI. + a.aesdec(xmm0, xmm7); + a.aesdec(xmm0, ptr_gp0); + a.aesdeclast(xmm0, xmm7); + a.aesdeclast(xmm0, ptr_gp0); + a.aesenc(xmm0, xmm7); + a.aesenc(xmm0, ptr_gp0); + a.aesenclast(xmm0, xmm7); + a.aesenclast(xmm0, ptr_gp0); + a.aesimc(xmm0, xmm7); + a.aesimc(xmm0, ptr_gp0); + a.aeskeygenassist(xmm0, xmm7, 0); + a.aeskeygenassist(xmm0, ptr_gp0, 0); + + // PCLMULQDQ. + a.pclmulqdq(xmm0, xmm7, 0); + a.pclmulqdq(xmm0, ptr_gp0, 0); + + // AVX. 
+ a.vaddpd(xmm0, xmm1, xmm2); + a.vaddpd(xmm0, xmm1, ptr_gp0); + a.vaddpd(ymm0, ymm1, ymm2); + a.vaddpd(ymm0, ymm1, ptr_gp0); + a.vaddps(xmm0, xmm1, xmm2); + a.vaddps(xmm0, xmm1, ptr_gp0); + a.vaddps(ymm0, ymm1, ymm2); + a.vaddps(ymm0, ymm1, ptr_gp0); + a.vaddsd(xmm0, xmm1, xmm2); + a.vaddsd(xmm0, xmm1, ptr_gp0); + a.vaddss(xmm0, xmm1, xmm2); + a.vaddss(xmm0, xmm1, ptr_gp0); + a.vaddsubpd(xmm0, xmm1, xmm2); + a.vaddsubpd(xmm0, xmm1, ptr_gp0); + a.vaddsubpd(ymm0, ymm1, ymm2); + a.vaddsubpd(ymm0, ymm1, ptr_gp0); + a.vaddsubps(xmm0, xmm1, xmm2); + a.vaddsubps(xmm0, xmm1, ptr_gp0); + a.vaddsubps(ymm0, ymm1, ymm2); + a.vaddsubps(ymm0, ymm1, ptr_gp0); + a.vandpd(xmm0, xmm1, xmm2); + a.vandpd(xmm0, xmm1, ptr_gp0); + a.vandpd(ymm0, ymm1, ymm2); + a.vandpd(ymm0, ymm1, ptr_gp0); + a.vandps(xmm0, xmm1, xmm2); + a.vandps(xmm0, xmm1, ptr_gp0); + a.vandps(ymm0, ymm1, ymm2); + a.vandps(ymm0, ymm1, ptr_gp0); + a.vandnpd(xmm0, xmm1, xmm2); + a.vandnpd(xmm0, xmm1, ptr_gp0); + a.vandnpd(ymm0, ymm1, ymm2); + a.vandnpd(ymm0, ymm1, ptr_gp0); + a.vandnps(xmm0, xmm1, xmm2); + a.vandnps(xmm0, xmm1, ptr_gp0); + a.vandnps(ymm0, ymm1, ymm2); + a.vandnps(ymm0, ymm1, ptr_gp0); + a.vblendpd(xmm0, xmm1, xmm2, 0); + a.vblendpd(xmm0, xmm1, ptr_gp0, 0); + a.vblendpd(ymm0, ymm1, ymm2, 0); + a.vblendpd(ymm0, ymm1, ptr_gp0, 0); + a.vblendps(xmm0, xmm1, xmm2, 0); + a.vblendps(xmm0, xmm1, ptr_gp0, 0); + a.vblendps(ymm0, ymm1, ymm2, 0); + a.vblendps(ymm0, ymm1, ptr_gp0, 0); + a.vblendvpd(xmm0, xmm1, xmm2, xmm3); + a.vblendvpd(xmm0, xmm1, ptr_gp0, xmm3); + a.vblendvpd(ymm0, ymm1, ymm2, ymm3); + a.vblendvpd(ymm0, ymm1, ptr_gp0, ymm3); + a.vbroadcastf128(ymm0, ptr_gp0); + a.vbroadcastsd(ymm0, ptr_gp0); + a.vbroadcastss(xmm0, ptr_gp0); + a.vbroadcastss(ymm0, ptr_gp0); + a.vcmppd(xmm0, xmm1, xmm2, 0); + a.vcmppd(xmm0, xmm1, ptr_gp0, 0); + a.vcmppd(ymm0, ymm1, ymm2, 0); + a.vcmppd(ymm0, ymm1, ptr_gp0, 0); + a.vcmpps(xmm0, xmm1, xmm2, 0); + a.vcmpps(xmm0, xmm1, ptr_gp0, 0); + a.vcmpps(ymm0, ymm1, ymm2, 0); + a.vcmpps(ymm0, ymm1, ptr_gp0, 0); + a.vcmpsd(xmm0, xmm1, xmm2, 0); + a.vcmpsd(xmm0, xmm1, ptr_gp0, 0); + a.vcmpss(xmm0, xmm1, xmm2, 0); + a.vcmpss(xmm0, xmm1, ptr_gp0, 0); + a.vcomisd(xmm0, xmm1); + a.vcomisd(xmm0, ptr_gp0); + a.vcomiss(xmm0, xmm1); + a.vcomiss(xmm0, ptr_gp0); + a.vcvtdq2pd(xmm0, xmm1); + a.vcvtdq2pd(xmm0, ptr_gp0); + a.vcvtdq2pd(ymm0, xmm1); + a.vcvtdq2pd(ymm0, ptr_gp0); + a.vcvtdq2ps(xmm0, xmm1); + a.vcvtdq2ps(xmm0, ptr_gp0); + a.vcvtdq2ps(ymm0, ymm1); + a.vcvtdq2ps(ymm0, ptr_gp0); + a.vcvtpd2dq(xmm0, xmm1); + a.vcvtpd2dq(xmm0, ymm1); + a.vcvtpd2dq(xmm0, ptr_gp0); + a.vcvtpd2ps(xmm0, xmm1); + a.vcvtpd2ps(xmm0, ymm1); + a.vcvtpd2ps(xmm0, ptr_gp0); + a.vcvtps2dq(xmm0, xmm1); + a.vcvtps2dq(xmm0, ptr_gp0); + a.vcvtps2dq(ymm0, ymm1); + a.vcvtps2dq(ymm0, ptr_gp0); + a.vcvtps2pd(xmm0, xmm1); + a.vcvtps2pd(xmm0, ptr_gp0); + a.vcvtps2pd(ymm0, xmm1); + a.vcvtps2pd(ymm0, ptr_gp0); + a.vcvtsd2si(gp0, xmm1); + a.vcvtsd2si(gp0, ptr_gp1); + a.vcvtsd2ss(xmm0, xmm1, xmm2); + a.vcvtsd2ss(xmm0, xmm1, ptr_gp0); + a.vcvtsi2sd(xmm0, xmm1, gp0); + a.vcvtsi2sd(xmm0, xmm1, ptr_gp0); + a.vcvtsi2ss(xmm0, xmm1, gp0); + a.vcvtsi2ss(xmm0, xmm1, ptr_gp0); + a.vcvtss2sd(xmm0, xmm1, xmm2); + a.vcvtss2sd(xmm0, xmm1, ptr_gp0); + a.vcvtss2si(gp0, xmm1); + a.vcvtss2si(gp0, ptr_gp1); + a.vcvttpd2dq(xmm0, xmm1); + a.vcvttpd2dq(xmm0, ymm1); + a.vcvttpd2dq(xmm0, ptr_gp0); + a.vcvttps2dq(xmm0, xmm1); + a.vcvttps2dq(xmm0, ptr_gp0); + a.vcvttps2dq(ymm0, ymm1); + a.vcvttps2dq(ymm0, ptr_gp0); + a.vcvttsd2si(gp0, xmm1); + a.vcvttsd2si(gp0, ptr_gp1); + 
a.vcvttss2si(gp0, xmm1); + a.vcvttss2si(gp0, ptr_gp1); + a.vdivpd(xmm0, xmm1, xmm2); + a.vdivpd(xmm0, xmm1, ptr_gp0); + a.vdivpd(ymm0, ymm1, ymm2); + a.vdivpd(ymm0, ymm1, ptr_gp0); + a.vdivps(xmm0, xmm1, xmm2); + a.vdivps(xmm0, xmm1, ptr_gp0); + a.vdivps(ymm0, ymm1, ymm2); + a.vdivps(ymm0, ymm1, ptr_gp0); + a.vdivsd(xmm0, xmm1, xmm2); + a.vdivsd(xmm0, xmm1, ptr_gp0); + a.vdivss(xmm0, xmm1, xmm2); + a.vdivss(xmm0, xmm1, ptr_gp0); + a.vdppd(xmm0, xmm1, xmm2, 0); + a.vdppd(xmm0, xmm1, ptr_gp0, 0); + a.vdpps(xmm0, xmm1, xmm2, 0); + a.vdpps(xmm0, xmm1, ptr_gp0, 0); + a.vdpps(ymm0, ymm1, ymm2, 0); + a.vdpps(ymm0, ymm1, ptr_gp0, 0); + a.vextractf128(xmm0, ymm0, 0); + a.vextractf128(ptr_gp0, ymm1, 0); + a.vextractps(gp0, xmm1, 0); + a.vextractps(ptr_gp0, xmm1, 0); + a.vhaddpd(xmm0, xmm1, xmm2); + a.vhaddpd(xmm0, xmm1, ptr_gp0); + a.vhaddpd(ymm0, ymm1, ymm2); + a.vhaddpd(ymm0, ymm1, ptr_gp0); + a.vhaddps(xmm0, xmm1, xmm2); + a.vhaddps(xmm0, xmm1, ptr_gp0); + a.vhaddps(ymm0, ymm1, ymm2); + a.vhaddps(ymm0, ymm1, ptr_gp0); + a.vhsubpd(xmm0, xmm1, xmm2); + a.vhsubpd(xmm0, xmm1, ptr_gp0); + a.vhsubpd(ymm0, ymm1, ymm2); + a.vhsubpd(ymm0, ymm1, ptr_gp0); + a.vhsubps(xmm0, xmm1, xmm2); + a.vhsubps(xmm0, xmm1, ptr_gp0); + a.vhsubps(ymm0, ymm1, ymm2); + a.vhsubps(ymm0, ymm1, ptr_gp0); + a.vinsertf128(ymm0, ymm1, xmm2, 0); + a.vinsertf128(ymm0, ymm1, ptr_gp0, 0); + a.vinsertps(xmm0, xmm1, xmm2, 0); + a.vinsertps(xmm0, xmm1, ptr_gp0, 0); + a.vlddqu(xmm0, ptr_gp0); + a.vlddqu(ymm0, ptr_gp0); + a.vldmxcsr(ptr_gp0); + a.vmaskmovdqu(xmm0, xmm1); + a.vmaskmovps(xmm0, xmm1, ptr_gp0); + a.vmaskmovps(ymm0, ymm1, ptr_gp0); + a.vmaskmovpd(xmm0, xmm1, ptr_gp0); + a.vmaskmovpd(ymm0, ymm1, ptr_gp0); + a.vmaskmovps(ptr_gp0, xmm0, xmm1); + a.vmaskmovps(ptr_gp0, ymm0, ymm1); + a.vmaskmovpd(ptr_gp0, xmm0, xmm1); + a.vmaskmovpd(ptr_gp0, ymm0, ymm1); + a.vmaxpd(xmm0, xmm1, xmm2); + a.vmaxpd(xmm0, xmm1, ptr_gp0); + a.vmaxpd(ymm0, ymm1, ymm2); + a.vmaxpd(ymm0, ymm1, ptr_gp0); + a.vmaxps(xmm0, xmm1, xmm2); + a.vmaxps(xmm0, xmm1, ptr_gp0); + a.vmaxps(ymm0, ymm1, ymm2); + a.vmaxps(ymm0, ymm1, ptr_gp0); + a.vmaxsd(xmm0, xmm1, xmm2); + a.vmaxsd(xmm0, xmm1, ptr_gp0); + a.vmaxss(xmm0, xmm1, xmm2); + a.vmaxss(xmm0, xmm1, ptr_gp0); + a.vminpd(xmm0, xmm1, xmm2); + a.vminpd(xmm0, xmm1, ptr_gp0); + a.vminpd(ymm0, ymm1, ymm2); + a.vminpd(ymm0, ymm1, ptr_gp0); + a.vminps(xmm0, xmm1, xmm2); + a.vminps(xmm0, xmm1, ptr_gp0); + a.vminps(ymm0, ymm1, ymm2); + a.vminps(ymm0, ymm1, ptr_gp0); + a.vminsd(xmm0, xmm1, xmm2); + a.vminsd(xmm0, xmm1, ptr_gp0); + a.vminss(xmm0, xmm1, xmm2); + a.vminss(xmm0, xmm1, ptr_gp0); + a.vmovapd(xmm0, xmm1); + a.vmovapd(xmm0, ptr_gp0); + a.vmovapd(ptr_gp0, xmm1); + a.vmovapd(ymm0, ymm1); + a.vmovapd(ymm0, ptr_gp0); + a.vmovapd(ptr_gp0, ymm1); + a.vmovaps(xmm0, xmm1); + a.vmovaps(xmm0, ptr_gp0); + a.vmovaps(ptr_gp0, xmm1); + a.vmovaps(ymm0, ymm1); + a.vmovaps(ymm0, ptr_gp0); + a.vmovaps(ptr_gp0, ymm1); + a.vmovd(xmm0, gp0); + a.vmovd(xmm0, ptr_gp0); + a.vmovd(gp0, xmm1); + a.vmovd(ptr_gp0, xmm1); + a.vmovddup(xmm0, xmm1); + a.vmovddup(xmm0, ptr_gp0); + a.vmovddup(ymm0, ymm1); + a.vmovddup(ymm0, ptr_gp0); + a.vmovdqa(xmm0, xmm1); + a.vmovdqa(xmm0, ptr_gp0); + a.vmovdqa(ptr_gp0, xmm1); + a.vmovdqa(ymm0, ymm1); + a.vmovdqa(ymm0, ptr_gp0); + a.vmovdqa(ptr_gp0, ymm1); + a.vmovdqu(xmm0, xmm1); + a.vmovdqu(xmm0, ptr_gp0); + a.vmovdqu(ptr_gp0, xmm1); + a.vmovdqu(ymm0, ymm1); + a.vmovdqu(ymm0, ptr_gp0); + a.vmovdqu(ptr_gp0, ymm1); + a.vmovhlps(xmm0, xmm1, xmm2); + a.vmovhpd(xmm0, xmm1, ptr_gp0); + a.vmovhpd(ptr_gp0, xmm1); + 
a.vmovhps(xmm0, xmm1, ptr_gp0); + a.vmovhps(ptr_gp0, xmm1); + a.vmovlhps(xmm0, xmm1, xmm2); + a.vmovlpd(xmm0, xmm1, ptr_gp0); + a.vmovlpd(ptr_gp0, xmm1); + a.vmovlps(xmm0, xmm1, ptr_gp0); + a.vmovlps(ptr_gp0, xmm1); + a.vmovmskpd(gp0, xmm1); + a.vmovmskpd(gp0, ymm1); + a.vmovmskps(gp0, xmm1); + a.vmovmskps(gp0, ymm1); + a.vmovntdq(ptr_gp0, xmm1); + a.vmovntdq(ptr_gp0, ymm1); + a.vmovntdqa(xmm0, ptr_gp0); + a.vmovntpd(ptr_gp0, xmm1); + a.vmovntpd(ptr_gp0, ymm1); + a.vmovntps(ptr_gp0, xmm1); + a.vmovntps(ptr_gp0, ymm1); + a.vmovsd(xmm0, xmm1, xmm2); + a.vmovsd(xmm0, ptr_gp0); + a.vmovsd(ptr_gp0, xmm1); + a.vmovshdup(xmm0, xmm1); + a.vmovshdup(xmm0, ptr_gp0); + a.vmovshdup(ymm0, ymm1); + a.vmovshdup(ymm0, ptr_gp0); + a.vmovsldup(xmm0, xmm1); + a.vmovsldup(xmm0, ptr_gp0); + a.vmovsldup(ymm0, ymm1); + a.vmovsldup(ymm0, ptr_gp0); + a.vmovss(xmm0, xmm1, xmm2); + a.vmovss(xmm0, ptr_gp0); + a.vmovss(ptr_gp0, xmm1); + a.vmovupd(xmm0, xmm1); + a.vmovupd(xmm0, ptr_gp0); + a.vmovupd(ptr_gp0, xmm1); + a.vmovupd(ymm0, ymm1); + a.vmovupd(ymm0, ptr_gp0); + a.vmovupd(ptr_gp0, ymm1); + a.vmovups(xmm0, xmm1); + a.vmovups(xmm0, ptr_gp0); + a.vmovups(ptr_gp0, xmm1); + a.vmovups(ymm0, ymm1); + a.vmovups(ymm0, ptr_gp0); + a.vmovups(ptr_gp0, ymm1); + a.vmpsadbw(xmm0, xmm1, xmm2, 0); + a.vmpsadbw(xmm0, xmm1, ptr_gp0, 0); + a.vmulpd(xmm0, xmm1, xmm2); + a.vmulpd(xmm0, xmm1, ptr_gp0); + a.vmulpd(ymm0, ymm1, ymm2); + a.vmulpd(ymm0, ymm1, ptr_gp0); + a.vmulps(xmm0, xmm1, xmm2); + a.vmulps(xmm0, xmm1, ptr_gp0); + a.vmulps(ymm0, ymm1, ymm2); + a.vmulps(ymm0, ymm1, ptr_gp0); + a.vmulsd(xmm0, xmm1, xmm2); + a.vmulsd(xmm0, xmm1, ptr_gp0); + a.vmulss(xmm0, xmm1, xmm2); + a.vmulss(xmm0, xmm1, ptr_gp0); + a.vorpd(xmm0, xmm1, xmm2); + a.vorpd(xmm0, xmm1, ptr_gp0); + a.vorpd(ymm0, ymm1, ymm2); + a.vorpd(ymm0, ymm1, ptr_gp0); + a.vorps(xmm0, xmm1, xmm2); + a.vorps(xmm0, xmm1, ptr_gp0); + a.vorps(ymm0, ymm1, ymm2); + a.vorps(ymm0, ymm1, ptr_gp0); + a.vpabsb(xmm0, xmm1); + a.vpabsb(xmm0, ptr_gp0); + a.vpabsd(xmm0, xmm1); + a.vpabsd(xmm0, ptr_gp0); + a.vpabsw(xmm0, xmm1); + a.vpabsw(xmm0, ptr_gp0); + a.vpackssdw(xmm0, xmm1, xmm2); + a.vpackssdw(xmm0, xmm1, ptr_gp0); + a.vpacksswb(xmm0, xmm1, xmm2); + a.vpacksswb(xmm0, xmm1, ptr_gp0); + a.vpackusdw(xmm0, xmm1, xmm2); + a.vpackusdw(xmm0, xmm1, ptr_gp0); + a.vpackuswb(xmm0, xmm1, xmm2); + a.vpackuswb(xmm0, xmm1, ptr_gp0); + a.vpaddb(xmm0, xmm1, xmm2); + a.vpaddb(xmm0, xmm1, ptr_gp0); + a.vpaddd(xmm0, xmm1, xmm2); + a.vpaddd(xmm0, xmm1, ptr_gp0); + a.vpaddq(xmm0, xmm1, xmm2); + a.vpaddq(xmm0, xmm1, ptr_gp0); + a.vpaddw(xmm0, xmm1, xmm2); + a.vpaddw(xmm0, xmm1, ptr_gp0); + a.vpaddsb(xmm0, xmm1, xmm2); + a.vpaddsb(xmm0, xmm1, ptr_gp0); + a.vpaddsw(xmm0, xmm1, xmm2); + a.vpaddsw(xmm0, xmm1, ptr_gp0); + a.vpaddusb(xmm0, xmm1, xmm2); + a.vpaddusb(xmm0, xmm1, ptr_gp0); + a.vpaddusw(xmm0, xmm1, xmm2); + a.vpaddusw(xmm0, xmm1, ptr_gp0); + a.vpalignr(xmm0, xmm1, xmm2, 0); + a.vpalignr(xmm0, xmm1, ptr_gp0, 0); + a.vpand(xmm0, xmm1, xmm2); + a.vpand(xmm0, xmm1, ptr_gp0); + a.vpandn(xmm0, xmm1, xmm2); + a.vpandn(xmm0, xmm1, ptr_gp0); + a.vpavgb(xmm0, xmm1, xmm2); + a.vpavgb(xmm0, xmm1, ptr_gp0); + a.vpavgw(xmm0, xmm1, xmm2); + a.vpavgw(xmm0, xmm1, ptr_gp0); + a.vpblendvb(xmm0, xmm1, xmm2, xmm3); + a.vpblendvb(xmm0, xmm1, ptr_gp0, xmm3); + a.vpblendw(xmm0, xmm1, xmm2, 0); + a.vpblendw(xmm0, xmm1, ptr_gp0, 0); + a.vpcmpeqb(xmm0, xmm1, xmm2); + a.vpcmpeqb(xmm0, xmm1, ptr_gp0); + a.vpcmpeqd(xmm0, xmm1, xmm2); + a.vpcmpeqd(xmm0, xmm1, ptr_gp0); + a.vpcmpeqq(xmm0, xmm1, xmm2); + a.vpcmpeqq(xmm0, xmm1, 
ptr_gp0); + a.vpcmpeqw(xmm0, xmm1, xmm2); + a.vpcmpeqw(xmm0, xmm1, ptr_gp0); + a.vpcmpgtb(xmm0, xmm1, xmm2); + a.vpcmpgtb(xmm0, xmm1, ptr_gp0); + a.vpcmpgtd(xmm0, xmm1, xmm2); + a.vpcmpgtd(xmm0, xmm1, ptr_gp0); + a.vpcmpgtq(xmm0, xmm1, xmm2); + a.vpcmpgtq(xmm0, xmm1, ptr_gp0); + a.vpcmpgtw(xmm0, xmm1, xmm2); + a.vpcmpgtw(xmm0, xmm1, ptr_gp0); + a.vpcmpestri(xmm0, xmm1, 0); + a.vpcmpestri(xmm0, ptr_gp0, 0); + a.vpcmpestrm(xmm0, xmm1, 0); + a.vpcmpestrm(xmm0, ptr_gp0, 0); + a.vpcmpistri(xmm0, xmm1, 0); + a.vpcmpistri(xmm0, ptr_gp0, 0); + a.vpcmpistrm(xmm0, xmm1, 0); + a.vpcmpistrm(xmm0, ptr_gp0, 0); + a.vpermilpd(xmm0, xmm1, xmm2); + a.vpermilpd(xmm0, xmm1, ptr_gp0); + a.vpermilpd(ymm0, ymm1, ymm2); + a.vpermilpd(ymm0, ymm1, ptr_gp0); + a.vpermilpd(xmm0, xmm1, 0); + a.vpermilpd(xmm0, ptr_gp0, 0); + a.vpermilpd(ymm0, ymm1, 0); + a.vpermilpd(ymm0, ptr_gp0, 0); + a.vpermilps(xmm0, xmm1, xmm2); + a.vpermilps(xmm0, xmm1, ptr_gp0); + a.vpermilps(ymm0, ymm1, ymm2); + a.vpermilps(ymm0, ymm1, ptr_gp0); + a.vpermilps(xmm0, xmm1, 0); + a.vpermilps(xmm0, ptr_gp0, 0); + a.vpermilps(ymm0, ymm1, 0); + a.vpermilps(ymm0, ptr_gp0, 0); + a.vperm2f128(ymm0, ymm1, ymm2, 0); + a.vperm2f128(ymm0, ymm1, ptr_gp0, 0); + a.vpextrb(gp0, xmm1, 0); + a.vpextrb(ptr_gp0, xmm1, 0); + a.vpextrd(gp0, xmm1, 0); + a.vpextrd(ptr_gp0, xmm1, 0); + a.vpextrw(gp0, xmm1, 0); + a.vpextrw(ptr_gp0, xmm1, 0); + a.vphaddd(xmm0, xmm1, xmm2); + a.vphaddd(xmm0, xmm1, ptr_gp0); + a.vphaddsw(xmm0, xmm1, xmm2); + a.vphaddsw(xmm0, xmm1, ptr_gp0); + a.vphaddw(xmm0, xmm1, xmm2); + a.vphaddw(xmm0, xmm1, ptr_gp0); + a.vphminposuw(xmm0, xmm1); + a.vphminposuw(xmm0, ptr_gp0); + a.vphsubd(xmm0, xmm1, xmm2); + a.vphsubd(xmm0, xmm1, ptr_gp0); + a.vphsubsw(xmm0, xmm1, xmm2); + a.vphsubsw(xmm0, xmm1, ptr_gp0); + a.vphsubw(xmm0, xmm1, xmm2); + a.vphsubw(xmm0, xmm1, ptr_gp0); + a.vpinsrb(xmm0, xmm1, gp0, 0); + a.vpinsrb(xmm0, xmm1, ptr_gp0, 0); + a.vpinsrd(xmm0, xmm1, gp0, 0); + a.vpinsrd(xmm0, xmm1, ptr_gp0, 0); + a.vpinsrw(xmm0, xmm1, gp0, 0); + a.vpinsrw(xmm0, xmm1, ptr_gp0, 0); + a.vpmaddubsw(xmm0, xmm1, xmm2); + a.vpmaddubsw(xmm0, xmm1, ptr_gp0); + a.vpmaddwd(xmm0, xmm1, xmm2); + a.vpmaddwd(xmm0, xmm1, ptr_gp0); + a.vpmaxsb(xmm0, xmm1, xmm2); + a.vpmaxsb(xmm0, xmm1, ptr_gp0); + a.vpmaxsd(xmm0, xmm1, xmm2); + a.vpmaxsd(xmm0, xmm1, ptr_gp0); + a.vpmaxsw(xmm0, xmm1, xmm2); + a.vpmaxsw(xmm0, xmm1, ptr_gp0); + a.vpmaxub(xmm0, xmm1, xmm2); + a.vpmaxub(xmm0, xmm1, ptr_gp0); + a.vpmaxud(xmm0, xmm1, xmm2); + a.vpmaxud(xmm0, xmm1, ptr_gp0); + a.vpmaxuw(xmm0, xmm1, xmm2); + a.vpmaxuw(xmm0, xmm1, ptr_gp0); + a.vpminsb(xmm0, xmm1, xmm2); + a.vpminsb(xmm0, xmm1, ptr_gp0); + a.vpminsd(xmm0, xmm1, xmm2); + a.vpminsd(xmm0, xmm1, ptr_gp0); + a.vpminsw(xmm0, xmm1, xmm2); + a.vpminsw(xmm0, xmm1, ptr_gp0); + a.vpminub(xmm0, xmm1, xmm2); + a.vpminub(xmm0, xmm1, ptr_gp0); + a.vpminud(xmm0, xmm1, xmm2); + a.vpminud(xmm0, xmm1, ptr_gp0); + a.vpminuw(xmm0, xmm1, xmm2); + a.vpminuw(xmm0, xmm1, ptr_gp0); + a.vpmovmskb(gp0, xmm1); + a.vpmovsxbd(xmm0, xmm1); + a.vpmovsxbd(xmm0, ptr_gp0); + a.vpmovsxbq(xmm0, xmm1); + a.vpmovsxbq(xmm0, ptr_gp0); + a.vpmovsxbw(xmm0, xmm1); + a.vpmovsxbw(xmm0, ptr_gp0); + a.vpmovsxdq(xmm0, xmm1); + a.vpmovsxdq(xmm0, ptr_gp0); + a.vpmovsxwd(xmm0, xmm1); + a.vpmovsxwd(xmm0, ptr_gp0); + a.vpmovsxwq(xmm0, xmm1); + a.vpmovsxwq(xmm0, ptr_gp0); + a.vpmovzxbd(xmm0, xmm1); + a.vpmovzxbd(xmm0, ptr_gp0); + a.vpmovzxbq(xmm0, xmm1); + a.vpmovzxbq(xmm0, ptr_gp0); + a.vpmovzxbw(xmm0, xmm1); + a.vpmovzxbw(xmm0, ptr_gp0); + a.vpmovzxdq(xmm0, xmm1); + a.vpmovzxdq(xmm0, 
ptr_gp0); + a.vpmovzxwd(xmm0, xmm1); + a.vpmovzxwd(xmm0, ptr_gp0); + a.vpmovzxwq(xmm0, xmm1); + a.vpmovzxwq(xmm0, ptr_gp0); + a.vpmuldq(xmm0, xmm1, xmm2); + a.vpmuldq(xmm0, xmm1, ptr_gp0); + a.vpmulhrsw(xmm0, xmm1, xmm2); + a.vpmulhrsw(xmm0, xmm1, ptr_gp0); + a.vpmulhuw(xmm0, xmm1, xmm2); + a.vpmulhuw(xmm0, xmm1, ptr_gp0); + a.vpmulhw(xmm0, xmm1, xmm2); + a.vpmulhw(xmm0, xmm1, ptr_gp0); + a.vpmulld(xmm0, xmm1, xmm2); + a.vpmulld(xmm0, xmm1, ptr_gp0); + a.vpmullw(xmm0, xmm1, xmm2); + a.vpmullw(xmm0, xmm1, ptr_gp0); + a.vpmuludq(xmm0, xmm1, xmm2); + a.vpmuludq(xmm0, xmm1, ptr_gp0); + a.vpor(xmm0, xmm1, xmm2); + a.vpor(xmm0, xmm1, ptr_gp0); + a.vpsadbw(xmm0, xmm1, xmm2); + a.vpsadbw(xmm0, xmm1, ptr_gp0); + a.vpshufb(xmm0, xmm1, xmm2); + a.vpshufb(xmm0, xmm1, ptr_gp0); + a.vpshufd(xmm0, xmm1, 0); + a.vpshufd(xmm0, ptr_gp0, 0); + a.vpshufhw(xmm0, xmm1, 0); + a.vpshufhw(xmm0, ptr_gp0, 0); + a.vpshuflw(xmm0, xmm1, 0); + a.vpshuflw(xmm0, ptr_gp0, 0); + a.vpsignb(xmm0, xmm1, xmm2); + a.vpsignb(xmm0, xmm1, ptr_gp0); + a.vpsignd(xmm0, xmm1, xmm2); + a.vpsignd(xmm0, xmm1, ptr_gp0); + a.vpsignw(xmm0, xmm1, xmm2); + a.vpsignw(xmm0, xmm1, ptr_gp0); + a.vpslld(xmm0, xmm1, xmm2); + a.vpslld(xmm0, xmm1, ptr_gp0); + a.vpslld(xmm0, xmm1, 0); + a.vpslldq(xmm0, xmm1, 0); + a.vpsllq(xmm0, xmm1, xmm2); + a.vpsllq(xmm0, xmm1, ptr_gp0); + a.vpsllq(xmm0, xmm1, 0); + a.vpsllw(xmm0, xmm1, xmm2); + a.vpsllw(xmm0, xmm1, ptr_gp0); + a.vpsllw(xmm0, xmm1, 0); + a.vpsrad(xmm0, xmm1, xmm2); + a.vpsrad(xmm0, xmm1, ptr_gp0); + a.vpsrad(xmm0, xmm1, 0); + a.vpsraw(xmm0, xmm1, xmm2); + a.vpsraw(xmm0, xmm1, ptr_gp0); + a.vpsraw(xmm0, xmm1, 0); + a.vpsrld(xmm0, xmm1, xmm2); + a.vpsrld(xmm0, xmm1, ptr_gp0); + a.vpsrld(xmm0, xmm1, 0); + a.vpsrldq(xmm0, xmm1, 0); + a.vpsrlq(xmm0, xmm1, xmm2); + a.vpsrlq(xmm0, xmm1, ptr_gp0); + a.vpsrlq(xmm0, xmm1, 0); + a.vpsrlw(xmm0, xmm1, xmm2); + a.vpsrlw(xmm0, xmm1, ptr_gp0); + a.vpsrlw(xmm0, xmm1, 0); + a.vpsubb(xmm0, xmm1, xmm2); + a.vpsubb(xmm0, xmm1, ptr_gp0); + a.vpsubd(xmm0, xmm1, xmm2); + a.vpsubd(xmm0, xmm1, ptr_gp0); + a.vpsubq(xmm0, xmm1, xmm2); + a.vpsubq(xmm0, xmm1, ptr_gp0); + a.vpsubw(xmm0, xmm1, xmm2); + a.vpsubw(xmm0, xmm1, ptr_gp0); + a.vpsubsb(xmm0, xmm1, xmm2); + a.vpsubsb(xmm0, xmm1, ptr_gp0); + a.vpsubsw(xmm0, xmm1, xmm2); + a.vpsubsw(xmm0, xmm1, ptr_gp0); + a.vpsubusb(xmm0, xmm1, xmm2); + a.vpsubusb(xmm0, xmm1, ptr_gp0); + a.vpsubusw(xmm0, xmm1, xmm2); + a.vpsubusw(xmm0, xmm1, ptr_gp0); + a.vptest(xmm0, xmm1); + a.vptest(xmm0, ptr_gp0); + a.vptest(ymm0, ymm1); + a.vptest(ymm0, ptr_gp0); + a.vpunpckhbw(xmm0, xmm1, xmm2); + a.vpunpckhbw(xmm0, xmm1, ptr_gp0); + a.vpunpckhdq(xmm0, xmm1, xmm2); + a.vpunpckhdq(xmm0, xmm1, ptr_gp0); + a.vpunpckhqdq(xmm0, xmm1, xmm2); + a.vpunpckhqdq(xmm0, xmm1, ptr_gp0); + a.vpunpckhwd(xmm0, xmm1, xmm2); + a.vpunpckhwd(xmm0, xmm1, ptr_gp0); + a.vpunpcklbw(xmm0, xmm1, xmm2); + a.vpunpcklbw(xmm0, xmm1, ptr_gp0); + a.vpunpckldq(xmm0, xmm1, xmm2); + a.vpunpckldq(xmm0, xmm1, ptr_gp0); + a.vpunpcklqdq(xmm0, xmm1, xmm2); + a.vpunpcklqdq(xmm0, xmm1, ptr_gp0); + a.vpunpcklwd(xmm0, xmm1, xmm2); + a.vpunpcklwd(xmm0, xmm1, ptr_gp0); + a.vpxor(xmm0, xmm1, xmm2); + a.vpxor(xmm0, xmm1, ptr_gp0); + a.vrcpps(xmm0, xmm1); + a.vrcpps(xmm0, ptr_gp0); + a.vrcpps(ymm0, ymm1); + a.vrcpps(ymm0, ptr_gp0); + a.vrcpss(xmm0, xmm1, xmm2); + a.vrcpss(xmm0, xmm1, ptr_gp0); + a.vrsqrtps(xmm0, xmm1); + a.vrsqrtps(xmm0, ptr_gp0); + a.vrsqrtps(ymm0, ymm1); + a.vrsqrtps(ymm0, ptr_gp0); + a.vrsqrtss(xmm0, xmm1, xmm2); + a.vrsqrtss(xmm0, xmm1, ptr_gp0); + a.vroundpd(xmm0, xmm1, 0); + 
a.vroundpd(xmm0, ptr_gp0, 0); + a.vroundpd(ymm0, ymm1, 0); + a.vroundpd(ymm0, ptr_gp0, 0); + a.vroundps(xmm0, xmm1, 0); + a.vroundps(xmm0, ptr_gp0, 0); + a.vroundps(ymm0, ymm1, 0); + a.vroundps(ymm0, ptr_gp0, 0); + a.vroundsd(xmm0, xmm1, xmm2, 0); + a.vroundsd(xmm0, xmm1, ptr_gp0, 0); + a.vroundss(xmm0, xmm1, xmm2, 0); + a.vroundss(xmm0, xmm1, ptr_gp0, 0); + a.vshufpd(xmm0, xmm1, xmm2, 0); + a.vshufpd(xmm0, xmm1, ptr_gp0, 0); + a.vshufpd(ymm0, ymm1, ymm2, 0); + a.vshufpd(ymm0, ymm1, ptr_gp0, 0); + a.vshufps(xmm0, xmm1, xmm2, 0); + a.vshufps(xmm0, xmm1, ptr_gp0, 0); + a.vshufps(ymm0, ymm1, ymm2, 0); + a.vshufps(ymm0, ymm1, ptr_gp0, 0); + a.vsqrtpd(xmm0, xmm1); + a.vsqrtpd(xmm0, ptr_gp0); + a.vsqrtpd(ymm0, ymm1); + a.vsqrtpd(ymm0, ptr_gp0); + a.vsqrtps(xmm0, xmm1); + a.vsqrtps(xmm0, ptr_gp0); + a.vsqrtps(ymm0, ymm1); + a.vsqrtps(ymm0, ptr_gp0); + a.vsqrtsd(xmm0, xmm1, xmm2); + a.vsqrtsd(xmm0, xmm1, ptr_gp0); + a.vsqrtss(xmm0, xmm1, xmm2); + a.vsqrtss(xmm0, xmm1, ptr_gp0); + a.vstmxcsr(ptr_gp0); + a.vsubpd(xmm0, xmm1, xmm2); + a.vsubpd(xmm0, xmm1, ptr_gp0); + a.vsubpd(ymm0, ymm1, ymm2); + a.vsubpd(ymm0, ymm1, ptr_gp0); + a.vsubps(xmm0, xmm1, xmm2); + a.vsubps(xmm0, xmm1, ptr_gp0); + a.vsubps(ymm0, ymm1, ymm2); + a.vsubps(ymm0, ymm1, ptr_gp0); + a.vsubsd(xmm0, xmm1, xmm2); + a.vsubsd(xmm0, xmm1, ptr_gp0); + a.vsubss(xmm0, xmm1, xmm2); + a.vsubss(xmm0, xmm1, ptr_gp0); + a.vtestps(xmm0, xmm1); + a.vtestps(xmm0, ptr_gp0); + a.vtestps(ymm0, ymm1); + a.vtestps(ymm0, ptr_gp0); + a.vtestpd(xmm0, xmm1); + a.vtestpd(xmm0, ptr_gp0); + a.vtestpd(ymm0, ymm1); + a.vtestpd(ymm0, ptr_gp0); + a.vucomisd(xmm0, xmm1); + a.vucomisd(xmm0, ptr_gp0); + a.vucomiss(xmm0, xmm1); + a.vucomiss(xmm0, ptr_gp0); + a.vunpckhpd(xmm0, xmm1, xmm2); + a.vunpckhpd(xmm0, xmm1, ptr_gp0); + a.vunpckhpd(ymm0, ymm1, ymm2); + a.vunpckhpd(ymm0, ymm1, ptr_gp0); + a.vunpckhps(xmm0, xmm1, xmm2); + a.vunpckhps(xmm0, xmm1, ptr_gp0); + a.vunpckhps(ymm0, ymm1, ymm2); + a.vunpckhps(ymm0, ymm1, ptr_gp0); + a.vunpcklpd(xmm0, xmm1, xmm2); + a.vunpcklpd(xmm0, xmm1, ptr_gp0); + a.vunpcklpd(ymm0, ymm1, ymm2); + a.vunpcklpd(ymm0, ymm1, ptr_gp0); + a.vunpcklps(xmm0, xmm1, xmm2); + a.vunpcklps(xmm0, xmm1, ptr_gp0); + a.vunpcklps(ymm0, ymm1, ymm2); + a.vunpcklps(ymm0, ymm1, ptr_gp0); + a.vxorpd(xmm0, xmm1, xmm2); + a.vxorpd(xmm0, xmm1, ptr_gp0); + a.vxorpd(ymm0, ymm1, ymm2); + a.vxorpd(ymm0, ymm1, ptr_gp0); + a.vxorps(xmm0, xmm1, xmm2); + a.vxorps(xmm0, xmm1, ptr_gp0); + a.vxorps(ymm0, ymm1, ymm2); + a.vxorps(ymm0, ymm1, ptr_gp0); + a.vzeroall(); + a.vzeroupper(); + + // AVX+AESNI. + a.vaesdec(xmm0, xmm1, xmm2); + a.vaesdec(xmm0, xmm1, ptr_gp0); + a.vaesdeclast(xmm0, xmm1, xmm2); + a.vaesdeclast(xmm0, xmm1, ptr_gp0); + a.vaesenc(xmm0, xmm1, xmm2); + a.vaesenc(xmm0, xmm1, ptr_gp0); + a.vaesenclast(xmm0, xmm1, xmm2); + a.vaesenclast(xmm0, xmm1, ptr_gp0); + a.vaesimc(xmm0, xmm1); + a.vaesimc(xmm0, ptr_gp0); + a.vaeskeygenassist(xmm0, xmm1, 0); + a.vaeskeygenassist(xmm0, ptr_gp0, 0); + + // AVX+PCLMULQDQ. + a.vpclmulqdq(xmm0, xmm1, xmm2, 0); + a.vpclmulqdq(xmm0, xmm1, ptr_gp0, 0); + + // AVX2. 
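+ // The vm32x/vm32y operands used by the gather instructions below are VSIB + // memory operands (gather addresses formed from a vector of 32-bit indices + // held in an XMM/YMM register); the last register operand of each gather + // acts as the element mask and is cleared when the instruction completes.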
+ a.vbroadcasti128(ymm0, ptr_gp0); + a.vbroadcastsd(ymm0, xmm1); + a.vbroadcastss(xmm0, xmm1); + a.vbroadcastss(ymm0, xmm1); + a.vextracti128(xmm0, ymm1, 0); + a.vextracti128(ptr_gp0, ymm1, 0); + a.vgatherdpd(xmm0, vm32x, xmm2); + a.vgatherdpd(ymm0, vm32y, ymm2); + a.vgatherdps(xmm0, vm32x, xmm2); + a.vgatherdps(ymm0, vm32y, ymm2); + a.vgatherqpd(xmm0, vm32x, xmm2); + a.vgatherqpd(ymm0, vm32y, ymm2); + a.vgatherqps(xmm0, vm32x, xmm2); + a.vgatherqps(xmm0, vm32y, xmm2); + a.vinserti128(ymm0, ymm1, xmm2, 0); + a.vinserti128(ymm0, ymm1, ptr_gp0, 0); + a.vmovntdqa(ymm0, ptr_gp0); + a.vmpsadbw(ymm0, ymm1, ymm2, 0); + a.vmpsadbw(ymm0, ymm1, ptr_gp0, 0); + a.vpabsb(ymm0, ymm1); + a.vpabsb(ymm0, ptr_gp0); + a.vpabsd(ymm0, ymm1); + a.vpabsd(ymm0, ptr_gp0); + a.vpabsw(ymm0, ymm1); + a.vpabsw(ymm0, ptr_gp0); + a.vpackssdw(ymm0, ymm1, ymm2); + a.vpackssdw(ymm0, ymm1, ptr_gp0); + a.vpacksswb(ymm0, ymm1, ymm2); + a.vpacksswb(ymm0, ymm1, ptr_gp0); + a.vpackusdw(ymm0, ymm1, ymm2); + a.vpackusdw(ymm0, ymm1, ptr_gp0); + a.vpackuswb(ymm0, ymm1, ymm2); + a.vpackuswb(ymm0, ymm1, ptr_gp0); + a.vpaddb(ymm0, ymm1, ymm2); + a.vpaddb(ymm0, ymm1, ptr_gp0); + a.vpaddd(ymm0, ymm1, ymm2); + a.vpaddd(ymm0, ymm1, ptr_gp0); + a.vpaddq(ymm0, ymm1, ymm2); + a.vpaddq(ymm0, ymm1, ptr_gp0); + a.vpaddw(ymm0, ymm1, ymm2); + a.vpaddw(ymm0, ymm1, ptr_gp0); + a.vpaddsb(ymm0, ymm1, ymm2); + a.vpaddsb(ymm0, ymm1, ptr_gp0); + a.vpaddsw(ymm0, ymm1, ymm2); + a.vpaddsw(ymm0, ymm1, ptr_gp0); + a.vpaddusb(ymm0, ymm1, ymm2); + a.vpaddusb(ymm0, ymm1, ptr_gp0); + a.vpaddusw(ymm0, ymm1, ymm2); + a.vpaddusw(ymm0, ymm1, ptr_gp0); + a.vpalignr(ymm0, ymm1, ymm2, 0); + a.vpalignr(ymm0, ymm1, ptr_gp0, 0); + a.vpand(ymm0, ymm1, ymm2); + a.vpand(ymm0, ymm1, ptr_gp0); + a.vpandn(ymm0, ymm1, ymm2); + a.vpandn(ymm0, ymm1, ptr_gp0); + a.vpavgb(ymm0, ymm1, ymm2); + a.vpavgb(ymm0, ymm1, ptr_gp0); + a.vpavgw(ymm0, ymm1, ymm2); + a.vpavgw(ymm0, ymm1, ptr_gp0); + a.vpblendd(xmm0, xmm1, xmm2, 0); + a.vpblendd(xmm0, xmm1, ptr_gp0, 0); + a.vpblendd(ymm0, ymm1, ymm2, 0); + a.vpblendd(ymm0, ymm1, ptr_gp0, 0); + a.vpblendvb(ymm0, ymm1, ymm2, ymm3); + a.vpblendvb(ymm0, ymm1, ptr_gp0, ymm3); + a.vpblendw(ymm0, ymm1, ymm2, 0); + a.vpblendw(ymm0, ymm1, ptr_gp0, 0); + a.vpbroadcastb(xmm0, xmm1); + a.vpbroadcastb(xmm0, ptr_gp0); + a.vpbroadcastb(ymm0, xmm1); + a.vpbroadcastb(ymm0, ptr_gp0); + a.vpbroadcastd(xmm0, xmm1); + a.vpbroadcastd(xmm0, ptr_gp0); + a.vpbroadcastd(ymm0, xmm1); + a.vpbroadcastd(ymm0, ptr_gp0); + a.vpbroadcastq(xmm0, xmm1); + a.vpbroadcastq(xmm0, ptr_gp0); + a.vpbroadcastq(ymm0, xmm1); + a.vpbroadcastq(ymm0, ptr_gp0); + a.vpbroadcastw(xmm0, xmm1); + a.vpbroadcastw(xmm0, ptr_gp0); + a.vpbroadcastw(ymm0, xmm1); + a.vpbroadcastw(ymm0, ptr_gp0); + a.vpcmpeqb(ymm0, ymm1, ymm2); + a.vpcmpeqb(ymm0, ymm1, ptr_gp0); + a.vpcmpeqd(ymm0, ymm1, ymm2); + a.vpcmpeqd(ymm0, ymm1, ptr_gp0); + a.vpcmpeqq(ymm0, ymm1, ymm2); + a.vpcmpeqq(ymm0, ymm1, ptr_gp0); + a.vpcmpeqw(ymm0, ymm1, ymm2); + a.vpcmpeqw(ymm0, ymm1, ptr_gp0); + a.vpcmpgtb(ymm0, ymm1, ymm2); + a.vpcmpgtb(ymm0, ymm1, ptr_gp0); + a.vpcmpgtd(ymm0, ymm1, ymm2); + a.vpcmpgtd(ymm0, ymm1, ptr_gp0); + a.vpcmpgtq(ymm0, ymm1, ymm2); + a.vpcmpgtq(ymm0, ymm1, ptr_gp0); + a.vpcmpgtw(ymm0, ymm1, ymm2); + a.vpcmpgtw(ymm0, ymm1, ptr_gp0); + a.vperm2i128(ymm0, ymm1, ymm2, 0); + a.vperm2i128(ymm0, ymm1, ptr_gp0, 0); + a.vpermd(ymm0, ymm1, ymm2); + a.vpermd(ymm0, ymm1, ptr_gp0); + a.vpermps(ymm0, ymm1, ymm2); + a.vpermps(ymm0, ymm1, ptr_gp0); + a.vpermpd(ymm0, ymm1, 0); + a.vpermpd(ymm0, ptr_gp0, 0); + a.vpermq(ymm0, ymm1, 0); 
+ a.vpermq(ymm0, ptr_gp0, 0); + a.vpgatherdd(xmm0, vm32x, xmm2); + a.vpgatherdd(ymm0, vm32y, ymm2); + a.vpgatherdq(xmm0, vm32x, xmm2); + a.vpgatherdq(ymm0, vm32y, ymm2); + a.vpgatherqd(xmm0, vm32x, xmm2); + a.vpgatherqd(xmm0, vm32y, xmm2); + a.vpgatherqq(xmm0, vm32x, xmm2); + a.vpgatherqq(ymm0, vm32y, ymm2); + a.vpmovmskb(gp0, ymm1); + a.vpmovsxbd(ymm0, ptr_gp0); + a.vpmovsxbd(ymm0, xmm1); + a.vpmovsxbq(ymm0, ptr_gp0); + a.vpmovsxbq(ymm0, xmm1); + a.vpmovsxbw(ymm0, ptr_gp0); + a.vpmovsxbw(ymm0, xmm1); + a.vpmovsxdq(ymm0, ptr_gp0); + a.vpmovsxdq(ymm0, xmm1); + a.vpmovsxwd(ymm0, ptr_gp0); + a.vpmovsxwd(ymm0, xmm1); + a.vpmovsxwq(ymm0, ptr_gp0); + a.vpmovsxwq(ymm0, xmm1); + a.vpmovzxbd(ymm0, ptr_gp0); + a.vpmovzxbd(ymm0, xmm1); + a.vpmovzxbq(ymm0, ptr_gp0); + a.vpmovzxbq(ymm0, xmm1); + a.vpmovzxbw(ymm0, ptr_gp0); + a.vpmovzxbw(ymm0, xmm1); + a.vpmovzxdq(ymm0, ptr_gp0); + a.vpmovzxdq(ymm0, xmm1); + a.vpmovzxwd(ymm0, ptr_gp0); + a.vpmovzxwd(ymm0, xmm1); + a.vpmovzxwq(ymm0, ptr_gp0); + a.vpmovzxwq(ymm0, xmm1); + a.vpshufd(ymm0, ptr_gp0, 0); + a.vpshufd(ymm0, ymm1, 0); + a.vpshufhw(ymm0, ptr_gp0, 0); + a.vpshufhw(ymm0, ymm1, 0); + a.vpshuflw(ymm0, ptr_gp0, 0); + a.vpshuflw(ymm0, ymm1, 0); + a.vpslld(ymm0, ymm1, 0); + a.vpslldq(ymm0, ymm1, 0); + a.vpsllq(ymm0, ymm1, 0); + a.vpsllw(ymm0, ymm1, 0); + a.vpsrad(ymm0, ymm1, 0); + a.vpsraw(ymm0, ymm1, 0); + a.vpsrld(ymm0, ymm1, 0); + a.vpsrldq(ymm0, ymm1, 0); + a.vpsrlq(ymm0, ymm1, 0); + a.vpsrlw(ymm0, ymm1, 0); + a.vphaddd(ymm0, ymm1, ptr_gp0); + a.vphaddd(ymm0, ymm1, ymm2); + a.vphaddsw(ymm0, ymm1, ptr_gp0); + a.vphaddsw(ymm0, ymm1, ymm2); + a.vphaddw(ymm0, ymm1, ptr_gp0); + a.vphaddw(ymm0, ymm1, ymm2); + a.vphsubd(ymm0, ymm1, ptr_gp0); + a.vphsubd(ymm0, ymm1, ymm2); + a.vphsubsw(ymm0, ymm1, ptr_gp0); + a.vphsubsw(ymm0, ymm1, ymm2); + a.vphsubw(ymm0, ymm1, ptr_gp0); + a.vphsubw(ymm0, ymm1, ymm2); + a.vpmaddubsw(ymm0, ymm1, ptr_gp0); + a.vpmaddubsw(ymm0, ymm1, ymm2); + a.vpmaddwd(ymm0, ymm1, ptr_gp0); + a.vpmaddwd(ymm0, ymm1, ymm2); + a.vpmaskmovd(ptr_gp0, xmm1, xmm2); + a.vpmaskmovd(ptr_gp0, ymm1, ymm2); + a.vpmaskmovd(xmm0, xmm1, ptr_gp0); + a.vpmaskmovd(ymm0, ymm1, ptr_gp0); + a.vpmaskmovq(ptr_gp0, xmm1, xmm2); + a.vpmaskmovq(ptr_gp0, ymm1, ymm2); + a.vpmaskmovq(xmm0, xmm1, ptr_gp0); + a.vpmaskmovq(ymm0, ymm1, ptr_gp0); + a.vpmaxsb(ymm0, ymm1, ptr_gp0); + a.vpmaxsb(ymm0, ymm1, ymm2); + a.vpmaxsd(ymm0, ymm1, ptr_gp0); + a.vpmaxsd(ymm0, ymm1, ymm2); + a.vpmaxsw(ymm0, ymm1, ptr_gp0); + a.vpmaxsw(ymm0, ymm1, ymm2); + a.vpmaxub(ymm0, ymm1, ptr_gp0); + a.vpmaxub(ymm0, ymm1, ymm2); + a.vpmaxud(ymm0, ymm1, ptr_gp0); + a.vpmaxud(ymm0, ymm1, ymm2); + a.vpmaxuw(ymm0, ymm1, ptr_gp0); + a.vpmaxuw(ymm0, ymm1, ymm2); + a.vpminsb(ymm0, ymm1, ptr_gp0); + a.vpminsb(ymm0, ymm1, ymm2); + a.vpminsd(ymm0, ymm1, ptr_gp0); + a.vpminsd(ymm0, ymm1, ymm2); + a.vpminsw(ymm0, ymm1, ptr_gp0); + a.vpminsw(ymm0, ymm1, ymm2); + a.vpminub(ymm0, ymm1, ptr_gp0); + a.vpminub(ymm0, ymm1, ymm2); + a.vpminud(ymm0, ymm1, ptr_gp0); + a.vpminud(ymm0, ymm1, ymm2); + a.vpminuw(ymm0, ymm1, ptr_gp0); + a.vpminuw(ymm0, ymm1, ymm2); + a.vpmuldq(ymm0, ymm1, ptr_gp0); + a.vpmuldq(ymm0, ymm1, ymm2); + a.vpmulhrsw(ymm0, ymm1, ptr_gp0); + a.vpmulhrsw(ymm0, ymm1, ymm2); + a.vpmulhuw(ymm0, ymm1, ptr_gp0); + a.vpmulhuw(ymm0, ymm1, ymm2); + a.vpmulhw(ymm0, ymm1, ptr_gp0); + a.vpmulhw(ymm0, ymm1, ymm2); + a.vpmulld(ymm0, ymm1, ptr_gp0); + a.vpmulld(ymm0, ymm1, ymm2); + a.vpmullw(ymm0, ymm1, ptr_gp0); + a.vpmullw(ymm0, ymm1, ymm2); + a.vpmuludq(ymm0, ymm1, ptr_gp0); + a.vpmuludq(ymm0, ymm1, ymm2); + 
a.vpor(ymm0, ymm1, ptr_gp0); + a.vpor(ymm0, ymm1, ymm2); + a.vpsadbw(ymm0, ymm1, ptr_gp0); + a.vpsadbw(ymm0, ymm1, ymm2); + a.vpshufb(ymm0, ymm1, ptr_gp0); + a.vpshufb(ymm0, ymm1, ymm2); + a.vpsignb(ymm0, ymm1, ptr_gp0); + a.vpsignb(ymm0, ymm1, ymm2); + a.vpsignd(ymm0, ymm1, ptr_gp0); + a.vpsignd(ymm0, ymm1, ymm2); + a.vpsignw(ymm0, ymm1, ptr_gp0); + a.vpsignw(ymm0, ymm1, ymm2); + a.vpslld(ymm0, ymm1, ptr_gp0); + a.vpslld(ymm0, ymm1, xmm2); + a.vpsllq(ymm0, ymm1, ptr_gp0); + a.vpsllq(ymm0, ymm1, xmm2); + a.vpsllvd(xmm0, xmm1, ptr_gp0); + a.vpsllvd(xmm0, xmm1, xmm2); + a.vpsllvd(ymm0, ymm1, ptr_gp0); + a.vpsllvd(ymm0, ymm1, ymm2); + a.vpsllvq(xmm0, xmm1, ptr_gp0); + a.vpsllvq(xmm0, xmm1, xmm2); + a.vpsllvq(ymm0, ymm1, ptr_gp0); + a.vpsllvq(ymm0, ymm1, ymm2); + a.vpsllw(ymm0, ymm1, ptr_gp0); + a.vpsllw(ymm0, ymm1, xmm2); + a.vpsrad(ymm0, ymm1, ptr_gp0); + a.vpsrad(ymm0, ymm1, xmm2); + a.vpsravd(xmm0, xmm1, ptr_gp0); + a.vpsravd(xmm0, xmm1, xmm2); + a.vpsravd(ymm0, ymm1, ptr_gp0); + a.vpsravd(ymm0, ymm1, ymm2); + a.vpsraw(ymm0, ymm1, ptr_gp0); + a.vpsraw(ymm0, ymm1, xmm2); + a.vpsrld(ymm0, ymm1, ptr_gp0); + a.vpsrld(ymm0, ymm1, xmm2); + a.vpsrlq(ymm0, ymm1, ptr_gp0); + a.vpsrlq(ymm0, ymm1, xmm2); + a.vpsrlvd(xmm0, xmm1, ptr_gp0); + a.vpsrlvd(xmm0, xmm1, xmm2); + a.vpsrlvd(ymm0, ymm1, ptr_gp0); + a.vpsrlvd(ymm0, ymm1, ymm2); + a.vpsrlvq(xmm0, xmm1, ptr_gp0); + a.vpsrlvq(xmm0, xmm1, xmm2); + a.vpsrlvq(ymm0, ymm1, ptr_gp0); + a.vpsrlvq(ymm0, ymm1, ymm2); + a.vpsrlw(ymm0, ymm1, ptr_gp0); + a.vpsrlw(ymm0, ymm1, xmm2); + a.vpsubb(ymm0, ymm1, ptr_gp0); + a.vpsubb(ymm0, ymm1, ymm2); + a.vpsubd(ymm0, ymm1, ptr_gp0); + a.vpsubd(ymm0, ymm1, ymm2); + a.vpsubq(ymm0, ymm1, ptr_gp0); + a.vpsubq(ymm0, ymm1, ymm2); + a.vpsubsb(ymm0, ymm1, ptr_gp0); + a.vpsubsb(ymm0, ymm1, ymm2); + a.vpsubsw(ymm0, ymm1, ptr_gp0); + a.vpsubsw(ymm0, ymm1, ymm2); + a.vpsubusb(ymm0, ymm1, ptr_gp0); + a.vpsubusb(ymm0, ymm1, ymm2); + a.vpsubusw(ymm0, ymm1, ptr_gp0); + a.vpsubusw(ymm0, ymm1, ymm2); + a.vpsubw(ymm0, ymm1, ptr_gp0); + a.vpsubw(ymm0, ymm1, ymm2); + a.vpunpckhbw(ymm0, ymm1, ptr_gp0); + a.vpunpckhbw(ymm0, ymm1, ymm2); + a.vpunpckhdq(ymm0, ymm1, ptr_gp0); + a.vpunpckhdq(ymm0, ymm1, ymm2); + a.vpunpckhqdq(ymm0, ymm1, ptr_gp0); + a.vpunpckhqdq(ymm0, ymm1, ymm2); + a.vpunpckhwd(ymm0, ymm1, ptr_gp0); + a.vpunpckhwd(ymm0, ymm1, ymm2); + a.vpunpcklbw(ymm0, ymm1, ptr_gp0); + a.vpunpcklbw(ymm0, ymm1, ymm2); + a.vpunpckldq(ymm0, ymm1, ptr_gp0); + a.vpunpckldq(ymm0, ymm1, ymm2); + a.vpunpcklqdq(ymm0, ymm1, ptr_gp0); + a.vpunpcklqdq(ymm0, ymm1, ymm2); + a.vpunpcklwd(ymm0, ymm1, ptr_gp0); + a.vpunpcklwd(ymm0, ymm1, ymm2); + a.vpxor(ymm0, ymm1, ptr_gp0); + a.vpxor(ymm0, ymm1, ymm2); + + // FMA3. 
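+ // The 132/213/231 suffix encodes the operand roles; with operands + // (dst, src1, src2): the 132 form computes dst = dst * src2 + src1, the + // 213 form computes dst = src1 * dst + src2, and the 231 form computes + // dst = src1 * src2 + dst (analogously for the fmsub/fnmadd/fnmsub families).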
+ a.vfmadd132pd(xmm0, xmm1, ptr_gp0); + a.vfmadd132pd(xmm0, xmm1, xmm2); + a.vfmadd132pd(ymm0, ymm1, ptr_gp0); + a.vfmadd132pd(ymm0, ymm1, ymm2); + a.vfmadd132ps(xmm0, xmm1, ptr_gp0); + a.vfmadd132ps(xmm0, xmm1, xmm2); + a.vfmadd132ps(ymm0, ymm1, ptr_gp0); + a.vfmadd132ps(ymm0, ymm1, ymm2); + a.vfmadd132sd(xmm0, xmm1, ptr_gp0); + a.vfmadd132sd(xmm0, xmm1, xmm2); + a.vfmadd132ss(xmm0, xmm1, ptr_gp0); + a.vfmadd132ss(xmm0, xmm1, xmm2); + a.vfmadd213pd(xmm0, xmm1, ptr_gp0); + a.vfmadd213pd(xmm0, xmm1, xmm2); + a.vfmadd213pd(ymm0, ymm1, ptr_gp0); + a.vfmadd213pd(ymm0, ymm1, ymm2); + a.vfmadd213ps(xmm0, xmm1, ptr_gp0); + a.vfmadd213ps(xmm0, xmm1, xmm2); + a.vfmadd213ps(ymm0, ymm1, ptr_gp0); + a.vfmadd213ps(ymm0, ymm1, ymm2); + a.vfmadd213sd(xmm0, xmm1, ptr_gp0); + a.vfmadd213sd(xmm0, xmm1, xmm2); + a.vfmadd213ss(xmm0, xmm1, ptr_gp0); + a.vfmadd213ss(xmm0, xmm1, xmm2); + a.vfmadd231pd(xmm0, xmm1, ptr_gp0); + a.vfmadd231pd(xmm0, xmm1, xmm2); + a.vfmadd231pd(ymm0, ymm1, ptr_gp0); + a.vfmadd231pd(ymm0, ymm1, ymm2); + a.vfmadd231ps(xmm0, xmm1, ptr_gp0); + a.vfmadd231ps(xmm0, xmm1, xmm2); + a.vfmadd231ps(ymm0, ymm1, ptr_gp0); + a.vfmadd231ps(ymm0, ymm1, ymm2); + a.vfmadd231sd(xmm0, xmm1, ptr_gp0); + a.vfmadd231sd(xmm0, xmm1, xmm2); + a.vfmadd231ss(xmm0, xmm1, ptr_gp0); + a.vfmadd231ss(xmm0, xmm1, xmm2); + a.vfmaddsub132pd(xmm0, xmm1, ptr_gp0); + a.vfmaddsub132pd(xmm0, xmm1, xmm2); + a.vfmaddsub132pd(ymm0, ymm1, ptr_gp0); + a.vfmaddsub132pd(ymm0, ymm1, ymm2); + a.vfmaddsub132ps(xmm0, xmm1, ptr_gp0); + a.vfmaddsub132ps(xmm0, xmm1, xmm2); + a.vfmaddsub132ps(ymm0, ymm1, ptr_gp0); + a.vfmaddsub132ps(ymm0, ymm1, ymm2); + a.vfmaddsub213pd(xmm0, xmm1, ptr_gp0); + a.vfmaddsub213pd(xmm0, xmm1, xmm2); + a.vfmaddsub213pd(ymm0, ymm1, ptr_gp0); + a.vfmaddsub213pd(ymm0, ymm1, ymm2); + a.vfmaddsub213ps(xmm0, xmm1, ptr_gp0); + a.vfmaddsub213ps(xmm0, xmm1, xmm2); + a.vfmaddsub213ps(ymm0, ymm1, ptr_gp0); + a.vfmaddsub213ps(ymm0, ymm1, ymm2); + a.vfmaddsub231pd(xmm0, xmm1, ptr_gp0); + a.vfmaddsub231pd(xmm0, xmm1, xmm2); + a.vfmaddsub231pd(ymm0, ymm1, ptr_gp0); + a.vfmaddsub231pd(ymm0, ymm1, ymm2); + a.vfmaddsub231ps(xmm0, xmm1, ptr_gp0); + a.vfmaddsub231ps(xmm0, xmm1, xmm2); + a.vfmaddsub231ps(ymm0, ymm1, ptr_gp0); + a.vfmaddsub231ps(ymm0, ymm1, ymm2); + a.vfmsub132pd(xmm0, xmm1, ptr_gp0); + a.vfmsub132pd(xmm0, xmm1, xmm2); + a.vfmsub132pd(ymm0, ymm1, ptr_gp0); + a.vfmsub132pd(ymm0, ymm1, ymm2); + a.vfmsub132ps(xmm0, xmm1, ptr_gp0); + a.vfmsub132ps(xmm0, xmm1, xmm2); + a.vfmsub132ps(ymm0, ymm1, ptr_gp0); + a.vfmsub132ps(ymm0, ymm1, ymm2); + a.vfmsub132sd(xmm0, xmm1, ptr_gp0); + a.vfmsub132sd(xmm0, xmm1, xmm2); + a.vfmsub132ss(xmm0, xmm1, ptr_gp0); + a.vfmsub132ss(xmm0, xmm1, xmm2); + a.vfmsub213pd(xmm0, xmm1, ptr_gp0); + a.vfmsub213pd(xmm0, xmm1, xmm2); + a.vfmsub213pd(ymm0, ymm1, ptr_gp0); + a.vfmsub213pd(ymm0, ymm1, ymm2); + a.vfmsub213ps(xmm0, xmm1, ptr_gp0); + a.vfmsub213ps(xmm0, xmm1, xmm2); + a.vfmsub213ps(ymm0, ymm1, ptr_gp0); + a.vfmsub213ps(ymm0, ymm1, ymm2); + a.vfmsub213sd(xmm0, xmm1, ptr_gp0); + a.vfmsub213sd(xmm0, xmm1, xmm2); + a.vfmsub213ss(xmm0, xmm1, ptr_gp0); + a.vfmsub213ss(xmm0, xmm1, xmm2); + a.vfmsub231pd(xmm0, xmm1, ptr_gp0); + a.vfmsub231pd(xmm0, xmm1, xmm2); + a.vfmsub231pd(ymm0, ymm1, ptr_gp0); + a.vfmsub231pd(ymm0, ymm1, ymm2); + a.vfmsub231ps(xmm0, xmm1, ptr_gp0); + a.vfmsub231ps(xmm0, xmm1, xmm2); + a.vfmsub231ps(ymm0, ymm1, ptr_gp0); + a.vfmsub231ps(ymm0, ymm1, ymm2); + a.vfmsub231sd(xmm0, xmm1, ptr_gp0); + a.vfmsub231sd(xmm0, xmm1, xmm2); + a.vfmsub231ss(xmm0, xmm1, ptr_gp0); + 
a.vfmsub231ss(xmm0, xmm1, xmm2); + a.vfmsubadd132pd(xmm0, xmm1, ptr_gp0); + a.vfmsubadd132pd(xmm0, xmm1, xmm2); + a.vfmsubadd132pd(ymm0, ymm1, ptr_gp0); + a.vfmsubadd132pd(ymm0, ymm1, ymm2); + a.vfmsubadd132ps(xmm0, xmm1, ptr_gp0); + a.vfmsubadd132ps(xmm0, xmm1, xmm2); + a.vfmsubadd132ps(ymm0, ymm1, ptr_gp0); + a.vfmsubadd132ps(ymm0, ymm1, ymm2); + a.vfmsubadd213pd(xmm0, xmm1, ptr_gp0); + a.vfmsubadd213pd(xmm0, xmm1, xmm2); + a.vfmsubadd213pd(ymm0, ymm1, ptr_gp0); + a.vfmsubadd213pd(ymm0, ymm1, ymm2); + a.vfmsubadd213ps(xmm0, xmm1, ptr_gp0); + a.vfmsubadd213ps(xmm0, xmm1, xmm2); + a.vfmsubadd213ps(ymm0, ymm1, ptr_gp0); + a.vfmsubadd213ps(ymm0, ymm1, ymm2); + a.vfmsubadd231pd(xmm0, xmm1, ptr_gp0); + a.vfmsubadd231pd(xmm0, xmm1, xmm2); + a.vfmsubadd231pd(ymm0, ymm1, ptr_gp0); + a.vfmsubadd231pd(ymm0, ymm1, ymm2); + a.vfmsubadd231ps(xmm0, xmm1, ptr_gp0); + a.vfmsubadd231ps(xmm0, xmm1, xmm2); + a.vfmsubadd231ps(ymm0, ymm1, ptr_gp0); + a.vfmsubadd231ps(ymm0, ymm1, ymm2); + a.vfnmadd132pd(xmm0, xmm1, ptr_gp0); + a.vfnmadd132pd(xmm0, xmm1, xmm2); + a.vfnmadd132pd(ymm0, ymm1, ptr_gp0); + a.vfnmadd132pd(ymm0, ymm1, ymm2); + a.vfnmadd132ps(xmm0, xmm1, ptr_gp0); + a.vfnmadd132ps(xmm0, xmm1, xmm2); + a.vfnmadd132ps(ymm0, ymm1, ptr_gp0); + a.vfnmadd132ps(ymm0, ymm1, ymm2); + a.vfnmadd132sd(xmm0, xmm1, ptr_gp0); + a.vfnmadd132sd(xmm0, xmm1, xmm2); + a.vfnmadd132ss(xmm0, xmm1, ptr_gp0); + a.vfnmadd132ss(xmm0, xmm1, xmm2); + a.vfnmadd213pd(xmm0, xmm1, ptr_gp0); + a.vfnmadd213pd(xmm0, xmm1, xmm2); + a.vfnmadd213pd(ymm0, ymm1, ptr_gp0); + a.vfnmadd213pd(ymm0, ymm1, ymm2); + a.vfnmadd213ps(xmm0, xmm1, ptr_gp0); + a.vfnmadd213ps(xmm0, xmm1, xmm2); + a.vfnmadd213ps(ymm0, ymm1, ptr_gp0); + a.vfnmadd213ps(ymm0, ymm1, ymm2); + a.vfnmadd213sd(xmm0, xmm1, ptr_gp0); + a.vfnmadd213sd(xmm0, xmm1, xmm2); + a.vfnmadd213ss(xmm0, xmm1, ptr_gp0); + a.vfnmadd213ss(xmm0, xmm1, xmm2); + a.vfnmadd231pd(xmm0, xmm1, ptr_gp0); + a.vfnmadd231pd(xmm0, xmm1, xmm2); + a.vfnmadd231pd(ymm0, ymm1, ptr_gp0); + a.vfnmadd231pd(ymm0, ymm1, ymm2); + a.vfnmadd231ps(xmm0, xmm1, ptr_gp0); + a.vfnmadd231ps(xmm0, xmm1, xmm2); + a.vfnmadd231ps(ymm0, ymm1, ptr_gp0); + a.vfnmadd231ps(ymm0, ymm1, ymm2); + a.vfnmadd231sd(xmm0, xmm1, ptr_gp0); + a.vfnmadd231sd(xmm0, xmm1, xmm2); + a.vfnmadd231ss(xmm0, xmm1, ptr_gp0); + a.vfnmadd231ss(xmm0, xmm1, xmm2); + a.vfnmsub132pd(xmm0, xmm1, ptr_gp0); + a.vfnmsub132pd(xmm0, xmm1, xmm2); + a.vfnmsub132pd(ymm0, ymm1, ptr_gp0); + a.vfnmsub132pd(ymm0, ymm1, ymm2); + a.vfnmsub132ps(xmm0, xmm1, ptr_gp0); + a.vfnmsub132ps(xmm0, xmm1, xmm2); + a.vfnmsub132ps(ymm0, ymm1, ptr_gp0); + a.vfnmsub132ps(ymm0, ymm1, ymm2); + a.vfnmsub132sd(xmm0, xmm1, ptr_gp0); + a.vfnmsub132sd(xmm0, xmm1, xmm2); + a.vfnmsub132ss(xmm0, xmm1, ptr_gp0); + a.vfnmsub132ss(xmm0, xmm1, xmm2); + a.vfnmsub213pd(xmm0, xmm1, ptr_gp0); + a.vfnmsub213pd(xmm0, xmm1, xmm2); + a.vfnmsub213pd(ymm0, ymm1, ptr_gp0); + a.vfnmsub213pd(ymm0, ymm1, ymm2); + a.vfnmsub213ps(xmm0, xmm1, ptr_gp0); + a.vfnmsub213ps(xmm0, xmm1, xmm2); + a.vfnmsub213ps(ymm0, ymm1, ptr_gp0); + a.vfnmsub213ps(ymm0, ymm1, ymm2); + a.vfnmsub213sd(xmm0, xmm1, ptr_gp0); + a.vfnmsub213sd(xmm0, xmm1, xmm2); + a.vfnmsub213ss(xmm0, xmm1, ptr_gp0); + a.vfnmsub213ss(xmm0, xmm1, xmm2); + a.vfnmsub231pd(xmm0, xmm1, ptr_gp0); + a.vfnmsub231pd(xmm0, xmm1, xmm2); + a.vfnmsub231pd(ymm0, ymm1, ptr_gp0); + a.vfnmsub231pd(ymm0, ymm1, ymm2); + a.vfnmsub231ps(xmm0, xmm1, ptr_gp0); + a.vfnmsub231ps(xmm0, xmm1, xmm2); + a.vfnmsub231ps(ymm0, ymm1, ptr_gp0); + a.vfnmsub231ps(ymm0, ymm1, ymm2); + a.vfnmsub231sd(xmm0, 
xmm1, ptr_gp0); + a.vfnmsub231sd(xmm0, xmm1, xmm2); + a.vfnmsub231ss(xmm0, xmm1, ptr_gp0); + a.vfnmsub231ss(xmm0, xmm1, xmm2); + + // BMI. + a.andn(gp0, gp1, zcx); + a.andn(gp0, gp1, ptr_gp1); + a.bextr(gp0, gp1, zcx); + a.bextr(gp0, ptr_gp1, zcx); + a.blsi(gp0, gp1); + a.blsi(gp0, ptr_gp1); + a.blsmsk(gp0, gp1); + a.blsmsk(gp0, ptr_gp1); + a.blsr(gp0, gp1); + a.blsr(gp0, ptr_gp1); + + // LZCNT. + a.lzcnt(gp0, gp1); + a.lzcnt(gp0, ptr_gp1); + + // TZCNT. + a.tzcnt(gp0, gp1); + a.tzcnt(gp0, ptr_gp1); + + // BMI2. + a.bzhi(gp0, gp1, zcx); + a.bzhi(gp0, ptr_gp1, zcx); + a.mulx(gp0, gp1, zcx); + a.mulx(gp0, gp1, ptr_gp1); + a.pdep(gp0, gp1, zcx); + a.pdep(gp0, gp1, ptr_gp1); + a.pext(gp0, gp1, zcx); + a.pext(gp0, gp1, ptr_gp1); + a.rorx(gp0, gp1, 0); + a.rorx(gp0, ptr_gp1, 0); + a.sarx(gp0, gp1, zcx); + a.sarx(gp0, ptr_gp1, zcx); + a.shlx(gp0, gp1, zcx); + a.shlx(gp0, ptr_gp1, zcx); + a.shrx(gp0, gp1, zcx); + a.shrx(gp0, ptr_gp1, zcx); + + // RDRAND. + a.rdrand(gp0); + + // F16C. + a.vcvtph2ps(xmm0, xmm1); + a.vcvtph2ps(xmm0, ptr_gp1); + a.vcvtph2ps(ymm0, xmm1); + a.vcvtph2ps(ymm0, ptr_gp1); + a.vcvtps2ph(xmm0, xmm1, 0); + a.vcvtps2ph(ptr_gp0, xmm1, 0); + a.vcvtps2ph(xmm0, ymm1, 0); + a.vcvtps2ph(ptr_gp0, ymm1, 0); +} + +} // asmgen namespace + +// [Guard] +#endif // _GENOPCODE_H diff --git a/src/app/test/testcpu.cpp b/src/app/test/testcpu.cpp new file mode 100644 index 0000000..7dafd40 --- /dev/null +++ b/src/app/test/testcpu.cpp @@ -0,0 +1,114 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. +// +// [License] +// Zlib - See LICENSE.md file in the package. + +// [Dependencies - AsmJit] +#include <asmjit/asmjit.h> + +// [Dependencies - C] +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +using namespace asmjit; + +struct CpuFeature { + uint32_t feature; + const char* description; +}; + +#if defined(ASMJIT_HOST_X86) || defined(ASMJIT_HOST_X64) +static const CpuFeature x86x64Features[] = { + { x86x64::kCpuFeatureMultithreading , "Multithreading" }, + { x86x64::kCpuFeatureExecuteDisableBit , "Execute-Disable Bit" }, + { x86x64::kCpuFeatureRdtsc , "Rdtsc" }, + { x86x64::kCpuFeatureRdtscp , "Rdtscp" }, + { x86x64::kCpuFeatureCmov , "Cmov" }, + { x86x64::kCpuFeatureCmpXchg8B , "Cmpxchg8b" }, + { x86x64::kCpuFeatureCmpXchg16B , "Cmpxchg16b" }, + { x86x64::kCpuFeatureClflush , "Clflush" }, + { x86x64::kCpuFeaturePrefetch , "Prefetch" }, + { x86x64::kCpuFeatureLahfSahf , "Lahf/Sahf" }, + { x86x64::kCpuFeatureFxsr , "Fxsave/Fxrstor" }, + { x86x64::kCpuFeatureFfxsr , "Fxsave/Fxrstor Opt."
}, + { x86x64::kCpuFeatureMmx , "Mmx" }, + { x86x64::kCpuFeatureMmxExt , "MmxExt" }, + { x86x64::kCpuFeature3dNow , "3dnow" }, + { x86x64::kCpuFeature3dNowExt , "3dnowExt" }, + { x86x64::kCpuFeatureSse , "Sse" }, + { x86x64::kCpuFeatureSse2 , "Sse2" }, + { x86x64::kCpuFeatureSse3 , "Sse3" }, + { x86x64::kCpuFeatureSsse3 , "Ssse3" }, + { x86x64::kCpuFeatureSse4A , "Sse4a" }, + { x86x64::kCpuFeatureSse41 , "Sse4.1" }, + { x86x64::kCpuFeatureSse42 , "Sse4.2" }, + { x86x64::kCpuFeatureMsse , "Misaligned SSE" }, + { x86x64::kCpuFeatureMonitorMWait , "Monitor/MWait" }, + { x86x64::kCpuFeatureMovbe , "Movbe" }, + { x86x64::kCpuFeaturePopcnt , "Popcnt" }, + { x86x64::kCpuFeatureLzcnt , "Lzcnt" }, + { x86x64::kCpuFeatureAesni , "AesNI" }, + { x86x64::kCpuFeaturePclmulqdq , "Pclmulqdq" }, + { x86x64::kCpuFeatureRdrand , "Rdrand" }, + { x86x64::kCpuFeatureAvx , "Avx" }, + { x86x64::kCpuFeatureAvx2 , "Avx2" }, + { x86x64::kCpuFeatureF16C , "F16C" }, + { x86x64::kCpuFeatureFma3 , "Fma3" }, + { x86x64::kCpuFeatureFma4 , "Fma4" }, + { x86x64::kCpuFeatureXop , "Xop" }, + { x86x64::kCpuFeatureBmi , "Bmi" }, + { x86x64::kCpuFeatureBmi2 , "Bmi2" }, + { x86x64::kCpuFeatureHle , "Hle" }, + { x86x64::kCpuFeatureRtm , "Rtm" }, + { x86x64::kCpuFeatureFsGsBase , "FsGsBase" }, + { x86x64::kCpuFeatureRepMovsbStosbExt , "RepMovsbStosbExt" } +}; +#endif // ASMJIT_HOST_X86 || ASMJIT_HOST_X64 + +static void printFeatures(const char* prefix, const BaseCpu* cpu, const CpuFeature* data, uint32_t count) { + for (uint32_t i = 0; i < count; i++) { + if (cpu->hasFeature(data[i].feature)) { + printf("%s%s\n", prefix, data[i].description); + } + } +} + +int main(int argc, char* argv[]) { + const BaseCpu* cpu_ = BaseCpu::getHost(); + + // -------------------------------------------------------------------------- + // [Core Features] + // -------------------------------------------------------------------------- + + printf("Host CPU\n"); + printf("========\n"); + + printf("\nBasic info\n"); + printf(" Vendor string : %s\n", cpu_->getVendorString()); + printf(" Brand string : %s\n", cpu_->getBrandString()); + printf(" Family : %u\n", cpu_->getFamily()); + printf(" Model : %u\n", cpu_->getModel()); + printf(" Stepping : %u\n", cpu_->getStepping()); + printf(" Cores Count : %u\n", cpu_->getCoresCount()); + + // -------------------------------------------------------------------------- + // [X86 Features] + // -------------------------------------------------------------------------- + +#if defined(ASMJIT_HOST_X86) || defined(ASMJIT_HOST_X64) + const x86x64::Cpu* cpu = static_cast<const x86x64::Cpu*>(cpu_); + + printf("\nX86/X64 Extended Info:\n"); + printf(" Processor Type : %u\n", cpu->getProcessorType()); + printf(" Brand Index : %u\n", cpu->getBrandIndex()); + printf(" CL Flush Cache Line : %u\n", cpu->getFlushCacheLineSize()); + printf(" Max logical Processors: %u\n", cpu->getMaxLogicalProcessors()); + + printf("\nX86/X64 Features:\n"); + printFeatures(" ", cpu, x86x64Features, ASMJIT_ARRAY_SIZE(x86x64Features)); +#endif // ASMJIT_HOST_X86 || ASMJIT_HOST_X64 + + return 0; +} diff --git a/src/app/test/testdummy.cpp b/src/app/test/testdummy.cpp new file mode 100644 index 0000000..b275237 --- /dev/null +++ b/src/app/test/testdummy.cpp @@ -0,0 +1,66 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. +// +// [License] +// Zlib - See LICENSE.md file in the package. + +// This file is used as a dummy test. It's changed during development.
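+// Right now it exercises label binding and dead-code handling: the movs that +// follow an unconditional jmp below can never execute, so the compiler is +// expected to tolerate (or eliminate) them.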
+ +// [Dependencies - AsmJit] +#include <asmjit/asmjit.h> + +// [Dependencies - C] +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +typedef void (*MyFunc)(void); + +int main(int argc, char* argv[]) { + using namespace asmjit; + using namespace asmjit::host; + + JitRuntime runtime; + FileLogger logger(stderr); + logger.setOption(kLoggerOptionBinaryForm, true); + + Compiler c(&runtime); + c.setLogger(&logger); + + c.addFunc(kFuncConvHost, FuncBuilder0<void>()); + + Label L_1(c); + Label L_2(c); + Label L_3(c); + Label L_4(c); + Label L_5(c); + Label L_6(c); + Label L_7(c); + + GpVar v1(c); + GpVar v2(c); + + c.bind(L_2); + c.bind(L_3); + + c.jmp(L_1); + c.bind(L_5); + c.mov(v1, 0); + c.bind(L_6); + c.jmp(L_3); + c.mov(v2, 1); + c.jmp(L_1); + c.bind(L_4); + c.jmp(L_2); + c.bind(L_7); + c.add(v1, v2); + + c.bind(L_1); + c.ret(); + c.endFunc(); + + MyFunc func = asmjit_cast<MyFunc>(c.make()); + runtime.release((void*)func); + + return 0; +} diff --git a/src/app/test/testmem.cpp b/src/app/test/testmem.cpp new file mode 100644 index 0000000..feccfab --- /dev/null +++ b/src/app/test/testmem.cpp @@ -0,0 +1,173 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. +// +// [License] +// Zlib - See LICENSE.md file in the package. + +// [Dependencies - AsmJit] +#include <asmjit/asmjit.h> + +// [Dependencies - C] +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +using namespace asmjit; + +static int problems = 0; + +static void gen(void* a, void* b, int i) { + int pattern = rand() % 256; + *(int *)a = i; + *(int *)b = i; + ::memset((char*)a + sizeof(int), pattern, i - sizeof(int)); + ::memset((char*)b + sizeof(int), pattern, i - sizeof(int)); +} + +static void verify(void* a, void* b) { + int ai = *(int*)a; + int bi = *(int*)b; + if (ai != bi || memcmp(a, b, ai) != 0) + { + printf("Failed to verify %p\n", a); + problems++; + } +} + +static void die() { + printf("Couldn't allocate virtual memory; this test needs at least 100MB of free virtual memory.\n"); + exit(1); +} + +static void stats(MemoryManager* memmgr) { + printf("-- Used: %d\n", (int)memmgr->getUsedBytes()); + printf("-- Allocated: %d\n", (int)memmgr->getAllocatedBytes()); +} + +static void shuffle(void **a, void **b, size_t count) { + for (size_t i = 0; i < count; ++i) { + size_t si = (size_t)rand() % count; + + void *ta = a[i]; + void *tb = b[i]; + + a[i] = a[si]; + b[i] = b[si]; + + a[si] = ta; + b[si] = tb; + } +} + +int main(int argc, char* argv[]) { + MemoryManager* memmgr = MemoryManager::getGlobal(); + + size_t i; + size_t count = 200000; + + printf("Memory alloc/free test - %d allocations\n\n", (int)count); + + void** a = (void**)::malloc(sizeof(void*) * count); + void** b = (void**)::malloc(sizeof(void*) * count); + if (!a || !b) die(); + + srand(100); + printf("Allocating virtual memory..."); + + for (i = 0; i < count; i++) { + int r = (rand() % 1000) + 4; + + a[i] = memmgr->alloc(r); + if (a[i] == NULL) die(); + + ::memset(a[i], 0, r); + } + + printf("done\n"); + stats(memmgr); + + printf("\n"); + printf("Freeing virtual memory..."); + + for (i = 0; i < count; i++) { + if (memmgr->release(a[i]) != kErrorOk) { + printf("Failed to free %p\n", a[i]); + problems++; + } + } + + printf("done\n"); + stats(memmgr); + + printf("\n"); + printf("Verified alloc/free test - %d allocations\n\n", (int)count); + + printf("Alloc..."); + for (i = 0; i < count; i++) { + int r = (rand() % 1000) + 4; + + a[i] = memmgr->alloc(r); + b[i] = ::malloc(r); + if (a[i] == NULL || b[i] == NULL) die(); + + gen(a[i], b[i], r); + } + printf("done\n"); + stats(memmgr); + + printf("\n"); + printf("Shuffling..."); + shuffle(a, b,
count); + printf("done\n"); + + printf("\n"); + printf("Verify and free..."); + for (i = 0; i < count / 2; i++) { + verify(a[i], b[i]); + if (memmgr->release(a[i]) != kErrorOk) { + printf("Failed to free %p\n", a[i]); + problems++; + } + free(b[i]); + } + printf("done\n"); + stats(memmgr); + + printf("\n"); + printf("Alloc..."); + for (i = 0; i < count / 2; i++) { + int r = (rand() % 1000) + 4; + + a[i] = memmgr->alloc(r); + b[i] = ::malloc(r); + if (a[i] == NULL || b[i] == NULL) die(); + + gen(a[i], b[i], r); + } + printf("done\n"); + stats(memmgr); + + printf("\n"); + printf("Verify and free..."); + for (i = 0; i < count; i++) { + verify(a[i], b[i]); + if (memmgr->release(a[i]) != kErrorOk) { + printf("Failed to free %p\n", a[i]); + problems++; + } + free(b[i]); + } + printf("done\n"); + stats(memmgr); + + printf("\n"); + if (problems) + printf("Status: Failure: %d problems found\n", problems); + else + printf("Status: Success\n"); + + ::free(a); + ::free(b); + + return 0; +} diff --git a/src/app/test/testopcode.cpp b/src/app/test/testopcode.cpp new file mode 100644 index 0000000..582e8b2 --- /dev/null +++ b/src/app/test/testopcode.cpp @@ -0,0 +1,42 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. +// +// [License] +// Zlib - See LICENSE.md file in the package. + +// This file is used to test opcodes generated by AsmJit. Output can be +// disassembled in your IDE or by your favourite disassembler. Instructions +// are sorted alphabetically. + +// [Dependencies - AsmJit] +#include <asmjit/asmjit.h> + +// [Dependencies - Test] +#include "genopcode.h" + +// [Dependencies - C] +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +typedef void (*VoidFunc)(void); + +int main(int argc, char* argv[]) { + using namespace asmjit; + using namespace asmjit::host; + + FileLogger logger(stdout); + logger.setOption(kLoggerOptionBinaryForm, true); + + JitRuntime runtime; + Assembler a(&runtime); + + a.setLogger(&logger); + asmgen::opcode(a); + + VoidFunc p = asmjit_cast<VoidFunc>(a.make()); + p(); + runtime.release((void*)p); + + return 0; +} diff --git a/src/app/test/testsizeof.cpp b/src/app/test/testsizeof.cpp new file mode 100644 index 0000000..223269a --- /dev/null +++ b/src/app/test/testsizeof.cpp @@ -0,0 +1,106 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. +// +// [License] +// Zlib - See LICENSE.md file in the package.
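+ +// This test prints the size of every public AsmJit type. The output makes it +// easy to spot unintended padding and ABI differences between compilers and +// targets.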
+ +// [Dependencies - AsmJit] +#include <asmjit/asmjit.h> + +#if defined(ASMJIT_BUILD_X86) || defined(ASMJIT_BUILD_X64) +#include <asmjit/x86.h> +#endif // ASMJIT_BUILD_X86 || ASMJIT_BUILD_X64 + +// [Dependencies - C] +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +using namespace asmjit; + +int main(int argc, char* argv[]) { + // -------------------------------------------------------------------------- + // [Runtime] + // -------------------------------------------------------------------------- + + printf("Sizeof[Runtime]:\n"); + printf(" int8_t : %u\n", static_cast<uint32_t>(sizeof(int8_t))); + printf(" int16_t : %u\n", static_cast<uint32_t>(sizeof(int16_t))); + printf(" int32_t : %u\n", static_cast<uint32_t>(sizeof(int32_t))); + printf(" int64_t : %u\n", static_cast<uint32_t>(sizeof(int64_t))); + printf(" long : %u\n", static_cast<uint32_t>(sizeof(long))); + printf(" size_t : %u\n", static_cast<uint32_t>(sizeof(size_t))); + printf(" intptr_t : %u\n", static_cast<uint32_t>(sizeof(intptr_t))); + printf(" float : %u\n", static_cast<uint32_t>(sizeof(float))); + printf(" double : %u\n", static_cast<uint32_t>(sizeof(double))); + printf(" void* : %u\n", static_cast<uint32_t>(sizeof(void*))); + printf("\n"); + + // -------------------------------------------------------------------------- + // [Core] + // -------------------------------------------------------------------------- + + printf("Sizeof[Base]:\n"); + printf(" asmjit::CodeGen : %u\n", static_cast<uint32_t>(sizeof(CodeGen))); + printf(" asmjit::BaseAssembler : %u\n", static_cast<uint32_t>(sizeof(BaseAssembler))); + printf(" asmjit::BaseCompiler : %u\n", static_cast<uint32_t>(sizeof(BaseCompiler))); + printf(" asmjit::BaseRuntime : %u\n", static_cast<uint32_t>(sizeof(BaseRuntime))); + printf("\n"); + printf(" asmjit::Operand : %u\n", static_cast<uint32_t>(sizeof(Operand))); + printf(" asmjit::BaseReg : %u\n", static_cast<uint32_t>(sizeof(BaseReg))); + printf(" asmjit::BaseVar : %u\n", static_cast<uint32_t>(sizeof(BaseVar))); + printf(" asmjit::BaseMem : %u\n", static_cast<uint32_t>(sizeof(BaseMem))); + printf(" asmjit::Imm : %u\n", static_cast<uint32_t>(sizeof(Imm))); + printf(" asmjit::Label : %u\n", static_cast<uint32_t>(sizeof(Label))); + printf("\n"); + printf(" asmjit::Ptr : %u\n", static_cast<uint32_t>(sizeof(Ptr))); + printf(" asmjit::SignedPtr : %u\n", static_cast<uint32_t>(sizeof(SignedPtr))); + printf("\n"); + printf(" asmjit::LabelData : %u\n", static_cast<uint32_t>(sizeof(LabelData))); + printf(" asmjit::RelocData : %u\n", static_cast<uint32_t>(sizeof(RelocData))); + printf("\n"); + printf(" asmjit::BaseNode : %u\n", static_cast<uint32_t>(sizeof(BaseNode))); + printf(" asmjit::AlignNode : %u\n", static_cast<uint32_t>(sizeof(AlignNode))); + printf(" asmjit::CallNode : %u\n", static_cast<uint32_t>(sizeof(CallNode))); + printf(" asmjit::CommentNode : %u\n", static_cast<uint32_t>(sizeof(CommentNode))); + printf(" asmjit::EmbedNode : %u\n", static_cast<uint32_t>(sizeof(EmbedNode))); + printf(" asmjit::FuncNode : %u\n", static_cast<uint32_t>(sizeof(FuncNode))); + printf(" asmjit::EndNode : %u\n", static_cast<uint32_t>(sizeof(EndNode))); + printf(" asmjit::InstNode : %u\n", static_cast<uint32_t>(sizeof(InstNode))); + printf(" asmjit::JumpNode : %u\n", static_cast<uint32_t>(sizeof(JumpNode))); + printf(" asmjit::TargetNode : %u\n", static_cast<uint32_t>(sizeof(TargetNode))); + printf("\n"); + printf(" asmjit::FuncDecl : %u\n", static_cast<uint32_t>(sizeof(FuncDecl))); + printf(" asmjit::FuncInOut : %u\n", static_cast<uint32_t>(sizeof(FuncInOut))); + printf(" asmjit::FuncPrototype : %u\n", static_cast<uint32_t>(sizeof(FuncPrototype))); + printf("\n"); + printf(" asmjit::VarAttr : %u\n", static_cast<uint32_t>(sizeof(VarAttr))); + printf(" asmjit::VarData : %u\n", static_cast<uint32_t>(sizeof(VarData))); + printf(" asmjit::BaseVarInst : %u\n", static_cast<uint32_t>(sizeof(BaseVarInst))); + printf(" asmjit::BaseVarState : %u\n", static_cast<uint32_t>(sizeof(BaseVarState))); +
printf("\n"); + + // -------------------------------------------------------------------------- + // [X86/X64] + // -------------------------------------------------------------------------- + +#if defined(ASMJIT_BUILD_X86) || defined(ASMJIT_BUILD_X64) + printf("Sizeof[X86/X64]:\n"); + printf(" asmjit::x86x64::X86X64Assembler: %u\n", static_cast(sizeof(x86x64::X86X64Assembler))); + printf(" asmjit::x86x64::X86X64Compiler : %u\n", static_cast(sizeof(x86x64::X86X64Compiler))); + printf("\n"); + printf(" asmjit::x86x64::X86X64CallNode : %u\n", static_cast(sizeof(x86x64::X86X64CallNode))); + printf(" asmjit::x86x64::X86X64FuncNode : %u\n", static_cast(sizeof(x86x64::X86X64FuncNode))); + printf("\n"); + printf(" asmjit::x86x64::X86X64FuncDecl : %u\n", static_cast(sizeof(x86x64::X86X64FuncDecl))); + printf("\n"); + printf(" asmjit::x86x64::VarInst : %u\n", static_cast(sizeof(x86x64::VarInst))); + printf(" asmjit::x86x64::VarState : %u\n", static_cast(sizeof(x86x64::VarState))); + printf("\n"); + printf(" asmjit::x86x64::InstInfo : %u\n", static_cast(sizeof(x86x64::InstInfo))); + printf(" asmjit::x86x64::VarInfo : %u\n", static_cast(sizeof(x86x64::VarInfo))); + printf("\n"); +#endif // ASMJIT_BUILD_X86 + + return 0; +} diff --git a/src/app/test/testx86.cpp b/src/app/test/testx86.cpp new file mode 100644 index 0000000..010ad84 --- /dev/null +++ b/src/app/test/testx86.cpp @@ -0,0 +1,1805 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. +// +// [License] +// Zlib - See LICENSE.md file in the package. + +// [Dependencies - AsmJit] +#include + +// [Dependencies - Test] +#include "genblend.h" + +// [Dependencies - C] +#include +#include +#include + +using namespace asmjit; +using namespace asmjit::host; + +// ============================================================================ +// [X86Test] +// ============================================================================ + +//! @brief Interface used to test Compiler. +struct X86Test { + X86Test(const char* name = NULL) { _name.setString(name); } + virtual ~X86Test() {} + + ASMJIT_INLINE const char* getName() const { return _name.getData(); } + + virtual void compile(Compiler& c) = 0; + virtual bool run(void* func, StringBuilder& result, StringBuilder& expect) = 0; + + StringBuilder _name; +}; + +// ============================================================================ +// [X86Test_AlignBase] +// ============================================================================ + +struct X86Test_AlignBase : public X86Test { + X86Test_AlignBase(uint32_t argCount, uint32_t varCount, bool naked, bool pushPop) : + _argCount(argCount), + _varCount(varCount), + _naked(naked), + _pushPop(pushPop) { + + _name.setFormat("[Align] Args=%u Vars=%u Naked=%c PushPop=%c", + argCount, + varCount, + naked ? 'Y' : 'N', + pushPop ? 
'Y' : 'N'); + } + + static void add(PodVector<X86Test*>& tests) { + for (unsigned int i = 0; i <= 6; i++) { + for (unsigned int j = 0; j <= 4; j++) { + tests.append(new X86Test_AlignBase(i, j, false, false)); + tests.append(new X86Test_AlignBase(i, j, false, true )); + tests.append(new X86Test_AlignBase(i, j, true , false)); + tests.append(new X86Test_AlignBase(i, j, true , true )); + } + } + } + + virtual void compile(Compiler& c) { + switch (_argCount) { + case 0: c.addFunc(kFuncConvHost, FuncBuilder0<int>()); break; + case 1: c.addFunc(kFuncConvHost, FuncBuilder1<int, int>()); break; + case 2: c.addFunc(kFuncConvHost, FuncBuilder2<int, int, int>()); break; + case 3: c.addFunc(kFuncConvHost, FuncBuilder3<int, int, int, int>()); break; + case 4: c.addFunc(kFuncConvHost, FuncBuilder4<int, int, int, int, int>()); break; + case 5: c.addFunc(kFuncConvHost, FuncBuilder5<int, int, int, int, int, int>()); break; + case 6: c.addFunc(kFuncConvHost, FuncBuilder6<int, int, int, int, int, int, int>()); break; + } + + c.getFunc()->setHint(kFuncHintNaked, _naked); + c.getFunc()->setHint(kFuncHintPushPop, _pushPop); + + GpVar gpVar(c, kVarTypeIntPtr); + GpVar gpSum(c, kVarTypeInt32); + XmmVar xmmVar(c, kVarTypeXmm); + + // Alloc, use and spill preserved registers. + if (_varCount) { + c.comment("Var"); + + uint32_t varIndex = 0; + uint32_t regIndex = 0; + uint32_t regMask = 0x1; + uint32_t preservedMask = c.getFunc()->getDecl()->getPreserved(kRegClassGp); + + do { + if ((preservedMask & regMask) != 0 && (regIndex != kRegIndexSp && regIndex != kRegIndexBp)) { + GpVar tmp(c, kVarTypeInt32); + c.alloc(tmp, regIndex); + c.xor_(tmp, tmp); + c.spill(tmp); + varIndex++; + } + + regIndex++; + regMask <<= 1; + } while (varIndex < _varCount && regIndex < kRegCountGp); + } + + // Do a sum of arguments to verify possible relocation when misaligned. + if (_argCount) { + uint32_t argIndex; + + c.comment("Arg"); + c.xor_(gpSum, gpSum); + + for (argIndex = 0; argIndex < _argCount; argIndex++) { + GpVar gpArg(c, kVarTypeInt32); + + c.setArg(argIndex, gpArg); + c.add(gpSum, gpArg); + } + } + + // Check alignment of xmmVar (has to be 16). + c.comment("Ret"); + c.lea(gpVar, xmmVar.m()); + c.shl(gpVar.r32(), 28); + + // Add a sum of arguments to check whether they are correct.
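+ // The return value packs both checks: bits 31:28 receive the low four bits + // of the XMM home address (zero when it is 16-byte aligned) and bits 27:0 + // receive the argument sum or'ed in below.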
+ if (_argCount) + c.or_(gpVar.r32(), gpSum); + + c.ret(gpVar); + c.endFunc(); + } + + virtual bool run(void* _func, StringBuilder& result, StringBuilder& expect) { + typedef int (*Func0)(); + typedef int (*Func1)(int); + typedef int (*Func2)(int, int); + typedef int (*Func3)(int, int, int); + typedef int (*Func4)(int, int, int, int); + typedef int (*Func5)(int, int, int, int, int); + typedef int (*Func6)(int, int, int, int, int, int); + + unsigned int resultRet = 0; + unsigned int expectRet = 0; + + switch (_argCount) { + case 0: + resultRet = asmjit_cast<Func0>(_func)(); + expectRet = 0; + break; + case 1: + resultRet = asmjit_cast<Func1>(_func)(1); + expectRet = 1; + break; + case 2: + resultRet = asmjit_cast<Func2>(_func)(1, 2); + expectRet = 1 + 2; + break; + case 3: + resultRet = asmjit_cast<Func3>(_func)(1, 2, 3); + expectRet = 1 + 2 + 3; + break; + case 4: + resultRet = asmjit_cast<Func4>(_func)(1, 2, 3, 4); + expectRet = 1 + 2 + 3 + 4; + break; + case 5: + resultRet = asmjit_cast<Func5>(_func)(1, 2, 3, 4, 5); + expectRet = 1 + 2 + 3 + 4 + 5; + break; + case 6: + resultRet = asmjit_cast<Func6>(_func)(1, 2, 3, 4, 5, 6); + expectRet = 1 + 2 + 3 + 4 + 5 + 6; + break; + } + + result.setFormat("ret={%u, %u}", resultRet >> 28, resultRet & 0x0FFFFFFFU); + expect.setFormat("ret={%u, %u}", expectRet >> 28, expectRet & 0x0FFFFFFFU); + + return resultRet == expectRet; + } + + unsigned int _argCount; + unsigned int _varCount; + + bool _naked; + bool _pushPop; +}; + +// ============================================================================ +// [X86Test_JumpCross] +// ============================================================================ + +struct X86Test_JumpCross : public X86Test { + X86Test_JumpCross() : X86Test("[Jump] Cross jump") {} + + static void add(PodVector<X86Test*>& tests) { + tests.append(new X86Test_JumpCross()); + } + + virtual void compile(Compiler& c) { + c.addFunc(kFuncConvHost, FuncBuilder0<void>()); + + Label L_1(c); + Label L_2(c); + Label L_3(c); + + c.jmp(L_2); + + c.bind(L_1); + c.jmp(L_3); + + c.bind(L_2); + c.jmp(L_1); + + c.bind(L_3); + + c.ret(); + c.endFunc(); + } + + virtual bool run(void* _func, StringBuilder& result, StringBuilder& expect) { + typedef void (*Func)(void); + Func func = asmjit_cast<Func>(_func); + + func(); + return true; + } +}; + +// ============================================================================ +// [X86Test_JumpUnreachable] +// ============================================================================ + +struct X86Test_JumpUnreachable : public X86Test { + X86Test_JumpUnreachable() : X86Test("[Jump] Unreachable code") {} + + static void add(PodVector<X86Test*>& tests) { + tests.append(new X86Test_JumpUnreachable()); + } + + virtual void compile(Compiler& c) { + c.addFunc(kFuncConvHost, FuncBuilder0<void>()); + + Label L_1(c); + Label L_2(c); + Label L_3(c); + Label L_4(c); + Label L_5(c); + Label L_6(c); + Label L_7(c); + + GpVar v0(c, kVarTypeUInt32, "v0"); + GpVar v1(c, kVarTypeUInt32, "v1"); + + c.bind(L_2); + c.bind(L_3); + + c.jmp(L_1); + + c.bind(L_5); + c.mov(v0, 0); + + c.bind(L_6); + c.jmp(L_3); + c.mov(v1, 1); + c.jmp(L_1); + + c.bind(L_4); + c.jmp(L_2); + c.bind(L_7); + c.add(v0, v1); + + c.bind(L_1); + c.ret(); + c.endFunc(); + } + + virtual bool run(void* _func, StringBuilder& result, StringBuilder& expect) { + typedef void (*Func)(void); + Func func = asmjit_cast<Func>(_func); + + func(); + + result.appendString("ret={}"); + expect.appendString("ret={}"); + + return true; + } +}; + +// ============================================================================ +// [X86Test_AllocBase] +//
============================================================================ + +struct X86Test_AllocBase : public X86Test { + X86Test_AllocBase() : X86Test("[Alloc] Base") {} + + static void add(PodVector<X86Test*>& tests) { + tests.append(new X86Test_AllocBase()); + } + + virtual void compile(Compiler& c) { + c.addFunc(kFuncConvHost, FuncBuilder0<int>()); + + GpVar v0(c, kVarTypeInt32, "v0"); + GpVar v1(c, kVarTypeInt32, "v1"); + GpVar v2(c, kVarTypeInt32, "v2"); + GpVar v3(c, kVarTypeInt32, "v3"); + GpVar v4(c, kVarTypeInt32, "v4"); + + c.xor_(v0, v0); + + c.mov(v1, 1); + c.mov(v2, 2); + c.mov(v3, 3); + c.mov(v4, 4); + + c.add(v0, v1); + c.add(v0, v2); + c.add(v0, v3); + c.add(v0, v4); + + c.ret(v0); + c.endFunc(); + } + + virtual bool run(void* _func, StringBuilder& result, StringBuilder& expect) { + typedef int (*Func)(void); + Func func = asmjit_cast<Func>(_func); + + int resultRet = func(); + int expectRet = 1 + 2 + 3 + 4; + + result.setFormat("ret=%d", resultRet); + expect.setFormat("ret=%d", expectRet); + + return resultRet == expectRet; + } +}; + +// ============================================================================ +// [X86Test_AllocManual] +// ============================================================================ + +struct X86Test_AllocManual : public X86Test { + X86Test_AllocManual() : X86Test("[Alloc] Manual alloc/spill") {} + + static void add(PodVector<X86Test*>& tests) { + tests.append(new X86Test_AllocManual()); + } + + virtual void compile(Compiler& c) { + c.addFunc(kFuncConvHost, FuncBuilder0<int>()); + + GpVar v0(c, kVarTypeInt32, "v0"); + GpVar v1(c, kVarTypeInt32, "v1"); + GpVar cnt(c, kVarTypeInt32, "cnt"); + + c.xor_(v0, v0); + c.xor_(v1, v1); + c.spill(v0); + c.spill(v1); + + Label L(c); + c.mov(cnt, 32); + c.bind(L); + + c.inc(v1); + c.add(v0, v1); + + c.dec(cnt); + c.jnz(L); + + c.ret(v0); + c.endFunc(); + } + + virtual bool run(void* _func, StringBuilder& result, StringBuilder& expect) { + typedef int (*Func)(void); + Func func = asmjit_cast<Func>(_func); + + int resultRet = func(); + int expectRet = + 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + + 10 + 11 + 12 + 13 + 14 + 15 + 16 + 17 + 18 + 19 + + 20 + 21 + 22 + 23 + 24 + 25 + 26 + 27 + 28 + 29 + + 30 + 31 + 32; + + result.setFormat("ret=%d", resultRet); + expect.setFormat("ret=%d", expectRet); + + return resultRet == expectRet; + } +}; + +// ============================================================================ +// [X86Test_AllocMany1] +// ============================================================================ + +struct X86Test_AllocMany1 : public X86Test { + X86Test_AllocMany1() : X86Test("[Alloc] Many #1") {} + + enum { kCount = 8 }; + + static void add(PodVector<X86Test*>& tests) { + tests.append(new X86Test_AllocMany1()); + } + + virtual void compile(Compiler& c) { + c.addFunc(kFuncConvHost, FuncBuilder2<void, int*, int*>()); + + GpVar a0(c, kVarTypeIntPtr, "a0"); + GpVar a1(c, kVarTypeIntPtr, "a1"); + + c.setArg(0, a0); + c.setArg(1, a1); + + // Create some variables. + GpVar t(c, kVarTypeInt32); + GpVar x[kCount]; + + uint32_t i; + for (i = 0; i < kCount; i++) { + x[i] = c.newGpVar(kVarTypeInt32); + } + + // Setup variables (use mov with reg/imm to see if register allocator works). + for (i = 0; i < kCount; i++) { + c.mov(x[i], static_cast<int>(i + 1)); + } + + // Make sum (addition). + c.xor_(t, t); + for (i = 0; i < kCount; i++) { + c.add(t, x[i]); + } + + // Store result to a given pointer in first argument. + c.mov(dword_ptr(a0), t); + + // Clear t. + c.xor_(t, t); + + // Make sum (subtraction).
+ for (i = 0; i < kCount; i++) { + c.sub(t, x[i]); + } + + // Store result to a given pointer in second argument. + c.mov(dword_ptr(a1), t); + + // End of function. + c.endFunc(); + } + + virtual bool run(void* _func, StringBuilder& result, StringBuilder& expect) { + typedef void (*Func)(int*, int*); + Func func = asmjit_cast<Func>(_func); + + int resultX; + int resultY; + + int expectX = 36; + int expectY = -36; + + func(&resultX, &resultY); + + result.setFormat("ret={x=%d, y=%d}", resultX, resultY); + expect.setFormat("ret={x=%d, y=%d}", expectX, expectY); + + return resultX == expectX && resultY == expectY; + } +}; + +// ============================================================================ +// [X86Test_AllocMany2] +// ============================================================================ + +struct X86Test_AllocMany2 : public X86Test { + X86Test_AllocMany2() : X86Test("[Alloc] Many #2") {} + + static void add(PodVector<X86Test*>& tests) { + tests.append(new X86Test_AllocMany2()); + } + + virtual void compile(Compiler& c) { + c.addFunc(kFuncConvHost, FuncBuilder1<void, int*>()); + + GpVar var[32]; + GpVar a(c, kVarTypeIntPtr, "a"); + + c.setArg(0, a); + + uint32_t i; + for (i = 0; i < ASMJIT_ARRAY_SIZE(var); i++) { + var[i] = c.newGpVar(kVarTypeInt32); + } + + for (i = 0; i < ASMJIT_ARRAY_SIZE(var); i++) { + c.xor_(var[i], var[i]); + } + + GpVar v0(c, kVarTypeInt32); + Label L(c); + + c.mov(v0, 32); + c.bind(L); + + for (i = 0; i < ASMJIT_ARRAY_SIZE(var); i++) { + c.add(var[i], i); + } + + c.dec(v0); + c.jnz(L); + + for (i = 0; i < ASMJIT_ARRAY_SIZE(var); i++) { + c.mov(dword_ptr(a, i * 4), var[i]); + } + + c.endFunc(); + } + + virtual bool run(void* _func, StringBuilder& result, StringBuilder& expect) { + typedef void (*Func)(int*); + Func func = asmjit_cast<Func>(_func); + + int i; + int resultBuf[32]; + int expectBuf[32]; + + for (i = 0; i < ASMJIT_ARRAY_SIZE(resultBuf); i++) { + expectBuf[i] = i * 32; + } + + bool success = true; + func(resultBuf); + + for (i = 0; i < ASMJIT_ARRAY_SIZE(resultBuf); i++) { + result.appendFormat("%d", resultBuf[i]); + expect.appendFormat("%d", expectBuf[i]); + + success &= (resultBuf[i] == expectBuf[i]); + } + + return success; + } +}; + +// ============================================================================ +// [X86Test_AllocImul1] +// ============================================================================ + +struct X86Test_AllocImul1 : public X86Test { + X86Test_AllocImul1() : X86Test("[Alloc] Imul #1") {} + + static void add(PodVector<X86Test*>& tests) { + tests.append(new X86Test_AllocImul1()); + } + + virtual void compile(Compiler& c) { + c.addFunc(kFuncConvHost, FuncBuilder4<void, int*, int*, int, int>()); + + GpVar dstHi(c, kVarTypeIntPtr, "dstHi"); + GpVar dstLo(c, kVarTypeIntPtr, "dstLo"); + + GpVar vHi(c, kVarTypeInt32, "vHi"); + GpVar vLo(c, kVarTypeInt32, "vLo"); + GpVar src(c, kVarTypeInt32, "src"); + + c.setArg(0, dstHi); + c.setArg(1, dstLo); + c.setArg(2, vLo); + c.setArg(3, src); + + c.imul(vHi, vLo, src); + + c.mov(dword_ptr(dstHi), vHi); + c.mov(dword_ptr(dstLo), vLo); + c.endFunc(); + } + + virtual bool run(void* _func, StringBuilder& result, StringBuilder& expect) { + typedef void (*Func)(int*, int*, int, int); + Func func = asmjit_cast<Func>(_func); + + int v0 = 4; + int v1 = 4; + + int resultHi; + int resultLo; + + int expectHi = 0; + int expectLo = v0 * v1; + + func(&resultHi, &resultLo, v0, v1); + + result.setFormat("hi=%d, lo=%d", resultHi, resultLo); + expect.setFormat("hi=%d, lo=%d", expectHi, expectLo); + + return resultHi == expectHi && resultLo == expectLo; + } +}; + +//
============================================================================ +// [X86Test_AllocImul2] +// ============================================================================ + +struct X86Test_AllocImul2 : public X86Test { + X86Test_AllocImul2() : X86Test("[Alloc] Imul #2") {} + + static void add(PodVector& tests) { + tests.append(new X86Test_AllocImul2()); + } + + virtual void compile(Compiler& c) { + c.addFunc(kFuncConvHost, FuncBuilder2()); + + GpVar dst(c, kVarTypeIntPtr, "dst"); + GpVar src(c, kVarTypeIntPtr, "src"); + + c.setArg(0, dst); + c.setArg(1, src); + + for (unsigned int i = 0; i < 4; i++) { + GpVar x(c, kVarTypeInt32, "x"); + GpVar y(c, kVarTypeInt32, "y"); + GpVar hi(c, kVarTypeInt32, "hi"); + + c.mov(x, dword_ptr(src, 0)); + c.mov(y, dword_ptr(src, 4)); + + c.imul(hi, x, y); + c.add(dword_ptr(dst, 0), hi); + c.add(dword_ptr(dst, 4), x); + } + + c.endFunc(); + } + + virtual bool run(void* _func, StringBuilder& result, StringBuilder& expect) { + typedef void (*Func)(int*, const int*); + Func func = asmjit_cast(_func); + + int src[2] = { 4, 9 }; + int resultRet[2] = { 0, 0 }; + int expectRet[2] = { 0, (4 * 9) * 4 }; + + func(resultRet, src); + + result.setFormat("ret={%d, %d}", resultRet[0], resultRet[1]); + expect.setFormat("ret={%d, %d}", expectRet[0], expectRet[1]); + + return resultRet[0] == expectRet[0] && resultRet[1] == expectRet[1]; + } +}; + +// ============================================================================ +// [X86Test_AllocSetz] +// ============================================================================ + +struct X86Test_AllocSetz : public X86Test { + X86Test_AllocSetz() : X86Test("[Alloc] Setz") {} + + static void add(PodVector& tests) { + tests.append(new X86Test_AllocSetz()); + } + + virtual void compile(Compiler& c) { + c.addFunc(kFuncConvHost, FuncBuilder3()); + + GpVar src0(c, kVarTypeInt32, "src0"); + GpVar src1(c, kVarTypeInt32, "src1"); + GpVar dst0(c, kVarTypeIntPtr, "dst0"); + + c.setArg(0, src0); + c.setArg(1, src1); + c.setArg(2, dst0); + + c.cmp(src0, src1); + c.setz(byte_ptr(dst0)); + + c.endFunc(); + } + + virtual bool run(void* _func, StringBuilder& result, StringBuilder& expect) { + typedef void (*Func)(int, int, char*); + Func func = asmjit_cast(_func); + + char resultBuf[4]; + char expectBuf[4] = { 1, 0, 0, 1 }; + + func(0, 0, &resultBuf[0]); // We are expecting 1 (0 == 0). + func(0, 1, &resultBuf[1]); // We are expecting 0 (0 != 1). + func(1, 0, &resultBuf[2]); // We are expecting 0 (1 != 0). + func(1, 1, &resultBuf[3]); // We are expecting 1 (1 == 1). 
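+    // setz stores a full byte (0 or 1), so the byte-by-byte comparison
+    // below is reliable.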
+ + result.setFormat("out={%d, %d, %d, %d}", resultBuf[0], resultBuf[1], resultBuf[2], resultBuf[3]); + expect.setFormat("out={%d, %d, %d, %d}", expectBuf[0], expectBuf[1], expectBuf[2], expectBuf[3]); + + return resultBuf[0] == expectBuf[0] && + resultBuf[1] == expectBuf[1] && + resultBuf[2] == expectBuf[2] && + resultBuf[3] == expectBuf[3] ; + } +}; + +// ============================================================================ +// [X86Test_AllocShlRor] +// ============================================================================ + +struct X86Test_AllocShlRor : public X86Test { + X86Test_AllocShlRor() : X86Test("[Alloc] Shl/Ror") {} + + static void add(PodVector& tests) { + tests.append(new X86Test_AllocShlRor()); + } + + virtual void compile(Compiler& c) { + c.addFunc(kFuncConvHost, FuncBuilder4()); + + GpVar dst(c, kVarTypeIntPtr, "dst"); + GpVar var(c, kVarTypeInt32, "var"); + GpVar vShlParam(c, kVarTypeInt32, "vShlParam"); + GpVar vRorParam(c, kVarTypeInt32, "vRorParam"); + + c.setArg(0, dst); + c.setArg(1, var); + c.setArg(2, vShlParam); + c.setArg(3, vRorParam); + + c.shl(var, vShlParam); + c.ror(var, vRorParam); + + c.mov(dword_ptr(dst), var); + c.endFunc(); + } + + virtual bool run(void* _func, StringBuilder& result, StringBuilder& expect) { + typedef void (*Func)(int*, int, int, int); + Func func = asmjit_cast(_func); + + int v0 = 0x000000FF; + + int resultRet; + int expectRet = 0x0000FF00; + + func(&resultRet, v0, 16, 8); + + result.setFormat("ret=%d", resultRet); + expect.setFormat("ret=%d", expectRet); + + return resultRet == expectRet; + } +}; + +// ============================================================================ +// [X86Test_AllocGpLo] +// ============================================================================ + +struct X86Test_AllocGpLo : public X86Test { + X86Test_AllocGpLo() : X86Test("[Alloc] GP.LO") {} + + enum { kCount = 32 }; + + static void add(PodVector& tests) { + tests.append(new X86Test_AllocGpLo()); + } + + virtual void compile(Compiler& c) { + c.addFunc(kFuncConvHost, FuncBuilder1()); + + GpVar rPtr(c, kVarTypeUIntPtr); + GpVar rSum(c, kVarTypeUInt32); + + c.setArg(0, rPtr); + + GpVar rVar[kCount]; + uint32_t i; + + for (i = 0; i < kCount; i++) { + rVar[i] = c.newGpVar(kVarTypeUInt32); + } + + // Init pseudo-regs with values from our array. + for (i = 0; i < kCount; i++) { + c.mov(rVar[i], dword_ptr(rPtr, i * 4)); + } + + for (i = 2; i < kCount; i++) { + // Add and truncate to 8 bit; no purpose, just mess with jit. + c.add (rVar[i ], rVar[i-1]); + c.movzx(rVar[i ], rVar[i ].r8()); + c.movzx(rVar[i-2], rVar[i-1].r8()); + c.movzx(rVar[i-1], rVar[i-2].r8()); + } + + // Sum up all computed values. + c.mov(rSum, 0); + for (i = 0; i < kCount; i++) { + c.add(rSum, rVar[i]); + } + + // Return the sum. 
+    c.ret(rSum);
+    c.endFunc();
+  }
+
+  virtual bool run(void* _func, StringBuilder& result, StringBuilder& expect) {
+    typedef int (*Func)(uint32_t*);
+    Func func = asmjit_cast<Func>(_func);
+
+    unsigned int i;
+
+    uint32_t buf[kCount];
+    uint32_t resultRet;
+    uint32_t expectRet;
+
+    expectRet = 0;
+    for (i = 0; i < kCount; i++) {
+      buf[i] = 1;
+    }
+
+    for (i = 2; i < kCount; i++) {
+      buf[i  ] += buf[i-1];
+      buf[i  ]  = buf[i  ] & 0xFF;
+      buf[i-2]  = buf[i-1] & 0xFF;
+      buf[i-1]  = buf[i-2] & 0xFF;
+    }
+
+    for (i = 0; i < kCount; i++) {
+      expectRet += buf[i];
+    }
+
+    for (i = 0; i < kCount; i++) {
+      buf[i] = 1;
+    }
+    resultRet = func(buf);
+
+    result.setFormat("ret=%u", resultRet);
+    expect.setFormat("ret=%u", expectRet);
+
+    return resultRet == expectRet;
+  }
+};
+
+// ============================================================================
+// [X86Test_AllocRepMovsb]
+// ============================================================================
+
+struct X86Test_AllocRepMovsb : public X86Test {
+  X86Test_AllocRepMovsb() : X86Test("[Special] Rep Movsb") {}
+
+  static void add(PodVector<X86Test*>& tests) {
+    tests.append(new X86Test_AllocRepMovsb());
+  }
+
+  virtual void compile(Compiler& c) {
+    c.addFunc(kFuncConvHost, FuncBuilder3<void, void*, void*, size_t>());
+
+    GpVar dst(c, kVarTypeIntPtr, "dst");
+    GpVar src(c, kVarTypeIntPtr, "src");
+    GpVar cnt(c, kVarTypeIntPtr, "cnt");
+
+    c.setArg(0, dst);
+    c.setArg(1, src);
+    c.setArg(2, cnt);
+
+    c.rep_movsb(dst, src, cnt);
+    c.endFunc();
+  }
+
+  virtual bool run(void* _func, StringBuilder& result, StringBuilder& expect) {
+    typedef void (*Func)(void*, void*, size_t);
+    Func func = asmjit_cast<Func>(_func);
+
+    char dst[20];
+    char src[20] = "Hello AsmJit!";
+    func(dst, src, strlen(src) + 1);
+
+    result.setFormat("ret=\"%s\"", dst);
+    expect.setFormat("ret=\"%s\"", src);
+
+    return ::memcmp(dst, src, strlen(src) + 1) == 0;
+  }
+};
+
+// ============================================================================
+// [X86Test_AllocArgs]
+// ============================================================================
+
+struct X86Test_AllocArgs : public X86Test {
+  X86Test_AllocArgs() : X86Test("[Alloc] Args") {}
+
+  static void add(PodVector<X86Test*>& tests) {
+    tests.append(new X86Test_AllocArgs());
+  }
+
+  virtual void compile(Compiler& c) {
+    c.addFunc(kFuncConvHost,
+      FuncBuilder8<void, void*, void*, void*, void*, void*, void*, void*, void*>());
+
+    GpVar var[8];
+    uint32_t i;
+
+    for (i = 0; i < 8; i++) {
+      var[i] = c.newGpVar();
+    }
+
+    for (i = 0; i < 8; i++) {
+      c.setArg(i, var[i]);
+    }
+
+    for (i = 0; i < 8; i++) {
+      c.add(var[i], static_cast<int>(i + 1));
+    }
+
+    // Store some data into the buffer provided through the arguments so we
+    // can verify that it really works without inspecting the assembler output.
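+    // (Each var[i] was advanced by i + 1 above, so the store below targets
+    // resultBuf[i + 1]; index 0 stays zero, giving the {0, 1, ..., 8}
+    // pattern that run() expects.)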
+ for (i = 0; i < 8; i++) { + c.add(byte_ptr(var[i]), static_cast(i + 1)); + } + + c.endFunc(); + } + + virtual bool run(void* _func, StringBuilder& result, StringBuilder& expect) { + typedef void (*Func)(void*, void*, void*, void*, void*, void*, void*, void*); + Func func = asmjit_cast(_func); + + uint8_t resultBuf[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + uint8_t expectBuf[9] = { 0, 1, 2, 3, 4, 5, 6, 7, 8 }; + + func(resultBuf, resultBuf, resultBuf, resultBuf, + resultBuf, resultBuf, resultBuf, resultBuf); + + result.setFormat("buf={%d, %d, %d, %d, %d, %d, %d, %d, %d}", + resultBuf[0], resultBuf[1], resultBuf[2], resultBuf[3], + resultBuf[4], resultBuf[5], resultBuf[6], resultBuf[7], + resultBuf[8]); + expect.setFormat("buf={%d, %d, %d, %d, %d, %d, %d, %d, %d}", + expectBuf[0], expectBuf[1], expectBuf[2], expectBuf[3], + expectBuf[4], expectBuf[5], expectBuf[6], expectBuf[7], + expectBuf[8]); + + return ::memcmp(resultBuf, expectBuf, 9) == 0; + } +}; + +// ============================================================================ +// [X86Test_AllocStack] +// ============================================================================ + +struct X86Test_AllocStack : public X86Test { + X86Test_AllocStack() : X86Test("[Alloc] Stack") {} + + enum { kSize = 256 }; + + static void add(PodVector& tests) { + tests.append(new X86Test_AllocStack()); + } + + virtual void compile(Compiler& c) { + c.addFunc(kFuncConvHost, FuncBuilder0()); + + Mem stack = c.newStack(kSize, 1).setSize(1); + GpVar i(c, kVarTypeIntPtr, "i"); + GpVar a(c, kVarTypeInt32, "a"); + GpVar b(c, kVarTypeInt32, "b"); + + Label L_1(c); + Label L_2(c); + + // Fill stack by sequence [0, 1, 2, 3 ... 255]. + c.xor_(i, i); + + c.bind(L_1); + c.mov(stack.clone().setIndex(i, 0), i.r8()); + c.inc(i); + c.cmp(i, 255); + c.jle(L_1); + + // Sum sequence in stack. + c.xor_(i, i); + c.xor_(a, a); + + c.bind(L_2); + c.movzx(b, stack.clone().setIndex(i, 0)); + c.add(a, b); + c.inc(i); + c.cmp(i, 255); + c.jle(L_2); + + c.ret(a); + c.endFunc(); + } + + virtual bool run(void* _func, StringBuilder& result, StringBuilder& expect) { + typedef int (*Func)(void); + Func func = asmjit_cast(_func); + + int resultRet = func(); + int expectRet = 32640; + + result.setInt(resultRet); + expect.setInt(expectRet); + + return resultRet == expectRet; + } +}; + +// ============================================================================ +// [X86Test_AllocMemcpy] +// ============================================================================ + +struct X86Test_AllocMemcpy : public X86Test { + X86Test_AllocMemcpy() : X86Test("[Alloc] Memcpy") {} + + enum { kCount = 32 }; + + static void add(PodVector& tests) { + tests.append(new X86Test_AllocMemcpy()); + } + + virtual void compile(Compiler& c) { + GpVar dst(c, kVarTypeIntPtr, "dst"); + GpVar src(c, kVarTypeIntPtr, "src"); + GpVar cnt(c, kVarTypeUIntPtr, "cnt"); + + Label L_Loop(c); // Create base labels we use + Label L_Exit(c); // in our function. + + c.addFunc(kFuncConvHost, FuncBuilder3()); + c.setArg(0, dst); + c.setArg(1, src); + c.setArg(2, cnt); + + c.alloc(dst); // Allocate all registers now, + c.alloc(src); // because we want to keep them + c.alloc(cnt); // in physical registers only. + + c.test(cnt, cnt); // Exit if length is zero. + c.jz(L_Exit); + + c.bind(L_Loop); // Bind the loop label here. + + GpVar tmp(c, kVarTypeInt32); // Copy a single dword (4 bytes). + c.mov(tmp, dword_ptr(src)); + c.mov(dword_ptr(dst), tmp); + + c.add(src, 4); // Increment dst/src pointers. 
+ c.add(dst, 4); + + c.dec(cnt); // Loop until cnt isn't zero. + c.jnz(L_Loop); + + c.bind(L_Exit); // Bind the exit label here. + c.endFunc(); // End of function. + } + + virtual bool run(void* _func, StringBuilder& result, StringBuilder& expect) { + typedef void (*Func)(uint32_t*, const uint32_t*, size_t); + Func func = asmjit_cast(_func); + + uint32_t i; + + uint32_t dstBuffer[kCount]; + uint32_t srcBuffer[kCount]; + + for (i = 0; i < kCount; i++) { + dstBuffer[i] = 0; + srcBuffer[i] = i; + } + + func(dstBuffer, srcBuffer, kCount); + + result.setString("buf={"); + expect.setString("buf={"); + + for (i = 0; i < kCount; i++) { + if (i != 0) { + result.appendString(", "); + expect.appendString(", "); + } + + result.appendFormat("%u", static_cast(dstBuffer[i])); + expect.appendFormat("%u", static_cast(srcBuffer[i])); + } + + result.appendString("}"); + expect.appendString("}"); + + return ::memcmp(dstBuffer, srcBuffer, kCount * sizeof(uint32_t)) == 0; + } +}; + +// ============================================================================ +// [X86Test_AllocBlend] +// ============================================================================ + +struct X86Test_AllocBlend : public X86Test { + X86Test_AllocBlend() : X86Test("[Alloc] Blend") {} + + enum { kCount = 17 }; + + static void add(PodVector& tests) { + tests.append(new X86Test_AllocBlend()); + } + + static uint32_t blendSrcOver(uint32_t d, uint32_t s) { + uint32_t saInv = ~s >> 24; + + uint32_t d_20 = (d ) & 0x00FF00FF; + uint32_t d_31 = (d >> 8) & 0x00FF00FF; + + d_20 *= saInv; + d_31 *= saInv; + + d_20 = ((d_20 + ((d_20 >> 8) & 0x00FF00FFU) + 0x00800080U) & 0xFF00FF00U) >> 8; + d_31 = ((d_31 + ((d_31 >> 8) & 0x00FF00FFU) + 0x00800080U) & 0xFF00FF00U); + + return d_20 + d_31 + s; + } + + virtual void compile(Compiler& c) { + asmgen::blend(c); + } + + virtual bool run(void* _func, StringBuilder& result, StringBuilder& expect) { + typedef void (*Func)(void*, const void*, size_t); + Func func = asmjit_cast(_func); + + uint32_t i; + + uint32_t dstBuffer[kCount] = { 0x00000000, 0x10101010, 0x20100804, 0x30200003, 0x40204040, 0x5000004D, 0x60302E2C, 0x706F6E6D, 0x807F4F2F, 0x90349001, 0xA0010203, 0xB03204AB, 0xC023AFBD, 0xD0D0D0C0, 0xE0AABBCC, 0xFFFFFFFF, 0xF8F4F2F1 }; + uint32_t srcBuffer[kCount] = { 0xE0E0E0E0, 0xA0008080, 0x341F1E1A, 0xFEFEFEFE, 0x80302010, 0x49490A0B, 0x998F7798, 0x00000000, 0x01010101, 0xA0264733, 0xBAB0B1B9, 0xFF000000, 0xDAB0A0C1, 0xE0BACFDA, 0x99887766, 0xFFFFFF80, 0xEE0A5FEC }; + uint32_t expBuffer[kCount]; + + for (i = 0; i < kCount; i++) { + expBuffer[i] = blendSrcOver(dstBuffer[i], srcBuffer[i]); + } + + func(dstBuffer, srcBuffer, kCount); + + result.setString("buf={"); + expect.setString("buf={"); + + for (i = 0; i < kCount; i++) { + if (i != 0) { + result.appendString(", "); + expect.appendString(", "); + } + + result.appendFormat("%0.8X", static_cast(dstBuffer[i])); + expect.appendFormat("%0.8X", static_cast(expBuffer[i])); + } + + result.appendString("}"); + expect.appendString("}"); + + return ::memcmp(expBuffer, dstBuffer, kCount * sizeof(uint32_t)) == 0; + } +}; + +// ============================================================================ +// [X86Test_CallBase] +// ============================================================================ + +struct X86Test_CallBase : public X86Test { + X86Test_CallBase() : X86Test("[Call] CDecl") {} + + static void add(PodVector& tests) { + tests.append(new X86Test_CallBase()); + } + + virtual void compile(Compiler& c) { + GpVar v0(c, kVarTypeInt32, "v0"); + 
GpVar v1(c, kVarTypeInt32, "v1"); + GpVar v2(c, kVarTypeInt32, "v2"); + + c.addFunc(kFuncConvHost, FuncBuilder3()); + c.setArg(0, v0); + c.setArg(1, v1); + c.setArg(2, v2); + + // Just do something. + c.shl(v0, 1); + c.shl(v1, 1); + c.shl(v2, 1); + + // Call function. + GpVar fn(c, kVarTypeIntPtr, "fn"); + c.mov(fn, imm_ptr((void*)calledFunc)); + + X86X64CallNode* call = c.call(fn, kFuncConvHost, FuncBuilder3()); + call->setArg(0, v2); + call->setArg(1, v1); + call->setArg(2, v0); + call->setRet(0, v0); + + c.ret(v0); + c.endFunc(); + } + + virtual bool run(void* _func, StringBuilder& result, StringBuilder& expect) { + typedef int (*Func)(int, int, int); + Func func = asmjit_cast(_func); + + int resultRet = func(3, 2, 1); + int expectRet = 36; + + result.setFormat("ret=%d", resultRet); + expect.setFormat("ret=%d", expectRet); + + return resultRet == expectRet; + } + + static int calledFunc(int a, int b, int c) { return (a + b) * c; } +}; + +// ============================================================================ +// [X86Test_CallFast] +// ============================================================================ + +struct X86Test_CallFast : public X86Test { + X86Test_CallFast() : X86Test("[Call] Fastcall") {} + + static void add(PodVector& tests) { + tests.append(new X86Test_CallFast()); + } + + virtual void compile(Compiler& c) { + GpVar var(c, kVarTypeInt32, "var"); + GpVar fn(c, kVarTypeIntPtr, "fn"); + + c.addFunc(kFuncConvHost, FuncBuilder1()); + c.setArg(0, var); + + c.mov(fn, imm_ptr((void*)calledFunc)); + X86X64CallNode* call; + + call = c.call(fn, kFuncConvHostFastCall, FuncBuilder1()); + call->setArg(0, var); + call->setRet(0, var); + + call = c.call(fn, kFuncConvHostFastCall, FuncBuilder1()); + call->setArg(0, var); + call->setRet(0, var); + + c.ret(var); + c.endFunc(); + } + + virtual bool run(void* _func, StringBuilder& result, StringBuilder& expect) { + typedef int (*Func)(int); + Func func = asmjit_cast(_func); + + int resultRet = func(9); + int expectRet = (9 * 9) * (9 * 9); + + result.setFormat("ret=%d", resultRet); + expect.setFormat("ret=%d", expectRet); + + return resultRet == expectRet; + } + + // Function that is called inside the generated one. Because this test is + // mainly about register arguments, we need to use the fastcall calling + // convention when running 32-bit. + static int ASMJIT_FASTCALL calledFunc(int a) { return a * a; } +}; + +// ============================================================================ +// [X86Test_CallManyArgs] +// ============================================================================ + +struct X86Test_CallManyArgs : public X86Test { + X86Test_CallManyArgs() : X86Test("[Call] Many Args") {} + + static void add(PodVector& tests) { + tests.append(new X86Test_CallManyArgs()); + } + + static int calledFunc(int a, int b, int c, int d, int e, int f, int g, int h, int i, int j) { + return (a * b * c * d * e) + (f * g * h * i * j); + } + + virtual void compile(Compiler& c) { + c.addFunc(kFuncConvHost, FuncBuilder0()); + + // Prepare. 
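+    // (Ten integer arguments exceed the number of register arguments
+    // available in any host calling convention, so at least some of them
+    // will be passed on the stack.)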
+ GpVar fn(c, kVarTypeIntPtr, "fn"); + GpVar va(c, kVarTypeInt32, "va"); + GpVar vb(c, kVarTypeInt32, "vb"); + GpVar vc(c, kVarTypeInt32, "vc"); + GpVar vd(c, kVarTypeInt32, "vd"); + GpVar ve(c, kVarTypeInt32, "ve"); + GpVar vf(c, kVarTypeInt32, "vf"); + GpVar vg(c, kVarTypeInt32, "vg"); + GpVar vh(c, kVarTypeInt32, "vh"); + GpVar vi(c, kVarTypeInt32, "vi"); + GpVar vj(c, kVarTypeInt32, "vj"); + + c.mov(fn, imm_ptr((void*)calledFunc)); + c.mov(va, 0x03); + c.mov(vb, 0x12); + c.mov(vc, 0xA0); + c.mov(vd, 0x0B); + c.mov(ve, 0x2F); + c.mov(vf, 0x02); + c.mov(vg, 0x0C); + c.mov(vh, 0x12); + c.mov(vi, 0x18); + c.mov(vj, 0x1E); + + // Call function. + X86X64CallNode* call = c.call(fn, kFuncConvHost, + FuncBuilder10()); + call->setArg(0, va); + call->setArg(1, vb); + call->setArg(2, vc); + call->setArg(3, vd); + call->setArg(4, ve); + call->setArg(5, vf); + call->setArg(6, vg); + call->setArg(7, vh); + call->setArg(8, vi); + call->setArg(9, vj); + call->setRet(0, va); + + c.ret(va); + c.endFunc(); + } + + virtual bool run(void* _func, StringBuilder& result, StringBuilder& expect) { + typedef int (*Func)(void); + Func func = asmjit_cast(_func); + + int resultRet = func(); + int expectRet = calledFunc(0x03, 0x12, 0xA0, 0x0B, 0x2F, 0x02, 0x0C, 0x12, 0x18, 0x1E); + + result.setFormat("ret=%d", resultRet); + expect.setFormat("ret=%d", expectRet); + + return resultRet == expectRet; + } +}; + +// ============================================================================ +// [X86Test_CallImmArgs] +// ============================================================================ + +struct X86Test_CallImmArgs : public X86Test { + X86Test_CallImmArgs() : X86Test("[Call] Imm Args") {} + + static void add(PodVector& tests) { + tests.append(new X86Test_CallImmArgs()); + } + + virtual void compile(Compiler& c) { + c.addFunc(kFuncConvHost, FuncBuilder0()); + + // Prepare. + GpVar fn(c, kVarTypeIntPtr, "fn"); + GpVar rv(c, kVarTypeInt32, "rv"); + + c.mov(fn, imm_ptr((void*)X86Test_CallManyArgs::calledFunc)); + + // Call function. 
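+    // (All ten arguments are passed as immediates below; only the return
+    // value needs a virtual register.)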
+ X86X64CallNode* call = c.call(fn, kFuncConvHost, + FuncBuilder10()); + call->setArg(0, imm(0x03)); + call->setArg(1, imm(0x12)); + call->setArg(2, imm(0xA0)); + call->setArg(3, imm(0x0B)); + call->setArg(4, imm(0x2F)); + call->setArg(5, imm(0x02)); + call->setArg(6, imm(0x0C)); + call->setArg(7, imm(0x12)); + call->setArg(8, imm(0x18)); + call->setArg(9, imm(0x1E)); + call->setRet(0, rv); + + c.ret(rv); + c.endFunc(); + } + + virtual bool run(void* _func, StringBuilder& result, StringBuilder& expect) { + typedef int (*Func)(void); + Func func = asmjit_cast(_func); + + int resultRet = func(); + int expectRet = X86Test_CallManyArgs::calledFunc(0x03, 0x12, 0xA0, 0x0B, 0x2F, 0x02, 0x0C, 0x12, 0x18, 0x1E); + + result.setFormat("ret=%d", resultRet); + expect.setFormat("ret=%d", expectRet); + + return resultRet == expectRet; + } +}; + +// ============================================================================ +// [X86Test_CallConditional] +// ============================================================================ + +struct X86Test_CallConditional : public X86Test { + X86Test_CallConditional() : X86Test("[Call] Conditional") {} + + static void add(PodVector& tests) { + tests.append(new X86Test_CallConditional()); + } + + virtual void compile(Compiler& c) { + GpVar x(c, kVarTypeInt32, "x"); + GpVar y(c, kVarTypeInt32, "y"); + GpVar op(c, kVarTypeInt32, "op"); + + X86X64CallNode* call; + GpVar result; + + c.addFunc(kFuncConvHost, FuncBuilder3()); + c.setArg(0, x); + c.setArg(1, y); + c.setArg(2, op); + + Label opAdd(c); + Label opMul(c); + + c.cmp(op, 0); + c.jz(opAdd); + c.cmp(op, 1); + c.jz(opMul); + + result = c.newGpVar(kVarTypeInt32, "result"); + c.mov(result, 0); + c.ret(result); + + c.bind(opAdd); + result = c.newGpVar(kVarTypeInt32, "result"); + + call = c.call((void*)calledFuncAdd, kFuncConvHost, FuncBuilder2()); + call->setArg(0, x); + call->setArg(1, y); + call->setRet(0, result); + c.ret(result); + + c.bind(opMul); + result = c.newGpVar(kVarTypeInt32, "result"); + + call = c.call((void*)calledFuncMul, kFuncConvHost, FuncBuilder2()); + call->setArg(0, x); + call->setArg(1, y); + call->setRet(0, result); + + c.ret(result); + c.endFunc(); + } + + virtual bool run(void* _func, StringBuilder& result, StringBuilder& expect) { + typedef int (*Func)(int, int, int); + Func func = asmjit_cast(_func); + + int arg1 = 4; + int arg2 = 8; + + int resultAdd = func(arg1, arg2, 0); + int expectAdd = calledFuncAdd(arg1, arg2); + + int resultMul = func(arg1, arg2, 1); + int expectMul = calledFuncMul(arg1, arg2); + + result.setFormat("ret={add=%d, mul=%d}", resultAdd, resultMul); + expect.setFormat("ret={add=%d, mul=%d}", expectAdd, expectMul); + + return (resultAdd == expectAdd) && (resultMul == expectMul); + } + + static int calledFuncAdd(int x, int y) { return x + y; } + static int calledFuncMul(int x, int y) { return x * y; } +}; + +// ============================================================================ +// [X86Test_CallMultiple] +// ============================================================================ + +struct X86Test_CallMultiple : public X86Test { + X86Test_CallMultiple() : X86Test("[Call] Multiple") {} + + static void add(PodVector& tests) { + tests.append(new X86Test_CallMultiple()); + } + + static int ASMJIT_FASTCALL calledFunc(int* pInt, int index) { + return pInt[index]; + } + + virtual void compile(Compiler& c) { + unsigned int i; + + GpVar buf(c, kVarTypeIntPtr, "buf"); + GpVar acc0(c, kVarTypeInt32, "acc0"); + GpVar acc1(c, kVarTypeInt32, "acc1"); + + 
c.addFunc(kFuncConvHost, FuncBuilder1()); + c.setArg(0, buf); + + c.mov(acc0, 0); + c.mov(acc1, 0); + + for (i = 0; i < 4; i++) { + GpVar ret(c, kVarTypeInt32); + GpVar ptr(c, kVarTypeIntPtr); + GpVar idx(c, kVarTypeInt32); + X86X64CallNode* call; + + c.mov(ptr, buf); + c.mov(idx, static_cast(i)); + + call = c.call((void*)calledFunc, kFuncConvHostFastCall, FuncBuilder2()); + call->setArg(0, ptr); + call->setArg(1, idx); + call->setRet(0, ret); + + c.add(acc0, ret); + + c.mov(ptr, buf); + c.mov(idx, static_cast(i)); + + call = c.call((void*)calledFunc, kFuncConvHostFastCall, FuncBuilder2()); + call->setArg(0, ptr); + call->setArg(1, idx); + call->setRet(0, ret); + + c.sub(acc1, ret); + } + + c.add(acc0, acc1); + c.ret(acc0); + c.endFunc(); + } + + virtual bool run(void* _func, StringBuilder& result, StringBuilder& expect) { + typedef int (*Func)(int*); + Func func = asmjit_cast(_func); + + int buffer[4] = { 127, 87, 23, 17 }; + + int resultRet = func(buffer); + int expectRet = 0; + + result.setFormat("ret=%d", resultRet); + expect.setFormat("ret=%d", expectRet); + + return resultRet == expectRet; + } +}; + +// ============================================================================ +// [X86Test_CallRecursive] +// ============================================================================ + +struct X86Test_CallRecursive : public X86Test { + X86Test_CallRecursive() : X86Test("[Call] Recursive") {} + + static void add(PodVector& tests) { + tests.append(new X86Test_CallRecursive()); + } + + virtual void compile(Compiler& c) { + GpVar val(c, kVarTypeInt32, "val"); + Label skip(c); + + X86X64FuncNode* func = c.addFunc(kFuncConvHost, FuncBuilder1()); + c.setArg(0, val); + + c.cmp(val, 1); + c.jle(skip); + + GpVar tmp(c, kVarTypeInt32, "tmp"); + c.mov(tmp, val); + c.dec(tmp); + + X86X64CallNode* call = c.call(func->getEntryLabel(), kFuncConvHost, FuncBuilder1()); + call->setArg(0, tmp); + call->setRet(0, tmp); + c.mul(c.newGpVar(kVarTypeInt32), val, tmp); + + c.bind(skip); + c.ret(val); + c.endFunc(); + } + + virtual bool run(void* _func, StringBuilder& result, StringBuilder& expect) { + typedef int (*Func)(int); + Func func = asmjit_cast(_func); + + int resultRet = func(5); + int expectRet = 1 * 2 * 3 * 4 * 5; + + result.setFormat("ret=%d", resultRet); + expect.setFormat("ret=%d", expectRet); + + return resultRet == expectRet; + } +}; + +// ============================================================================ +// [X86Test_Dummy] +// ============================================================================ + +struct X86Test_Dummy : public X86Test { + X86Test_Dummy() : X86Test("[Dummy] Dummy") {} + + static void add(PodVector& tests) { + tests.append(new X86Test_Dummy()); + } + + virtual void compile(Compiler& c) { + c.addFunc(kFuncConvHost, FuncBuilder0()); + + GpVar r(c, kVarTypeUInt32); + GpVar a(c, kVarTypeUInt32); + GpVar b(c, kVarTypeUInt32); + + c.alloc(r, eax); + c.alloc(a, ecx); + c.alloc(b, edx); + + c.mov(a, 16); + c.mov(b, 99); + + c.mul(r, a, b); + c.alloc(a, esi); + c.alloc(b, ecx); + c.alloc(r, edi); + c.mul(a, b, r); + + c.ret(b); + c.endFunc(); + } + + virtual bool run(void* _func, StringBuilder& result, StringBuilder& expect) { + typedef uint32_t (*Func)(void); + Func func = asmjit_cast(_func); + + return func() == 0; + } +}; + +// ============================================================================ +// [X86TestSuite] +// ============================================================================ + +struct X86TestSuite { + // 
-------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + X86TestSuite(); + ~X86TestSuite(); + + // -------------------------------------------------------------------------- + // [Methods] + // -------------------------------------------------------------------------- + + int run(); + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + + PodVector tests; + StringBuilder output; + + int result; + int binSize; + bool alwaysPrintLog; +}; + +#define ADD_TEST(_Class_) \ + _Class_::add(tests) + +X86TestSuite::X86TestSuite() : + result(EXIT_SUCCESS), + binSize(0), + alwaysPrintLog(false) { + + // Align. + ADD_TEST(X86Test_AlignBase); + + // Jump. + ADD_TEST(X86Test_JumpCross); + ADD_TEST(X86Test_JumpUnreachable); + + // Alloc. + ADD_TEST(X86Test_AllocBase); + ADD_TEST(X86Test_AllocManual); + ADD_TEST(X86Test_AllocMany1); + ADD_TEST(X86Test_AllocMany2); + ADD_TEST(X86Test_AllocImul1); + ADD_TEST(X86Test_AllocImul2); + ADD_TEST(X86Test_AllocSetz); + ADD_TEST(X86Test_AllocShlRor); + ADD_TEST(X86Test_AllocGpLo); + ADD_TEST(X86Test_AllocRepMovsb); + ADD_TEST(X86Test_AllocArgs); + ADD_TEST(X86Test_AllocStack); + ADD_TEST(X86Test_AllocMemcpy); + ADD_TEST(X86Test_AllocBlend); + + // Call. + ADD_TEST(X86Test_CallBase); + ADD_TEST(X86Test_CallFast); + ADD_TEST(X86Test_CallManyArgs); + ADD_TEST(X86Test_CallImmArgs); + ADD_TEST(X86Test_CallConditional); + ADD_TEST(X86Test_CallMultiple); + ADD_TEST(X86Test_CallRecursive); + + // Dummy. + // ADD_TEST(X86Test_Dummy); +} + +X86TestSuite::~X86TestSuite() { + size_t i; + size_t count = tests.getLength(); + + for (i = 0; i < count; i++) { + X86Test* test = tests[i]; + delete test; + } +} + +int X86TestSuite::run() { + size_t i; + size_t count = tests.getLength(); + + FILE* file = stdout; + + for (i = 0; i < count; i++) { + JitRuntime runtime; + + StringLogger logger; + logger.setOption(kLoggerOptionBinaryForm, true); + + Compiler compiler(&runtime); + compiler.setLogger(&logger); + + X86Test* test = tests[i]; + test->compile(compiler); + + void* func = compiler.make(); + + if (alwaysPrintLog) { + fprintf(file, "\n%s", logger.getString()); + fflush(file); + } + + if (func != NULL) { + StringBuilder result; + StringBuilder expect; + + if (test->run(func, result, expect)) { + fprintf(file, "[Success] %s.\n", test->getName()); + } + else { + if (!alwaysPrintLog) + fprintf(file, "\n%s", logger.getString()); + fprintf(file, "-------------------------------------------------------------------------------\n"); + fprintf(file, "[Failure] %s.\n", test->getName()); + fprintf(file, "-------------------------------------------------------------------------------\n"); + fprintf(file, "Result : %s\n", result.getData()); + fprintf(file, "Expected: %s\n", expect.getData()); + fprintf(file, "===============================================================================\n"); + } + + runtime.release(func); + } + else { + if (!alwaysPrintLog) + fprintf(file, "%s\n", logger.getString()); + fprintf(file, "-------------------------------------------------------------------------------\n"); + fprintf(file, "[Failure] %s.\n", test->getName()); + fprintf(file, "===============================================================================\n"); + } + + fflush(file); + } + + fputs("\n", file); + fputs(output.getData(), file); + 
fflush(file); + + return result; +} + +// ============================================================================ +// [Main] +// ============================================================================ + +int main(int argc, char* argv[]) { + return X86TestSuite().run(); +} diff --git a/src/asmjit/asmjit.h b/src/asmjit/asmjit.h new file mode 100644 index 0000000..c0c0aae --- /dev/null +++ b/src/asmjit/asmjit.h @@ -0,0 +1,323 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. +// +// [License] +// Zlib - See LICENSE.md file in the package. + +// [Guard] +#ifndef _ASMJIT_ASMJIT_H +#define _ASMJIT_ASMJIT_H + +//! @mainpage +//! +//! @brief AsmJit is a complete x86/x64 JIT Assembler for C++ language. +//! +//! It supports FPU, MMX, 3dNow, SSE, SSE2, SSE3 and SSE4 intrinsics, powerful +//! compiler that helps to write portable functions for 32-bit (x86) and 64-bit +//! (x64) architectures. AsmJit can be used to create functions at runtime that +//! can be called from existing (but also generated) C/C++ code. +//! +//! AsmJit is a cross-platform library that supports various compilers and +//! operating systems. Currently only limitation is x86 (32-bit) or x64 (64-bit) +//! processor. Currently tested operating systems are Windows (32-bit and 64-bit), +//! Linux (32-bit and 64-bit) and MacOSX (32-bit and 64-bit). +//! +//! @section AsmJit_Main_Introduction Introduction +//! +//! AsmJit library contains two main classes for code generation with different +//! goals. First main code generation class is called @c asmjit::Assembler and +//! contains low level API that can be used to generate JIT binary code. It +//! directly emits binary stream that represents encoded x86/x64 assembler +//! opcodes. Together with operands and labels it can be used to generate +//! complete code. For details look to @ref asmjit_base and @ref asmjit_compiler +//! sections. +//! +//! There is also class named @c asmjit::BaseCompiler that allows to develop +//! cross-platform assembler code without worring about function calling +//! conventions and registers allocation. It can be also used to write 32-bit +//! and 64-bit portable code. Compiler is a recommended concept to use for code +//! generation. +//! +//! Everything in AsmJit library is in @c asmjit namespace. +//! +//! @section AsmJit_Main_CodeGeneration Code Generation +//! +//! - @ref asmjit_base "Assembler core" - Operands, intrinsics and low-level assembler. +//! - @ref asmjit_compiler "Compiler" - High level code generation. +//! - @ref asmjit_cpuinfo "Cpu Information" - Get information about host processor. +//! - @ref asmjit_logging "Logging" - Logging and error handling. +//! - @ref AsmJit_MemoryManagement "Memory Management" - Virtual memory management. +//! +//! @section AsmJit_Main_Configuration Configuration, Definitions and Utilities +//! +//! - @ref asmjit_config "Configuration" - Macros used to configure AsmJit. +//! +//! @section AsmJit_Main_HomePage AsmJit Homepage +//! +//! - http://code.google.com/p/asmjit/ +//! +//! @section AsmJit_Main_ResourcesX86 External X86/X64 Assembler Resources +//! - http://www.agner.org/optimize/ +//! - http://www.mark.masmcode.com/ (Assembler Tips) +//! - http://avisynth.org/mediawiki/Filter_SDK/Assembler_optimizing (Optimizing) +//! - http://www.ragestorm.net/distorm/ (Disassembling) +//! +//! @section AsmJit_Main_Terminology Terminology +//! +//! - Non-volatile (preserved) register - Register that can't be changed +//! 
by callee (a callee that wants to use it must save and restore it).
+//!
+//! - Volatile (non-preserved) register - The opposite: a register that the
+//! callee may use freely. The caller must assume that any function call
+//! destroys the content of all volatile registers.

+//! @defgroup asmjit_base Platform neutral API, abstract classes and operands.
+//!
+//! Contains all AsmJit classes and helper functions that are neutral or
+//! abstract. All abstract classes are reimplemented for every supported
+//! architecture.
+//!
+//! - See the @c asmjit::Assembler class for low-level code generation
+//! documentation.
+//! - See @c asmjit::Operand for an overview of AsmJit operands.
+//!
+//! @section AsmJit_Core_Registers Registers
+//!
+//! There are static objects that represent X86 and X64 registers. They can
+//! be used directly (like @c eax, @c mm, @c xmm, ...) or created through
+//! these functions:
+//!
+//! - @c asmjit::gpb_lo() - Get a Gpb-lo register.
+//! - @c asmjit::gpb_hi() - Get a Gpb-hi register.
+//! - @c asmjit::gpw() - Get a Gpw register.
+//! - @c asmjit::gpd() - Get a Gpd register.
+//! - @c asmjit::gpq() - Get a Gpq register.
+//! - @c asmjit::gpz() - Get a Gpd/Gpq register.
+//! - @c asmjit::fp() - Get an Fp register.
+//! - @c asmjit::mm() - Get an Mm register.
+//! - @c asmjit::xmm() - Get an Xmm register.
+//! - @c asmjit::ymm() - Get a Ymm register.
+//!
+//! @section AsmJit_Core_Addressing Addressing
+//!
+//! The X86 and X64 architectures offer several addressing modes and most of
+//! them are supported by AsmJit. Memory operands are represented by the
+//! @c asmjit::BaseMem class. These functions create operands that represent
+//! memory addresses:
+//!
+//! - @c asmjit::ptr()
+//! - @c asmjit::byte_ptr()
+//! - @c asmjit::word_ptr()
+//! - @c asmjit::dword_ptr()
+//! - @c asmjit::qword_ptr()
+//! - @c asmjit::tword_ptr()
+//! - @c asmjit::oword_ptr()
+//! - @c asmjit::yword_ptr()
+//! - @c asmjit::intptr_ptr()
+//!
+//! The most generally useful of these is @c asmjit::ptr(). It creates a
+//! pointer to the target with an unspecified size. An unspecified size works
+//! with all intrinsics that also take a register operand, because the size
+//! is then implied by the register operand or by the instruction itself. For
+//! example @c asmjit::ptr() can't be used with @c asmjit::Assembler::inc(),
+//! whose only operand is memory; in such cases the size must be given
+//! explicitly, which is why the differently sized pointer makers exist.
+//!
+//! Both simple address forms (register + displacement) and complex address
+//! forms (register + (register << shift) + displacement) are supported.
+//!
+//! @section AsmJit_Core_Immediates Immediates
+//!
+//! Immediate values are constants encoded directly after the instruction
+//! opcode. To create such an operand, use @c asmjit::imm() for a signed or
+//! @c asmjit::imm_u() for an unsigned immediate value.
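+//!
+//! For example (a small sketch; assumes an @c Assembler instance @c a and
+//! the host register definitions from the @c asmjit::host namespace):
+//!
+//! @code
+//! using namespace asmjit;
+//! using namespace asmjit::host;
+//!
+//! // [base + displacement]; the size is implied by the 'eax' register.
+//! a.mov(eax, ptr(esp, 8));
+//!
+//! // [base + (index << shift) + displacement] with an explicit dword size,
+//! // needed because no register operand implies it.
+//! a.mov(dword_ptr(ebx, esi, 2, 4), imm(42));
+//! @endcode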
+//!
+//! @sa @c asmjit::BaseCompiler.

+//! @defgroup asmjit_compiler Compiler (high-level code generation).
+//!
+//! Contains classes related to @c asmjit::Compiler that can be used
+//! to generate code using high-level constructs.
+//!
+//! - See the @c Compiler class for high-level code generation
+//! documentation - calling conventions, function declaration
+//! and variable management.

+//! @defgroup asmjit_config Configuration.
+//!
+//! Contains macros that can be redefined to fit into any project.

+//! @defgroup asmjit_cpuinfo CPU information.
+//!
+//! The x86/x64 cpuid instruction can be used to query the processor vendor
+//! and its features; it is typically used to detect features like MMX, SSE
+//! and newer extensions.
+//!
+//! AsmJit supports a low-level cpuid call, implemented internally as a C++
+//! function using inline assembly or intrinsics, as well as higher-level CPU
+//! feature detection. The low-level function (also used by the higher-level
+//! one) is @c asmjit::cpuid().
+//!
+//! The higher-level interface, @c asmjit::BaseCpu::getHost(), returns the
+//! features detected by the library. The detection process runs only once
+//! and the returned object is always the same. The @c asmjit::BaseCpu
+//! structure does not contain only the information gathered through
+//! @c asmjit::cpuid(); there is also a small multi-platform routine that
+//! detects the number of processors (or cores) through the operating
+//! system API.
+//!
+//! It's recommended to use @c asmjit::BaseCpu::getHost() to detect and
+//! check for host processor features.
+//!
+//! Example of how to use asmjit::cpuid():
+//!
+//! @code
+//! // All functions and structures are in the asmjit namespace.
+//! using namespace asmjit;
+//!
+//! // The result of the cpuid call will be stored here.
+//! CpuId out;
+//!
+//! // Use the cpuid function to do the job.
+//! cpuid(0 /* eax */, &out /* eax, ebx, ecx, edx */);
+//!
+//! // If the eax argument to cpuid is 0, the ebx, ecx and edx registers
+//! // are filled with the cpu vendor string.
+//! char vendor[13];
+//! memcpy(vendor, &out.ebx, 4);
+//! memcpy(vendor + 4, &out.edx, 4);
+//! memcpy(vendor + 8, &out.ecx, 4);
+//! vendor[12] = '\0';
+//!
+//! // Print the vendor.
+//! puts(vendor);
+//! @endcode
+//!
+//! If the high-level interface of asmjit::BaseCpu is not enough, you can use
+//! the low-level asmjit::cpuid() when running on an x86/x64 host, but please
+//! read the processor manuals provided by Intel, AMD or another manufacturer
+//! for cpuid details.
+//!
+//! Example of using @c asmjit::BaseCpu::getHost():
+//!
+//! @code
+//! // All functions and structures are in the asmjit namespace.
+//! using namespace asmjit;
+//!
+//! // BaseCpu::getHost() returns a BaseCpu structure that shouldn't be
+//! // modified, so keep the pointer const.
+//! const BaseCpu* cpu = BaseCpu::getHost();
+//!
+//! // Now you are able to test for specific features.
+//!
+//! // Processor has SSE2.
+//! if (cpu->features & kCpuFeatureSse2) {
+//!   // your code...
+//! }
+//! // Processor has MMX.
+//! else if (cpu->features & kCpuFeatureMmx) {
+//!   // your code...
+//! }
+//! // Processor is old, no SSE2 or MMX support.
+//! else {
+//!   // your code...
+//! }
+//! @endcode
+//!
+//! A more complete example can be found in the app/test/testcpu.cpp file.

+//! @defgroup asmjit_logging Logging and error handling.
+//!
+//! Contains classes related to logging. Currently logging is implemented by
+//! the @ref asmjit::BaseLogger class. The function @ref asmjit::BaseLogger::log()
+//! can be overridden to redirect logging into any user-defined stream.
+//!
+//! To log your assembler output to a FILE stream use this code:
+//!
+//! @code
+//! // Create assembler.
+//! Assembler a;
+//!
+//! // Create and set a file-based logger.
+//! FileLogger logger(stderr);
+//! a.setLogger(&logger);
+//! @endcode
+//!
+//! You can see that logging goes through the @c Assembler. If you are using
+//! the @c Compiler and you want to log messages in correct assembler order,
+//! look at the @ref Compiler::comment() method. It inserts a text message
+//! into the item stream so the @c Compiler can pass messages to the
+//! @ref Assembler in the correct order.
+//!
+//! @sa @c asmjit::BaseLogger, @c asmjit::FileLogger.

+//! @defgroup AsmJit_MemoryManagement Virtual memory management.
+//!
+//! Generating machine code with @c asmjit::Assembler or @c asmjit::Compiler
+//! is not the final step. The generated code must be placed in memory that
+//! is allowed to be executed. That requires operating-system services,
+//! either to enable execution in an existing memory block or to allocate
+//! executable memory in the first place. The simplest solution is to use
+//! @c asmjit::Assembler::make() and @c asmjit::Compiler::make(), which
+//! allocate memory and relocate the code for you. However, AsmJit also
+//! contains classes for manual memory management that are used internally,
+//! but can be used by programmers directly as well.
+//!
+//! Memory management contains low-level and high-level classes related to
+//! allocating and freeing virtual memory. The low-level class is
+//! @c asmjit::VMem, which allocates and frees whole pages of virtual memory
+//! provided by the operating system. The higher-level class is
+//! @c asmjit::MemoryManager, which manages the complete allocate/free cycle
+//! and internally works with larger chunks of memory to make allocation
+//! fast and efficient.
+//!
+//! Using @c asmjit::VMem::alloc() is a cross-platform way to allocate this
+//! kind of memory without worrying about the operating system and its API.
+//! Each memory block that is no longer needed should be released by the
+//! @ref asmjit::VMem::release() method. A higher-level interface for virtual
+//! memory allocation can be found in the asmjit::MemoryManager class.
+//!
+//! @sa @c asmjit::VMem, @c asmjit::MemoryManager.
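+//!
+//! As a minimal sketch of manual allocation (for illustration only; the
+//! exact @c VMem signatures may differ, and @c asmjit::Assembler::make()
+//! normally does all of this for you):
+//!
+//! @code
+//! using namespace asmjit;
+//!
+//! size_t allocated;
+//! void* p = VMem::alloc(4096, &allocated, true /* canExecute */);
+//!
+//! if (p != NULL) {
+//!   // Copy or relocate the generated code here, then call it...
+//!
+//!   // ... and release the block once it is no longer needed.
+//!   VMem::release(p, allocated);
+//! }
+//! @endcode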
+
+
+//! @addtogroup asmjit_config
+//! @{

+//! @def ASMJIT_OS_WINDOWS
+//! @brief Macro that is defined if AsmJit is compiled for Windows.

+//! @def ASMJIT_OS_POSIX
+//! @brief Macro that is defined if AsmJit is compiled for a Unix-like
+//! operating system.

+//! @def ASMJIT_API
+//! @brief Attribute added to classes that should be exported if AsmJit
+//! is compiled as a DLL library.

+//! @def ASMJIT_ASSERT
+//! @brief Assertion macro. The default implementation calls the
+//! @c asmjit::assertionFailed() function.

+//! @}


+//! @namespace asmjit
+//! @brief Main AsmJit library namespace.
+//!
+//! No other namespaces are used in the AsmJit library.

+// [Dependencies - Core]
+#include "base.h"

+// [Dependencies - X86/X64]
+#if defined(ASMJIT_BUILD_X86) || defined(ASMJIT_BUILD_X64)
+#include "x86.h"
+#endif // ASMJIT_BUILD_X86 || ASMJIT_BUILD_X64

+// [Dependencies - Host]
+#include "host.h"

+// [Guard]
+#endif // _ASMJIT_ASMJIT_H
diff --git a/src/asmjit/base.h b/src/asmjit/base.h
new file mode 100644
index 0000000..047bdd7
--- /dev/null
+++ b/src/asmjit/base.h
@@ -0,0 +1,35 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+ +// [Guard] +#ifndef _ASMJIT_BASE_H +#define _ASMJIT_BASE_H + +// [Dependencies - AsmJit] +#include "build.h" + +#include "base/assembler.h" +#include "base/assert.h" +#include "base/codegen.h" +#include "base/compiler.h" +#include "base/cpu.h" +#include "base/defs.h" +#include "base/error.h" +#include "base/func.h" +#include "base/globals.h" +#include "base/intutil.h" +#include "base/lock.h" +#include "base/logger.h" +#include "base/memorymanager.h" +#include "base/podlist.h" +#include "base/podvector.h" +#include "base/string.h" +#include "base/vectypes.h" +#include "base/vmem.h" +#include "base/zone.h" + +// [Guard] +#endif // _ASMJIT_BASE_H diff --git a/src/asmjit/base/apibegin.h b/src/asmjit/base/apibegin.h new file mode 100644 index 0000000..9447cbb --- /dev/null +++ b/src/asmjit/base/apibegin.h @@ -0,0 +1,54 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. +// +// [License] +// Zlib - See LICENSE.md file in the package. + +#if !defined(_ASMJIT_BUILD_H) +#include "../build.h" +#endif // !_ASMJIT_BUILD_H + +// ============================================================================ +// [MSVC] +// ============================================================================ + +#if defined(_MSC_VER) + +// Disable some warnings we know about +#pragma warning(push) +#pragma warning(disable: 4127) // conditional expression is constant +#pragma warning(disable: 4201) // nameless struct/union +#pragma warning(disable: 4244) // '+=' : conversion from 'int' to 'x', possible + // loss of data +#pragma warning(disable: 4251) // struct needs to have dll-interface to be used + // by clients of struct ... +#pragma warning(disable: 4275) // non dll-interface struct ... used as base for + // dll-interface struct +#pragma warning(disable: 4355) // this used in base member initializer list +#pragma warning(disable: 4480) // specifying underlying type for enum +#pragma warning(disable: 4800) // forcing value to bool 'true' or 'false' + +// Rename symbols. +#if !defined(vsnprintf) +#define ASMJIT_DEFINED_VSNPRINTF +#define vsnprintf _vsnprintf +#endif // !vsnprintf + +#if !defined(snprintf) +#define ASMJIT_DEFINED_SNPRINTF +#define snprintf _snprintf +#endif // !snprintf + +#endif // _MSC_VER + +// ============================================================================ +// [GNUC] +// ============================================================================ + +#if defined(__GNUC__) +// GCC warnings fix: I can't understand why GCC has no interface to push/pop +// specific warnings. +// # if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) >= 402001 +// # pragma GCC diagnostic ignored "-w" +// # endif +#endif // __GNUC__ diff --git a/src/asmjit/base/apiend.h b/src/asmjit/base/apiend.h new file mode 100644 index 0000000..3732478 --- /dev/null +++ b/src/asmjit/base/apiend.h @@ -0,0 +1,34 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. +// +// [License] +// Zlib - See LICENSE.md file in the package. + +// ============================================================================ +// [MSVC] +// ============================================================================ + +#if defined(_MSC_VER) + +// Pop disabled warnings by ApiBegin.h +#pragma warning(pop) + +// Rename symbols back. 
+#if defined(ASMJIT_DEFINED_VSNPRINTF)
+#undef ASMJIT_DEFINED_VSNPRINTF
+#undef vsnprintf
+#endif // ASMJIT_DEFINED_VSNPRINTF

+#if defined(ASMJIT_DEFINED_SNPRINTF)
+#undef ASMJIT_DEFINED_SNPRINTF
+#undef snprintf
+#endif // ASMJIT_DEFINED_SNPRINTF

+#endif // _MSC_VER

+// ============================================================================
+// [GNUC]
+// ============================================================================

+#if defined(__GNUC__)
+#endif // __GNUC__
diff --git a/src/asmjit/base/assembler.cpp b/src/asmjit/base/assembler.cpp
new file mode 100644
index 0000000..bea4df1
--- /dev/null
+++ b/src/asmjit/base/assembler.cpp
@@ -0,0 +1,286 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.

+// [Export]
+#define ASMJIT_EXPORTS

+// [Dependencies - AsmJit]
+#include "../base/assembler.h"
+#include "../base/intutil.h"
+#include "../base/memorymanager.h"

+// [Dependencies - C]
+#include <stdlib.h>   // ::malloc, ::realloc and ::free.
+#include <string.h>   // ::memcpy.

+// [Api-Begin]
+#include "../base/apibegin.h"

+namespace asmjit {

+// ============================================================================
+// [asmjit::BaseAssembler - Construction / Destruction]
+// ============================================================================

+BaseAssembler::BaseAssembler(BaseRuntime* runtime) :
+  CodeGen(runtime),
+  _buffer(NULL),
+  _end(NULL),
+  _cursor(NULL),
+  _trampolineSize(0),
+  _comment(NULL),
+  _unusedLinks(NULL) {}

+BaseAssembler::~BaseAssembler() {
+  if (_buffer != NULL)
+    ::free(_buffer);
+}

+// ============================================================================
+// [asmjit::BaseAssembler - Clear / Reset]
+// ============================================================================

+void BaseAssembler::clear() {
+  _purge();
+}

+void BaseAssembler::reset() {
+  _purge();
+  _zoneAllocator.reset();

+  if (_buffer != NULL) {
+    ::free(_buffer);

+    _buffer = NULL;
+    _end = NULL;
+    _cursor = NULL;
+  }

+  _labels.reset();
+  _relocData.reset();
+}

+void BaseAssembler::_purge() {
+  _zoneAllocator.clear();
+  _cursor = _buffer;

+  _options = 0;
+  _trampolineSize = 0;

+  _comment = NULL;
+  _unusedLinks = NULL;

+  _labels.clear();
+  _relocData.clear();

+  clearError();
+}

+// ============================================================================
+// [asmjit::BaseAssembler - Buffer]
+// ============================================================================

+Error BaseAssembler::_grow(size_t n) {
+  size_t capacity = getCapacity();
+  size_t after = getOffset() + n;

+  // Overflow.
+  if (n > IntUtil::maxUInt<size_t>() - capacity)
+    return setError(kErrorNoHeapMemory);

+  // Grow is called when allocation is needed, so this shouldn't happen, but
+  // on the other hand it is simple to catch and it's not an error.
+  if (after <= capacity)
+    return kErrorOk;

+  if (capacity < kMemAllocOverhead)
+    capacity = kMemAllocOverhead;
+  else
+    capacity += kMemAllocOverhead;

+  do {
+    size_t oldCapacity = capacity;

+    if (capacity < kMemAllocGrowMax)
+      capacity *= 2;
+    else
+      capacity += kMemAllocGrowMax;

+    // Overflow (the doubling wrapped around).
+    if (oldCapacity > capacity)
+      return setError(kErrorNoHeapMemory);
+  } while (capacity - kMemAllocOverhead < after);

+  capacity -= kMemAllocOverhead;
+  return _reserve(capacity);
+}
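+
+// Note on the growth policy above: the capacity (including the
+// kMemAllocOverhead slack) doubles each iteration until it reaches
+// kMemAllocGrowMax and then grows linearly by kMemAllocGrowMax, so small
+// appends stay amortized-cheap while very large buffers stop doubling.
+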
+Error BaseAssembler::_reserve(size_t n) {
+  size_t capacity = getCapacity();
+  if (n <= capacity)
+    return kErrorOk;

+  uint8_t* newBuffer;
+  if (_buffer == NULL)
+    newBuffer = static_cast<uint8_t*>(::malloc(n));
+  else
+    newBuffer = static_cast<uint8_t*>(::realloc(_buffer, n));

+  if (newBuffer == NULL)
+    return setError(kErrorNoHeapMemory);

+  size_t offset = getOffset();

+  _buffer = newBuffer;
+  _end = _buffer + n;
+  _cursor = newBuffer + offset;

+  return kErrorOk;
+}

+// ============================================================================
+// [asmjit::BaseAssembler - Label]
+// ============================================================================

+Error BaseAssembler::_registerIndexedLabels(size_t index) {
+  size_t i = _labels.getLength();
+  if (index < i)
+    return kErrorOk;

+  if (_labels._grow(index - i) != kErrorOk)
+    return setError(kErrorNoHeapMemory);

+  LabelData data;
+  data.offset = -1;
+  data.links = NULL;

+  do {
+    _labels.append(data);
+  } while (++i < index);

+  return kErrorOk;
+}

+Error BaseAssembler::_newLabel(Label* dst) {
+  dst->_label.op = kOperandTypeLabel;
+  dst->_label.size = 0;
+  dst->_label.id = OperandUtil::makeLabelId(static_cast<uint32_t>(_labels.getLength()));

+  LabelData data;
+  data.offset = -1;
+  data.links = NULL;

+  if (_labels.append(data) != kErrorOk)
+    goto _NoMemory;
+  return kErrorOk;

+_NoMemory:
+  dst->_label.id = kInvalidValue;
+  return setError(kErrorNoHeapMemory);
+}

+LabelLink* BaseAssembler::_newLabelLink() {
+  LabelLink* link = _unusedLinks;

+  if (link) {
+    _unusedLinks = link->prev;
+  }
+  else {
+    link = _zoneAllocator.allocT<LabelLink>();
+    if (link == NULL)
+      return NULL;
+  }

+  link->prev = NULL;
+  link->offset = 0;
+  link->displacement = 0;
+  link->relocId = -1;

+  return link;
+}

+// ============================================================================
+// [asmjit::BaseAssembler - Embed]
+// ============================================================================

+Error BaseAssembler::embed(const void* data, uint32_t size) {
+  if (getRemainingSpace() < size) {
+    Error error = _grow(size);
+    if (error != kErrorOk)
+      return setError(error);
+  }

+  uint8_t* cursor = getCursor();
+  ::memcpy(cursor, data, size);
+  setCursor(cursor + size);

+  if (_logger)
+    _logger->logBinary(kLoggerStyleData, data, size);

+  return kErrorOk;
+}

+// ============================================================================
+// [asmjit::BaseAssembler - Make]
+// ============================================================================

+void* BaseAssembler::make() {
+  // Do nothing on error condition or if no instruction has been emitted.
+ if (_error != kErrorOk || getCodeSize() == 0) + return NULL; + + void* p; + Error error = _runtime->add(&p, this); + + if (error != kErrorOk) + setError(error); + + return p; +} + +// ============================================================================ +// [asmjit::BaseAssembler - Emit (Helpers)] +// ============================================================================ + +#define no noOperand + +Error BaseAssembler::emit(uint32_t code) { + return _emit(code, no, no, no, no); +} + +Error BaseAssembler::emit(uint32_t code, const Operand& o0) { + return _emit(code, o0, no, no, no); +} + +Error BaseAssembler::emit(uint32_t code, const Operand& o0, const Operand& o1) { + return _emit(code, o0, o1, no, no); +} + +Error BaseAssembler::emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2) { + return _emit(code, o0, o1, o2, no); +} + +Error BaseAssembler::emit(uint32_t code, int o0_) { + return _emit(code, Imm(o0_), no, no, no); +} + +Error BaseAssembler::emit(uint32_t code, const Operand& o0, int o1_) { + return _emit(code, o0, Imm(o1_), no, no); +} + +Error BaseAssembler::emit(uint32_t code, const Operand& o0, const Operand& o1, int o2_) { + return _emit(code, o0, o1, Imm(o2_), no); +} + +Error BaseAssembler::emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, int o3_) { + return _emit(code, o0, o1, o2, Imm(o3_)); +} + +#undef no + +} // asmjit namespace + +// [Api-End] +#include "../base/apiend.h" diff --git a/src/asmjit/base/assembler.h b/src/asmjit/base/assembler.h new file mode 100644 index 0000000..95c75a2 --- /dev/null +++ b/src/asmjit/base/assembler.h @@ -0,0 +1,459 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. +// +// [License] +// Zlib - See LICENSE.md file in the package. + +// [Guard] +#ifndef _ASMJIT_BASE_ASSEMBLER_H +#define _ASMJIT_BASE_ASSEMBLER_H + +// [Dependencies - AsmJit] +#include "../base/codegen.h" +#include "../base/defs.h" +#include "../base/error.h" +#include "../base/logger.h" +#include "../base/podlist.h" +#include "../base/podvector.h" +#include "../base/runtime.h" +#include "../base/zone.h" + +// [Api-Begin] +#include "../base/apibegin.h" + +namespace asmjit { + +//! @addtogroup asmjit_base +//! @{ + +// ============================================================================ +// [asmjit::LabelLink] +// ============================================================================ + +//! @brief Data structure used to link linked-labels. +struct LabelLink { + //! @brief Previous link. + LabelLink* prev; + //! @brief Offset. + intptr_t offset; + //! @brief Inlined displacement. + intptr_t displacement; + //! @brief RelocId if link must be absolute when relocated. + intptr_t relocId; +}; + +// ============================================================================ +// [asmjit::LabelData] +// ============================================================================ + +//! @brief Label data. +struct LabelData { + //! @brief Label offset. + intptr_t offset; + //! @brief Label links chain. + LabelLink* links; +}; + +// ============================================================================ +// [asmjit::RelocData] +// ============================================================================ + +//! @brief Code relocation data (relative vs absolute addresses). +//! +//! X86/X64: +//! +//! X86 architecture uses 32-bit absolute addressing model by memory operands, +//! but 64-bit mode uses relative addressing model (RIP + displacement). In +//! 
+
+// ============================================================================
+// [asmjit::RelocData]
+// ============================================================================
+
+//! @brief Code relocation data (relative vs. absolute addresses).
+//!
+//! X86/X64:
+//!
+//! The X86 architecture uses a 32-bit absolute addressing model for memory
+//! operands, while 64-bit mode uses a relative addressing model (RIP +
+//! displacement). In 64-bit code we always use the relative addressing model
+//! to reference labels and embedded data. In 32-bit mode we must patch all
+//! references to absolute addresses before the generated function can be
+//! called.
+struct RelocData {
+  //! @brief Type of relocation.
+  uint32_t type;
+  //! @brief Size of relocation (4 or 8 bytes).
+  uint32_t size;
+
+  //! @brief Offset from the code begin address.
+  Ptr from;
+
+  //! @brief Relative displacement from the code begin address (not from
+  //! @c from) or an absolute address.
+  Ptr data;
+};
+
+// ============================================================================
+// [asmjit::BaseAssembler]
+// ============================================================================
+
+//! @brief Base assembler.
+//!
+//! This class implements only the core serialization API. Platform-specific
+//! methods and intrinsics are implemented by derived classes.
+//!
+//! @sa BaseCompiler.
+struct BaseAssembler : public CodeGen {
+  ASMJIT_NO_COPY(BaseAssembler)
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a new @ref BaseAssembler instance.
+  ASMJIT_API BaseAssembler(BaseRuntime* runtime);
+  //! @brief Destroy the @ref BaseAssembler instance.
+  ASMJIT_API virtual ~BaseAssembler();
+
+  // --------------------------------------------------------------------------
+  // [Clear / Reset]
+  // --------------------------------------------------------------------------
+
+  //! @brief Clear everything, but do not deallocate buffers.
+  ASMJIT_API void clear();
+  //! @brief Reset everything and free all buffers.
+  ASMJIT_API void reset();
+  //! @brief Called by clear() and reset() to clear all data related to the
+  //! derived class implementation.
+  ASMJIT_API virtual void _purge();
+
+  // --------------------------------------------------------------------------
+  // [Buffer]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get capacity of the code buffer.
+  ASMJIT_INLINE size_t getCapacity() const {
+    return (size_t)(_end - _buffer);
+  }
+
+  //! @brief Get the number of remaining bytes (space between the cursor and
+  //! the end of the buffer).
+  ASMJIT_INLINE size_t getRemainingSpace() const {
+    return (size_t)(_end - _cursor);
+  }
+
+  //! @brief Get buffer.
+  ASMJIT_INLINE uint8_t* getBuffer() const {
+    return _buffer;
+  }
+
+  //! @brief Get the end of the buffer (points to the first byte past the buffer).
+  ASMJIT_INLINE uint8_t* getEnd() const {
+    return _end;
+  }
+
+  //! @brief Get the current position in the buffer.
+  ASMJIT_INLINE uint8_t* getCursor() const {
+    return _cursor;
+  }
+
+  //! @brief Set the current position in the buffer.
+  ASMJIT_INLINE void setCursor(uint8_t* cursor) {
+    ASMJIT_ASSERT(cursor >= _buffer && cursor <= _end);
+    _cursor = cursor;
+  }
+
+  //! @brief Get the current offset in the buffer (_cursor - _buffer).
+  ASMJIT_INLINE size_t getOffset() const {
+    return (size_t)(_cursor - _buffer);
+  }
+
+  //! @brief Set the current offset in the buffer to @a offset and return the
+  //! previous offset.
+  ASMJIT_INLINE size_t setOffset(size_t offset) {
+    ASMJIT_ASSERT(offset < getCapacity());
+
+    size_t oldOffset = (size_t)(_cursor - _buffer);
+    _cursor = _buffer + offset;
+    return oldOffset;
+  }
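+
+  // Example (sketch): rewinding the cursor to patch already emitted code,
+  // assuming `a` is a concrete assembler with some code emitted:
+  //
+  //   size_t fixup = a.getOffset();    // Remember the placeholder position.
+  //   ...                              // Emit more code.
+  //   size_t end = a.setOffset(fixup); // Rewind...
+  //   ...                              // ...overwrite the placeholder...
+  //   a.setOffset(end);                // ...and restore the cursor.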
+
+  //! @brief Grow the internal buffer.
+  //!
+  //! The internal buffer will grow by at least @a n bytes so @a n bytes can
+  //! be added to it. If @a n is zero or getOffset() + @a n does not exceed
+  //! the current capacity of the buffer, this function does nothing.
+  ASMJIT_API Error _grow(size_t n);
+
+  //! @brief Reserve the internal buffer to at least @a n bytes.
+  ASMJIT_API Error _reserve(size_t n);
+
+  //! @brief Get byte at position @a pos.
+  ASMJIT_INLINE uint8_t getByteAt(size_t pos) const {
+    ASMJIT_ASSERT(pos + 1 <= (size_t)(_end - _buffer));
+    return *reinterpret_cast<const uint8_t*>(_buffer + pos);
+  }
+
+  //! @brief Get word at position @a pos.
+  ASMJIT_INLINE uint16_t getWordAt(size_t pos) const {
+    ASMJIT_ASSERT(pos + 2 <= (size_t)(_end - _buffer));
+    return *reinterpret_cast<const uint16_t*>(_buffer + pos);
+  }
+
+  //! @brief Get dword at position @a pos.
+  ASMJIT_INLINE uint32_t getDWordAt(size_t pos) const {
+    ASMJIT_ASSERT(pos + 4 <= (size_t)(_end - _buffer));
+    return *reinterpret_cast<const uint32_t*>(_buffer + pos);
+  }
+
+  //! @brief Get qword at position @a pos.
+  ASMJIT_INLINE uint64_t getQWordAt(size_t pos) const {
+    ASMJIT_ASSERT(pos + 8 <= (size_t)(_end - _buffer));
+    return *reinterpret_cast<const uint64_t*>(_buffer + pos);
+  }
+
+  //! @brief Get int32_t at position @a pos.
+  ASMJIT_INLINE int32_t getInt32At(size_t pos) const {
+    ASMJIT_ASSERT(pos + 4 <= (size_t)(_end - _buffer));
+    return *reinterpret_cast<const int32_t*>(_buffer + pos);
+  }
+
+  //! @brief Get uint32_t at position @a pos.
+  ASMJIT_INLINE uint32_t getUInt32At(size_t pos) const {
+    ASMJIT_ASSERT(pos + 4 <= (size_t)(_end - _buffer));
+    return *reinterpret_cast<const uint32_t*>(_buffer + pos);
+  }
+
+  //! @brief Set byte at position @a pos.
+  ASMJIT_INLINE void setByteAt(size_t pos, uint8_t x) {
+    ASMJIT_ASSERT(pos + 1 <= (size_t)(_end - _buffer));
+    *reinterpret_cast<uint8_t*>(_buffer + pos) = x;
+  }
+
+  //! @brief Set word at position @a pos.
+  ASMJIT_INLINE void setWordAt(size_t pos, uint16_t x) {
+    ASMJIT_ASSERT(pos + 2 <= (size_t)(_end - _buffer));
+    *reinterpret_cast<uint16_t*>(_buffer + pos) = x;
+  }
+
+  //! @brief Set dword at position @a pos.
+  ASMJIT_INLINE void setDWordAt(size_t pos, uint32_t x) {
+    ASMJIT_ASSERT(pos + 4 <= (size_t)(_end - _buffer));
+    *reinterpret_cast<uint32_t*>(_buffer + pos) = x;
+  }
+
+  //! @brief Set qword at position @a pos.
+  ASMJIT_INLINE void setQWordAt(size_t pos, uint64_t x) {
+    ASMJIT_ASSERT(pos + 8 <= (size_t)(_end - _buffer));
+    *reinterpret_cast<uint64_t*>(_buffer + pos) = x;
+  }
+
+  //! @brief Set int32_t at position @a pos.
+  ASMJIT_INLINE void setInt32At(size_t pos, int32_t x) {
+    ASMJIT_ASSERT(pos + 4 <= (size_t)(_end - _buffer));
+    *reinterpret_cast<int32_t*>(_buffer + pos) = x;
+  }
+
+  //! @brief Set uint32_t at position @a pos.
+  ASMJIT_INLINE void setUInt32At(size_t pos, uint32_t x) {
+    ASMJIT_ASSERT(pos + 4 <= (size_t)(_end - _buffer));
+    *reinterpret_cast<uint32_t*>(_buffer + pos) = x;
+  }
+
+  // --------------------------------------------------------------------------
+  // [GetCodeSize]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get the current code size (same as getOffset() + getTrampolineSize()).
+  ASMJIT_INLINE size_t getCodeSize() const {
+    return getOffset() + getTrampolineSize();
+  }
+
+  // --------------------------------------------------------------------------
+  // [GetTrampolineSize]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get the size of all trampolines needed to successfully generate
+  //! relative jumps to absolute addresses. This value is only non-zero if jmp
+  //! or call instructions were used with an immediate operand (that is,
+  //! jumping or calling an absolute address directly).
+  ASMJIT_INLINE size_t getTrampolineSize() const {
+    return _trampolineSize;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Label]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get count of labels created.
+  ASMJIT_INLINE size_t getLabelsCount() const {
+    return _labels.getLength();
+  }
+
+  //! @brief Get whether @a label is created.
+  ASMJIT_INLINE bool isLabelCreated(const Label& label) const {
+    return static_cast<size_t>(label.getId()) < _labels.getLength();
+  }
+
+  //! @internal
+  //!
+  //! @brief Register labels for another code generator (@ref Compiler).
+  ASMJIT_API Error _registerIndexedLabels(size_t index);
+
+  //! @internal
+  //!
+  //! @brief Create and initialize a new label.
+  ASMJIT_API Error _newLabel(Label* dst);
+
+  //! @internal
+  //!
+  //! @brief Create a new LabelLink instance.
+  ASMJIT_API LabelLink* _newLabelLink();
+
+  //! @brief Create and return a new label.
+  ASMJIT_INLINE Label newLabel() {
+    Label result(DontInitialize);
+    _newLabel(&result);
+    return result;
+  }
+
+  //! @brief Bind label to the current offset (virtual).
+  virtual void _bind(const Label& label) = 0;
+
+  //! @brief Bind label to the current offset.
+  //!
+  //! @note A label can be bound only once!
+  ASMJIT_INLINE void bind(const Label& label) {
+    _bind(label);
+  }
+
+  // --------------------------------------------------------------------------
+  // [Embed]
+  // --------------------------------------------------------------------------
+
+  //! @brief Embed data into the code buffer.
+  ASMJIT_API Error embed(const void* data, uint32_t size);
+
+  // --------------------------------------------------------------------------
+  // [Align]
+  // --------------------------------------------------------------------------
+
+  //! @brief Align the target buffer to @a m bytes.
+  //!
+  //! Typical use is to align labels at the start of inner loops.
+  //!
+  //! Inserts @c nop() instructions or CPU optimized NOPs.
+  ASMJIT_INLINE Error align(uint32_t m) {
+    return _align(m);
+  }
+
+  //! @brief Align the target buffer to @a m bytes (virtual).
+  virtual Error _align(uint32_t m) = 0;
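+
+  // Example (sketch): a label bound to an aligned loop header, assuming `a`
+  // is a concrete assembler; the jump mnemonic itself is provided by the
+  // platform-specific derived class:
+  //
+  //   Label loop = a.newLabel();
+  //   a.align(16);    // Optimized NOPs unless the feature is disabled.
+  //   a.bind(loop);   // Bind the label to the current offset.
+  //   ...             // Loop body.
+  //   // a.jmp(loop); // Platform-specific jump back to the label.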
+
+  // --------------------------------------------------------------------------
+  // [Reloc]
+  // --------------------------------------------------------------------------
+
+  //! @brief Simplified version of the @c relocCode() method designed for JIT.
+  //!
+  //! @overload
+  ASMJIT_INLINE size_t relocCode(void* dst) const {
+    return _relocCode(dst, static_cast<Ptr>((uintptr_t)dst));
+  }
+
+  //! @brief Relocate the code to a given address @a dst.
+  //!
+  //! @param dst Where the relocated code should be stored. The pointer can be
+  //! an address returned by a virtual memory allocator, or your own buffer if
+  //! you only want to store the code for later use (or load, etc...).
+  //! @param base Base address used for relocation. When using JIT code
+  //! generation this is the same as @a dst, only cast to a system integer
+  //! type; when generating code for a remote process the value can differ.
+  //!
+  //! @return The number of bytes used. The code generator can create
+  //! trampolines which are used when calling other functions inside the JIT
+  //! code. However, these trampolines can be unused, so relocCode() returns
+  //! the exact size needed for the function.
+  //!
+  //! The given buffer will be overwritten; to get the number of bytes
+  //! required use @c getCodeSize().
+  ASMJIT_INLINE size_t relocCode(void* dst, Ptr base) const {
+    return _relocCode(dst, base);
+  }
+
+  //! @brief Relocate code (virtual).
+  virtual size_t _relocCode(void* dst, Ptr base) const = 0;
+
+  // --------------------------------------------------------------------------
+  // [Make]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_API void* make();
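+
+  // Example (sketch): relocating the generated code manually instead of
+  // calling make(); `a` is a concrete assembler and `vmem` stands for any
+  // executable-memory allocator (a hypothetical helper, not an AsmJit API):
+  //
+  //   size_t codeSize = a.getCodeSize();
+  //   void* p = vmem.alloc(codeSize);   // Executable memory.
+  //   size_t usedSize = a.relocCode(p); // Copy, patch, emit trampolines.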
+
+  // --------------------------------------------------------------------------
+  // [Emit]
+  // --------------------------------------------------------------------------
+
+  //! @brief Emit an instruction.
+  ASMJIT_API Error emit(uint32_t code);
+  //! @overload
+  ASMJIT_API Error emit(uint32_t code, const Operand& o0);
+  //! @overload
+  ASMJIT_API Error emit(uint32_t code, const Operand& o0, const Operand& o1);
+  //! @overload
+  ASMJIT_API Error emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2);
+  //! @overload
+  ASMJIT_INLINE Error emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, const Operand& o3) {
+    return _emit(code, o0, o1, o2, o3);
+  }
+
+  //! @brief Emit an instruction with an integer immediate operand.
+  ASMJIT_API Error emit(uint32_t code, int o0);
+  //! @overload
+  ASMJIT_API Error emit(uint32_t code, const Operand& o0, int o1);
+  //! @overload
+  ASMJIT_API Error emit(uint32_t code, const Operand& o0, const Operand& o1, int o2);
+  //! @overload
+  ASMJIT_API Error emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, int o3);
+
+  //! @brief Emit an instruction (virtual).
+  virtual Error _emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, const Operand& o3) = 0;
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! @brief Buffer where the code is emitted (either live or temporary).
+  //!
+  //! This is the base pointer of the buffer; to get the current position
+  //! (cursor) see the @c _cursor member.
+  uint8_t* _buffer;
+  //! @brief The end of the buffer (points to the first invalid byte).
+  //!
+  //! The end of the buffer is calculated as _buffer + size.
+  uint8_t* _end;
+  //! @brief The current position in the code @c _buffer.
+  uint8_t* _cursor;
+
+  //! @brief Size of possible trampolines.
+  uint32_t _trampolineSize;
+
+  //! @brief Inline comment that will be logged with the next instruction and
+  //! then set back to NULL.
+  const char* _comment;
+  //! @brief Linked list of unused links (@c LabelLink* structures).
+  LabelLink* _unusedLinks;
+
+  //! @brief Labels data.
+  PodVector<LabelData> _labels;
+  //! @brief Relocations data.
+  PodVector<RelocData> _relocData;
+};
+
+//! @}
+
+// ============================================================================
+// [Defined-Later]
+// ============================================================================
+
+ASMJIT_INLINE Label::Label(BaseAssembler& a) : Operand(DontInitialize) {
+  a._newLabel(this);
+}
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../base/apiend.h"
+
+// [Guard]
+#endif // _ASMJIT_BASE_ASSEMBLER_H
diff --git a/src/asmjit/base/assert.cpp b/src/asmjit/base/assert.cpp
new file mode 100644
index 0000000..8db6078
--- /dev/null
+++ b/src/asmjit/base/assert.cpp
@@ -0,0 +1,31 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Export]
+#define ASMJIT_EXPORTS
+
+// [Dependencies - AsmJit]
+#include "../base/assert.h"
+
+// [Api-Begin]
+#include "../base/apibegin.h"
+
+namespace asmjit {
+
+// ============================================================================
+// [asmjit::Assert]
+// ============================================================================
+
+void assertionFailed(const char* exp, const char* file, int line) {
+  ::fprintf(stderr, "Assertion failed: %s, file %s, line %d\n", exp, file, line);
+  ::abort();
+}
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../base/apiend.h"
diff --git a/src/asmjit/base/assert.h b/src/asmjit/base/assert.h
new file mode 100644
index 0000000..3a2985c
--- /dev/null
+++ b/src/asmjit/base/assert.h
@@ -0,0 +1,65 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Guard]
+#ifndef _ASMJIT_BASE_ASSERT_H
+#define _ASMJIT_BASE_ASSERT_H
+
+// [Dependencies - AsmJit]
+#include "../build.h"
+
+// [Api-Begin]
+#include "../base/apibegin.h"
+
+namespace asmjit {
+
+//! @addtogroup asmjit_base
+//! @{
+
+// ============================================================================
+// [asmjit::Assert]
+// ============================================================================
+
+//! @brief Called in debug build on assertion failure.
+//!
+//! @param exp Expression that failed.
+//! @param file Source file name where it happened.
+//! @param line Line in the source file.
+//!
+//! If you have problems with assertions put a breakpoint at the
+//! assertionFailed() function (asmjit/base/assert.cpp) to see what happened.
+ASMJIT_API void assertionFailed(const char* exp, const char* file, int line);
+
+// ============================================================================
+// [ASMJIT_ASSERT]
+// ============================================================================
+
+#if defined(ASMJIT_DEBUG)
+
+#if !defined(ASMJIT_ASSERT)
+#define ASMJIT_ASSERT(_Exp_) \
+  do { \
+    if (!(_Exp_)) ::asmjit::assertionFailed(#_Exp_, __FILE__, __LINE__); \
+  } while (0)
+#endif
+
+#else
+
+#if !defined(ASMJIT_ASSERT)
+#define ASMJIT_ASSERT(_Exp_) ASMJIT_NOP()
+#endif
+
+#endif // ASMJIT_DEBUG
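+
+// Example (sketch): ASMJIT_ASSERT guards internal invariants and compiles to
+// ASMJIT_NOP() in release builds:
+//
+//   ASMJIT_ASSERT(cursor >= _buffer && cursor <= _end);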
+
+//! @}
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../base/apiend.h"
+
+// [Guard]
+#endif // _ASMJIT_BASE_ASSERT_H
diff --git a/src/asmjit/base/codegen.cpp b/src/asmjit/base/codegen.cpp
new file mode 100644
index 0000000..0b5f053
--- /dev/null
+++ b/src/asmjit/base/codegen.cpp
@@ -0,0 +1,118 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Export]
+#define ASMJIT_EXPORTS
+
+// [Dependencies - AsmJit]
+#include "../base/codegen.h"
+#include "../base/intutil.h"
+
+// [Api-Begin]
+#include "../base/apibegin.h"
+
+namespace asmjit {
+
+// ============================================================================
+// [asmjit::CodeGen - Construction / Destruction]
+// ============================================================================
+
+CodeGen::CodeGen(BaseRuntime* runtime) :
+  _runtime(runtime),
+  _logger(NULL),
+  _errorHandler(NULL),
+  _arch(kArchNone),
+  _regSize(0),
+  _error(kErrorOk),
+  _features(IntUtil::mask(kCodeGenOptimizedAlign)),
+  _options(0),
+  _zoneAllocator(16384 - sizeof(Zone::Chunk) - kMemAllocOverhead) {}
+
+CodeGen::~CodeGen() {
+  if (_errorHandler != NULL)
+    _errorHandler->release();
+}
+
+// ============================================================================
+// [asmjit::CodeGen - Logging]
+// ============================================================================
+
+Error CodeGen::setLogger(BaseLogger* logger) {
+  _logger = logger;
+  return kErrorOk;
+}
+
+// ============================================================================
+// [asmjit::CodeGen - Error]
+// ============================================================================
+
+Error CodeGen::setError(Error error, const char* message) {
+  if (error == kErrorOk) {
+    _error = kErrorOk;
+    return kErrorOk;
+  }
+
+  if (message == NULL)
+    message = ErrorUtil::asString(error);
+
+  // The error handler is called before the logger so logging can be skipped
+  // if the error has been handled.
+  ErrorHandler* handler = _errorHandler;
+  if (handler != NULL && handler->handleError(error, message))
+    return error;
+
+  BaseLogger* logger = _logger;
+  if (logger != NULL) {
+    logger->logFormat(kLoggerStyleComment,
+      "*** ERROR: %s (%u).\n", message, static_cast<unsigned int>(error));
+  }
+
+  // The handler->handleError() function may throw an exception or longjmp()
+  // to terminate the execution of setError(). This is the reason why we have
+  // delayed changing the _error member until now.
+  _error = error;
+  return error;
+}
+
+Error CodeGen::setErrorHandler(ErrorHandler* handler) {
+  ErrorHandler* oldHandler = _errorHandler;
+
+  if (oldHandler != NULL)
+    oldHandler->release();
+
+  if (handler != NULL)
+    handler = handler->addRef();
+
+  _errorHandler = handler;
+  return kErrorOk;
+}
+
+// ============================================================================
+// [asmjit::CodeGen - Features]
+// ============================================================================
+
+bool CodeGen::hasFeature(uint32_t feature) const {
+  if (feature >= sizeof(_features) * 8)
+    return false;
+
+  return (_features & (1U << feature)) != 0;
+}
+
+Error CodeGen::setFeature(uint32_t feature, bool value) {
+  if (feature >= sizeof(_features) * 8)
+    return setError(kErrorInvalidArgument);
+
+  // Set or clear the feature bit.
+  uint32_t mask = 1U << feature;
+  if (value)
+    _features = static_cast<uint8_t>(_features | mask);
+  else
+    _features = static_cast<uint8_t>(_features & ~mask);
+
+  return kErrorOk;
+}
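+
+// Example (sketch): a minimal ErrorHandler that aborts on the first error;
+// it assumes only the interface visible above (handleError, addRef/release
+// declared in error.h):
+//
+//   struct FatalErrorHandler : public ErrorHandler {
+//     virtual bool handleError(Error error, const char* message) {
+//       ::fprintf(stderr, "AsmJit error %u: %s\n",
+//         static_cast<unsigned int>(error), message);
+//       ::abort();
+//       return true; // Handled; logging is skipped.
+//     }
+//   };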
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../base/apiend.h"
diff --git a/src/asmjit/base/codegen.h b/src/asmjit/base/codegen.h
new file mode 100644
index 0000000..4397e12
--- /dev/null
+++ b/src/asmjit/base/codegen.h
@@ -0,0 +1,204 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Guard]
+#ifndef _ASMJIT_BASE_CODEGEN_H
+#define _ASMJIT_BASE_CODEGEN_H
+
+// [Dependencies - AsmJit]
+#include "../base/defs.h"
+#include "../base/error.h"
+#include "../base/logger.h"
+#include "../base/runtime.h"
+#include "../base/zone.h"
+
+// [Api-Begin]
+#include "../base/apibegin.h"
+
+namespace asmjit {
+
+//! @addtogroup asmjit_base
+//! @{
+
+// ============================================================================
+// [asmjit::kCodeGen]
+// ============================================================================
+
+//! @brief @ref CodeGen features.
+ASMJIT_ENUM(kCodeGen) {
+  //! @brief Emit optimized code-alignment sequences.
+  //!
+  //! X86/X64:
+  //!
+  //! The default align sequence used by the X86/X64 architecture is a
+  //! one-byte 0x90 opcode that disassemblers usually show as nop. However,
+  //! there are more optimized align sequences for 2-11 bytes that may execute
+  //! faster. If this feature is enabled, asmjit will generate specialized
+  //! sequences for alignments between 1 and 11 bytes. Also, when
+  //! @ref x86x64::Compiler is used, it may add rex prefixes into the code to
+  //! make some instructions larger so that no alignment sequences are needed.
+  //!
+  //! @default true.
+  kCodeGenOptimizedAlign = 0,
+
+  //! @brief Emit jump-prediction hints.
+  //!
+  //! X86/X64:
+  //!
+  //! Jump prediction is usually based on the direction of the jump. If the
+  //! jump is backward it is usually predicted as taken, and if the jump is
+  //! forward it is usually predicted as not-taken. The reason is that loops
+  //! generally use backward jumps and conditions usually use forward jumps.
+  //! However, this behavior can be overridden by using instruction prefixes.
+  //! If this option is enabled, these hints will be emitted.
+  //!
+  //! @default true.
+  kCodeGenPredictedJumps = 1
+};
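+
+// Example (sketch): features are per code-generator flags, assuming `cg` is
+// any CodeGen-derived instance (Assembler or Compiler):
+//
+//   cg.setFeature(kCodeGenOptimizedAlign, false); // Plain one-byte NOPs.
+//   bool hints = cg.hasFeature(kCodeGenPredictedJumps);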
+
+// ============================================================================
+// [asmjit::CodeGen]
+// ============================================================================
+
+//! @brief Abstract class inherited by @ref Assembler and @ref Compiler.
+struct CodeGen {
+  ASMJIT_NO_COPY(CodeGen)
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a new @ref CodeGen instance.
+  ASMJIT_API CodeGen(BaseRuntime* runtime);
+  //! @brief Destroy the @ref CodeGen instance.
+  ASMJIT_API virtual ~CodeGen();
+
+  // --------------------------------------------------------------------------
+  // [Runtime]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get runtime.
+  ASMJIT_INLINE BaseRuntime* getRuntime() const { return _runtime; }
+
+  // --------------------------------------------------------------------------
+  // [Logger]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get whether the code generator has a logger.
+  ASMJIT_INLINE bool hasLogger() const { return _logger != NULL; }
+  //! @brief Get logger.
+  ASMJIT_INLINE BaseLogger* getLogger() const { return _logger; }
+  //! @brief Set logger to @a logger.
+  ASMJIT_API Error setLogger(BaseLogger* logger);
+
+  // --------------------------------------------------------------------------
+  // [Arch]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get target architecture.
+  ASMJIT_INLINE uint32_t getArch() const { return _arch; }
+
+  //! @brief Get default register size (4 or 8 bytes).
+  ASMJIT_INLINE uint32_t getRegSize() const { return _regSize; }
+
+  // --------------------------------------------------------------------------
+  // [Error]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get last error code.
+  ASMJIT_INLINE Error getError() const { return _error; }
+  //! @brief Set last error code and propagate it through the error handler.
+  ASMJIT_API Error setError(Error error, const char* message = NULL);
+  //! @brief Clear the last error code.
+  ASMJIT_INLINE void clearError() { _error = kErrorOk; }
+
+  //! @brief Get error handler.
+  ASMJIT_INLINE ErrorHandler* getErrorHandler() const { return _errorHandler; }
+  //! @brief Set error handler.
+  ASMJIT_API Error setErrorHandler(ErrorHandler* handler);
+  //! @brief Clear error handler.
+  ASMJIT_INLINE Error clearErrorHandler() { return setErrorHandler(NULL); }
+
+  // --------------------------------------------------------------------------
+  // [Features]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get code-generator @a feature.
+  ASMJIT_API bool hasFeature(uint32_t feature) const;
+  //! @brief Set code-generator @a feature to @a value.
+  ASMJIT_API Error setFeature(uint32_t feature, bool value);
+
+  // --------------------------------------------------------------------------
+  // [Options]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get options.
+  ASMJIT_INLINE uint32_t getOptions() const { return _options; }
+  //! @brief Set options.
+  ASMJIT_INLINE void setOptions(uint32_t options) { _options = options; }
+
+  //! @brief Get options and clear them.
+  ASMJIT_INLINE uint32_t getOptionsAndClear() {
+    uint32_t options = _options;
+    _options = 0;
+    return options;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Purge]
+  // --------------------------------------------------------------------------
+
+  //! @brief Called by clear() and reset() to clear all data used by the code
+  //! generator.
+  virtual void _purge() = 0;
+
+  // --------------------------------------------------------------------------
+  // [Make]
+  // --------------------------------------------------------------------------
+
+  //! @brief Make is a convenience method that serializes and relocates the
+  //! generated code into the associated runtime.
+  //!
+  //! Simply cast the returned pointer to your function type and use it. If
+  //! there was an error during make(), @c NULL is returned and the last error
+  //! code can be obtained by calling @ref getError().
+  virtual void* make() = 0;
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! @brief Runtime.
+  BaseRuntime* _runtime;
+  //! @brief Logger.
+  BaseLogger* _logger;
+  //! @brief Error handler, called by @ref setError().
+  ErrorHandler* _errorHandler;
+
+  //! @brief Target architecture.
+  uint8_t _arch;
+  //! @brief Default register size of the architecture (4 or 8 bytes).
+  uint8_t _regSize;
+  //! @brief Last error code.
+  uint8_t _error;
+  //! @brief Target features.
+  uint8_t _features;
+  //! @brief Options for the next generated instruction (only 8 bits used).
+  uint32_t _options;
+
+  //! @brief Zone memory allocator.
+  Zone _zoneAllocator;
+};
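+
+// Example (sketch): calling make() and checking the error state, assuming
+// `c` is a concrete code generator attached to a runtime:
+//
+//   typedef int (*Func)(void);
+//   Func fn = (Func)c.make(); // NULL on failure.
+//   if (fn == NULL)
+//     printf("Error: %s\n", ErrorUtil::asString(c.getError()));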
+
+//! @}
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../base/apiend.h"
+
+// [Guard]
+#endif // _ASMJIT_BASE_CODEGEN_H
diff --git a/src/asmjit/base/compiler.cpp b/src/asmjit/base/compiler.cpp
new file mode 100644
index 0000000..74e8031
--- /dev/null
+++ b/src/asmjit/base/compiler.cpp
@@ -0,0 +1,549 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Export]
+#define ASMJIT_EXPORTS
+
+// [Dependencies - AsmJit]
+#include "../base/assembler.h"
+#include "../base/compiler.h"
+#include "../base/context_p.h"
+#include "../base/cpu.h"
+#include "../base/intutil.h"
+#include "../base/logger.h"
+
+// [Dependencies - C]
+#include <stdarg.h>
+
+// [Api-Begin]
+#include "../base/apibegin.h"
+
+namespace asmjit {
+
+// ============================================================================
+// [Constants]
+// ============================================================================
+
+static const char noName[1] = { '\0' };
+enum { kBaseCompilerDefaultLookAhead = 64 };
+
+// ============================================================================
+// [asmjit::BaseCompiler - Construction / Destruction]
+// ============================================================================
+
+BaseCompiler::BaseCompiler(BaseRuntime* runtime) :
+  CodeGen(runtime),
+  _nodeFlowId(0),
+  _nodeFlags(0),
+  _maxLookAhead(kBaseCompilerDefaultLookAhead),
+  _targetVarMapping(NULL),
+  _firstNode(NULL),
+  _lastNode(NULL),
+  _cursor(NULL),
+  _func(NULL),
+  _varAllocator(4096 - kMemAllocOverhead),
+  _stringAllocator(4096 - kMemAllocOverhead) {}
+
+BaseCompiler::~BaseCompiler() {
+  reset();
+}
+
+// ============================================================================
+// [asmjit::BaseCompiler - Clear / Reset]
+// ============================================================================
+
+void BaseCompiler::clear() {
+  _purge();
+}
+
+void BaseCompiler::reset() {
+  _purge();
+  _zoneAllocator.reset();
+
+  _varAllocator.reset();
+  _stringAllocator.reset();
+
+  _targets.reset();
+  _vars.reset();
+}
+
+void BaseCompiler::_purge() {
+  _zoneAllocator.clear();
+
+  _varAllocator.clear();
+  _stringAllocator.clear();
+
+  _options = 0;
+
+  _firstNode = NULL;
+  _lastNode = NULL;
+
+  _cursor = NULL;
+  _func = NULL;
+
+  _targets.clear();
+  _vars.clear();
+
+  clearError();
+}
+
+// ============================================================================
+// [asmjit::BaseCompiler - Node Management]
+// ============================================================================
+
+BaseNode* BaseCompiler::setCursor(BaseNode* node) {
+  BaseNode* old = _cursor;
+  _cursor = node;
+  return old;
+}
+
+BaseNode* BaseCompiler::addNode(BaseNode* node) {
+  ASMJIT_ASSERT(node != NULL);
+  ASMJIT_ASSERT(node->_prev == NULL);
+  ASMJIT_ASSERT(node->_next == NULL);
+
+  if (_cursor == NULL) {
+    if (_firstNode == NULL) {
+      _firstNode = node;
+      _lastNode = node;
+    }
+    else {
+      node->_next = _firstNode;
+      _firstNode->_prev = node;
+      _firstNode = node;
+    }
+  }
+  else {
+    BaseNode* prev = _cursor;
+    BaseNode* next = _cursor->_next;
+
+    node->_prev = prev;
+    node->_next = next;
+
+    prev->_next = node;
+    if (next)
+      next->_prev = node;
+    else
+      _lastNode = node;
+  }
+
+  _cursor = node;
+  return node;
+}
+
+BaseNode* BaseCompiler::addNodeBefore(BaseNode* node, BaseNode* ref) {
+  ASMJIT_ASSERT(node != NULL);
+  ASMJIT_ASSERT(node->_prev == NULL);
+  ASMJIT_ASSERT(node->_next == NULL);
+  ASMJIT_ASSERT(ref != NULL);
+
+  BaseNode* prev = ref->_prev;
+  BaseNode* next = ref;
+
+  node->_prev = prev;
+  node->_next = next;
+
+  next->_prev = node;
+  if (prev)
+    prev->_next = node;
+  else
+    _firstNode = node;
+
+  return node;
+}
+
+BaseNode* BaseCompiler::addNodeAfter(BaseNode* node, BaseNode* ref) {
+  ASMJIT_ASSERT(node != NULL);
+  ASMJIT_ASSERT(node->_prev == NULL);
+  ASMJIT_ASSERT(node->_next == NULL);
+  ASMJIT_ASSERT(ref != NULL);
+
+  BaseNode* prev = ref;
+  BaseNode* next = ref->_next;
+
+  node->_prev = prev;
+  node->_next = next;
+
+  prev->_next = node;
+  if (next)
+    next->_prev = node;
+  else
+    _lastNode = node;
+
+  return node;
+}
+
+BaseNode* BaseCompiler::removeNode(BaseNode* node) {
+  BaseNode* prev = node->_prev;
+  BaseNode* next = node->_next;
+
+  if (_firstNode == node)
+    _firstNode = next;
+  else
+    prev->_next = next;
+
+  if (_lastNode == node)
+    _lastNode = prev;
+  else
+    next->_prev = prev;
+
+  node->_prev = NULL;
+  node->_next = NULL;
+
+  if (_cursor == node)
+    _cursor = prev;
+
+  return node;
+}
+
+void BaseCompiler::removeNodes(BaseNode* first, BaseNode* last) {
+  if (first == last) {
+    removeNode(first);
+    return;
+  }
+
+  BaseNode* prev = first->_prev;
+  BaseNode* next = last->_next;
+
+  if (_firstNode == first)
+    _firstNode = next;
+  else
+    prev->_next = next;
+
+  if (_lastNode == last)
+    _lastNode = prev;
+  else
+    next->_prev = prev;
+
+  BaseNode* node = first;
+  for (;;) {
+    BaseNode* next = node->getNext();
+    ASMJIT_ASSERT(next != NULL);
+
+    node->_prev = NULL;
+    node->_next = NULL;
+
+    if (_cursor == node)
+      _cursor = prev;
+
+    if (node == last)
+      break;
+    node = next;
+  }
+}
+
+// ============================================================================
+// [asmjit::BaseCompiler - Align]
+// ============================================================================
+
+AlignNode* BaseCompiler::newAlign(uint32_t m) {
+  AlignNode* node = newNode<AlignNode>(m);
+  if (node == NULL)
+    goto _NoMemory;
+  return node;
+
+_NoMemory:
+  setError(kErrorNoHeapMemory);
+  return NULL;
+}
+
+AlignNode* BaseCompiler::addAlign(uint32_t m) {
+  AlignNode* node = newAlign(m);
+  if (node == NULL)
+    return NULL;
+  return static_cast<AlignNode*>(addNode(node));
+}
+
+// ============================================================================
+// [asmjit::BaseCompiler - Target]
+// ============================================================================
+
+TargetNode* BaseCompiler::newTarget() {
+  TargetNode* node = newNode<TargetNode>(
+    OperandUtil::makeLabelId(static_cast<uint32_t>(_targets.getLength())));
+
+  if (node == NULL || _targets.append(node) != kErrorOk)
+    goto _NoMemory;
+  return node;
+
+_NoMemory:
+  setError(kErrorNoHeapMemory);
+  return NULL;
+}
+
+TargetNode* BaseCompiler::addTarget() {
+  TargetNode* node = newTarget();
+  if (node == NULL)
+    return NULL;
+  return static_cast<TargetNode*>(addNode(node));
+}
+
+// ============================================================================
+// [asmjit::BaseCompiler - Label]
+// ============================================================================
+
+Error BaseCompiler::_newLabel(Label* dst) {
+  dst->_init_packed_op_sz_b0_b1_id(kOperandTypeLabel, 0, 0, 0, kInvalidValue);
+  dst->_init_packed_d2_d3(0, 0);
+
+  TargetNode* node = newTarget();
+  if (node == NULL)
+    goto _NoMemory;
+
+  dst->_label.id = node->getLabelId();
+  return kErrorOk;
+
+_NoMemory:
+  return setError(kErrorNoHeapMemory);
+}
+
+void BaseCompiler::bind(const Label& label) {
+  uint32_t index = label.getId();
+  ASMJIT_ASSERT(index < _targets.getLength());
+
+  addNode(_targets[index]);
+}
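+
+// Example (sketch): addNode() emits at the cursor, so code can be inserted
+// at a remembered position, assuming `c` is a BaseCompiler and `node` was
+// stored earlier:
+//
+//   BaseNode* old = c.setCursor(node); // Emit after `node`...
+//   ...
+//   c.setCursor(old);                  // ...then continue where we left off.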
+
+// ============================================================================
+// [asmjit::BaseCompiler - Embed]
+// ============================================================================
+
+EmbedNode* BaseCompiler::newEmbed(const void* data, uint32_t size) {
+  EmbedNode* node;
+
+  if (size > EmbedNode::kInlineBufferSize) {
+    void* clonedData = _stringAllocator.alloc(size);
+    if (clonedData == NULL)
+      goto _NoMemory;
+
+    ::memcpy(clonedData, data, size);
+    data = clonedData;
+  }
+
+  node = newNode<EmbedNode>(const_cast<void*>(data), size);
+  if (node == NULL)
+    goto _NoMemory;
+  return node;
+
+_NoMemory:
+  setError(kErrorNoHeapMemory);
+  return NULL;
+}
+
+EmbedNode* BaseCompiler::addEmbed(const void* data, uint32_t size) {
+  EmbedNode* node = newEmbed(data, size);
+  if (node == NULL)
+    return node;
+  return static_cast<EmbedNode*>(addNode(node));
+}
+
+// ============================================================================
+// [asmjit::BaseCompiler - Comment]
+// ============================================================================
+
+CommentNode* BaseCompiler::newComment(const char* str) {
+  CommentNode* node;
+
+  if (str != NULL && str[0]) {
+    str = _stringAllocator.sdup(str);
+    if (str == NULL)
+      goto _NoMemory;
+  }
+
+  node = newNode<CommentNode>(str);
+  if (node == NULL)
+    goto _NoMemory;
+  return node;
+
+_NoMemory:
+  setError(kErrorNoHeapMemory);
+  return NULL;
+}
+
+CommentNode* BaseCompiler::addComment(const char* str) {
+  CommentNode* node = newComment(str);
+  if (node == NULL)
+    return NULL;
+  return static_cast<CommentNode*>(addNode(node));
+}
+
+CommentNode* BaseCompiler::comment(const char* fmt, ...) {
+  char buf[256];
+  char* p = buf;
+
+  if (fmt) {
+    *p++ = ';';
+    *p++ = ' ';
+
+    va_list ap;
+    va_start(ap, fmt);
+    int len = vsnprintf(p, 253, fmt, ap);
+    va_end(ap);
+
+    // vsnprintf() can return a negative value or the untruncated length;
+    // clamp it so the trailing newline always fits into `buf`.
+    if (len < 0) len = 0;
+    if (len > 252) len = 252;
+    p += len;
+  }
+
+  p[0] = '\n';
+  p[1] = '\0';
+
+  return addComment(buf);
+}
+
+// ============================================================================
+// [asmjit::BaseCompiler - Hint]
+// ============================================================================
+
+HintNode* BaseCompiler::newHint(BaseVar& var, uint32_t hint, uint32_t value) {
+  if (var.getId() == kInvalidValue)
+    return NULL;
+  VarData* vd = getVd(var);
+
+  HintNode* node = newNode<HintNode>(vd, hint, value);
+  if (node == NULL)
+    goto _NoMemory;
+  return node;
+
+_NoMemory:
+  setError(kErrorNoHeapMemory);
+  return NULL;
+}
+
+HintNode* BaseCompiler::addHint(BaseVar& var, uint32_t hint, uint32_t value) {
+  if (var.getId() == kInvalidValue)
+    return NULL;
+
+  HintNode* node = newHint(var, hint, value);
+  if (node == NULL)
+    return NULL;
+  return static_cast<HintNode*>(addNode(node));
+}
+
+// ============================================================================
+// [asmjit::BaseCompiler - Vars]
+// ============================================================================
+
+VarData* BaseCompiler::_newVd(uint32_t type, uint32_t size, uint32_t c, const char* name) {
+  VarData* vd = reinterpret_cast<VarData*>(_varAllocator.alloc(sizeof(VarData)));
+  if (vd == NULL)
+    goto _NoMemory;
+
+  vd->_name = noName;
+  vd->_id = OperandUtil::makeVarId(static_cast<uint32_t>(_vars.getLength()));
+  vd->_contextId = kInvalidValue;
+
+  if (name != NULL && name[0] != '\0') {
+    vd->_name = _stringAllocator.sdup(name);
+  }
+
+  vd->_type = static_cast<uint8_t>(type);
+  vd->_class = static_cast<uint8_t>(c);
+  vd->_flags = 0;
+  vd->_priority = 10;
+
+  vd->_state = kVarStateUnused;
+  vd->_regIndex = kInvalidReg;
+  vd->_isStack = false;
+  vd->_isMemArg = false;
+  vd->_isCalculated = false;
+  vd->_saveOnUnuse = false;
+  vd->_modified = false;
+  vd->_reserved0 = 0;
+  vd->_alignment = static_cast<uint8_t>(IntUtil::iMin<uint32_t>(size, 64));
+
+  vd->_size = size;
+
+  vd->_memOffset = 0;
+  vd->_memCell = NULL;
+
+  vd->rReadCount = 0;
+  vd->rWriteCount = 0;
+  vd->mReadCount = 0;
+  vd->mWriteCount = 0;
+
+  vd->_va = NULL;
+
+  if (_vars.append(vd) != kErrorOk)
+    goto _NoMemory;
+  return vd;
+
+_NoMemory:
+  setError(kErrorNoHeapMemory);
+  return NULL;
+}
+
+void BaseCompiler::alloc(BaseVar& var) {
+  addHint(var, kVarHintAlloc, kInvalidValue);
+}
+
+void BaseCompiler::alloc(BaseVar& var, uint32_t regIndex) {
+  addHint(var, kVarHintAlloc, regIndex);
+}
+
+void BaseCompiler::alloc(BaseVar& var, const BaseReg& reg) {
+  addHint(var, kVarHintAlloc, reg.getRegIndex());
+}
+
+void BaseCompiler::save(BaseVar& var) {
+  addHint(var, kVarHintSave, kInvalidValue);
+}
+
+void BaseCompiler::spill(BaseVar& var) {
+  addHint(var, kVarHintSpill, kInvalidValue);
+}
+
+void BaseCompiler::unuse(BaseVar& var) {
+  addHint(var, kVarHintUnuse, kInvalidValue);
+}
+
+uint32_t BaseCompiler::getPriority(BaseVar& var) const {
+  if (var.getId() == kInvalidValue)
+    return kInvalidValue;
+
+  VarData* vd = getVdById(var.getId());
+  return vd->getPriority();
+}
+
+void BaseCompiler::setPriority(BaseVar& var, uint32_t priority) {
+  if (var.getId() == kInvalidValue)
+    return;
+
+  if (priority > 255)
+    priority = 255;
+
+  VarData* vd = getVdById(var.getId());
+  vd->_priority = static_cast<uint8_t>(priority);
+}
+
+bool BaseCompiler::getSaveOnUnuse(BaseVar& var) const {
+  if (var.getId() == kInvalidValue)
+    return false;
+
+  VarData* vd = getVdById(var.getId());
+  return static_cast<bool>(vd->_saveOnUnuse);
+}
+
+void BaseCompiler::setSaveOnUnuse(BaseVar& var, bool value) {
+  if (var.getId() == kInvalidValue)
+    return;
+
+  VarData* vd = getVdById(var.getId());
+  vd->_saveOnUnuse = value;
+}
+
+void BaseCompiler::rename(BaseVar& var, const char* name) {
+  if (var.getId() == kInvalidValue)
+    return;
+
+  VarData* vd = getVdById(var.getId());
+  vd->_name = noName;
+
+  if (name != NULL && name[0] != '\0') {
+    vd->_name = _stringAllocator.sdup(name);
+  }
+}
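+
+// Example (sketch): hints are emitted as HintNodes at the cursor, assuming
+// `c` is a concrete compiler and `v` is a variable created by it:
+//
+//   c.alloc(v); // kVarHintAlloc - allocate `v` into a register.
+//   c.spill(v); // kVarHintSpill - spill `v` to its home memory slot.
+//   c.unuse(v); // kVarHintUnuse - end the variable's scope.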
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../base/apiend.h"
diff --git a/src/asmjit/base/compiler.h b/src/asmjit/base/compiler.h
new file mode 100644
index 0000000..1b6fdb9
--- /dev/null
+++ b/src/asmjit/base/compiler.h
@@ -0,0 +1,1984 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Guard]
+#ifndef _ASMJIT_BASE_COMPILER_H
+#define _ASMJIT_BASE_COMPILER_H
+
+// [Dependencies - AsmJit]
+#include "../base/assembler.h"
+#include "../base/codegen.h"
+#include "../base/defs.h"
+#include "../base/error.h"
+#include "../base/func.h"
+#include "../base/intutil.h"
+#include "../base/podlist.h"
+#include "../base/podvector.h"
+#include "../base/runtime.h"
+
+// [Api-Begin]
+#include "../base/apibegin.h"
+
+namespace asmjit {
+
+// ============================================================================
+// [Forward Declarations]
+// ============================================================================
+
+struct BaseCompiler;
+
+struct VarAttr;
+struct VarData;
+struct BaseVarInst;
+struct BaseVarState;
+
+struct BaseNode;
+struct EndNode;
+struct InstNode;
+struct JumpNode;
+
+// ============================================================================
+// [asmjit::kVarAttrFlags]
+// ============================================================================
+
+//! @brief Variable attribute flags.
+ASMJIT_ENUM(kVarAttrFlags) {
+  //! @brief Variable is accessed through register on input.
+  kVarAttrInReg = 0x00000001,
+  //! @brief Variable is accessed through register on output.
+  kVarAttrOutReg = 0x00000002,
+  //! @brief Variable is accessed through register on input & output.
+  kVarAttrInOutReg = 0x00000003,
+
+  //! @brief Variable is accessed through memory on input.
+  kVarAttrInMem = 0x00000004,
+  //! @brief Variable is accessed through memory on output.
+  kVarAttrOutMem = 0x00000008,
+  //! @brief Variable is accessed through memory on input & output.
+  kVarAttrInOutMem = 0x0000000C,
+
+  //! @brief It can be decided whether it's better to allocate the variable
+  //! to a register or memory on the input.
+  kVarAttrInDecide = 0x00000010,
+  //! @brief It can be decided whether it's better to allocate the variable
+  //! to a register or memory on the output.
+  kVarAttrOutDecide = 0x00000020,
+  //! @brief It can be decided whether it's better to allocate the variable
+  //! to a register or memory on the input & output.
+  kVarAttrInOutDecide = 0x00000030,
+
+  //! @brief Variable is converted to other type/class on the input.
+  kVarAttrInConv = 0x00000040,
+  //! @brief Variable is converted from other type/class on the output.
+  kVarAttrOutConv = 0x00000080,
+  //! @brief Combination of @ref kVarAttrInConv and @ref kVarAttrOutConv.
+  kVarAttrInOutConv = 0x000000C0,
+
+  //! @brief Variable is a function call operand.
+  kVarAttrInCall = 0x00000100,
+  //! @brief Variable is a function argument passed in register.
+  kVarAttrInArg = 0x00000200,
+  //! @brief Variable is a function argument passed on the stack.
+  kVarAttrInStack = 0x00000400,
+  //! @brief Variable is a function return value passed in register.
+  kVarAttrOutRet = 0x00000800,
+
+  //! @brief Variable should be unused at the end of the instruction/node.
+  kVarAttrUnuse = 0x00001000,
+
+  kVarAttrInAll = kVarAttrInReg | kVarAttrInMem | kVarAttrInDecide | kVarAttrInCall | kVarAttrInArg | kVarAttrInStack,
+  kVarAttrOutAll = kVarAttrOutReg | kVarAttrOutMem | kVarAttrOutDecide | kVarAttrOutRet,
+
+  //! @brief Variable is already allocated on the input.
+  kVarAttrAllocInDone = 0x00400000,
+  //! @brief Variable is already allocated on the output.
+  kVarAttrAllocOutDone = 0x00800000
+};
+
+// ============================================================================
+// [asmjit::kVarHint]
+// ============================================================================
+
+//! @brief Variable hint (used by @ref BaseCompiler).
+//!
+//! @sa @ref BaseCompiler.
+ASMJIT_ENUM(kVarHint) {
+  //! @brief Alloc variable.
+  kVarHintAlloc = 0,
+  //! @brief Spill variable.
+  kVarHintSpill = 1,
+  //! @brief Save variable if modified.
+  kVarHintSave = 2,
+  //! @brief Save variable if modified and mark it as unused.
+  kVarHintSaveAndUnuse = 3,
+  //! @brief Mark variable as unused.
+  kVarHintUnuse = 4
+};
+
+// ============================================================================
+// [asmjit::kVarState]
+// ============================================================================
+
+//! @brief State of variable.
+//!
+//! @note The state of a variable is used only during the make process and is
+//! not visible to the developer.
+ASMJIT_ENUM(kVarState) {
+  //! @brief Variable is currently not used.
+  kVarStateUnused = 0,
+
+  //! @brief Variable is in register.
+  //!
+  //! Variable is currently allocated in register.
+  kVarStateReg = 1,
+
+  //! @brief Variable is in memory location or spilled.
+  //!
+  //! Variable was spilled from register to memory or variable is used for
+  //! memory only storage.
+  kVarStateMem = 2
+};
+
+// ============================================================================
+// [asmjit::kNodeType]
+// ============================================================================
+
+//! @brief Type of node (see @ref BaseNode).
+ASMJIT_ENUM(kNodeType) {
+  //! @brief Invalid node (internal, can't be used).
+  kNodeTypeNone = 0,
+  //! @brief Node is an .align directive, see @ref AlignNode.
+  kNodeTypeAlign,
+  //! @brief Node is an embedded data, see @ref EmbedNode.
+  kNodeTypeEmbed,
+  //! @brief Node is a comment, see @ref CommentNode.
+  kNodeTypeComment,
+  //! @brief Node is a variable hint (alloc, spill, use, unuse), see @ref HintNode.
+  kNodeTypeHint,
+  //! @brief Node is a label, see @ref TargetNode.
+  kNodeTypeTarget,
+  //! @brief Node is an instruction, see @ref InstNode.
+  kNodeTypeInst,
+  //! @brief Node is a function declaration, see @ref FuncNode.
+  kNodeTypeFunc,
+  //! @brief Node is an end of the function, see @ref EndNode.
+  kNodeTypeEnd,
+  //! @brief Node is a return, see @ref RetNode.
+  kNodeTypeRet,
+  //! @brief Node is a function call, see @ref CallNode.
+  kNodeTypeCall,
+  //! @brief Node is a function call argument moved on stack, see @ref SArgNode.
+  kNodeTypeSArg
+};
+
+// ============================================================================
+// [asmjit::kNodeFlag]
+// ============================================================================
+
+ASMJIT_ENUM(kNodeFlag) {
+  //! @brief Whether the node was translated by @ref BaseContext.
+  kNodeFlagIsTranslated = 0x0001,
+
+  //! @brief Whether the @ref InstNode is a jump.
+  kNodeFlagIsJmp = 0x0002,
+  //! @brief Whether the @ref InstNode is a conditional jump.
+  kNodeFlagIsJcc = 0x0004,
+
+  //! @brief Whether the @ref InstNode is an unconditional jump or a
+  //! conditional jump that is likely to be taken.
+  kNodeFlagIsTaken = 0x0008,
+
+  //! @brief Whether the @ref Node will return from a function.
+  //!
+  //! This flag is used by both @ref EndNode and @ref RetNode.
+  kNodeFlagIsRet = 0x0010,
+
+  //! @brief Whether the instruction is special.
+  kNodeFlagIsSpecial = 0x0020,
+
+  //! @brief Whether the instruction is an FPU instruction.
+  kNodeFlagIsFp = 0x0040
+};
+
+// ============================================================================
+// [asmjit::MemCell]
+// ============================================================================
+
+struct MemCell {
+  ASMJIT_NO_COPY(MemCell)
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get cell offset.
+  ASMJIT_INLINE int32_t getOffset() const { return _offset; }
+  //! @brief Set cell offset.
+  ASMJIT_INLINE void setOffset(int32_t offset) { _offset = offset; }
+
+  //! @brief Get cell size.
+  ASMJIT_INLINE uint32_t getSize() const { return _size; }
+  //! @brief Set cell size.
+  ASMJIT_INLINE void setSize(uint32_t size) { _size = size; }
+
+  //! @brief Get cell alignment.
+  ASMJIT_INLINE uint32_t getAlignment() const { return _alignment; }
+  //! @brief Set cell alignment.
+  ASMJIT_INLINE void setAlignment(uint32_t alignment) { _alignment = alignment; }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! @brief Next active cell.
+  MemCell* _next;
+
+  //! @brief Offset, relative to base-offset.
+  int32_t _offset;
+  //! @brief Size.
+  uint32_t _size;
+  //! @brief Alignment.
+  uint32_t _alignment;
+};
+
+// ============================================================================
+// [asmjit::VarBits]
+// ============================================================================
+
+//! @brief Bit-array used by variable-liveness analysis.
+struct VarBits {
+  // --------------------------------------------------------------------------
+  // [Enums]
+  // --------------------------------------------------------------------------
+
+  enum {
+    kEntitySize = static_cast<int>(sizeof(uintptr_t)),
+    kEntityBits = kEntitySize * 8
+  };
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE uintptr_t getBit(uint32_t index) const {
+    return (data[index / kEntityBits] >> (index % kEntityBits)) & 1;
+  }
+
+  ASMJIT_INLINE void setBit(uint32_t index) {
+    data[index / kEntityBits] |= static_cast<uintptr_t>(1) << (index % kEntityBits);
+  }
+
+  ASMJIT_INLINE void delBit(uint32_t index) {
+    data[index / kEntityBits] &= ~(static_cast<uintptr_t>(1) << (index % kEntityBits));
+  }
+
+  // --------------------------------------------------------------------------
+  // [Ops]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE bool copyBits(const VarBits* s0, uint32_t len) {
+    uintptr_t r = 0;
+    for (uint32_t i = 0; i < len; i++) {
+      uintptr_t t = s0->data[i];
+      data[i] = t;
+      r |= t;
+    }
+    return r != 0;
+  }
+
+  ASMJIT_INLINE bool addBits(const VarBits* s0, uint32_t len) {
+    return addBits(this, s0, len);
+  }
+
+  ASMJIT_INLINE bool addBits(const VarBits* s0, const VarBits* s1, uint32_t len) {
+    uintptr_t r = 0;
+    for (uint32_t i = 0; i < len; i++) {
+      uintptr_t t = s0->data[i] | s1->data[i];
+      data[i] = t;
+      r |= t;
+    }
+    return r != 0;
+  }
+
+  ASMJIT_INLINE bool andBits(const VarBits* s1, uint32_t len) {
+    return andBits(this, s1, len);
+  }
+
+  ASMJIT_INLINE bool andBits(const VarBits* s0, const VarBits* s1, uint32_t len) {
+    uintptr_t r = 0;
+    for (uint32_t i = 0; i < len; i++) {
+      uintptr_t t = s0->data[i] & s1->data[i];
+      data[i] = t;
+      r |= t;
+    }
+    return r != 0;
+  }
+
+  ASMJIT_INLINE bool delBits(const VarBits* s1, uint32_t len) {
+    return delBits(this, s1, len);
+  }
+
+  ASMJIT_INLINE bool delBits(const VarBits* s0, const VarBits* s1, uint32_t len) {
+    uintptr_t r = 0;
+    for (uint32_t i = 0; i < len; i++) {
+      uintptr_t t = s0->data[i] & ~s1->data[i];
+      data[i] = t;
+      r |= t;
+    }
+    return r != 0;
+  }
+
+  ASMJIT_INLINE bool _addBitsDelSource(VarBits* s1, uint32_t len) {
+    return _addBitsDelSource(this, s1, len);
+  }
+
+  ASMJIT_INLINE bool _addBitsDelSource(const VarBits* s0, VarBits* s1, uint32_t len) {
+    uintptr_t r = 0;
+    for (uint32_t i = 0; i < len; i++) {
+      uintptr_t a = s0->data[i];
+      uintptr_t b = s1->data[i];
+
+      this->data[i] = a | b;
+      b &= ~a;
+
+      s1->data[i] = b;
+      r |= b;
+    }
+    return r != 0;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  uintptr_t data[1];
+};
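+
+// Example (sketch): VarBits is a variable-length structure; callers allocate
+// it with room for all tracked variables, e.g. from a Zone allocator:
+//
+//   uint32_t words = (varCount + VarBits::kEntityBits - 1) / VarBits::kEntityBits;
+//   VarBits* live = static_cast<VarBits*>(
+//     zone.alloc(words * VarBits::kEntitySize)); // `zone` is any Zone.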
+
+// ============================================================================
+// [asmjit::VarData]
+// ============================================================================
+
+//! @brief Base variable data.
+struct VarData {
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get variable name.
+  ASMJIT_INLINE const char* getName() const { return _name; }
+  //! @brief Get variable id.
+  ASMJIT_INLINE uint32_t getId() const { return _id; }
+
+  //! @brief Get whether the variable has a context id.
+  ASMJIT_INLINE bool hasContextId() const { return _contextId != kInvalidValue; }
+  //! @brief Get context variable id (used only by @ref Context).
+  ASMJIT_INLINE uint32_t getContextId() const { return _contextId; }
+  //! @brief Set context variable id (used only by @ref Context).
+  ASMJIT_INLINE void setContextId(uint32_t contextId) { _contextId = contextId; }
+  //! @brief Reset context variable id (used only by @ref Context).
+  ASMJIT_INLINE void resetContextId() { _contextId = kInvalidValue; }
+
+  //! @brief Get variable type.
+  ASMJIT_INLINE uint32_t getType() const { return _type; }
+  //! @brief Get variable class.
+  ASMJIT_INLINE uint32_t getClass() const { return _class; }
+  //! @brief Get variable flags.
+  ASMJIT_INLINE uint32_t getFlags() const { return _flags; }
+
+  //! @brief Get variable priority.
+  ASMJIT_INLINE uint32_t getPriority() const { return _priority; }
+
+  //! @brief Get variable state (only used by @ref Context).
+  ASMJIT_INLINE uint32_t getState() const { return _state; }
+  //! @brief Set variable state (only used by @ref Context).
+  ASMJIT_INLINE void setState(uint32_t state) { _state = static_cast<uint8_t>(state); }
+
+  //! @brief Get register index.
+  ASMJIT_INLINE uint32_t getRegIndex() const { return _regIndex; }
+  //! @brief Set register index.
+  ASMJIT_INLINE void setRegIndex(uint32_t regIndex) { _regIndex = static_cast<uint8_t>(regIndex); }
+  //! @brief Reset register index.
+  ASMJIT_INLINE void resetRegIndex() { _regIndex = static_cast<uint8_t>(kInvalidReg); }
+
+  //! @brief Get whether the VarData is only memory allocated on the stack.
+  ASMJIT_INLINE bool isStack() const { return static_cast<bool>(_isStack); }
+
+  //! @brief Get whether the variable is a function argument passed through memory.
+  ASMJIT_INLINE bool isMemArg() const { return static_cast<bool>(_isMemArg); }
+
+  //! @brief Get whether the variable content can be calculated by a simple instruction.
+  ASMJIT_INLINE bool isCalculated() const { return static_cast<bool>(_isCalculated); }
+  //! @brief Get whether to save variable when it's unused (spill).
+  ASMJIT_INLINE bool saveOnUnuse() const { return static_cast<bool>(_saveOnUnuse); }
+
+  //! @brief Get whether the variable was changed.
+  ASMJIT_INLINE bool isModified() const { return static_cast<bool>(_modified); }
+  //! @brief Set whether the variable was changed.
+  ASMJIT_INLINE void setModified(bool modified) { _modified = modified; }
+
+  //! @brief Get variable alignment.
+  ASMJIT_INLINE uint32_t getAlignment() const { return _alignment; }
+  //! @brief Get variable size.
+  ASMJIT_INLINE uint32_t getSize() const { return _size; }
+
+  //! @brief Get home memory offset.
+  ASMJIT_INLINE int32_t getMemOffset() const { return _memOffset; }
+  //! @brief Set home memory offset.
+  ASMJIT_INLINE void setMemOffset(int32_t offset) { _memOffset = offset; }
+
+  //! @brief Get home memory cell.
+  ASMJIT_INLINE MemCell* getMemCell() const { return _memCell; }
+  //! @brief Set home memory cell.
+  ASMJIT_INLINE void setMemCell(MemCell* cell) { _memCell = cell; }
+
+  // --------------------------------------------------------------------------
+  // [Accessors - Temporary Usage]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get temporary VarAttr.
+  ASMJIT_INLINE VarAttr* getVa() const { return _va; }
+  //! @brief Set temporary VarAttr.
+  ASMJIT_INLINE void setVa(VarAttr* va) { _va = va; }
+  //! @brief Reset temporary VarAttr.
+  ASMJIT_INLINE void resetVa() { _va = NULL; }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! @brief Variable name.
+  const char* _name;
+
+  //! @brief Variable id.
+  uint32_t _id;
+  //! @brief Context variable id (used by @ref Context only, initially set to
+  //! @c kInvalidValue).
+  uint32_t _contextId;
+
+  //! @brief Variable type.
+  uint8_t _type;
+  //! @brief Variable class.
+  uint8_t _class;
+  //! @brief Variable flags.
+  uint8_t _flags;
+  //! @brief Variable priority.
+  uint8_t _priority;
+
+  //! @brief Variable state (connected with actual @ref BaseVarState).
+  uint8_t _state;
+  //! @brief Actual register index (only used by @ref Context) during translate.
+  uint8_t _regIndex;
+
+  //! @brief Whether the variable is only used as memory allocated on the stack.
+  uint8_t _isStack : 1;
+  //! @brief Whether the variable is a function argument passed through memory.
+  uint8_t _isMemArg : 1;
+  //! @brief Whether variable content can be calculated by a simple instruction.
+  //!
+  //! This is used mainly by MMX and SSE2 code. This flag indicates that the
+  //! register allocator should never reserve memory for this variable, because
+  //! the content can be generated by a single instruction (for example PXOR).
+  uint8_t _isCalculated : 1;
+  //! @brief Save on unuse (at the end of the variable scope).
+  uint8_t _saveOnUnuse : 1;
+  //! @brief Whether the variable was changed (connected with actual @ref BaseVarState).
+  uint8_t _modified : 1;
+  //! @internal
+  uint8_t _reserved0 : 3;
+  //! @brief Variable natural alignment.
+  uint8_t _alignment;
+
+  //! @brief Variable size.
+  uint32_t _size;
+
+  //! @brief Home memory offset.
+  int32_t _memOffset;
+  //! @brief Home memory cell, used by @c Context (initially NULL).
+  MemCell* _memCell;
+
+  //! @brief Register read access statistics.
+  uint32_t rReadCount;
+  //! @brief Register write access statistics.
+  uint32_t rWriteCount;
+
+  //! @brief Memory read statistics.
+  uint32_t mReadCount;
+  //! @brief Memory write statistics.
+  uint32_t mWriteCount;
+
+  // --------------------------------------------------------------------------
+  // [Members - Temporary Usage]
+  // --------------------------------------------------------------------------
+
+  // These variables are only used during register allocation. They are
+  // initialized by the init() phase and cleared by the cleanup() phase.
+
+  union {
+    //! @brief Temporary link to VarAttr* used by @ref Context in various
+    //! phases, but always set back to NULL when finished.
+    //!
+    //! This temporary data is designed to be used by algorithms that need to
+    //! store some data into variables themselves during compilation. But it's
+    //! expected that after the variable is compiled & translated the data is
+    //! set back to zero/null. The initial value is NULL.
+    VarAttr* _va;
+    //! @internal
+    uintptr_t _vaUInt;
+  };
+};
+
+// ============================================================================
+// [asmjit::VarAttr]
+// ============================================================================
+
+struct VarAttr {
+  // --------------------------------------------------------------------------
+  // [Setup]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE void setup(VarData* vd, uint32_t flags = 0, uint32_t inRegs = 0, uint32_t allocableRegs = 0) {
+    _vd = vd;
+    _flags = flags;
+    _varCount = 0;
+    _argStackCount = 0;
+    _inRegIndex = kInvalidReg;
+    _outRegIndex = kInvalidReg;
+    _inRegs = inRegs;
+    _allocableRegs = allocableRegs;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get VarData.
+  ASMJIT_INLINE VarData* getVd() const { return _vd; }
+  //! @brief Set VarData.
+  ASMJIT_INLINE void setVd(VarData* vd) { _vd = vd; }
+
+  //! @brief Get flags.
+  ASMJIT_INLINE uint32_t getFlags() const { return _flags; }
+  //! @brief Set flags.
+  ASMJIT_INLINE void setFlags(uint32_t flags) { _flags = flags; }
+
+  //! @brief Get whether @a flag is on.
+  ASMJIT_INLINE bool hasFlag(uint32_t flag) const { return (_flags & flag) != 0; }
+  //! @brief Add @a flags.
+  ASMJIT_INLINE void addFlags(uint32_t flags) { _flags |= flags; }
+  //! @brief Mask @a flags.
+  ASMJIT_INLINE void andFlags(uint32_t flags) { _flags &= flags; }
+  //! @brief Clear @a flags.
+  ASMJIT_INLINE void delFlags(uint32_t flags) { _flags &= ~flags; }
+
+  //! @brief Get how many times the variable is used by the instruction/node.
+  ASMJIT_INLINE uint32_t getVarCount() const { return _varCount; }
+  //! @brief Set how many times the variable is used by the instruction/node.
+  ASMJIT_INLINE void setVarCount(uint32_t count) { _varCount = static_cast<uint8_t>(count); }
+  //! @brief Add how many times the variable is used by the instruction/node.
+  ASMJIT_INLINE void addVarCount(uint32_t count = 1) { _varCount += static_cast<uint8_t>(count); }
+
+  //! @brief Get how many times the variable is used by the function argument.
+  ASMJIT_INLINE uint32_t getArgStackCount() const { return _argStackCount; }
+  //! @brief Set how many times the variable is used by the function argument.
+  ASMJIT_INLINE void setArgStackCount(uint32_t count) { _argStackCount = static_cast<uint8_t>(count); }
+  //! @brief Add how many times the variable is used by the function argument.
+  ASMJIT_INLINE void addArgStackCount(uint32_t count = 1) { _argStackCount += static_cast<uint8_t>(count); }
+
+  //! @brief Get whether the variable has to be allocated in a specific input register.
+  ASMJIT_INLINE bool hasInRegIndex() const { return _inRegIndex != kInvalidReg; }
+  //! @brief Get the input register index or @ref kInvalidReg.
+  ASMJIT_INLINE uint32_t getInRegIndex() const { return _inRegIndex; }
+  //! @brief Set the input register index.
+  ASMJIT_INLINE void setInRegIndex(uint32_t index) { _inRegIndex = static_cast<uint8_t>(index); }
+  //! @brief Reset the input register index.
+  ASMJIT_INLINE void resetInRegIndex() { _inRegIndex = kInvalidReg; }
+
+  //! @brief Get whether the variable has to be allocated in a specific output register.
+  ASMJIT_INLINE bool hasOutRegIndex() const { return _outRegIndex != kInvalidReg; }
+  //! @brief Get the output register index or @ref kInvalidReg.
+  ASMJIT_INLINE uint32_t getOutRegIndex() const { return _outRegIndex; }
+  //! @brief Set the output register index.
+  ASMJIT_INLINE void setOutRegIndex(uint32_t index) { _outRegIndex = static_cast<uint8_t>(index); }
+  //! @brief Reset the output register index.
+  ASMJIT_INLINE void resetOutRegIndex() { _outRegIndex = kInvalidReg; }
+
+  //! @brief Get whether the mandatory input registers are in use.
+  ASMJIT_INLINE bool hasInRegs() const { return _inRegs != 0; }
+  //! @brief Get mandatory input registers (mask).
+  ASMJIT_INLINE uint32_t getInRegs() const { return _inRegs; }
+  //! @brief Set mandatory input registers (mask).
+  ASMJIT_INLINE void setInRegs(uint32_t mask) { _inRegs = mask; }
+  //! @brief Add mandatory input registers (mask).
+  ASMJIT_INLINE void addInRegs(uint32_t mask) { _inRegs |= mask; }
+  //! @brief And mandatory input registers (mask).
+  ASMJIT_INLINE void andInRegs(uint32_t mask) { _inRegs &= mask; }
+  //! @brief Clear mandatory input registers (mask).
+  ASMJIT_INLINE void delInRegs(uint32_t mask) { _inRegs &= ~mask; }
+
+  //! @brief Get allocable input registers (mask).
+  ASMJIT_INLINE uint32_t getAllocableRegs() const { return _allocableRegs; }
+  //! @brief Set allocable input registers (mask).
+  ASMJIT_INLINE void setAllocableRegs(uint32_t mask) { _allocableRegs = mask; }
+  //! @brief Add allocable input registers (mask).
+  ASMJIT_INLINE void addAllocableRegs(uint32_t mask) { _allocableRegs |= mask; }
+  //! @brief And allocable input registers (mask).
+  ASMJIT_INLINE void andAllocableRegs(uint32_t mask) { _allocableRegs &= mask; }
+  //! @brief Clear allocable input registers (mask).
+  ASMJIT_INLINE void delAllocableRegs(uint32_t mask) { _allocableRegs &= ~mask; }
+
+  // --------------------------------------------------------------------------
+  // [Operator Overload]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE VarAttr& operator=(const VarAttr& other) {
+    ::memcpy(this, &other, sizeof(VarAttr));
+    return *this;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! @brief Attached variable data.
+  VarData* _vd;
+  //! @brief Flags.
+  uint32_t _flags;
+
+  union {
+    struct {
+      //! @brief How many times the variable is used by the instruction/node.
+      uint8_t _varCount;
+      //! @brief How many times the variable is used as a function argument on
+      //! the stack.
+      //!
+      //! This is important to know for the function-call allocator. It doesn't
+      //! allocate by arguments, but by using VarAttr's.
+      uint8_t _argStackCount;
+      //! @brief Input register index or @ref kInvalidReg if it's not given.
+      //!
+      //! Even if the input register index is not given (i.e. it may be any
+      //! register), the register allocator should assign an index that will
+      //! be used to persist the variable in that specific register. This is
+      //! helpful in situations where one variable has to be allocated in
+      //! multiple registers to determine the register which will be
+      //! persistent.
+      uint8_t _inRegIndex;
+      //! @brief Output register index or @ref kInvalidReg if it's not given.
+      //!
+      //! Typically @ref kInvalidReg if the variable is only used on input.
+      uint8_t _outRegIndex;
+    };
+
+    //! @internal
+    //!
+    //! @brief Packed data #0.
+    uint32_t _packed;
+  };
+
+  //! @brief Mandatory input registers.
+  //!
+  //! Mandatory input registers are required by the instruction even if
+  //! there are duplicates. This schema allows us to allocate one variable
+  //! in one or more registers when needed. Required mostly by instructions
+  //! that have implicit register operands (imul, cpuid, ...) and function
+  //! calls.
+  uint32_t _inRegs;
+
+  //! @brief Allocable input registers.
+  //!
+  //! Allocable input registers form a mask of all registers that can be
+  //! allocated for a given variable, from which the allocator picks one.
+  //! This mask is usually not used when @ref _inRegs is set. If both masks
+  //! are used, the register allocator first tries to find an intersection
+  //! between them and allocates an extra slot if none is found.
+  uint32_t _allocableRegs;
+};
+
+// ============================================================================
+// [asmjit::BaseVarInst]
+// ============================================================================
+
+//! @brief Variable allocation instructions.
+struct BaseVarInst {};
+
+// ============================================================================
+// [asmjit::BaseVarState]
+// ============================================================================
+
+//! @brief Variable(s) state.
+struct BaseVarState {};
+
+// ============================================================================
+// [asmjit::BaseNode]
+// ============================================================================
+
+//! @brief Base node.
+//!
+//! Every node represents an abstract instruction, directive, label, or
+//! macro-instruction generated by the compiler.
+struct BaseNode {
+  ASMJIT_NO_COPY(BaseNode)
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create new @ref BaseNode.
+  //!
+  //! @note Always use compiler to create nodes.
+  ASMJIT_INLINE BaseNode(BaseCompiler* compiler, uint32_t type); // Defined-Later.
+
+  //! @brief Destroy @ref BaseNode.
+  ASMJIT_INLINE ~BaseNode() {}
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get previous node in the compiler stream.
+  ASMJIT_INLINE BaseNode* getPrev() const { return _prev; }
+  //! @brief Get next node in the compiler stream.
+  ASMJIT_INLINE BaseNode* getNext() const { return _next; }
+
+  //! @brief Get comment string.
+  ASMJIT_INLINE const char* getComment() const { return _comment; }
+  //! @brief Set comment string to @a str.
+  ASMJIT_INLINE void setComment(const char* comment) { _comment = comment; }
+
+  //! @brief Get type of node, see @ref kNodeType.
+  ASMJIT_INLINE uint32_t getType() const { return _type; }
+
+  //! @brief Get node flags.
+  ASMJIT_INLINE uint32_t getFlags() const { return _flags; }
+  //! @brief Set node flags to @a flags.
+  ASMJIT_INLINE void setFlags(uint32_t flags) { _flags = static_cast<uint16_t>(flags); }
+
+  //! @brief Get whether the instruction has flag @a flag.
+  ASMJIT_INLINE bool hasFlag(uint32_t flag) const { return (static_cast<uint32_t>(_flags) & flag) != 0; }
+  //! @brief Add instruction @a flags.
+  ASMJIT_INLINE void addFlags(uint32_t flags) { _flags |= static_cast<uint16_t>(flags); }
+  //! @brief Clear instruction @a flags.
+  ASMJIT_INLINE void delFlags(uint32_t flags) { _flags &= static_cast<uint16_t>(~flags); }
+
+  //! @brief Get whether the node has been fetched.
+  ASMJIT_INLINE bool isFetched() const { return _flowId != 0; }
+  //! @brief Get whether the node has been translated.
+  ASMJIT_INLINE bool isTranslated() const { return hasFlag(kNodeFlagIsTranslated); }
+
+  //! @brief Whether the instruction is an unconditional jump.
+  ASMJIT_INLINE bool isJmp() const { return hasFlag(kNodeFlagIsJmp); }
+  //! @brief Whether the instruction is a conditional jump.
+  ASMJIT_INLINE bool isJcc() const { return hasFlag(kNodeFlagIsJcc); }
+  //! @brief Whether the instruction is an unconditional or conditional jump.
+  ASMJIT_INLINE bool isJmpOrJcc() const { return hasFlag(kNodeFlagIsJmp | kNodeFlagIsJcc); }
+  //! @brief Whether the instruction is a return.
+  ASMJIT_INLINE bool isRet() const { return hasFlag(kNodeFlagIsRet); }
+
+  //! @brief Get whether the instruction is special.
+  ASMJIT_INLINE bool isSpecial() const { return hasFlag(kNodeFlagIsSpecial); }
+  //! @brief Get whether the instruction accesses FPU.
+  ASMJIT_INLINE bool isFp() const { return hasFlag(kNodeFlagIsFp); }
+
+  //! @brief Get flow index.
+  ASMJIT_INLINE uint32_t getFlowId() const { return _flowId; }
+  //! @brief Set flow index.
+  ASMJIT_INLINE void setFlowId(uint32_t flowId) { _flowId = flowId; }
+
+  //! @brief Get whether the node contains variable allocation instructions.
+  ASMJIT_INLINE bool hasVarInst() const { return _varInst != NULL; }
+
+  //! @brief Get variable allocation instructions.
+  ASMJIT_INLINE BaseVarInst* getVarInst() const { return _varInst; }
+  //! @brief Get variable allocation instructions (cast to @a T).
+  template<typename T>
+  ASMJIT_INLINE T* getVarInst() const { return static_cast<T*>(_varInst); }
+  //! @brief Set variable allocation instructions.
+  ASMJIT_INLINE void setVarInst(BaseVarInst* vi) { _varInst = vi; }
+
+  //! @brief Get node state.
+  ASMJIT_INLINE BaseVarState* getState() const { return _state; }
+  //! @brief Get node state (cast to @a T).
+  template<typename T>
+  ASMJIT_INLINE T* getState() const { return static_cast<T*>(_state); }
+  //! @brief Set node state.
+  ASMJIT_INLINE void setState(BaseVarState* state) { _state = state; }
+
+  //! @brief Get whether the node has variable liveness bits.
+  ASMJIT_INLINE bool hasLiveness() const { return _liveness != NULL; }
+  //! @brief Get variable liveness bits.
+  ASMJIT_INLINE VarBits* getLiveness() const { return _liveness; }
+  //! @brief Set variable liveness bits.
+  ASMJIT_INLINE void setLiveness(VarBits* liveness) { _liveness = liveness; }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! @brief Previous node.
+  BaseNode* _prev;
+  //! @brief Next node.
+  BaseNode* _next;
+
+  //! @brief Node type, see @ref kNodeType.
+  uint8_t _type;
+  //! @brief Operands count (if the node has operands, otherwise zero).
+  uint8_t _opCount;
+  //! @brief Node flags, different meaning for every node type.
+  uint16_t _flags;
+
+  //! @brief Flow index.
+  uint32_t _flowId;
+
+  //! @brief Inline comment string, initially set to NULL.
+  const char* _comment;
+
+  //! @brief Variable allocation instructions (initially NULL, filled by prepare
+  //! phase).
+  BaseVarInst* _varInst;
+
+  //! @brief Variable liveness bits (initially NULL, filled by analysis phase).
+  VarBits* _liveness;
+
+  //! @brief Saved state.
+  //!
+  //! Initially NULL, not all nodes have saved state, only branch/flow control
+  //! nodes.
+  BaseVarState* _state;
+};
+
+// ============================================================================
+// [asmjit::AlignNode]
+// ============================================================================
+
+//! @brief Align node.
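+//!
+//! A minimal usage sketch, assuming @c c is a concrete compiler instance
+//! derived from @ref BaseCompiler (declared later in this file); it relies
+//! only on the @ref BaseCompiler::align() helper below:
+//! @code
+//! c.align(16); // Emit NOPs so the next node starts at a 16-byte boundary.
+//! @endcode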
+struct AlignNode : public BaseNode {
+  ASMJIT_NO_COPY(AlignNode)
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a new @ref AlignNode instance.
+  ASMJIT_INLINE AlignNode(BaseCompiler* compiler, uint32_t size) : BaseNode(compiler, kNodeTypeAlign) {
+    _size = size;
+  }
+
+  //! @brief Destroy the @ref AlignNode instance.
+  ASMJIT_INLINE ~AlignNode() {}
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get align size in bytes.
+  ASMJIT_INLINE uint32_t getSize() const { return _size; }
+  //! @brief Set align size in bytes to @a size.
+  ASMJIT_INLINE void setSize(uint32_t size) { _size = size; }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! @brief Size of the alignment.
+  uint32_t _size;
+};
+
+// ============================================================================
+// [asmjit::EmbedNode]
+// ============================================================================
+
+//! @brief Embed node.
+//!
+//! Embed node is used to embed data into the final assembler stream. The
+//! data is considered to be raw; no analysis is performed on it.
+struct EmbedNode : public BaseNode {
+  ASMJIT_NO_COPY(EmbedNode)
+
+  // --------------------------------------------------------------------------
+  // [Enums]
+  // --------------------------------------------------------------------------
+
+  enum { kInlineBufferSize = 8 };
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a new @ref EmbedNode instance.
+  ASMJIT_INLINE EmbedNode(BaseCompiler* compiler, void* data, uint32_t size) : BaseNode(compiler, kNodeTypeEmbed) {
+    _size = size;
+    if (size <= kInlineBufferSize)
+      ::memcpy(_data.buf, data, size);
+    else
+      _data.ptr = static_cast<uint8_t*>(data);
+  }
+
+  //! @brief Destroy the @ref EmbedNode instance.
+  ASMJIT_INLINE ~EmbedNode() {}
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get pointer to data.
+  uint8_t* getData() { return getSize() <= kInlineBufferSize ? const_cast<uint8_t*>(_data.buf) : _data.ptr; }
+  //! @brief Get size of data.
+  uint32_t getSize() const { return _size; }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! @brief Size of the embedded data.
+  uint32_t _size;
+
+  union {
+    //! @brief Inline data buffer (up to @ref kInlineBufferSize bytes).
+    uint8_t buf[kInlineBufferSize];
+    //! @brief Pointer to externally stored data.
+    uint8_t* ptr;
+  } _data;
+};
+
+// ============================================================================
+// [asmjit::CommentNode]
+// ============================================================================
+
+//! @brief Comment node.
+//!
+//! Comments allow you to annotate the assembler stream for better debugging
+//! and visualization. Comments are usually ignored in release builds unless
+//! the logger is present.
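+//!
+//! A hedged sketch using the @ref BaseCompiler::comment() helper declared
+//! later in this file (the format string and @c i are just an example):
+//! @code
+//! c.comment("iteration %u of the unrolled loop", i);
+//! @endcode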
+struct CommentNode : public BaseNode {
+  ASMJIT_NO_COPY(CommentNode)
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a new @ref CommentNode instance.
+  ASMJIT_INLINE CommentNode(BaseCompiler* compiler, const char* comment) : BaseNode(compiler, kNodeTypeComment) {
+    _comment = comment;
+  }
+
+  //! @brief Destroy the @ref CommentNode instance.
+  ASMJIT_INLINE ~CommentNode() {}
+};
+
+// ============================================================================
+// [asmjit::HintNode]
+// ============================================================================
+
+//! @brief Hint node.
+struct HintNode : public BaseNode {
+  ASMJIT_NO_COPY(HintNode)
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a new @ref HintNode instance.
+  ASMJIT_INLINE HintNode(BaseCompiler* compiler, VarData* vd, uint32_t hint, uint32_t value) : BaseNode(compiler, kNodeTypeHint) {
+    _vd = vd;
+    _hint = hint;
+    _value = value;
+  }
+
+  //! @brief Destroy the @ref HintNode instance.
+  ASMJIT_INLINE ~HintNode() {}
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get variable.
+  ASMJIT_INLINE VarData* getVd() const { return _vd; }
+
+  //! @brief Get hint id (see @ref kVarHint).
+  ASMJIT_INLINE uint32_t getHint() const { return _hint; }
+  //! @brief Set hint id (see @ref kVarHint).
+  ASMJIT_INLINE void setHint(uint32_t hint) { _hint = hint; }
+
+  //! @brief Get hint value.
+  ASMJIT_INLINE uint32_t getValue() const { return _value; }
+  //! @brief Set hint value.
+  ASMJIT_INLINE void setValue(uint32_t value) { _value = value; }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! @brief Variable.
+  VarData* _vd;
+  //! @brief Hint id.
+  uint32_t _hint;
+  //! @brief Value.
+  uint32_t _value;
+};
+
+// ============================================================================
+// [asmjit::TargetNode]
+// ============================================================================
+
+//! @brief Label node.
+struct TargetNode : public BaseNode {
+  ASMJIT_NO_COPY(TargetNode)
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a new @ref TargetNode instance.
+  ASMJIT_INLINE TargetNode(BaseCompiler* compiler, uint32_t labelId) : BaseNode(compiler, kNodeTypeTarget) {
+    _id = labelId;
+    _numRefs = 0;
+    _from = NULL;
+  }
+
+  //! @brief Destroy the @ref TargetNode instance.
+  ASMJIT_INLINE ~TargetNode() {}
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get target label.
+  ASMJIT_INLINE Label getLabel() const { return Label(_id); }
+  //! @brief Get target label id.
+  ASMJIT_INLINE uint32_t getLabelId() const { return _id; }
+
+  //! @brief Get first jmp instruction.
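+  //!
+  //! All jumps to this target form a singly-linked list; starting at the
+  //! node returned here, the list can be walked through
+  //! @ref JumpNode::getJumpNext() until it returns NULL.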
+  ASMJIT_INLINE JumpNode* getFrom() const { return _from; }
+
+  //! @brief Get whether the node has assigned state.
+  ASMJIT_INLINE bool hasState() const { return _state != NULL; }
+  //! @brief Get state for this target.
+  ASMJIT_INLINE BaseVarState* getState() const { return _state; }
+  //! @brief Set state for this target.
+  ASMJIT_INLINE void setState(BaseVarState* state) { _state = state; }
+
+  //! @brief Get number of jumps to this target.
+  ASMJIT_INLINE uint32_t getNumRefs() const { return _numRefs; }
+  //! @brief Set number of jumps to this target.
+  ASMJIT_INLINE void setNumRefs(uint32_t i) { _numRefs = i; }
+  //! @brief Add number of jumps to this target.
+  ASMJIT_INLINE void addNumRefs(uint32_t i = 1) { _numRefs += i; }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! @brief Label id.
+  uint32_t _id;
+  //! @brief Count of jumps here.
+  uint32_t _numRefs;
+
+  //! @brief First jump instruction that points to this target (label).
+  JumpNode* _from;
+};
+
+// ============================================================================
+// [asmjit::InstNode]
+// ============================================================================
+
+//! @brief Instruction node.
+struct InstNode : public BaseNode {
+  ASMJIT_NO_COPY(InstNode)
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a new @ref InstNode instance.
+  ASMJIT_INLINE InstNode(BaseCompiler* compiler, uint32_t code, uint32_t options, Operand* opList, uint32_t opCount) : BaseNode(compiler, kNodeTypeInst) {
+    _code = static_cast<uint16_t>(code);
+    _options = static_cast<uint8_t>(options);
+
+    _opCount = static_cast<uint8_t>(opCount);
+    _opList = opList;
+
+    _updateMemOp();
+  }
+
+  //! @brief Destroy the @ref InstNode instance.
+  ASMJIT_INLINE ~InstNode() {}
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get instruction code, see @c kInstCode.
+  ASMJIT_INLINE uint32_t getCode() const { return _code; }
+
+  //! @brief Set instruction code to @a code.
+  //!
+  //! Please do not modify the instruction code unless you know what you are
+  //! doing. An incorrect instruction code or operands can raise an assertion
+  //! at runtime.
+  ASMJIT_INLINE void setCode(uint32_t code) { _code = static_cast<uint16_t>(code); }
+
+  //! @brief Whether the instruction is an unconditional jump or whether the
+  //! instruction is a conditional jump which is likely to be taken.
+  ASMJIT_INLINE bool isTaken() const { return hasFlag(kNodeFlagIsTaken); }
+
+  //! @brief Get emit options.
+  ASMJIT_INLINE uint32_t getOptions() const { return _options; }
+  //! @brief Set emit options.
+  ASMJIT_INLINE void setOptions(uint32_t options) { _options = static_cast<uint8_t>(options); }
+  //! @brief Add emit options.
+  ASMJIT_INLINE void addOptions(uint32_t options) { _options |= static_cast<uint8_t>(options); }
+  //! @brief Mask emit options.
+  ASMJIT_INLINE void andOptions(uint32_t options) { _options &= static_cast<uint8_t>(options); }
+  //! @brief Clear emit options.
+  ASMJIT_INLINE void delOptions(uint32_t options) { _options &= static_cast<uint8_t>(~options); }
+
+  //! @brief Get operands list.
+  ASMJIT_INLINE Operand* getOpList() { return _opList; }
+  //! @overload
+  ASMJIT_INLINE const Operand* getOpList() const { return _opList; }
+
+  //! @brief Get operands count.
+  ASMJIT_INLINE uint32_t getOpCount() const { return _opCount; }
+
+  //! @brief Get whether the instruction contains a memory operand.
+  ASMJIT_INLINE bool hasMemOp() const { return _memOpIndex != 0xFF; }
+
+  //! @brief Set memory operand index (in opList), 0xFF means that the
+  //! instruction doesn't have a memory operand.
+  ASMJIT_INLINE void setMemOpIndex(uint32_t index) { _memOpIndex = static_cast<uint8_t>(index); }
+  //! @brief Reset memory operand index, setting it to 0xFF.
+  ASMJIT_INLINE void resetMemOpIndex() { _memOpIndex = 0xFF; }
+
+  //! @brief Get memory operand.
+  //!
+  //! @note Can only be called if the instruction has such operand, see @ref
+  //! hasMemOp().
+  ASMJIT_INLINE BaseMem* getMemOp() const {
+    ASMJIT_ASSERT(hasMemOp());
+    return static_cast<BaseMem*>(&_opList[_memOpIndex]);
+  }
+
+  //! @overload
+  template<typename T>
+  ASMJIT_INLINE T* getMemOp() const {
+    ASMJIT_ASSERT(hasMemOp());
+    return static_cast<T*>(&_opList[_memOpIndex]);
+  }
+
+  // --------------------------------------------------------------------------
+  // [Utils]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE void _updateMemOp() {
+    Operand* opList = getOpList();
+    uint32_t opCount = getOpCount();
+
+    uint32_t i;
+    for (i = 0; i < opCount; i++)
+      if (opList[i].isMem())
+        goto _Update;
+    i = 0xFF;
+
+_Update:
+    setMemOpIndex(i);
+  }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! @brief Instruction code, see @c kInstCode.
+  uint16_t _code;
+  //! @brief Instruction options, see @c kInstOptions.
+  uint8_t _options;
+  //! @internal
+  uint8_t _memOpIndex;
+
+  //! @brief Operands list.
+  Operand* _opList;
+};
+
+// ============================================================================
+// [asmjit::JumpNode]
+// ============================================================================
+
+//! @brief Jump node.
+struct JumpNode : public InstNode {
+  ASMJIT_NO_COPY(JumpNode)
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE JumpNode(BaseCompiler* compiler, uint32_t code, uint32_t options, Operand* opList, uint32_t opCount) :
+    InstNode(compiler, code, options, opList, opCount) {}
+  ASMJIT_INLINE ~JumpNode() {}
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE TargetNode* getTarget() const { return _target; }
+  ASMJIT_INLINE JumpNode* getJumpNext() const { return _jumpNext; }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! @brief Target node.
+  TargetNode* _target;
+  //! @brief Next jump to the same target in a singly linked-list.
+  JumpNode* _jumpNext;
+};
+
+// ============================================================================
+// [asmjit::FuncNode]
+// ============================================================================
+
+//! @brief Function declaration node.
+//!
+//! Functions are the base blocks for generating assembler output. Each generated
+//! assembler stream needs standard entry and leave sequences which are
+//! compatible with the operating system ABI.
+//!
+//! @ref FuncNode can be used to generate a function prolog and epilog which
+//! are compatible with a given function calling convention, and to allocate
+//! and manage variables that can be allocated/spilled during the compilation
+//! phase.
+struct FuncNode : public BaseNode {
+  ASMJIT_NO_COPY(FuncNode)
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a new @c FuncNode instance.
+  //!
+  //! @note Always use @ref asmjit::BaseCompiler::addFunc() to create a
+  //! @ref FuncNode instance.
+  ASMJIT_INLINE FuncNode(BaseCompiler* compiler) :
+    BaseNode(compiler, kNodeTypeFunc),
+    _entryNode(NULL),
+    _exitNode(NULL),
+    _decl(NULL),
+    _end(NULL),
+    _argList(NULL),
+    _funcHints(IntUtil::mask(kFuncHintNaked)),
+    _funcFlags(0),
+    _expectedStackAlignment(0),
+    _requiredStackAlignment(0),
+    _redZoneSize(0),
+    _spillZoneSize(0),
+    _argStackSize(0),
+    _memStackSize(0),
+    _callStackSize(0) {}
+
+  //! @brief Destroy the @c FuncNode instance.
+  ASMJIT_INLINE ~FuncNode() {}
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get function entry @ref TargetNode.
+  ASMJIT_INLINE TargetNode* getEntryNode() const { return _entryNode; }
+  //! @brief Get function exit @ref TargetNode.
+  ASMJIT_INLINE TargetNode* getExitNode() const { return _exitNode; }
+
+  //! @brief Get function entry label.
+  ASMJIT_INLINE Label getEntryLabel() const { return _entryNode->getLabel(); }
+  //! @brief Get function exit label.
+  ASMJIT_INLINE Label getExitLabel() const { return _exitNode->getLabel(); }
+
+  //! @brief Get function @ref EndNode.
+  ASMJIT_INLINE EndNode* getEnd() const { return _end; }
+  //! @brief Get function declaration.
+  ASMJIT_INLINE FuncDecl* getDecl() const { return _decl; }
+
+  //! @brief Get arguments list.
+  ASMJIT_INLINE VarData** getArgList() const { return _argList; }
+  //! @brief Get arguments count.
+  ASMJIT_INLINE uint32_t getArgCount() const { return _decl->getArgCount(); }
+
+  //! @brief Get argument at @a i.
+  ASMJIT_INLINE VarData* getArg(uint32_t i) const {
+    ASMJIT_ASSERT(i < getArgCount());
+    return _argList[i];
+  }
+
+  //! @brief Set argument at @a i.
+  ASMJIT_INLINE void setArg(uint32_t i, VarData* vd) {
+    ASMJIT_ASSERT(i < getArgCount());
+    _argList[i] = vd;
+  }
+
+  //! @brief Reset argument at @a i.
+  ASMJIT_INLINE void resetArg(uint32_t i) {
+    ASMJIT_ASSERT(i < getArgCount());
+    _argList[i] = NULL;
+  }
+
+  //! @brief Get function hints.
+  ASMJIT_INLINE uint32_t getFuncHints() const { return _funcHints; }
+  //! @brief Get function flags.
+  ASMJIT_INLINE uint32_t getFuncFlags() const { return _funcFlags; }
+
+  //! @brief Get whether @ref _funcFlags has @a flag set.
+  ASMJIT_INLINE bool hasFuncFlag(uint32_t flag) const { return (_funcFlags & flag) != 0; }
+  //! @brief Add function @a flags.
+  ASMJIT_INLINE void addFuncFlags(uint32_t flags) { _funcFlags |= flags; }
+  //! @brief Clear function @a flags.
+  ASMJIT_INLINE void clearFuncFlags(uint32_t flags) { _funcFlags &= ~flags; }
+
+  //! @brief Get whether the function is naked.
+  ASMJIT_INLINE bool isNaked() const { return hasFuncFlag(kFuncFlagIsNaked); }
+  //! @brief Get whether the function is also a caller.
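+  //!
+  //! A function is presumably marked as a caller once it contains at least
+  //! one @ref CallNode, so that stack space for outgoing call arguments can
+  //! be reserved (see @ref getCallStackSize()).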
+  ASMJIT_INLINE bool isCaller() const { return hasFuncFlag(kFuncFlagIsCaller); }
+  //! @brief Get whether the required stack alignment is higher than the expected
+  //! one, thus the stack has to be aligned manually.
+  ASMJIT_INLINE bool isStackMisaligned() const { return hasFuncFlag(kFuncFlagIsStackMisaligned); }
+  //! @brief Get whether the stack pointer is adjusted inside function prolog/epilog.
+  ASMJIT_INLINE bool isStackAdjusted() const { return hasFuncFlag(kFuncFlagIsStackAdjusted); }
+
+  //! @brief Get whether the function is finished.
+  ASMJIT_INLINE bool isFinished() const { return hasFuncFlag(kFuncFlagIsFinished); }
+
+  //! @brief Get expected stack alignment.
+  ASMJIT_INLINE uint32_t getExpectedStackAlignment() const { return _expectedStackAlignment; }
+  //! @brief Set expected stack alignment.
+  ASMJIT_INLINE void setExpectedStackAlignment(uint32_t alignment) { _expectedStackAlignment = alignment; }
+
+  //! @brief Get required stack alignment.
+  ASMJIT_INLINE uint32_t getRequiredStackAlignment() const { return _requiredStackAlignment; }
+  //! @brief Set required stack alignment.
+  ASMJIT_INLINE void setRequiredStackAlignment(uint32_t alignment) { _requiredStackAlignment = alignment; }
+
+  //! @brief Update required stack alignment so it's not lower than the expected
+  //! stack alignment.
+  ASMJIT_INLINE void updateRequiredStackAlignment() {
+    if (_requiredStackAlignment <= _expectedStackAlignment) {
+      _requiredStackAlignment = _expectedStackAlignment;
+      clearFuncFlags(kFuncFlagIsStackMisaligned);
+    }
+    else {
+      addFuncFlags(kFuncFlagIsStackMisaligned);
+    }
+  }
+
+  //! @brief Get stack "Red Zone" size.
+  ASMJIT_INLINE uint32_t getRedZoneSize() const { return _redZoneSize; }
+  //! @brief Set stack "Red Zone" size.
+  ASMJIT_INLINE void setRedZoneSize(uint32_t s) { _redZoneSize = static_cast<uint16_t>(s); }
+
+  //! @brief Get stack "Spill Zone" size.
+  ASMJIT_INLINE uint32_t getSpillZoneSize() const { return _spillZoneSize; }
+  //! @brief Set stack "Spill Zone" size.
+  ASMJIT_INLINE void setSpillZoneSize(uint32_t s) { _spillZoneSize = static_cast<uint16_t>(s); }
+
+  //! @brief Get stack size used by function arguments.
+  ASMJIT_INLINE uint32_t getArgStackSize() const { return _argStackSize; }
+
+  //! @brief Get stack size used by variables and memory allocated on the stack.
+  ASMJIT_INLINE uint32_t getMemStackSize() const { return _memStackSize; }
+
+  //! @brief Get stack size used by function calls.
+  ASMJIT_INLINE uint32_t getCallStackSize() const { return _callStackSize; }
+  //! @brief Merge stack size used by function call with @a s.
+  ASMJIT_INLINE void mergeCallStackSize(uint32_t s) { if (_callStackSize < s) _callStackSize = s; }
+
+  // --------------------------------------------------------------------------
+  // [Hints]
+  // --------------------------------------------------------------------------
+
+  //! @brief Set function hint.
+  ASMJIT_INLINE void setHint(uint32_t hint, uint32_t value) {
+    ASMJIT_ASSERT(hint <= 31);
+    ASMJIT_ASSERT(value <= 1);
+
+    _funcHints &= ~(1 << hint);
+    _funcHints |= (value << hint);
+  }
+
+  //! @brief Get function hint.
+  ASMJIT_INLINE uint32_t getHint(uint32_t hint) const {
+    ASMJIT_ASSERT(hint <= 31);
+    return (_funcHints >> hint) & 0x1;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! @brief Function entry.
+  TargetNode* _entryNode;
+  //! @brief Function exit.
+  TargetNode* _exitNode;
+
+  //! @brief Function declaration.
+  FuncDecl* _decl;
+  //! @brief Function end.
+  EndNode* _end;
+
+  //! @brief Arguments list as @ref VarData.
+  VarData** _argList;
+
+  //! @brief Function hints.
+  uint32_t _funcHints;
+  //! @brief Function flags.
+  uint32_t _funcFlags;
+
+  //! @brief Expected stack alignment (we depend on this value).
+  //!
+  //! @note It can be the global alignment given by the OS or one described
+  //! by the target platform ABI.
+  uint32_t _expectedStackAlignment;
+  //! @brief Required stack alignment (usually for multimedia instructions).
+  uint32_t _requiredStackAlignment;
+
+  //! @brief The "Red Zone" size - the count of bytes which might be accessed
+  //! without adjusting the stack pointer.
+  uint16_t _redZoneSize;
+  //! @brief Spill zone size (zone used by the WIN64 ABI).
+  uint16_t _spillZoneSize;
+
+  //! @brief Stack size needed for function arguments.
+  uint32_t _argStackSize;
+  //! @brief Stack size needed for all variables and memory allocated on
+  //! the stack.
+  uint32_t _memStackSize;
+  //! @brief Stack size needed to call other functions.
+  uint32_t _callStackSize;
+};
+
+// ============================================================================
+// [asmjit::EndNode]
+// ============================================================================
+
+//! @brief End of function/block node.
+struct EndNode : public BaseNode {
+  ASMJIT_NO_COPY(EndNode)
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a new @ref EndNode instance.
+  ASMJIT_INLINE EndNode(BaseCompiler* compiler) : BaseNode(compiler, kNodeTypeEnd) {
+    _flags |= kNodeFlagIsRet;
+  }
+
+  //! @brief Destroy the @ref EndNode instance.
+  ASMJIT_INLINE ~EndNode() {}
+};
+
+// ============================================================================
+// [asmjit::RetNode]
+// ============================================================================
+
+//! @brief Function return node.
+struct RetNode : public BaseNode {
+  ASMJIT_NO_COPY(RetNode)
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a new @ref RetNode instance.
+  ASMJIT_INLINE RetNode(BaseCompiler* compiler, const Operand& o0, const Operand& o1) : BaseNode(compiler, kNodeTypeRet) {
+    _flags |= kNodeFlagIsRet;
+    _ret[0] = o0;
+    _ret[1] = o1;
+  }
+
+  //! @brief Destroy the @ref RetNode instance.
+  ASMJIT_INLINE ~RetNode() {}
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get the first return operand.
+  ASMJIT_INLINE Operand& getFirst() { return _ret[0]; }
+  //! @overload
+  ASMJIT_INLINE const Operand& getFirst() const { return _ret[0]; }
+
+  //! @brief Get the second return operand.
+  ASMJIT_INLINE Operand& getSecond() { return _ret[1]; }
+  //! @overload
+  ASMJIT_INLINE const Operand& getSecond() const { return _ret[1]; }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! @brief Ret operand(s).
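+  //!
+  //! Two operands are presumably needed to cover cases where a wide value
+  //! is returned in a register pair (for example a 64-bit integer in
+  //! EDX:EAX on 32-bit x86).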
+ Operand _ret[2]; +}; + +// ============================================================================ +// [asmjit::CallNode] +// ============================================================================ + +//! @brief Function-call node. +struct CallNode : public BaseNode { + ASMJIT_NO_COPY(CallNode) + + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! @brief Create a new @ref CallNode instance. + ASMJIT_INLINE CallNode(BaseCompiler* compiler, const Operand& target) : + BaseNode(compiler, kNodeTypeCall), + _decl(NULL), + _target(target), + _args(NULL) {} + + //! @brief Destroy the @ref CallNode instance. + ASMJIT_INLINE ~CallNode() {} + + // -------------------------------------------------------------------------- + // [Accessors] + // -------------------------------------------------------------------------- + + //! @brief Get function declaration. + ASMJIT_INLINE FuncDecl* getDecl() const { return _decl; } + + //! @brief Get target operand. + ASMJIT_INLINE Operand& getTarget() { return _target; } + //! @overload + ASMJIT_INLINE const Operand& getTarget() const { return _target; } + + //! @brief Get return at @a i. + ASMJIT_INLINE Operand& getRet(uint32_t i = 0) { + ASMJIT_ASSERT(i < 2); + return _ret[i]; + } + //! @overload + ASMJIT_INLINE const Operand& getRet(uint32_t i = 0) const { + ASMJIT_ASSERT(i < 2); + return _ret[i]; + } + + //! @brief Get argument at @a i. + ASMJIT_INLINE Operand& getArg(uint32_t i) { + ASMJIT_ASSERT(i < kFuncArgCountLoHi); + return _args[i]; + } + //! @overload + ASMJIT_INLINE const Operand& getArg(uint32_t i) const { + ASMJIT_ASSERT(i < kFuncArgCountLoHi); + return _args[i]; + } + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + + //! @brief Function declaration. + FuncDecl* _decl; + + //! @brief Target (address of function, register, label, ...). + Operand _target; + //! @brief Return. + Operand _ret[2]; + //! @brief Arguments. + Operand* _args; +}; + +// ============================================================================ +// [asmjit::SArgNode] +// ============================================================================ + +//! @brief Function-call 'argument on the stack' node. +struct SArgNode : public BaseNode { + ASMJIT_NO_COPY(SArgNode) + + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! @brief Create a new @ref SArgNode instance. + ASMJIT_INLINE SArgNode(BaseCompiler* compiler, VarData* vd, CallNode* call) : + BaseNode(compiler, kNodeTypeSArg), + _vd(vd), + _call(call) {} + + //! @brief Destroy the @ref SArgNode instance. + ASMJIT_INLINE ~SArgNode() {} + + // -------------------------------------------------------------------------- + // [Accessors] + // -------------------------------------------------------------------------- + + //! @brief Get the associated variable. + ASMJIT_INLINE VarData* getVd() const { return _vd; } + + //! @brief Get the associated function-call. 
+  ASMJIT_INLINE CallNode* getCall() const { return _call; }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! @brief Variable.
+  VarData* _vd;
+  //! @brief Associated @ref CallNode.
+  CallNode* _call;
+};
+
+// ============================================================================
+// [asmjit::BaseCompiler]
+// ============================================================================
+
+//! @brief Base compiler.
+//!
+//! @sa BaseAssembler.
+struct BaseCompiler : public CodeGen {
+  ASMJIT_NO_COPY(BaseCompiler)
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a new @ref BaseCompiler instance.
+  ASMJIT_API BaseCompiler(BaseRuntime* runtime);
+  //! @brief Destroy the @ref BaseCompiler instance.
+  ASMJIT_API virtual ~BaseCompiler();
+
+  // --------------------------------------------------------------------------
+  // [LookAhead]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get maximum look ahead.
+  ASMJIT_INLINE uint32_t getMaxLookAhead() const { return _maxLookAhead; }
+  //! @brief Set maximum look ahead to @a val.
+  ASMJIT_INLINE void setMaxLookAhead(uint32_t val) { _maxLookAhead = val; }
+
+  // --------------------------------------------------------------------------
+  // [Clear / Reset]
+  // --------------------------------------------------------------------------
+
+  //! @brief Clear everything, but keep buffers allocated.
+  //!
+  //! @note This method will destroy your code.
+  ASMJIT_API void clear();
+  //! @brief Clear everything and reset all buffers.
+  //!
+  //! @note This method will destroy your code.
+  ASMJIT_API void reset();
+  //! @brief Called by clear() and reset() to clear all data related to derived
+  //! class implementation.
+  ASMJIT_API virtual void _purge();
+
+  // --------------------------------------------------------------------------
+  // [Nodes]
+  // --------------------------------------------------------------------------
+
+  template<typename T>
+  ASMJIT_INLINE T* newNode() {
+    void* p = _zoneAllocator.alloc(sizeof(T));
+    return new(p) T(this);
+  }
+
+  template<typename T, typename P0>
+  ASMJIT_INLINE T* newNode(P0 p0) {
+    void* p = _zoneAllocator.alloc(sizeof(T));
+    return new(p) T(this, p0);
+  }
+
+  template<typename T, typename P0, typename P1>
+  ASMJIT_INLINE T* newNode(P0 p0, P1 p1) {
+    void* p = _zoneAllocator.alloc(sizeof(T));
+    return new(p) T(this, p0, p1);
+  }
+
+  template<typename T, typename P0, typename P1, typename P2>
+  ASMJIT_INLINE T* newNode(P0 p0, P1 p1, P2 p2) {
+    void* p = _zoneAllocator.alloc(sizeof(T));
+    return new(p) T(this, p0, p1, p2);
+  }
+
+  //! @brief Get first node.
+  ASMJIT_INLINE BaseNode* getFirstNode() const { return _firstNode; }
+  //! @brief Get last node.
+  ASMJIT_INLINE BaseNode* getLastNode() const { return _lastNode; }
+
+  //! @brief Get current node.
+  //!
+  //! @note If this method returns @c NULL it means that nothing has been emitted
+  //! yet.
+  ASMJIT_INLINE BaseNode* getCursor() const { return _cursor; }
+  //! @brief Set the current node without returning the previous node (private).
+  ASMJIT_INLINE void _setCursor(BaseNode* node) { _cursor = node; }
+  //! @brief Set the current node to @a node and return the previous one.
+  ASMJIT_API BaseNode* setCursor(BaseNode* node);
+
+  //! @brief Add node @a node after current and set current to @a node.
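+  //!
+  //! A hedged sketch of how the cursor interacts with node insertion (@c c
+  //! is a concrete compiler, @c someNode and @c other already created nodes):
+  //! @code
+  //! BaseNode* prev = c.setCursor(someNode); // Next addNode() emits after someNode.
+  //! c.addNode(other);                       // 'other' now follows someNode.
+  //! c.setCursor(prev);                      // Restore the original cursor.
+  //! @endcode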
+  ASMJIT_API BaseNode* addNode(BaseNode* node);
+  //! @brief Add node before @a ref.
+  ASMJIT_API BaseNode* addNodeBefore(BaseNode* node, BaseNode* ref);
+  //! @brief Add node after @a ref.
+  ASMJIT_API BaseNode* addNodeAfter(BaseNode* node, BaseNode* ref);
+  //! @brief Remove node @a node.
+  ASMJIT_API BaseNode* removeNode(BaseNode* node);
+  //! @brief Remove multiple nodes.
+  ASMJIT_API void removeNodes(BaseNode* first, BaseNode* last);
+
+  // --------------------------------------------------------------------------
+  // [Func]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get current function.
+  ASMJIT_INLINE FuncNode* getFunc() const { return _func; }
+
+  // --------------------------------------------------------------------------
+  // [Align]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a new @ref AlignNode.
+  ASMJIT_API AlignNode* newAlign(uint32_t m);
+  //! @brief Add a new @ref AlignNode.
+  ASMJIT_API AlignNode* addAlign(uint32_t m);
+
+  //! @brief Align target buffer to @a m bytes.
+  //!
+  //! Typical usage of this is to align labels at the start of inner loops.
+  //!
+  //! Inserts @c nop() instructions or CPU optimized NOPs.
+  ASMJIT_INLINE AlignNode* align(uint32_t m) { return addAlign(m); }
+
+  // --------------------------------------------------------------------------
+  // [Target]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a new @ref TargetNode.
+  ASMJIT_API TargetNode* newTarget();
+  //! @brief Add a new @ref TargetNode.
+  ASMJIT_API TargetNode* addTarget();
+
+  //! @brief Get @ref TargetNode by @a id.
+  ASMJIT_INLINE TargetNode* getTargetById(uint32_t id) {
+    ASMJIT_ASSERT(OperandUtil::isLabelId(id));
+    ASMJIT_ASSERT(id < _targets.getLength());
+
+    return _targets[id];
+  }
+
+  //! @brief Get @ref TargetNode by @a label.
+  ASMJIT_INLINE TargetNode* getTarget(const Label& label) {
+    return getTargetById(label.getId());
+  }
+
+  // --------------------------------------------------------------------------
+  // [Label]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get count of created labels.
+  ASMJIT_INLINE size_t getLabelsCount() const { return _targets.getLength(); }
+
+  //! @brief Get whether @a label is created.
+  ASMJIT_INLINE bool isLabelCreated(const Label& label) const { return static_cast<size_t>(label.getId()) < _targets.getLength(); }
+
+  //! @internal
+  //!
+  //! @brief Create and initialize a new label.
+  ASMJIT_API Error _newLabel(Label* dst);
+
+  //! @brief Create and return new label.
+  ASMJIT_INLINE Label newLabel() {
+    Label result(DontInitialize);
+    _newLabel(&result);
+    return result;
+  }
+
+  //! @brief Bind label to the current offset.
+  //!
+  //! @note Label can be bound only once!
+  ASMJIT_API void bind(const Label& label);
+
+  // --------------------------------------------------------------------------
+  // [Embed]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a new @ref EmbedNode.
+  ASMJIT_API EmbedNode* newEmbed(const void* data, uint32_t size);
+  //! @brief Add a new @ref EmbedNode.
+  ASMJIT_API EmbedNode* addEmbed(const void* data, uint32_t size);
+
+  //! @brief Embed data.
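+  //!
+  //! A minimal sketch (the table and its contents are only an example):
+  //! @code
+  //! static const uint8_t lut[4] = { 0, 1, 2, 3 };
+  //! c.embed(lut, sizeof(lut)); // Emits 4 raw bytes into the stream.
+  //! @endcode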
+  ASMJIT_INLINE EmbedNode* embed(const void* data, uint32_t size) { return addEmbed(data, size); }
+
+  // --------------------------------------------------------------------------
+  // [Comment]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a new @ref CommentNode.
+  ASMJIT_API CommentNode* newComment(const char* str);
+  //! @brief Add a new @ref CommentNode.
+  ASMJIT_API CommentNode* addComment(const char* str);
+
+  //! @brief Emit a single comment line.
+  ASMJIT_API CommentNode* comment(const char* fmt, ...);
+
+  // --------------------------------------------------------------------------
+  // [Hint]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a new @ref HintNode.
+  ASMJIT_API HintNode* newHint(BaseVar& var, uint32_t hint, uint32_t value);
+  //! @brief Add a new @ref HintNode.
+  ASMJIT_API HintNode* addHint(BaseVar& var, uint32_t hint, uint32_t value);
+
+  // --------------------------------------------------------------------------
+  // [Vars]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get whether variable @a var is created.
+  ASMJIT_INLINE bool isVarCreated(const BaseVar& var) const {
+    return static_cast<size_t>(var.getId() & kOperandIdNum) < _vars.getLength();
+  }
+
+  //! @internal
+  //!
+  //! @brief Get @ref VarData by @a var.
+  ASMJIT_INLINE VarData* getVd(const BaseVar& var) const {
+    return getVdById(var.getId());
+  }
+
+  //! @internal
+  //!
+  //! @brief Get @ref VarData by @a id.
+  ASMJIT_INLINE VarData* getVdById(uint32_t id) const {
+    ASMJIT_ASSERT(id != kInvalidValue);
+    ASMJIT_ASSERT(static_cast<size_t>(id & kOperandIdNum) < _vars.getLength());
+
+    return _vars[id & kOperandIdNum];
+  }
+
+  //! @internal
+  //!
+  //! @brief Get an array of 'VarData*'.
+  ASMJIT_INLINE VarData** _getVdArray() const {
+    return const_cast<VarData**>(_vars.getData());
+  }
+
+  //! @internal
+  //!
+  //! @brief Create a new @ref VarData.
+  ASMJIT_API VarData* _newVd(uint32_t type, uint32_t size, uint32_t c, const char* name);
+
+  //! @brief Create a new @ref BaseVar.
+  virtual Error _newVar(BaseVar* var, uint32_t type, const char* name) = 0;
+
+  //! @brief Alloc variable @a var.
+  ASMJIT_API void alloc(BaseVar& var);
+  //! @brief Alloc variable @a var using @a regIndex as a register index.
+  ASMJIT_API void alloc(BaseVar& var, uint32_t regIndex);
+  //! @brief Alloc variable @a var using @a reg as a demanded register.
+  ASMJIT_API void alloc(BaseVar& var, const BaseReg& reg);
+  //! @brief Spill variable @a var.
+  ASMJIT_API void spill(BaseVar& var);
+  //! @brief Save variable @a var if modified.
+  ASMJIT_API void save(BaseVar& var);
+  //! @brief Unuse variable @a var.
+  ASMJIT_API void unuse(BaseVar& var);
+
+  //! @brief Get priority of variable @a var.
+  ASMJIT_API uint32_t getPriority(BaseVar& var) const;
+  //! @brief Set priority of variable @a var to @a priority.
+  ASMJIT_API void setPriority(BaseVar& var, uint32_t priority);
+
+  //! @brief Get save-on-unuse @a var property.
+  ASMJIT_API bool getSaveOnUnuse(BaseVar& var) const;
+  //! @brief Set save-on-unuse @a var property to @a value.
+  ASMJIT_API void setSaveOnUnuse(BaseVar& var, bool value);
+
+  //! @brief Rename variable @a var to @a name.
+  //!
+  //! @note Only the new name will appear in the logger.
+  ASMJIT_API void rename(BaseVar& var, const char* name);
+
+  // --------------------------------------------------------------------------
+  // [Stack]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a new @ref BaseMem.
+  virtual Error _newStack(BaseMem* mem, uint32_t size, uint32_t alignment, const char* name) = 0;
+
+  // --------------------------------------------------------------------------
+  // [Serialize]
+  // --------------------------------------------------------------------------
+
+  //! @brief Send assembled code to @a assembler.
+  virtual Error serialize(BaseAssembler& assembler) = 0;
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! @brief Flow id added to each node created (used only by @ref Context).
+  uint32_t _nodeFlowId;
+  //! @brief Flags added to each node created (used only by @ref Context).
+  uint32_t _nodeFlags;
+  //! @brief Maximum count of nodes to look ahead when allocating/spilling
+  //! registers.
+  uint32_t _maxLookAhead;
+
+  //! @brief Variable mapping (translates incoming kVarType into target).
+  const uint8_t* _targetVarMapping;
+
+  //! @brief First node.
+  BaseNode* _firstNode;
+  //! @brief Last node.
+  BaseNode* _lastNode;
+
+  //! @brief Current node.
+  BaseNode* _cursor;
+  //! @brief Current function.
+  FuncNode* _func;
+
+  //! @brief Variable allocator.
+  Zone _varAllocator;
+  //! @brief String/data allocator.
+  Zone _stringAllocator;
+
+  //! @brief Targets.
+  PodVector<TargetNode*> _targets;
+  //! @brief Variables.
+  PodVector<VarData*> _vars;
+};
+
+// ============================================================================
+// [Defined-Later]
+// ============================================================================
+
+ASMJIT_INLINE Label::Label(BaseCompiler& c) : Operand(DontInitialize) {
+  c._newLabel(this);
+}
+
+ASMJIT_INLINE BaseNode::BaseNode(BaseCompiler* compiler, uint32_t type) {
+  _prev = NULL;
+  _next = NULL;
+  _type = static_cast<uint8_t>(type);
+  _opCount = 0;
+  _flags = static_cast<uint16_t>(compiler->_nodeFlags);
+  _flowId = compiler->_nodeFlowId;
+  _comment = NULL;
+  _varInst = NULL;
+  _liveness = NULL;
+  _state = NULL;
+}
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../base/apiend.h"
+
+// [Guard]
+#endif // _ASMJIT_BASE_COMPILER_H
diff --git a/src/asmjit/base/context.cpp b/src/asmjit/base/context.cpp
new file mode 100644
index 0000000..5e13c13
--- /dev/null
+++ b/src/asmjit/base/context.cpp
@@ -0,0 +1,343 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Export]
+#define ASMJIT_EXPORTS
+
+// [Dependencies - AsmJit]
+#include "../base/context_p.h"
+#include "../base/intutil.h"
+
+// [Api-Begin]
+#include "../base/apibegin.h"
+
+namespace asmjit {
+
+// ============================================================================
+// [asmjit::BaseContext - Construction / Destruction]
+// ============================================================================
+
+BaseContext::BaseContext(BaseCompiler* compiler) :
+  _compiler(compiler),
+  _zoneAllocator(8192 - sizeof(Zone::Chunk) - kMemAllocOverhead) {
+
+  BaseContext::reset();
+}
+
+BaseContext::~BaseContext() {}
+
+// ============================================================================
+// [asmjit::BaseContext - Reset]
+// ============================================================================
+
+void BaseContext::reset() {
+  _zoneAllocator.clear();
+
+  _func = NULL;
+  _start = NULL;
+  _end = NULL;
+  _extraBlock = NULL;
+  _stop = NULL;
+
+  _unreachableList.reset();
+  _jccList.reset();
+  _contextVd.clear();
+
+  _memVarCells = NULL;
+  _memStackCells = NULL;
+
+  _mem1ByteVarsUsed = 0;
+  _mem2ByteVarsUsed = 0;
+  _mem4ByteVarsUsed = 0;
+  _mem8ByteVarsUsed = 0;
+  _mem16ByteVarsUsed = 0;
+  _mem32ByteVarsUsed = 0;
+  _mem64ByteVarsUsed = 0;
+  _memStackCellsUsed = 0;
+
+  _memMaxAlign = 0;
+  _memVarTotal = 0;
+  _memStackTotal = 0;
+  _memAllTotal = 0;
+
+  _state = NULL;
+}
+
+// ============================================================================
+// [asmjit::BaseContext - Mem]
+// ============================================================================
+
+static ASMJIT_INLINE uint32_t BaseContext_getDefaultAlignment(uint32_t size) {
+  if (size > 32)
+    return 64;
+  else if (size > 16)
+    return 32;
+  else if (size > 8)
+    return 16;
+  else if (size > 4)
+    return 8;
+  else if (size > 2)
+    return 4;
+  else if (size > 1)
+    return 2;
+  else
+    return 1;
+}
+
+MemCell* BaseContext::_newVarCell(VarData* vd) {
+  ASMJIT_ASSERT(vd->_memCell == NULL);
+
+  MemCell* cell;
+  uint32_t size = vd->getSize();
+
+  if (vd->isStack()) {
+    cell = _newStackCell(size, vd->getAlignment());
+
+    if (cell == NULL)
+      return NULL;
+  }
+  else {
+    cell = static_cast<MemCell*>(_zoneAllocator.alloc(sizeof(MemCell)));
+    if (cell == NULL)
+      goto _NoMemory;
+
+    cell->_next = _memVarCells;
+    _memVarCells = cell;
+
+    cell->_offset = 0;
+    cell->_size = size;
+    cell->_alignment = size;
+
+    _memMaxAlign = IntUtil::iMax(_memMaxAlign, size);
+    _memVarTotal += size;
+
+    switch (size) {
+      case 1: _mem1ByteVarsUsed++ ; break;
+      case 2: _mem2ByteVarsUsed++ ; break;
+      case 4: _mem4ByteVarsUsed++ ; break;
+      case 8: _mem8ByteVarsUsed++ ; break;
+      case 16: _mem16ByteVarsUsed++; break;
+      case 32: _mem32ByteVarsUsed++; break;
+      case 64: _mem64ByteVarsUsed++; break;
+      default: ASMJIT_ASSERT(!"Reached");
+    }
+  }
+
+  vd->_memCell = cell;
+  return cell;
+
+_NoMemory:
+  _compiler->setError(kErrorNoHeapMemory);
+  return NULL;
+}
+
+MemCell* BaseContext::_newStackCell(uint32_t size, uint32_t alignment) {
+  MemCell* cell = static_cast<MemCell*>(_zoneAllocator.alloc(sizeof(MemCell)));
+  if (cell == NULL)
+    goto _NoMemory;
+
+  if (alignment == 0)
+    alignment = BaseContext_getDefaultAlignment(size);
+
+  if (alignment > 64)
+    alignment = 64;
+
+  ASMJIT_ASSERT(IntUtil::isPowerOf2(alignment));
+  size = IntUtil::alignTo(size, alignment);
+
+  // Insert it sorted according to the alignment and size.
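+  // For example, after several insertions the list may be ordered like
+  // (alignment/size): (32/64) -> (16/32) -> (16/16) -> (8/8) -> (4/4),
+  // which helps resolveCellOffsets() pack the cells with little padding.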
+ { + MemCell** pPrev = &_memStackCells; + MemCell* cur = *pPrev; + + for (cur = *pPrev; cur != NULL; cur = cur->_next) { + if (cur->getAlignment() > alignment) + continue; + if (cur->getAlignment() == alignment && cur->getSize() > size) + continue; + break; + } + + cell->_next = cur; + cell->_offset = 0; + cell->_size = size; + cell->_alignment = alignment; + + *pPrev = cell; + _memStackCellsUsed++; + + _memMaxAlign = IntUtil::iMax(_memMaxAlign, alignment); + _memStackTotal += size; + } + + return cell; + +_NoMemory: + _compiler->setError(kErrorNoHeapMemory); + return NULL; +} + +Error BaseContext::resolveCellOffsets() { + MemCell* varCell = _memVarCells; + MemCell* stackCell = _memStackCells; + + uint32_t stackAlignment = 0; + if (stackCell != NULL) + stackAlignment = stackCell->getAlignment(); + + uint32_t pos64 = 0; + uint32_t pos32 = pos64 + _mem64ByteVarsUsed * 64; + uint32_t pos16 = pos32 + _mem32ByteVarsUsed * 32; + uint32_t pos8 = pos16 + _mem16ByteVarsUsed * 16; + uint32_t pos4 = pos8 + _mem8ByteVarsUsed * 8 ; + uint32_t pos2 = pos4 + _mem4ByteVarsUsed * 4 ; + uint32_t pos1 = pos2 + _mem2ByteVarsUsed * 2 ; + + uint32_t stackPos = pos1 + _mem1ByteVarsUsed; + + uint32_t gapAlignment = stackAlignment; + uint32_t gapSize = 0; + + if (gapAlignment) + IntUtil::deltaTo(stackPos, gapAlignment); + stackPos += gapSize; + + uint32_t gapPos = stackPos; + uint32_t allTotal = stackPos; + + // Vars - Allocated according to alignment/width. + while (varCell != NULL) { + uint32_t size = varCell->getSize(); + uint32_t offset; + + switch (size) { + case 1: offset = pos1 ; pos1 += 1 ; break; + case 2: offset = pos2 ; pos2 += 2 ; break; + case 4: offset = pos4 ; pos4 += 4 ; break; + case 8: offset = pos8 ; pos8 += 8 ; break; + case 16: offset = pos16; pos16 += 16; break; + case 32: offset = pos32; pos32 += 32; break; + case 64: offset = pos64; pos64 += 64; break; + default: ASMJIT_ASSERT(!"Reached"); + } + + varCell->setOffset(static_cast(offset)); + varCell = varCell->_next; + } + + // Stack - Allocated according to alignment and width. + while (stackCell != NULL) { + uint32_t size = stackCell->getSize(); + uint32_t alignment = stackCell->getAlignment(); + uint32_t offset; + + // Try to fill the gap between variables / stack first. + if (size <= gapSize && alignment <= gapAlignment) { + offset = gapPos; + + gapSize -= size; + gapPos -= size; + + if (alignment < gapAlignment) + gapAlignment = alignment; + } + else { + offset = stackPos; + + stackPos += size; + allTotal += size; + } + + stackCell->setOffset(offset); + stackCell = stackCell->_next; + } + + _memAllTotal = allTotal; + return kErrorOk; +} + +// ============================================================================ +// [asmjit::BaseContext - RemoveUnreachableCode] +// ============================================================================ + +Error BaseContext::removeUnreachableCode() { + PodList::Link* link = _unreachableList.getFirst(); + BaseNode* stop = getStop(); + + while (link != NULL) { + BaseNode* node = link->getValue(); + if (node != NULL) { + // Locate all unreachable nodes. + BaseNode* first = node; + do { + if (node->isFetched() || (node->getType() == kNodeTypeTarget && static_cast(node)->getNumRefs() > 0)) + break; + node = node->getNext(); + } while (node != stop); + + // Remove. + if (node != first) { + BaseNode* last = (node != NULL) ? 
node->getPrev() : getCompiler()->getLastNode(); + getCompiler()->removeNodes(first, last); + } + } + + link = link->getNext(); + } + + return kErrorOk; +} + +// ============================================================================ +// [asmjit::BaseContext - Cleanup] +// ============================================================================ + +//! @internal +//! +//! @brief Translate the given function @a func. +void BaseContext::cleanup() { + VarData** array = _contextVd.getData(); + size_t length = _contextVd.getLength(); + + for (size_t i = 0; i < length; i++) { + VarData* vd = array[i]; + vd->resetContextId(); + vd->resetRegIndex(); + } + + _contextVd.clear(); + _extraBlock = NULL; +} + +// ============================================================================ +// [asmjit::BaseContext - CompileFunc] +// ============================================================================ + +Error BaseContext::compile(FuncNode* func) { + BaseNode* end = func->getEnd(); + BaseNode* stop = end->getNext(); + + _func = func; + _stop = stop; + _extraBlock = end; + + ASMJIT_PROPAGATE_ERROR(fetch()); + ASMJIT_PROPAGATE_ERROR(removeUnreachableCode()); + ASMJIT_PROPAGATE_ERROR(analyze()); + ASMJIT_PROPAGATE_ERROR(translate()); + + // We alter the compiler cursor, because it doesn't make sense to reference + // it after compilation - some nodes may disappear and it's forbidden to add + // new code after the compilation is done. + _compiler->_setCursor(NULL); + + return kErrorOk; +} + +} // asmjit namespace + +// [Api-End] +#include "../base/apiend.h" diff --git a/src/asmjit/base/context_p.h b/src/asmjit/base/context_p.h new file mode 100644 index 0000000..7d38ef4 --- /dev/null +++ b/src/asmjit/base/context_p.h @@ -0,0 +1,254 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. +// +// [License] +// Zlib - See LICENSE.md file in the package. + +// [Guard] +#ifndef _ASMJIT_BASE_CONTEXT_H +#define _ASMJIT_BASE_CONTEXT_H + +// [Dependencies - AsmJit] +#include "../base/compiler.h" +#include "../base/zone.h" + +// [Api-Begin] +#include "../base/apibegin.h" + +namespace asmjit { + +// ============================================================================ +// [asmjit::BaseContext] +// ============================================================================ + +struct BaseContext { + ASMJIT_NO_COPY(BaseContext) + + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + BaseContext(BaseCompiler* compiler); + virtual ~BaseContext(); + + // -------------------------------------------------------------------------- + // [Reset] + // -------------------------------------------------------------------------- + + //! @brief Reset the whole context. + virtual void reset(); + + // -------------------------------------------------------------------------- + // [Accessors] + // -------------------------------------------------------------------------- + + //! @brief Get compiler. + ASMJIT_INLINE BaseCompiler* getCompiler() const { return _compiler; } + + //! @brief Get function. + ASMJIT_INLINE FuncNode* getFunc() const { return _func; } + //! @brief Get stop node. + ASMJIT_INLINE BaseNode* getStop() const { return _stop; } + + //! @brief Get start of the current scope. + ASMJIT_INLINE BaseNode* getStart() const { return _start; } + //! @brief Get end of the current scope. + ASMJIT_INLINE BaseNode* getEnd() const { return _end; } + + //! 
@brief Get extra block.
+  ASMJIT_INLINE BaseNode* getExtraBlock() const { return _extraBlock; }
+  //! @brief Set extra block.
+  ASMJIT_INLINE void setExtraBlock(BaseNode* node) { _extraBlock = node; }
+
+  // --------------------------------------------------------------------------
+  // [Error]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get the last error code.
+  ASMJIT_INLINE Error getError() const {
+    return getCompiler()->getError();
+  }
+
+  //! @brief Set the last error code and propagate it through the error handler.
+  ASMJIT_INLINE Error setError(Error error, const char* message = NULL) {
+    return getCompiler()->setError(error, message);
+  }
+
+  // --------------------------------------------------------------------------
+  // [State]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get current state.
+  ASMJIT_INLINE BaseVarState* getState() const { return _state; }
+
+  //! @brief Load current state from @a src state.
+  virtual void loadState(BaseVarState* src) = 0;
+  //! @brief Save current state, returning a new @ref BaseVarState instance.
+  virtual BaseVarState* saveState() = 0;
+
+  //! @brief Change the current state to @a src state.
+  virtual void switchState(BaseVarState* src) = 0;
+
+  //! @brief Change the current state to the intersection of two states @a a
+  //! and @a b.
+  virtual void intersectStates(BaseVarState* a, BaseVarState* b) = 0;
+
+  // --------------------------------------------------------------------------
+  // [Mem]
+  // --------------------------------------------------------------------------
+
+  MemCell* _newVarCell(VarData* vd);
+  MemCell* _newStackCell(uint32_t size, uint32_t alignment);
+
+  ASMJIT_INLINE MemCell* getVarCell(VarData* vd) {
+    MemCell* cell = vd->getMemCell();
+    return cell ? cell : _newVarCell(vd);
+  }
+
+  virtual Error resolveCellOffsets();
+
+  // --------------------------------------------------------------------------
+  // [Bits]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE VarBits* newBits(uint32_t len) {
+    return static_cast<VarBits*>(
+      _zoneAllocator.calloc(static_cast<size_t>(len) * VarBits::kEntitySize));
+  }
+
+  ASMJIT_INLINE VarBits* copyBits(const VarBits* src, uint32_t len) {
+    return static_cast<VarBits*>(
+      _zoneAllocator.dup(src, static_cast<size_t>(len) * VarBits::kEntitySize));
+  }
+
+  // --------------------------------------------------------------------------
+  // [Fetch]
+  // --------------------------------------------------------------------------
+
+  //! @brief Fetch.
+  //!
+  //! Fetch iterates over all nodes and gathers information about all variables
+  //! used. The process generates information required by the register
+  //! allocator, variable liveness analysis, and the translator.
+  virtual Error fetch() = 0;
+
+  // --------------------------------------------------------------------------
+  // [RemoveUnreachableCode]
+  // --------------------------------------------------------------------------
+
+  //! @brief Remove unreachable code.
+  virtual Error removeUnreachableCode();
+
+  // --------------------------------------------------------------------------
+  // [Analyze]
+  // --------------------------------------------------------------------------
+
+  //! @brief Perform variable liveness analysis.
+  //!
+  //! Analysis phase iterates over nodes in reverse order and generates a bit
+  //! array describing variables that are alive at every node in the function.
+  //! When the analysis starts, all variables are assumed dead. When a read or
+  //! read/write operation on a variable is detected, the variable becomes
+  //! alive; when only a write operation is detected, the variable becomes dead.
+  //!
+  //! When a label is found, all jumps to that label are followed and the
+  //! analysis repeats until all variables are resolved.
+  virtual Error analyze() = 0;
+
+  // --------------------------------------------------------------------------
+  // [Translate]
+  // --------------------------------------------------------------------------
+
+  //! @brief Translate code by allocating registers and handling state changes.
+  virtual Error translate() = 0;
+
+  // --------------------------------------------------------------------------
+  // [Cleanup]
+  // --------------------------------------------------------------------------
+
+  virtual void cleanup();
+
+  // --------------------------------------------------------------------------
+  // [Compile]
+  // --------------------------------------------------------------------------
+
+  virtual Error compile(FuncNode* func);
+
+  // --------------------------------------------------------------------------
+  // [Serialize]
+  // --------------------------------------------------------------------------
+
+  virtual Error serialize(BaseAssembler* assembler, BaseNode* start, BaseNode* stop) = 0;
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! @brief Compiler.
+  BaseCompiler* _compiler;
+  //! @brief Function.
+  FuncNode* _func;
+
+  //! @brief Zone allocator.
+  Zone _zoneAllocator;
+
+  //! @brief Start of the current active scope.
+  BaseNode* _start;
+  //! @brief End of the current active scope.
+  BaseNode* _end;
+
+  //! @brief Node that is used to insert extra code after the function body.
+  BaseNode* _extraBlock;
+  //! @brief Stop node.
+  BaseNode* _stop;
+
+  //! @brief Unreachable nodes.
+  PodList<BaseNode*> _unreachableList;
+  //! @brief Jump nodes.
+  PodList<BaseNode*> _jccList;
+
+  //! @brief All variables used by the current function.
+  PodVector<VarData*> _contextVd;
+
+  //! @brief Memory cells used to spill variables.
+  MemCell* _memVarCells;
+  //! @brief Memory cells used to allocate memory on the stack.
+  MemCell* _memStackCells;
+
+  //! @brief Count of 1-byte cells.
+  uint32_t _mem1ByteVarsUsed;
+  //! @brief Count of 2-byte cells.
+  uint32_t _mem2ByteVarsUsed;
+  //! @brief Count of 4-byte cells.
+  uint32_t _mem4ByteVarsUsed;
+  //! @brief Count of 8-byte cells.
+  uint32_t _mem8ByteVarsUsed;
+  //! @brief Count of 16-byte cells.
+  uint32_t _mem16ByteVarsUsed;
+  //! @brief Count of 32-byte cells.
+  uint32_t _mem32ByteVarsUsed;
+  //! @brief Count of 64-byte cells.
+  uint32_t _mem64ByteVarsUsed;
+  //! @brief Count of stack memory cells.
+  uint32_t _memStackCellsUsed;
+
+  //! @brief Maximum memory alignment used by the function.
+  uint32_t _memMaxAlign;
+  //! @brief Count of bytes used by variables.
+  uint32_t _memVarTotal;
+  //! @brief Count of bytes used by the stack.
+  uint32_t _memStackTotal;
+  //! @brief Count of bytes used by variables and stack after alignment.
+  uint32_t _memAllTotal;
+
+  //! @brief Current state (used by register allocator).
+ BaseVarState* _state; +}; + +} // asmjit namespace + +// [Api-End] +#include "../base/apiend.h" + +// [Guard] +#endif // _ASMJIT_BASE_CONTEXT_H diff --git a/src/asmjit/base/cpu.cpp b/src/asmjit/base/cpu.cpp new file mode 100644 index 0000000..de24722 --- /dev/null +++ b/src/asmjit/base/cpu.cpp @@ -0,0 +1,83 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. +// +// [License] +// Zlib - See LICENSE.md file in the package. + +// [Export] +#define ASMJIT_EXPORTS + +// [Dependencies - AsmJit] +#include "../base/cpu.h" + +#if defined(ASMJIT_HOST_X86) || defined(ASMJIT_HOST_X64) +#include "../x86/x86cpu.h" +#else +// ? +#endif // ASMJIT_HOST || ASMJIT_HOST_X64 + +// [Dependencies - Windows] +#if defined(ASMJIT_OS_WINDOWS) +# include +#endif // ASMJIT_OS_WINDOWS + +// [Dependencies - Posix] +#if defined(ASMJIT_OS_POSIX) +# include +# include +# include +# include +#endif // ASMJIT_OS_POSIX + +// [Api-Begin] +#include "../base/apibegin.h" + +namespace asmjit { + +// ============================================================================ +// [asmjit::BaseCpu - DetectNumberOfCores] +// ============================================================================ + +uint32_t BaseCpu::detectNumberOfCores() { +#if defined(ASMJIT_OS_WINDOWS) + SYSTEM_INFO info; + ::GetSystemInfo(&info); + return info.dwNumberOfProcessors; +#elif defined(ASMJIT_OS_POSIX) && defined(_SC_NPROCESSORS_ONLN) + // It seems that sysconf returns the number of "logical" processors on both + // mac and linux. So we get the number of "online logical" processors. + long res = ::sysconf(_SC_NPROCESSORS_ONLN); + if (res == -1) return 1; + + return static_cast(res); +#else + return 1; +#endif +} + +// ============================================================================ +// [asmjit::BaseCpu - GetHost] +// ============================================================================ + +#if defined(ASMJIT_HOST_X86) || defined(ASMJIT_HOST_X64) +struct HostCpu : public x86x64::Cpu { + ASMJIT_INLINE HostCpu() : Cpu() { hostCpuDetect(this); } +}; +#else +#error "asmjit/base/cpu.cpp - Unsupported CPU." +#endif // ASMJIT_HOST || ASMJIT_HOST_X64 + +const BaseCpu* BaseCpu::getHost() +{ +#if defined(ASMJIT_HOST_X86) || defined(ASMJIT_HOST_X64) + static HostCpu cpu; +#else +#error "asmjit/base/cpu.cpp - Unsupported CPU." +#endif // ASMJIT_HOST || ASMJIT_HOST_X64 + return &cpu; +} + +} // AsmJit + +// [Api-End] +#include "../base/apiend.h" diff --git a/src/asmjit/base/cpu.h b/src/asmjit/base/cpu.h new file mode 100644 index 0000000..bffc17e --- /dev/null +++ b/src/asmjit/base/cpu.h @@ -0,0 +1,141 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. +// +// [License] +// Zlib - See LICENSE.md file in the package. + +// [Guard] +#ifndef _ASMJIT_BASE_CPU_H +#define _ASMJIT_BASE_CPU_H + +// [Api-Begin] +#include "../base/apibegin.h" +#include "../base/assert.h" + +namespace asmjit { + +//! @addtogroup asmjit_base +//! @{ + +// ============================================================================ +// [asmjit::kCpuVendor] +// ============================================================================ + +//! @brief Cpu vendor IDs. +//! +//! Cpu vendor IDs are specific for AsmJit library. Vendor ID is not directly +//! read from cpuid result, instead it's based on CPU vendor string. +ASMJIT_ENUM(kCpuVendor) { + //! @brief Unknown CPU vendor. + kCpuVendorUnknown = 0, + + //! @brief Intel CPU vendor. + kCpuVendorIntel = 1, + //! @brief AMD CPU vendor. + kCpuVendorAmd = 2, + //! 
@brief National Semiconductor CPU vendor (applies also to Cyrix processors). + kCpuVendorNSM = 3, + //! @brief Transmeta CPU vendor. + kCpuVendorTransmeta = 4, + //! @brief VIA CPU vendor. + kCpuVendorVia = 5 +}; + +// ============================================================================ +// [asmjit::BaseCpu] +// ============================================================================ + +//! @brief Base cpu information. +struct BaseCpu { + ASMJIT_NO_COPY(BaseCpu) + + enum { kFeaturesPerUInt32 = static_cast(sizeof(uint32_t)) * 8 }; + + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE BaseCpu(uint32_t size = sizeof(BaseCpu)) : _size(size) {} + + // -------------------------------------------------------------------------- + // [Accessors] + // -------------------------------------------------------------------------- + + //! @brief Get CPU vendor string. + ASMJIT_INLINE const char* getVendorString() const { return _vendorString; } + //! @brief Get CPU brand string. + ASMJIT_INLINE const char* getBrandString() const { return _brandString; } + + //! @brief Get CPU vendor ID. + ASMJIT_INLINE uint32_t getVendorId() const { return _vendorId; } + //! @brief Get CPU family ID. + ASMJIT_INLINE uint32_t getFamily() const { return _family; } + //! @brief Get CPU model ID. + ASMJIT_INLINE uint32_t getModel() const { return _model; } + //! @brief Get CPU stepping. + ASMJIT_INLINE uint32_t getStepping() const { return _stepping; } + //! @brief Get CPU cores count (or sum of all cores of all procesors). + ASMJIT_INLINE uint32_t getCoresCount() const { return _coresCount; } + + //! @brief Get whether CPU has a @a feature. + ASMJIT_INLINE bool hasFeature(uint32_t feature) const { + ASMJIT_ASSERT(feature < sizeof(_features) * 8); + + return static_cast( + (_features[feature / kFeaturesPerUInt32] >> (feature % kFeaturesPerUInt32)) & 0x1); + } + + //! @brief Add CPU @a feature. + ASMJIT_INLINE BaseCpu& addFeature(uint32_t feature) { + ASMJIT_ASSERT(feature < sizeof(_features) * 8); + + _features[feature / kFeaturesPerUInt32] |= (1U << (feature % kFeaturesPerUInt32)); + return *this; + } + + // -------------------------------------------------------------------------- + // [Statics] + // -------------------------------------------------------------------------- + + //! @brief Detect number of cores (or sum of all cores of all processors). + static ASMJIT_API uint32_t detectNumberOfCores(); + + //! @brief Get host cpu. + static ASMJIT_API const BaseCpu* getHost(); + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + + //! @brief Size of the structure in bytes. + uint32_t _size; + + //! @brief Cpu short vendor string. + char _vendorString[16]; + //! @brief Cpu long vendor string (brand). + char _brandString[64]; + + //! @brief Cpu vendor id (see @c asmjit::kCpuVendor enum). + uint32_t _vendorId; + //! @brief Cpu family ID. + uint32_t _family; + //! @brief Cpu model ID. + uint32_t _model; + //! @brief Cpu stepping. + uint32_t _stepping; + //! @brief Cpu cores count (or sum of all CPU cores of all processors). + uint32_t _coresCount; + + //! @brief Cpu features bitfield. + uint32_t _features[4]; +}; + +//! 
@} + +} // asmjit namespace + +// [Api-End] +#include "../base/apiend.h" + +// [Guard] +#endif // _ASMJIT_BASE_CPU_H diff --git a/src/asmjit/base/defs.cpp b/src/asmjit/base/defs.cpp new file mode 100644 index 0000000..957dd72 --- /dev/null +++ b/src/asmjit/base/defs.cpp @@ -0,0 +1,27 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. +// +// [License] +// Zlib - See LICENSE.md file in the package. + +// [Export] +#define ASMJIT_EXPORTS + +// [Dependencies - AsmJit] +#include "../base/defs.h" + +// [Api-Begin] +#include "../base/apibegin.h" + +namespace asmjit { + +// ============================================================================ +// [asmjit::Operand] +// ============================================================================ + +const Operand noOperand; + +} // asmjit namespace + +// [Api-End] +#include "../base/apiend.h" diff --git a/src/asmjit/base/defs.h b/src/asmjit/base/defs.h new file mode 100644 index 0000000..0c50e14 --- /dev/null +++ b/src/asmjit/base/defs.h @@ -0,0 +1,1154 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. +// +// [License] +// Zlib - See LICENSE.md file in the package. + +// [Guard] +#ifndef _ASMJIT_BASE_DEFS_H +#define _ASMJIT_BASE_DEFS_H + +// [Dependencies - AsmJit] +#include "../base/intutil.h" + +// [Api-Begin] +#include "../base/apibegin.h" + +namespace asmjit { + +//! @addtogroup asmjit_base +//! @{ + +// ============================================================================ +// [Forward Declarations] +// ============================================================================ + +struct BaseAssembler; +struct BaseCompiler; + +// ============================================================================ +// [asmjit::kOperandType] +// ============================================================================ + +//! @brief Operand types that can be encoded in @ref Operand. +ASMJIT_ENUM(kOperandType) { + //! @brief Invalid operand, used only internally (not initialized Operand). + kOperandTypeNone = 0, + //! @brief Operand is a register. + kOperandTypeReg = 1, + //! @brief Operand is a variable. + kOperandTypeVar = 2, + //! @brief Operand is a memory. + kOperandTypeMem = 3, + //! @brief Operand is an immediate value. + kOperandTypeImm = 4, + //! @brief Operand is a label. + kOperandTypeLabel = 5 +}; + +// ============================================================================ +// [asmjit::kOperandId] +// ============================================================================ + +//! @brief Operand id masks used to determine the operand type. +ASMJIT_ENUM(kOperandId) { + //! @brief Operand id refers to @ref BaseVar. + kOperandIdVar = 0x80000000U, + //! @brief Operand id to real index mask. + kOperandIdNum = 0x7FFFFFFFU +}; + +// ============================================================================ +// [asmjit::kRegClass] +// ============================================================================ + +//! @brief Register class. +ASMJIT_ENUM(kRegClass) { + //! @brief Gp register class (any architecture). + kRegClassGp = 0, + + //! @brief Invalid register class. + kRegClassInvalid = 0xFF +}; + +// ============================================================================ +// [asmjit::kInstCode] +// ============================================================================ + +//! @brief Instruction codes (stub). +ASMJIT_ENUM(kInstCode) { + //! @brief No instruction. 
+  kInstNone = 0
+};
+
+// ============================================================================
+// [asmjit::kInstOptions]
+// ============================================================================
+
+//! @brief Instruction options (stub).
+ASMJIT_ENUM(kInstOptions) {
+  //! @brief No instruction options.
+  kInstOptionNone = 0x00,
+
+  //! @brief Emit short form of the instruction.
+  //!
+  //! X86/X64:
+  //!
+  //! Short form is mostly related to jmp and jcc instructions, but can be used
+  //! by other instructions supporting 8-bit or 32-bit immediates. This option
+  //! can be dangerous: if a short jmp/jcc is required but not encodable due to
+  //! a large displacement, an error is raised and the whole assembler/compiler
+  //! stream becomes unusable.
+  kInstOptionShortForm = 0x01,
+
+  //! @brief Emit long form of the instruction.
+  //!
+  //! X86/X64:
+  //!
+  //! Long form is mostly related to jmp and jcc instructions, but like the
+  //! @ref kInstOptionShortForm option it can be used by other instructions
+  //! supporting both 8-bit and 32-bit immediates.
+  kInstOptionLongForm = 0x02,
+
+  //! @brief Condition is likely to be taken (instruction).
+  kInstOptionTaken = 0x04,
+  //! @brief Condition is unlikely to be taken (instruction).
+  kInstOptionNotTaken = 0x08
+};
+
+// ============================================================================
+// [asmjit::kSize]
+// ============================================================================
+
+//! @brief Common size of registers and pointers.
+ASMJIT_ENUM(kSize) {
+  //! @brief 1 byte size.
+  kSizeByte = 1,
+  //! @brief 2 bytes size.
+  kSizeWord = 2,
+  //! @brief 4 bytes size.
+  kSizeDWord = 4,
+  //! @brief 8 bytes size.
+  kSizeQWord = 8,
+  //! @brief 10 bytes size.
+  kSizeTWord = 10,
+  //! @brief 16 bytes size.
+  kSizeOWord = 16,
+  //! @brief 32 bytes size.
+  kSizeYWord = 32
+};
+
+// ============================================================================
+// [asmjit::kMemType]
+// ============================================================================
+
+//! @brief Type of memory operand.
+ASMJIT_ENUM(kMemType) {
+  //! @brief Memory operand is a combination of base register, optional index
+  //! register and optional displacement.
+  //!
+  //! @note The @ref Assembler interprets @ref kMemTypeBaseIndex and @ref
+  //! kMemTypeStackIndex types in the same way, but @ref Compiler interprets
+  //! @ref kMemTypeBaseIndex as [base + index] and @ref kMemTypeStackIndex as
+  //! [stack(base) + index].
+  kMemTypeBaseIndex = 0,
+
+  //! @brief Memory operand is a combination of variable's memory location,
+  //! optional index register and displacement.
+  //!
+  //! @note The @ref Assembler interprets @ref kMemTypeBaseIndex and @ref
+  //! kMemTypeStackIndex types in the same way, but @ref Compiler interprets
+  //! @ref kMemTypeBaseIndex as [base + index] and @ref kMemTypeStackIndex as
+  //! [stack(base) + index].
+  kMemTypeStackIndex = 1,
+
+  //! @brief Memory operand refers to the memory location specified by a label.
+  kMemTypeLabel = 2,
+  //! @brief Memory operand is an absolute memory location.
+  //!
+  //! Supported mostly by x86, truncated to a 32-bit value when running in
+  //! 64-bit mode (x64).
+  kMemTypeAbsolute = 3
+};
+
+// ============================================================================
+// [asmjit::kVarType]
+// ============================================================================
+
+ASMJIT_ENUM(kVarType) {
+  //! @brief Variable is signed 8-bit integer.
+  kVarTypeInt8 = 0,
+  //!
@brief Variable is unsigned 8-bit integer. + kVarTypeUInt8 = 1, + //! @brief Variable is signed 16-bit integer. + kVarTypeInt16 = 2, + //! @brief Variable is unsigned 16-bit integer. + kVarTypeUInt16 = 3, + //! @brief Variable is signed 32-bit integer. + kVarTypeInt32 = 4, + //! @brief Variable is unsigned 32-bit integer. + kVarTypeUInt32 = 5, + //! @brief Variable is signed 64-bit integer. + kVarTypeInt64 = 6, + //! @brief Variable is unsigned 64-bit integer. + kVarTypeUInt64 = 7, + + //! @brief Variable is target @c intptr_t (not compatible with host @c intptr_t). + kVarTypeIntPtr = 8, + //! @brief Variable is target @c uintptr_t (not compatible with host @c uintptr_t). + kVarTypeUIntPtr = 9, + + //! @brief Variable is 32-bit floating point (single precision). + kVarTypeFp32 = 10, + //! @brief Variable is 64-bit floating point (double precision). + kVarTypeFp64 = 11, + //! @brief Variable is 80-bit or 128-bit floating point (extended precision). + //! + //! @note Experimental, better not to use. + kVarTypeFpEx = 12, + + //! @brief Invalid variable type. + kVarTypeInvalid = 0xFF, + + //! @internal + _kVarTypeIntStart = kVarTypeInt8, + //! @internal + _kVarTypeIntEnd = kVarTypeUIntPtr +}; + +// ============================================================================ +// [asmjit::kRelocMode] +// ============================================================================ + +ASMJIT_ENUM(kRelocMode) { + kRelocAbsToAbs = 0, + kRelocRelToAbs = 1, + kRelocAbsToRel = 2, + kRelocTrampoline = 3 +}; + +// ============================================================================ +// [asmjit::Ptr] +// ============================================================================ + +//! @brief 64-bit signed pointer, compatible with JIT and non-JIT generators. +typedef int64_t SignedPtr; + +//! @brief 64-bit unsigned pointer, compatible with JIT and non-JIT generators. +typedef uint64_t Ptr; + +// ============================================================================ +// [asmjit::Operand] +// ============================================================================ + +//! @brief Operand can contain register, memory location, immediate, or label. +struct Operand { + // -------------------------------------------------------------------------- + // [Structs] + // -------------------------------------------------------------------------- + + //! @internal + //! + //! @brief Base operand data. + struct BaseOp { + //! @brief Type of operand, see @c kOperandType. + uint8_t op; + //! @brief Size of operand (register, address, immediate, or variable). + uint8_t size; + //! @brief Flags, each operand uses this byte for something else. + uint8_t reserved0; + //! @brief Reserved (not used). + uint8_t reserved1; + + //! @brief Operand id (private variable for @ref BaseAssembler and + //! @ref BaseCompiler classes). + //! + //! @note Uninitialized operand has always set id to @ref kInvalidValue. + uint32_t id; + }; + + //! @internal + //! + //! @brief Register or Variable operand data. + struct VRegOp { + //! @brief Type of operand, @c kOperandTypeReg. + uint8_t op; + //! @brief Size of register or variable. + uint8_t size; + + union { + //! @brief Register code = (type << 8) | index. + uint16_t code; + + //! @brief Register type and index access. + struct { +#if defined(ASMJIT_HOST_LE) + //! @brief Register index. + uint8_t index; + //! @brief Register type. + uint8_t type; +#else + //! @brief Register type. + uint8_t type; + //! @brief Register index. 
+ uint8_t index; +#endif // ASMJIT_HOST + }; + }; + + //! @brief Variable id (used by @ref BaseCompiler to identify variables). + uint32_t id; + + //! @brief Variable type. + uint32_t vType; + //! @internal + //! + //! @brief Unused. + uint32_t vUnused; + }; + + //! @internal + //! + //! @brief Memory or Variable operand data. + struct VMemOp { + //! @brief Type of operand, @c kOperandTypeMem. + uint8_t op; + //! @brief Size of the pointer in bytes. + uint8_t size; + //! @brief Type of the memory operand, see @ref kMemType. + uint8_t type; + //! X86/X64 layout: + //! - segment [3 bits], see @ref kSeg. + //! - shift [2 bits], index register shift (0 to 3). + uint8_t flags; + + //! @brief Base register, variable or label id. + uint32_t base; + //! @brief Index register or variable. + uint32_t index; + //! @brief 32-bit displacement or absolute address. + int32_t displacement; + }; + + //! @internal + //! + //! @brief Immediate operand data. + struct ImmOp { + //! @brief Type of operand, @ref kOperandTypeImm. + uint8_t op; + //! @brief Size of immediate (or 0 to autodetect). + uint8_t size; + //! @brief Reserved (not used). + uint8_t reserved0; + //! @brief Reserved (not used). + uint8_t reserved1; + + //! @brief Operand id (@ref kInvalidValue). + uint32_t id; + + union { + //! @brief 8x signed 8-bit immediate values. + int8_t _i8[8]; + //! @brief 8x unsigned 8-bit immediate values. + uint8_t _u8[8]; + + //! @brief 4x signed 16-bit immediate values. + int16_t _i16[4]; + //! @brief 4x unsigned 16-bit immediate values. + uint16_t _u16[4]; + + //! @brief 2x signed 32-bit immediate values. + int32_t _i32[2]; + //! @brief 2x unsigned 32-bit immediate values. + uint32_t _u32[2]; + + //! @brief 1x signed 64-bit immediate value. + int64_t _i64[1]; + //! @brief 1x unsigned 64-bit immediate value. + uint64_t _u64[1]; + + //! @brief 2x SP-FP values. + float _f32[2]; + //! @brief 1x DP-FP value. + double _f64[1]; + } value; + }; + + //! @internal + //! + //! @brief Label operand data. + struct LabelOp { + //! @brief Type of operand, @c kOperandTypeLabel. + uint8_t op; + //! @brief Reserved (not used). + uint8_t size; + //! @brief Reserved (not used). + uint8_t reserved0; + //! @brief Reserved (not used). + uint8_t reserved1; + + //! @brief Operand id. + uint32_t id; + }; + + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! @brief Create an uninitialized operand. + ASMJIT_INLINE Operand() { + _init_packed_op_sz_b0_b1_id(kOperandTypeNone, 0, 0, 0, kInvalidValue); + _init_packed_d2_d3(0, 0); + } + + //! @brief Create a reference to @a other operand. + ASMJIT_INLINE Operand(const Operand& other) { + _init(other); + } + + explicit ASMJIT_INLINE Operand(const _DontInitialize&) {} + + // -------------------------------------------------------------------------- + // [Operand] + // -------------------------------------------------------------------------- + + //! @brief Clone Operand. + ASMJIT_INLINE Operand clone() const { + return Operand(*this); + } + + // -------------------------------------------------------------------------- + // [Init & Copy] + // -------------------------------------------------------------------------- + + //! @internal + //! + //! @brief Initialize operand to @a other (used by constructors). 
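  // Copying via ::memcpy below is well-defined here: Operand is a POD union
  // (see the _packed member), exactly 16 bytes, so the copy usually lowers
  // to two 64-bit moves.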
+ ASMJIT_INLINE void _init(const Operand& other) { ::memcpy(this, &other, sizeof(Operand)); } + + ASMJIT_INLINE void _init_packed_op_sz_b0_b1_id(uint32_t op, uint32_t sz, uint32_t r0, uint32_t r1, uint32_t id) { + // This hack is not for performance, but to decrease the size of the binary + // generated when constructing AsmJit operands (mostly for third parties). + // Some compilers are not able to join four BYTE writes to a single DWORD + // write. Because the 'a', 'b', 'c' and 'd' variables are usually compile + // time constants the compiler can do a really nice job if they are joined + // by using bitwise operations. + _packed[0].setPacked_2x32(IntUtil::pack32_4x8(op, sz, r0, r1), id); + } + + ASMJIT_INLINE void _init_packed_op_sz_w0_id(uint32_t op, uint32_t sz, uint32_t w0, uint32_t id) { + _packed[0].setPacked_2x32(IntUtil::pack32_2x8_1x16(op, sz, w0), id); + } + + ASMJIT_INLINE void _init_packed_d0_d1(uint32_t u0, uint32_t u1) { + _packed[0].setPacked_2x32(u0, u1); + } + + ASMJIT_INLINE void _init_packed_d2_d3(uint32_t u2, uint32_t u3) { + _packed[1].setPacked_2x32(u2, u3); + } + + //! @internal + //! + //! @brief Initialize operand to @a other (used by assign operators). + ASMJIT_INLINE void _copy(const Operand& other) { ::memcpy(this, &other, sizeof(Operand)); } + + // -------------------------------------------------------------------------- + // [Data] + // -------------------------------------------------------------------------- + + template + ASMJIT_INLINE T& getData() { return reinterpret_cast(_base); } + + template + ASMJIT_INLINE const T& getData() const { return reinterpret_cast(_base); } + + // -------------------------------------------------------------------------- + // [Type] + // -------------------------------------------------------------------------- + + //! @brief Get type of the operand, see @ref kOperandType. + ASMJIT_INLINE uint32_t getOp() const { return _base.op; } + + //! @brief Get whether the operand is none (@ref kOperandTypeNone). + ASMJIT_INLINE bool isNone() const { return (_base.op == kOperandTypeNone); } + //! @brief Get whether the operand is any register (@ref kOperandTypeReg). + ASMJIT_INLINE bool isReg() const { return (_base.op == kOperandTypeReg); } + //! @brief Get whether the operand is variable (@ref kOperandTypeVar). + ASMJIT_INLINE bool isVar() const { return (_base.op == kOperandTypeVar); } + //! @brief Get whether the operand is memory address (@ref kOperandTypeMem). + ASMJIT_INLINE bool isMem() const { return (_base.op == kOperandTypeMem); } + //! @brief Get whether the operand is an immediate value (@ref kOperandTypeImm). + ASMJIT_INLINE bool isImm() const { return (_base.op == kOperandTypeImm); } + //! @brief Get whether the operand is label (@ref kOperandTypeLabel). + ASMJIT_INLINE bool isLabel() const { return (_base.op == kOperandTypeLabel); } + + // -------------------------------------------------------------------------- + // [Type - Combined] + // -------------------------------------------------------------------------- + + //! @brief Get whether the operand is register of @a type. + ASMJIT_INLINE bool isRegType(uint32_t type) const { + return (_packed[0].u32[0] & IntUtil::pack32_2x8_1x16(0xFF, 0, 0xFF00)) == IntUtil::pack32_2x8_1x16(kOperandTypeReg, 0, (type << 8)); + } + + //! @brief Get whether the operand is register and of @a type and @a index. 
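  // For reference (an assumption about intutil.h, which is outside this
  // hunk): IntUtil::pack32_2x8_1x16(b0, b1, w) is taken to evaluate to
  // b0 | (b1 << 8) | (w << 16). Under that reading, the masks above and
  // below select the 'op' byte plus the register type byte (or the full
  // 16-bit register code) of _packed[0], so each check is a single masked
  // 32-bit compare.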
+ ASMJIT_INLINE bool isRegCode(uint32_t type, uint32_t index) const { + return (_packed[0].u32[0] & IntUtil::pack32_2x8_1x16(0xFF, 0, 0xFFFF)) == IntUtil::pack32_2x8_1x16(kOperandTypeReg, 0, (type << 8) + index); + } + + //! @brief Get whether the operand is a register or memory. + ASMJIT_INLINE bool isRegOrMem() const { + ASMJIT_ASSERT(kOperandTypeReg == 1); + ASMJIT_ASSERT(kOperandTypeMem == 3); + return (static_cast(_base.op) | 0x2U) == 0x3U; + } + + //! @brief Get whether the operand is variable or memory. + ASMJIT_INLINE bool isVarOrMem() const { + ASMJIT_ASSERT(kOperandTypeVar == 2); + ASMJIT_ASSERT(kOperandTypeMem == 3); + return (static_cast(_base.op) - 2U) <= 1; + } + + // -------------------------------------------------------------------------- + // [Size] + // -------------------------------------------------------------------------- + + //! @brief Get size of the operand in bytes. + ASMJIT_INLINE uint32_t getSize() const { return _base.size; } + + // -------------------------------------------------------------------------- + // [Id] + // -------------------------------------------------------------------------- + + //! @brief Get operand id (Operand id's are used internally by + //! @ref BaseAssembler and @ref BaseCompiler classes). + //! + //! @note There is no way how to change or remove operand id. If you don't + //! need the operand just assign different operand to this one. + ASMJIT_INLINE uint32_t getId() const { return _base.id; } + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + + union { + //! @brief Base data. + BaseOp _base; + //! @brief Register or variable data. + VRegOp _vreg; + //! @brief Memory data. + VMemOp _vmem; + //! @brief Immediate data. + ImmOp _imm; + //! @brief Label data. + LabelOp _label; + + //! @brief Packed operand as two 64-bit integers. + UInt64 _packed[2]; + }; +}; + +ASMJIT_VAR const Operand noOperand; + +// ============================================================================ +// [asmjit::OperandUtil] +// ============================================================================ + +//! @brief Operand utilities. +struct OperandUtil { + //! @brief Make variable id. + static ASMJIT_INLINE uint32_t makeVarId(uint32_t id) { + return id | kOperandIdVar; + } + + //! @brief Make label id. + static ASMJIT_INLINE uint32_t makeLabelId(uint32_t id) { + return id; + } + + //! @brief Strip variable id bit so it becomes a pure index to VarData[] array. + static ASMJIT_INLINE uint32_t stripVarId(uint32_t id) { + return id & 0x7FFFFFFFU; + } + + //! @brief Get whether the id refers to @ref BaseVar. + //! + //! @note The function will never return @c true if the id is @c kInvalidValue. + //! The trick is to compare a given id to -1 (kInvalidValue) so we check both + //! using only one comparison. + static ASMJIT_INLINE bool isVarId(uint32_t id) { + return static_cast(id) < -1; + } + + //! @brief Get whether the id refers to @ref Label. + //! + //! @note The function will never return @c true if the id is @c kInvalidValue. + static ASMJIT_INLINE bool isLabelId(uint32_t id) { + return static_cast(id) >= 0; + } +}; + +// ============================================================================ +// [asmjit::BaseReg] +// ============================================================================ + +//! @brief Base class for all register operands. 
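// A worked check of the OperandUtil trick above, assuming 32-bit two's
// complement conversion (hypothetical values, not library code):
//
//   isVarId(0x80000005U)   -> int32_t(0x80000005) = -2147483643 < -1 -> true
//   isVarId(kInvalidValue) -> int32_t(0xFFFFFFFF) = -1, and -1 < -1  -> false
//   isVarId(0x00000007U)   -> 7 < -1                                 -> false
//
// A single signed compare therefore accepts ids with the kOperandIdVar bit
// set while rejecting both label ids and kInvalidValue.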
+struct BaseReg : public Operand { + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! @brief Create a dummy base register. + ASMJIT_INLINE BaseReg() : Operand(DontInitialize) + { _init_packed_op_sz_w0_id(kOperandTypeReg, 0, (kInvalidReg << 8) + kInvalidReg, kInvalidValue); } + + //! @brief Create a new base register. + ASMJIT_INLINE BaseReg(uint32_t type, uint32_t index, uint32_t size) : Operand(DontInitialize) + { _init_packed_op_sz_w0_id(kOperandTypeReg, size, (type << 8) + index, kInvalidValue); } + + //! @brief Create a new reference to @a other. + ASMJIT_INLINE BaseReg(const BaseReg& other) : Operand(other) {} + + explicit ASMJIT_INLINE BaseReg(const _DontInitialize&) : Operand(DontInitialize) {} + + // -------------------------------------------------------------------------- + // [BaseReg Specific] + // -------------------------------------------------------------------------- + + //! @brief Clone BaseReg operand. + ASMJIT_INLINE BaseReg clone() const { + return BaseReg(*this); + } + + //! @brief Get whether register code is equal to @a type. + ASMJIT_INLINE bool isRegType(uint32_t type) const { + return _vreg.type == type; + } + + //! @brief Get whether register code is equal to @a type. + ASMJIT_INLINE bool isRegCode(uint32_t code) const { + return _vreg.code == code; + } + + //! @brief Get whether register code is equal to @a type. + ASMJIT_INLINE bool isRegCode(uint32_t type, uint32_t index) const { + return _vreg.code == (type << 8) + index; + } + + //! @brief Get register code that equals to '(type << 8) + index'. + ASMJIT_INLINE uint32_t getRegCode() const { + return _vreg.code; + } + + //! @brief Get register type. + ASMJIT_INLINE uint32_t getRegType() const { + return _vreg.type; + } + + //! @brief Get register index. + ASMJIT_INLINE uint32_t getRegIndex() const { + return _vreg.index; + } + +#define ASMJIT_REG_OP(_Type_) \ + ASMJIT_INLINE _Type_ clone() const { \ + return _Type_(*this); \ + } \ + \ + /*! @brief Set register @a size. */ \ + ASMJIT_INLINE _Type_& setSize(uint32_t size) { \ + _vreg.size = static_cast(size); \ + return *this; \ + } \ + \ + /*! @brief Set register @a code. */ \ + ASMJIT_INLINE _Type_& setCode(uint32_t code) { \ + _vreg.code = static_cast(code); \ + return *this; \ + } \ + \ + /*! @brief Set register @a type and @a index. */ \ + ASMJIT_INLINE _Type_& setCode(uint32_t type, uint32_t index) { \ + _vreg.type = static_cast(type); \ + _vreg.index = static_cast(index); \ + return *this; \ + } \ + \ + /*! @brief Set register @a type. */ \ + ASMJIT_INLINE _Type_& setType(uint32_t type) { \ + _vreg.type = static_cast(type); \ + return *this; \ + } \ + \ + /*! @brief Set register @a index. */ \ + ASMJIT_INLINE _Type_& setIndex(uint32_t index) { \ + _vreg.index = static_cast(index); \ + return *this; \ + } \ + \ + ASMJIT_INLINE _Type_& operator=(const _Type_& other) { _copy(other); return *this; } \ + \ + ASMJIT_INLINE bool operator==(const _Type_& other) const { return _packed[0].u32[0] == other._packed[0].u32[0]; } \ + ASMJIT_INLINE bool operator!=(const _Type_& other) const { return !operator==(other); } +}; + +// ============================================================================ +// [asmjit::BaseMem] +// ============================================================================ + +//! @brief Base class for all memory operands. 
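// A minimal standalone sketch (hypothetical names, not AsmJit API) of the
// register code layout BaseReg relies on above: the type is kept in the high
// byte and the index in the low byte, so a single 16-bit compare can test
// both fields at once.

#include <assert.h>
#include <stdint.h>

static inline uint16_t sketchRegCode(uint32_t type, uint32_t index) {
  return (uint16_t)((type << 8) + index);
}

static inline void sketchRegCodeCheck() {
  uint16_t code = sketchRegCode(2, 5);  // -> 0x0205.
  assert((code >> 8) == 2);             // Corresponds to getRegType().
  assert((code & 0xFF) == 5);           // Corresponds to getRegIndex().
}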
+struct BaseMem : public Operand { + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE BaseMem() : Operand(DontInitialize) { + reset(); + } + + ASMJIT_INLINE BaseMem(const BaseMem& other) : Operand(other) {} + explicit ASMJIT_INLINE BaseMem(const _DontInitialize&) : Operand(DontInitialize) {} + + // -------------------------------------------------------------------------- + // [BaseMem Specific] + // -------------------------------------------------------------------------- + + //! @brief Clone BaseMem operand. + ASMJIT_INLINE BaseMem clone() const { + return BaseMem(*this); + } + + //! @brief Reset BaseMem operand. + ASMJIT_INLINE void reset() { + _init_packed_op_sz_b0_b1_id(kOperandTypeMem, 0, kMemTypeBaseIndex, 0, kInvalidValue); + _init_packed_d2_d3(kInvalidValue, 0); + } + + //! @brief Get the type of the memory operand, see @c kMemType. + ASMJIT_INLINE uint32_t getMemType() const { return _vmem.type; } + //! @brief Get whether the type of the memory operand is either @ref + //! kMemTypeBaseIndex or @ref kMemTypeStackIndex. + ASMJIT_INLINE bool isBaseIndexType() const { return _vmem.type <= kMemTypeStackIndex; } + + //! @brief Get whether the memory operand has base register. + ASMJIT_INLINE bool hasBase() const { return _vmem.base != kInvalidValue; } + //! @brief Get memory operand base id, or @c kInvalidValue. + ASMJIT_INLINE uint32_t getBase() const { return _vmem.base; } + + //! @brief Set memory operand size. + ASMJIT_INLINE BaseMem& setSize(uint32_t size) { + _vmem.size = static_cast(size); + return *this; + } + + //! @brief Get memory operand relative displacement. + ASMJIT_INLINE int32_t getDisplacement() const + { return _vmem.displacement; } + + //! @brief Set memory operand relative displacement. + ASMJIT_INLINE BaseMem& setDisplacement(int32_t disp) { + _vmem.displacement = disp; + return *this; + } + + // -------------------------------------------------------------------------- + // [Operator Overload] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE BaseMem& operator=(const BaseMem& other) { + _copy(other); + return *this; + } + + ASMJIT_INLINE bool operator==(const BaseMem& other) const { + return (_packed[0] == other._packed[0]) & (_packed[1] == other._packed[1]); + } + + ASMJIT_INLINE bool operator!=(const BaseMem& other) const { return !(*this == other); } +}; + +// ============================================================================ +// [asmjit::BaseVar] +// ============================================================================ + +//! @brief Base class for all variables. +struct BaseVar : public Operand { + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE BaseVar() : Operand(DontInitialize) { + _init_packed_op_sz_b0_b1_id(kOperandTypeVar, 0, 0, 0, kInvalidValue); + _init_packed_d2_d3(kInvalidValue, kInvalidValue); + } + + ASMJIT_INLINE BaseVar(const BaseVar& other) : Operand(other) {} + + explicit ASMJIT_INLINE BaseVar(const _DontInitialize&) : Operand(DontInitialize) {} + + // -------------------------------------------------------------------------- + // [BaseVar Specific] + // -------------------------------------------------------------------------- + + //! 
@brief Clone BaseVar operand. + ASMJIT_INLINE BaseVar clone() const { + return BaseVar(*this); + } + + ASMJIT_INLINE uint32_t getVarType() const { + return _vreg.vType; + } + + // -------------------------------------------------------------------------- + // [Operator Overload] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE BaseVar& operator=(const BaseVar& other) { _copy(other); return *this; } + + ASMJIT_INLINE bool operator==(const BaseVar& other) const + { return _packed[0] == other._packed[0]; } + + ASMJIT_INLINE bool operator!=(const BaseVar& other) const { return !operator==(other); } +}; + +// ============================================================================ +// [asmjit::Imm] +// ============================================================================ + +//! @brief Immediate operand. +//! +//! Immediate operand is usually part of instruction itself (it's inlined after +//! or before instruction opcode). Immediates can be only signed or unsigned +//! integers. +//! +//! To create immediate operand, use @c imm() and @c imm_u() constructors or +//! constructors provided by @c Immediate class itself. +struct Imm : public Operand { + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! @brief Create a new immediate value (initial value is 0). + Imm() : Operand(DontInitialize) { + _init_packed_op_sz_b0_b1_id(kOperandTypeImm, 0, 0, 0, kInvalidValue); + _imm.value._i64[0] = 0; + } + + //! @brief Create a new signed immediate value, assigning the value to @a val. + explicit Imm(int64_t val) : Operand(DontInitialize) { + _init_packed_op_sz_b0_b1_id(kOperandTypeImm, 0, 0, 0, kInvalidValue); + _imm.value._i64[0] = val; + } + + //! @brief Create a new immediate value from @a other. + ASMJIT_INLINE Imm(const Imm& other) : Operand(other) {} + + explicit ASMJIT_INLINE Imm(const _DontInitialize&) : Operand(DontInitialize) {} + + // -------------------------------------------------------------------------- + // [Immediate Specific] + // -------------------------------------------------------------------------- + + //! @brief Clone Imm operand. + ASMJIT_INLINE Imm clone() const { + return Imm(*this); + } + + //! @brief Get whether the immediate can be casted to 8-bit signed integer. + ASMJIT_INLINE bool isInt8() const { return IntUtil::isInt8(_imm.value._i64[0]); } + //! @brief Get whether the immediate can be casted to 8-bit unsigned integer. + ASMJIT_INLINE bool isUInt8() const { return IntUtil::isUInt8(_imm.value._i64[0]); } + + //! @brief Get whether the immediate can be casted to 16-bit signed integer. + ASMJIT_INLINE bool isInt16() const { return IntUtil::isInt16(_imm.value._i64[0]); } + //! @brief Get whether the immediate can be casted to 16-bit unsigned integer. + ASMJIT_INLINE bool isUInt16() const { return IntUtil::isUInt16(_imm.value._i64[0]); } + + //! @brief Get whether the immediate can be casted to 32-bit signed integer. + ASMJIT_INLINE bool isInt32() const { return IntUtil::isInt32(_imm.value._i64[0]); } + //! @brief Get whether the immediate can be casted to 32-bit unsigned integer. + ASMJIT_INLINE bool isUInt32() const { return IntUtil::isUInt32(_imm.value._i64[0]); } + + //! @brief Get immediate value as 8-bit signed integer. + ASMJIT_INLINE int8_t getInt8() const { return _imm.value._i8[_ASMJIT_HOST_INDEX(8, 0)]; } + //! 
@brief Get immediate value as 8-bit unsigned integer. + ASMJIT_INLINE uint8_t getUInt8() const { return _imm.value._u8[_ASMJIT_HOST_INDEX(8, 0)]; } + //! @brief Get immediate value as 16-bit signed integer. + ASMJIT_INLINE int16_t getInt16() const { return _imm.value._i16[_ASMJIT_HOST_INDEX(4, 0)]; } + //! @brief Get immediate value as 16-bit unsigned integer. + ASMJIT_INLINE uint16_t getUInt16() const { return _imm.value._u16[_ASMJIT_HOST_INDEX(4, 0)]; } + //! @brief Get immediate value as 32-bit signed integer. + ASMJIT_INLINE int32_t getInt32() const { return _imm.value._i32[_ASMJIT_HOST_INDEX(2, 0)]; } + //! @brief Get immediate value as 32-bit unsigned integer. + ASMJIT_INLINE uint32_t getUInt32() const { return _imm.value._u32[_ASMJIT_HOST_INDEX(2, 0)]; } + //! @brief Get immediate value as 64-bit signed integer. + ASMJIT_INLINE int64_t getInt64() const { return _imm.value._i64[0]; } + //! @brief Get immediate value as 64-bit unsigned integer. + ASMJIT_INLINE uint64_t getUInt64() const { return _imm.value._u64[0]; } + + //! @brief Get immediate value as intptr_t. + ASMJIT_INLINE intptr_t getIntPtr() const { + if (sizeof(intptr_t) == sizeof(int64_t)) + return static_cast(getInt64()); + else + return static_cast(getInt32()); + } + + //! @brief Get immediate value as uintptr_t. + ASMJIT_INLINE uintptr_t getUIntPtr() const { + if (sizeof(uintptr_t) == sizeof(uint64_t)) + return static_cast(getUInt64()); + else + return static_cast(getUInt32()); + } + + //! @brief Get Lo 32-bit signed integer. + ASMJIT_INLINE int32_t getInt32Lo() const { return _imm.value._i32[_ASMJIT_HOST_INDEX(2, 0)]; } + //! @brief Get Lo 32-bit signed integer. + ASMJIT_INLINE uint32_t getUInt32Lo() const { return _imm.value._u32[_ASMJIT_HOST_INDEX(2, 0)]; } + //! @brief Get Hi 32-bit signed integer. + ASMJIT_INLINE int32_t getInt32Hi() const { return _imm.value._i32[_ASMJIT_HOST_INDEX(2, 1)]; } + //! @brief Get Hi 32-bit signed integer. + ASMJIT_INLINE uint32_t getUInt32Hi() const { return _imm.value._u32[_ASMJIT_HOST_INDEX(2, 1)]; } + + //! @brief Set immediate value to 8-bit signed integer @a val. + ASMJIT_INLINE Imm& setInt8(int8_t val) { + if (kArchHost64Bit) { + _imm.value._i64[0] = static_cast(val); + } + else { + int32_t val32 = static_cast(val); + _imm.value._i32[_ASMJIT_HOST_INDEX(2, 0)] = val32; + _imm.value._i32[_ASMJIT_HOST_INDEX(2, 1)] = val32 >> 31; + } + return *this; + } + + //! @brief Set immediate value to 8-bit unsigned integer @a val. + ASMJIT_INLINE Imm& setUInt8(uint8_t val) { + if (kArchHost64Bit) { + _imm.value._u64[0] = static_cast(val); + } + else { + _imm.value._u32[_ASMJIT_HOST_INDEX(2, 0)] = static_cast(val); + _imm.value._u32[_ASMJIT_HOST_INDEX(2, 1)] = 0; + } + return *this; + } + + //! @brief Set immediate value to 16-bit signed integer @a val. + ASMJIT_INLINE Imm& setInt16(int16_t val) { + if (kArchHost64Bit) { + _imm.value._i64[0] = static_cast(val); + } + else { + int32_t val32 = static_cast(val); + _imm.value._i32[_ASMJIT_HOST_INDEX(2, 0)] = val32; + _imm.value._i32[_ASMJIT_HOST_INDEX(2, 1)] = val32 >> 31; + } + return *this; + } + + //! @brief Set immediate value to 16-bit unsigned integer @a val. + ASMJIT_INLINE Imm& setUInt16(uint16_t val) { + if (kArchHost64Bit) { + _imm.value._u64[0] = static_cast(val); + } + else { + _imm.value._u32[_ASMJIT_HOST_INDEX(2, 0)] = static_cast(val); + _imm.value._u32[_ASMJIT_HOST_INDEX(2, 1)] = 0; + } + return *this; + } + + //! @brief Set immediate value to 32-bit signed integer @a val. 
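  // Note on the 32-bit host paths of the signed setters: shifting the 32-bit
  // value right by 31 is an arithmetic shift, yielding 0 for non-negative
  // inputs and -1 (all bits set) for negative ones; that is exactly the sign
  // extension the hi half needs. _ASMJIT_HOST_INDEX() selects the lo/hi lane
  // according to host endianness.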
+ ASMJIT_INLINE Imm& setInt32(int32_t val) { + if (kArchHost64Bit) { + _imm.value._i64[0] = static_cast(val); + } + else { + _imm.value._i32[_ASMJIT_HOST_INDEX(2, 0)] = val; + _imm.value._i32[_ASMJIT_HOST_INDEX(2, 1)] = val >> 31; + } + return *this; + } + + //! @brief Set immediate value to 32-bit unsigned integer @a val. + ASMJIT_INLINE Imm& setUInt32(uint32_t val) { + if (kArchHost64Bit) { + _imm.value._u64[0] = static_cast(val); + } + else { + _imm.value._u32[_ASMJIT_HOST_INDEX(2, 0)] = val; + _imm.value._u32[_ASMJIT_HOST_INDEX(2, 1)] = 0; + } + return *this; + } + + //! @brief Set immediate value to 64-bit signed integer @a val. + ASMJIT_INLINE Imm& setInt64(int64_t val) { + _imm.value._i64[0] = val; + return *this; + } + + //! @brief Set immediate value to 64-bit unsigned integer @a val. + ASMJIT_INLINE Imm& setUInt64(uint64_t val) { + _imm.value._u64[0] = val; + return *this; + } + + //! @brief Set immediate value to intptr_t @a val. + ASMJIT_INLINE Imm& setIntPtr(intptr_t val) { + _imm.value._i64[0] = static_cast(val); + return *this; + } + + //! @brief Set immediate value to uintptr_t @a val. + ASMJIT_INLINE Imm& setUIntPtr(uintptr_t val) { + _imm.value._u64[0] = static_cast(val); + return *this; + } + + //! @brief Set immediate value as unsigned type to @a val. + ASMJIT_INLINE Imm& setPtr(void* p) { return setIntPtr((intptr_t)p); } + + // -------------------------------------------------------------------------- + // [Float] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE Imm& setFloat(float f) { + _imm.value._f32[_ASMJIT_HOST_INDEX(2, 0)] = f; + _imm.value._u32[_ASMJIT_HOST_INDEX(2, 1)] = 0; + return *this; + } + + ASMJIT_INLINE Imm& setDouble(double d) { + _imm.value._f64[0] = d; + return *this; + } + + // -------------------------------------------------------------------------- + // [Truncate] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE Imm& truncateTo8Bits() { + if (kArchHost64Bit) { + _imm.value._u64[0] &= static_cast(0x000000FFU); + } + else { + _imm.value._u32[_ASMJIT_HOST_INDEX(2, 0)] &= 0x000000FFU; + _imm.value._u32[_ASMJIT_HOST_INDEX(2, 1)] = 0; + } + return *this; + } + + ASMJIT_INLINE Imm& truncateTo16Bits() { + if (kArchHost64Bit) { + _imm.value._u64[0] &= static_cast(0x0000FFFFU); + } + else { + _imm.value._u32[_ASMJIT_HOST_INDEX(2, 0)] &= 0x0000FFFFU; + _imm.value._u32[_ASMJIT_HOST_INDEX(2, 1)] = 0; + } + return *this; + } + + ASMJIT_INLINE Imm& truncateTo32Bits() { + _imm.value._u32[_ASMJIT_HOST_INDEX(2, 1)] = 0; + return *this; + } + + // -------------------------------------------------------------------------- + // [Operator Overload] + // -------------------------------------------------------------------------- + + //! @brief Assign @a other to the immediate operand. + ASMJIT_INLINE Imm& operator=(const Imm& other) { _copy(other); return *this; } +}; + +//! @brief Create signed immediate value operand. +static ASMJIT_INLINE Imm imm(int64_t val) { return Imm(val); } +//! @brief Create unsigned immediate value operand. +static ASMJIT_INLINE Imm imm_u(uint64_t val) { return Imm(static_cast(val)); } +//! @brief Create void* pointer immediate value operand. +static ASMJIT_INLINE Imm imm_ptr(void* p) { return Imm(static_cast((intptr_t)p)); } + +// ============================================================================ +// [asmjit::Label] +// ============================================================================ + +//! 
+//! @brief Label (jump target or data location).
+//!
+//! Label represents a location in generated code, typically used as a jump
+//! target, but it may also mark a position where constants or static
+//! variables are stored. If you want to use a @c Label you first need to
+//! associate it with a @ref BaseAssembler or @ref BaseCompiler instance. To
+//! create a new label use @ref BaseAssembler::newLabel() or
+//! @ref BaseCompiler::newLabel().
+//!
+//! Example of using labels:
+//!
+//! @code
+//! // Create Assembler/Compiler.
+//! Assembler a;
+//!
+//! // Create Label instance.
+//! Label L_1(a);
+//!
+//! // ... your code ...
+//!
+//! // Using label, see @ref asmjit::BaseAssembler or @ref asmjit::BaseCompiler.
+//! a.jump(L_1);
+//!
+//! // ... your code ...
+//!
+//! // Bind label to the current position, see @ref asmjit::BaseAssembler::bind()
+//! // or @ref asmjit::BaseCompiler::bind().
+//! a.bind(L_1);
+//! @endcode
+struct Label : public Operand {
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a new, unassociated label.
+  ASMJIT_INLINE Label() : Operand(DontInitialize) {
+    _init_packed_op_sz_b0_b1_id(kOperandTypeLabel, 0, 0, 0, kInvalidValue);
+    _init_packed_d2_d3(0, 0);
+  }
+
+  //! @brief Create a label operand referencing the label with @a id.
+  explicit ASMJIT_INLINE Label(uint32_t id) : Operand(DontInitialize) {
+    _init_packed_op_sz_b0_b1_id(kOperandTypeLabel, 0, 0, 0, id);
+    _init_packed_d2_d3(0, 0);
+  }
+
+  //! @brief Create a new initialized label.
+  explicit ASMJIT_INLINE Label(BaseAssembler& a);
+  //! @brief Create a new initialized label.
+  explicit ASMJIT_INLINE Label(BaseCompiler& c);
+
+  //! @brief Create a reference to another label.
+  ASMJIT_INLINE Label(const Label& other) : Operand(other) {}
+
+  explicit ASMJIT_INLINE Label(const _DontInitialize&) : Operand(DontInitialize) {}
+
+  // --------------------------------------------------------------------------
+  // [Operator Overload]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE Label& operator=(const Label& other) { _copy(other); return *this; }
+
+  ASMJIT_INLINE bool operator==(const Label& other) const { return _base.id == other._base.id; }
+  ASMJIT_INLINE bool operator!=(const Label& other) const { return _base.id != other._base.id; }
+};
+
+//! @}
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../base/apiend.h"
+
+// [Guard]
+#endif // _ASMJIT_BASE_DEFS_H
diff --git a/src/asmjit/base/error.cpp b/src/asmjit/base/error.cpp
new file mode 100644
index 0000000..35c5a74
--- /dev/null
+++ b/src/asmjit/base/error.cpp
@@ -0,0 +1,69 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Export]
+#define ASMJIT_EXPORTS
+
+// [Dependencies - AsmJit]
+#include "../base/error.h"
+#include "../base/intutil.h"
+
+// [Api-Begin]
+#include "../base/apibegin.h"
+
+namespace asmjit {
+
+// ============================================================================
+// [asmjit::ErrorHandler - Construction / Destruction]
+// ============================================================================
+
+ErrorHandler::ErrorHandler() {}
+ErrorHandler::~ErrorHandler() {}
+
+// ============================================================================
+// [asmjit::ErrorHandler - Interface]
+// ============================================================================
+
+ErrorHandler* ErrorHandler::addRef() const { return const_cast<ErrorHandler*>(this); }
+void ErrorHandler::release() {}
+
+// ============================================================================
+// [asmjit::ErrorUtil - AsString]
+// ============================================================================
+
+static const char* errorMessages[] = {
+  "Ok",
+
+  "No heap memory",
+  "No virtual memory",
+
+  "Invalid argument",
+  "Invalid state",
+
+  "Unknown instruction",
+  "Illegal instruction",
+  "Illegal addressing",
+  "Illegal short jump",
+
+  "No function defined",
+  "Incomplete function",
+  "Overlapped arguments",
+  "No registers",
+  "Overlapped registers",
+  "Incompatible argument",
+  "Incompatible return",
+
+  "Unknown error"
+};
+
+const char* ErrorUtil::asString(Error err) {
+  return errorMessages[IntUtil::iMin<Error>(err, kErrorCount)];
+}
+
+} // AsmJit
+
+// [Api-End]
+#include "../base/apiend.h"
diff --git a/src/asmjit/base/error.h b/src/asmjit/base/error.h
new file mode 100644
index 0000000..adba3f7
--- /dev/null
+++ b/src/asmjit/base/error.h
@@ -0,0 +1,196 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Guard]
+#ifndef _ASMJIT_BASE_ERROR_H
+#define _ASMJIT_BASE_ERROR_H
+
+// [Api-Begin]
+#include "../base/apibegin.h"
+
+namespace asmjit {
+
+//! @addtogroup asmjit_base
+//! @{
+
+// ============================================================================
+// [asmjit::kError]
+// ============================================================================
+
+//! @brief AsmJit error codes.
+ASMJIT_ENUM(kError) {
+  //! @brief No error (success).
+  //!
+  //! This is the default state and the state you want.
+  kErrorOk = 0,
+
+  //! @brief Heap memory allocation failed.
+  kErrorNoHeapMemory = 1,
+  //! @brief Virtual memory allocation failed.
+  kErrorNoVirtualMemory = 2,
+
+  //! @brief Invalid argument.
+  kErrorInvalidArgument = 3,
+  //! @brief Invalid state.
+  kErrorInvalidState = 4,
+
+  //! @brief Unknown instruction. This happens only if the instruction code is
+  //! out of bounds. Shouldn't happen.
+  kErrorAssemblerUnknownInst = 5,
+  //! @brief Illegal instruction, usually generated by the asmjit::Assembler
+  //! class when emitting an instruction opcode. If this error is generated the
+  //! target buffer is not affected by the invalid instruction.
+  //!
+  //! You can also get this status code if you are under x64 (64-bit x86) and
+  //! you tried to encode an instruction using the AH, BH, CH or DH register
+  //! with a REX prefix. These registers can't be accessed if the REX prefix is
+  //! used and AsmJit doesn't check for this situation in intrinsics (@c Compiler
+  //! takes care of this and rearranges registers if needed).
+  //!
+  //! Examples that will raise the @c kErrorAssemblerIllegalInst error (a is an
+  //! @c Assembler instance):
+  //!
+  //! @code
+  //! a.mov(dword_ptr(eax), al); // Invalid address size.
+  //! a.mov(byte_ptr(r10), ah);  // Unencodable instruction (AH used with r10,
+  //!                            // which can be encoded only by using a REX prefix)
+  //! @endcode
+  //!
+  //! @note In debug mode you get an assertion failure instead of a set error
+  //! code.
+  kErrorAssemblerIllegalInst = 6,
+  //! @brief Illegal addressing used (unencodable).
+  kErrorAssemblerIllegalAddr = 7,
+  //! @brief Short jump instruction used, but displacement is out of bounds.
+  kErrorAssemblerIllegalShortJump = 8,
+
+  //! @brief No function defined.
+  kErrorCompilerNoFunc = 9,
+  //! @brief Function generation is not finished by using @c Compiler::endFunc()
+  //! or something bad happened during generation related to the function,
+  //! e.g. a missing compiler node.
+  kErrorCompilerIncompleteFunc = 10,
+  //! @brief Tried to generate a function with overlapped arguments.
+  kErrorCompilerOverlappedArgs = 11,
+
+  //! @brief Compiler can't allocate registers.
+  kErrorCompilerNoRegs = 12,
+  //! @brief Compiler can't allocate registers, because they overlap.
+  kErrorCompilerOverlappedRegs = 13,
+
+  //! @brief Tried to call function with an incompatible argument.
+  kErrorCompilerIncompatibleArg = 14,
+  //! @brief Incompatible return value.
+  kErrorCompilerIncompatibleRet = 15,
+
+  //! @brief Count of AsmJit status codes. Can grow in future.
+  kErrorCount = 16
+};
+
+// ============================================================================
+// [asmjit::Error]
+// ============================================================================
+
+typedef uint32_t Error;
+
+// ============================================================================
+// [asmjit::ErrorHandler]
+// ============================================================================
+
+struct ErrorHandler {
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a new @ref ErrorHandler.
+  ASMJIT_API ErrorHandler();
+  //! @brief Destroy the @ref ErrorHandler.
+  ASMJIT_API virtual ~ErrorHandler();
+
+  // --------------------------------------------------------------------------
+  // [Interface]
+  // --------------------------------------------------------------------------
+
+  //! @brief Reference this error handler.
+  //!
+  //! @note This member function is provided for convenience. The default
+  //! implementation does nothing. If you are working in an environment where
+  //! multiple @ref ErrorHandler instances are used by different @ref Assembler
+  //! and @ref Compiler instances (or in a multithreaded environment) you might
+  //! want to provide your own functionality for reference counting. In that
+  //! case override the @ref addRef() and @ref release() functions to inc/dec
+  //! your reference count value.
+  ASMJIT_API virtual ErrorHandler* addRef() const;
+
+  //! @brief Release this error handler.
+  //!
+  //! @note This member function is provided for convenience. See @ref addRef()
+  //! for more detailed information related to reference counting.
+  ASMJIT_API virtual void release();
+
+  //! @brief Error handler (pure).
+  //!
+  //! Error handler is called when an error happens. An error can happen in
+  //! many places, but the error handler is mostly used by the @ref Assembler
+  //! and @ref Compiler classes to report anything that may prevent correct
+  //! code generation. There are multiple ways the error handler can be used
+  //! and each has its pros and cons.
+  //!
+  //! The AsmJit library doesn't use exceptions and can be compiled with or
+  //! without exception support. Even if the AsmJit library is compiled without
+  //! exceptions it is exception-safe, and handleError() can report an incoming
+  //! error by throwing an exception of any type. It's guaranteed that the
+  //! exception won't be caught by AsmJit and will be propagated to the code
+  //! calling the AsmJit @ref Assembler or @ref Compiler. An alternative to
+  //! throwing an exception is using the setjmp() / longjmp() pair from the
+  //! standard C library.
+  //!
+  //! If the exception or setjmp() / longjmp() mechanism is used, the state of
+  //! the @ref Assembler or @ref Compiler is unchanged and, if it's possible,
+  //! the execution (instruction serialization) can continue. However, if the
+  //! error happened during any phase that translates or modifies the stored
+  //! code (for example relocation done by @ref Assembler or analysis/translation
+  //! done by @ref Compiler) the execution can't continue and the error will
+  //! also be stored in the @ref Assembler or @ref Compiler.
+  //!
+  //! Finally, if neither exceptions nor the setjmp() / longjmp() mechanism is
+  //! used, you can still implement a compatible design by returning from your
+  //! error handler. Returning @c true means that the error was reported and
+  //! AsmJit should continue execution. When @c false is returned, AsmJit sets
+  //! the error immediately in the @ref Assembler or @ref Compiler and execution
+  //! shouldn't continue (this is the default behavior in case no error handler
+  //! is used).
+  virtual bool handleError(Error code, const char* message) = 0;
+};
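+
+// A minimal sketch (not part of the original header): an ErrorHandler
+// implementation that logs every reported error and returns false, so the
+// error is also stored in the Assembler/Compiler (the default behavior
+// described above). Assumes <stdio.h> is available.
+//
+//   struct PrintErrorHandler : public ErrorHandler {
+//     virtual bool handleError(Error code, const char* message) {
+//       ::fprintf(stderr, "AsmJit error %u: %s\n",
+//         static_cast<unsigned int>(code), message);
+//       return false;
+//     }
+//   };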
+
+// ============================================================================
+// [asmjit::ErrorUtil]
+// ============================================================================
+
+struct ErrorUtil {
+  //! @brief Get printable version of AsmJit @ref kError code.
+  static ASMJIT_API const char* asString(Error code);
+};
+
+// ============================================================================
+// [ASMJIT_PROPAGATE_ERROR]
+// ============================================================================
+
+#define ASMJIT_PROPAGATE_ERROR(_Exp_) \
+  do { \
+    ::asmjit::Error errval_ = (_Exp_); \
+    if (errval_ != ::asmjit::kErrorOk) \
+      return errval_; \
+  } while (0)
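+
+// A minimal usage sketch (not part of the original header): bubbling a
+// failure out of a code-generation helper. 'emitProlog()' is a hypothetical
+// call returning asmjit::Error.
+//
+//   Error generate() {
+//     ASMJIT_PROPAGATE_ERROR(emitProlog()); // Returns early on any error.
+//     return kErrorOk;
+//   }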
+
+//! @}
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../base/apiend.h"
+
+// [Guard]
+#endif // _ASMJIT_BASE_ERROR_H
diff --git a/src/asmjit/base/func.cpp b/src/asmjit/base/func.cpp
new file mode 100644
index 0000000..8eddd12
--- /dev/null
+++ b/src/asmjit/base/func.cpp
@@ -0,0 +1,20 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Export]
+#define ASMJIT_EXPORTS
+
+// [Dependencies - AsmJit]
+#include "../base/func.h"
+
+// [Api-Begin]
+#include "../base/apibegin.h"
+
+namespace asmjit {
+} // AsmJit
+
+// [Api-End]
+#include "../base/apiend.h"
diff --git a/src/asmjit/base/func.h b/src/asmjit/base/func.h
new file mode 100644
index 0000000..19b933a
--- /dev/null
+++ b/src/asmjit/base/func.h
@@ -0,0 +1,629 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Guard]
+#ifndef _ASMJIT_BASE_FUNC_H
+#define _ASMJIT_BASE_FUNC_H
+
+// [Dependencies - AsmJit]
+#include "../base/assert.h"
+#include "../base/defs.h"
+
+// [Api-Begin]
+#include "../base/apibegin.h"
+
+namespace asmjit {
+
+// ============================================================================
+// [Forward Declarations]
+// ============================================================================
+
+template<typename T>
+struct FnTypeId;
+
+// ============================================================================
+// [asmjit::kFuncConv]
+// ============================================================================
+
+ASMJIT_ENUM(kFuncConv) {
+  //! @brief Calling convention is invalid (can't be used).
+  kFuncConvNone = 0
+};
+
+// ============================================================================
+// [asmjit::kFuncHint]
+// ============================================================================
+
+//! @brief Function hints.
+ASMJIT_ENUM(kFuncHint) {
+  //! @brief Make a naked function (default true).
+  //!
+  //! A naked function is a function that doesn't use the standard
+  //! prolog/epilog sequence.
+  //!
+  //! @section X86/X64
+  //!
+  //! Standard prolog sequence is:
+  //!
+  //!   "push zbp"
+  //!   "mov zbp, zsp"
+  //!   "sub zsp, StackAdjustment"
+  //!
+  //! which is equal to:
+  //!
+  //!   "enter StackAdjustment, 0"
+  //!
+  //! Standard epilog sequence is:
+  //!
+  //!   "mov zsp, zbp"
+  //!   "pop zbp"
+  //!   "ret"
+  //!
+  //! which is equal to:
+  //!
+  //!   "leave"
+  //!   "ret"
+  //!
+  //! Naked functions can omit the prolog/epilog sequence. The advantage of
+  //! doing such modification is that the EBP/RBP register can be used by the
+  //! register allocator, which can result in fewer spills/allocs.
+  kFuncHintNaked = 0,
+
+  //! @brief Generate compact function prolog/epilog if possible.
+  //!
+  //! @section X86/X64
+  //!
+  //! Use a shorter, but possibly slower, prolog/epilog sequence to save and
+  //! restore registers.
+  kFuncHintCompact = 1
+};
+
+// ============================================================================
+// [asmjit::kFuncFlags]
+// ============================================================================
+
+//! @brief Function flags.
+ASMJIT_ENUM(kFuncFlags) {
+  //! @brief Whether the function is using naked (minimal) prolog / epilog.
+  kFuncFlagIsNaked = 0x00000001,
+
+  //! @brief Whether another function is called from this function.
+  kFuncFlagIsCaller = 0x00000002,
+
+  //! @brief Whether the stack is not aligned to the required stack alignment,
+  //! thus it has to be aligned manually.
+  kFuncFlagIsStackMisaligned = 0x00000004,
+
+  //! @brief Whether the stack pointer is adjusted by the stack size needed
+  //! to save registers and function variables.
+  //!
+  //! @section X86/X64
+  //!
+  //! Stack pointer (ESP/RSP) is adjusted by a 'sub' instruction in prolog and
+  //! by an 'add' instruction in epilog (only if the function is not naked). If
+  //! the function needs to perform manual stack alignment more instructions
+  //! are used to adjust the stack (like "and zsp, -Alignment").
+  kFuncFlagIsStackAdjusted = 0x00000008,
+
+  //! @brief Whether the function is finished using @c Compiler::endFunc().
+  kFuncFlagIsFinished = 0x80000000
+};
+
+// ============================================================================
+// [asmjit::kFuncDir]
+// ============================================================================
+
+//! @brief Function arguments direction.
+ASMJIT_ENUM(kFuncDir) {
+  //! @brief Arguments are passed left to right.
+  //!
+  //! This argument direction is unusual in C programming; it's used by Pascal
+  //! compilers and by some Borland calling conventions.
+  kFuncDirLtr = 0,
+  //! @brief Arguments are passed right to left.
+  //!
+  //! This is the default argument direction in C programming.
+  kFuncDirRtl = 1
+};
+
+// ============================================================================
+// [asmjit::kFuncStackInvalid]
+// ============================================================================
+
+enum {
+  //! @brief Invalid stack offset in function or function parameter.
+  kFuncStackInvalid = -1
+};
+
+// ============================================================================
+// [asmjit::kFuncArg]
+// ============================================================================
+
+//! @brief Function argument (lo/hi) specification.
+ASMJIT_ENUM(kFuncArg) {
+  //! @brief Maximum number of function arguments supported by AsmJit.
+  kFuncArgCount = 16,
+  //! @brief Extended maximum number of arguments (used internally).
+  kFuncArgCountLoHi = kFuncArgCount * 2,
+
+  //! @brief Index to the LO part of function argument (default).
+  //!
+  //! This value is typically omitted and added only if there is HI argument
+  //! accessed.
+  kFuncArgLo = 0,
+  //! @brief Index to the HI part of function argument.
+  //!
+  //! HI part of function argument depends on target architecture. On x86 it's
+  //! typically used to transfer 64-bit integers (they form a pair of 32-bit
+  //! integers).
+  kFuncArgHi = kFuncArgCount
+};
+
+// ============================================================================
+// [asmjit::kFuncRet]
+// ============================================================================
+
+//! @brief Function return value (lo/hi) specification.
+ASMJIT_ENUM(kFuncRet) {
+  //! @brief Index to the LO part of function return value.
+  kFuncRetLo = 0,
+  //! @brief Index to the HI part of function return value.
+  kFuncRetHi = 1
+};
+
+// ============================================================================
+// [asmjit::FnTypeId]
+// ============================================================================
+
+//! @internal
+#define ASMJIT_DECLARE_TYPE_CORE(_PtrId_) \
+  template<typename T> \
+  struct TypeId { enum { kId = static_cast<int>(::asmjit::kVarTypeInvalid) }; }; \
+  \
+  template<typename T> \
+  struct TypeId<T*> { enum { kId = _PtrId_ }; }
+
+//! @internal
+//!
+//! @brief Declare C/C++ type-id mapped to @c asmjit::kVarType.
+#define ASMJIT_DECLARE_TYPE_ID(_T_, _Id_) \
+  template<> \
+  struct TypeId<_T_> { enum { kId = _Id_ }; }
+
+//! @brief Function builder 'void' type.
+struct FnVoid {};
+
+//! @brief Function builder 'int8_t' type.
+struct FnInt8 {};
+//! @brief Function builder 'uint8_t' type.
+struct FnUInt8 {};
+
+//! @brief Function builder 'int16_t' type.
+struct FnInt16 {};
+//! @brief Function builder 'uint16_t' type.
+struct FnUInt16 {};
+
+//! @brief Function builder 'int32_t' type.
+struct FnInt32 {};
+//! @brief Function builder 'uint32_t' type.
+struct FnUInt32 {};
+
+//! @brief Function builder 'int64_t' type.
+struct FnInt64 {};
+//! @brief Function builder 'uint64_t' type.
+struct FnUInt64 {};
+
+//! @brief Function builder 'intptr_t' type.
+struct FnIntPtr {};
+//! @brief Function builder 'uintptr_t' type.
+struct FnUIntPtr {};
+
+//! @brief Function builder 'float' type.
+struct FnFloat {};
+//! @brief Function builder 'double' type.
+struct FnDouble {};
+
+ASMJIT_DECLARE_TYPE_CORE(kVarTypeIntPtr);
+
+ASMJIT_DECLARE_TYPE_ID(void, kVarTypeInvalid);
+ASMJIT_DECLARE_TYPE_ID(FnVoid, kVarTypeInvalid);
+
+ASMJIT_DECLARE_TYPE_ID(int8_t, kVarTypeInt8);
+ASMJIT_DECLARE_TYPE_ID(FnInt8, kVarTypeInt8);
+
+ASMJIT_DECLARE_TYPE_ID(uint8_t, kVarTypeUInt8);
+ASMJIT_DECLARE_TYPE_ID(FnUInt8, kVarTypeUInt8);
+
+ASMJIT_DECLARE_TYPE_ID(int16_t, kVarTypeInt16);
+ASMJIT_DECLARE_TYPE_ID(FnInt16, kVarTypeInt16);
+
+ASMJIT_DECLARE_TYPE_ID(uint16_t, kVarTypeUInt16);
+ASMJIT_DECLARE_TYPE_ID(FnUInt16, kVarTypeUInt16);
+
+ASMJIT_DECLARE_TYPE_ID(int32_t, kVarTypeInt32);
+ASMJIT_DECLARE_TYPE_ID(FnInt32, kVarTypeInt32);
+
+ASMJIT_DECLARE_TYPE_ID(uint32_t, kVarTypeUInt32);
+ASMJIT_DECLARE_TYPE_ID(FnUInt32, kVarTypeUInt32);
+
+ASMJIT_DECLARE_TYPE_ID(int64_t, kVarTypeInt64);
+ASMJIT_DECLARE_TYPE_ID(FnInt64, kVarTypeInt64);
+
+ASMJIT_DECLARE_TYPE_ID(uint64_t, kVarTypeUInt64);
+ASMJIT_DECLARE_TYPE_ID(FnUInt64, kVarTypeUInt64);
+
+ASMJIT_DECLARE_TYPE_ID(float, kVarTypeFp32);
+ASMJIT_DECLARE_TYPE_ID(FnFloat, kVarTypeFp32);
+
+ASMJIT_DECLARE_TYPE_ID(double, kVarTypeFp64);
+ASMJIT_DECLARE_TYPE_ID(FnDouble, kVarTypeFp64);
+
+// ============================================================================
+// [asmjit::FuncInOut]
+// ============================================================================
+
+//! @brief Function in/out (argument or a return value).
+//!
+//! This class contains a function argument or return value translated from
+//! the @ref FuncPrototype.
+struct FuncInOut {
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE uint32_t getVarType() const { return _varType; }
+
+  ASMJIT_INLINE bool hasRegIndex() const { return _regIndex != kInvalidReg; }
+  ASMJIT_INLINE uint32_t getRegIndex() const { return _regIndex; }
+
+  ASMJIT_INLINE bool hasStackOffset() const { return _stackOffset != kFuncStackInvalid; }
+  ASMJIT_INLINE int32_t getStackOffset() const { return static_cast<int32_t>(_stackOffset); }
+
+  //! @brief Get whether the argument / return value is assigned.
+  ASMJIT_INLINE bool isSet() const {
+    return (_regIndex != kInvalidReg) | (_stackOffset != kFuncStackInvalid);
+  }
+
+  // --------------------------------------------------------------------------
+  // [Reset]
+  // --------------------------------------------------------------------------
+
+  //! @brief Reset the function argument to "unassigned state".
+  ASMJIT_INLINE void reset() { _packed = 0xFFFFFFFF; }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  union {
+    struct {
+      //! @brief Variable type, see @c kVarType.
+      uint8_t _varType;
+      //! @brief Register index if argument / return value is a register.
+      uint8_t _regIndex;
+      //! @brief Stack offset if argument / return value is on the stack.
+      int16_t _stackOffset;
+    };
+
+    //! @brief All members packed into single 32-bit integer.
+    uint32_t _packed;
+  };
+};
+
+// ============================================================================
+// [asmjit::FuncPrototype]
+// ============================================================================
+
+//! @brief Function prototype.
+//!
+//! Function prototype contains information about the function return type,
+//! count of arguments, and their types. Function prototype is a low-level
+//! structure which doesn't contain platform-specific or calling-convention
+//! specific information. Function prototype is used to create a @ref FuncDecl.
+struct FuncPrototype {
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get function return value.
+  ASMJIT_INLINE uint32_t getRet() const { return _ret; }
+
+  //! @brief Get function arguments' IDs.
+  ASMJIT_INLINE const uint32_t* getArgList() const { return _argList; }
+  //! @brief Get count of function arguments.
+  ASMJIT_INLINE uint32_t getArgCount() const { return _argCount; }
+
+  //! @brief Get argument at index @a id.
+  ASMJIT_INLINE uint32_t getArg(uint32_t id) const {
+    ASMJIT_ASSERT(id < _argCount);
+    return _argList[id];
+  }
+
+  //! @brief Set function definition - return type and arguments.
+  ASMJIT_INLINE void _setPrototype(uint32_t ret, const uint32_t* argList, uint32_t argCount) {
+    _ret = ret;
+    _argList = argList;
+    _argCount = argCount;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  uint32_t _ret;
+  uint32_t _argCount;
+  const uint32_t* _argList;
+};
+
+// ============================================================================
+// [asmjit::FuncDecl]
+// ============================================================================
+
+//! @brief Function declaration.
+struct FuncDecl {
+  // --------------------------------------------------------------------------
+  // [Accessors - Calling Convention]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get function calling convention, see @c kFuncConv.
+  ASMJIT_INLINE uint32_t getConvention() const { return _convention; }
+
+  //! @brief Get whether the callee pops the stack.
+  ASMJIT_INLINE uint32_t getCalleePopsStack() const { return _calleePopsStack; }
+
+  //! @brief Get direction of arguments passed on the stack.
+  //!
+  //! Direction should always be @c kFuncDirRtl.
+  //!
+  //! @note This is related to the used calling convention, it's not affected
+  //! by the number of function arguments or their types.
+  ASMJIT_INLINE uint32_t getDirection() const { return _direction; }
+
+  //! @brief Get stack size needed for function arguments passed on the stack.
+  ASMJIT_INLINE uint32_t getArgStackSize() const { return _argStackSize; }
+  //! @brief Get size of "Red Zone".
+  ASMJIT_INLINE uint32_t getRedZoneSize() const { return _redZoneSize; }
+  //! @brief Get size of "Spill Zone".
+  ASMJIT_INLINE uint32_t getSpillZoneSize() const { return _spillZoneSize; }
+
+  // --------------------------------------------------------------------------
+  // [Accessors - Arguments and Return]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get whether the function has a return value.
+  ASMJIT_INLINE bool hasRet() const { return _retCount != 0; }
+  //! @brief Get count of function return values.
+  ASMJIT_INLINE uint32_t getRetCount() const { return _retCount; }
+
+  //! @brief Get function return value.
+  ASMJIT_INLINE FuncInOut& getRet(uint32_t index = kFuncRetLo) { return _retList[index]; }
+  //! @brief Get function return value (const).
+  ASMJIT_INLINE const FuncInOut& getRet(uint32_t index = kFuncRetLo) const { return _retList[index]; }
+
+  //! @brief Get count of function arguments.
+ ASMJIT_INLINE uint32_t getArgCount() const { return _argCount; } + + //! @brief Get function arguments array. + ASMJIT_INLINE FuncInOut* getArgList() { return _argList; } + //! @brief Get function arguments array (const). + ASMJIT_INLINE const FuncInOut* getArgList() const { return _argList; } + + //! @brief Get function argument at index @a index. + ASMJIT_INLINE FuncInOut& getArg(size_t index) { + ASMJIT_ASSERT(index < kFuncArgCountLoHi); + return _argList[index]; + } + + //! @brief Get function argument at index @a index. + ASMJIT_INLINE const FuncInOut& getArg(size_t index) const { + ASMJIT_ASSERT(index < kFuncArgCountLoHi); + return _argList[index]; + } + + ASMJIT_INLINE void resetArg(size_t index) { + ASMJIT_ASSERT(index < kFuncArgCountLoHi); + _argList[index].reset(); + } + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + + //! @brief Calling convention. + uint8_t _convention; + //! @brief Whether a callee pops stack. + uint8_t _calleePopsStack : 1; + //! @brief Direction for arguments passed on the stack, see @c kFuncDir. + uint8_t _direction : 1; + //! @brief Reserved #0 (alignment). + uint8_t _reserved0 : 6; + + //! @brief Count of arguments (in @c _argList). + uint8_t _argCount; + //! @brief Count of return value(s). + uint8_t _retCount; + + //! @brief Count of bytes consumed by arguments on the stack (aligned). + uint32_t _argStackSize; + + //! @brief Size of "Red Zone". + //! + //! @note Used by AMD64-ABI (128 bytes). + uint16_t _redZoneSize; + + //! @brief Size of "Spill Zone". + //! + //! @note Used by WIN64-ABI (32 bytes). + uint16_t _spillZoneSize; + + //! @brief Function arguments (including HI arguments) mapped to physical + //! registers and stack offset. + FuncInOut _argList[kFuncArgCountLoHi]; + + //! @brief Function return value(s). + FuncInOut _retList[2]; +}; + +// ============================================================================ +// [asmjit::FuncBuilderX] +// ============================================================================ + +//! @brief Custom function builder for up to 32 function arguments. +struct FuncBuilderX : public FuncPrototype { + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE FuncBuilderX() { + _setPrototype(kVarTypeInvalid, _builderArgList, 0); + } + + // -------------------------------------------------------------------------- + // [Accessors] + // -------------------------------------------------------------------------- + + //! @brief Set return type to @a retType. 
+  ASMJIT_INLINE void setRet(uint32_t retType) {
+    _ret = retType;
+  }
+
+  ASMJIT_INLINE void setArg(uint32_t id, uint32_t type) {
+    ASMJIT_ASSERT(id < _argCount);
+    _builderArgList[id] = type;
+  }
+
+  ASMJIT_INLINE void addArg(uint32_t type) {
+    ASMJIT_ASSERT(_argCount < kFuncArgCount);
+    _builderArgList[_argCount++] = type;
+  }
+
+  template<typename T>
+  ASMJIT_INLINE void setRetT()
+  { setRet(TypeId<T>::kId); }
+
+  template<typename T>
+  ASMJIT_INLINE void setArgT(uint32_t id)
+  { setArg(id, TypeId<T>::kId); }
+
+  template<typename T>
+  ASMJIT_INLINE void addArgT()
+  { addArg(TypeId<T>::kId); }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  uint32_t _builderArgList[kFuncArgCount];
+};
+
+#define _TID(_T_) TypeId<_T_>::kId
+
+//! @brief Function builder (no args).
+template<typename RET>
+struct FuncBuilder0 : public FuncPrototype {
+  ASMJIT_INLINE FuncBuilder0() {
+    _setPrototype(_TID(RET), NULL, 0);
+  }
+};
+
+//! @brief Function builder (1 argument).
+template<typename RET, typename P0>
+struct FuncBuilder1 : public FuncPrototype {
+  ASMJIT_INLINE FuncBuilder1() {
+    static const uint32_t args[] = { _TID(P0) };
+    _setPrototype(_TID(RET), args, ASMJIT_ARRAY_SIZE(args));
+  }
+};
+
+//! @brief Function builder (2 arguments).
+template<typename RET, typename P0, typename P1>
+struct FuncBuilder2 : public FuncPrototype {
+  ASMJIT_INLINE FuncBuilder2() {
+    static const uint32_t args[] = { _TID(P0), _TID(P1) };
+    _setPrototype(_TID(RET), args, ASMJIT_ARRAY_SIZE(args));
+  }
+};
+
+//! @brief Function builder (3 arguments).
+template<typename RET, typename P0, typename P1, typename P2>
+struct FuncBuilder3 : public FuncPrototype {
+  ASMJIT_INLINE FuncBuilder3() {
+    static const uint32_t args[] = { _TID(P0), _TID(P1), _TID(P2) };
+    _setPrototype(_TID(RET), args, ASMJIT_ARRAY_SIZE(args));
+  }
+};
+
+//! @brief Function builder (4 arguments).
+template<typename RET, typename P0, typename P1, typename P2, typename P3>
+struct FuncBuilder4 : public FuncPrototype {
+  ASMJIT_INLINE FuncBuilder4() {
+    static const uint32_t args[] = { _TID(P0), _TID(P1), _TID(P2), _TID(P3) };
+    _setPrototype(_TID(RET), args, ASMJIT_ARRAY_SIZE(args));
+  }
+};
+
+//! @brief Function builder (5 arguments).
+template<typename RET, typename P0, typename P1, typename P2, typename P3, typename P4>
+struct FuncBuilder5 : public FuncPrototype {
+  ASMJIT_INLINE FuncBuilder5() {
+    static const uint32_t args[] = { _TID(P0), _TID(P1), _TID(P2), _TID(P3), _TID(P4) };
+    _setPrototype(_TID(RET), args, ASMJIT_ARRAY_SIZE(args));
+  }
+};
+
+//! @brief Function builder (6 arguments).
+template<typename RET, typename P0, typename P1, typename P2, typename P3, typename P4, typename P5>
+struct FuncBuilder6 : public FuncPrototype {
+  ASMJIT_INLINE FuncBuilder6() {
+    static const uint32_t args[] = { _TID(P0), _TID(P1), _TID(P2), _TID(P3), _TID(P4), _TID(P5) };
+    _setPrototype(_TID(RET), args, ASMJIT_ARRAY_SIZE(args));
+  }
+};
+
+//! @brief Function builder (7 arguments).
+template<typename RET, typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6>
+struct FuncBuilder7 : public FuncPrototype {
+  ASMJIT_INLINE FuncBuilder7() {
+    static const uint32_t args[] = { _TID(P0), _TID(P1), _TID(P2), _TID(P3), _TID(P4), _TID(P5), _TID(P6) };
+    _setPrototype(_TID(RET), args, ASMJIT_ARRAY_SIZE(args));
+  }
+};
+
+//! @brief Function builder (8 arguments).
+template<typename RET, typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7>
+struct FuncBuilder8 : public FuncPrototype {
+  ASMJIT_INLINE FuncBuilder8() {
+    static const uint32_t args[] = { _TID(P0), _TID(P1), _TID(P2), _TID(P3), _TID(P4), _TID(P5), _TID(P6), _TID(P7) };
+    _setPrototype(_TID(RET), args, ASMJIT_ARRAY_SIZE(args));
+  }
+};
+
+//! @brief Function builder (9 arguments).
+template<typename RET, typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8>
+struct FuncBuilder9 : public FuncPrototype {
+  ASMJIT_INLINE FuncBuilder9() {
+    static const uint32_t args[] = { _TID(P0), _TID(P1), _TID(P2), _TID(P3), _TID(P4), _TID(P5), _TID(P6), _TID(P7), _TID(P8) };
+    _setPrototype(_TID(RET), args, ASMJIT_ARRAY_SIZE(args));
+  }
+};
+
+//! @brief Function builder (10 arguments).
+template<typename RET, typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9>
+struct FuncBuilder10 : public FuncPrototype {
+  ASMJIT_INLINE FuncBuilder10() {
+    static const uint32_t args[] = { _TID(P0), _TID(P1), _TID(P2), _TID(P3), _TID(P4), _TID(P5), _TID(P6), _TID(P7), _TID(P8), _TID(P9) };
+    _setPrototype(_TID(RET), args, ASMJIT_ARRAY_SIZE(args));
+  }
+};
+
+#undef _TID
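+
+// A minimal usage sketch (not part of the original header): describing the
+// prototype of "int func(int, int)" statically with FuncBuilder2, or built
+// at runtime with FuncBuilderX. ('int' maps to int32_t on common platforms.)
+//
+//   FuncBuilder2<FnInt32, FnInt32, FnInt32> proto; // int (*)(int, int).
+//
+//   FuncBuilderX xproto;    // The same prototype, built dynamically.
+//   xproto.setRetT<int>();
+//   xproto.addArgT<int>();
+//   xproto.addArgT<int>();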
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../base/apiend.h"
+
+// [Guard]
+#endif // _ASMJIT_BASE_FUNC_H
diff --git a/src/asmjit/base/globals.h b/src/asmjit/base/globals.h
new file mode 100644
index 0000000..af22e22
--- /dev/null
+++ b/src/asmjit/base/globals.h
@@ -0,0 +1,124 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Guard]
+#ifndef _ASMJIT_BASE_GLOBALS_H
+#define _ASMJIT_BASE_GLOBALS_H
+
+// [Dependencies - AsmJit]
+#include "../build.h"
+
+// [Api-Begin]
+#include "../base/apibegin.h"
+
+namespace asmjit {
+
+//! @addtogroup asmjit_base
+//! @{
+
+// ============================================================================
+// [asmjit::kGlobal]
+// ============================================================================
+
+ASMJIT_ENUM(kGlobal) {
+  //! @brief Invalid operand id.
+  kInvalidValue = 0xFFFFFFFF,
+
+  //! @brief Invalid register index.
+  kInvalidReg = 0xFF,
+
+  //! @brief Minimum reserved bytes in @ref Buffer.
+  kBufferGrow = 32U,
+
+  //! @brief Minimum size of assembler/compiler code buffer.
+  kMemAllocMinimum = 4096,
+
+  //! @brief Memory grow threshold.
+  //!
+  //! If the grow threshold is reached the capacity is not doubled anymore.
+  kMemAllocGrowMax = 8192 * 1024,
+
+  //! @brief An overhead of the host memory allocator.
+  //!
+  //! We decrement the overhead from our pools so the host operating system
+  //! doesn't need to allocate an extra virtual page to put the data it needs
+  //! to manage the requested memory block (for example if a single virtual
+  //! page is 4096 bytes and we require the same memory size we decrease our
+  //! requirement by kMemAllocOverhead).
+  kMemAllocOverhead = sizeof(intptr_t) * 4
+};
+
+static const size_t kInvalidIndex = ~static_cast<size_t>(0);
+
+// ============================================================================
+// [asmjit::kArch]
+// ============================================================================
+
+//! @brief Architecture.
+ASMJIT_ENUM(kArch) {
+  //! @brief No/Unknown architecture.
+  kArchNone = 0,
+
+  //! @brief X86 architecture.
+  kArchX86 = 1,
+  //! @brief X64 architecture, also called AMD64.
+  kArchX64 = 2,
+  //! @brief Arm architecture.
+  kArchArm = 3,
+
+#if defined(ASMJIT_HOST_X86)
+  kArchHost = kArchX86,
+#endif // ASMJIT_HOST_X86
+
+#if defined(ASMJIT_HOST_X64)
+  kArchHost = kArchX64,
+#endif // ASMJIT_HOST_X64
+
+#if defined(ASMJIT_HOST_ARM)
+  kArchHost = kArchArm,
+#endif // ASMJIT_HOST_ARM
+
+  //! @brief Whether the host is 64-bit.
+  kArchHost64Bit = sizeof(intptr_t) >= 8
+};
+
+// ============================================================================
+// [asmjit::_Initialize]
+// ============================================================================
+
+struct _Initialize {};
+static const _Initialize Initialize = {};
+
+// ============================================================================
+// [asmjit::_DontInitialize]
+// ============================================================================
+
+struct _DontInitialize {};
+static const _DontInitialize DontInitialize = {};
+
+//! @}
+
+} // asmjit namespace
+
+// ============================================================================
+// [asmjit_cast<>]
+// ============================================================================
+
+//! @brief Cast used to cast pointer to function. It's like reinterpret_cast<>,
+//! but uses a C-style cast internally to work with MinGW.
+//!
+//! If you are using a single compiler and @c reinterpret_cast<> works for you,
+//! there is no reason to use @c asmjit_cast<>. If you are writing
+//! cross-platform software with various compiler support, consider using
+//! @c asmjit_cast<> instead of @c reinterpret_cast<>.
+template<typename T, typename Z>
+static ASMJIT_INLINE T asmjit_cast(Z* p) { return (T)p; }
+
+// [Api-End]
+#include "../base/apiend.h"
+
+// [Guard]
+#endif // _ASMJIT_BASE_GLOBALS_H
diff --git a/src/asmjit/base/intutil.h b/src/asmjit/base/intutil.h
new file mode 100644
index 0000000..ff40770
--- /dev/null
+++ b/src/asmjit/base/intutil.h
@@ -0,0 +1,693 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Guard]
+#ifndef _ASMJIT_BASE_INTUTIL_H
+#define _ASMJIT_BASE_INTUTIL_H
+
+// [Dependencies - AsmJit]
+#include "../base/assert.h"
+#include "../base/globals.h"
+
+#if defined(_MSC_VER)
+#pragma intrinsic(_BitScanForward)
+#endif // _MSC_VER
+
+// [Api-Begin]
+#include "../base/apibegin.h"
+
+namespace asmjit {
+
+//! @addtogroup asmjit_base
+//! @{
+
+// ============================================================================
+// [asmjit::IntTraits]
+// ============================================================================
+
+template<typename T>
+struct IntTraits {
+  enum {
+    kIsSigned = (~static_cast<T>(0)) < static_cast<T>(0),
+    kIsUnsigned = !kIsSigned,
+
+    kIs8Bit = sizeof(T) == 1,
+    kIs16Bit = sizeof(T) == 2,
+    kIs32Bit = sizeof(T) == 4,
+    kIs64Bit = sizeof(T) == 8,
+
+    kIsIntPtr = sizeof(T) == sizeof(intptr_t)
+  };
+};
+
+// ============================================================================
+// [asmjit::IntUtil]
+// ============================================================================
+
+struct IntUtil {
+  // --------------------------------------------------------------------------
+  // [Float <-> Int]
+  // --------------------------------------------------------------------------
+
+  union Float {
+    int32_t i;
+    float f;
+  };
+
+  union Double {
+    int64_t i;
+    double d;
+  };
+
+  static ASMJIT_INLINE int32_t floatAsInt(float f) { Float m; m.f = f; return m.i; }
+  static ASMJIT_INLINE float intAsFloat(int32_t i) { Float m; m.i = i; return m.f; }
+
+  static ASMJIT_INLINE int64_t doubleAsInt(double d) { Double m; m.d = d; return m.i; }
+  static ASMJIT_INLINE double intAsDouble(int64_t i) { Double m; m.i = i; return m.d; }
+
+  // --------------------------------------------------------------------------
+  // [AsmJit - Pack / Unpack]
+  // --------------------------------------------------------------------------
+
+  static ASMJIT_INLINE uint32_t pack32_2x8_1x16(uint32_t u0, uint32_t u1, uint32_t w2) {
+#if defined(ASMJIT_HOST_LE)
+    return u0 + (u1 << 8) + (w2 << 16);
+#else
+    return (u0 << 24) + (u1 << 16) + (w2);
+#endif // ASMJIT_HOST
+  }
+
+  static ASMJIT_INLINE uint32_t pack32_4x8(uint32_t u0, uint32_t u1, uint32_t u2, uint32_t u3) {
+#if defined(ASMJIT_HOST_LE)
+    return u0 + (u1 << 8) + (u2 << 16) + (u3 << 24);
+#else
+    return (u0 << 24) + (u1 << 16) + (u2 << 8) + u3;
+#endif // ASMJIT_HOST
+  }
+
+  static ASMJIT_INLINE uint64_t pack64_2x32(uint32_t u0, uint32_t u1) {
+#if defined(ASMJIT_HOST_LE)
+    return (static_cast<uint64_t>(u1) << 32) + u0;
+#else
+    return (static_cast<uint64_t>(u0) << 32) + u1;
+#endif // ASMJIT_HOST
+  }
+
+  // --------------------------------------------------------------------------
+  // [AsmJit - Min/Max]
+  // --------------------------------------------------------------------------
+
+  // NOTE: Because some environments declare min() and max() as macros, it has
+  // been decided to use a different name so we never collide with them.
+
+  template<typename T>
+  static ASMJIT_INLINE T iMin(const T& a, const T& b) { return a < b ? a : b; }
+
+  template<typename T>
+  static ASMJIT_INLINE T iMax(const T& a, const T& b) { return a > b ? a : b; }
+
+  // --------------------------------------------------------------------------
+  // [AsmJit - MaxUInt]
+  // --------------------------------------------------------------------------
+
+  template<typename T>
+  static ASMJIT_INLINE T maxUInt() { return ~T(0); }
+
+  // --------------------------------------------------------------------------
+  // [AsmJit - InInterval]
+  // --------------------------------------------------------------------------
+
+  template<typename T>
+  static ASMJIT_INLINE bool inInterval(const T& x, const T& start, const T& end)
+  { return x >= start && x <= end; }
+
+  // --------------------------------------------------------------------------
+  // [AsmJit - IsInt/IsUInt]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get whether the given integer @a x can be casted to a signed 8-bit
+  //! integer.
+  template<typename T>
+  static ASMJIT_INLINE bool isInt8(T x) {
+    if (IntTraits<T>::kIsSigned)
+      return sizeof(T) <= sizeof(int8_t) ? true : x >= T(-128) && x <= T(127);
+    else
+      return x <= T(127);
+  }
+
+  //! @brief Get whether the given integer @a x can be casted to an unsigned 8-bit
+  //! integer.
+  template<typename T>
+  static ASMJIT_INLINE bool isUInt8(T x) {
+    if (IntTraits<T>::kIsSigned)
+      return x >= T(0) && (sizeof(T) <= sizeof(uint8_t) ? true : x <= T(255));
+    else
+      return sizeof(T) <= sizeof(uint8_t) ? true : x <= T(255);
+  }
+
+  //! @brief Get whether the given integer @a x can be casted to a signed 16-bit
+  //! integer.
+  template<typename T>
+  static ASMJIT_INLINE bool isInt16(T x) {
+    if (IntTraits<T>::kIsSigned)
+      return sizeof(T) <= sizeof(int16_t) ? true : x >= T(-32768) && x <= T(32767);
+    else
+      return x >= T(0) && (sizeof(T) <= sizeof(int16_t) ? true : x <= T(32767));
+  }
+
+  //! @brief Get whether the given integer @a x can be casted to an unsigned 16-bit
+  //! integer.
+  template<typename T>
+  static ASMJIT_INLINE bool isUInt16(T x) {
+    if (IntTraits<T>::kIsSigned)
+      return x >= T(0) && (sizeof(T) <= sizeof(uint16_t) ? true : x <= T(65535));
+    else
+      return sizeof(T) <= sizeof(uint16_t) ? true : x <= T(65535);
+  }
+
+  //! @brief Get whether the given integer @a x can be casted to a signed 32-bit
+  //! integer.
+  template<typename T>
+  static ASMJIT_INLINE bool isInt32(T x) {
+    if (IntTraits<T>::kIsSigned)
+      return sizeof(T) <= sizeof(int32_t) ? true : x >= T(-2147483647) - 1 && x <= T(2147483647);
+    else
+      return x >= T(0) && (sizeof(T) <= sizeof(int32_t) ? true : x <= T(2147483647));
+  }
+
+  //! @brief Get whether the given integer @a x can be casted to an unsigned 32-bit
+  //! integer.
+  template<typename T>
+  static ASMJIT_INLINE bool isUInt32(T x) {
+    if (IntTraits<T>::kIsSigned)
+      return x >= T(0) && (sizeof(T) <= sizeof(uint32_t) ? true : x <= T(4294967295U));
+    else
+      return sizeof(T) <= sizeof(uint32_t) ? true : x <= T(4294967295U);
+  }
+
+  // --------------------------------------------------------------------------
+  // [AsmJit - IsPowerOf2]
+  // --------------------------------------------------------------------------
+
+  template<typename T>
+  static ASMJIT_INLINE bool isPowerOf2(T n) {
+    return n != 0 && (n & (n - 1)) == 0;
+  }
+
+  // --------------------------------------------------------------------------
+  // [AsmJit - Mask]
+  // --------------------------------------------------------------------------
+
+  static ASMJIT_INLINE uint32_t mask(uint32_t x) {
+    ASMJIT_ASSERT(x < 32);
+    return (1U << x);
+  }
+
+  static ASMJIT_INLINE uint32_t mask(uint32_t x0, uint32_t x1) {
+    return mask(x0) | mask(x1);
+  }
+
+  static ASMJIT_INLINE uint32_t mask(uint32_t x0, uint32_t x1, uint32_t x2) {
+    return mask(x0) | mask(x1) | mask(x2);
+  }
+
+  static ASMJIT_INLINE uint32_t mask(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3) {
+    return mask(x0) | mask(x1) | mask(x2) | mask(x3);
+  }
+
+  static ASMJIT_INLINE uint32_t mask(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, uint32_t x4) {
+    return mask(x0) | mask(x1) | mask(x2) | mask(x3) |
+           mask(x4) ;
+  }
+
+  static ASMJIT_INLINE uint32_t mask(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, uint32_t x4, uint32_t x5) {
+    return mask(x0) | mask(x1) | mask(x2) | mask(x3) |
+           mask(x4) | mask(x5) ;
+  }
+
+  static ASMJIT_INLINE uint32_t mask(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, uint32_t x4, uint32_t x5, uint32_t x6) {
+    return mask(x0) | mask(x1) | mask(x2) | mask(x3) |
+           mask(x4) | mask(x5) | mask(x6) ;
+  }
+
+  static ASMJIT_INLINE uint32_t mask(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, uint32_t x4, uint32_t x5, uint32_t x6, uint32_t x7) {
+    return mask(x0) | mask(x1) | mask(x2) | mask(x3) |
+           mask(x4) | mask(x5) | mask(x6) | mask(x7) ;
+  }
+
+  static ASMJIT_INLINE uint32_t mask(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, uint32_t x4, uint32_t x5, uint32_t x6, uint32_t x7, uint32_t x8) {
+    return mask(x0) | mask(x1) | mask(x2) | mask(x3) |
+           mask(x4) | mask(x5) | mask(x6) | mask(x7) |
+           mask(x8) ;
+  }
+
+  static ASMJIT_INLINE uint32_t mask(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, uint32_t x4, uint32_t x5, uint32_t x6, uint32_t x7, uint32_t x8, uint32_t x9) {
+    return mask(x0) | mask(x1) | mask(x2) | mask(x3) |
+           mask(x4) | mask(x5) | mask(x6) | mask(x7) |
+           mask(x8) | mask(x9) ;
+  }
+
+  // --------------------------------------------------------------------------
+  // [AsmJit - Bits]
+  // --------------------------------------------------------------------------
+
+  static ASMJIT_INLINE uint32_t bits(uint32_t x) {
+    // Shifting by more bits than the type width has undefined behavior. All
+    // we need is that the application doesn't crash because of that, but the
+    // content of the register after such shift is not defined. So in case the
+    // requested shift is too large for the type we correct this undefined
+    // behavior by setting all bits to ones (this is why we generate an
+    // overflow mask).
+    uint32_t overflow = static_cast<uint32_t>(
+      -static_cast<int32_t>(x >= sizeof(uint32_t) * 8));
+
+    return ((static_cast<uint32_t>(1) << x) - 1U) | overflow;
+  }
+
+  // --------------------------------------------------------------------------
+  // [AsmJit - HasBit]
+  // --------------------------------------------------------------------------
+
+  static ASMJIT_INLINE bool hasBit(uint32_t x, uint32_t n)
+  { return static_cast<bool>((x >> n) & 0x1); }
+
+  // --------------------------------------------------------------------------
+  // [AsmJit - BitCount]
+  // --------------------------------------------------------------------------
+
+  // From http://graphics.stanford.edu/~seander/bithacks.html .
+  static ASMJIT_INLINE uint32_t bitCount(uint32_t x) {
+    x = x - ((x >> 1) & 0x55555555U);
+    x = (x & 0x33333333U) + ((x >> 2) & 0x33333333U);
+    return (((x + (x >> 4)) & 0x0F0F0F0FU) * 0x01010101U) >> 24;
+  }
+
+  // --------------------------------------------------------------------------
+  // [AsmJit - FindFirstBit]
+  // --------------------------------------------------------------------------
+
+  static ASMJIT_INLINE uint32_t findFirstBitSlow(uint32_t mask) {
+    // This is a reference (slow) implementation of findFirstBit(), used when
+    // we don't have compiler support for this task. The implementation speed
+    // has been improved to check for 2 bits per iteration.
+    uint32_t i = 1;
+
+    while (mask != 0) {
+      uint32_t two = mask & 0x3;
+      if (two != 0x0)
+        return i - (two & 0x1);
+
+      i += 2;
+      mask >>= 2;
+    }
+
+    return 0xFFFFFFFFU;
+  }
+
+  static ASMJIT_INLINE uint32_t findFirstBit(uint32_t mask) {
+#if defined(_MSC_VER)
+    DWORD i;
+    if (_BitScanForward(&i, mask))
+    {
+      ASMJIT_ASSERT(findFirstBitSlow(mask) == i);
+      return static_cast<uint32_t>(i);
+    }
+    return 0xFFFFFFFFU;
+#else
+    return findFirstBitSlow(mask);
+#endif
+  }
+
+  // --------------------------------------------------------------------------
+  // [AsmJit - Misc]
+  // --------------------------------------------------------------------------

+  static ASMJIT_INLINE uint32_t keepNOnesFromRight(uint32_t mask, uint32_t nBits) {
+    uint32_t m = 0x1;
+
+    do {
+      nBits -= (mask & m) == 0;
+      m <<= 1;
+      if (nBits == 0) {
+        m -= 1;
+        mask &= m;
+        break;
+      }
+    } while (m);
+
+    return mask;
+  }
+
+  static ASMJIT_INLINE uint32_t indexNOnesFromRight(uint8_t* dst, uint32_t mask, uint32_t nBits) {
+    uint32_t totalBits = nBits;
+    uint8_t i = 0;
+    uint32_t m = 0x1;
+
+    do {
+      if (mask & m) {
+        *dst++ = i;
+        if (--nBits == 0)
+          break;
+      }
+
+      m <<= 1;
+      i++;
+    } while (m);
+
+    return totalBits - nBits;
+  }
+
+  // --------------------------------------------------------------------------
+  // [AsmJit - Alignment]
+  // --------------------------------------------------------------------------
+
+  template<typename T>
+  static ASMJIT_INLINE bool isAligned(T base, T alignment)
+  { return (base % alignment) == 0; }
+
+  //! @brief Align @a base to @a alignment.
+  template<typename T>
+  static ASMJIT_INLINE T alignTo(T base, T alignment)
+  { return (base + (alignment - 1)) & ~(alignment - 1); }
+
+  //! @brief Get delta required to align @a base to @a alignment.
+  template<typename T>
+  static ASMJIT_INLINE T deltaTo(T base, T alignment)
+  { return alignTo(base, alignment) - base; }
+
+  // --------------------------------------------------------------------------
+  // [AsmJit - Round]
+  // --------------------------------------------------------------------------
+
+  template<typename T>
+  static ASMJIT_INLINE T roundUp(T base, T alignment) {
+    T over = base % alignment;
+    return base + (over > 0 ? alignment - over : 0);
+  }
+
+  template<typename T>
+  static ASMJIT_INLINE T roundUpToPowerOf2(T base) {
+    // Implementation is from "Hacker's Delight" by Henry S. Warren, Jr.,
+    // figure 3-3, page 48, where the function is called clp2.
+    base -= 1;
+
+    // Keeping this portable; MSVC emits warning C4293 ("Shift count negative
+    // or too big, undefined behavior") here, so silence it explicitly.
+#if defined(_MSC_VER)
+# pragma warning(push)
+# pragma warning(disable: 4293)
+#endif // _MSC_VER
+
+    base = base | (base >> 1);
+    base = base | (base >> 2);
+    base = base | (base >> 4);
+
+    // 8/16/32 constants are multiplied by the condition to prevent a compiler
+    // complaining about the 'shift count >= type width' (GCC).
+    if (sizeof(T) >= 2) base = base | (base >> ( 8 * (sizeof(T) >= 2))); // Base >> 8.
+    if (sizeof(T) >= 4) base = base | (base >> (16 * (sizeof(T) >= 4))); // Base >> 16.
+    if (sizeof(T) >= 8) base = base | (base >> (32 * (sizeof(T) >= 8))); // Base >> 32.
+
+#if defined(_MSC_VER)
+# pragma warning(pop)
+#endif // _MSC_VER
+
+    return base + 1;
+  }
+};
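+
+// A minimal usage sketch (not part of the original header) for the alignment
+// and rounding helpers above:
+//
+//   IntUtil::isAligned<size_t>(24, 8);        // true, 24 % 8 == 0.
+//   IntUtil::alignTo<size_t>(13, 8);          // 16, the next multiple of 8.
+//   IntUtil::deltaTo<size_t>(13, 8);          // 3, because 16 - 13 == 3.
+//   IntUtil::roundUpToPowerOf2<uint32_t>(17); // 32.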
+
+// ============================================================================
+// [asmjit::UInt64]
+// ============================================================================
+
+union UInt64 {
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  // Static factory helpers that build a new UInt64 value.
+  static ASMJIT_INLINE UInt64 fromUInt64(uint64_t val) {
+    UInt64 data;
+    data.setUInt64(val);
+    return data;
+  }
+
+  static ASMJIT_INLINE UInt64 fromUInt64(const UInt64& val) {
+    UInt64 data;
+    data.setUInt64(val);
+    return data;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Reset]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE void reset() {
+    if (kArchHost64Bit) {
+      u64 = 0;
+    }
+    else {
+      u32[0] = 0;
+      u32[1] = 0;
+    }
+  }
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE uint64_t getUInt64() const {
+    return u64;
+  }
+
+  ASMJIT_INLINE UInt64& setUInt64(uint64_t val) {
+    u64 = val;
+    return *this;
+  }
+
+  ASMJIT_INLINE UInt64& setUInt64(const UInt64& val) {
+    if (kArchHost64Bit) {
+      u64 = val.u64;
+    }
+    else {
+      u32[0] = val.u32[0];
+      u32[1] = val.u32[1];
+    }
+    return *this;
+  }
+
+  ASMJIT_INLINE UInt64& setPacked_2x32(uint32_t u0, uint32_t u1) {
+    if (kArchHost64Bit) {
+      u64 = IntUtil::pack64_2x32(u0, u1);
+    }
+    else {
+      u32[0] = u0;
+      u32[1] = u1;
+    }
+    return *this;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Add]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE UInt64& add(uint64_t val) {
+    u64 += val;
+    return *this;
+  }
+
+  ASMJIT_INLINE UInt64& add(const UInt64& val) {
+    if (kArchHost64Bit) {
+      u64 += val.u64;
+    }
+    else {
+      u32[0] += val.u32[0];
+      u32[1] += val.u32[1];
+    }
+    return *this;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Sub]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE UInt64& sub(uint64_t val) {
+    u64 -= val;
+    return *this;
+  }
+
+  ASMJIT_INLINE UInt64& sub(const UInt64& val) {
+    if (kArchHost64Bit) {
+      u64 -= val.u64;
+    }
+    else {
+      u32[0] -= val.u32[0];
+      u32[1] -= val.u32[1];
+    }
+    return *this;
+ } + + // -------------------------------------------------------------------------- + // [And] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE UInt64& and_(uint64_t val) { + u64 &= val; + return *this; + } + + ASMJIT_INLINE UInt64& and_(const UInt64& val) { + if (kArchHost64Bit) { + u64 &= val.u64; + } + else { + u32[0] &= val.u32[0]; + u32[1] &= val.u32[1]; + } + return *this; + } + + // -------------------------------------------------------------------------- + // [Or] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE UInt64& or_(uint64_t val) { + u64 |= val; + return *this; + } + + ASMJIT_INLINE UInt64& or_(const UInt64& val) { + if (kArchHost64Bit) { + u64 |= val.u64; + } + else { + u32[0] |= val.u32[0]; + u32[1] |= val.u32[1]; + } + return *this; + } + + // -------------------------------------------------------------------------- + // [Xor] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE UInt64& xor_(uint64_t val) { + u64 ^= val; + return *this; + } + + ASMJIT_INLINE UInt64& xor_(const UInt64& val) { + if (kArchHost64Bit) { + u64 ^= val.u64; + } + else { + u32[0] ^= val.u32[0]; + u32[1] ^= val.u32[1]; + } + return *this; + } + + // -------------------------------------------------------------------------- + // [Del] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE UInt64& del(uint64_t val) { + u64 &= ~val; + return *this; + } + + ASMJIT_INLINE UInt64& del(const UInt64& val) { + if (kArchHost64Bit) { + u64 &= ~val.u64; + } + else { + u32[0] &= ~val.u32[0]; + u32[1] &= ~val.u32[1]; + } + return *this; + } + + // -------------------------------------------------------------------------- + // [Eq] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE bool isZero() const + { return kArchHost64Bit ? u64 == 0 : (u32[0] | u32[1]) == 0; } + + ASMJIT_INLINE bool isNonZero() const + { return kArchHost64Bit ? u64 != 0 : (u32[0] | u32[1]) != 0; } + + ASMJIT_INLINE bool eq(uint64_t val) const + { return u64 == val; } + + ASMJIT_INLINE bool eq(const UInt64& val) const + { return kArchHost64Bit ? 
u64 == val.u64 : (u32[0] == val.u32[0]) & (u32[1] == val.u32[1]); }
+
+  // --------------------------------------------------------------------------
+  // [Operator Overload]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE UInt64& operator+=(uint64_t val) { return add(val); }
+  ASMJIT_INLINE UInt64& operator+=(const UInt64& val) { return add(val); }
+
+  ASMJIT_INLINE UInt64& operator-=(uint64_t val) { return sub(val); }
+  ASMJIT_INLINE UInt64& operator-=(const UInt64& val) { return sub(val); }
+
+  ASMJIT_INLINE UInt64& operator&=(uint64_t val) { return and_(val); }
+  ASMJIT_INLINE UInt64& operator&=(const UInt64& val) { return and_(val); }
+
+  ASMJIT_INLINE UInt64& operator|=(uint64_t val) { return or_(val); }
+  ASMJIT_INLINE UInt64& operator|=(const UInt64& val) { return or_(val); }
+
+  ASMJIT_INLINE UInt64& operator^=(uint64_t val) { return xor_(val); }
+  ASMJIT_INLINE UInt64& operator^=(const UInt64& val) { return xor_(val); }
+
+  ASMJIT_INLINE bool operator==(uint64_t val) const { return eq(val); }
+  ASMJIT_INLINE bool operator==(const UInt64& val) const { return eq(val); }
+
+  ASMJIT_INLINE bool operator!=(uint64_t val) const { return !eq(val); }
+  ASMJIT_INLINE bool operator!=(const UInt64& val) const { return !eq(val); }
+
+  ASMJIT_INLINE bool operator<(uint64_t val) const { return u64 < val; }
+  ASMJIT_INLINE bool operator<(const UInt64& val) const { return u64 < val.u64; }
+
+  ASMJIT_INLINE bool operator<=(uint64_t val) const { return u64 <= val; }
+  ASMJIT_INLINE bool operator<=(const UInt64& val) const { return u64 <= val.u64; }
+
+  ASMJIT_INLINE bool operator>(uint64_t val) const { return u64 > val; }
+  ASMJIT_INLINE bool operator>(const UInt64& val) const { return u64 > val.u64; }
+
+  ASMJIT_INLINE bool operator>=(uint64_t val) const { return u64 >= val; }
+  ASMJIT_INLINE bool operator>=(const UInt64& val) const { return u64 >= val.u64; }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  uint64_t u64;
+
+  uint32_t u32[2];
+  uint16_t u16[4];
+  uint8_t u8[8];
+
+  struct {
+#if defined(ASMJIT_HOST_LE)
+    uint32_t lo, hi;
+#else
+    uint32_t hi, lo;
+#endif // ASMJIT_HOST_LE
+  };
+};
+
+//! @}
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../base/apiend.h"
+
+// [Guard]
+#endif // _ASMJIT_BASE_INTUTIL_H
diff --git a/src/asmjit/base/lock.h b/src/asmjit/base/lock.h
new file mode 100644
index 0000000..3b18eb3
--- /dev/null
+++ b/src/asmjit/base/lock.h
@@ -0,0 +1,132 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Guard]
+#ifndef _ASMJIT_BASE_LOCK_H
+#define _ASMJIT_BASE_LOCK_H
+
+// [Dependencies - AsmJit]
+#include "../build.h"
+
+// [Dependencies - Windows]
+#if defined(ASMJIT_OS_WINDOWS)
+# include <windows.h>
+#endif // ASMJIT_OS_WINDOWS
+
+// [Dependencies - Posix]
+#if defined(ASMJIT_OS_POSIX)
+# include <pthread.h>
+#endif // ASMJIT_OS_POSIX
+
+// [Api-Begin]
+#include "../base/apibegin.h"
+
+namespace asmjit {
+
+//! @addtogroup asmjit_base
+//! @{
+
+// ============================================================================
+// [asmjit::Lock]
+// ============================================================================
+
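+// A minimal usage sketch (not part of the original header): Lock is usually
+// acquired through the AutoLock scope guard declared later in this file.
+//
+//   static Lock globalLock; // 'globalLock' is a hypothetical shared lock.
+//
+//   void criticalSection() {
+//     AutoLock locked(globalLock); // Locks here, unlocks at end of scope.
+//     // ... access the shared state ...
+//   }
+
+//! @brief Lock - used in thread-safe code for locking.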
+struct Lock {
+  ASMJIT_NO_COPY(Lock)
+
+  // --------------------------------------------------------------------------
+  // [Windows]
+  // --------------------------------------------------------------------------
+
+#if defined(ASMJIT_OS_WINDOWS)
+  typedef CRITICAL_SECTION Handle;
+
+  //! @brief Create a new @ref Lock instance.
+  ASMJIT_INLINE Lock() { InitializeCriticalSection(&_handle); }
+  //! @brief Destroy the @ref Lock instance.
+  ASMJIT_INLINE ~Lock() { DeleteCriticalSection(&_handle); }
+
+  //! @brief Lock.
+  ASMJIT_INLINE void lock() { EnterCriticalSection(&_handle); }
+  //! @brief Unlock.
+  ASMJIT_INLINE void unlock() { LeaveCriticalSection(&_handle); }
+
+#endif // ASMJIT_OS_WINDOWS
+
+  // --------------------------------------------------------------------------
+  // [Posix]
+  // --------------------------------------------------------------------------
+
+#if defined(ASMJIT_OS_POSIX)
+  typedef pthread_mutex_t Handle;
+
+  //! @brief Create a new @ref Lock instance.
+  ASMJIT_INLINE Lock() { pthread_mutex_init(&_handle, NULL); }
+  //! @brief Destroy the @ref Lock instance.
+  ASMJIT_INLINE ~Lock() { pthread_mutex_destroy(&_handle); }
+
+  //! @brief Lock.
+  ASMJIT_INLINE void lock() { pthread_mutex_lock(&_handle); }
+  //! @brief Unlock.
+  ASMJIT_INLINE void unlock() { pthread_mutex_unlock(&_handle); }
+#endif // ASMJIT_OS_POSIX
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get handle.
+  ASMJIT_INLINE Handle& getHandle() { return _handle; }
+  //! @overload
+  ASMJIT_INLINE const Handle& getHandle() const { return _handle; }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! @brief Handle.
+  Handle _handle;
+};
+
+// ============================================================================
+// [asmjit::AutoLock]
+// ============================================================================
+
+//! @brief Scoped auto-locker that locks the given @ref Lock for the lifetime
+//! of the current scope.
+struct AutoLock {
+  ASMJIT_NO_COPY(AutoLock)
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! @brief Locks @a target.
+  ASMJIT_INLINE AutoLock(Lock& target) : _target(target) {
+    _target.lock();
+  }
+
+  //! @brief Unlocks the target.
+  ASMJIT_INLINE ~AutoLock() {
+    _target.unlock();
+  }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! @brief Reference to the target @ref Lock.
+  Lock& _target;
+};
+
+//! @}
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../base/apiend.h"
+
+// [Guard]
+#endif // _ASMJIT_BASE_LOCK_H
diff --git a/src/asmjit/base/logger.cpp b/src/asmjit/base/logger.cpp
new file mode 100644
index 0000000..421c31b
--- /dev/null
+++ b/src/asmjit/base/logger.cpp
@@ -0,0 +1,160 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Export]
+#define ASMJIT_EXPORTS
+
+// [Dependencies - AsmJit]
+#include "../base/intutil.h"
+#include "../base/logger.h"
+#include "../base/string.h"
+
+// [Dependencies - C]
+#include <stdarg.h>
+
+// [Api-Begin]
+#include "../base/apibegin.h"
+
+namespace asmjit {
+
+// ============================================================================
+// [asmjit::BaseLogger - Construction / Destruction]
+// ============================================================================
+
+BaseLogger::BaseLogger() {
+  _options = 0;
+  ::memset(_indentation, 0, ASMJIT_ARRAY_SIZE(_indentation));
+}
+
+BaseLogger::~BaseLogger() {}
+
+// ============================================================================
+// [asmjit::BaseLogger - Logging]
+// ============================================================================
+
+void BaseLogger::logFormat(uint32_t style, const char* fmt, ...) {
+  char buf[1024];
+  size_t len;
+
+  va_list ap;
+  va_start(ap, fmt);
+  len = vsnprintf(buf, 1023, fmt, ap);
+  va_end(ap);
+
+  logString(style, buf, len);
+}
+
+void BaseLogger::logBinary(uint32_t style, const void* data, size_t size) {
+  static const char prefix[] = ".data ";
+  static const char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
+
+  const uint8_t* s = static_cast<const uint8_t*>(data);
+  size_t i = size;
+
+  char buffer[128];
+  ::memcpy(buffer, prefix, ASMJIT_ARRAY_SIZE(prefix) - 1);
+
+  while (i) {
+    // Process at most 16 input bytes per output line.
+    uint32_t n = static_cast<uint32_t>(IntUtil::iMin<size_t>(i, 16));
+    char* p = buffer + ASMJIT_ARRAY_SIZE(prefix) - 1;
+
+    i -= n;
+    do {
+      uint32_t c = s[0];
+
+      p[0] = hex[c >> 4];
+      p[1] = hex[c & 15];
+
+      p += 2;
+      s += 1;
+    } while (--n);
+
+    *p++ = '\n';
+    logString(style, buffer, (size_t)(p - buffer));
+  }
+}
+
+// ============================================================================
+// [asmjit::BaseLogger - Options]
+// ============================================================================
+
+void BaseLogger::setOption(uint32_t id, bool value) {
+  if (id >= kLoggerOptionCount)
+    return;
+
+  uint32_t mask = 1 << id;
+
+  if (value)
+    _options |= mask;
+  else
+    _options &= ~mask;
+}
+
+// ============================================================================
+// [asmjit::BaseLogger - Indentation]
+// ============================================================================
+
+void BaseLogger::setIndentation(const char* indentation) {
+  ::memset(_indentation, 0, ASMJIT_ARRAY_SIZE(_indentation));
+  if (!indentation)
+    return;
+
+  size_t length = StringUtil::nlen(indentation, ASMJIT_ARRAY_SIZE(_indentation) - 1);
+  ::memcpy(_indentation, indentation, length);
+}
+
+// ============================================================================
+// [asmjit::FileLogger - Construction / Destruction]
+// ============================================================================
+
+FileLogger::FileLogger(FILE* stream) : _stream(NULL) {
+  setStream(stream);
+}
+
+FileLogger::~FileLogger() {}
+
+// ============================================================================
+// [asmjit::FileLogger - Accessors]
+// ============================================================================
+
+//! @brief Set file stream.
+void FileLogger::setStream(FILE* stream) {
+  _stream = stream;
+}
+
+// ============================================================================
+// [asmjit::FileLogger - Logging]
+// ============================================================================
+
+void FileLogger::logString(uint32_t style, const char* buf, size_t len) {
+  if (!_stream)
+    return;
+
+  if (len == kInvalidIndex)
+    len = strlen(buf);
+
+  fwrite(buf, 1, len, _stream);
+}
+
+// ============================================================================
+// [asmjit::StringLogger - Construction / Destruction]
+// ============================================================================
+
+StringLogger::StringLogger() {}
+StringLogger::~StringLogger() {}
+
+// ============================================================================
+// [asmjit::StringLogger - Logging]
+// ============================================================================
+
+void StringLogger::logString(uint32_t style, const char* buf, size_t len) {
+  _stringBuilder.appendString(buf, len);
+}
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../base/apiend.h"
diff --git a/src/asmjit/base/logger.h b/src/asmjit/base/logger.h
new file mode 100644
index 0000000..95286a4
--- /dev/null
+++ b/src/asmjit/base/logger.h
@@ -0,0 +1,238 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Guard]
+#ifndef _ASMJIT_BASE_LOGGER_H
+#define _ASMJIT_BASE_LOGGER_H
+
+// [Dependencies - AsmJit]
+#include "../base/defs.h"
+#include "../base/string.h"
+
+// [Dependencies - C]
+#include <stdio.h>
+
+// [Api-Begin]
+#include "../base/apibegin.h"
+
+namespace asmjit {
+
+//! @addtogroup asmjit_logging
+//! @{
+
+// ============================================================================
+// [asmjit::kLoggerOption]
+// ============================================================================
+
+//! @brief Logger options.
+ASMJIT_ENUM(kLoggerOption) {
+  //! @brief Whether to output instructions also in binary form.
+  kLoggerOptionBinaryForm = 0,
+
+  //! @brief Whether to output immediates as hexadecimal numbers.
+  kLoggerOptionHexImmediate = 1,
+  //! @brief Whether to output displacements as hexadecimal numbers.
+  kLoggerOptionHexDisplacement = 2,
+
+  //! @brief Count of logger options.
+  kLoggerOptionCount = 3
+};
+
+// ============================================================================
+// [asmjit::kLoggerStyle]
+// ============================================================================
+
+ASMJIT_ENUM(kLoggerStyle) {
+  kLoggerStyleDefault = 0,
+  kLoggerStyleDirective = 1,
+  kLoggerStyleLabel = 2,
+  kLoggerStyleData = 3,
+  kLoggerStyleComment = 4,
+
+  kLoggerStyleCount = 5
+};
+
+// ============================================================================
+// [asmjit::BaseLogger]
+// ============================================================================
+
+//! @brief Abstract logging class.
+//!
+//! This class can be inherited and reimplemented to fit into your logging
+//! subsystem. When reimplementing, use the @c BaseLogger::logString() method
+//! to send the output into your stream.
+//!
+//! Logging can be configured through the @c _options member, see
+//! @ref kLoggerOption.
+struct BaseLogger {
+  ASMJIT_NO_COPY(BaseLogger)
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a @ref BaseLogger instance.
+  ASMJIT_API BaseLogger();
+  //! @brief Destroy the @ref BaseLogger instance.
+  ASMJIT_API virtual ~BaseLogger();
+
+  // --------------------------------------------------------------------------
+  // [Logging]
+  // --------------------------------------------------------------------------
+
+  //! @brief Abstract method to log output.
+  //!
+  //! This method is pure virtual; reimplement it to route the output into
+  //! your logging subsystem.
+  virtual void logString(uint32_t style, const char* buf, size_t len = kInvalidIndex) = 0;
+
+  //! @brief Log a formatted message (@c sprintf style), sending the output
+  //! to the @c logString() method.
+  ASMJIT_API void logFormat(uint32_t style, const char* fmt, ...);
+  //! @brief Log binary data.
+  ASMJIT_API void logBinary(uint32_t style, const void* data, size_t size);
+
+  // --------------------------------------------------------------------------
+  // [Options]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get all logger options as a single integer.
+  ASMJIT_INLINE uint32_t getOptions() const
+  { return _options; }
+
+  //! @brief Get the given logger option.
+  ASMJIT_INLINE bool getOption(uint32_t id) const {
+    ASMJIT_ASSERT(id < kLoggerOptionCount);
+    return static_cast<bool>((_options >> id) & 0x1);
+  }
+
+  //! @brief Set the given logger option.
+  ASMJIT_API void setOption(uint32_t id, bool value);
+
+  // --------------------------------------------------------------------------
+  // [Indentation]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get indentation.
+  ASMJIT_INLINE const char* getIndentation() const { return _indentation; }
+  //! @brief Set indentation.
+  ASMJIT_API void setIndentation(const char* indentation);
+  //! @brief Reset indentation.
+  ASMJIT_INLINE void resetIndentation() { setIndentation(NULL); }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! @brief Options, see @ref kLoggerOption.
+  uint32_t _options;
+
+  //! @brief Indentation.
+  char _indentation[12];
+};
+
+// ============================================================================
+// [asmjit::FileLogger]
+// ============================================================================
+
+//! @brief Logger that can log to a standard C @c FILE* stream.
+struct FileLogger : public BaseLogger {
+  ASMJIT_NO_COPY(FileLogger)
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a new @c FileLogger.
+  //! @param stream FILE stream where logging will be sent (can be @c NULL
+  //! to disable logging).
+  ASMJIT_API FileLogger(FILE* stream = NULL);
+
+  //! @brief Destroy the @ref FileLogger.
+  ASMJIT_API virtual ~FileLogger();
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get the @c FILE* stream.
+  //!
+  //! @note The returned value can be @c NULL.
+  ASMJIT_INLINE FILE* getStream() const { return _stream; }
+
+  //! @brief Set the @c FILE* stream.
+  //!
+  //! @param stream @c FILE stream where to log output (can be @c NULL to
+  //! disable logging).
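+  //!
+  //! A minimal usage sketch (standalone, not wired to an assembler):
+  //!
+  //! @code
+  //! FileLogger logger(stderr);
+  //! logger.setOption(kLoggerOptionHexImmediate, true);
+  //! logger.logFormat(kLoggerStyleComment, "; generated %d bytes\n", 16);
+  //! logger.setStream(NULL); // Disables further output.
+  //! @endcode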
+  ASMJIT_API void setStream(FILE* stream);
+
+  // --------------------------------------------------------------------------
+  // [Logging]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_API virtual void logString(uint32_t style, const char* buf, size_t len = kInvalidIndex);
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! @brief C file stream.
+  FILE* _stream;
+};
+
+// ============================================================================
+// [asmjit::StringLogger]
+// ============================================================================
+
+//! @brief String logger.
+struct StringLogger : public BaseLogger {
+  ASMJIT_NO_COPY(StringLogger)
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a new @ref StringLogger.
+  ASMJIT_API StringLogger();
+
+  //! @brief Destroy the @ref StringLogger.
+  ASMJIT_API virtual ~StringLogger();
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get a @c char* pointer to the resulting string.
+  //!
+  //! The pointer is owned by @ref StringLogger and must not be modified or
+  //! freed.
+  ASMJIT_INLINE const char* getString() const { return _stringBuilder.getData(); }
+
+  //! @brief Clear the resulting string.
+  ASMJIT_INLINE void clearString() { _stringBuilder.clear(); }
+
+  // --------------------------------------------------------------------------
+  // [Logging]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_API virtual void logString(uint32_t style, const char* buf, size_t len = kInvalidIndex);
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! @brief Output.
+  StringBuilder _stringBuilder;
+};
+
+//! @}
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../base/apiend.h"
+
+// [Guard]
+#endif // _ASMJIT_BASE_LOGGER_H
diff --git a/src/asmjit/base/memorymanager.cpp b/src/asmjit/base/memorymanager.cpp
new file mode 100644
index 0000000..627b0a4
--- /dev/null
+++ b/src/asmjit/base/memorymanager.cpp
@@ -0,0 +1,1047 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Export]
+#define ASMJIT_EXPORTS
+
+// [Dependencies - AsmJit]
+#include "../base/assert.h"
+#include "../base/error.h"
+#include "../base/lock.h"
+#include "../base/memorymanager.h"
+#include "../base/vmem.h"
+
+// [Api-Begin]
+#include "../base/apibegin.h"
+
+// This file contains the implementation of virtual memory management for the
+// AsmJit library. The initial concept is to keep the implementation simple
+// but efficient. Several goals led me to write it myself:
+//
+// - We usually need to allocate blocks of 64 bytes and larger.
+// - The alignment of allocated blocks is large - 32 bytes or 64 bytes.
+// - Keep memory manager information outside the allocated virtual memory
+//   pages (these pages allow execution of code).
+// - Keep the implementation small.
+//
+// I think the implementation is neither small nor particularly readable, so
+// here is some know-how:
+//
+// - The implementation is based on bit arrays and binary trees. Bit arrays
+//   contain information about allocated and unused blocks of memory. The
+//   block size is described by the MemNode::density member, and the count of
+//   blocks is stored in the MemNode::blocks member. For example, if the
+//   density is 64 and the count of blocks is 20, the memory node contains
+//   64*20 bytes of memory and the smallest possible allocation (and also the
+//   alignment) is 64 bytes. So the density also describes the memory
+//   alignment. Binary trees are used to enable fast lookup of all addresses
+//   allocated by the memory manager instance. This is used mainly by
+//   MemoryManagerPrivate::release().
+//
+// The bit array looks like this (empty = unused, X = used) - block size 64:
+// -------------------------------------------------------------------------
+// | |X|X| | | | | |X|X|X|X|X|X| | | | | | | | | | | | |X| | | | |X|X|X| | |
+// -------------------------------------------------------------------------
+// The bit array shows that there are 12 allocated blocks of 64 bytes, so the
+// total allocated size is 768 bytes. The maximum run of continuous free
+// blocks is 12 (see the largest gap).
+
+namespace asmjit {
+
+// ============================================================================
+// [Ops]
+// ============================================================================
+
+enum { kBitsPerEntity = (sizeof(size_t) * 8) };
+
+static void _SetBits(size_t* buf, size_t index, size_t len) {
+  if (len == 0)
+    return;
+
+  size_t i = index / kBitsPerEntity; // size_t[] index.
+  size_t j = index % kBitsPerEntity; // size_t[][] bit index.
+
+  // How many bits to process in the first word.
+  size_t c = kBitsPerEntity - j;
+  if (c > len) c = len;
+
+  // Offset.
+  buf += i;
+
+  *buf++ |= ((~(size_t)0) >> (kBitsPerEntity - c)) << j;
+  len -= c;
+
+  while (len >= kBitsPerEntity) {
+    *buf++ = ~(size_t)0;
+    len -= kBitsPerEntity;
+  }
+
+  if (len)
+    *buf |= ((~(size_t)0) >> (kBitsPerEntity - len));
+}
+
+// ============================================================================
+// [asmjit::MemNode]
+// ============================================================================
+
+#define M_DIV(x, y) ((x) / (y))
+#define M_MOD(x, y) ((x) % (y))
+
+struct RbNode {
+  // --------------------------------------------------------------------------
+  // [Red-black tree node, key is mem pointer].
+  // --------------------------------------------------------------------------
+
+  // The implementation is based on an article by Julienne Walker (Public
+  // Domain), including the C code and the original comments. Thanks for the
+  // excellent article.
+
+  // Left[0] and right[1] nodes.
+  RbNode* node[2];
+  // Whether the node is RED.
+  uint32_t red;
+
+  // --------------------------------------------------------------------------
+  // [Chunk Memory]
+  // --------------------------------------------------------------------------
+
+  // Virtual memory address.
+  uint8_t* mem;
+};
+
+// Get whether the node is red (a NULL node is considered black).
+ASMJIT_INLINE bool rbIsRed(RbNode* node) {
+  return node != NULL && node->red;
+}
+
+struct MemNode : public RbNode {
+  // --------------------------------------------------------------------------
+  // [Node double-linked list]
+  // --------------------------------------------------------------------------
+
+  MemNode* prev; // Prev node in list.
+  MemNode* next; // Next node in list.
+
+  // --------------------------------------------------------------------------
+  // [Chunk Data]
+  // --------------------------------------------------------------------------
+
+  size_t size; // How many bytes this node contains.
+  size_t blocks; // How many blocks this node manages.
+  size_t density; // Smallest allocation unit in this node (also its alignment).
+  size_t used; // How many bytes are used in this node.
+  size_t largestBlock; // Size of the largest block that can be allocated.
+
+  size_t* baUsed; // Bits about used blocks (0 = unused, 1 = used).
+  size_t* baCont; // Bits about continuous blocks (0 = stop, 1 = continue).
+
+  // --------------------------------------------------------------------------
+  // [Methods]
+  // --------------------------------------------------------------------------
+
+  // Get available space.
+  ASMJIT_INLINE size_t getAvailable() const { return size - used; }
+
+  ASMJIT_INLINE void fillData(MemNode* other) {
+    mem = other->mem;
+
+    size = other->size;
+    blocks = other->blocks;
+    density = other->density;
+    used = other->used;
+    largestBlock = other->largestBlock;
+    baUsed = other->baUsed;
+    baCont = other->baCont;
+  }
+};
+
+// ============================================================================
+// [asmjit::PermanentNode]
+// ============================================================================
+
+//! @brief Permanent node.
+struct PermanentNode {
+  uint8_t* mem; // Base pointer (virtual memory address).
+  size_t size; // Count of bytes allocated.
+  size_t used; // Count of bytes used.
+  PermanentNode* prev; // Pointer to prev chunk or NULL.
+
+  // Get available space.
+  ASMJIT_INLINE size_t getAvailable() const { return size - used; }
+};
+
+// ============================================================================
+// [asmjit::MemoryManagerPrivate]
+// ============================================================================
+
+struct MemoryManagerPrivate {
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+#if !defined(ASMJIT_OS_WINDOWS)
+  MemoryManagerPrivate();
+#else
+  MemoryManagerPrivate(HANDLE hProcess);
+#endif // ASMJIT_OS_WINDOWS
+  ~MemoryManagerPrivate();
+
+  // --------------------------------------------------------------------------
+  // [Allocation]
+  // --------------------------------------------------------------------------
+
+  MemNode* createNode(size_t size, size_t density);
+
+  void reset(bool keepVirtualMemory);
+
+  void* allocPermanent(size_t vsize);
+  void* allocFreeable(size_t vsize);
+  Error release(void* address);
+  Error shrink(void* address, size_t used);
+
+  // Helpers to avoid ifdefs in the code.
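+  // (On Windows the manager can target a foreign process through _hProcess;
+  // on other platforms it always allocates in the current process.)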
+  ASMJIT_INLINE uint8_t* allocVirtualMemory(size_t size, size_t* vsize) {
+#if !defined(ASMJIT_OS_WINDOWS)
+    return (uint8_t*)VMem::alloc(size, vsize, true);
+#else
+    return (uint8_t*)VMem::allocProcessMemory(_hProcess, size, vsize, true);
+#endif
+  }
+
+  ASMJIT_INLINE void freeVirtualMemory(void* vmem, size_t vsize) {
+#if !defined(ASMJIT_OS_WINDOWS)
+    VMem::release(vmem, vsize);
+#else
+    VMem::releaseProcessMemory(_hProcess, vmem, vsize);
+#endif
+  }
+
+  // --------------------------------------------------------------------------
+  // [NodeList RB-Tree]
+  // --------------------------------------------------------------------------
+
+  bool checkTree();
+
+  void insertNode(MemNode* node);
+  MemNode* removeNode(MemNode* node);
+  MemNode* findPtr(uint8_t* mem);
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+#if defined(ASMJIT_OS_WINDOWS)
+  HANDLE _hProcess; // Process where to allocate memory.
+#endif // ASMJIT_OS_WINDOWS
+  Lock _lock; // Lock for thread safety.
+
+  size_t _newChunkSize; // Default node size.
+  size_t _newChunkDensity; // Default node density.
+  size_t _allocated; // How many bytes are allocated.
+  size_t _used; // How many bytes are used.
+
+  // Memory nodes list.
+  MemNode* _first;
+  MemNode* _last;
+  MemNode* _optimal;
+
+  // Memory nodes tree.
+  MemNode* _root;
+
+  // Permanent memory.
+  PermanentNode* _permanent;
+
+  // Whether to keep virtual memory after destroy.
+  bool _keepVirtualMemory;
+};
+
+// ============================================================================
+// [asmjit::MemoryManagerPrivate - Construction / Destruction]
+// ============================================================================
+
+#if !defined(ASMJIT_OS_WINDOWS)
+MemoryManagerPrivate::MemoryManagerPrivate() :
+#else
+MemoryManagerPrivate::MemoryManagerPrivate(HANDLE hProcess) :
+  _hProcess(hProcess),
+#endif
+  _newChunkSize(65536),
+  _newChunkDensity(64),
+  _allocated(0),
+  _used(0),
+  _first(NULL),
+  _last(NULL),
+  _optimal(NULL),
+  _root(NULL),
+  _permanent(NULL),
+  _keepVirtualMemory(false) {}
+
+MemoryManagerPrivate::~MemoryManagerPrivate() {
+  // Freeable memory cleanup - Also frees the virtual memory if configured to.
+  reset(_keepVirtualMemory);
+
+  // Permanent memory cleanup - Never frees the virtual memory.
+  PermanentNode* node = _permanent;
+  while (node) {
+    PermanentNode* prev = node->prev;
+    ::free(node);
+    node = prev;
+  }
+}
+
+// ============================================================================
+// [asmjit::MemoryManagerPrivate - Allocation]
+// ============================================================================
+
+// Allocate a virtual memory chunk and a MemNode structure that describes it.
+//
+// Returns MemNode* on success, otherwise NULL.
+MemNode* MemoryManagerPrivate::createNode(size_t size, size_t density) {
+  size_t vsize;
+  uint8_t* vmem = allocVirtualMemory(size, &vsize);
+
+  // Out of memory.
+  if (vmem == NULL) return NULL;
+
+  size_t blocks = (vsize / density);
+  size_t bsize = (((blocks + 7) >> 3) + sizeof(size_t) - 1) & ~(size_t)(sizeof(size_t) - 1);
+
+  MemNode* node = static_cast<MemNode*>(::malloc(sizeof(MemNode)));
+  uint8_t* data = static_cast<uint8_t*>(::malloc(bsize * 2));
+
+  // Out of memory.
+  if (node == NULL || data == NULL) {
+    freeVirtualMemory(vmem, vsize);
+    if (node) ::free(node);
+    if (data) ::free(data);
+    return NULL;
+  }
+
+  // Initialize RbNode data.
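+  // (New nodes start red; insertNode() rebalances and recolors the tree as
+  // needed and always forces the root back to black.)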
+  node->node[0] = NULL;
+  node->node[1] = NULL;
+  node->red = 1;
+  node->mem = vmem;
+
+  // Initialize MemNode data.
+  node->prev = NULL;
+  node->next = NULL;
+
+  node->size = vsize;
+  node->blocks = blocks;
+  node->density = density;
+  node->used = 0;
+  node->largestBlock = vsize;
+
+  ::memset(data, 0, bsize * 2);
+  node->baUsed = reinterpret_cast<size_t*>(data);
+  node->baCont = reinterpret_cast<size_t*>(data + bsize);
+
+  return node;
+}
+
+void MemoryManagerPrivate::reset(bool keepVirtualMemory) {
+  MemNode* node = _first;
+
+  while (node) {
+    MemNode* next = node->next;
+
+    if (!keepVirtualMemory)
+      freeVirtualMemory(node->mem, node->size);
+
+    ::free(node->baUsed);
+    ::free(node);
+
+    node = next;
+  }
+
+  _allocated = 0;
+  _used = 0;
+
+  _root = NULL;
+  _first = NULL;
+  _last = NULL;
+  _optimal = NULL;
+}
+
+void* MemoryManagerPrivate::allocPermanent(size_t vsize) {
+  static const size_t permanentAlignment = 32;
+  static const size_t permanentNodeSize = 32768;
+
+  size_t over = vsize % permanentAlignment;
+  if (over)
+    over = permanentAlignment - over;
+
+  size_t alignedSize = vsize + over;
+  AutoLock locked(_lock);
+
+  PermanentNode* node = _permanent;
+
+  // Try to find space in already allocated chunks.
+  while (node && alignedSize > node->getAvailable())
+    node = node->prev;
+
+  // Or allocate a new node.
+  if (node == NULL) {
+    size_t nodeSize = permanentNodeSize;
+
+    if (vsize > nodeSize)
+      nodeSize = vsize;
+
+    node = static_cast<PermanentNode*>(::malloc(sizeof(PermanentNode)));
+
+    // Out of memory.
+    if (node == NULL)
+      return NULL;
+
+    node->mem = allocVirtualMemory(nodeSize, &node->size);
+
+    // Out of memory.
+    if (node->mem == NULL) {
+      ::free(node);
+      return NULL;
+    }
+
+    node->used = 0;
+    node->prev = _permanent;
+    _permanent = node;
+  }
+
+  // Reserve the aligned space for the caller.
+  uint8_t* result = node->mem + node->used;
+
+  // Update statistics.
+  node->used += alignedSize;
+  _used += alignedSize;
+
+  return static_cast<void*>(result);
+}
+
+void* MemoryManagerPrivate::allocFreeable(size_t vsize) {
+  size_t i; // Current index.
+  size_t need; // How many blocks we need to allocate.
+  size_t minVSize;
+
+  // Align to 32 bytes (our default alignment).
+  vsize = (vsize + 31) & ~(size_t)31;
+  if (vsize == 0) return NULL;
+
+  AutoLock locked(_lock);
+  MemNode* node = _optimal;
+
+  minVSize = _newChunkSize;
+
+  // Try to find a memory block in the existing nodes.
+  while (node) {
+    // Skip this node?
+    if ((node->getAvailable() < vsize) || (node->largestBlock < vsize && node->largestBlock != 0)) {
+      MemNode* next = node->next;
+      if (node->getAvailable() < minVSize && node == _optimal && next) _optimal = next;
+      node = next;
+      continue;
+    }
+
+    size_t* up = node->baUsed; // Current ubits address.
+    size_t ubits; // Current ubits[0] value.
+    size_t bit; // Current bit mask.
+    size_t blocks = node->blocks; // Count of blocks in the node.
+    size_t cont = 0; // Length of the current run of free bits.
+    size_t maxCont = 0; // Largest continuous run seen so far (in bits).
+    size_t j;
+
+    need = M_DIV((vsize + node->density - 1), node->density);
+    i = 0;
+
+    // Try to find a run of free blocks that is large enough.
+    while (i < blocks) {
+      ubits = *up++;
+
+      // Fast skip used blocks.
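+      // (A word with all bits set means all of its blocks are taken; the
+      // current run is folded into maxCont and a whole word of blocks is
+      // skipped without testing the individual bits.)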
+      if (ubits == ~(size_t)0) {
+        if (cont > maxCont) maxCont = cont;
+        cont = 0;
+
+        i += kBitsPerEntity;
+        continue;
+      }
+
+      size_t max = kBitsPerEntity;
+      if (i + max > blocks)
+        max = blocks - i;
+
+      for (j = 0, bit = 1; j < max; bit <<= 1) {
+        j++;
+        if ((ubits & bit) == 0) {
+          if (++cont == need) { i += j; i -= cont; goto _Found; }
+          continue;
+        }
+
+        if (cont > maxCont) maxCont = cont;
+        cont = 0;
+      }
+
+      i += kBitsPerEntity;
+    }
+
+    // Because we traversed the entire node, we can cache the largest free
+    // block found; it is used to skip this node in future traversals.
+    node->largestBlock = maxCont * node->density;
+
+    node = node->next;
+  }
+
+  // If we are here, we failed to find an existing memory block, so we must
+  // allocate a new one.
+  {
+    size_t chunkSize = _newChunkSize;
+    if (chunkSize < vsize) chunkSize = vsize;
+
+    node = createNode(chunkSize, _newChunkDensity);
+    if (node == NULL) return NULL;
+
+    // Update the binary tree.
+    insertNode(node);
+    ASMJIT_ASSERT(checkTree());
+
+    // Allocate at the start of the new node.
+    i = 0;
+    need = (vsize + node->density - 1) / node->density;
+
+    // Update statistics.
+    _allocated += node->size;
+  }
+
+_Found:
+  // Update bits.
+  _SetBits(node->baUsed, i, need);
+  _SetBits(node->baCont, i, need - 1);
+
+  // Update statistics.
+  {
+    size_t u = need * node->density;
+    node->used += u;
+    node->largestBlock = 0;
+    _used += u;
+  }
+
+  // And return a pointer to the allocated memory.
+  uint8_t* result = node->mem + i * node->density;
+  ASMJIT_ASSERT(result >= node->mem && result <= node->mem + node->size - vsize);
+  return result;
+}
+
+Error MemoryManagerPrivate::release(void* address) {
+  if (address == NULL)
+    return kErrorOk;
+
+  AutoLock locked(_lock);
+
+  MemNode* node = findPtr((uint8_t*)address);
+  if (node == NULL)
+    return kErrorInvalidArgument;
+
+  size_t offset = (size_t)((uint8_t*)address - (uint8_t*)node->mem);
+  size_t bitpos = M_DIV(offset, node->density);
+  size_t i = (bitpos / kBitsPerEntity);
+
+  size_t* up = node->baUsed + i; // Current ubits address.
+  size_t* cp = node->baCont + i; // Current cbits address.
+  size_t ubits = *up; // Current ubits[0] value.
+  size_t cbits = *cp; // Current cbits[0] value.
+  size_t bit = (size_t)1 << (bitpos % kBitsPerEntity);
+
+  size_t cont = 0;
+  bool stop;
+
+  for (;;) {
+    stop = (cbits & bit) == 0;
+    ubits &= ~bit;
+    cbits &= ~bit;
+
+    bit <<= 1;
+    cont++;
+
+    if (stop || bit == 0) {
+      *up = ubits;
+      *cp = cbits;
+      if (stop)
+        break;
+
+      ubits = *++up;
+      cbits = *++cp;
+      bit = 1;
+    }
+  }
+
+  // If the freed block sits in a fully used node, the 'optimal' pointer may
+  // point past it, so move it back to consider the freed space again.
+  if (node->used == node->size) {
+    MemNode* cur = _optimal;
+
+    do {
+      cur = cur->prev;
+      if (cur == node) {
+        _optimal = node;
+        break;
+      }
+    } while (cur);
+  }
+
+  // Statistics.
+  cont *= node->density;
+  if (node->largestBlock < cont)
+    node->largestBlock = cont;
+
+  node->used -= cont;
+  _used -= cont;
+
+  // If the node is completely unused, we can free it.
+  if (node->used == 0) {
+    // Free the virtual memory associated with the node (this memory is not
+    // accessed anymore, so it's safe).
+    freeVirtualMemory(node->mem, node->size);
+    ::free(node->baUsed);
+
+    node->baUsed = NULL;
+    node->baCont = NULL;
+
+    // Statistics.
+    _allocated -= node->size;
+
+    // Remove the node. This function can return a different node than the
+    // one passed in, but the data is copied into the previous node if needed.
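+    // (removeNode() may splice out a different MemNode and copy this node's
+    // payload into it via fillData(), so the returned pointer is the one
+    // that is safe to free.)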
+ ::free(removeNode(node)); + ASMJIT_ASSERT(checkTree()); + } + + return kErrorOk; +} + +Error MemoryManagerPrivate::shrink(void* address, size_t used) { + if (address == NULL) + return kErrorOk; + + if (used == 0) + return release(address); + + AutoLock locked(_lock); + + MemNode* node = findPtr((uint8_t*)address); + if (node == NULL) + return kErrorInvalidArgument; + + size_t offset = (size_t)((uint8_t*)address - (uint8_t*)node->mem); + size_t bitpos = M_DIV(offset, node->density); + size_t i = (bitpos / kBitsPerEntity); + + size_t* up = node->baUsed + i; // Current ubits address. + size_t* cp = node->baCont + i; // Current cbits address. + size_t ubits = *up; // Current ubits[0] value. + size_t cbits = *cp; // Current cbits[0] value. + size_t bit = (size_t)1 << (bitpos % kBitsPerEntity); + + size_t cont = 0; + size_t usedBlocks = (used + node->density - 1) / node->density; + + bool stop; + + // Find the first block we can mark as free. + for (;;) { + stop = (cbits & bit) == 0; + if (stop) + return kErrorOk; + + if (++cont == usedBlocks) + break; + + bit <<= 1; + if (bit == 0) { + ubits = *++up; + cbits = *++cp; + bit = 1; + } + } + + // Free the tail blocks. + cont = ~(size_t)0; + goto _EnterFreeLoop; + + for (;;) { + stop = (cbits & bit) == 0; + ubits &= ~bit; + +_EnterFreeLoop: + cbits &= ~bit; + + bit <<= 1; + cont++; + + if (stop || bit == 0) { + *up = ubits; + *cp = cbits; + if (stop) + break; + + ubits = *++up; + cbits = *++cp; + bit = 1; + } + } + + // Statistics. + cont *= node->density; + if (node->largestBlock < cont) + node->largestBlock = cont; + + node->used -= cont; + _used -= cont; + + return kErrorOk; +} + +// ============================================================================ +// [asmjit::MemoryManagerPrivate - NodeList RB-Tree] +// ============================================================================ + +static int rbAssert(RbNode* root) { + if (root == NULL) + return 1; + + RbNode* ln = root->node[0]; + RbNode* rn = root->node[1]; + + // Red violation. + ASMJIT_ASSERT( !(rbIsRed(root) && (rbIsRed(ln) || rbIsRed(rn))) ); + + int lh = rbAssert(ln); + int rh = rbAssert(rn); + + // Invalid btree. + ASMJIT_ASSERT(ln == NULL || ln->mem < root->mem); + ASMJIT_ASSERT(rn == NULL || rn->mem > root->mem); + + // Black violation. + ASMJIT_ASSERT( !(lh != 0 && rh != 0 && lh != rh) ); + + // Only count black links. + if (lh != 0 && rh != 0) + return rbIsRed(root) ? lh : lh + 1; + else + return 0; +} + +static ASMJIT_INLINE RbNode* rbRotateSingle(RbNode* root, int dir) { + RbNode* save = root->node[!dir]; + + root->node[!dir] = save->node[dir]; + save->node[dir] = root; + + root->red = 1; + save->red = 0; + + return save; +} + +static ASMJIT_INLINE RbNode* rbRotateDouble(RbNode* root, int dir) { + root->node[!dir] = rbRotateSingle(root->node[!dir], !dir); + return rbRotateSingle(root, dir); +} + +bool MemoryManagerPrivate::checkTree() { + return rbAssert(_root) > 0; +} + +void MemoryManagerPrivate::insertNode(MemNode* node) { + if (_root == NULL) { + // Empty tree case. + _root = node; + } + else { + // False tree root. + RbNode head = { 0 }; + + // Grandparent & parent. + RbNode* g = NULL; + RbNode* t = &head; + + // Iterator & parent. + RbNode* p = NULL; + RbNode* q = t->node[1] = _root; + + int dir = 0, last; + + // Search down the tree. + for (;;) { + if (q == NULL) { + // Insert new node at the bottom. + q = node; + p->node[dir] = node; + } + else if (rbIsRed(q->node[0]) && rbIsRed(q->node[1])) { + // Color flip. 
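+        // (The node becomes red and its two red children become black; this
+        // preserves the subtree's black height, and any red-red violation
+        // with the parent is repaired just below.)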
+        q->red = 1;
+        q->node[0]->red = 0;
+        q->node[1]->red = 0;
+      }
+
+      // Fix red violation.
+      if (rbIsRed(q) && rbIsRed(p)) {
+        int dir2 = t->node[1] == g;
+        t->node[dir2] = q == p->node[last] ? rbRotateSingle(g, !last) : rbRotateDouble(g, !last);
+      }
+
+      // Stop if found.
+      if (q == node)
+        break;
+
+      last = dir;
+      dir = q->mem < node->mem;
+
+      // Update helpers.
+      if (g != NULL)
+        t = g;
+
+      g = p;
+      p = q;
+      q = q->node[dir];
+    }
+
+    // Update root.
+    _root = static_cast<MemNode*>(head.node[1]);
+  }
+
+  // Make root black.
+  _root->red = 0;
+
+  // Link with others.
+  node->prev = _last;
+
+  if (_first == NULL) {
+    _first = node;
+    _last = node;
+    _optimal = node;
+  }
+  else {
+    _last->next = node;
+    _last = node;
+  }
+}
+
+MemNode* MemoryManagerPrivate::removeNode(MemNode* node) {
+  // False tree root.
+  RbNode head = { 0 };
+
+  // Helpers.
+  RbNode* q = &head;
+  RbNode* p = NULL;
+  RbNode* g = NULL;
+
+  // Found item.
+  RbNode* f = NULL;
+  int dir = 1;
+
+  // Set up.
+  q->node[1] = _root;
+
+  // Search and push a red down.
+  while (q->node[dir] != NULL) {
+    int last = dir;
+
+    // Update helpers.
+    g = p;
+    p = q;
+    q = q->node[dir];
+    dir = q->mem < node->mem;
+
+    // Save found node.
+    if (q == node)
+      f = q;
+
+    // Push the red node down.
+    if (!rbIsRed(q) && !rbIsRed(q->node[dir])) {
+      if (rbIsRed(q->node[!dir])) {
+        p = p->node[last] = rbRotateSingle(q, dir);
+      }
+      else if (!rbIsRed(q->node[!dir])) {
+        RbNode* s = p->node[!last];
+
+        if (s != NULL) {
+          if (!rbIsRed(s->node[!last]) && !rbIsRed(s->node[last])) {
+            // Color flip.
+            p->red = 0;
+            s->red = 1;
+            q->red = 1;
+          }
+          else {
+            int dir2 = g->node[1] == p;
+
+            if (rbIsRed(s->node[last]))
+              g->node[dir2] = rbRotateDouble(p, last);
+            else if (rbIsRed(s->node[!last]))
+              g->node[dir2] = rbRotateSingle(p, last);
+
+            // Ensure correct coloring.
+            q->red = g->node[dir2]->red = 1;
+            g->node[dir2]->node[0]->red = 0;
+            g->node[dir2]->node[1]->red = 0;
+          }
+        }
+      }
+    }
+  }
+
+  // Replace and remove.
+  ASMJIT_ASSERT(f != NULL);
+  ASMJIT_ASSERT(f != &head);
+  ASMJIT_ASSERT(q != &head);
+
+  if (f != q) {
+    ASMJIT_ASSERT(f != &head);
+    static_cast<MemNode*>(f)->fillData(static_cast<MemNode*>(q));
+  }
+
+  p->node[p->node[1] == q] = q->node[q->node[0] == NULL];
+
+  // Update root and make it black.
+  _root = static_cast<MemNode*>(head.node[1]);
+  if (_root != NULL)
+    _root->red = 0;
+
+  // Unlink.
+  MemNode* next = static_cast<MemNode*>(q)->next;
+  MemNode* prev = static_cast<MemNode*>(q)->prev;
+
+  if (prev)
+    prev->next = next;
+  else
+    _first = next;
+
+  if (next)
+    next->prev = prev;
+  else
+    _last = prev;
+
+  if (_optimal == q)
+    _optimal = prev ? prev : next;
+
+  return static_cast<MemNode*>(q);
+}
+
+MemNode* MemoryManagerPrivate::findPtr(uint8_t* mem) {
+  MemNode* cur = _root;
+
+  while (cur != NULL) {
+    uint8_t* curMem = cur->mem;
+    if (mem < curMem) {
+      // Go left.
+      cur = static_cast<MemNode*>(cur->node[0]);
+      continue;
+    }
+    else {
+      uint8_t* curEnd = curMem + cur->size;
+      if (mem >= curEnd) {
+        // Go right.
+        cur = static_cast<MemNode*>(cur->node[1]);
+        continue;
+      }
+      else {
+        // Match.
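+        // (The address falls inside [curMem, curEnd), so this node owns it.)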
+        break;
+      }
+    }
+  }
+  return cur;
+}
+
+// ============================================================================
+// [asmjit::MemoryManager]
+// ============================================================================
+
+MemoryManager::MemoryManager() {}
+MemoryManager::~MemoryManager() {}
+
+MemoryManager* MemoryManager::getGlobal() {
+  static VirtualMemoryManager memmgr;
+  return &memmgr;
+}
+
+// ============================================================================
+// [asmjit::VirtualMemoryManager]
+// ============================================================================
+
+#if !defined(ASMJIT_OS_WINDOWS)
+VirtualMemoryManager::VirtualMemoryManager() {
+  MemoryManagerPrivate* d = new(std::nothrow) MemoryManagerPrivate();
+  _d = static_cast<void*>(d);
+}
+#else
+VirtualMemoryManager::VirtualMemoryManager() {
+  MemoryManagerPrivate* d = new(std::nothrow) MemoryManagerPrivate(GetCurrentProcess());
+  _d = static_cast<void*>(d);
+}
+
+VirtualMemoryManager::VirtualMemoryManager(HANDLE hProcess) {
+  MemoryManagerPrivate* d = new(std::nothrow) MemoryManagerPrivate(hProcess);
+  _d = static_cast<void*>(d);
+}
+#endif // ASMJIT_OS_WINDOWS
+
+VirtualMemoryManager::~VirtualMemoryManager() {
+  MemoryManagerPrivate* d = static_cast<MemoryManagerPrivate*>(_d);
+  delete d;
+}
+
+void VirtualMemoryManager::reset() {
+  MemoryManagerPrivate* d = static_cast<MemoryManagerPrivate*>(_d);
+
+  // Calling MemoryManager::reset() will never keep the allocated memory.
+  return d->reset(false);
+}
+
+void* VirtualMemoryManager::alloc(size_t size, uint32_t type) {
+  MemoryManagerPrivate* d = static_cast<MemoryManagerPrivate*>(_d);
+
+  if (type == kVirtualAllocPermanent)
+    return d->allocPermanent(size);
+  else
+    return d->allocFreeable(size);
+}
+
+Error VirtualMemoryManager::release(void* address) {
+  MemoryManagerPrivate* d = static_cast<MemoryManagerPrivate*>(_d);
+  return d->release(address);
+}
+
+Error VirtualMemoryManager::shrink(void* address, size_t used) {
+  MemoryManagerPrivate* d = static_cast<MemoryManagerPrivate*>(_d);
+  return d->shrink(address, used);
+}
+
+size_t VirtualMemoryManager::getUsedBytes() {
+  MemoryManagerPrivate* d = static_cast<MemoryManagerPrivate*>(_d);
+  return d->_used;
+}
+
+size_t VirtualMemoryManager::getAllocatedBytes() {
+  MemoryManagerPrivate* d = static_cast<MemoryManagerPrivate*>(_d);
+  return d->_allocated;
+}
+
+bool VirtualMemoryManager::getKeepVirtualMemory() const {
+  MemoryManagerPrivate* d = static_cast<MemoryManagerPrivate*>(_d);
+  return d->_keepVirtualMemory;
+}
+
+void VirtualMemoryManager::setKeepVirtualMemory(bool keepVirtualMemory) {
+  MemoryManagerPrivate* d = static_cast<MemoryManagerPrivate*>(_d);
+  d->_keepVirtualMemory = keepVirtualMemory;
+}
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../base/apiend.h"
diff --git a/src/asmjit/base/memorymanager.h b/src/asmjit/base/memorymanager.h
new file mode 100644
index 0000000..cb700b4
--- /dev/null
+++ b/src/asmjit/base/memorymanager.h
@@ -0,0 +1,170 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Guard]
+#ifndef _ASMJIT_BASE_MEMORYMANAGER_H
+#define _ASMJIT_BASE_MEMORYMANAGER_H
+
+// [Dependencies - AsmJit]
+#include "../base/defs.h"
+
+// [Api-Begin]
+#include "../base/apibegin.h"
+
+namespace asmjit {
+
+//! @addtogroup AsmJit_MemoryManagement
+//! @{
+
+// ============================================================================
+// [asmjit::kVirtualAlloc]
+// ============================================================================
+
+//! @brief Type of virtual memory allocation, see @c asmjit::MemoryManager::alloc().
+ASMJIT_ENUM(kVirtualAlloc) {
+  //! @brief Normal memory allocation; the allocated memory can be freed by
+  //! calling the @ref asmjit::MemoryManager::release() method.
+  kVirtualAllocFreeable = 0,
+  //! @brief Allocate permanent memory that will never be freed.
+  kVirtualAllocPermanent = 1
+};
+
+// ============================================================================
+// [asmjit::MemoryManager]
+// ============================================================================
+
+//! @brief Virtual memory manager interface.
+//!
+//! This class is pure virtual. You can get the default virtual memory manager
+//! using the @c getGlobal() method. If you want to create more memory managers
+//! with the same functionality as the global one, use the
+//! @c VirtualMemoryManager class.
+struct MemoryManager {
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a memory manager instance.
+  ASMJIT_API MemoryManager();
+  //! @brief Destroy the memory manager instance; this also frees all memory
+  //! blocks.
+  ASMJIT_API virtual ~MemoryManager();
+
+  // --------------------------------------------------------------------------
+  // [Interface]
+  // --------------------------------------------------------------------------
+
+  //! @brief Free all allocated memory.
+  virtual void reset() = 0;
+
+  //! @brief Allocate @a size bytes of virtual memory.
+  //!
+  //! Note that if you are implementing your own virtual memory manager you
+  //! can quietly ignore the allocation @a type. It is mainly a hint through
+  //! which AsmJit tells the memory manager that the allocated memory will
+  //! never be freed.
+  virtual void* alloc(size_t size, uint32_t type = kVirtualAllocFreeable) = 0;
+  //! @brief Free previously allocated memory at the given @a address.
+  virtual Error release(void* address) = 0;
+  //! @brief Free the tail of a previously allocated block, keeping @a used bytes.
+  virtual Error shrink(void* address, size_t used) = 0;
+
+  //! @brief Get how many bytes are currently used.
+  virtual size_t getUsedBytes() = 0;
+  //! @brief Get how many bytes are currently allocated.
+  virtual size_t getAllocatedBytes() = 0;
+
+  // --------------------------------------------------------------------------
+  // [Statics]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get the global memory manager instance.
+  //!
+  //! The global instance is an instance of the @c VirtualMemoryManager class.
+  //! It is used by default by the @ref Assembler::make() and
+  //! @ref Compiler::make() methods.
+  static ASMJIT_API MemoryManager* getGlobal();
+};
+
+// ============================================================================
+// [asmjit::VirtualMemoryManager]
+// ============================================================================
+
+//! @brief Reference implementation of a memory manager that uses the
+//! @ref asmjit::VMem class to allocate chunks of virtual memory and bit
+//! arrays to manage them.
+struct VirtualMemoryManager : public MemoryManager {
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a @c VirtualMemoryManager instance.
+  ASMJIT_API VirtualMemoryManager();
+
+#if defined(ASMJIT_OS_WINDOWS)
+  //! @brief Create a @c VirtualMemoryManager instance for the process @a hProcess.
+  //!
+  //! This is a specialized constructor, available only on Windows, that can
+  //! allocate and free memory in a different process.
+  ASMJIT_API VirtualMemoryManager(HANDLE hProcess);
+#endif // ASMJIT_OS_WINDOWS
+
+  //! @brief Destroy the @c VirtualMemoryManager instance; this also frees
+  //! all blocks.
+  ASMJIT_API virtual ~VirtualMemoryManager();
+
+  // --------------------------------------------------------------------------
+  // [Interface]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_API virtual void reset();
+
+  ASMJIT_API virtual void* alloc(size_t size, uint32_t type = kVirtualAllocFreeable);
+  ASMJIT_API virtual Error release(void* address);
+  ASMJIT_API virtual Error shrink(void* address, size_t used);
+
+  ASMJIT_API virtual size_t getUsedBytes();
+  ASMJIT_API virtual size_t getAllocatedBytes();
+
+  // --------------------------------------------------------------------------
+  // [Virtual Memory Manager Specific]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get whether to keep the allocated memory after the memory
+  //! manager is destroyed.
+  //!
+  //! @sa @c setKeepVirtualMemory().
+  ASMJIT_API bool getKeepVirtualMemory() const;
+
+  //! @brief Set whether to keep the allocated memory after the memory
+  //! manager is destroyed.
+  //!
+  //! This method is useful when patching the code of a remote process. You
+  //! need to allocate process memory, store the generated assembly into it,
+  //! and patch the method you want to redirect (into your code). This method
+  //! affects only the VirtualMemoryManager destructor. After destruction all
+  //! internal structures are freed; only the process virtual memory remains.
+  //!
+  //! @note Memory allocated with kVirtualAllocPermanent is always kept.
+  //!
+  //! @sa @c getKeepVirtualMemory().
+  ASMJIT_API void setKeepVirtualMemory(bool keepVirtualMemory);
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! @brief Pointer to private data hidden from the public API.
+  void* _d;
+};
+
+//! @}
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../base/apiend.h"
+
+// [Guard]
+#endif // _ASMJIT_BASE_MEMORYMANAGER_H
diff --git a/src/asmjit/base/podlist.h b/src/asmjit/base/podlist.h
new file mode 100644
index 0000000..a33c56b
--- /dev/null
+++ b/src/asmjit/base/podlist.h
@@ -0,0 +1,116 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Guard]
+#ifndef _ASMJIT_BASE_PODLIST_H
+#define _ASMJIT_BASE_PODLIST_H
+
+// [Dependencies - AsmJit]
+#include "../base/assert.h"
+#include "../base/defs.h"
+
+// [Api-Begin]
+#include "../base/apibegin.h"
+
+namespace asmjit {
+
+//! @addtogroup asmjit_base
+//! @{
+
+// ============================================================================
+// [asmjit::PodList]
+// ============================================================================
+
+template<typename T>
+struct PodList {
+  ASMJIT_NO_COPY(PodList)
+
+  // --------------------------------------------------------------------------
+  // [Link]
+  // --------------------------------------------------------------------------
+
+  struct Link {
+    // ------------------------------------------------------------------------
+    // [Accessors]
+    // ------------------------------------------------------------------------
+
+    //! @brief Get next node.
+    ASMJIT_INLINE Link* getNext() const { return _next; }
+
+    //! @brief Get value.
+    ASMJIT_INLINE T getValue() const { return _value; }
+    //! @brief Set value to @a value.
+    ASMJIT_INLINE void setValue(const T& value) { _value = value; }
+
+    // ------------------------------------------------------------------------
+    // [Members]
+    // ------------------------------------------------------------------------
+
+    Link* _next;
+    T _value;
+  };
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE PodList() : _first(NULL), _last(NULL) {}
+  ASMJIT_INLINE ~PodList() {}
+
+  // --------------------------------------------------------------------------
+  // [Data]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE bool isEmpty() const { return _first == NULL; }
+
+  ASMJIT_INLINE Link* getFirst() const { return _first; }
+  ASMJIT_INLINE Link* getLast() const { return _last; }
+
+  // --------------------------------------------------------------------------
+  // [Ops]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE void clear() {
+    reset();
+  }
+
+  ASMJIT_INLINE void reset() {
+    _first = NULL;
+    _last = NULL;
+  }
+
+  ASMJIT_INLINE void prepend(Link* link) {
+    link->_next = _first;
+    if (_first == NULL)
+      _last = link;
+    _first = link;
+  }
+
+  ASMJIT_INLINE void append(Link* link) {
+    link->_next = NULL;
+    if (_first == NULL)
+      _first = link;
+    else
+      _last->_next = link;
+    _last = link;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  Link* _first;
+  Link* _last;
+};
+
+//! @}
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../base/apiend.h"
+
+#endif // _ASMJIT_BASE_PODLIST_H
diff --git a/src/asmjit/base/podvector.cpp b/src/asmjit/base/podvector.cpp
new file mode 100644
index 0000000..3fccd51
--- /dev/null
+++ b/src/asmjit/base/podvector.cpp
@@ -0,0 +1,96 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Export]
+#define ASMJIT_EXPORTS
+
+// [Dependencies - AsmJit]
+#include "../base/intutil.h"
+#include "../base/podvector.h"
+
+// [Api-Begin]
+#include "../base/apibegin.h"
+
+namespace asmjit {
+
+// ============================================================================
+// [asmjit::PodVectorBase - NullData]
+// ============================================================================
+
+const PodVectorData PodVectorBase::_nullData = { 0, 0 };
+
+// ============================================================================
+// [asmjit::PodVectorBase - Helpers]
+// ============================================================================
+
+Error PodVectorBase::_grow(size_t n, size_t sizeOfT) {
+  PodVectorData* d = _d;
+
+  size_t threshold = kMemAllocGrowMax / sizeOfT;
+  size_t capacity = d->capacity;
+  size_t after = d->length;
+
+  if (IntUtil::maxUInt<size_t>() - n < after)
+    return kErrorNoHeapMemory;
+
+  after += n;
+
+  if (capacity >= after)
+    return kErrorOk;
+
+  // PodVector is used as a linear array by some data structures used during
+  // AsmJit code generation. The purpose of this aggressive growing schema
+  // is to minimize memory reallocations, because AsmJit code generation
+  // classes live a short life and will be freed or reused soon.
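+  // (For example, a vector of small elements grows through the fixed steps
+  // 32 -> 128 -> 512, then doubles - 1024, 2048, ... - until it reaches the
+  // threshold, after which it grows linearly by `threshold` elements.)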
+  if (capacity < 32)
+    capacity = 32;
+  else if (capacity < 128)
+    capacity = 128;
+  else if (capacity < 512)
+    capacity = 512;
+
+  while (capacity < after) {
+    if (capacity < threshold)
+      capacity *= 2;
+    else
+      capacity += threshold;
+  }
+
+  return _reserve(capacity, sizeOfT);
+}
+
+Error PodVectorBase::_reserve(size_t n, size_t sizeOfT) {
+  PodVectorData* d = _d;
+
+  if (d->capacity >= n)
+    return kErrorOk;
+
+  size_t nBytes = sizeof(PodVectorData) + n * sizeOfT;
+  if (nBytes < n)
+    return kErrorNoHeapMemory;
+
+  if (d == &_nullData) {
+    d = static_cast<PodVectorData*>(::malloc(nBytes));
+    if (d == NULL)
+      return kErrorNoHeapMemory;
+    d->length = 0;
+  }
+  else {
+    d = static_cast<PodVectorData*>(::realloc(d, nBytes));
+    if (d == NULL)
+      return kErrorNoHeapMemory;
+  }
+
+  d->capacity = n;
+  _d = d;
+
+  return kErrorOk;
+}
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../base/apiend.h"
diff --git a/src/asmjit/base/podvector.h b/src/asmjit/base/podvector.h
new file mode 100644
index 0000000..05f8cac
--- /dev/null
+++ b/src/asmjit/base/podvector.h
@@ -0,0 +1,262 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Guard]
+#ifndef _ASMJIT_BASE_PODVECTOR_H
+#define _ASMJIT_BASE_PODVECTOR_H
+
+// [Dependencies - AsmJit]
+#include "../base/assert.h"
+#include "../base/defs.h"
+#include "../base/error.h"
+
+// [Api-Begin]
+#include "../base/apibegin.h"
+
+namespace asmjit {
+
+//! @addtogroup asmjit_base
+//! @{
+
+// ============================================================================
+// [asmjit::PodVectorData]
+// ============================================================================
+
+struct PodVectorData {
+  //! @brief Get data.
+  ASMJIT_INLINE void* getData() const { return (void*)(this + 1); }
+
+  //! @brief Capacity of the vector.
+  size_t capacity;
+  //! @brief Length of the vector.
+  size_t length;
+};
+
+// ============================================================================
+// [asmjit::PodVectorBase]
+// ============================================================================
+
+struct PodVectorBase {
+  static ASMJIT_API const PodVectorData _nullData;
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a new instance of @ref PodVectorBase.
+  ASMJIT_INLINE PodVectorBase() :
+    _d(const_cast<PodVectorData*>(&_nullData)) {}
+
+  //! @brief Destroy the @ref PodVectorBase and its data.
+  ASMJIT_INLINE ~PodVectorBase() {
+    if (_d != &_nullData)
+      ::free(_d);
+  }
+
+  // --------------------------------------------------------------------------
+  // [Grow / Reserve]
+  // --------------------------------------------------------------------------
+
+protected:
+  ASMJIT_API Error _grow(size_t n, size_t sizeOfT);
+  ASMJIT_API Error _reserve(size_t n, size_t sizeOfT);
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+public:
+  PodVectorData* _d;
+};
+
+// ============================================================================
+// [asmjit::PodVector]
+// ============================================================================
+
+//! @brief Template used to store and manage an array of POD data.
+//!
+//! This template has these advantages over other vector<> templates:
+template<typename T>
+struct PodVector : PodVectorBase {
+  ASMJIT_NO_COPY(PodVector)
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create new instance of @ref PodVector<>.
+  ASMJIT_INLINE PodVector() {}
+  //! @brief Destroy the @ref PodVector<> and data.
+  ASMJIT_INLINE ~PodVector() {}
+
+  // --------------------------------------------------------------------------
+  // [Data]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get whether the vector is empty.
+  ASMJIT_INLINE bool isEmpty() const { return _d->length == 0; }
+  //! @brief Get length.
+  ASMJIT_INLINE size_t getLength() const { return _d->length; }
+  //! @brief Get capacity.
+  ASMJIT_INLINE size_t getCapacity() const { return _d->capacity; }
+
+  //! @brief Get data.
+  ASMJIT_INLINE T* getData() { return static_cast<T*>(_d->getData()); }
+  //! @overload
+  ASMJIT_INLINE const T* getData() const { return static_cast<const T*>(_d->getData()); }
+
+  // --------------------------------------------------------------------------
+  // [Clear / Reset]
+  // --------------------------------------------------------------------------
+
+  //! @brief Clear vector data, but don't free the internal buffer.
+  ASMJIT_INLINE void clear() {
+    if (_d != &_nullData)
+      _d->length = 0;
+  }
+
+  //! @brief Clear vector data and free the internal buffer.
+  ASMJIT_INLINE void reset() {
+    if (_d != &_nullData) {
+      ::free(_d);
+      _d = const_cast<PodVectorData*>(&_nullData);
+    }
+  }
+
+  // --------------------------------------------------------------------------
+  // [Grow / Reserve]
+  // --------------------------------------------------------------------------
+
+  //! @brief Called to grow the buffer to fit at least @a n elements more.
+  ASMJIT_INLINE Error _grow(size_t n)
+  { return PodVectorBase::_grow(n, sizeof(T)); }
+
+  //! @brief Realloc internal array to fit at least @a n items.
+  ASMJIT_INLINE Error _reserve(size_t n)
+  { return PodVectorBase::_reserve(n, sizeof(T)); }
+
+  // --------------------------------------------------------------------------
+  // [Ops]
+  // --------------------------------------------------------------------------
+
+  //! @brief Prepend @a item to vector.
+  Error prepend(const T& item) {
+    PodVectorData* d = _d;
+
+    if (d->length == d->capacity) {
+      ASMJIT_PROPAGATE_ERROR(_grow(1));
+      // Reload, _grow() may have reallocated the data.
+      d = _d;
+    }
+
+    ::memmove(static_cast<T*>(d->getData()) + 1, d->getData(), d->length * sizeof(T));
+    ::memcpy(d->getData(), &item, sizeof(T));
+
+    d->length++;
+    return kErrorOk;
+  }
+
+  //! @brief Insert an @a item at the @a index.
+  Error insert(size_t index, const T& item) {
+    PodVectorData* d = _d;
+    ASMJIT_ASSERT(index <= d->length);
+
+    if (d->length == d->capacity) {
+      ASMJIT_PROPAGATE_ERROR(_grow(1));
+      d = _d;
+    }
+
+    T* dst = static_cast<T*>(d->getData()) + index;
+    ::memmove(dst + 1, dst, (d->length - index) * sizeof(T));
+    ::memcpy(dst, &item, sizeof(T));
+
+    d->length++;
+    return kErrorOk;
+  }
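+  // Note: prepend() and insert() shift the existing tail with memmove() and
+  // are therefore O(length); append() is amortized O(1) thanks to the
+  // exponential growth in PodVectorBase::_grow().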
+  //! @brief Append @a item to vector.
+  Error append(const T& item) {
+    PodVectorData* d = _d;
+
+    if (d->length == d->capacity) {
+      ASMJIT_PROPAGATE_ERROR(_grow(1));
+      d = _d;
+    }
+
+    ::memcpy(static_cast<T*>(d->getData()) + d->length, &item, sizeof(T));
+
+    d->length++;
+    return kErrorOk;
+  }
+
+  //! @brief Get index of @a val or kInvalidIndex if not found.
+  size_t indexOf(const T& val) const {
+    PodVectorData* d = _d;
+
+    const T* data = static_cast<const T*>(d->getData());
+    size_t len = d->length;
+
+    for (size_t i = 0; i < len; i++)
+      if (data[i] == val)
+        return i;
+
+    return kInvalidIndex;
+  }
+
+  //! @brief Remove item at index @a i.
+  void removeAt(size_t i) {
+    PodVectorData* d = _d;
+    ASMJIT_ASSERT(i < d->length);
+
+    T* data = static_cast<T*>(d->getData()) + i;
+    d->length--;
+    ::memmove(data, data + 1, (d->length - i) * sizeof(T));
+  }
+
+  //! @brief Swap this pod-vector with @a other.
+  void swap(PodVector& other) {
+    PodVectorData* otherData = other._d;
+    other._d = _d;
+    _d = otherData;
+  }
+
+  //! @brief Get item at index @a i.
+  ASMJIT_INLINE T& operator[](size_t i) {
+    ASMJIT_ASSERT(i < getLength());
+    return getData()[i];
+  }
+
+  //! @brief Get item at index @a i.
+  ASMJIT_INLINE const T& operator[](size_t i) const {
+    ASMJIT_ASSERT(i < getLength());
+    return getData()[i];
+  }
+
+  //! @brief Allocate and append a new item and return its address.
+  T* newElement() {
+    PodVectorData* d = _d;
+
+    if (d->length == d->capacity) {
+      if (_grow(1) != kErrorOk)
+        return NULL;
+      d = _d;
+    }
+
+    return static_cast<T*>(d->getData()) + (d->length++);
+  }
+};
+
+//! @}
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../base/apiend.h"
+
+#endif // _ASMJIT_BASE_PODVECTOR_H
diff --git a/src/asmjit/base/runtime.cpp b/src/asmjit/base/runtime.cpp
new file mode 100644
index 0000000..cda9391
--- /dev/null
+++ b/src/asmjit/base/runtime.cpp
@@ -0,0 +1,113 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Export]
+#define ASMJIT_EXPORTS
+
+// [Dependencies - AsmJit]
+#include "../base/assembler.h"
+#include "../base/cpu.h"
+#include "../base/defs.h"
+#include "../base/error.h"
+#include "../base/memorymanager.h"
+#include "../base/runtime.h"
+
+// [Api-Begin]
+#include "../base/apibegin.h"
+
+namespace asmjit {
+
+// ============================================================================
+// [asmjit::BaseRuntime - Construction / Destruction]
+// ============================================================================
+
+BaseRuntime::BaseRuntime() {}
+BaseRuntime::~BaseRuntime() {}
+
+// ============================================================================
+// [asmjit::JitRuntime - Construction / Destruction]
+// ============================================================================
+
+JitRuntime::JitRuntime(MemoryManager* memmgr) :
+  _memoryManager(memmgr ? memmgr : MemoryManager::getGlobal()),
+  _allocType(kVirtualAllocFreeable) {}
+
+JitRuntime::~JitRuntime() {}
+
+// ============================================================================
+// [asmjit::JitRuntime - Get]
+// ============================================================================
+
+uint32_t JitRuntime::getStackAlignment() {
+  uint32_t alignment = sizeof(intptr_t);
+
+#if defined(ASMJIT_HOST_X86)
+  // Modern Linux, Apple, and UNIX systems guarantee 16-byte stack alignment,
+  // but I'm not sure about all other UNIX operating systems, because 16-byte
+  // alignment is an addition to an older specification.
+#if (defined(__linux__) || \ + defined(__linux) || \ + defined(__unix__) || \ + defined(__FreeBSD__) || \ + defined(__NetBSD__) || \ + defined(__OpenBSD__) || \ + defined(__DARWIN__) || \ + defined(__APPLE__) ) + alignment = 16; +#endif +#elif defined(ASMJIT_HOST_X64) + alignment = 16; +#endif + + return alignment; +} + +const BaseCpu* JitRuntime::getCpu() { + return BaseCpu::getHost(); +} + +// ============================================================================ +// [asmjit::JitRuntime - Add] +// ============================================================================ + +Error JitRuntime::add(void** dst, BaseAssembler* assembler) { + // Disallow empty code generation. + size_t codeSize = assembler->getCodeSize(); + + if (codeSize == 0) { + *dst = NULL; + return kErrorCompilerNoFunc; + } + + MemoryManager* memmgr = getMemoryManager(); + void* p = memmgr->alloc(codeSize, getAllocType()); + + if (p == NULL) { + *dst = NULL; + return kErrorNoVirtualMemory; + } + + // Relocate the code. + size_t relocSize = assembler->relocCode(p); + + // Return unused memory to MemoryManager. + if (relocSize < codeSize) + memmgr->shrink(p, relocSize); + + // Return the code. + *dst = p; + return kErrorOk; +} + +Error JitRuntime::release(void* p) { + MemoryManager* memmgr = getMemoryManager(); + return memmgr->release(p); +} + +} // asmjit namespace + +// [Api-End] +#include "../base/apiend.h" diff --git a/src/asmjit/base/runtime.h b/src/asmjit/base/runtime.h new file mode 100644 index 0000000..8d137a6 --- /dev/null +++ b/src/asmjit/base/runtime.h @@ -0,0 +1,132 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. +// +// [License] +// Zlib - See LICENSE.md file in the package. + +// [Guard] +#ifndef _ASMJIT_BASE_RUNTIME_H +#define _ASMJIT_BASE_RUNTIME_H + +// [Dependencies - AsmJit] +#include "../base/error.h" + +// [Api-Begin] +#include "../base/apibegin.h" + +namespace asmjit { + +// ============================================================================ +// [Forward Declarations] +// ============================================================================ + +struct BaseAssembler; +struct BaseCpu; +struct MemoryManager; + +// ============================================================================ +// [asmjit::BaseRuntime] +// ============================================================================ + +//! @brief Base runtime. +struct BaseRuntime { + ASMJIT_NO_COPY(BaseRuntime) + + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! @brief Create a @ref BaseRuntime instance. + ASMJIT_API BaseRuntime(); + //! @brief Destroy the @ref BaseRuntime instance. + ASMJIT_API virtual ~BaseRuntime(); + + // -------------------------------------------------------------------------- + // [Interface] + // -------------------------------------------------------------------------- + + //! @brief Get stack alignment of target runtime. + virtual uint32_t getStackAlignment() = 0; + + //! @brief Get CPU information. + virtual const BaseCpu* getCpu() = 0; + + //! @brief Allocate memory for code generated in @a assembler and reloc it + //! to the target location. + //! + //! This method is universal allowing any preprocessing / postprocessing + //! with code generated by @ref BaseAssembler or @ref BaseCompiler. Because + //! @ref BaseCompiler always uses @ref BaseAssembler it's allowed to access + //! 
only the @ref BaseAssembler instance.
+  //!
+  //! This method is always the last step when using code generation. You can
+  //! use it to allocate memory for JIT code, to save code into a remote
+  //! process, or to write it to a file.
+  //!
+  //! @return Status code, see @ref kError.
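+  //!
+  //! A sketch of the typical JIT flow (assembler setup elided; see the
+  //! x86/x64 backend for a concrete assembler):
+  //!
+  //! @code
+  //! JitRuntime runtime;
+  //! // ... create an assembler attached to `runtime` and emit code ...
+  //! void* fn;
+  //! if (runtime.add(&fn, &assembler) == kErrorOk) {
+  //!   // Call `fn`, then free it when it's no longer needed.
+  //!   runtime.release(fn);
+  //! }
+  //! @endcode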
+  virtual Error add(void** dst, BaseAssembler* assembler) = 0;
+
+  //! @brief Release memory allocated by add.
+  virtual Error release(void* p) = 0;
+};
+
+// ============================================================================
+// [asmjit::JitRuntime]
+// ============================================================================
+
+struct JitRuntime : public BaseRuntime {
+  ASMJIT_NO_COPY(JitRuntime)
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a @c JitRuntime instance.
+  ASMJIT_API JitRuntime(MemoryManager* memmgr = NULL);
+  //! @brief Destroy the @c JitRuntime instance.
+  ASMJIT_API virtual ~JitRuntime();
+
+  // --------------------------------------------------------------------------
+  // [Memory Manager and Alloc Type]
+  // --------------------------------------------------------------------------
+
+  // Note: These members can be ignored by all derived classes. They are here
+  // only to provide a default implementation. All other implementations
+  // (remote code patching or making dynamically loadable libraries or
+  // executables) ignore the members accessed by these accessors.
+
+  //! @brief Get the @c MemoryManager instance.
+  ASMJIT_INLINE MemoryManager* getMemoryManager() const { return _memoryManager; }
+
+  //! @brief Get the type of allocation.
+  ASMJIT_INLINE uint32_t getAllocType() const { return _allocType; }
+  //! @brief Set the type of allocation.
+  ASMJIT_INLINE void setAllocType(uint32_t allocType) { _allocType = allocType; }
+
+  // --------------------------------------------------------------------------
+  // [Interface]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_API virtual uint32_t getStackAlignment();
+  ASMJIT_API virtual const BaseCpu* getCpu();
+
+  ASMJIT_API virtual Error add(void** dst, BaseAssembler* assembler);
+  ASMJIT_API virtual Error release(void* p);
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! @brief Memory manager.
+  MemoryManager* _memoryManager;
+  //! @brief Type of allocation.
+  uint32_t _allocType;
+};
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../base/apiend.h"
+
+// [Guard]
+#endif // _ASMJIT_BASE_RUNTIME_H
diff --git a/src/asmjit/base/string.cpp b/src/asmjit/base/string.cpp
new file mode 100644
index 0000000..910e8c9
--- /dev/null
+++ b/src/asmjit/base/string.cpp
@@ -0,0 +1,375 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Export]
+#define ASMJIT_EXPORTS
+
+// [Dependencies - AsmJit]
+#include "../base/defs.h"
+#include "../base/intutil.h"
+#include "../base/string.h"
+
+// [Api-Begin]
+#include "../base/apibegin.h"
+
+namespace asmjit {
+
+// Should be placed in read-only memory.
+static const char StringBuilder_empty[4] = { 0 };
+
+// ============================================================================
+// [asmjit::StringBuilder - Construction / Destruction]
+// ============================================================================
+
+StringBuilder::StringBuilder() :
+  _data(const_cast<char*>(StringBuilder_empty)),
+  _length(0),
+  _capacity(0),
+  _canFree(false) {}
+
+StringBuilder::~StringBuilder() {
+  if (_canFree)
+    ::free(_data);
+}
+
+// ============================================================================
+// [asmjit::StringBuilder - Prepare / Reserve]
+// ============================================================================
+
+char* StringBuilder::prepare(uint32_t op, size_t len) {
+  // --------------------------------------------------------------------------
+  // [Set]
+  // --------------------------------------------------------------------------
+
+  if (op == kStringOpSet) {
+    // There is nothing to copy, but we can't return NULL, since NULL
+    // indicates failure in memory allocation.
+    if (len == 0) {
+      if (_data != StringBuilder_empty)
+        _data[0] = 0;
+
+      _length = 0;
+      return _data;
+    }
+
+    if (_capacity < len) {
+      if (len >= IntUtil::maxUInt() - sizeof(intptr_t) * 2)
+        return NULL;
+
+      size_t to = IntUtil::alignTo(len, sizeof(intptr_t));
+      if (to < 256 - sizeof(intptr_t))
+        to = 256 - sizeof(intptr_t);
+
+      char* newData = static_cast<char*>(::malloc(to + sizeof(intptr_t)));
+      if (newData == NULL) {
+        clear();
+        return NULL;
+      }
+
+      if (_canFree)
+        ::free(_data);
+
+      _data = newData;
+      _capacity = to + sizeof(intptr_t) - 1;
+      _canFree = true;
+    }
+
+    _data[len] = 0;
+    _length = len;
+
+    ASMJIT_ASSERT(_length <= _capacity);
+    return _data;
+  }
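+  // (E.g. on a 64-bit host a 10-byte set is first aligned to 16 and then
+  // raised to the 248-byte minimum, so 256 bytes are allocated and the
+  // capacity is reported as 255; the byte past the capacity always holds
+  // the NUL terminator.)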
+  // --------------------------------------------------------------------------
+  // [Append]
+  // --------------------------------------------------------------------------
+
+  else {
+    // There is nothing to append, but we can't return NULL, since NULL
+    // indicates failure in memory allocation.
+    if (len == 0)
+      return _data + _length;
+
+    // Overflow.
+    if (IntUtil::maxUInt() - sizeof(intptr_t) * 2 - _length < len)
+      return NULL;
+
+    size_t after = _length + len;
+    if (_capacity < after) {
+      size_t to = _capacity;
+
+      if (to < 256)
+        to = 256;
+
+      while (to < 1024 * 1024 && to < after)
+        to *= 2;
+
+      if (to < after) {
+        to = after;
+        if (to < (IntUtil::maxUInt() - 1024 * 32))
+          to = IntUtil::alignTo(to, 1024 * 32);
+      }
+
+      to = IntUtil::alignTo(to, sizeof(intptr_t));
+      char* newData = static_cast<char*>(::malloc(to + sizeof(intptr_t)));
+
+      if (newData == NULL)
+        return NULL;
+
+      ::memcpy(newData, _data, _length);
+      if (_canFree)
+        ::free(_data);
+
+      _data = newData;
+      _capacity = to + sizeof(intptr_t) - 1;
+      _canFree = true;
+    }
+
+    char* ret = _data + _length;
+    _data[after] = 0;
+    _length = after;
+
+    ASMJIT_ASSERT(_length <= _capacity);
+    return ret;
+  }
+}
+
+bool StringBuilder::reserve(size_t to) {
+  if (_capacity >= to)
+    return true;
+
+  if (to >= IntUtil::maxUInt() - sizeof(intptr_t) * 2)
+    return false;
+
+  to = IntUtil::alignTo(to, sizeof(intptr_t));
+
+  char* newData = static_cast<char*>(::malloc(to + sizeof(intptr_t)));
+  if (newData == NULL)
+    return false;
+
+  ::memcpy(newData, _data, _length + 1);
+  if (_canFree)
+    ::free(_data);
+
+  _data = newData;
+  _capacity = to + sizeof(intptr_t) - 1;
+  _canFree = true;
+  return true;
+}
+
+// ============================================================================
+// [asmjit::StringBuilder - Clear]
+// ============================================================================
+
+void StringBuilder::clear() {
+  if (_data != StringBuilder_empty)
+    _data[0] = 0;
+  _length = 0;
+}
+
+// ============================================================================
+// [asmjit::StringBuilder - Methods]
+// ============================================================================
+
+bool StringBuilder::_opString(uint32_t op, const char* str, size_t len) {
+  if (len == kInvalidIndex)
+    len = str != NULL ? ::strlen(str) : static_cast<size_t>(0);
+
+  char* p = prepare(op, len);
+  if (p == NULL)
+    return false;
+
+  ::memcpy(p, str, len);
+  return true;
+}
+
+bool StringBuilder::_opChar(uint32_t op, char c) {
+  char* p = prepare(op, 1);
+  if (p == NULL)
+    return false;
+
+  *p = c;
+  return true;
+}
+
+bool StringBuilder::_opChars(uint32_t op, char c, size_t len) {
+  char* p = prepare(op, len);
+  if (p == NULL)
+    return false;
+
+  ::memset(p, c, len);
+  return true;
+}
+
+static const char StringBuilder_numbers[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+
+bool StringBuilder::_opNumber(uint32_t op, uint64_t i, uint32_t base, size_t width, uint32_t flags) {
+  if (base < 2 || base > 36)
+    base = 10;
+
+  char buf[128];
+  char* p = buf + ASMJIT_ARRAY_SIZE(buf);
+
+  uint64_t orig = i;
+  char sign = '\0';
+
+  // --------------------------------------------------------------------------
+  // [Sign]
+  // --------------------------------------------------------------------------
+
+  if ((flags & kStringFormatSigned) != 0 && static_cast<int64_t>(i) < 0) {
+    i = static_cast<uint64_t>(-static_cast<int64_t>(i));
+    sign = '-';
+  }
+  else if ((flags & kStringFormatShowSign) != 0) {
+    sign = '+';
+  }
+  else if ((flags & kStringFormatShowSpace) != 0) {
+    sign = ' ';
+  }
+
+  // --------------------------------------------------------------------------
+  // [Number]
+  // --------------------------------------------------------------------------
+
+  do {
+    uint64_t d = i / base;
+    uint64_t r = i % base;
+
+    *--p = StringBuilder_numbers[r];
+    i = d;
+  } while (i);
+
+  size_t numberLength = (size_t)(buf + ASMJIT_ARRAY_SIZE(buf) - p);
+
+  // --------------------------------------------------------------------------
+  // [Alternate Form]
+  // --------------------------------------------------------------------------
+
+  if ((flags & kStringFormatAlternate) != 0) {
+    if (base == 8) {
+      if (orig != 0)
+        *--p = '0';
+    }
+    if (base == 16) {
+      *--p = 'x';
+      *--p = '0';
+    }
+  }
+
+  // --------------------------------------------------------------------------
+  // [Width]
+  // --------------------------------------------------------------------------
+
+  if (sign != 0)
+    *--p = sign;
+
+  if (width > 256)
+    width = 256;
+
+  if (width <= numberLength)
+    width = 0;
+  else
+    width -= numberLength;
+
+  // --------------------------------------------------------------------------
+  // [Write]
+  // --------------------------------------------------------------------------
+
+  size_t prefixLength = (size_t)(buf + ASMJIT_ARRAY_SIZE(buf) - p) - numberLength;
+  char* data = prepare(op, prefixLength + width + numberLength);
+
+  if (data == NULL)
+    return false;
+
+  ::memcpy(data, p, prefixLength);
+  data += prefixLength;
+
+  ::memset(data, '0', width);
+  data += width;
+
+  ::memcpy(data, p + prefixLength, numberLength);
+  return true;
+}
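+
+// A quick example of the formatting above: _opNumber(kStringOpSet, 255, 16,
+// 6, kStringFormatAlternate) produces "0x0000FF" - the alternate-form "0x"
+// prefix followed by the value zero-padded to the requested width.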
+
+bool StringBuilder::_opHex(uint32_t op, const void* data, size_t len) {
+  if (len >= IntUtil::maxUInt() / 2)
+    return false;
+
+  char* dst = prepare(op, len * 2);
+  if (dst == NULL)
+    return false;
+
+  const char* src = static_cast<const char*>(data);
+  for (size_t i = 0; i < len; i++, dst += 2, src += 1) {
+    dst[0] = StringBuilder_numbers[(src[0] >> 4) & 0xF];
+    dst[1] = StringBuilder_numbers[(src[0]     ) & 0xF];
+  }
+
+  return true;
+}
+
+bool StringBuilder::_opVFormat(uint32_t op, const char* fmt, va_list ap) {
+  char buf[1024];
+
+  vsnprintf(buf, ASMJIT_ARRAY_SIZE(buf), fmt, ap);
+  buf[ASMJIT_ARRAY_SIZE(buf) - 1] = '\0';
+
+  return _opString(op, buf);
+}
+
+bool StringBuilder::setFormat(const char* fmt, ...) {
+  bool result;
+
+  va_list ap;
+  va_start(ap, fmt);
+  result = _opVFormat(kStringOpSet, fmt, ap);
+  va_end(ap);
+
+  return result;
+}
+
+bool StringBuilder::appendFormat(const char* fmt, ...) {
+  bool result;
+
+  va_list ap;
+  va_start(ap, fmt);
+  result = _opVFormat(kStringOpAppend, fmt, ap);
+  va_end(ap);
+
+  return result;
+}
+
+bool StringBuilder::eq(const char* str, size_t len) const {
+  const char* aData = _data;
+  const char* bData = str;
+
+  size_t aLength = _length;
+  size_t bLength = len;
+
+  if (bLength == kInvalidIndex) {
+    size_t i;
+    for (i = 0; i < aLength; i++) {
+      if (aData[i] != bData[i] || bData[i] == 0)
+        return false;
+    }
+
+    return bData[i] == 0;
+  }
+  else {
+    if (aLength != bLength)
+      return false;
+
+    return ::memcmp(aData, bData, aLength) == 0;
+  }
+}
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../base/apiend.h"
diff --git a/src/asmjit/base/string.h b/src/asmjit/base/string.h
new file mode 100644
index 0000000..831e3e2
--- /dev/null
+++ b/src/asmjit/base/string.h
@@ -0,0 +1,350 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Guard]
+#ifndef _ASMJIT_BASE_STRING_H
+#define _ASMJIT_BASE_STRING_H
+
+// [Dependencies - AsmJit]
+#include "../base/assert.h"
+#include "../base/defs.h"
+
+// [Dependencies - C]
+#include <stdarg.h>
+
+// [Api-Begin]
+#include "../base/apibegin.h"
+
+namespace asmjit {
+
+//! @addtogroup asmjit_base
+//! @{
+
+// ============================================================================
+// [asmjit::kStringOp]
+// ============================================================================
+
+//! @brief String operation.
+ASMJIT_ENUM(kStringOp) {
+  //! @brief Replace the current string by a given content.
+  kStringOpSet = 0,
+  //! @brief Append a given content to the current string.
+  kStringOpAppend = 1
+};
+
+// ============================================================================
+// [asmjit::kStringFormat]
+// ============================================================================
+
+//! @brief String format flags.
+ASMJIT_ENUM(kStringFormat) {
+  kStringFormatShowSign = 0x00000001,
+  kStringFormatShowSpace = 0x00000002,
+  kStringFormatAlternate = 0x00000004,
+  kStringFormatSigned = 0x80000000
+};
+
+// ============================================================================
+// [asmjit::StringUtil]
+// ============================================================================
+
+//! @brief String utilities.
+struct StringUtil {
+  static ASMJIT_INLINE size_t nlen(const char* s, size_t maxlen) {
+    size_t i;
+    for (i = 0; i < maxlen; i++)
+      if (!s[i])
+        break;
+    return i;
+  }
+};
+
+// ============================================================================
+// [asmjit::StringBuilder]
+// ============================================================================
+
+//! @brief String builder.
+//!
+//! String builder was designed to be able to build a string using append-like
+//! operations to append numbers, other strings, or single characters. It can
+//! allocate its own buffer or use a buffer created on the stack.
+//!
+//! String builder contains methods specific to AsmJit functionality, used for
+//! logging or HTML output.
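+//!
+//! A minimal sketch of append-style use (results shown as comments):
+//!
+//! @code
+//! StringBuilder sb;
+//! sb.appendString("mov ");
+//! sb.appendChar('x');
+//! sb.appendUInt(42);       // sb == "mov x42"
+//! sb.appendHex("\x0F", 1); // sb == "mov x420F"
+//! @endcode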
+struct StringBuilder { + ASMJIT_NO_COPY(StringBuilder) + + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + ASMJIT_API StringBuilder(); + ASMJIT_API ~StringBuilder(); + + ASMJIT_INLINE StringBuilder(const _DontInitialize&) {} + + // -------------------------------------------------------------------------- + // [Accessors] + // -------------------------------------------------------------------------- + + //! @brief Get string builder capacity. + ASMJIT_INLINE size_t getCapacity() const { return _capacity; } + //! @brief Get length. + ASMJIT_INLINE size_t getLength() const { return _length; } + + //! @brief Get null-terminated string data. + ASMJIT_INLINE char* getData() { return _data; } + //! @brief Get null-terminated string data (const). + ASMJIT_INLINE const char* getData() const { return _data; } + + // -------------------------------------------------------------------------- + // [Prepare / Reserve] + // -------------------------------------------------------------------------- + + //! @brief Prepare to set/append. + ASMJIT_API char* prepare(uint32_t op, size_t len); + + //! @brief Reserve @a to bytes in string builder. + ASMJIT_API bool reserve(size_t to); + + // -------------------------------------------------------------------------- + // [Clear] + // -------------------------------------------------------------------------- + + //! @brief Clear the content in String builder. + ASMJIT_API void clear(); + + // -------------------------------------------------------------------------- + // [Op] + // -------------------------------------------------------------------------- + + ASMJIT_API bool _opString(uint32_t op, const char* str, size_t len = kInvalidIndex); + ASMJIT_API bool _opVFormat(uint32_t op, const char* fmt, va_list ap); + ASMJIT_API bool _opChar(uint32_t op, char c); + ASMJIT_API bool _opChars(uint32_t op, char c, size_t len); + ASMJIT_API bool _opNumber(uint32_t op, uint64_t i, uint32_t base = 0, size_t width = 0, uint32_t flags = 0); + ASMJIT_API bool _opHex(uint32_t op, const void* data, size_t len); + + // -------------------------------------------------------------------------- + // [Set] + // -------------------------------------------------------------------------- + + //! @brief Replace the current content by @a str of @a len. + ASMJIT_INLINE bool setString(const char* str, size_t len = kInvalidIndex) + { return _opString(kStringOpSet, str, len); } + + //! @brief Replace the current content by formatted string @a fmt. + ASMJIT_INLINE bool setVFormat(const char* fmt, va_list ap) + { return _opVFormat(kStringOpSet, fmt, ap); } + + //! @brief Replace the current content by formatted string @a fmt. + ASMJIT_API bool setFormat(const char* fmt, ...); + + //! @brief Replace the current content by @a c character. + ASMJIT_INLINE bool setChar(char c) + { return _opChar(kStringOpSet, c); } + + //! @brief Replace the current content by @a c of @a len. + ASMJIT_INLINE bool setChars(char c, size_t len) + { return _opChars(kStringOpSet, c, len); } + + //! @brief Replace the current content by @a i.. + ASMJIT_INLINE bool setInt(uint64_t i, uint32_t base = 0, size_t width = 0, uint32_t flags = 0) + { return _opNumber(kStringOpSet, i, base, width, flags | kStringFormatSigned); } + + //! @brief Replace the current content by @a i.. 
+  ASMJIT_INLINE bool setUInt(uint64_t i, uint32_t base = 0, size_t width = 0, uint32_t flags = 0)
+  { return _opNumber(kStringOpSet, i, base, width, flags); }
+
+  //! @brief Replace the current content by the given @a data converted to a HEX string.
+  ASMJIT_INLINE bool setHex(const void* data, size_t len)
+  { return _opHex(kStringOpSet, data, len); }
+
+  // --------------------------------------------------------------------------
+  // [Append]
+  // --------------------------------------------------------------------------
+
+  //! @brief Append @a str of @a len.
+  ASMJIT_INLINE bool appendString(const char* str, size_t len = kInvalidIndex)
+  { return _opString(kStringOpAppend, str, len); }
+
+  //! @brief Append a formatted string @a fmt to the current content.
+  ASMJIT_INLINE bool appendVFormat(const char* fmt, va_list ap)
+  { return _opVFormat(kStringOpAppend, fmt, ap); }
+
+  //! @brief Append a formatted string @a fmt to the current content.
+  ASMJIT_API bool appendFormat(const char* fmt, ...);
+
+  //! @brief Append @a c character.
+  ASMJIT_INLINE bool appendChar(char c)
+  { return _opChar(kStringOpAppend, c); }
+
+  //! @brief Append @a c of @a len.
+  ASMJIT_INLINE bool appendChars(char c, size_t len)
+  { return _opChars(kStringOpAppend, c, len); }
+
+  //! @brief Append @a i.
+  ASMJIT_INLINE bool appendInt(int64_t i, uint32_t base = 0, size_t width = 0, uint32_t flags = 0)
+  { return _opNumber(kStringOpAppend, static_cast<uint64_t>(i), base, width, flags | kStringFormatSigned); }
+
+  //! @brief Append @a i.
+  ASMJIT_INLINE bool appendUInt(uint64_t i, uint32_t base = 0, size_t width = 0, uint32_t flags = 0)
+  { return _opNumber(kStringOpAppend, i, base, width, flags); }
+
+  //! @brief Append the given @a data converted to a HEX string.
+  ASMJIT_INLINE bool appendHex(const void* data, size_t len)
+  { return _opHex(kStringOpAppend, data, len); }
+
+  // --------------------------------------------------------------------------
+  // [_Append]
+  // --------------------------------------------------------------------------
+
+  //! @brief Append @a str of @a len (inlined, without buffer overflow check).
+  ASMJIT_INLINE void _appendString(const char* str, size_t len = kInvalidIndex) {
+    // len should be a constant if we are inlining.
+    if (len == kInvalidIndex) {
+      char* p = &_data[_length];
+
+      while (*str) {
+        ASMJIT_ASSERT(p < _data + _capacity);
+        *p++ = *str++;
+      }
+
+      *p = '\0';
+      _length = (size_t)(p - _data);
+    }
+    else {
+      ASMJIT_ASSERT(_capacity - _length >= len);
+
+      char* p = &_data[_length];
+      char* pEnd = p + len;
+
+      while (p < pEnd)
+        *p++ = *str++;
+
+      *p = '\0';
+      _length += len;
+    }
+  }
+
+  //! @brief Append @a c character (inlined, without buffer overflow check).
+  ASMJIT_INLINE void _appendChar(char c) {
+    ASMJIT_ASSERT(_capacity - _length >= 1);
+
+    _data[_length] = c;
+    _length++;
+    _data[_length] = '\0';
+  }
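+  // Note: the _append* helpers skip the overflow checks done by the public
+  // append* methods, so the caller must reserve capacity up front, e.g.:
+  //
+  //   if (sb.reserve(sb.getLength() + 3))
+  //     sb._appendChars('-', 3);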
+  //! @brief Append @a c of @a len (inlined, without buffer overflow check).
+  ASMJIT_INLINE void _appendChars(char c, size_t len) {
+    ASMJIT_ASSERT(_capacity - _length >= len);
+
+    char* p = &_data[_length];
+    char* pEnd = p + len;
+
+    while (p < pEnd)
+      *p++ = c;
+
+    *p = '\0';
+    _length += len;
+  }
+
+  ASMJIT_INLINE void _appendUInt32(uint32_t i) {
+    char buf_[32];
+
+    char* pEnd = buf_ + ASMJIT_ARRAY_SIZE(buf_);
+    char* pBuf = pEnd;
+
+    do {
+      uint32_t d = i / 10;
+      uint32_t r = i % 10;
+
+      *--pBuf = static_cast<char>(r + '0');
+      i = d;
+    } while (i);
+
+    ASMJIT_ASSERT(_capacity - _length >= (size_t)(pEnd - pBuf));
+    char* p = &_data[_length];
+
+    do {
+      *p++ = *pBuf;
+    } while (++pBuf != pEnd);
+
+    *p = '\0';
+    _length = (size_t)(p - _data);
+  }
+
+  // --------------------------------------------------------------------------
+  // [Eq]
+  // --------------------------------------------------------------------------
+
+  //! @brief Check for equality with other @a str.
+  ASMJIT_API bool eq(const char* str, size_t len = kInvalidIndex) const;
+  //! @brief Check for equality with StringBuilder @a other.
+  ASMJIT_INLINE bool eq(const StringBuilder& other) const { return eq(other._data); }
+
+  // --------------------------------------------------------------------------
+  // [Operator Overload]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE bool operator==(const StringBuilder& other) const { return eq(other); }
+  ASMJIT_INLINE bool operator!=(const StringBuilder& other) const { return !eq(other); }
+
+  ASMJIT_INLINE bool operator==(const char* str) const { return eq(str); }
+  ASMJIT_INLINE bool operator!=(const char* str) const { return !eq(str); }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! @brief String data.
+  char* _data;
+  //! @brief Length.
+  size_t _length;
+  //! @brief Capacity.
+  size_t _capacity;
+  //! @brief Whether the string can be freed.
+  size_t _canFree;
+};
+
+// ============================================================================
+// [asmjit::StringBuilderT]
+// ============================================================================
+
+template<size_t N>
+struct StringBuilderT : public StringBuilder {
+  ASMJIT_NO_COPY(StringBuilderT)
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE StringBuilderT() : StringBuilder(DontInitialize) {
+    _data = _embeddedData;
+    _data[0] = 0;
+
+    _length = 0;
+    _capacity = N;
+    _canFree = false;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! @brief Embedded data.
+  char _embeddedData[static_cast<size_t>(N + 1 + sizeof(intptr_t)) & ~static_cast<size_t>(sizeof(intptr_t) - 1)];
+};
+
+//! @}
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../base/apiend.h"
+
+#endif // _ASMJIT_BASE_STRING_H
diff --git a/src/asmjit/base/vectypes.h b/src/asmjit/base/vectypes.h
new file mode 100644
index 0000000..41f73c1
--- /dev/null
+++ b/src/asmjit/base/vectypes.h
@@ -0,0 +1,1248 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+ +// [Guard] +#ifndef _ASMJIT_BASE_VECTYPES_H +#define _ASMJIT_BASE_VECTYPES_H + +// [Dependencies - AsmJit] +#include "../base/defs.h" + +// [Api-Begin] +#include "../base/apibegin.h" + +namespace asmjit { + +// ============================================================================ +// [asmjit::Vec64Data] +// ============================================================================ + +//! @brief 64-bit vector register data. +union Vec64Data { + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! @brief Set all eight signed 8-bit integers. + static ASMJIT_INLINE Vec64Data fromSb( + int8_t x0, int8_t x1, int8_t x2, int8_t x3, int8_t x4, int8_t x5, int8_t x6, int8_t x7) + { + Vec64Data self; + self.setSb(x0, x1, x2, x3, x4, x5, x6, x7); + return self; + } + + //! @brief Set all eight signed 8-bit integers. + static ASMJIT_INLINE Vec64Data fromSb( + int8_t x0) + { + Vec64Data self; + self.setSb(x0); + return self; + } + + //! @brief Set all eight unsigned 8-bit integers. + static ASMJIT_INLINE Vec64Data fromUb( + uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7) + { + Vec64Data self; + self.setUb(x0, x1, x2, x3, x4, x5, x6, x7); + return self; + } + + //! @brief Set all eight unsigned 8-bit integers. + static ASMJIT_INLINE Vec64Data fromUb( + uint8_t x0) + { + Vec64Data self; + self.setUb(x0); + return self; + } + + //! @brief Set all four signed 16-bit integers. + static ASMJIT_INLINE Vec64Data fromSw( + int16_t x0, int16_t x1, int16_t x2, int16_t x3) + { + Vec64Data self; + self.setSw(x0, x1, x2, x3); + return self; + } + + //! @brief Set all four signed 16-bit integers. + static ASMJIT_INLINE Vec64Data fromSw( + int16_t x0) + { + Vec64Data self; + self.setSw(x0); + return self; + } + + //! @brief Set all four unsigned 16-bit integers. + static ASMJIT_INLINE Vec64Data fromUw( + uint16_t x0, uint16_t x1, uint16_t x2, uint16_t x3) + { + Vec64Data self; + self.setUw(x0, x1, x2, x3); + return self; + } + + //! @brief Set all four unsigned 16-bit integers. + static ASMJIT_INLINE Vec64Data fromUw( + uint16_t x0) + { + Vec64Data self; + self.setUw(x0); + return self; + } + + //! @brief Set all two signed 32-bit integers. + static ASMJIT_INLINE Vec64Data fromSd( + int32_t x0, int32_t x1) + { + Vec64Data self; + self.setSd(x0, x1); + return self; + } + + //! @brief Set all two signed 32-bit integers. + static ASMJIT_INLINE Vec64Data fromSd( + int32_t x0) + { + Vec64Data self; + self.setSd(x0); + return self; + } + + //! @brief Set all two unsigned 32-bit integers. + static ASMJIT_INLINE Vec64Data fromUd( + uint32_t x0, uint32_t x1) + { + Vec64Data self; + self.setUd(x0, x1); + return self; + } + + //! @brief Set all two unsigned 32-bit integers. + static ASMJIT_INLINE Vec64Data fromUd( + uint32_t x0) + { + Vec64Data self; + self.setUd(x0); + return self; + } + + //! @brief Set signed 64-bit integer. + static ASMJIT_INLINE Vec64Data fromSq( + int64_t x0) + { + Vec64Data self; + self.setSq(x0); + return self; + } + + //! @brief Set unsigned 64-bit integer. + static ASMJIT_INLINE Vec64Data fromUq( + uint64_t x0) + { + Vec64Data self; + self.setUq(x0); + return self; + } + + //! @brief Set all two SP-FP values. + static ASMJIT_INLINE Vec64Data fromSf( + float x0, float x1) + { + Vec64Data self; + self.setSf(x0, x1); + return self; + } + + //! @brief Set all two SP-FP values. 
+  static ASMJIT_INLINE Vec64Data fromSf(
+    float x0)
+  {
+    Vec64Data self;
+    self.setSf(x0);
+    return self;
+  }
+
+  //! @brief Set DP-FP value.
+  static ASMJIT_INLINE Vec64Data fromDf(
+    double x0)
+  {
+    Vec64Data self;
+    self.setDf(x0);
+    return self;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  //! @brief Set all eight signed 8-bit integers.
+  ASMJIT_INLINE void setSb(
+    int8_t x0, int8_t x1, int8_t x2, int8_t x3, int8_t x4, int8_t x5, int8_t x6, int8_t x7)
+  {
+    sb[0] = x0; sb[1] = x1; sb[2] = x2; sb[3] = x3;
+    sb[4] = x4; sb[5] = x5; sb[6] = x6; sb[7] = x7;
+  }
+
+  //! @brief Set all eight signed 8-bit integers.
+  ASMJIT_INLINE void setSb(
+    int8_t x0)
+  {
+    setUb(static_cast<uint8_t>(x0));
+  }
+
+  //! @brief Set all eight unsigned 8-bit integers.
+  ASMJIT_INLINE void setUb(
+    uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
+  {
+    ub[0] = x0; ub[1] = x1; ub[2] = x2; ub[3] = x3;
+    ub[4] = x4; ub[5] = x5; ub[6] = x6; ub[7] = x7;
+  }
+
+  //! @brief Set all eight unsigned 8-bit integers.
+  ASMJIT_INLINE void setUb(
+    uint8_t x0)
+  {
+    if (kArchHost64Bit) {
+      uint64_t t = static_cast<uint64_t>(x0) * ASMJIT_UINT64_C(0x0101010101010101);
+      uq[0] = t;
+    }
+    else {
+      uint32_t t = static_cast<uint32_t>(x0) * static_cast<uint32_t>(0x01010101U);
+      ud[0] = t;
+      ud[1] = t;
+    }
+  }
+
+  //! @brief Set all four signed 16-bit integers.
+  ASMJIT_INLINE void setSw(
+    int16_t x0, int16_t x1, int16_t x2, int16_t x3)
+  {
+    sw[0] = x0; sw[1] = x1; sw[2] = x2; sw[3] = x3;
+  }
+
+  //! @brief Set all four signed 16-bit integers.
+  ASMJIT_INLINE void setSw(
+    int16_t x0)
+  {
+    setUw(static_cast<uint16_t>(x0));
+  }
+
+  //! @brief Set all four unsigned 16-bit integers.
+  ASMJIT_INLINE void setUw(
+    uint16_t x0, uint16_t x1, uint16_t x2, uint16_t x3)
+  {
+    uw[0] = x0; uw[1] = x1; uw[2] = x2; uw[3] = x3;
+  }
+
+  //! @brief Set all four unsigned 16-bit integers.
+  ASMJIT_INLINE void setUw(
+    uint16_t x0)
+  {
+    if (kArchHost64Bit) {
+      uint64_t t = static_cast<uint64_t>(x0) * ASMJIT_UINT64_C(0x0001000100010001);
+      uq[0] = t;
+    }
+    else {
+      uint32_t t = static_cast<uint32_t>(x0) * static_cast<uint32_t>(0x00010001U);
+      ud[0] = t;
+      ud[1] = t;
+    }
+  }
+
+  //! @brief Set all two signed 32-bit integers.
+  ASMJIT_INLINE void setSd(
+    int32_t x0, int32_t x1)
+  {
+    sd[0] = x0; sd[1] = x1;
+  }
+
+  //! @brief Set all two signed 32-bit integers.
+  ASMJIT_INLINE void setSd(
+    int32_t x0)
+  {
+    sd[0] = x0; sd[1] = x0;
+  }
+
+  //! @brief Set all two unsigned 32-bit integers.
+  ASMJIT_INLINE void setUd(
+    uint32_t x0, uint32_t x1)
+  {
+    ud[0] = x0; ud[1] = x1;
+  }
+
+  //! @brief Set all two unsigned 32-bit integers.
+  ASMJIT_INLINE void setUd(
+    uint32_t x0)
+  {
+    ud[0] = x0; ud[1] = x0;
+  }
+
+  //! @brief Set signed 64-bit integer.
+  ASMJIT_INLINE void setSq(
+    int64_t x0)
+  {
+    sq[0] = x0;
+  }
+
+  //! @brief Set unsigned 64-bit integer.
+  ASMJIT_INLINE void setUq(
+    uint64_t x0)
+  {
+    uq[0] = x0;
+  }
+
+  //! @brief Set all two SP-FP values.
+  ASMJIT_INLINE void setSf(
+    float x0, float x1)
+  {
+    sf[0] = x0; sf[1] = x1;
+  }
+
+  //! @brief Set all two SP-FP values.
+  ASMJIT_INLINE void setSf(
+    float x0)
+  {
+    sf[0] = x0; sf[1] = x0;
+  }
+
+  //! @brief Set DP-FP value.
+ ASMJIT_INLINE void setDf( + double x0) + { + df[0] = x0; + } + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + + //! @brief Array of eight signed 8-bit integers. + int8_t sb[8]; + //! @brief Array of eight unsigned 8-bit integers. + uint8_t ub[8]; + //! @brief Array of four signed 16-bit integers. + int16_t sw[4]; + //! @brief Array of four unsigned 16-bit integers. + uint16_t uw[4]; + //! @brief Array of two signed 32-bit integers. + int32_t sd[2]; + //! @brief Array of two unsigned 32-bit integers. + uint32_t ud[2]; + //! @brief Array of one signed 64-bit integer. + int64_t sq[1]; + //! @brief Array of one unsigned 64-bit integer. + uint64_t uq[1]; + + //! @brief Array of two SP-FP values. + float sf[2]; + //! @brief Array of one DP-FP value. + double df[1]; +}; + +// ============================================================================ +// [asmjit::Vec128Data] +// ============================================================================ + +//! @brief 128-bit vector register data. +union Vec128Data { + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! @brief Set all sixteen signed 8-bit integers. + static ASMJIT_INLINE Vec128Data fromSb( + int8_t x0 , int8_t x1 , int8_t x2 , int8_t x3 , + int8_t x4 , int8_t x5 , int8_t x6 , int8_t x7 , + int8_t x8 , int8_t x9 , int8_t x10, int8_t x11, + int8_t x12, int8_t x13, int8_t x14, int8_t x15) + { + Vec128Data self; + self.setSb(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15); + return self; + } + + //! @brief Set all sixteen signed 8-bit integers. + static ASMJIT_INLINE Vec128Data fromSb( + int8_t x0) + { + Vec128Data self; + self.setSb(x0); + return self; + } + + //! @brief Set all sixteen unsigned 8-bit integers. + static ASMJIT_INLINE Vec128Data fromUb( + uint8_t x0 , uint8_t x1 , uint8_t x2 , uint8_t x3 , + uint8_t x4 , uint8_t x5 , uint8_t x6 , uint8_t x7 , + uint8_t x8 , uint8_t x9 , uint8_t x10, uint8_t x11, + uint8_t x12, uint8_t x13, uint8_t x14, uint8_t x15) + { + Vec128Data self; + self.setUb(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15); + return self; + } + + //! @brief Set all sixteen unsigned 8-bit integers. + static ASMJIT_INLINE Vec128Data fromUb( + uint8_t x0) + { + Vec128Data self; + self.setUb(x0); + return self; + } + + //! @brief Set all eight signed 16-bit integers. + static ASMJIT_INLINE Vec128Data fromSw( + int16_t x0, int16_t x1, int16_t x2, int16_t x3, int16_t x4, int16_t x5, int16_t x6, int16_t x7) + { + Vec128Data self; + self.setSw(x0, x1, x2, x3, x4, x5, x6, x7); + return self; + } + + //! @brief Set all eight signed 16-bit integers. + static ASMJIT_INLINE Vec128Data fromSw( + int16_t x0) + { + Vec128Data self; + self.setSw(x0); + return self; + } + + //! @brief Set all eight unsigned 16-bit integers. + static ASMJIT_INLINE Vec128Data fromUw( + uint16_t x0, uint16_t x1, uint16_t x2, uint16_t x3, uint16_t x4, uint16_t x5, uint16_t x6, uint16_t x7) + { + Vec128Data self; + self.setUw(x0, x1, x2, x3, x4, x5, x6, x7); + return self; + } + + //! @brief Set all eight unsigned 16-bit integers. + static ASMJIT_INLINE Vec128Data fromUw( + uint16_t x0) + { + Vec128Data self; + self.setUw(x0); + return self; + } + + //! @brief Set all four signed 32-bit integers. 
+  static ASMJIT_INLINE Vec128Data fromSd(
+    int32_t x0, int32_t x1, int32_t x2, int32_t x3)
+  {
+    Vec128Data self;
+    self.setSd(x0, x1, x2, x3);
+    return self;
+  }
+
+  //! @brief Set all four signed 32-bit integers.
+  static ASMJIT_INLINE Vec128Data fromSd(
+    int32_t x0)
+  {
+    Vec128Data self;
+    self.setSd(x0);
+    return self;
+  }
+
+  //! @brief Set all four unsigned 32-bit integers.
+  static ASMJIT_INLINE Vec128Data fromUd(
+    uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3)
+  {
+    Vec128Data self;
+    self.setUd(x0, x1, x2, x3);
+    return self;
+  }
+
+  //! @brief Set all four unsigned 32-bit integers.
+  static ASMJIT_INLINE Vec128Data fromUd(
+    uint32_t x0)
+  {
+    Vec128Data self;
+    self.setUd(x0);
+    return self;
+  }
+
+  //! @brief Set all two signed 64-bit integers.
+  static ASMJIT_INLINE Vec128Data fromSq(
+    int64_t x0, int64_t x1)
+  {
+    Vec128Data self;
+    self.setSq(x0, x1);
+    return self;
+  }
+
+  //! @brief Set all two signed 64-bit integers.
+  static ASMJIT_INLINE Vec128Data fromSq(
+    int64_t x0)
+  {
+    Vec128Data self;
+    self.setSq(x0);
+    return self;
+  }
+
+  //! @brief Set all two unsigned 64-bit integers.
+  static ASMJIT_INLINE Vec128Data fromUq(
+    uint64_t x0, uint64_t x1)
+  {
+    Vec128Data self;
+    self.setUq(x0, x1);
+    return self;
+  }
+
+  //! @brief Set all two unsigned 64-bit integers.
+  static ASMJIT_INLINE Vec128Data fromUq(
+    uint64_t x0)
+  {
+    Vec128Data self;
+    self.setUq(x0);
+    return self;
+  }
+
+  //! @brief Set all four SP-FP floats.
+  static ASMJIT_INLINE Vec128Data fromSf(
+    float x0, float x1, float x2, float x3)
+  {
+    Vec128Data self;
+    self.setSf(x0, x1, x2, x3);
+    return self;
+  }
+
+  //! @brief Set all four SP-FP floats.
+  static ASMJIT_INLINE Vec128Data fromSf(
+    float x0)
+  {
+    Vec128Data self;
+    self.setSf(x0);
+    return self;
+  }
+
+  //! @brief Set all two DP-FP floats.
+  static ASMJIT_INLINE Vec128Data fromDf(
+    double x0, double x1)
+  {
+    Vec128Data self;
+    self.setDf(x0, x1);
+    return self;
+  }
+
+  //! @brief Set all two DP-FP floats.
+  static ASMJIT_INLINE Vec128Data fromDf(
+    double x0)
+  {
+    Vec128Data self;
+    self.setDf(x0);
+    return self;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  //! @brief Set all sixteen signed 8-bit integers.
+  ASMJIT_INLINE void setSb(
+    int8_t x0 , int8_t x1 , int8_t x2 , int8_t x3 ,
+    int8_t x4 , int8_t x5 , int8_t x6 , int8_t x7 ,
+    int8_t x8 , int8_t x9 , int8_t x10, int8_t x11,
+    int8_t x12, int8_t x13, int8_t x14, int8_t x15)
+  {
+    sb[0 ] = x0 ; sb[1 ] = x1 ; sb[2 ] = x2 ; sb[3 ] = x3 ;
+    sb[4 ] = x4 ; sb[5 ] = x5 ; sb[6 ] = x6 ; sb[7 ] = x7 ;
+    sb[8 ] = x8 ; sb[9 ] = x9 ; sb[10] = x10; sb[11] = x11;
+    sb[12] = x12; sb[13] = x13; sb[14] = x14; sb[15] = x15;
+  }
+
+  //! @brief Set all sixteen signed 8-bit integers.
+  ASMJIT_INLINE void setSb(
+    int8_t x0)
+  {
+    setUb(static_cast<uint8_t>(x0));
+  }
+
+  //! @brief Set all sixteen unsigned 8-bit integers.
+  ASMJIT_INLINE void setUb(
+    uint8_t x0 , uint8_t x1 , uint8_t x2 , uint8_t x3 ,
+    uint8_t x4 , uint8_t x5 , uint8_t x6 , uint8_t x7 ,
+    uint8_t x8 , uint8_t x9 , uint8_t x10, uint8_t x11,
+    uint8_t x12, uint8_t x13, uint8_t x14, uint8_t x15)
+  {
+    ub[0 ] = x0 ; ub[1 ] = x1 ; ub[2 ] = x2 ; ub[3 ] = x3 ;
+    ub[4 ] = x4 ; ub[5 ] = x5 ; ub[6 ] = x6 ; ub[7 ] = x7 ;
+    ub[8 ] = x8 ; ub[9 ] = x9 ; ub[10] = x10; ub[11] = x11;
+    ub[12] = x12; ub[13] = x13; ub[14] = x14; ub[15] = x15;
+  }
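+  // Note: the scalar setters below broadcast one value by multiplying with a
+  // repeating-ones pattern, e.g. 0x42 * 0x0101010101010101 ==
+  // 0x4242424242424242, which fills eight bytes per 64-bit store.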
+  //! @brief Set all sixteen unsigned 8-bit integers.
+  ASMJIT_INLINE void setUb(
+    uint8_t x0)
+  {
+    if (kArchHost64Bit) {
+      uint64_t t = static_cast<uint64_t>(x0) * ASMJIT_UINT64_C(0x0101010101010101);
+      uq[0] = t;
+      uq[1] = t;
+    }
+    else {
+      uint32_t t = static_cast<uint32_t>(x0) * static_cast<uint32_t>(0x01010101U);
+      ud[0] = t;
+      ud[1] = t;
+      ud[2] = t;
+      ud[3] = t;
+    }
+  }
+
+  //! @brief Set all eight signed 16-bit integers.
+  ASMJIT_INLINE void setSw(
+    int16_t x0, int16_t x1, int16_t x2, int16_t x3, int16_t x4, int16_t x5, int16_t x6, int16_t x7)
+  {
+    sw[0] = x0; sw[1] = x1; sw[2] = x2; sw[3] = x3;
+    sw[4] = x4; sw[5] = x5; sw[6] = x6; sw[7] = x7;
+  }
+
+  //! @brief Set all eight signed 16-bit integers.
+  ASMJIT_INLINE void setSw(
+    int16_t x0)
+  {
+    setUw(static_cast<uint16_t>(x0));
+  }
+
+  //! @brief Set all eight unsigned 16-bit integers.
+  ASMJIT_INLINE void setUw(
+    uint16_t x0, uint16_t x1, uint16_t x2, uint16_t x3, uint16_t x4, uint16_t x5, uint16_t x6, uint16_t x7)
+  {
+    uw[0] = x0; uw[1] = x1; uw[2] = x2; uw[3] = x3;
+    uw[4] = x4; uw[5] = x5; uw[6] = x6; uw[7] = x7;
+  }
+
+  //! @brief Set all eight unsigned 16-bit integers.
+  ASMJIT_INLINE void setUw(
+    uint16_t x0)
+  {
+    if (kArchHost64Bit) {
+      uint64_t t = static_cast<uint64_t>(x0) * ASMJIT_UINT64_C(0x0001000100010001);
+      uq[0] = t;
+      uq[1] = t;
+    }
+    else {
+      uint32_t t = static_cast<uint32_t>(x0) * static_cast<uint32_t>(0x00010001U);
+      ud[0] = t;
+      ud[1] = t;
+      ud[2] = t;
+      ud[3] = t;
+    }
+  }
+
+  //! @brief Set all four signed 32-bit integers.
+  ASMJIT_INLINE void setSd(
+    int32_t x0, int32_t x1, int32_t x2, int32_t x3)
+  {
+    sd[0] = x0; sd[1] = x1; sd[2] = x2; sd[3] = x3;
+  }
+
+  //! @brief Set all four signed 32-bit integers.
+  ASMJIT_INLINE void setSd(
+    int32_t x0)
+  {
+    setUd(static_cast<uint32_t>(x0));
+  }
+
+  //! @brief Set all four unsigned 32-bit integers.
+  ASMJIT_INLINE void setUd(
+    uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3)
+  {
+    ud[0] = x0; ud[1] = x1; ud[2] = x2; ud[3] = x3;
+  }
+
+  //! @brief Set all four unsigned 32-bit integers.
+  ASMJIT_INLINE void setUd(
+    uint32_t x0)
+  {
+    if (kArchHost64Bit) {
+      uint64_t t = (static_cast<uint64_t>(x0) << 32) + x0;
+      uq[0] = t;
+      uq[1] = t;
+    }
+    else {
+      ud[0] = x0;
+      ud[1] = x0;
+      ud[2] = x0;
+      ud[3] = x0;
+    }
+  }
+
+  //! @brief Set all two signed 64-bit integers.
+  ASMJIT_INLINE void setSq(
+    int64_t x0, int64_t x1)
+  {
+    sq[0] = x0; sq[1] = x1;
+  }
+
+  //! @brief Set all two signed 64-bit integers.
+  ASMJIT_INLINE void setSq(
+    int64_t x0)
+  {
+    sq[0] = x0; sq[1] = x0;
+  }
+
+  //! @brief Set all two unsigned 64-bit integers.
+  ASMJIT_INLINE void setUq(
+    uint64_t x0, uint64_t x1)
+  {
+    uq[0] = x0; uq[1] = x1;
+  }
+
+  //! @brief Set all two unsigned 64-bit integers.
+  ASMJIT_INLINE void setUq(
+    uint64_t x0)
+  {
+    uq[0] = x0; uq[1] = x0;
+  }
+
+  //! @brief Set all four SP-FP floats.
+  ASMJIT_INLINE void setSf(
+    float x0, float x1, float x2, float x3)
+  {
+    sf[0] = x0; sf[1] = x1; sf[2] = x2; sf[3] = x3;
+  }
+
+  //! @brief Set all four SP-FP floats.
+  ASMJIT_INLINE void setSf(
+    float x0)
+  {
+    sf[0] = x0; sf[1] = x0; sf[2] = x0; sf[3] = x0;
+  }
+
+  //! @brief Set all two DP-FP floats.
+  ASMJIT_INLINE void setDf(
+    double x0, double x1)
+  {
+    df[0] = x0; df[1] = x1;
+  }
+
+  //! @brief Set all two DP-FP floats.
+  ASMJIT_INLINE void setDf(
+    double x0)
+  {
+    df[0] = x0; df[1] = x0;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! @brief Array of sixteen signed 8-bit integers.
+  int8_t sb[16];
+  //! @brief Array of sixteen unsigned 8-bit integers.
+ uint8_t ub[16]; + //! @brief Array of eight signed 16-bit integers. + int16_t sw[8]; + //! @brief Array of eight unsigned 16-bit integers. + uint16_t uw[8]; + //! @brief Array of four signed 32-bit integers. + int32_t sd[4]; + //! @brief Array of four unsigned 32-bit integers. + uint32_t ud[4]; + //! @brief Array of two signed 64-bit integers. + int64_t sq[2]; + //! @brief Array of two unsigned 64-bit integers. + uint64_t uq[2]; + + //! @brief Array of four 32-bit single precision floating points. + float sf[4]; + //! @brief Array of two 64-bit double precision floating points. + double df[2]; +}; + +// ============================================================================ +// [asmjit::Vec256Data] +// ============================================================================ + +//! @brief 256-bit vector register data. +union Vec256Data { + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! @brief Set all thirty two signed 8-bit integers. + static ASMJIT_INLINE Vec256Data fromSb( + int8_t x0 , int8_t x1 , int8_t x2 , int8_t x3 , + int8_t x4 , int8_t x5 , int8_t x6 , int8_t x7 , + int8_t x8 , int8_t x9 , int8_t x10, int8_t x11, + int8_t x12, int8_t x13, int8_t x14, int8_t x15, + int8_t x16, int8_t x17, int8_t x18, int8_t x19, + int8_t x20, int8_t x21, int8_t x22, int8_t x23, + int8_t x24, int8_t x25, int8_t x26, int8_t x27, + int8_t x28, int8_t x29, int8_t x30, int8_t x31) + { + Vec256Data self; + self.setSb( + x0, x1 , x2 , x3 , x4 , x5 , x6 , x7 , x8 , x9 , x10, x11, x12, x13, x14, x15, + x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31); + return self; + } + + //! @brief Set all thirty two signed 8-bit integers. + static ASMJIT_INLINE Vec256Data fromSb( + int8_t x0) + { + Vec256Data self; + self.setSb(x0); + return self; + } + + //! @brief Set all thirty two unsigned 8-bit integers. + static ASMJIT_INLINE Vec256Data fromUb( + uint8_t x0 , uint8_t x1 , uint8_t x2 , uint8_t x3 , + uint8_t x4 , uint8_t x5 , uint8_t x6 , uint8_t x7 , + uint8_t x8 , uint8_t x9 , uint8_t x10, uint8_t x11, + uint8_t x12, uint8_t x13, uint8_t x14, uint8_t x15, + uint8_t x16, uint8_t x17, uint8_t x18, uint8_t x19, + uint8_t x20, uint8_t x21, uint8_t x22, uint8_t x23, + uint8_t x24, uint8_t x25, uint8_t x26, uint8_t x27, + uint8_t x28, uint8_t x29, uint8_t x30, uint8_t x31) + { + Vec256Data self; + self.setUb( + x0, x1 , x2 , x3 , x4 , x5 , x6 , x7 , x8 , x9 , x10, x11, x12, x13, x14, x15, + x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31); + return self; + } + + //! @brief Set all thirty two unsigned 8-bit integers. + static ASMJIT_INLINE Vec256Data fromUb( + uint8_t x0) + { + Vec256Data self; + self.setUb(x0); + return self; + } + + //! @brief Set all sixteen signed 16-bit integers. + static ASMJIT_INLINE Vec256Data fromSw( + int16_t x0, int16_t x1, int16_t x2 , int16_t x3 , int16_t x4 , int16_t x5 , int16_t x6 , int16_t x7 , + int16_t x8, int16_t x9, int16_t x10, int16_t x11, int16_t x12, int16_t x13, int16_t x14, int16_t x15) + { + Vec256Data self; + self.setSw(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15); + return self; + } + + //! @brief Set all sixteen signed 16-bit integers. + static ASMJIT_INLINE Vec256Data fromSw( + int16_t x0) + { + Vec256Data self; + self.setSw(x0); + return self; + } + + //! @brief Set all sixteen unsigned 16-bit integers. 
+ static ASMJIT_INLINE Vec256Data fromUw( + uint16_t x0, uint16_t x1, uint16_t x2 , uint16_t x3 , uint16_t x4 , uint16_t x5 , uint16_t x6 , uint16_t x7 , + uint16_t x8, uint16_t x9, uint16_t x10, uint16_t x11, uint16_t x12, uint16_t x13, uint16_t x14, uint16_t x15) + { + Vec256Data self; + self.setUw(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15); + return self; + } + + //! @brief Set all sixteen unsigned 16-bit integers. + static ASMJIT_INLINE Vec256Data fromUw( + uint16_t x0) + { + Vec256Data self; + self.setUw(x0); + return self; + } + + //! @brief Set all eight signed 32-bit integers. + static ASMJIT_INLINE Vec256Data fromSd( + int32_t x0, int32_t x1, int32_t x2, int32_t x3, + int32_t x4, int32_t x5, int32_t x6, int32_t x7) + { + Vec256Data self; + self.setSd(x0, x1, x2, x3, x4, x5, x6, x7); + return self; + } + + //! @brief Set all eight signed 32-bit integers. + static ASMJIT_INLINE Vec256Data fromSd( + int32_t x0) + { + Vec256Data self; + self.setSd(x0); + return self; + } + + //! @brief Set all eight unsigned 32-bit integers. + static ASMJIT_INLINE Vec256Data fromUd( + uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, + uint32_t x4, uint32_t x5, uint32_t x6, uint32_t x7) + { + Vec256Data self; + self.setUd(x0, x1, x2, x3, x4, x5, x6, x7); + return self; + } + + //! @brief Set all eight unsigned 32-bit integers. + static ASMJIT_INLINE Vec256Data fromUd( + uint32_t x0) + { + Vec256Data self; + self.setUd(x0); + return self; + } + + //! @brief Set all four signed 64-bit integers. + static ASMJIT_INLINE Vec256Data fromSq( + int64_t x0, int64_t x1, int64_t x2, int64_t x3) + { + Vec256Data self; + self.setSq(x0, x1, x2, x3); + return self; + } + + //! @brief Set all four signed 64-bit integers. + static ASMJIT_INLINE Vec256Data fromSq( + int64_t x0) + { + Vec256Data self; + self.setSq(x0); + return self; + } + + //! @brief Set all four unsigned 64-bit integers. + static ASMJIT_INLINE Vec256Data fromUq( + uint64_t x0, uint64_t x1, uint64_t x2, uint64_t x3) + { + Vec256Data self; + self.setUq(x0, x1, x2, x3); + return self; + } + + //! @brief Set all four unsigned 64-bit integers. + static ASMJIT_INLINE Vec256Data fromUq( + uint64_t x0) + { + Vec256Data self; + self.setUq(x0); + return self; + } + + //! @brief Set all eight SP-FP floats. + static ASMJIT_INLINE Vec256Data fromSf( + float x0, float x1, float x2, float x3, + float x4, float x5, float x6, float x7) + { + Vec256Data self; + self.setSf(x0, x1, x2, x3, x4, x5, x6, x7); + return self; + } + + //! @brief Set all eight SP-FP floats. + static ASMJIT_INLINE Vec256Data fromSf( + float x0) + { + Vec256Data self; + self.setSf(x0); + return self; + } + + //! @brief Set all four DP-FP floats. + static ASMJIT_INLINE Vec256Data fromDf( + double x0, double x1, double x2, double x3) + { + Vec256Data self; + self.setDf(x0, x1, x2, x3); + return self; + } + + //! @brief Set all four DP-FP floats. + static ASMJIT_INLINE Vec256Data fromDf( + double x0) + { + Vec256Data self; + self.setDf(x0); + return self; + } + + // -------------------------------------------------------------------------- + // [Accessors] + // -------------------------------------------------------------------------- + + //! @brief Set all thirty two signed 8-bit integers. 
+  ASMJIT_INLINE void setSb(
+    int8_t x0 , int8_t x1 , int8_t x2 , int8_t x3 ,
+    int8_t x4 , int8_t x5 , int8_t x6 , int8_t x7 ,
+    int8_t x8 , int8_t x9 , int8_t x10, int8_t x11,
+    int8_t x12, int8_t x13, int8_t x14, int8_t x15,
+    int8_t x16, int8_t x17, int8_t x18, int8_t x19,
+    int8_t x20, int8_t x21, int8_t x22, int8_t x23,
+    int8_t x24, int8_t x25, int8_t x26, int8_t x27,
+    int8_t x28, int8_t x29, int8_t x30, int8_t x31)
+  {
+    sb[0 ] = x0 ; sb[1 ] = x1 ; sb[2 ] = x2 ; sb[3 ] = x3 ;
+    sb[4 ] = x4 ; sb[5 ] = x5 ; sb[6 ] = x6 ; sb[7 ] = x7 ;
+    sb[8 ] = x8 ; sb[9 ] = x9 ; sb[10] = x10; sb[11] = x11;
+    sb[12] = x12; sb[13] = x13; sb[14] = x14; sb[15] = x15;
+    sb[16] = x16; sb[17] = x17; sb[18] = x18; sb[19] = x19;
+    sb[20] = x20; sb[21] = x21; sb[22] = x22; sb[23] = x23;
+    sb[24] = x24; sb[25] = x25; sb[26] = x26; sb[27] = x27;
+    sb[28] = x28; sb[29] = x29; sb[30] = x30; sb[31] = x31;
+  }
+
+  //! @brief Set all thirty two signed 8-bit integers.
+  ASMJIT_INLINE void setSb(
+    int8_t x0)
+  {
+    setUb(static_cast<uint8_t>(x0));
+  }
+
+  //! @brief Set all thirty two unsigned 8-bit integers.
+  ASMJIT_INLINE void setUb(
+    uint8_t x0 , uint8_t x1 , uint8_t x2 , uint8_t x3 ,
+    uint8_t x4 , uint8_t x5 , uint8_t x6 , uint8_t x7 ,
+    uint8_t x8 , uint8_t x9 , uint8_t x10, uint8_t x11,
+    uint8_t x12, uint8_t x13, uint8_t x14, uint8_t x15,
+    uint8_t x16, uint8_t x17, uint8_t x18, uint8_t x19,
+    uint8_t x20, uint8_t x21, uint8_t x22, uint8_t x23,
+    uint8_t x24, uint8_t x25, uint8_t x26, uint8_t x27,
+    uint8_t x28, uint8_t x29, uint8_t x30, uint8_t x31)
+  {
+    ub[0 ] = x0 ; ub[1 ] = x1 ; ub[2 ] = x2 ; ub[3 ] = x3 ;
+    ub[4 ] = x4 ; ub[5 ] = x5 ; ub[6 ] = x6 ; ub[7 ] = x7 ;
+    ub[8 ] = x8 ; ub[9 ] = x9 ; ub[10] = x10; ub[11] = x11;
+    ub[12] = x12; ub[13] = x13; ub[14] = x14; ub[15] = x15;
+    ub[16] = x16; ub[17] = x17; ub[18] = x18; ub[19] = x19;
+    ub[20] = x20; ub[21] = x21; ub[22] = x22; ub[23] = x23;
+    ub[24] = x24; ub[25] = x25; ub[26] = x26; ub[27] = x27;
+    ub[28] = x28; ub[29] = x29; ub[30] = x30; ub[31] = x31;
+  }
+
+  //! @brief Set all thirty two unsigned 8-bit integers.
+  ASMJIT_INLINE void setUb(
+    uint8_t x0)
+  {
+    if (kArchHost64Bit) {
+      uint64_t t = static_cast<uint64_t>(x0) * ASMJIT_UINT64_C(0x0101010101010101);
+      uq[0] = t;
+      uq[1] = t;
+      uq[2] = t;
+      uq[3] = t;
+    }
+    else {
+      uint32_t t = static_cast<uint32_t>(x0) * static_cast<uint32_t>(0x01010101U);
+      ud[0] = t;
+      ud[1] = t;
+      ud[2] = t;
+      ud[3] = t;
+      ud[4] = t;
+      ud[5] = t;
+      ud[6] = t;
+      ud[7] = t;
+    }
+  }
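+
+  // For example, setUb(0xAB) computes t = 0xAB * 0x0101010101010101 =
+  // 0xABABABABABABABAB on a 64-bit host, broadcasting the byte to all
+  // thirty two lanes with four 64-bit stores instead of thirty two 8-bit ones.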
+
+  //! @brief Set all sixteen signed 16-bit integers.
+  ASMJIT_INLINE void setSw(
+    int16_t x0, int16_t x1, int16_t x2, int16_t x3, int16_t x4, int16_t x5, int16_t x6, int16_t x7,
+    int16_t x8, int16_t x9, int16_t x10, int16_t x11, int16_t x12, int16_t x13, int16_t x14, int16_t x15)
+  {
+    sw[0 ] = x0 ; sw[1 ] = x1 ; sw[2 ] = x2 ; sw[3 ] = x3 ;
+    sw[4 ] = x4 ; sw[5 ] = x5 ; sw[6 ] = x6 ; sw[7 ] = x7 ;
+    sw[8 ] = x8 ; sw[9 ] = x9 ; sw[10] = x10; sw[11] = x11;
+    sw[12] = x12; sw[13] = x13; sw[14] = x14; sw[15] = x15;
+  }
+
+  //! @brief Set all sixteen signed 16-bit integers.
+  ASMJIT_INLINE void setSw(
+    int16_t x0)
+  {
+    setUw(static_cast<uint16_t>(x0));
+  }
+
+  //! @brief Set all sixteen unsigned 16-bit integers.
+  ASMJIT_INLINE void setUw(
+    uint16_t x0, uint16_t x1, uint16_t x2 , uint16_t x3 , uint16_t x4 , uint16_t x5 , uint16_t x6 , uint16_t x7 ,
+    uint16_t x8, uint16_t x9, uint16_t x10, uint16_t x11, uint16_t x12, uint16_t x13, uint16_t x14, uint16_t x15)
+  {
+    uw[0 ] = x0 ; uw[1 ] = x1 ; uw[2 ] = x2 ; uw[3 ] = x3 ;
+    uw[4 ] = x4 ; uw[5 ] = x5 ; uw[6 ] = x6 ; uw[7 ] = x7 ;
+    uw[8 ] = x8 ; uw[9 ] = x9 ; uw[10] = x10; uw[11] = x11;
+    uw[12] = x12; uw[13] = x13; uw[14] = x14; uw[15] = x15;
+  }
+
+  //! @brief Set all sixteen unsigned 16-bit integers.
+  ASMJIT_INLINE void setUw(
+    uint16_t x0)
+  {
+    if (kArchHost64Bit) {
+      uint64_t t = static_cast<uint64_t>(x0) * ASMJIT_UINT64_C(0x0001000100010001);
+      uq[0] = t;
+      uq[1] = t;
+      uq[2] = t;
+      uq[3] = t;
+    }
+    else {
+      uint32_t t = static_cast<uint32_t>(x0) * static_cast<uint32_t>(0x00010001U);
+      ud[0] = t;
+      ud[1] = t;
+      ud[2] = t;
+      ud[3] = t;
+      ud[4] = t;
+      ud[5] = t;
+      ud[6] = t;
+      ud[7] = t;
+    }
+  }
+
+  //! @brief Set all eight signed 32-bit integers.
+  ASMJIT_INLINE void setSd(
+    int32_t x0, int32_t x1, int32_t x2, int32_t x3,
+    int32_t x4, int32_t x5, int32_t x6, int32_t x7)
+  {
+    sd[0] = x0; sd[1] = x1; sd[2] = x2; sd[3] = x3;
+    sd[4] = x4; sd[5] = x5; sd[6] = x6; sd[7] = x7;
+  }
+
+  //! @brief Set all eight signed 32-bit integers.
+  ASMJIT_INLINE void setSd(
+    int32_t x0)
+  {
+    setUd(static_cast<uint32_t>(x0));
+  }
+
+  //! @brief Set all eight unsigned 32-bit integers.
+  ASMJIT_INLINE void setUd(
+    uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
+    uint32_t x4, uint32_t x5, uint32_t x6, uint32_t x7)
+  {
+    ud[0] = x0; ud[1] = x1; ud[2] = x2; ud[3] = x3;
+    ud[4] = x4; ud[5] = x5; ud[6] = x6; ud[7] = x7;
+  }
+
+  //! @brief Set all eight unsigned 32-bit integers.
+  ASMJIT_INLINE void setUd(
+    uint32_t x0)
+  {
+    if (kArchHost64Bit) {
+      uint64_t t = (static_cast<uint64_t>(x0) << 32) + x0;
+      uq[0] = t;
+      uq[1] = t;
+      uq[2] = t;
+      uq[3] = t;
+    }
+    else {
+      ud[0] = x0;
+      ud[1] = x0;
+      ud[2] = x0;
+      ud[3] = x0;
+      ud[4] = x0;
+      ud[5] = x0;
+      ud[6] = x0;
+      ud[7] = x0;
+    }
+  }
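+
+  // Similarly, setUd(7) forms t = 0x0000000700000007 and fills the 256-bit
+  // image with four 64-bit writes instead of eight 32-bit ones.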
+
+  //! @brief Set all four signed 64-bit integers.
+  ASMJIT_INLINE void setSq(
+    int64_t x0, int64_t x1, int64_t x2, int64_t x3)
+  {
+    sq[0] = x0; sq[1] = x1; sq[2] = x2; sq[3] = x3;
+  }
+
+  //! @brief Set all four signed 64-bit integers.
+  ASMJIT_INLINE void setSq(
+    int64_t x0)
+  {
+    sq[0] = x0; sq[1] = x0; sq[2] = x0; sq[3] = x0;
+  }
+
+  //! @brief Set all four unsigned 64-bit integers.
+  ASMJIT_INLINE void setUq(
+    uint64_t x0, uint64_t x1, uint64_t x2, uint64_t x3)
+  {
+    uq[0] = x0; uq[1] = x1; uq[2] = x2; uq[3] = x3;
+  }
+
+  //! @brief Set all four unsigned 64-bit integers.
+  ASMJIT_INLINE void setUq(
+    uint64_t x0)
+  {
+    uq[0] = x0; uq[1] = x0; uq[2] = x0; uq[3] = x0;
+  }
+
+  //! @brief Set all eight SP-FP floats.
+  ASMJIT_INLINE void setSf(
+    float x0, float x1, float x2, float x3,
+    float x4, float x5, float x6, float x7)
+  {
+    sf[0] = x0; sf[1] = x1; sf[2] = x2; sf[3] = x3;
+    sf[4] = x4; sf[5] = x5; sf[6] = x6; sf[7] = x7;
+  }
+
+  //! @brief Set all eight SP-FP floats.
+  ASMJIT_INLINE void setSf(
+    float x0)
+  {
+    sf[0] = x0; sf[1] = x0; sf[2] = x0; sf[3] = x0;
+    sf[4] = x0; sf[5] = x0; sf[6] = x0; sf[7] = x0;
+  }
+
+  //! @brief Set all four DP-FP floats.
+  ASMJIT_INLINE void setDf(
+    double x0, double x1, double x2, double x3)
+  {
+    df[0] = x0; df[1] = x1; df[2] = x2; df[3] = x3;
+  }
+
+  //! @brief Set all four DP-FP floats.
+  ASMJIT_INLINE void setDf(
+    double x0)
+  {
+    df[0] = x0; df[1] = x0; df[2] = x0; df[3] = x0;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! @brief Array of thirty two signed 8-bit integers.
+  int8_t sb[32];
+  //! @brief Array of thirty two unsigned 8-bit integers.
+  uint8_t ub[32];
+  //! @brief Array of sixteen signed 16-bit integers.
+  int16_t sw[16];
+  //! @brief Array of sixteen unsigned 16-bit integers.
+  uint16_t uw[16];
+  //! @brief Array of eight signed 32-bit integers.
+  int32_t sd[8];
+  //! @brief Array of eight unsigned 32-bit integers.
+  uint32_t ud[8];
+  //! @brief Array of four signed 64-bit integers.
+  int64_t sq[4];
+  //! @brief Array of four unsigned 64-bit integers.
+  uint64_t uq[4];
+
+  //! @brief Array of eight 32-bit single precision floating points.
+  float sf[8];
+  //! @brief Array of four 64-bit double precision floating points.
+  double df[4];
+};
+
+//! @}
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../base/apiend.h"
+
+// [Guard]
+#endif // _ASMJIT_BASE_VECTYPES_H
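A short usage sketch of the data unions above (hypothetical caller code; only
the constructors declared in this header are assumed):

    using namespace asmjit;

    Vec256Data ones = Vec256Data::fromUd(1);  // broadcast 1 to all eight dwords
    Vec256Data ramp = Vec256Data::fromSd(0, 1, 2, 3, 4, 5, 6, 7);

    // All members alias the same 32 bytes, so the data can be read back at any
    // granularity; on a little-endian host ramp.uq[0] == 0x0000000100000000.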
diff --git a/src/asmjit/base/vmem.cpp b/src/asmjit/base/vmem.cpp
new file mode 100644
index 0000000..89ba80c
--- /dev/null
+++ b/src/asmjit/base/vmem.cpp
@@ -0,0 +1,145 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Export]
+#define ASMJIT_EXPORTS
+
+// [Dependencies - AsmJit]
+#include "../base/intutil.h"
+#include "../base/vmem.h"
+
+// [Dependencies - Windows]
+#if defined(ASMJIT_OS_WINDOWS)
+# include <windows.h>
+#endif // ASMJIT_OS_WINDOWS
+
+// [Dependencies - Posix]
+#if defined(ASMJIT_OS_POSIX)
+# include <sys/types.h>
+# include <sys/mman.h>
+# include <unistd.h>
+#endif // ASMJIT_OS_POSIX
+
+// [Api-Begin]
+#include "../base/apibegin.h"
+
+namespace asmjit {
+
+// ============================================================================
+// [asmjit::VMem - Windows]
+// ============================================================================
+
+#if defined(ASMJIT_OS_WINDOWS)
+struct VMemLocal {
+  VMemLocal() {
+    SYSTEM_INFO info;
+    GetSystemInfo(&info);
+
+    alignment = info.dwAllocationGranularity;
+    pageSize = IntUtil::roundUpToPowerOf2(info.dwPageSize);
+  }
+
+  size_t alignment;
+  size_t pageSize;
+};
+
+static VMemLocal& vm() {
+  static VMemLocal vm;
+  return vm;
+}
+
+void* VMem::alloc(size_t length, size_t* allocated, bool canExecute) {
+  return allocProcessMemory(GetCurrentProcess(), length, allocated, canExecute);
+}
+
+void VMem::release(void* addr, size_t length) {
+  return releaseProcessMemory(GetCurrentProcess(), addr, length);
+}
+
+void* VMem::allocProcessMemory(HANDLE hProcess, size_t length, size_t* allocated, bool canExecute) {
+  // VirtualAlloc rounds the allocated size up to the page size automatically.
+  size_t msize = IntUtil::roundUp(length, vm().pageSize);
+
+  // Windows XP SP2 / Vista allow Data Execution Prevention (DEP).
+  DWORD protect = canExecute ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE;
+  LPVOID mbase = VirtualAllocEx(hProcess, NULL, msize, MEM_COMMIT | MEM_RESERVE, protect);
+  if (mbase == NULL) return NULL;
+
+  ASMJIT_ASSERT(IntUtil::isAligned(reinterpret_cast<uintptr_t>(mbase), vm().alignment));
+
+  if (allocated != NULL)
+    *allocated = msize;
+  return mbase;
+}
+
+void VMem::releaseProcessMemory(HANDLE hProcess, void* addr, size_t /* length */) {
+  VirtualFreeEx(hProcess, addr, 0, MEM_RELEASE);
+}
+
+size_t VMem::getAlignment() {
+  return vm().alignment;
+}
+
+size_t VMem::getPageSize() {
+  return vm().pageSize;
+}
+#endif // ASMJIT_OS_WINDOWS
+
+// ============================================================================
+// [asmjit::VMem - Posix]
+// ============================================================================
+
+#if defined(ASMJIT_OS_POSIX)
+
+// MacOS uses MAP_ANON instead of MAP_ANONYMOUS.
+#if !defined(MAP_ANONYMOUS)
+# define MAP_ANONYMOUS MAP_ANON
+#endif // MAP_ANONYMOUS
+
+struct VMemLocal {
+  VMemLocal() {
+    alignment = pageSize = ::getpagesize();
+  }
+
+  size_t alignment;
+  size_t pageSize;
+};
+
+static VMemLocal& vm() {
+  static VMemLocal vm;
+  return vm;
+}
+
+void* VMem::alloc(size_t length, size_t* allocated, bool canExecute) {
+  size_t msize = IntUtil::roundUp(length, vm().pageSize);
+  int protection = PROT_READ | PROT_WRITE | (canExecute ? PROT_EXEC : 0);
+
+  void* mbase = ::mmap(NULL, msize, protection, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  if (mbase == MAP_FAILED)
+    return NULL;
+
+  if (allocated != NULL)
+    *allocated = msize;
+  return mbase;
+}
+
+void VMem::release(void* addr, size_t length) {
+  munmap(addr, length);
+}
+
+size_t VMem::getAlignment() {
+  return vm().alignment;
+}
+
+size_t VMem::getPageSize() {
+  return vm().pageSize;
+}
+#endif // ASMJIT_OS_POSIX
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../base/apiend.h"
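A minimal sketch of the intended call sequence (hypothetical caller; machineCode
and codeSize stand in for a generated buffer, and error handling is elided):

    size_t allocated;
    void* p = asmjit::VMem::alloc(codeSize, &allocated, true); // executable pages

    ::memcpy(p, machineCode, codeSize);  // copy the generated code
    typedef int (*Func)(void);
    int result = reinterpret_cast<Func>(p)();

    asmjit::VMem::release(p, allocated);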
diff --git a/src/asmjit/base/vmem.h b/src/asmjit/base/vmem.h
new file mode 100644
index 0000000..5e95237
--- /dev/null
+++ b/src/asmjit/base/vmem.h
@@ -0,0 +1,76 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Guard]
+#ifndef _ASMJIT_BASE_VMEM_H
+#define _ASMJIT_BASE_VMEM_H
+
+// [Api-Begin]
+#include "../base/apibegin.h"
+
+namespace asmjit {
+
+//! @addtogroup asmjit_base
+//! @{
+
+// ============================================================================
+// [asmjit::VMem]
+// ============================================================================
+
+//! @brief Class that helps with allocating memory for executing code
+//! generated by a JIT compiler.
+//!
+//! These functions allocate and free memory that code can be executed from.
+//! If the processor and operating system support execution protection, code
+//! can't be run from normally malloc()'ed memory.
+//!
+//! The functions are implemented in an operating system dependent way:
+//! VirtualAlloc() is used on Windows and mmap() on POSIX systems. If you want
+//! to study or write your own functions, see the VirtualAlloc() or mmap()
+//! documentation (depending on your target OS).
+//!
+//! On POSIX systems the mprotect() function can also be used to enable
+//! execution protection on a malloc()'ed memory block.
+struct VMem {
+  //! @brief Allocate virtual memory.
+  //!
+  //! Pages are readable/writeable, but they are not guaranteed to be
+  //! executable unless 'canExecute' is true. Returns the address of
+  //! allocated memory, or NULL on failure.
+  static ASMJIT_API void* alloc(size_t length, size_t* allocated, bool canExecute);
+
+  //! @brief Free memory allocated by @c alloc().
+  static ASMJIT_API void release(void* addr, size_t length);
+
+#if defined(ASMJIT_OS_WINDOWS)
+  //! @brief Allocate virtual memory of @a hProcess.
+  //!
+  //! @note This function is Windows specific.
+  static ASMJIT_API void* allocProcessMemory(HANDLE hProcess, size_t length, size_t* allocated, bool canExecute);
+
+  //! @brief Free virtual memory of @a hProcess.
+  //!
+  //! @note This function is Windows specific.
+  static ASMJIT_API void releaseProcessMemory(HANDLE hProcess, void* addr, size_t length);
+#endif // ASMJIT_OS_WINDOWS
+
+  //! @brief Get the alignment guaranteed by alloc().
+  static ASMJIT_API size_t getAlignment();
+
+  //! @brief Get the size of a single page.
+  static ASMJIT_API size_t getPageSize();
+};
+
+//! @}
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../base/apiend.h"
+
+// [Guard]
+#endif // _ASMJIT_BASE_VMEM_H
diff --git a/src/asmjit/base/zone.cpp b/src/asmjit/base/zone.cpp
new file mode 100644
index 0000000..a80266b
--- /dev/null
+++ b/src/asmjit/base/zone.cpp
@@ -0,0 +1,163 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Export]
+#define ASMJIT_EXPORTS
+
+// [Dependencies - AsmJit]
+#include "../base/defs.h"
+#include "../base/intutil.h"
+#include "../base/zone.h"
+
+// [Dependencies - C]
+#include <stdarg.h>
+
+// [Api-Begin]
+#include "../base/apibegin.h"
+
+namespace asmjit {
+
+// ============================================================================
+// [asmjit::Zone - Construction / Destruction]
+// ============================================================================
+
+Zone::Zone(size_t chunkSize) {
+  _chunks = NULL;
+  _chunkSize = chunkSize;
+}
+
+Zone::~Zone() {
+  reset();
+}
+
+// ============================================================================
+// [asmjit::Zone - Clear / Reset]
+// ============================================================================
+
+void Zone::clear() {
+  Chunk* cur = _chunks;
+
+  if (cur == NULL)
+    return;
+
+  cur = cur->prev;
+  while (cur != NULL) {
+    Chunk* prev = cur->prev;
+    ::free(cur);
+    cur = prev;
+  }
+
+  _chunks->pos = 0;
+  _chunks->prev = NULL;
+}
+
+void Zone::reset() {
+  Chunk* cur = _chunks;
+  _chunks = NULL;
+
+  while (cur != NULL) {
+    Chunk* prev = cur->prev;
+    ::free(cur);
+    cur = prev;
+  }
+}
+
+// ============================================================================
+// [asmjit::Zone - Alloc]
+// ============================================================================
+
+void* Zone::_alloc(size_t size) {
+  Chunk* cur = _chunks;
+  ASMJIT_ASSERT(cur == NULL || cur->getRemainingSize() < size);
+
+  size_t chunkSize = _chunkSize;
+  if (chunkSize < size)
+    chunkSize = size;
+
+  cur = static_cast<Chunk*>(::malloc(sizeof(Chunk) - sizeof(void*) + chunkSize));
+  if (cur == NULL)
+    return NULL;
+
+  cur->prev = _chunks;
+  cur->pos = 0;
+  cur->size = chunkSize;
+
+  _chunks = cur;
+
+  uint8_t* p = cur->data + cur->pos;
+  cur->pos += size;
+
+  ASMJIT_ASSERT(cur->pos <= cur->size);
+  return (void*)p;
+}
+
+void* Zone::_calloc(size_t size) {
+  void* p = _alloc(size);
+
+  if (p != NULL)
+    ::memset(p, 0, size);
+
+  return p;
+}
+
+void* Zone::dup(const void* data, size_t size) {
+  if (data == NULL)
+    return NULL;
+
+  if (size == 0)
+    return NULL;
+
+  void* m = alloc(size);
+  if (m == NULL)
+    return NULL;
+
+  ::memcpy(m, data, size);
+ 
return m; +} + +char* Zone::sdup(const char* str) { + if (str == NULL) + return NULL; + + size_t len = strlen(str); + if (len == 0) + return NULL; + + // Include NULL terminator and limit string length. + if (++len > 256) + len = 256; + + char* m = static_cast(alloc(len)); + if (m == NULL) + return NULL; + + ::memcpy(m, str, len); + m[len - 1] = '\0'; + return m; +} + +char* Zone::sformat(const char* fmt, ...) { + if (fmt == NULL) + return NULL; + + char buf[256]; + size_t len; + + va_list ap; + va_start(ap, fmt); + len = vsnprintf(buf, 256, fmt, ap); + va_end(ap); + + len = IntUtil::iMin(len, 255); + buf[len++] = 0; + + return static_cast(dup(buf, len)); +} + +} // asmjit namespace + +// [Api-End] +#include "../base/apiend.h" diff --git a/src/asmjit/base/zone.h b/src/asmjit/base/zone.h new file mode 100644 index 0000000..4d11248 --- /dev/null +++ b/src/asmjit/base/zone.h @@ -0,0 +1,191 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. +// +// [License] +// Zlib - See LICENSE.md file in the package. + +// [Guard] +#ifndef _ASMJIT_BASE_ZONE_H +#define _ASMJIT_BASE_ZONE_H + +// [Api-Begin] +#include "../base/apibegin.h" + +namespace asmjit { + +//! @addtogroup asmjit_base +//! @{ + +// ============================================================================ +// [asmjit::Zone] +// ============================================================================ + +//! @brief Fast incremental memory allocator. +//! +//! Memory allocator designed to allocate small objects that will be invalidated +//! (free) all at once. +struct Zone { + // -------------------------------------------------------------------------- + // [Chunk] + // -------------------------------------------------------------------------- + + //! @internal + //! + //! @brief One allocated chunk of memory. + struct Chunk { + //! @brief Get count of remaining (unused) bytes in chunk. + ASMJIT_INLINE size_t getRemainingSize() const { return size - pos; } + + //! @brief Link to previous chunk. + Chunk* prev; + //! @brief Position in this chunk. + size_t pos; + //! @brief Size of this chunk (in bytes). + size_t size; + + //! @brief Data. + uint8_t data[sizeof(void*)]; + }; + + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! @brief Create a new instance of @c Zone allocator. + //! + //! @param chunkSize Default size of the first chunk. + ASMJIT_API Zone(size_t chunkSize); + + //! @brief Destroy @ref Zone instance. + ASMJIT_API ~Zone(); + + // -------------------------------------------------------------------------- + // [Clear / Reset] + // -------------------------------------------------------------------------- + + //! @brief Free all allocated memory except first block that remains for reuse. + //! + //! Note that this method will invalidate all instances using this memory + //! allocated by this zone instance. + ASMJIT_API void clear(); + + //! @brief Free all allocated memory at once. + //! + //! Note that this method will invalidate all instances using this memory + //! allocated by this zone instance. + ASMJIT_API void reset(); + + // -------------------------------------------------------------------------- + // [Accessors] + // -------------------------------------------------------------------------- + + //! @brief Get (default) chunk size. 
+  ASMJIT_INLINE size_t getChunkSize() const { return _chunkSize; }
+
+  // --------------------------------------------------------------------------
+  // [Alloc]
+  // --------------------------------------------------------------------------
+
+  //! @brief Allocate @c size bytes of memory.
+  //!
+  //! A pointer allocated this way remains valid until the @c Zone object is
+  //! destroyed. To create a class instance this way, use placement @c new
+  //! and an explicit destructor call:
+  //!
+  //! @code
+  //! // Example of simple class allocation.
+  //! using namespace asmjit;
+  //!
+  //! // Your class.
+  //! class Object
+  //! {
+  //!   // members...
+  //! };
+  //!
+  //! // Your function
+  //! void f()
+  //! {
+  //!   // Create zone object with chunk size of 65536 bytes.
+  //!   Zone zone(65536);
+  //!
+  //!   // Create your objects using zone allocation, for example:
+  //!   Object* obj = new(zone.alloc(sizeof(Object))) Object();
+  //!
+  //!   // ... lifetime of your objects ...
+  //!
+  //!   // Destroy your objects:
+  //!   obj->~Object();
+  //!
+  //!   // Zone destructor will free all memory allocated through it, you can
+  //!   // call @c zone.reset() if you want to reuse the current @ref Zone.
+  //! }
+  //! @endcode
+  ASMJIT_INLINE void* alloc(size_t size) {
+    Chunk* cur = _chunks;
+
+    if (cur == NULL || cur->getRemainingSize() < size)
+      return _alloc(size);
+
+    uint8_t* p = cur->data + cur->pos;
+
+    cur->pos += size;
+    ASMJIT_ASSERT(cur->pos <= cur->size);
+
+    return (void*)p;
+  }
+
+  //! @brief Like @ref alloc(), but returns T*.
+  template<typename T>
+  ASMJIT_INLINE T* allocT(size_t size = sizeof(T)) {
+    return static_cast<T*>(alloc(size));
+  }
+
+  //! @internal
+  ASMJIT_API void* _alloc(size_t size);
+
+  //! @brief Allocate @c size bytes of zeroed memory.
+  ASMJIT_INLINE void* calloc(size_t size) {
+    Chunk* cur = _chunks;
+
+    if (cur == NULL || cur->getRemainingSize() < size)
+      return _calloc(size);
+
+    uint8_t* p = cur->data + cur->pos;
+
+    cur->pos += size;
+    ASMJIT_ASSERT(cur->pos <= cur->size);
+
+    ::memset(p, 0, size);
+    return (void*)p;
+  }
+
+  //! @internal
+  ASMJIT_API void* _calloc(size_t size);
+
+  //! @brief Helper to duplicate data.
+  ASMJIT_API void* dup(const void* data, size_t size);
+
+  //! @brief Helper to duplicate string.
+  ASMJIT_API char* sdup(const char* str);
+
+  //! @brief Helper to duplicate formatted string, maximum length is 256 bytes.
+  ASMJIT_API char* sformat(const char* fmt, ...);
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! @brief Last allocated chunk of memory.
+  Chunk* _chunks;
+  //! @brief Default chunk size.
+  size_t _chunkSize;
+};
+
+//! @}
+
+} // asmjit namespace
+
+// [Api-End]
+#include "../base/apiend.h"
+
+#endif // _ASMJIT_BASE_ZONE_H
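The typed and string helpers keep call sites terse; a short sketch (hypothetical
caller, assuming only the Zone API above):

    asmjit::Zone zone(8192);

    int* pair = zone.allocT<int>(2 * sizeof(int)); // typed, uninitialized
    char* name = zone.sdup("entry_point");         // bounded duplicate
    char* text = zone.sformat("L%u:", 42u);        // formatted into zone memory

    zone.reset(); // frees pair, name and text at once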
diff --git a/src/asmjit/build.h b/src/asmjit/build.h
new file mode 100644
index 0000000..48edadb
--- /dev/null
+++ b/src/asmjit/build.h
@@ -0,0 +1,274 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Guard]
+#ifndef _ASMJIT_BUILD_H
+#define _ASMJIT_BUILD_H
+
+// [Include]
+#if !defined(ASMJIT_CONFIG_FILE)
+#include "config.h"
+#endif // !ASMJIT_CONFIG_FILE
+
+// Turn off deprecation warnings for this compiler when compiling AsmJit.
+#if defined(ASMJIT_EXPORTS) && defined(_MSC_VER)
+# if !defined(_CRT_SECURE_NO_DEPRECATE)
+#  define _CRT_SECURE_NO_DEPRECATE
+# endif // !_CRT_SECURE_NO_DEPRECATE
+# if !defined(_CRT_SECURE_NO_WARNINGS)
+#  define _CRT_SECURE_NO_WARNINGS
+# endif // !_CRT_SECURE_NO_WARNINGS
+#endif // ASMJIT_EXPORTS
+
+// Default includes.
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <new>
+
+// ============================================================================
+// [ASMJIT_OS]
+// ============================================================================
+
+#if defined(WINDOWS) || defined(_WINDOWS) || defined(__WINDOWS__) || defined(_WIN32) || defined(_WIN64)
+# define ASMJIT_OS_WINDOWS
+#elif defined(__linux__) || defined(__unix__) || \
+      defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__NetBSD__) || \
+      defined(__DragonFly__) || defined(__BSD__) || defined(__FREEBSD__) || \
+      defined(__APPLE__)
+# define ASMJIT_OS_POSIX
+#else
+# warning "AsmJit - Can't match host operating system, using ASMJIT_OS_POSIX"
+# define ASMJIT_OS_POSIX
+#endif
+
+// ============================================================================
+// [ASMJIT_HOST]
+// ============================================================================

+// Define it only if it's not defined. On some systems a -D option can be
+// passed to the compiler to bypass this autodetection.
+#if !defined(ASMJIT_HOST_X86) && !defined(ASMJIT_HOST_X64)
+# if defined(__x86_64__) || defined(__LP64) || defined(__IA64__) || defined(_M_X64) || defined(_WIN64)
+#  define ASMJIT_HOST_X64
+#  define ASMJIT_HOST_LE
+# else
+// _M_IX86, __INTEL__, __i386__
+#  define ASMJIT_HOST_X86
+#  define ASMJIT_HOST_LE
+# endif
+#endif
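+
+// For example (illustrative compiler invocation, not a definition in this
+// header), passing the macro on the command line:
+//
+//   g++ -DASMJIT_HOST_X64 -c src/asmjit/base/assembler.cpp
+//
+// skips the detection above and targets x64 explicitly.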
+
+// ============================================================================
+// [ASMJIT_BUILD]
+// ============================================================================
+
+// Build host architecture if no architecture is selected.
+#if !defined(ASMJIT_BUILD_HOST) && \
+    !defined(ASMJIT_BUILD_X86) && \
+    !defined(ASMJIT_BUILD_X64)
+# define ASMJIT_BUILD_HOST
+#endif
+
+// Autodetect host architecture if enabled.
+#if defined(ASMJIT_BUILD_HOST)
+# if defined(ASMJIT_HOST_X86) && !defined(ASMJIT_BUILD_X86)
+#  define ASMJIT_BUILD_X86
+# endif // ASMJIT_HOST_X86 && !ASMJIT_BUILD_X86
+# if defined(ASMJIT_HOST_X64) && !defined(ASMJIT_BUILD_X64)
+#  define ASMJIT_BUILD_X64
+# endif // ASMJIT_HOST_X64 && !ASMJIT_BUILD_X64
+#endif // ASMJIT_BUILD_HOST
+
+// ============================================================================
+// [ASMJIT_API]
+// ============================================================================
+
+#if !defined(ASMJIT_API)
+# if defined(ASMJIT_STATIC)
+#  define ASMJIT_API
+# elif defined(ASMJIT_OS_WINDOWS)
+#  if defined(__GNUC__)
+#   if defined(ASMJIT_EXPORTS)
+#    define ASMJIT_API __attribute__((dllexport))
+#   else
+#    define ASMJIT_API __attribute__((dllimport))
+#   endif // ASMJIT_EXPORTS
+#  else
+#   if defined(ASMJIT_EXPORTS)
+#    define ASMJIT_API __declspec(dllexport)
+#   else
+#    define ASMJIT_API __declspec(dllimport)
+#   endif // ASMJIT_EXPORTS
+#  endif // __GNUC__
+# else
+#  if defined(__GNUC__)
+#   if __GNUC__ >= 4
+#    define ASMJIT_API __attribute__((visibility("default")))
+#    define ASMJIT_VAR extern ASMJIT_API
+#   endif // __GNUC__ >= 4
+#  endif // __GNUC__
+# endif
+#endif // ASMJIT_API
+
+#if !defined(ASMJIT_VAR)
+# define ASMJIT_VAR extern ASMJIT_API
+#endif // !ASMJIT_VAR
+
+// ============================================================================
+// [ASMJIT_INLINE]
+// ============================================================================
+
+#if defined(_MSC_VER)
+# define ASMJIT_INLINE __forceinline
+#elif defined(__GNUC__) || defined(__clang__) && !defined(__MINGW32__)
+# define ASMJIT_INLINE inline __attribute__((always_inline))
+#else
+# define ASMJIT_INLINE inline
+#endif
+
+// ============================================================================
+// [ASMJIT_ENUM]
+// ============================================================================
+
+#if defined(_MSC_VER)
+# define ASMJIT_ENUM(_Name_) enum _Name_ : uint32_t
+#else
+# define ASMJIT_ENUM(_Name_) enum _Name_
+#endif
+
+// ============================================================================
+// [_ASMJIT_HOST_INDEX]
+// ============================================================================
+
+#if defined(ASMJIT_HOST_LE)
+# define _ASMJIT_HOST_INDEX(_Total_, _Index_) (_Index_)
+#else
+# define _ASMJIT_HOST_INDEX(_Total_, _Index_) ((_Total_) - 1 - (_Index_))
+#endif
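+
+// For example, _ASMJIT_HOST_INDEX(4, 0) evaluates to 0 on a little-endian host
+// and to 3 on a big-endian one, so index 0 always refers to the least
+// significant byte of a 4-byte value.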
+
+// ============================================================================
+// [ASMJIT_ARRAY_SIZE]
+// ============================================================================
+
+#define ASMJIT_ARRAY_SIZE(_Array_) (sizeof(_Array_) / sizeof(*_Array_))
+
+// ============================================================================
+// [ASMJIT_NO_COPY]
+// ============================================================================
+
+#define ASMJIT_NO_COPY(_Type_) \
+private: \
+  ASMJIT_INLINE _Type_(const _Type_& other); \
+  ASMJIT_INLINE _Type_& operator=(const _Type_& other); \
+public:
+
+// ============================================================================
+// [ASMJIT_DEBUG]
+// ============================================================================
+
+// If neither ASMJIT_DEBUG nor ASMJIT_RELEASE is defined, ASMJIT_DEBUG will be
+// detected using compiler-specific macros. This makes it possible to set the
+// build type from an IDE.
+#if !defined(ASMJIT_DEBUG) && !defined(ASMJIT_RELEASE)
+# if defined(_DEBUG)
+#  define ASMJIT_DEBUG
+# endif // _DEBUG
+#endif // !ASMJIT_DEBUG && !ASMJIT_RELEASE
+
+// ============================================================================
+// [ASMJIT_UNUSED]
+// ============================================================================
+
+#if !defined(ASMJIT_UNUSED)
+# define ASMJIT_UNUSED(_Var_) ((void)_Var_)
+#endif // ASMJIT_UNUSED
+
+// ============================================================================
+// [ASMJIT_NOP]
+// ============================================================================
+
+#if !defined(ASMJIT_NOP)
+# define ASMJIT_NOP() ((void)0)
+#endif // ASMJIT_NOP
+
+// ============================================================================
+// [ASMJIT_CCONV]
+// ============================================================================
+
+#if defined(ASMJIT_HOST_X86)
+# if defined(__GNUC__)
+#  define ASMJIT_REGPARM_1 __attribute__((regparm(1)))
+#  define ASMJIT_REGPARM_2 __attribute__((regparm(2)))
+#  define ASMJIT_REGPARM_3 __attribute__((regparm(3)))
+#  define ASMJIT_FASTCALL __attribute__((fastcall))
+#  define ASMJIT_STDCALL __attribute__((stdcall))
+#  define ASMJIT_CDECL __attribute__((cdecl))
+# else
+#  define ASMJIT_FASTCALL __fastcall
+#  define ASMJIT_STDCALL __stdcall
+#  define ASMJIT_CDECL __cdecl
+# endif
+#else
+# define ASMJIT_FASTCALL
+# define ASMJIT_STDCALL
+# define ASMJIT_CDECL
+#endif // ASMJIT_HOST
+
+// ============================================================================
+// [IntTypes]
+// ============================================================================
+
+#if defined(__MINGW32__) || defined(__MINGW64__)
+# include <stdint.h>
+#endif // __MINGW32__ || __MINGW64__
+
+#if defined(_MSC_VER) && (_MSC_VER < 1600)
+# if !defined(ASMJIT_SUPRESS_STD_TYPES)
+#  if (_MSC_VER < 1300)
+typedef signed char int8_t;
+typedef signed short int16_t;
+typedef signed int int32_t;
+typedef signed __int64 int64_t;
+typedef unsigned char uint8_t;
+typedef unsigned short uint16_t;
+typedef unsigned int uint32_t;
+typedef unsigned __int64 uint64_t;
+#  else
+typedef signed __int8 int8_t;
+typedef signed __int16 int16_t;
+typedef signed __int32 int32_t;
+typedef signed __int64 int64_t;
+typedef unsigned __int8 uint8_t;
+typedef unsigned __int16 uint16_t;
+typedef unsigned __int32 uint32_t;
+typedef unsigned __int64 uint64_t;
+#  endif // _MSC_VER
+# endif // ASMJIT_SUPRESS_STD_TYPES
+#else
+# include <stdint.h>
+# include <limits.h>
+#endif
+
+#if defined(_MSC_VER)
+# define ASMJIT_INT64_C(_Num_) _Num_##i64
+# define ASMJIT_UINT64_C(_Num_) _Num_##ui64
+#else
+# define ASMJIT_INT64_C(_Num_) _Num_##LL
+# define ASMJIT_UINT64_C(_Num_) _Num_##ULL
+#endif
+
+// ============================================================================
+// [OS Support]
+// ============================================================================
+
+#if defined(ASMJIT_OS_WINDOWS) && !defined(ASMJIT_SUPRESS_WINDOWS_H)
+#include <windows.h>
+#endif // ASMJIT_OS_WINDOWS && !ASMJIT_SUPRESS_WINDOWS_H
+
+// [Guard]
+#endif // _ASMJIT_BUILD_H
diff --git a/src/asmjit/config.h b/src/asmjit/config.h
new file mode 100644
index 0000000..3b4eea1
--- /dev/null
+++ b/src/asmjit/config.h
@@ -0,0 +1,42 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Guard]
+#ifndef _ASMJIT_CONFIG_H
+#define _ASMJIT_CONFIG_H
+
+// This file can be used to modify built-in features of AsmJit.
AsmJit is by +// default compiled only for host processor to enable JIT compilation. Both +// Assembler and Compiler code generators are compiled by default. However, any +// ASMJIT_BUILD_... flag can be defined to enable building of additional +// backends that can be used for remote code generation. + +// ============================================================================ +// [AsmJit - Debugging] +// ============================================================================ + +// #define ASMJIT_DEBUG // Define to enable debug-mode. +// #define ASMJIT_RELEASE // Define to enable release-mode (no debugging). + +// ============================================================================ +// [AsmJit - Library] +// ============================================================================ + +// #define ASMJIT_STATIC // Define to enable static-library build. +// #define ASMJIT_API // Define to override ASMJIT_API decorator. + +// ============================================================================ +// [AsmJit - Features] +// ============================================================================ + +// If none of these is defined AsmJit will select host architecture by default. + +// #define ASMJIT_BUILD_X86 // Define to enable x86 instruction set (32-bit). +// #define ASMJIT_BUILD_X64 // Define to enable x64 instruction set (64-bit). +// #define ASMJIT_BUILD_HOST // Define to enable host instruction set. + +// [Guard] +#endif // _ASMJIT_CONFIG_H diff --git a/src/asmjit/contrib.h b/src/asmjit/contrib.h new file mode 100644 index 0000000..dece8b3 --- /dev/null +++ b/src/asmjit/contrib.h @@ -0,0 +1,18 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. +// +// [License] +// Zlib - See LICENSE.md file in this package. + +// [Guard] +#ifndef _ASMJIT_CONTRIB_H +#define _ASMJIT_CONTRIB_H + +// [Dependencies - Core] +#include "base.h" + +// [Dependencies - Contrib] +#include "contrib/winremoteruntime.h" + +// [Guard] +#endif // _ASMJIT_CONTRIB_H diff --git a/src/asmjit/contrib/winremoteruntime.cpp b/src/asmjit/contrib/winremoteruntime.cpp new file mode 100644 index 0000000..8be26ed --- /dev/null +++ b/src/asmjit/contrib/winremoteruntime.cpp @@ -0,0 +1,78 @@ +// [AsmJit/WinRemoteRuntime] +// Contribution for remote process handling. +// +// [License] +// Zlib - See LICENSE.md file in the package. + +// [Export] +#define ASMJIT_EXPORTS + +// [Dependencies - AsmJit] +#include "../base.h" + +// [Guard - Windows] +#if defined(ASMJIT_OS_WINDOWS) +#include "winremoteruntime.h" + +namespace asmjit { +namespace contrib { + +// ============================================================================ +// [asmjit::contrib::WinRemoteRuntime - Construction / Destruction] +// ============================================================================ + +WinRemoteRuntime::WinRemoteRuntime(HANDLE hProcess) : + _hProcess(hProcess), + _memoryManager(hProcess) { + + // We are patching another process so enable keep-virtual-memory option. + _memoryManager.setKeepVirtualMemory(true); +} + +WinRemoteRuntime::~WinRemoteRuntime() {} + +// ============================================================================ +// [asmjit::contrib::WinRemoteRuntime - Interface] +// ============================================================================ + +uint32_t WinRemoteRuntime::add(void** dest, BaseAssembler* assembler) { + // Disallow generation of no code. 
+ size_t codeSize = assembler->getCodeSize(); + + if (codeSize == 0) { + *dest = NULL; + return kErrorInvalidState; + } + + // Allocate temporary memory where the code will be stored and relocated. + void* codeData = ::malloc(codeSize); + + if (codeData == NULL) { + *dest = NULL; + return kErrorNoHeapMemory; + } + + // Allocate a pernament remote process memory. + void* processMemPtr = _memoryManager.alloc(codeSize, kVirtualAllocPermanent); + + if (processMemPtr == NULL) { + ::free(codeData); + *dest = NULL; + return kErrorNoVirtualMemory; + } + + // Relocate and write the code to the process memory. + assembler->relocCode(codeData, (uintptr_t)processMemPtr); + + ::WriteProcessMemory(_hProcess, processMemPtr, codeData, codeSize, NULL); + ::free(codeData); + + *dest = processMemPtr; + return kErrorOk; +} + +} // contrib namespace +} // asmjit namespace + +// [Guard - Windows] +#endif // ASMJIT_OS_WINDOWS diff --git a/src/asmjit/contrib/winremoteruntime.h b/src/asmjit/contrib/winremoteruntime.h new file mode 100644 index 0000000..c65eaae --- /dev/null +++ b/src/asmjit/contrib/winremoteruntime.h @@ -0,0 +1,71 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. +// +// [License] +// Zlib - See LICENSE.md file in the package. + +// [Guard] +#ifndef _ASMJIT_CONTRIB_WINREMOTERUNTIME_H +#define _ASMJIT_CONTRIB_WINREMOTERUNTIME_H + +// [Dependencies] +#include "../base.h" + +// [Guard - Windows] +#if defined(ASMJIT_OS_WINDOWS) + +namespace asmjit { +namespace contrib { + +// ============================================================================ +// [asmjit::contrib::WinRemoteRuntime] +// ============================================================================ + +//! @brief WinRemoteRuntime can be used to inject code to a remote process. +struct WinRemoteRuntime : public BaseRuntime { + ASMJIT_NO_COPY(WinRemoteRuntime) + + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + //! @brief Create a @c WinRemoteRuntime instance for a given @a hProcess. + ASMJIT_API WinRemoteRuntime(HANDLE hProcess); + + //! @brief Destroy the @c WinRemoteRuntime instance. + ASMJIT_API virtual ~WinRemoteRuntime(); + + // -------------------------------------------------------------------------- + // [Accessors] + // -------------------------------------------------------------------------- + + //! @brief Get the remote process handle. + ASMJIT_INLINE HANDLE getProcess() const { return _hProcess; } + + //! @brief Get the virtual memory manager. + ASMJIT_INLINE VirtualMemoryManager* getMemoryManager() { return &_memoryManager; } + + // -------------------------------------------------------------------------- + // [Interface] + // -------------------------------------------------------------------------- + + ASMJIT_API virtual uint32_t add(void** dest, BaseAssembler* assembler); + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + + //! @brief Process. + HANDLE _hProcess; + + //! @brief Virtual memory manager. 
+ VirtualMemoryManager _memoryManager; +}; + +} // contrib namespace +} // asmjit namespace + +// [Guard - Windows] +#endif // ASMJIT_OS_WINDOWS + +// [Guard] +#endif // _ASMJIT_CONTRIB_WINREMOTERUNTIME_H diff --git a/src/asmjit/host.h b/src/asmjit/host.h new file mode 100644 index 0000000..408aa3d --- /dev/null +++ b/src/asmjit/host.h @@ -0,0 +1,38 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. +// +// [License] +// Zlib - See LICENSE.md file in the package. + +// [Guard] +#ifndef _ASMJIT_HOST_H +#define _ASMJIT_HOST_H + +// [Dependencies - Core] +#include "base.h" + +// [Host - Helpers] +#define ASMJIT_USE_HOST(_Arch_) \ + namespace asmjit { \ + namespace host { \ + using namespace ::asmjit::_Arch_; \ + } \ + } + +// [Host - X86] +#if defined(ASMJIT_HOST_X86) +#include "x86.h" +ASMJIT_USE_HOST(x86) +#endif // ASMJIT_HOST_X86 + +// [Host - X64] +#if defined(ASMJIT_HOST_X64) +#include "x86.h" +ASMJIT_USE_HOST(x64) +#endif // ASMJIT_HOST_X64 + +// [Host - Cleanup] +#undef ASMJIT_USE_HOST + +// [Guard] +#endif // _ASMJIT_HOST_H diff --git a/src/asmjit/x86.h b/src/asmjit/x86.h new file mode 100644 index 0000000..fc1d4b5 --- /dev/null +++ b/src/asmjit/x86.h @@ -0,0 +1,21 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. +// +// [License] +// Zlib - See LICENSE.md file in the package. + +// [Guard] +#ifndef _ASMJIT_X86_H +#define _ASMJIT_X86_H + +// [Dependencies - AsmJit] +#include "base.h" + +#include "x86/x86assembler.h" +#include "x86/x86compiler.h" +#include "x86/x86cpu.h" +#include "x86/x86defs.h" +#include "x86/x86func.h" + +// [Guard] +#endif // _ASMJIT_X86_H diff --git a/src/asmjit/x86/x86assembler.cpp b/src/asmjit/x86/x86assembler.cpp new file mode 100644 index 0000000..a142c9d --- /dev/null +++ b/src/asmjit/x86/x86assembler.cpp @@ -0,0 +1,3732 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. +// +// [License] +// Zlib - See LICENSE.md file in the package. + +// [Export] +#define ASMJIT_EXPORTS + +// [Guard] +#include "../build.h" +#if defined(ASMJIT_BUILD_X86) || defined(ASMJIT_BUILD_X64) + +// [Dependencies - AsmJit] +#include "../base/assembler.h" +#include "../base/cpu.h" +#include "../base/defs.h" +#include "../base/intutil.h" +#include "../base/logger.h" +#include "../base/memorymanager.h" +#include "../base/runtime.h" +#include "../base/string.h" +#include "../x86/x86assembler.h" +#include "../x86/x86cpu.h" +#include "../x86/x86defs.h" + +// [Api-Begin] +#include "../base/apibegin.h" + +namespace asmjit { +namespace x86x64 { + +// ============================================================================ +// [Constants] +// ============================================================================ + +enum { kRexShift = 6 }; +enum { kRexForbidden = 0x80 }; +enum { kMaxCommentLength = 80 }; + +// 2-byte VEX prefix. +// [0] kVex2Byte. +// [1] RvvvvLpp. +enum { kVex2Byte = 0xC5 }; + +// 3-byte VEX prefix. +// [0] kVex3Byte. +// [1] RXBmmmmm. +// [2] WvvvvLpp. +enum { kVex3Byte = 0xC4 }; + +enum kVexVVVV { + kVexVVVVShift = 12, + kVexVVVVMask = 0xF << kVexVVVVShift +}; + +//! @internal +//! +//! @brief Instruction 2-byte/3-byte opcode prefix definition. +struct OpCodeMM { + uint8_t len; + uint8_t data[3]; +}; + +//! @internal +//! +//! @brief Mandatory prefixes encoded in 'asmjit' opcode [66, F3, F2] and asmjit +//! extensions +static const uint8_t x86OpCodePP[8] = { + 0x00, + 0x66, + 0xF3, + 0xF2, + 0x00, + 0x00, + 0x00, + 0x9B +}; + +//! @internal +//! +//! @brief Instruction 2-byte/3-byte opcode prefix data. 
+static const OpCodeMM x86OpCodeMM[] = {
+  { 0, { 0x00, 0x00, 0 } },
+  { 1, { 0x0F, 0x00, 0 } },
+  { 2, { 0x0F, 0x38, 0 } },
+  { 2, { 0x0F, 0x3A, 0 } },
+  { 0, { 0x00, 0x00, 0 } },
+  { 0, { 0x00, 0x00, 0 } },
+  { 0, { 0x00, 0x00, 0 } },
+  { 0, { 0x00, 0x00, 0 } },
+  { 0, { 0x00, 0x00, 0 } },
+  { 0, { 0x00, 0x00, 0 } },
+  { 0, { 0x00, 0x00, 0 } },
+  { 0, { 0x00, 0x00, 0 } },
+  { 0, { 0x00, 0x00, 0 } },
+  { 0, { 0x00, 0x00, 0 } },
+  { 0, { 0x00, 0x00, 0 } },
+  { 2, { 0x0F, 0x01, 0 } }
+};
+
+static const uint8_t x86SegmentPrefix[8] = { 0x00, 0x26, 0x2E, 0x36, 0x3E, 0x64, 0x65 };
+static const uint8_t x86OpCodePushSeg[8] = { 0x00, 0x06, 0x0E, 0x16, 0x1E, 0xA0, 0xA8 };
+static const uint8_t x86OpCodePopSeg[8] = { 0x00, 0x07, 0x00, 0x17, 0x1F, 0xA1, 0xA9 };
+
+// ============================================================================
+// [asmjit::X64TrampolineWriter]
+// ============================================================================
+
+//! @brief Trampoline writer.
+struct X64TrampolineWriter {
+  // Size of trampoline.
+  enum {
+    kSizeJmp = 6,
+    kSizeAddr = 8,
+    kSizeTotal = kSizeJmp + kSizeAddr
+  };
+
+  // Write trampoline into code at address @a code that will jump to @a target.
+  static void writeTrampoline(uint8_t* code, uint64_t target)
+  {
+    code[0] = 0xFF;                                       // Jmp OpCode.
+    code[1] = 0x25;                                       // ModM (RIP addressing).
+    ((uint32_t*)(code + 2))[0] = 0;                       // Offset (zero).
+    ((uint64_t*)(code + kSizeJmp))[0] = (uint64_t)target; // Absolute address.
+  }
+};
+
+// ============================================================================
+// [asmjit::x86x64::Emit]
+// ============================================================================
+
+#define ENC_OPS(_Op0_, _Op1_, _Op2_) \
+  ((kOperandType##_Op0_) + ((kOperandType##_Op1_) << 3) + ((kOperandType##_Op2_) << 6))
+
+#define ADD_66H_P(_Exp_) \
+  do { \
+    opCode |= (static_cast<uint32_t>(_Exp_) << kInstOpCode_PP_Shift); \
+  } while (0)
+
+#define ADD_REX_W(_Exp_) \
+  do { \
+    if (Arch == kArchX64) \
+      opX |= static_cast<uint32_t>(_Exp_) << 3; \
+  } while (0)
+#define ADD_REX_B(_Reg_) \
+  do { \
+    if (Arch == kArchX64) \
+      opX |= static_cast<uint32_t>(_Reg_) >> 3; \
+  } while (0)
+
+#define ADD_VEX_L(_Exp_) \
+  do { \
+    opCode |= static_cast<uint32_t>(_Exp_) << kInstOpCode_L_Shift; \
+  } while (0)
+
+#define EMIT_BYTE(_Val_) \
+  do { \
+    cursor[0] = static_cast<uint8_t>(_Val_); \
+    cursor += 1; \
+  } while (0)
+
+#define EMIT_WORD(_Val_) \
+  do { \
+    reinterpret_cast<uint16_t*>(cursor)[0] = static_cast<uint16_t>(_Val_); \
+    cursor += 2; \
+  } while (0)
+
+#define EMIT_DWORD(_Val_) \
+  do { \
+    reinterpret_cast<uint32_t*>(cursor)[0] = static_cast<uint32_t>(_Val_); \
+    cursor += 4; \
+  } while (0)
+
+#define EMIT_QWORD(_Val_) \
+  do { \
+    reinterpret_cast<uint64_t*>(cursor)[0] = static_cast<uint64_t>(_Val_); \
+    cursor += 8; \
+  } while (0)
+
+#define EMIT_OP(_Val_) \
+  do { \
+    EMIT_BYTE((_Val_) & 0xFF); \
+  } while (0)
+
+#define EMIT_PP(_Val_) \
+  do { \
+    uint32_t ppIndex = ((_Val_) >> kInstOpCode_PP_Shift) & (kInstOpCode_PP_Mask >> kInstOpCode_PP_Shift); \
+    uint8_t ppCode = x86OpCodePP[ppIndex]; \
+    \
+    if (!ppIndex) \
+      break; \
+    \
+    cursor[0] = ppCode; \
+    cursor++; \
+  } while (0)
+
+#define EMIT_MM(_Val_) \
+  do { \
+    uint32_t mmIndex = ((_Val_) >> kInstOpCode_MM_Shift) & (kInstOpCode_MM_Mask >> kInstOpCode_MM_Shift); \
+    const OpCodeMM& mmCode = x86OpCodeMM[mmIndex]; \
+    \
+    if (!mmIndex) \
+      break; \
+    \
+    cursor[0] = mmCode.data[0]; \
+    cursor[1] = mmCode.data[1]; \
+    cursor += mmCode.len; \
+  } while (0)
+
+// ============================================================================
+// [asmjit::x86x64::Assembler - 
Construction / Destruction] +// ============================================================================ + +X86X64Assembler::X86X64Assembler(BaseRuntime* runtime) : BaseAssembler(runtime) {} +X86X64Assembler::~X86X64Assembler() {} + +// ============================================================================ +// [asmjit::x86x64::Assembler - Label] +// ============================================================================ + +void X86X64Assembler::_bind(const Label& label) { + uint32_t index = label.getId(); + ASMJIT_ASSERT(index < _labels.getLength()); + + // Get label data based on label id. + LabelData& data = _labels[index]; + + // Label can be bound only once. + ASMJIT_ASSERT(data.offset == -1); + + // Log. + if (_logger) + _logger->logFormat(kLoggerStyleLabel, "L%u:\n", index); + + size_t pos = getOffset(); + + LabelLink* link = data.links; + LabelLink* prev = NULL; + + while (link) { + intptr_t offset = link->offset; + + if (link->relocId != -1) { + // If linked label points to RelocData then instead of writing relative + // displacement to assembler stream, we will write it to RelocData. + _relocData[link->relocId].data += static_cast(pos); + } + else { + // Not using relocId, this means that we overwriting real displacement + // in assembler stream. + int32_t patchedValue = static_cast( + static_cast(pos) - offset + link->displacement); + + // Size of the value we are going to patch. Only BYTE/DWORD is allowed. + uint32_t size = getByteAt(offset); + ASMJIT_ASSERT(size == 1 || size == 4); + + if (size == 4) { + setInt32At(offset, patchedValue); + } + else { // if (size) == 1 + if (IntUtil::isInt8(patchedValue)) + setByteAt(offset, static_cast(patchedValue & 0xFF)); + else + setError(kErrorAssemblerIllegalShortJump); + } + } + + prev = link->prev; + link = prev; + } + + // Chain unused links. + link = data.links; + if (link) { + if (prev == NULL) + prev = link; + + prev->prev = _unusedLinks; + _unusedLinks = link; + } + + // Unlink label if it was linked. + data.offset = pos; + data.links = NULL; +} + +// ============================================================================ +// [asmjit::x86x64::Assembler - Embed] +// ============================================================================ + +Error X86X64Assembler::embedLabel(const Label& op) { + ASMJIT_ASSERT(op.getId() != kInvalidValue); + uint32_t regSize = _regSize; + + if (getRemainingSpace() < regSize) + ASMJIT_PROPAGATE_ERROR(_grow(regSize)); + + uint8_t* cursor = getCursor(); + + LabelData& label = _labels[op.getId()]; + RelocData reloc; + + if (_logger) { + _logger->logFormat(kLoggerStyleData, regSize == 4 ? ".dd L%u\n" : ".dq L%u\n", op.getId()); + } + + reloc.type = kRelocRelToAbs; + reloc.size = regSize; + reloc.from = static_cast(getOffset()); + reloc.data = 0; + + if (label.offset != -1) { + // Bound label. + reloc.data = static_cast(static_cast(label.offset)); + } + else { + // Non-bound label. Need to chain. + LabelLink* link = _newLabelLink(); + + link->prev = (LabelLink*)label.links; + link->offset = getOffset(); + link->displacement = 0; + link->relocId = _relocData.getLength(); + + label.links = link; + } + + if (_relocData.append(reloc) != kErrorOk) + return setError(kErrorNoHeapMemory); + + // Emit dummy intptr_t (4 or 8 bytes; depends on the address size). 
+ if (regSize == 4) + EMIT_DWORD(0); + else + EMIT_QWORD(0); + + setCursor(cursor); + return kErrorOk; +} + +// ============================================================================ +// [asmjit::x86x64::Assembler - Align] +// ============================================================================ + +Error X86X64Assembler::_align(uint32_t m) { + if (_logger) { + _logger->logFormat(kLoggerStyleDirective, + "%s.align %u\n", _logger->getIndentation(), static_cast(m)); + } + + if (m <= 1 || !IntUtil::isPowerOf2(m) || m > 64) + return setError(kErrorInvalidArgument); + + uint32_t i = static_cast(IntUtil::deltaTo(getOffset(), m)); + if (i == 0) + return kErrorOk; + + if (getRemainingSpace() < i) + ASMJIT_PROPAGATE_ERROR(_grow(i)); + uint8_t* cursor = getCursor(); + + if (IntUtil::hasBit(_features, kCodeGenOptimizedAlign)) { + const Cpu* cpuInfo = static_cast(getRuntime()->getCpu()); + + // NOPs optimized for Intel: + // Intel 64 and IA-32 Architectures Software Developer's Manual + // - Volume 2B + // - Instruction Set Reference N-Z + // - NOP + + // NOPs optimized for AMD: + // Software Optimization Guide for AMD Family 10h Processors (Quad-Core) + // - 4.13 - Code Padding with Operand-Size Override and Multibyte NOP + + // Intel and AMD. + static const uint8_t nop1[] = { 0x90 }; + static const uint8_t nop2[] = { 0x66, 0x90 }; + static const uint8_t nop3[] = { 0x0F, 0x1F, 0x00 }; + static const uint8_t nop4[] = { 0x0F, 0x1F, 0x40, 0x00 }; + static const uint8_t nop5[] = { 0x0F, 0x1F, 0x44, 0x00, 0x00 }; + static const uint8_t nop6[] = { 0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00 }; + static const uint8_t nop7[] = { 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00 }; + static const uint8_t nop8[] = { 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 }; + static const uint8_t nop9[] = { 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 }; + + // AMD. + static const uint8_t nop10[] = { 0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 }; + static const uint8_t nop11[] = { 0x66, 0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 }; + + const uint8_t* p; + uint32_t n; + + if (cpuInfo->getVendorId() == kCpuVendorIntel && ((cpuInfo->getFamily() & 0x0F) == 0x06 || (cpuInfo->getFamily() & 0x0F) == 0x0F)) { + do { + switch (i) { + case 1: p = nop1; n = 1; break; + case 2: p = nop2; n = 2; break; + case 3: p = nop3; n = 3; break; + case 4: p = nop4; n = 4; break; + case 5: p = nop5; n = 5; break; + case 6: p = nop6; n = 6; break; + case 7: p = nop7; n = 7; break; + case 8: p = nop8; n = 8; break; + default: p = nop9; n = 9; break; + } + + i -= n; + do { + EMIT_BYTE(*p++); + } while (--n); + } while (i); + } + else if (cpuInfo->getVendorId() == kCpuVendorAmd && cpuInfo->getFamily() >= 0x0F) { + do { + switch (i) { + case 1: p = nop1 ; n = 1; break; + case 2: p = nop2 ; n = 2; break; + case 3: p = nop3 ; n = 3; break; + case 4: p = nop4 ; n = 4; break; + case 5: p = nop5 ; n = 5; break; + case 6: p = nop6 ; n = 6; break; + case 7: p = nop7 ; n = 7; break; + case 8: p = nop8 ; n = 8; break; + case 9: p = nop9 ; n = 9; break; + case 10: p = nop10; n = 10; break; + default: p = nop11; n = 11; break; + } + + i -= n; + do { + EMIT_BYTE(*p++); + } while (--n); + } while (i); + } + + // Legacy NOPs, 0x90 with 0x66 prefix. + if (getArch() == kArchX86) { + while (i) { + switch (i) { + default: EMIT_BYTE(0x66); i--; + case 3: EMIT_BYTE(0x66); i--; + case 2: EMIT_BYTE(0x66); i--; + case 1: EMIT_BYTE(0x90); i--; + } + } + } + } + + // Legacy NOPs, only 0x90. 
+  while (i) {
+    EMIT_BYTE(0x90);
+    i--;
+  }
+
+  setCursor(cursor);
+  return kErrorOk;
+}
+
+// ============================================================================
+// [asmjit::x86x64::Assembler - Reloc]
+// ============================================================================
+
+template<uint32_t Arch, typename Ptr>
+static ASMJIT_INLINE size_t X86X64Assembler_relocCode(const X86X64Assembler* self, void* _dst, Ptr base) {
+  uint8_t* dst = static_cast<uint8_t*>(_dst);
+
+  size_t codeOffset = self->getOffset();
+  size_t codeSize = self->getCodeSize();
+
+  // We will copy the exact size of the generated code. Extra code for
+  // trampolines is generated on-the-fly by the relocator (that code doesn't
+  // exist at this point).
+  ::memcpy(dst, self->_buffer, codeOffset);
+
+  // Trampoline pointer.
+  uint8_t* tramp;
+
+  if (Arch == kArchX64)
+    tramp = dst + codeOffset;
+
+  // Relocate all recorded locations.
+  size_t i;
+  size_t len = self->_relocData.getLength();
+
+  for (i = 0; i < len; i++) {
+    const RelocData& r = self->_relocData[i];
+    Ptr ptr;
+
+    // Whether to use a trampoline; only valid if the relocation type is
+    // kRelocAbsToRel.
+    bool useTrampoline = false;
+
+    // Be sure that the reloc data structure is correct.
+    ASMJIT_ASSERT(r.from + r.size <= static_cast<Ptr>(codeSize));
+
+    switch (r.type) {
+      case kRelocAbsToAbs:
+        ptr = r.data;
+        break;
+
+      case kRelocRelToAbs:
+        ptr = r.data + base;
+        break;
+
+      case kRelocAbsToRel:
+      case kRelocTrampoline:
+        ptr = r.data - (base + r.from + 4);
+
+        if (Arch == kArchX64 && r.type == kRelocTrampoline && !IntUtil::isInt32(ptr)) {
+          ptr = (Ptr)tramp - (base + r.from + 4);
+          useTrampoline = true;
+        }
+        break;
+
+      default:
+        ASMJIT_ASSERT(!"Reached");
+    }
+
+    switch (r.size) {
+      case 4:
+        *reinterpret_cast<uint32_t*>(dst + static_cast<size_t>(r.from)) = static_cast<uint32_t>(ptr);
+        break;
+
+      case 8:
+        *reinterpret_cast<uint64_t*>(dst + static_cast<size_t>(r.from)) = static_cast<uint64_t>(ptr);
+        break;
+
+      default:
+        ASMJIT_ASSERT(!"Reached");
+    }
+
+    if (Arch == kArchX64 && useTrampoline) {
+      if (self->_logger)
+        self->_logger->logFormat(kLoggerStyleComment, "; Trampoline from %llX -> %llX\n", base + r.from, r.data);
+
+      X64TrampolineWriter::writeTrampoline(tramp, (uint64_t)r.data);
+      tramp += X64TrampolineWriter::kSizeTotal;
+    }
+  }
+
+  if (Arch == kArchX64)
+    return (size_t)(tramp - dst);
+  else
+    return (size_t)(codeOffset);
+}
+
+// ============================================================================
+// [asmjit::Assembler - Logging]
+// ============================================================================
+
+// Logging helpers.
+static const char* AssemblerX86_operandSize[] = {
+  "",
+  "byte ptr ",
+  "word ptr ",
+  NULL,
+  "dword ptr ",
+  NULL,
+  NULL,
+  NULL,
+  "qword ptr ",
+  NULL,
+  "tword ptr ",
+  NULL,
+  NULL,
+  NULL,
+  NULL,
+  NULL,
+  "oword ptr "
+};
+
+static const char X86Assembler_segName[] =
+  "\0\0\0\0"
+  "es:\0"
+  "cs:\0"
+  "ss:\0"
+  "ds:\0"
+  "fs:\0"
+  "gs:\0"
+  "\0\0\0\0";
+
+static void X86Assembler_dumpRegister(StringBuilder& sb, uint32_t type, uint32_t index) {
+  // NE == Not-Encodable.
+ static const char reg8l[] = "al\0\0" "cl\0\0" "dl\0\0" "bl\0\0" "spl\0" "bpl\0" "sil\0" "dil\0" ; + static const char reg8h[] = "ah\0\0" "ch\0\0" "dh\0\0" "bh\0\0" "NE\0\0" "NE\0\0" "NE\0\0" "NE\0\0"; + static const char reg16[] = "ax\0\0" "cx\0\0" "dx\0\0" "bx\0\0" "sp\0\0" "bp\0\0" "si\0\0" "di\0\0"; + + char suffix = '\0'; + + switch (type) { + case kRegTypeGpbLo: + if (index >= 8) { + sb._appendChar('r'); + suffix = 'b'; + goto _EmitID; + } + + sb._appendString(®8l[index * 4]); + return; + + case kRegTypePatchedGpbHi: + if (index < 4) + goto _EmitNE; + + index -= 4; + // ... Fall through ... + + case kRegTypeGpbHi: + if (index >= 4) + goto _EmitNE; + sb._appendString(®8h[index * 4]); + return; + +_EmitNE: + sb._appendString("NE", 2); + return; + + case kRegTypeGpw: + if (index >= 8) { + sb._appendChar('r'); + suffix = 'w'; + goto _EmitID; + } + + sb._appendString(®16[index * 4]); + return; + + case kRegTypeGpd: + if (index >= 8) { + sb._appendChar('r'); + suffix = 'd'; + goto _EmitID; + } + + sb._appendChar('e'); + sb._appendString(®16[index * 4]); + return; + + case kRegTypeGpq: + sb._appendChar('r'); + if (index >= 8) + goto _EmitID; + + sb._appendString(®16[index * 4]); + return; + + case kRegTypeFp: + sb._appendString("fp", 2); + goto _EmitID; + + case kRegTypeMm: + sb._appendString("mm", 2); + goto _EmitID; + + case kRegTypeXmm: + sb._appendString("xmm", 3); + goto _EmitID; + + case kRegTypeYmm: + sb._appendString("ymm", 3); + goto _EmitID; + + case kRegTypeSeg: + if (index >= kRegCountSeg) + goto _EmitNE; + + sb._appendString(&X86Assembler_segName[index * 4], 2); + return; + + default: + return; + } + +_EmitID: + sb._appendUInt32(index); + if (suffix) + sb._appendChar(suffix); +} + +static void X86Assembler_dumpOperand(StringBuilder& sb, uint32_t arch, const Operand* op, uint32_t loggerOptions) { + if (op->isReg()) { + X86Assembler_dumpRegister(sb, + static_cast(op)->getRegType(), + static_cast(op)->getRegIndex()); + } + else if (op->isMem()) { + const Mem* m = static_cast(op); + + uint32_t type = kRegTypeGpd; + uint32_t seg = m->getSegment(); + bool isAbsolute = false; + + if (arch == kArchX86) { + if (!m->hasGpdBase()) + type = kRegTypeGpw; + } + else { + if (!m->hasGpdBase()) + type = kRegTypeGpq; + } + + if (op->getSize() <= 16) + sb._appendString(AssemblerX86_operandSize[op->getSize()]); + + if (seg < kRegCountSeg) + sb._appendString(&X86Assembler_segName[seg * 4]); + + sb._appendChar('['); + switch (m->getMemType()) { + case kMemTypeBaseIndex: + case kMemTypeStackIndex: + // [base + index << shift + displacement] + X86Assembler_dumpRegister(sb, type, m->getBase()); + break; + + case kMemTypeLabel: + // [label + index << shift + displacement] + sb.appendFormat("L%u", m->getBase()); + break; + + case kMemTypeAbsolute: + // [absolute] + isAbsolute = true; + sb.appendUInt(static_cast(m->getDisplacement()), 16); + break; + } + + if (m->hasIndex()) { + switch (m->getVSib()) { + case kMemVSibXmm: type = kRegTypeXmm; break; + case kMemVSibYmm: type = kRegTypeYmm; break; + } + + sb._appendString(" + ", 3); + X86Assembler_dumpRegister(sb, type, m->getIndex()); + + if (m->getShift()) { + sb._appendString(" * ", 3); + sb._appendChar("1248"[m->getShift() & 3]); + } + } + + if (m->getDisplacement() && !isAbsolute) { + uint32_t base = 10; + int32_t dispOffset = m->getDisplacement(); + + const char* prefix = " + "; + if (dispOffset < 0) { + dispOffset = -dispOffset; + prefix = " - "; + } + + sb._appendString(prefix, 3); + if ((loggerOptions & (1 << kLoggerOptionHexDisplacement)) != 0 && 
dispOffset > 9) { + sb._appendString("0x", 2); + base = 16; + } + sb.appendUInt(static_cast(dispOffset), base); + } + + sb._appendChar(']'); + } + else if (op->isImm()) { + const Imm* i = static_cast(op); + int64_t val = i->getInt64(); + + if ((loggerOptions & (1 << kLoggerOptionHexImmediate)) && static_cast(val) > 9) + sb.appendUInt(static_cast(val), 16); + else + sb.appendInt(val, 10); + } + else if (op->isLabel()) { + sb.appendFormat("L%u", op->getId()); + } + else { + sb._appendString("None", 4); + } +} + +static bool X86Assembler_dumpInstruction(StringBuilder& sb, + uint32_t arch, + uint32_t code, uint32_t options, + const Operand* o0, + const Operand* o1, + const Operand* o2, + uint32_t loggerOptions) +{ + if (!sb.reserve(sb.getLength() + 128)) + return false; + + // Rex, lock and short prefix. + if (options & kInstOptionRex) + sb._appendString("rex ", 4); + + if (options & kInstOptionLock) + sb._appendString("lock ", 5); + + if (options & kInstOptionShortForm) + sb._appendString("short ", 6); + + // Dump instruction name. + sb._appendString(_instInfo[code].getName()); + + // Dump operands. + if (!o0->isNone()) { + sb._appendChar(' '); + X86Assembler_dumpOperand(sb, arch, o0, loggerOptions); + } + + if (!o1->isNone()) { + sb._appendString(", ", 2); + X86Assembler_dumpOperand(sb, arch, o1, loggerOptions); + } + + if (!o2->isNone()) { + sb._appendString(", ", 2); + X86Assembler_dumpOperand(sb, arch, o2, loggerOptions); + } + + return true; +} + +static bool X86Assembler_dumpComment(StringBuilder& sb, size_t len, const uint8_t* binData, size_t binLength, size_t dispSize, const char* comment) { + size_t currentLength = len; + size_t commentLength = comment ? StringUtil::nlen(comment, kMaxCommentLength) : 0; + + ASMJIT_ASSERT(binLength >= dispSize); + + if (binLength || commentLength) { + size_t align = 36; + char sep = ';'; + + for (size_t i = (binLength == 0); i < 2; i++) { + size_t begin = sb.getLength(); + + // Append align. + if (currentLength < align) { + if (!sb.appendChars(' ', align - currentLength)) + return false; + } + + // Append separator. + if (sep) { + if (!(sb.appendChar(sep) & sb.appendChar(' '))) + return false; + } + + // Append binary data or comment. + if (i == 0) { + if (!sb.appendHex(binData, binLength - dispSize)) + return false; + if (!sb.appendChars('.', dispSize * 2)) + return false; + if (commentLength == 0) + break; + } + else { + if (!sb.appendString(comment, commentLength)) + return false; + } + + currentLength += sb.getLength() - begin; + align += 22; + sep = '|'; + } + } + + return sb.appendChar('\n'); +} + +// ============================================================================ +// [asmjit::Assembler - Emit] +// ============================================================================ + +//! @brief Encode MODR/M. +static ASMJIT_INLINE uint32_t x86EncodeMod(uint32_t m, uint32_t o, uint32_t rm) { + return (m << 6) + (o << 3) + rm; +} + +//! @brief Encode SIB. +static ASMJIT_INLINE uint32_t x86EncodeSib(uint32_t s, uint32_t i, uint32_t b) { + return (s << 6) + (i << 3) + b; +} + +//! 
@internal
+static const Operand::VRegOp x86PatchedHiRegs[4] = {
+  // --------------+---+--------------------------------+--------------+------+
+  // Operand       | S | Register Code                  | OperandId    |Unused|
+  // --------------+---+--------------------------------+--------------+------+
+  { kOperandTypeReg, 1, (kRegTypePatchedGpbHi << 8) | 4, kInvalidValue, 0, 0 },
+  { kOperandTypeReg, 1, (kRegTypePatchedGpbHi << 8) | 5, kInvalidValue, 0, 0 },
+  { kOperandTypeReg, 1, (kRegTypePatchedGpbHi << 8) | 6, kInvalidValue, 0, 0 },
+  { kOperandTypeReg, 1, (kRegTypePatchedGpbHi << 8) | 7, kInvalidValue, 0, 0 }
+};
+
+template<uint32_t Arch>
+static ASMJIT_INLINE Error X86X64Assembler_emit(X86X64Assembler* self, uint32_t code, const Operand* o0, const Operand* o1, const Operand* o2, const Operand* o3) {
+  uint8_t* cursor = self->getCursor();
+
+  uint32_t encoded = o0->getOp() + (o1->getOp() << 3) + (o2->getOp() << 6);
+  uint32_t options = self->getOptionsAndClear();
+
+  // Instruction opcode.
+  uint32_t opCode;
+  // MODR/M opcode or register code.
+  uint32_t opReg;
+
+  // REX or VEX prefix data.
+  //
+  // REX:
+  //   0x0008 - REX.W.
+  //   0x0040 - Always emit REX prefix.
+  //
+  // AVX:
+  //   0x0008 - AVX.W.
+  //   0xF000 - VVVV, zeros by default, see @ref kVexVVVV.
+  uint32_t opX;
+
+  // MOD/RM - both rmReg and rmMem refer to the same storage since they are
+  // never used together - it's always either rmReg or rmMem.
+  union {
+    // MODR/M - register code.
+    uintptr_t rmReg;
+    // MODR/M - memory operand.
+    const Mem* rmMem;
+  };
+
+  // Immediate value.
+  int64_t imVal;
+  // Immediate length.
+  uint32_t imLen = 0;
+
+  // Memory operand base register index.
+  uint32_t mBase;
+  // Memory operand index register index.
+  uint32_t mIndex;
+
+  // Label.
+  LabelData* label;
+  // Displacement offset.
+  int32_t dispOffset;
+  // Displacement size.
+  uint32_t dispSize = 0;
+  // Displacement relocation id.
+  intptr_t relocId;
+
+#if defined(ASMJIT_DEBUG)
+  bool assertIllegal = false;
+#endif // ASMJIT_DEBUG
+
+  // Invalid instruction.
+  const InstInfo* info = &_instInfo[code];
+  if (code >= _kInstCount)
+    goto _UnknownInst;
+
+  // Grow request happens rarely. C++ compiler generates better code if it is
+  // handled at the end of the function.
+  if ((size_t)(self->_end - cursor) < 16)
+    goto _GrowBuffer;
+
+  // --------------------------------------------------------------------------
+  // [Prepare]
+  // --------------------------------------------------------------------------
+
+_Prepare:
+  opCode = info->_opCode[0];
+  opReg = opCode >> kInstOpCode_O_Shift;
+  opX = info->getFlags() >> (15 - 3);
+
+  if (Arch == kArchX86) {
+    // AVX.W prefix.
+    opX &= 0x08;
+
+    // Check if one or more register operand is one of AH, BH, CH, or DH and
+    // patch them to ensure that the binary code with correct byte-index (4-7)
+    // is generated.
+    if (o0->isRegType(kRegTypeGpbHi))
+      o0 = (const Operand*)(&x86PatchedHiRegs[static_cast<const X86Reg*>(o0)->getRegIndex()]);
+
+    if (o1->isRegType(kRegTypeGpbHi))
+      o1 = (const Operand*)(&x86PatchedHiRegs[static_cast<const X86Reg*>(o1)->getRegIndex()]);
+  }
+  else {
+    ASMJIT_ASSERT(kInstOptionRex == 0x40);
+
+    // AVX.W prefix and REX prefix.
+    opX |= options;
+    opX &= 0x48;
+
+    // Check if one or more register operand is one of BPL, SPL, SIL, DIL and
+    // force a REX prefix in such case.
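The reason for both the patch table and the forced prefix is an encoding quirk worth spelling out: in a byte operand, register codes 4-7 mean AH/CH/DH/BH when the instruction carries no REX prefix, but SPL/BPL/SIL/DIL when any REX prefix is present. The emitter therefore has to force a REX prefix for the new low-byte registers and forbid one for the legacy high-byte registers. A minimal standalone sketch of that decision (hypothetical names, not the library's API):

```cpp
#include <cassert>
#include <cstdint>

// Index 0-3 = AL..BL; 4-7 is SPL..DIL in the low bank, or the AH..DH
// encoding slots in the high bank.
enum ByteBank { kLoBank, kHiBank };

struct RexDecision { bool required; bool forbidden; };

static RexDecision rexForByteReg(ByteBank bank, uint32_t index) {
  RexDecision d = { false, false };
  if (bank == kLoBank)
    d.required = index >= 4;   // SPL/BPL/SIL/DIL only exist with REX.
  else
    d.forbidden = true;        // AH/CH/DH/BH can't be encoded with REX.
  return d;
}

int main() {
  assert(rexForByteReg(kLoBank, 6).required);   // SIL needs REX.
  assert(rexForByteReg(kHiBank, 0).forbidden);  // AH forbids REX.
  assert(!rexForByteReg(kLoBank, 3).required);  // BL works either way.
  return 0;
}
```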
+ if (x86IsGpbRegOp(o0)) { + uint32_t index = static_cast(o0)->getRegIndex(); + if (static_cast(o0)->isGpbLo()) { + opX |= (index >= 4) << kRexShift; + } + else { + opX |= kRexForbidden; + o0 = reinterpret_cast(&x86PatchedHiRegs[index]); + } + } + + if (x86IsGpbRegOp(o1)) { + uint32_t index = static_cast(o1)->getRegIndex(); + if (static_cast(o1)->isGpbLo()) { + opX |= (index >= 4) << kRexShift; + } + else { + opX |= kRexForbidden; + o1 = reinterpret_cast(&x86PatchedHiRegs[index]); + } + } + } + + // -------------------------------------------------------------------------- + // [Lock-Prefix] + // -------------------------------------------------------------------------- + + if (options & kInstOptionLock) { + if (!info->isLockable()) + goto _IllegalInst; + EMIT_BYTE(0xF0); + } + + // -------------------------------------------------------------------------- + // [Group] + // -------------------------------------------------------------------------- + + switch (info->getGroup()) { + + // ------------------------------------------------------------------------ + // [None] + // ------------------------------------------------------------------------ + + case kInstGroupNone: + goto _EmitDone; + + // ------------------------------------------------------------------------ + // [X86] + // ------------------------------------------------------------------------ + + case kInstGroupX86Op: + goto _EmitX86Op; + + case kInstGroupX86Rm_B: + opCode += o0->getSize() != 1; + // ... Fall through ... + + case kInstGroupX86Rm: + ADD_66H_P(o0->getSize() == 2); + ADD_REX_W(o0->getSize() == 8); + + if (encoded == ENC_OPS(Reg, None, None)) { + rmReg = static_cast(o0)->getRegIndex(); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Mem, None, None)) { + rmMem = static_cast(o0); + goto _EmitX86M; + } + break; + + case kInstGroupX86RmReg: + if (encoded == ENC_OPS(Reg, Reg, None)) { + opCode += o0->getSize() != 1; + ADD_66H_P(o0->getSize() == 2); + ADD_REX_W(o0->getSize() == 8); + + opReg = static_cast(o1)->getRegIndex(); + rmReg = static_cast(o0)->getRegIndex(); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Mem, Reg, None)) { + opCode += o1->getSize() != 1; + ADD_66H_P(o1->getSize() == 2); + ADD_REX_W(o1->getSize() == 8); + + opReg = static_cast(o1)->getRegIndex(); + rmMem = static_cast(o0); + goto _EmitX86M; + } + break; + + case kInstGroupX86RegRm: + ADD_66H_P(o0->getSize() == 2); + ADD_REX_W(o0->getSize() == 8); + + if (encoded == ENC_OPS(Reg, Reg, None)) { + ASMJIT_ASSERT(o0->getSize() != 1); + + opReg = static_cast(o0)->getRegIndex(); + rmReg = static_cast(o1)->getRegIndex(); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Reg, Mem, None)) { + ASMJIT_ASSERT(o0->getSize() != 1); + + opReg = static_cast(o0)->getRegIndex(); + rmMem = static_cast(o1); + goto _EmitX86M; + } + break; + + case kInstGroupX86M: + if (encoded == ENC_OPS(Mem, None, None)) { + rmMem = static_cast(o0); + goto _EmitX86M; + } + break; + + case kInstGroupX86Arith: + if (encoded == ENC_OPS(Reg, Reg, None)) { + opCode +=(o0->getSize() != 1) + 2; + ADD_66H_P(o0->getSize() == 2); + ADD_REX_W(o0->getSize() == 8); + + opReg = static_cast(o0)->getRegIndex(); + rmReg = static_cast(o1)->getRegIndex(); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Reg, Mem, None)) { + opCode +=(o0->getSize() != 1) + 2; + ADD_66H_P(o0->getSize() == 2); + ADD_REX_W(o0->getSize() == 8); + + opReg = static_cast(o0)->getRegIndex(); + rmMem = static_cast(o1); + goto _EmitX86M; + } + + if (encoded == ENC_OPS(Mem, Reg, None)) { + opCode += o1->getSize() != 1; + 
ADD_66H_P(o1->getSize() == 2); + ADD_REX_W(o1->getSize() == 8); + + opReg = static_cast(o1)->getRegIndex(); + rmMem = static_cast(o0); + goto _EmitX86M; + } + + // The remaining instructions use 0x80 opcode. + opCode = 0x80; + + if (encoded == ENC_OPS(Reg, Imm, None)) { + imVal = static_cast(o1)->getInt64(); + imLen = IntUtil::isInt8(imVal) ? static_cast(1) : IntUtil::iMin(o0->getSize(), 4); + rmReg = static_cast(o0)->getRegIndex(); + + // Alternate Form - AL, AX, EAX, RAX. + if (rmReg == 0 && (o0->getSize() == 1 || imLen != 1)) { + opCode = ((opReg << 3) | (0x04 + (o0->getSize() != 1))); + ADD_66H_P(o0->getSize() == 2); + ADD_REX_W(o0->getSize() == 8); + + if (imLen == 1) + imLen = 4; + goto _EmitX86OpI; + } + + opCode += o0->getSize() != 1 ? (imLen != 1 ? 1 : 3) : 0; + ADD_66H_P(o0->getSize() == 2); + ADD_REX_W(o0->getSize() == 8); + + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Mem, Imm, None)) { + imVal = static_cast(o1)->getInt64(); + imLen = IntUtil::isInt8(imVal) ? static_cast(1) : IntUtil::iMin(o0->getSize(), 4); + + opCode += o0->getSize() != 1 ? (imLen != 1 ? 1 : 3) : 0; + ADD_66H_P(o0->getSize() == 2); + ADD_REX_W(o0->getSize() == 8); + + rmMem = static_cast(o0); + goto _EmitX86M; + } + break; + + case kInstGroupX86BSwap: + if (encoded == ENC_OPS(Reg, None, None)) { + opReg = static_cast(o0)->getRegIndex(); + opCode += opReg & 0x7; + + ADD_REX_W(o0->getSize() == 8); + ADD_REX_B(opReg); + goto _EmitX86Op; + } + break; + + case kInstGroupX86BTest: + if (encoded == ENC_OPS(Reg, Reg, None)) { + ADD_66H_P(o1->getSize() == 2); + ADD_REX_W(o1->getSize() == 8); + + opReg = static_cast(o1)->getRegIndex(); + rmReg = static_cast(o0)->getRegIndex(); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Mem, Reg, None)) { + ADD_66H_P(o1->getSize() == 2); + ADD_REX_W(o1->getSize() == 8); + + opReg = static_cast(o1)->getRegIndex(); + rmMem = static_cast(o0); + goto _EmitX86M; + } + + // The remaining instructions use the secondary opcode/r. + imVal = static_cast(o1)->getInt64(); + imLen = 1; + + opCode = info->_opCode[1]; + opReg = opCode >> kInstOpCode_O_Shift; + + ADD_66H_P(o0->getSize() == 2); + ADD_REX_W(o0->getSize() == 8); + + if (encoded == ENC_OPS(Reg, Imm, None)) { + rmReg = static_cast(o0)->getRegIndex(); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Mem, Imm, None)) { + rmMem = static_cast(o0); + goto _EmitX86M; + } + break; + + case kInstGroupX86Call: + if (encoded == ENC_OPS(Reg, None, None)) { + rmReg = static_cast(o0)->getRegIndex(); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Mem, None, None)) { + rmMem = static_cast(o0); + goto _EmitX86M; + } + + // The following instructions use the secondary opcode. + opCode = info->_opCode[1]; + + if (encoded == ENC_OPS(Imm, None, None)) { + imVal = static_cast(o0)->getInt64(); + goto _EmitJmpOrCallImm; + } + + if (encoded == ENC_OPS(Label, None, None)) { + label = &self->_labels[static_cast(o0)->getId()]; + if (label->offset != -1) { + // Bound label. + static const intptr_t kRel32Size = 5; + intptr_t offs = label->offset - (intptr_t)(cursor - self->_buffer); + + ASMJIT_ASSERT(offs <= 0); + EMIT_OP(opCode); + EMIT_DWORD(static_cast(offs - kRel32Size)); + } + else { + // Non-bound label. 
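The `_EmitDisplacement` path this branch jumps to handles the non-bound case; conceptually it has to remember where the rel32 field was written so it can be patched once the label is bound. A hedged sketch of such a fixup pass, with invented record and function names:

```cpp
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

// Hypothetical fixup record: which label a rel32 field refers to and
// where that field lives in the code buffer.
struct LabelFixup {
  uint32_t labelId;      // Target label.
  size_t   fieldOffset;  // Byte offset of the rel32 field in the buffer.
};

// Patch all recorded fixups once every label offset is known. rel32 is
// measured from the end of the 4-byte displacement field.
static void patchFixups(uint8_t* buf,
                        const std::vector<LabelFixup>& fixups,
                        const std::vector<size_t>& labelOffsets) {
  for (const LabelFixup& f : fixups) {
    int32_t rel = static_cast<int32_t>(
      static_cast<intptr_t>(labelOffsets[f.labelId]) -
      static_cast<intptr_t>(f.fieldOffset + 4));
    std::memcpy(buf + f.fieldOffset, &rel, 4);
  }
}
```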
+ EMIT_OP(opCode); + dispOffset = -4; + dispSize = 4; + relocId = -1; + goto _EmitDisplacement; + } + goto _EmitDone; + } + break; + + case kInstGroupX86Enter: + if (encoded == ENC_OPS(Imm, Imm, None)) { + EMIT_BYTE(0xC8); + EMIT_WORD(static_cast(o1)->getUInt16()); + EMIT_BYTE(static_cast(o0)->getUInt8()); + goto _EmitDone; + } + break; + + case kInstGroupX86Imul: + if (encoded == ENC_OPS(Reg, None, None)) { + opCode = 0xF6 + (o0->getSize() != 1); + + ADD_66H_P(o0->getSize() == 2); + ADD_REX_W(o0->getSize() == 8); + + opReg = 5; + rmReg = static_cast(o0)->getRegIndex(); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Mem, None, None)) { + opCode = 0xF6 + (o0->getSize() != 1); + + ADD_66H_P(o0->getSize() == 2); + ADD_REX_W(o0->getSize() == 8); + + opReg = 5; + rmMem = static_cast(o0); + goto _EmitX86M; + } + + // The following instructions use 0x0FAF opcode. + opCode = kInstOpCode_MM_0F | 0xAF; + + if (encoded == ENC_OPS(Reg, Reg, None)) { + ASMJIT_ASSERT(o0->getSize() != 1); + + ADD_66H_P(o0->getSize() == 2); + ADD_REX_W(o0->getSize() == 8); + + opReg = static_cast(o0)->getRegIndex(); + rmReg = static_cast(o1)->getRegIndex(); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Reg, Mem, None)) { + ASMJIT_ASSERT(o0->getSize() != 1); + + ADD_66H_P(o0->getSize() == 2); + ADD_REX_W(o0->getSize() == 8); + + opReg = static_cast(o0)->getRegIndex(); + rmMem = static_cast(o1); + goto _EmitX86M; + } + + // The following instructions use 0x69/0x6B opcode. + opCode = 0x6B; + + if (encoded == ENC_OPS(Reg, Imm, None)) { + ASMJIT_ASSERT(o0->getSize() != 1); + + ADD_66H_P(o0->getSize() == 2); + ADD_REX_W(o0->getSize() == 8); + + imVal = static_cast(o1)->getInt64(); + imLen = 1; + + if (!IntUtil::isInt8(imVal)) { + opCode -= 2; + imLen = o0->getSize() == 2 ? 2 : 4; + } + + opReg = static_cast(o0)->getRegIndex(); + rmReg = opReg; + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Reg, Reg, Imm)) { + ASMJIT_ASSERT(o0->getSize() != 1); + + ADD_66H_P(o0->getSize() == 2); + ADD_REX_W(o0->getSize() == 8); + + imVal = static_cast(o2)->getInt64(); + imLen = 1; + + if (!IntUtil::isInt8(imVal)) { + opCode -= 2; + imLen = o0->getSize() == 2 ? 2 : 4; + } + + opReg = static_cast(o0)->getRegIndex(); + rmReg = static_cast(o1)->getRegIndex(); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Reg, Mem, Imm)) { + ASMJIT_ASSERT(o0->getSize() != 1); + + ADD_66H_P(o0->getSize() == 2); + ADD_REX_W(o0->getSize() == 8); + + imVal = static_cast(o2)->getInt64(); + imLen = 1; + + if (!IntUtil::isInt8(imVal)) { + opCode -= 2; + imLen = o0->getSize() == 2 ? 2 : 4; + } + + opReg = static_cast(o0)->getRegIndex(); + rmMem = static_cast(o1); + goto _EmitX86M; + } + break; + + case kInstGroupX86IncDec: + if (encoded == ENC_OPS(Reg, None, None)) { + rmReg = static_cast(o0)->getRegIndex(); + + // INC r16|r32 is not encodable in 64-bit mode. 
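The special case that follows exists because the one-byte `0x40+r` INC/DEC forms are only valid in 32-bit mode; in 64-bit mode those sixteen byte values were reassigned to REX prefixes, leaving the ModR/M form (`FF /0` and `FF /1`) as the only option. A small sketch of the difference for INC:

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

// Sketch: encode 'inc r32'. In 32-bit mode the short form 0x40+r is one
// byte; in 64-bit mode 0x40-0x4F are REX prefixes, so FF /0 must be used.
static std::vector<uint8_t> encodeInc32(uint32_t reg, bool x64) {
  if (!x64)
    return { static_cast<uint8_t>(0x40 + (reg & 7)) };               // inc eax..edi
  return { 0xFF, static_cast<uint8_t>(0xC0 | (0 << 3) | (reg & 7)) };  // ModR/M form
}

int main() {
  assert(encodeInc32(0, false) == std::vector<uint8_t>({ 0x40 }));        // inc eax, 32-bit
  assert(encodeInc32(0, true ) == std::vector<uint8_t>({ 0xFF, 0xC0 }));  // inc eax, 64-bit
  return 0;
}
```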
+ if (Arch == kArchX86 && (o0->getSize() == 2 || o0->getSize() == 4)) { + opCode = info->_opCode[1] + (static_cast(rmReg) & 0x7); + ADD_66H_P(o0->getSize() == 2); + ADD_REX_W(o0->getSize() == 8); + goto _EmitX86Op; + } + else { + opCode += o0->getSize() != 1; + ADD_66H_P(o0->getSize() == 2); + ADD_REX_W(o0->getSize() == 8); + goto _EmitX86R; + } + } + + if (encoded == ENC_OPS(Mem, None, None)) { + opCode += o0->getSize() != 1; + rmMem = static_cast(o0); + + ADD_66H_P(o0->getSize() == 2); + ADD_REX_W(o0->getSize() == 8); + goto _EmitX86M; + } + break; + + case kInstGroupX86Int: + if (encoded == ENC_OPS(Imm, None, None)) { + imVal = static_cast(o0)->getInt64(); + uint8_t imm8 = static_cast(imVal & 0xFF); + + if (imm8 == 0x03) { + EMIT_OP(opCode); + } + else { + EMIT_OP(opCode + 1); + EMIT_BYTE(imm8); + } + goto _EmitDone; + } + break; + + case kInstGroupX86Jcc: + if (encoded == ENC_OPS(Label, None, None)) { + label = &self->_labels[static_cast(o0)->getId()]; + + if (IntUtil::hasBit(self->_features, kCodeGenPredictedJumps)) { + if (options & kInstOptionTaken) + EMIT_BYTE(0x3E); + if (options & kInstOptionNotTaken) + EMIT_BYTE(0x2E); + } + + if (label->offset != -1) { + // Bound label. + static const intptr_t kRel8Size = 2; + static const intptr_t kRel32Size = 6; + + intptr_t offs = label->offset - (intptr_t)(cursor - self->_buffer); + ASMJIT_ASSERT(offs <= 0); + + if ((options & kInstOptionLongForm) == 0 && IntUtil::isInt8(offs - kRel8Size)) { + EMIT_OP(opCode); + EMIT_BYTE(offs - kRel8Size); + + options |= kInstOptionShortForm; + goto _EmitDone; + } + else { + EMIT_BYTE(0x0F); + EMIT_OP(opCode + 0x10); + EMIT_DWORD(static_cast(offs - kRel32Size)); + + options &= ~kInstOptionShortForm; + goto _EmitDone; + } + } + else { + // Non-bound label. + if (options & kInstOptionShortForm) { + EMIT_OP(opCode); + dispOffset = -1; + dispSize = 1; + relocId = -1; + goto _EmitDisplacement; + } + else { + EMIT_BYTE(0x0F); + EMIT_OP(opCode + 0x10); + dispOffset = -4; + dispSize = 4; + relocId = -1; + goto _EmitDisplacement; + } + } + } + break; + + case kInstGroupX86Jmp: + if (encoded == ENC_OPS(Reg, None, None)) { + rmReg = static_cast(o0)->getRegIndex(); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Mem, None, None)) { + rmMem = static_cast(o0); + goto _EmitX86M; + } + + // The following instructions use the secondary opcode (0xE9). + opCode = 0xE9; + + if (encoded == ENC_OPS(Imm, None, None)) { + imVal = static_cast(o0)->getInt64(); + goto _EmitJmpOrCallImm; + } + + if (encoded == ENC_OPS(Label, None, None)) { + label = &self->_labels[static_cast(o0)->getId()]; + if (label->offset != -1) { + // Bound label. + const intptr_t kRel8Size = 2; + const intptr_t kRel32Size = 5; + + intptr_t offs = label->offset - (intptr_t)(cursor - self->_buffer); + + if ((options & kInstOptionLongForm) == 0 && IntUtil::isInt8(offs - kRel8Size)) { + options |= kInstOptionShortForm; + + EMIT_BYTE(0xEB); + EMIT_BYTE(offs - kRel8Size); + goto _EmitDone; + } + else { + options &= ~kInstOptionShortForm; + + EMIT_BYTE(0xE9); + EMIT_DWORD(static_cast(offs - kRel32Size)); + goto _EmitDone; + } + } + else { + // Non-bound label. 
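For a label that is not bound yet the jump distance is unknown, so the emitter can only use the 1-byte displacement when the caller explicitly forced the short form; a bound (backward) label lets it measure the offset and pick the smaller encoding. A sketch of the rel8-versus-rel32 test applied to bound labels:

```cpp
#include <cassert>
#include <cstdint>

// Sketch: a bound label lets the emitter pick 'jmp rel8' (EB cb, 2 bytes)
// when the offset fits; otherwise 'jmp rel32' (E9 cd, 5 bytes) is used.
static bool fitsRel8(intptr_t labelOffset, intptr_t instrStart) {
  intptr_t rel = labelOffset - (instrStart + 2);  // Relative to the next instruction.
  return rel >= -128 && rel <= 127;
}

int main() {
  assert(fitsRel8(0, 100));    // 100 bytes back: short form is fine.
  assert(!fitsRel8(0, 1000));  // 1000 bytes back: needs rel32.
  return 0;
}
```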
+ if ((options & kInstOptionShortForm) != 0) { + EMIT_BYTE(0xEB); + dispOffset = -1; + dispSize = 1; + relocId = -1; + goto _EmitDisplacement; + } + else { + EMIT_BYTE(0xE9); + dispOffset = -4; + dispSize = 4; + relocId = -1; + goto _EmitDisplacement; + } + } + } + break; + + case kInstGroupX86Lea: + if (encoded == ENC_OPS(Reg, Mem, None)) { + ADD_66H_P(o0->getSize() == 2); + ADD_REX_W(o0->getSize() == 8); + + opReg = static_cast(o0)->getRegIndex(); + rmMem = static_cast(o1); + goto _EmitX86M; + } + break; + + case kInstGroupX86Mov: + if (encoded == ENC_OPS(Reg, Reg, None)) { + opReg = static_cast(o0)->getRegIndex(); + rmReg = static_cast(o1)->getRegIndex(); + + // Sreg <- Reg + if (static_cast(o0)->isSeg()) { + ASMJIT_ASSERT(static_cast(o1)->isGpw() || + static_cast(o1)->isGpd() || + static_cast(o1)->isGpq() ); + opCode = 0x8E; + ADD_66H_P(o1->getSize() == 2); + ADD_REX_W(o1->getSize() == 8); + goto _EmitX86R; + } + + // Reg <- Sreg + if (static_cast(o1)->isSeg()) { + ASMJIT_ASSERT(static_cast(o0)->isGpw() || + static_cast(o0)->isGpd() || + static_cast(o0)->isGpq() ); + opCode = 0x8C; + ADD_66H_P(o0->getSize() == 2); + ADD_REX_W(o0->getSize() == 8); + goto _EmitX86R; + } + // Reg <- Reg + else { + ASMJIT_ASSERT(static_cast(o0)->isGpb() || + static_cast(o0)->isGpw() || + static_cast(o0)->isGpd() || + static_cast(o0)->isGpq() ); + opCode = 0x8A + (o0->getSize() != 1); + ADD_66H_P(o0->getSize() == 2); + ADD_REX_W(o0->getSize() == 8); + goto _EmitX86R; + } + } + + if (encoded == ENC_OPS(Reg, Mem, None)) { + opReg = static_cast(o0)->getRegIndex(); + rmMem = static_cast(o1); + + // Sreg <- Mem + if (static_cast(o0)->isRegType(kRegTypeSeg)) { + opCode = 0x8E; + opReg--; + ADD_66H_P(o1->getSize() == 2); + ADD_REX_W(o1->getSize() == 8); + goto _EmitX86M; + } + // Reg <- Mem + else { + ASMJIT_ASSERT(static_cast(o0)->isGpb() || + static_cast(o0)->isGpw() || + static_cast(o0)->isGpd() || + static_cast(o0)->isGpq() ); + opCode = 0x8A + (o0->getSize() != 1); + ADD_66H_P(o0->getSize() == 2); + ADD_REX_W(o0->getSize() == 8); + goto _EmitX86M; + } + } + + if (encoded == ENC_OPS(Mem, Reg, None)) { + opReg = static_cast(o1)->getRegIndex(); + rmMem = static_cast(o0); + + // Mem <- Sreg + if (static_cast(o1)->isSeg()) { + opCode = 0x8C; + ADD_66H_P(o0->getSize() == 2); + ADD_REX_W(o0->getSize() == 8); + goto _EmitX86M; + } + // Mem <- Reg + else { + ASMJIT_ASSERT(static_cast(o1)->isGpb() || + static_cast(o1)->isGpw() || + static_cast(o1)->isGpd() || + static_cast(o1)->isGpq() ); + opCode = 0x88 + (o1->getSize() != 1); + ADD_66H_P(o1->getSize() == 2); + ADD_REX_W(o1->getSize() == 8); + goto _EmitX86M; + } + } + + if (encoded == ENC_OPS(Reg, Imm, None)) { + // 64-bit immediate in 64-bit mode is allowed. + imVal = static_cast(o1)->getInt64(); + imLen = o0->getSize(); + + opReg = 0; + rmReg = static_cast(o0)->getRegIndex(); + + // Optimize instruction size by using 32-bit immediate if possible. 
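The optimization below relies on `C7 /0` with REX.W, which sign-extends a 4-byte immediate to 64 bits, as opposed to the full `B8+r` form that carries an 8-byte immediate. Expressed as instruction lengths:

```cpp
#include <cassert>
#include <cstdint>

// Sketch: length of 'mov r64, imm' with the imm32 optimization.
// REX.W + C7 /0 + ModR/M + imm32 = 7 bytes; REX.W + B8+r + imm64 = 10.
static unsigned movRegImm64Length(int64_t imm) {
  bool fitsInt32 = imm >= INT32_MIN && imm <= INT32_MAX;
  return fitsInt32 ? 7u : 10u;
}

int main() {
  assert(movRegImm64Length(-1) == 7);              // Sign-extends fine.
  assert(movRegImm64Length(0x100000000LL) == 10);  // Needs the full imm64.
  return 0;
}
```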
+ if (Arch == kArchX64 && imLen == 8 && IntUtil::isInt32(imVal)) { + opCode = 0xC7; + ADD_REX_W(1); + imLen = 4; + goto _EmitX86R; + } + else { + opCode = 0xB0 + (static_cast(o0->getSize() != 1) << 3) + (static_cast(rmReg) & 0x7); + ADD_REX_W(imLen == 8); + ADD_REX_B(rmReg); + goto _EmitX86OpI; + } + } + + if (encoded == ENC_OPS(Mem, Imm, None)) { + imVal = static_cast(o1)->getInt64(); + imLen = IntUtil::iMin(o0->getSize(), 4); + + opCode = 0xC6 + (o0->getSize() != 1); + opReg = 0; + ADD_66H_P(o0->getSize() == 2); + ADD_REX_W(o0->getSize() == 8); + + rmMem = static_cast(o0); + goto _EmitX86M; + } + break; + + case kInstGroupX86MovSxZx: + if (encoded == ENC_OPS(Reg, Reg, None)) { + opCode += o1->getSize() != 1; + ADD_66H_P(o0->getSize() == 2); + ADD_REX_W(o0->getSize() == 8); + + opReg = static_cast(o0)->getRegIndex(); + rmReg = static_cast(o1)->getRegIndex(); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Reg, Mem, None)) { + opCode += o1->getSize() != 1; + ADD_66H_P(o0->getSize() == 2); + ADD_REX_W(o0->getSize() == 8); + + opReg = static_cast(o0)->getRegIndex(); + rmMem = static_cast(o1); + goto _EmitX86M; + } + break; + + case kInstGroupX86MovPtr: + if (encoded == ENC_OPS(Reg, Imm, None)) { + ASMJIT_ASSERT(static_cast(o0)->getRegIndex() == 0); + + opCode += o0->getSize() != 1; + ADD_66H_P(o0->getSize() == 2); + ADD_REX_W(o0->getSize() == 8); + + imVal = static_cast(o1)->getInt64(); + imLen = self->_regSize; + goto _EmitX86OpI; + } + + // The following instruction uses the secondary opcode. + opCode = info->_opCode[1]; + + if (encoded == ENC_OPS(Imm, Reg, None)) { + ASMJIT_ASSERT(static_cast(o1)->getRegIndex() == 0); + + opCode += o1->getSize() != 1; + ADD_66H_P(o1->getSize() == 2); + ADD_REX_W(o1->getSize() == 8); + + imVal = static_cast(o0)->getInt64(); + imLen = self->_regSize; + goto _EmitX86OpI; + } + break; + + case kInstGroupX86Push: + if (encoded == ENC_OPS(Reg, None, None)) { + if (o0->isRegType(kRegTypeSeg)) { + uint32_t segment = static_cast(o0)->getRegIndex(); + ASMJIT_ASSERT(segment < kRegCountSeg); + + if (segment >= kSegFs) + EMIT_BYTE(0x0F); + + EMIT_BYTE(x86OpCodePushSeg[segment]); + goto _EmitDone; + } + else { + goto _GroupPop_Gp; + } + } + + if (encoded == ENC_OPS(Imm, None, None)) { + imVal = static_cast(o0)->getInt64(); + imLen = IntUtil::isInt8(imVal) ? 1 : 4; + + EMIT_BYTE(imLen == 1 ? 0x6A : 0x68); + goto _EmitImm; + } + // ... Fall through ... + + case kInstGroupX86Pop: + if (encoded == ENC_OPS(Reg, None, None)) { + if (o0->isRegType(kRegTypeSeg)) { + uint32_t segment = static_cast(o0)->getRegIndex(); + ASMJIT_ASSERT(segment < kRegCountSeg); + + if (segment >= kSegFs) + EMIT_BYTE(0x0F); + + EMIT_BYTE(x86OpCodePopSeg[segment]); + goto _EmitDone; + } + else { +_GroupPop_Gp: + ASMJIT_ASSERT(static_cast(o0)->getSize() == 2 || + static_cast(o0)->getSize() == self->_regSize); + + opReg = static_cast(o0)->getRegIndex(); + opCode = info->_opCode[1] + (opReg & 7); + + ADD_66H_P(o0->getSize() == 2); + ADD_REX_B(opReg); + + goto _EmitX86Op; + } + } + + if (encoded == ENC_OPS(Mem, None, None)) { + ADD_66H_P(o0->getSize() == 2); + + rmMem = static_cast(o0); + goto _EmitX86M; + } + break; + + case kInstGroupX86Rep: + // Emit REP 0xF2 or 0xF3 prefix first. 
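Here `opReg`, extracted from the opcode's /O field, acts as a one-bit selector: adding it to 0xF2 yields either the REPNE or the REP/REPE prefix byte directly.

```cpp
#include <cassert>
#include <cstdint>

// Sketch: the '0xF2 + selector' trick used for REP prefixes:
// 0 -> REPNE (0xF2), 1 -> REP/REPE (0xF3).
static uint8_t repPrefix(uint32_t selector) {
  return static_cast<uint8_t>(0xF2 + (selector & 1));
}

int main() {
  assert(repPrefix(0) == 0xF2);  // repne
  assert(repPrefix(1) == 0xF3);  // rep / repe
  return 0;
}
```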
+ EMIT_BYTE(0xF2 + opReg); + goto _EmitX86Op; + + case kInstGroupX86Ret: + if (encoded == ENC_OPS(None, None, None)) { + EMIT_BYTE(0xC3); + goto _EmitDone; + } + + if (encoded == ENC_OPS(Imm, None, None)) { + imVal = static_cast(o0)->getInt64(); + if (imVal == 0) { + EMIT_BYTE(0xC3); + goto _EmitDone; + } + else { + EMIT_BYTE(0xC2); + imLen = 2; + goto _EmitImm; + } + } + break; + + case kInstGroupX86Rot: + opCode += o0->getSize() != 1; + ADD_66H_P(o0->getSize() == 2); + ADD_REX_W(o0->getSize() == 8); + + if (encoded == ENC_OPS(Reg, Reg, None)) { + ASMJIT_ASSERT(static_cast(o1)->isRegCode(kRegTypeGpbLo, kRegIndexCx)); + opCode += 2; + rmReg = static_cast(o0)->getRegIndex(); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Mem, Reg, None)) { + ASMJIT_ASSERT(static_cast(o1)->isRegCode(kRegTypeGpbLo, kRegIndexCx)); + opCode += 2; + rmMem = static_cast(o0); + goto _EmitX86M; + } + + if (encoded == ENC_OPS(Reg, Imm, None)) { + imVal = static_cast(o1)->getInt64() & 0xFF; + imLen = imVal != 1; + if (imLen) + opCode -= 16; + rmReg = static_cast(o0)->getRegIndex(); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Mem, Imm, None)) { + imVal = static_cast(o1)->getInt64() & 0xFF; + imLen = imVal != 1; + if (imLen) + opCode -= 0x10; + rmMem = static_cast(o0); + goto _EmitX86M; + } + break; + + case kInstGroupX86Set: + if (encoded == ENC_OPS(Reg, None, None)) { + ASMJIT_ASSERT(o0->getSize() == 1); + + rmReg = static_cast(o0)->getRegIndex(); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Mem, None, None)) { + ASMJIT_ASSERT(o0->getSize() <= 1); + + rmMem = static_cast(o0); + goto _EmitX86M; + } + break; + + case kInstGroupX86Shlrd: + if (encoded == ENC_OPS(Reg, Reg, Imm)) { + ASMJIT_ASSERT(o0->getSize() == o1->getSize()); + + ADD_66H_P(o0->getSize() == 2); + ADD_REX_W(o0->getSize() == 8); + + imVal = static_cast(o2)->getInt64(); + imLen = 1; + + opReg = static_cast(o1)->getRegIndex(); + rmReg = static_cast(o0)->getRegIndex(); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Mem, Reg, Imm)) { + ADD_66H_P(o1->getSize() == 2); + ADD_REX_W(o1->getSize() == 8); + + imVal = static_cast(o2)->getInt64(); + imLen = 1; + + opReg = static_cast(o1)->getRegIndex(); + rmMem = static_cast(o0); + goto _EmitX86M; + } + + // The following instructions use opCode + 1. + opCode++; + + if (encoded == ENC_OPS(Reg, Reg, Reg)) { + ASMJIT_ASSERT(static_cast(o2)->isRegCode(kRegTypeGpbLo, kRegIndexCx)); + ASMJIT_ASSERT(o0->getSize() == o1->getSize()); + + ADD_66H_P(o0->getSize() == 2); + ADD_REX_W(o0->getSize() == 8); + + opReg = static_cast(o1)->getRegIndex(); + rmReg = static_cast(o0)->getRegIndex(); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Mem, Reg, Reg)) { + ASMJIT_ASSERT(static_cast(o2)->isRegCode(kRegTypeGpbLo, kRegIndexCx)); + + ADD_66H_P(o1->getSize() == 2); + ADD_REX_W(o1->getSize() == 8); + + opReg = static_cast(o1)->getRegIndex(); + rmMem = static_cast(o0); + goto _EmitX86M; + } + break; + + case kInstGroupX86Test: + if (encoded == ENC_OPS(Reg, Reg, None)) { + ASMJIT_ASSERT(o0->getSize() == o1->getSize()); + + opCode += o0->getSize() != 1; + ADD_66H_P(o0->getSize() == 2); + ADD_REX_W(o0->getSize() == 8); + + opReg = static_cast(o1)->getRegIndex(); + rmReg = static_cast(o0)->getRegIndex(); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Mem, Reg, None)) { + opCode += o1->getSize() != 1; + ADD_66H_P(o1->getSize() == 2); + ADD_REX_W(o1->getSize() == 8); + + opReg = static_cast(o1)->getRegIndex(); + rmMem = static_cast(o0); + goto _EmitX86M; + } + + // The following instructions use the secondary opcode. 
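`test` with an immediate has no sign-extended imm8 form, but the accumulator gets a dedicated short encoding (`A8`/`A9`) that drops the ModR/M byte, which is what the alternate-form branch below exploits. A sketch of the two 32-bit encodings:

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

// Sketch: 'test eax, imm32' uses the short form A9 id (no ModR/M byte);
// any other register needs the generic form F7 /0 id.
static std::vector<uint8_t> encodeTestRegImm32(uint32_t reg, uint32_t imm) {
  std::vector<uint8_t> out;
  if (reg == 0) {
    out.push_back(0xA9);                                       // test eax, imm32
  }
  else {
    out.push_back(0xF7);                                       // test r32, imm32
    out.push_back(static_cast<uint8_t>(0xC0 | (0 << 3) | (reg & 7)));
  }
  for (int i = 0; i < 4; i++)
    out.push_back(static_cast<uint8_t>(imm >> (i * 8)));
  return out;
}

int main() {
  assert(encodeTestRegImm32(0, 1).size() == 5);  // Short form (eax).
  assert(encodeTestRegImm32(1, 1).size() == 6);  // Generic form (ecx).
  return 0;
}
```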
+ opCode = info->_opCode[1] + (o0->getSize() != 1); + opReg = opCode >> kInstOpCode_O_Shift; + + if (encoded == ENC_OPS(Reg, Imm, None)) { + imVal = static_cast(o1)->getInt64(); + imLen = IntUtil::iMin(o0->getSize(), 4); + + // Alternate Form - AL, AX, EAX, RAX. + if (static_cast(o0)->getRegIndex() == 0) { + opCode = 0xA8 + (o0->getSize() != 1); + ADD_66H_P(o0->getSize() == 2); + ADD_REX_W(o0->getSize() == 8); + goto _EmitX86OpI; + } + + ADD_66H_P(o0->getSize() == 2); + ADD_REX_W(o0->getSize() == 8); + + rmReg = static_cast(o0)->getRegIndex(); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Mem, Imm, None)) { + ASMJIT_ASSERT(o0->getSize() != 0); + + imVal = static_cast(o1)->getInt64(); + imLen = IntUtil::iMin(o0->getSize(), 4); + + ADD_66H_P(o0->getSize() == 2); + ADD_REX_W(o0->getSize() == 8); + + rmMem = static_cast(o0); + goto _EmitX86M; + } + break; + + case kInstGroupX86Xchg: + if (encoded == ENC_OPS(Reg, Mem, None)) { + opCode += o0->getSize() != 1; + ADD_66H_P(o0->getSize() == 2); + ADD_REX_W(o0->getSize() == 8); + + opReg = static_cast(o0)->getRegIndex(); + rmMem = static_cast(o1); + goto _EmitX86M; + } + // ... fall through ... + + case kInstGroupX86Xadd: + if (encoded == ENC_OPS(Reg, Reg, None)) { + opReg = static_cast(o1)->getRegIndex(); + rmReg = static_cast(o0)->getRegIndex(); + + // Special opcode for AX/EAX/RAX. + if (o0->getSize() > 1 && (opReg == 0 || rmReg == 0)) { + opCode = 0x90 + opReg + rmReg; // One of them is zero. + ADD_66H_P(o0->getSize() == 2); + ADD_REX_W(o0->getSize() == 8); + goto _EmitX86Op; + } + + opCode += o0->getSize() != 1; + ADD_66H_P(o0->getSize() == 2); + ADD_REX_W(o0->getSize() == 8); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Mem, Reg, None)) { + opCode += o1->getSize() != 1; + ADD_66H_P(o1->getSize() == 2); + ADD_REX_W(o1->getSize() == 8); + + opReg = static_cast(o1)->getRegIndex(); + rmMem = static_cast(o0); + goto _EmitX86M; + } + break; + + // ------------------------------------------------------------------------ + // [Fpu] + // ------------------------------------------------------------------------ + + case kInstGroupFpuOp: + goto _EmitFpuOp; + + case kInstGroupFpuArith: + if (encoded == ENC_OPS(Reg, Reg, None)) { + opReg = static_cast(o0)->getRegIndex(); + rmReg = static_cast(o1)->getRegIndex(); + rmReg += opReg; + + // We switch to the alternative opcode if the first operand is zero. + if (opReg == 0) { +_EmitFpArith_Reg: + opCode = 0xD800 + ((opCode >> 8) & 0xFF) + static_cast(rmReg); + goto _EmitFpuOp; + } + else { + opCode = 0xDC00 + ((opCode >> 0) & 0xFF) + static_cast(rmReg); + goto _EmitFpuOp; + } + } + + if (encoded == ENC_OPS(Mem, None, None)) { + // 0xD8/0xDC, depends on the size of the memory operand; opReg has been + // set already. +_EmitFpArith_Mem: + opCode = (o0->getSize() == 4) ? 
0xD8 : 0xDC; + rmMem = static_cast(o0); + goto _EmitX86M; + } + break; + + case kInstGroupFpuCom: + if (encoded == ENC_OPS(Reg, None, None)) { + rmReg = static_cast(o0)->getRegIndex(); + goto _EmitFpArith_Reg; + } + + if (encoded == ENC_OPS(Mem, None, None)) { + goto _EmitFpArith_Mem; + } + break; + + case kInstGroupFpuFldFst: + if (encoded == ENC_OPS(Mem, None, None)) { + rmMem = static_cast(o0); + + if (o0->getSize() == 4 && (info->_flags & kInstFlagMem4)) { + goto _EmitX86M; + } + + if (o0->getSize() == 8 && (info->_flags & kInstFlagMem8)) { + opCode += 4; + goto _EmitX86M; + } + + if (o0->getSize() == 10 && (info->_flags & kInstFlagMem10)) { + opCode = info->_opCode[1]; + opReg = opCode >> kInstOpCode_O_Shift; + goto _EmitX86M; + } + } + + if (encoded == ENC_OPS(Reg, None, None)) { + if (code == kInstFld) { + opCode = 0xD9C0 + static_cast(o0)->getRegIndex(); + goto _EmitFpuOp; + } + + if (code == kInstFst) { + opCode = 0xDDD0 + static_cast(o0)->getRegIndex(); + goto _EmitFpuOp; + } + + if (code == kInstFstp) { + opCode = 0xDDD8 + static_cast(o0)->getRegIndex(); + goto _EmitFpuOp; + } + } + break; + + + case kInstGroupFpuM: + if (encoded == ENC_OPS(Mem, None, None)) { + rmMem = static_cast(o0); + + if (o0->getSize() == 2 && (info->_flags & kInstFlagMem2)) { + opCode += 4; + goto _EmitX86M; + } + + if (o0->getSize() == 4 && (info->_flags & kInstFlagMem4)) { + goto _EmitX86M; + } + + if (o0->getSize() == 8 && (info->_flags & kInstFlagMem8)) { + opCode = info->_opCode[1]; + opReg = opCode >> kInstOpCode_O_Shift; + goto _EmitX86M; + } + } + break; + + case kInstGroupFpuR: + if (encoded == ENC_OPS(Reg, None, None)) { + opCode += static_cast(o0)->getRegIndex(); + goto _EmitFpuOp; + } + break; + + case kInstGroupFpuStsw: + if (encoded == ENC_OPS(Reg, None, None)) { + if (static_cast(o0)->getRegIndex() != 0) + goto _IllegalInst; + + opCode = info->_opCode[1]; + goto _EmitX86Op; + } + + if (encoded == ENC_OPS(Mem, None, None)) { + rmMem = static_cast(o0); + goto _EmitX86M; + } + break; + + // ------------------------------------------------------------------------ + // [Ext] + // ------------------------------------------------------------------------ + + case kInstGroupExtCrc: + if (encoded == ENC_OPS(Reg, Reg, None)) { + ASMJIT_ASSERT(static_cast(o0)->getRegType() == kRegTypeGpd || + static_cast(o0)->getRegType() == kRegTypeGpq); + + opCode += o0->getSize() != 1; + ADD_66H_P(o0->getSize() == 2); + ADD_REX_W(o0->getSize() == 8); + + opReg = static_cast(o0)->getRegIndex(); + rmReg = static_cast(o1)->getRegIndex(); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Reg, Mem, None)) { + ASMJIT_ASSERT(static_cast(o0)->getRegType() == kRegTypeGpd || + static_cast(o0)->getRegType() == kRegTypeGpq); + + opCode += o0->getSize() != 1; + ADD_66H_P(o0->getSize() == 2); + ADD_REX_W(o0->getSize() == 8); + + opReg = static_cast(o0)->getRegIndex(); + rmMem = static_cast(o1); + goto _EmitX86M; + } + break; + + case kInstGroupExtExtract: + if (encoded == ENC_OPS(Reg, Reg, Imm)) { + ADD_66H_P(static_cast(o1)->isXmm()); + + imVal = static_cast(o2)->getInt64(); + imLen = 1; + + opReg = static_cast(o1)->getRegIndex(); + rmReg = static_cast(o0)->getRegIndex(); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Mem, Reg, Imm)) { + // Secondary opcode for 'pextrw' instruction (SSE2). 
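The split matters because the register-destination form of `pextrw` is the classic `0F C5 /r ib` encoding, while storing the extracted word to memory only exists in the later `66 0F 3A 15 /r ib` encoding (added with SSE4.1), hence the switch to the secondary opcode. Sketched as opcode-byte selection:

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

// Sketch: choose the 'pextrw' opcode bytes by destination kind. The
// register form is the classic encoding; the store-to-memory form lives
// under the 0F 3A escape.
static std::vector<uint8_t> pextrwOpcode(bool destIsMemory) {
  if (!destIsMemory)
    return { 0x66, 0x0F, 0xC5 };        // pextrw r32, xmm, imm8
  return { 0x66, 0x0F, 0x3A, 0x15 };    // pextrw r/m16, xmm, imm8
}

int main() {
  assert(pextrwOpcode(false).size() == 3);
  assert(pextrwOpcode(true).size() == 4);
  return 0;
}
```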
+ opCode = info->_opCode[1]; + ADD_66H_P(static_cast(o1)->isXmm()); + + imVal = static_cast(o2)->getInt64(); + imLen = 1; + + opReg = static_cast(o1)->getRegIndex(); + rmMem = static_cast(o0); + goto _EmitX86M; + } + break; + + case kInstGroupExtFence: + if (Arch == kArchX64 && opX) { + EMIT_BYTE(0x40 | opX); + } + + EMIT_BYTE(0x0F); + EMIT_OP(opCode); + EMIT_BYTE(0xC0 | (opReg << 3)); + goto _EmitDone; + + case kInstGroupExtMov: + case kInstGroupExtMovNoRexW: + ASMJIT_ASSERT(info->_opFlags[0] != 0); + ASMJIT_ASSERT(info->_opFlags[1] != 0); + + // Check parameters Gpd|Gpq|Mm|Xmm <- Gpd|Gpq|Mm|Xmm|Mem|Imm. + ASMJIT_ASSERT(!((o0->isMem() && (info->_opFlags[0] & kInstOpMem) == 0) || + (o0->isRegType(kRegTypeMm ) && (info->_opFlags[0] & kInstOpMm ) == 0) || + (o0->isRegType(kRegTypeXmm) && (info->_opFlags[0] & kInstOpXmm) == 0) || + (o0->isRegType(kRegTypeGpd) && (info->_opFlags[0] & kInstOpGd ) == 0) || + (o0->isRegType(kRegTypeGpq) && (info->_opFlags[0] & kInstOpGq ) == 0) || + (o1->isMem() && (info->_opFlags[1] & kInstOpMem) == 0) || + (o1->isRegType(kRegTypeMm ) && (info->_opFlags[1] & kInstOpMm ) == 0) || + (o1->isRegType(kRegTypeXmm) && (info->_opFlags[1] & kInstOpXmm) == 0) || + (o1->isRegType(kRegTypeGpd) && (info->_opFlags[1] & kInstOpGd ) == 0) || + (o1->isRegType(kRegTypeGpq) && (info->_opFlags[1] & kInstOpGq ) == 0) )); + + // Gp|Mm|Xmm <- Gp|Mm|Xmm + if (encoded == ENC_OPS(Reg, Reg, None)) { + ADD_REX_W(static_cast(o0)->isGpq() && (info->getGroup() != kInstGroupExtMovNoRexW)); + ADD_REX_W(static_cast(o1)->isGpq() && (info->getGroup() != kInstGroupExtMovNoRexW)); + + opReg = static_cast(o0)->getRegIndex(); + rmReg = static_cast(o1)->getRegIndex(); + goto _EmitX86R; + } + + // Gp|Mm|Xmm <- Mem + if (encoded == ENC_OPS(Reg, Mem, None)) { + ADD_REX_W(static_cast(o0)->isGpq() && (info->getGroup() != kInstGroupExtMovNoRexW)); + + opReg = static_cast(o0)->getRegIndex(); + rmMem = static_cast(o1); + goto _EmitX86M; + } + + // The following instruction uses opCode[1]. + opCode = info->_opCode[1]; + + // Mem <- Gp|Mm|Xmm + if (encoded == ENC_OPS(Mem, Reg, None)) { + ADD_REX_W(static_cast(o1)->isGpq() && (info->getGroup() != kInstGroupExtMovNoRexW)); + + opReg = static_cast(o1)->getRegIndex(); + rmMem = static_cast(o0); + goto _EmitX86M; + } + break; + + case kInstGroupExtMovBe: + if (encoded == ENC_OPS(Reg, Mem, None)) { + ADD_66H_P(o0->getSize() == 2); + ADD_REX_W(o0->getSize() == 8); + + opReg = static_cast(o0)->getRegIndex(); + rmMem = static_cast(o1); + goto _EmitX86M; + } + + // The following instruction uses the secondary opcode. + opCode = info->_opCode[1]; + + if (encoded == ENC_OPS(Mem, Reg, None)) { + ADD_66H_P(o1->getSize() == 2); + ADD_REX_W(o1->getSize() == 8); + + opReg = static_cast(o1)->getRegIndex(); + rmMem = static_cast(o0); + goto _EmitX86M; + } + break; + + case kInstGroupExtMovD: +_EmitMmMovD: + opReg = static_cast(o0)->getRegIndex(); + ADD_66H_P(static_cast(o0)->isXmm()); + + // Mm/Xmm <- Gp + if (encoded == ENC_OPS(Reg, Reg, None) && static_cast(o1)->isGp()) { + rmReg = static_cast(o1)->getRegIndex(); + goto _EmitX86R; + } + + // Mm/Xmm <- Mem + if (encoded == ENC_OPS(Reg, Mem, None)) { + rmMem = static_cast(o1); + goto _EmitX86M; + } + + // The following instructions use the secondary opcode. 
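`movd` is direction-coded: `0F 6E` moves into the MMX/XMM register and `0F 7E` moves out of it, with the mandatory `66` prefix selecting the XMM variant over MMX; the store direction is what the secondary opcode holds here. A reduced sketch:

```cpp
#include <cassert>
#include <cstdint>

// Sketch: 'movd' opcode selection by direction. 0F 6E loads into the
// MMX/XMM register, 0F 7E stores out of it.
static uint8_t movdOpcode(bool regIsDestination) {
  return regIsDestination ? 0x6E : 0x7E;
}

int main() {
  assert(movdOpcode(true)  == 0x6E);  // movd xmm, r/m32
  assert(movdOpcode(false) == 0x7E);  // movd r/m32, xmm
  return 0;
}
```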
+ opCode = info->_opCode[1]; + opReg = static_cast(o1)->getRegIndex(); + ADD_66H_P(static_cast(o1)->isXmm()); + + // Gp <- Mm/Xmm + if (encoded == ENC_OPS(Reg, Reg, None) && static_cast(o0)->isGp()) { + rmReg = static_cast(o0)->getRegIndex(); + goto _EmitX86R; + } + + // Mem <- Mm/Xmm + if (encoded == ENC_OPS(Mem, Reg, None)) { + rmMem = static_cast(o0); + goto _EmitX86M; + } + break; + + case kInstGroupExtMovQ: + if (encoded == ENC_OPS(Reg, Reg, None)) { + opReg = static_cast(o0)->getRegIndex(); + rmReg = static_cast(o1)->getRegIndex(); + + // Mm <- Mm + if (static_cast(o0)->isMm() && static_cast(o1)->isMm()) { + opCode = kInstOpCode_PP_00 | kInstOpCode_MM_0F | 0x6F; + goto _EmitX86R; + } + + // Xmm <- Xmm + if (static_cast(o0)->isXmm() && static_cast(o1)->isXmm()) { + opCode = kInstOpCode_PP_F3 | kInstOpCode_MM_0F | 0x7E; + goto _EmitX86R; + } + + // Mm <- Xmm (Movdq2q) + if (static_cast(o0)->isMm() && static_cast(o1)->isXmm()) { + opCode = kInstOpCode_PP_F2 | kInstOpCode_MM_0F | 0xD6; + goto _EmitX86R; + } + + // Xmm <- Mm (Movq2dq) + if (static_cast(o0)->isXmm() && static_cast(o1)->isMm()) { + opCode = kInstOpCode_PP_F3 | kInstOpCode_MM_0F | 0xD6; + goto _EmitX86R; + } + } + + if (encoded == ENC_OPS(Reg, Mem, None)) { + opReg = static_cast(o0)->getRegIndex(); + rmMem = static_cast(o1); + + // Mm <- Mem + if (static_cast(o0)->isMm()) { + opCode = kInstOpCode_PP_00 | kInstOpCode_MM_0F | 0x6F; + goto _EmitX86M; + } + + // Xmm <- Mem + if (static_cast(o0)->isXmm()) { + opCode = kInstOpCode_PP_F3 | kInstOpCode_MM_0F | 0x7E; + goto _EmitX86M; + } + } + + if (encoded == ENC_OPS(Mem, Reg, None)) { + opReg = static_cast(o1)->getRegIndex(); + rmMem = static_cast(o0); + + // Mem <- Mm + if (static_cast(o1)->isMm()) { + opCode = kInstOpCode_PP_00 | kInstOpCode_MM_0F | 0x7F; + goto _EmitX86M; + } + + // Mem <- Xmm + if (static_cast(o1)->isXmm()) { + opCode = kInstOpCode_PP_66 | kInstOpCode_MM_0F | 0xD6; + goto _EmitX86M; + } + } + + if (Arch == kArchX64) { + // Movq in other case is simply promoted Movd instruction by REX prefix. + ADD_REX_W(true); + + opCode = kInstOpCode_PP_00 | kInstOpCode_MM_0F | 0x6E; + goto _EmitMmMovD; + } + break; + + case kInstGroupExtPrefetch: + if (encoded == ENC_OPS(Mem, Imm, None)) { + opReg = static_cast(o1)->getUInt32() & 0x3; + rmMem = static_cast(o0); + goto _EmitX86M; + } + break; + + case kInstGroupExtRm_Q: + ADD_REX_W(o0->isRegType(kRegTypeGpq) || o1->isRegType(kRegTypeGpq) || (o1->isMem() && o1->getSize() == 8)); + // ... Fall through ... 
+ + case kInstGroupExtRm: + if (encoded == ENC_OPS(Reg, Reg, None)) { + opReg = static_cast(o0)->getRegIndex(); + rmReg = static_cast(o1)->getRegIndex(); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Reg, Mem, None)) { + opReg = static_cast(o0)->getRegIndex(); + rmMem = static_cast(o1); + goto _EmitX86M; + } + break; + + case kInstGroupExtRm_P: + if (encoded == ENC_OPS(Reg, Reg, None)) { + ADD_66H_P(static_cast(o0)->isXmm() || static_cast(o1)->isXmm()); + + opReg = static_cast(o0)->getRegIndex(); + rmReg = static_cast(o1)->getRegIndex(); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Reg, Mem, None)) { + ADD_66H_P(static_cast(o0)->isXmm()); + + opReg = static_cast(o0)->getRegIndex(); + rmMem = static_cast(o1); + goto _EmitX86M; + } + break; + + case kInstGroupExtRmRi: + if (encoded == ENC_OPS(Reg, Reg, None)) { + opReg = static_cast(o0)->getRegIndex(); + rmReg = static_cast(o1)->getRegIndex(); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Reg, Mem, None)) { + opReg = static_cast(o0)->getRegIndex(); + rmMem = static_cast(o1); + goto _EmitX86M; + } + + // The following instruction uses the secondary opcode. + opCode = info->_opCode[1]; + opReg = opCode >> kInstOpCode_O_Shift; + + if (encoded == ENC_OPS(Reg, Imm, None)) { + imVal = static_cast(o1)->getInt64(); + imLen = 1; + + rmReg = static_cast(o0)->getRegIndex(); + goto _EmitX86R; + } + break; + + case kInstGroupExtRmRi_P: + if (encoded == ENC_OPS(Reg, Reg, None)) { + ADD_66H_P(static_cast(o0)->isXmm() || static_cast(o1)->isXmm()); + + opReg = static_cast(o0)->getRegIndex(); + rmReg = static_cast(o1)->getRegIndex(); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Reg, Mem, None)) { + ADD_66H_P(static_cast(o0)->isXmm()); + + opReg = static_cast(o0)->getRegIndex(); + rmMem = static_cast(o1); + goto _EmitX86M; + } + + // The following instruction uses the secondary opcode. + opCode = info->_opCode[1]; + opReg = opCode >> kInstOpCode_O_Shift; + + if (encoded == ENC_OPS(Reg, Imm, None)) { + ADD_66H_P(static_cast(o0)->isXmm()); + + imVal = static_cast(o1)->getInt64(); + imLen = 1; + + rmReg = static_cast(o0)->getRegIndex(); + goto _EmitX86R; + } + break; + + case kInstGroupExtRmi: + imVal = static_cast(o2)->getInt64(); + imLen = 1; + + if (encoded == ENC_OPS(Reg, Reg, Imm)) { + opReg = static_cast(o0)->getRegIndex(); + rmReg = static_cast(o1)->getRegIndex(); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Reg, Mem, Imm)) { + opReg = static_cast(o0)->getRegIndex(); + rmMem = static_cast(o1); + goto _EmitX86M; + } + break; + + case kInstGroupExtRmi_P: + imVal = static_cast(o2)->getInt64(); + imLen = 1; + + if (encoded == ENC_OPS(Reg, Reg, Imm)) { + ADD_66H_P(static_cast(o0)->isXmm() || static_cast(o1)->isXmm()); + + opReg = static_cast(o0)->getRegIndex(); + rmReg = static_cast(o1)->getRegIndex(); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Reg, Mem, Imm)) { + ADD_66H_P(static_cast(o0)->isXmm()); + + opReg = static_cast(o0)->getRegIndex(); + rmMem = static_cast(o1); + goto _EmitX86M; + } + break; + + // ------------------------------------------------------------------------ + // [Group - 3dNow] + // ------------------------------------------------------------------------ + + case kInstGroup3dNow: + // Every 3dNow instruction starts with 0x0F0F and the actual opcode is + // stored as 8-bit immediate. 
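So a 3dNow! instruction is laid out as the `0F 0F` escape, then ModR/M (plus any address bytes), then the real opcode trailing as an imm8 suffix. A sketch of the register-to-register layout, with `pfadd mm0, mm1` as the worked case:

```cpp
#include <cstdint>
#include <vector>

// Sketch: 3dNow! layout - escape bytes, ModR/M, then the selecting imm8.
static std::vector<uint8_t> encode3dNow(uint8_t modrm, uint8_t opcodeSuffix) {
  return { 0x0F, 0x0F, modrm, opcodeSuffix };
}

int main() {
  // 'pfadd mm0, mm1' -> 0F 0F C1 9E (suffix 0x9E selects pfadd).
  std::vector<uint8_t> pfadd = encode3dNow(0xC1, 0x9E);
  return pfadd.size() == 4 ? 0 : 1;
}
```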
+ imVal = opCode & 0xFF; + imLen = 1; + + opCode = kInstOpCode_MM_0F | 0x0F; + opReg = static_cast(o0)->getRegIndex(); + + if (encoded == ENC_OPS(Reg, Reg, None)) { + rmReg = static_cast(o1)->getRegIndex(); + goto _EmitX86R; + } + + if (encoded == ENC_OPS(Reg, Mem, None)) { + rmMem = static_cast(o1); + goto _EmitX86M; + } + break; + + // ------------------------------------------------------------------------ + // [Avx] + // ------------------------------------------------------------------------ + + case kInstGroupAvxOp: + goto _EmitAvxOp; + + case kInstGroupAvxM: + if (encoded == ENC_OPS(Mem, None, None)) { + rmMem = static_cast(o0); + goto _EmitAvxM; + } + break; + + case kInstGroupAvxMr_P: + ADD_VEX_L(static_cast(o0)->isYmm() || static_cast(o1)->isYmm()); + // ... Fall through ... + + case kInstGroupAvxMr: + if (encoded == ENC_OPS(Reg, Reg, None)) { + opReg = static_cast(o1)->getRegIndex(); + rmReg = static_cast(o0)->getRegIndex(); + goto _EmitAvxR; + } + + if (encoded == ENC_OPS(Mem, Reg, None)) { + opReg = static_cast(o1)->getRegIndex(); + rmMem = static_cast(o0); + goto _EmitAvxM; + } + break; + + case kInstGroupAvxMri_P: + ADD_VEX_L(static_cast(o0)->isYmm() || static_cast(o1)->isYmm()); + // ... Fall through ... + + case kInstGroupAvxMri: + imVal = static_cast(o2)->getInt64(); + imLen = 1; + + if (encoded == ENC_OPS(Reg, Reg, Imm)) { + opReg = static_cast(o1)->getRegIndex(); + rmReg = static_cast(o0)->getRegIndex(); + goto _EmitAvxR; + } + + if (encoded == ENC_OPS(Mem, Reg, Imm)) { + opReg = static_cast(o1)->getRegIndex(); + rmMem = static_cast(o0); + goto _EmitAvxM; + } + break; + + case kInstGroupAvxRm_P: + ADD_VEX_L(static_cast(o0)->isYmm() || static_cast(o1)->isYmm()); + // ... Fall through ... + + case kInstGroupAvxRm: + if (encoded == ENC_OPS(Reg, Reg, None)) { + opReg = static_cast(o0)->getRegIndex(); + rmReg = static_cast(o1)->getRegIndex(); + goto _EmitAvxR; + } + + if (encoded == ENC_OPS(Reg, Mem, None)) { + opReg = static_cast(o0)->getRegIndex(); + rmMem = static_cast(o1); + goto _EmitAvxM; + } + break; + + case kInstGroupAvxRmi_P: + ADD_VEX_L(static_cast(o0)->isYmm() || static_cast(o1)->isYmm()); + // ... Fall through ... + + case kInstGroupAvxRmi: + imVal = static_cast(o2)->getInt64(); + imLen = 1; + + if (encoded == ENC_OPS(Reg, Reg, Imm)) { + opReg = static_cast(o0)->getRegIndex(); + rmReg = static_cast(o1)->getRegIndex(); + goto _EmitAvxR; + } + + if (encoded == ENC_OPS(Reg, Mem, Imm)) { + opReg = static_cast(o0)->getRegIndex(); + rmMem = static_cast(o1); + goto _EmitAvxM; + } + break; + + case kInstGroupAvxRvm_P: + ADD_VEX_L(static_cast(o0)->isYmm() || static_cast(o1)->isYmm()); + // ... Fall through ... + + case kInstGroupAvxRvm: + if (encoded == ENC_OPS(Reg, Reg, Reg)) { +_EmitAvxRvm: + opReg = static_cast(o0)->getRegIndex(); + rmReg = static_cast(o2)->getRegIndex(); + opX |= static_cast(o1)->getRegIndex() << kVexVVVVShift; + goto _EmitAvxR; + } + + if (encoded == ENC_OPS(Reg, Reg, Mem)) { + opReg = static_cast(o0)->getRegIndex(); + opX |= static_cast(o1)->getRegIndex() << kVexVVVVShift; + rmMem = static_cast(o2); + goto _EmitAvxM; + } + break; + + case kInstGroupAvxRvmr_P: + ADD_VEX_L(static_cast(o0)->isYmm() || static_cast(o1)->isYmm()); + // ... Fall through ... 
+ + case kInstGroupAvxRvmr: + if (!o3->isReg()) + goto _IllegalInst; + + imVal = static_cast(o3)->getRegIndex() << 4;; + imLen = 1; + + if (encoded == ENC_OPS(Reg, Reg, Reg)) { + opReg = static_cast(o0)->getRegIndex(); + rmReg = static_cast(o2)->getRegIndex(); + opX |= static_cast(o1)->getRegIndex() << kVexVVVVShift; + goto _EmitAvxR; + } + + if (encoded == ENC_OPS(Reg, Reg, Mem)) { + opReg = static_cast(o0)->getRegIndex(); + opX |= static_cast(o1)->getRegIndex() << kVexVVVVShift; + rmMem = static_cast(o2); + goto _EmitAvxM; + } + break; + + case kInstGroupAvxRvmi_P: + ADD_VEX_L(static_cast(o0)->isYmm() || static_cast(o1)->isYmm()); + // ... Fall through ... + + case kInstGroupAvxRvmi: + if (!o3->isImm()) + goto _IllegalInst; + + imVal = static_cast(o3)->getInt64(); + imLen = 1; + + if (encoded == ENC_OPS(Reg, Reg, Reg)) { + opReg = static_cast(o0)->getRegIndex(); + rmReg = static_cast(o2)->getRegIndex(); + opX |= static_cast(o1)->getRegIndex() << kVexVVVVShift; + goto _EmitAvxR; + } + + if (encoded == ENC_OPS(Reg, Reg, Mem)) { + opReg = static_cast(o0)->getRegIndex(); + opX |= static_cast(o1)->getRegIndex() << kVexVVVVShift; + rmMem = static_cast(o2); + goto _EmitAvxM; + } + break; + + case kInstGroupAvxRmv: + if (encoded == ENC_OPS(Reg, Reg, Reg)) { + opReg = static_cast(o0)->getRegIndex(); + rmReg = static_cast(o1)->getRegIndex(); + opX |= static_cast(o2)->getRegIndex() << kVexVVVVShift; + goto _EmitAvxR; + } + + if (encoded == ENC_OPS(Reg, Mem, Reg)) { + opReg = static_cast(o0)->getRegIndex(); + opX |= static_cast(o2)->getRegIndex() << kVexVVVVShift; + rmMem = static_cast(o1); + goto _EmitAvxM; + } + break; + + case kInstGroupAvxRmvi: + if (!o3->isImm()) + goto _IllegalInst; + + imVal = static_cast(o3)->getInt64(); + imLen = 1; + + if (encoded == ENC_OPS(Reg, Reg, Reg)) { + opReg = static_cast(o0)->getRegIndex(); + rmReg = static_cast(o1)->getRegIndex(); + opX |= static_cast(o2)->getRegIndex() << kVexVVVVShift; + goto _EmitAvxR; + } + + if (encoded == ENC_OPS(Reg, Mem, Reg)) { + opReg = static_cast(o0)->getRegIndex(); + opX |= static_cast(o2)->getRegIndex() << kVexVVVVShift; + rmMem = static_cast(o1); + goto _EmitAvxM; + } + break; + + case kInstGroupAvxRmMr_P: + ADD_VEX_L(static_cast(o0)->isYmm() || static_cast(o1)->isYmm()); + // ... Fall through ... + + case kInstGroupAvxRmMr: + if (encoded == ENC_OPS(Reg, Reg, None)) { + opReg = static_cast(o0)->getRegIndex(); + rmReg = static_cast(o1)->getRegIndex(); + goto _EmitAvxR; + } + + if (encoded == ENC_OPS(Reg, Mem, None)) { + opReg = static_cast(o0)->getRegIndex(); + rmMem = static_cast(o1); + goto _EmitAvxM; + } + + // The following instruction uses the secondary opcode. + opCode = info->_opCode[1]; + + if (encoded == ENC_OPS(Mem, Reg, None)) { + opReg = static_cast(o1)->getRegIndex(); + rmMem = static_cast(o0); + goto _EmitAvxM; + } + break; + + case kInstGroupAvxRvmRmi_P: + ADD_VEX_L(static_cast(o0)->isYmm() || static_cast(o1)->isYmm()); + // ... Fall through ... + + case kInstGroupAvxRvmRmi: + if (encoded == ENC_OPS(Reg, Reg, Reg)) { + opReg = static_cast(o0)->getRegIndex(); + rmReg = static_cast(o2)->getRegIndex(); + opX |= static_cast(o1)->getRegIndex() << kVexVVVVShift; + goto _EmitAvxR; + } + + if (encoded == ENC_OPS(Reg, Reg, Mem)) { + opReg = static_cast(o0)->getRegIndex(); + opX |= static_cast(o1)->getRegIndex() << kVexVVVVShift; + rmMem = static_cast(o2); + goto _EmitAvxM; + } + + // The following instructions use the secondary opcode. 
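All of these AVX groups pack the extra source register into VEX.vvvv via `kVexVVVVShift`; in the assembled prefix that 4-bit field is stored inverted, so register 0 encodes as 0b1111. A minimal sketch of the field computation (helper name invented):

```cpp
#include <cassert>
#include <cstdint>

// Sketch: VEX.vvvv carries a fourth register field, stored inverted
// (~reg & 0xF) in the prefix byte.
static uint8_t vexVvvvField(uint32_t reg) {
  return static_cast<uint8_t>((~reg) & 0xF);
}

int main() {
  assert(vexVvvvField(0)  == 0xF);  // xmm0
  assert(vexVvvvField(15) == 0x0);  // xmm15
  return 0;
}
```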
+ opCode &= kInstOpCode_L_Mask; + opCode |= info->_opCode[1]; + + imVal = static_cast(o2)->getInt64(); + imLen = 1; + + if (encoded == ENC_OPS(Reg, Reg, Imm)) { + opReg = static_cast(o0)->getRegIndex(); + rmReg = static_cast(o1)->getRegIndex(); + goto _EmitAvxR; + } + + if (encoded == ENC_OPS(Reg, Mem, Imm)) { + opReg = static_cast(o0)->getRegIndex(); + rmMem = static_cast(o1); + goto _EmitAvxM; + } + break; + + case kInstGroupAvxRvmMr: + if (encoded == ENC_OPS(Reg, Reg, Reg)) { + opReg = static_cast(o0)->getRegIndex(); + rmReg = static_cast(o2)->getRegIndex(); + opX |= static_cast(o1)->getRegIndex() << kVexVVVVShift; + goto _EmitAvxR; + } + + if (encoded == ENC_OPS(Reg, Reg, Mem)) { + opReg = static_cast(o0)->getRegIndex(); + opX |= static_cast(o1)->getRegIndex() << kVexVVVVShift; + rmMem = static_cast(o2); + goto _EmitAvxM; + } + + // The following instructions use the secondary opcode. + opCode = info->_opCode[1]; + + if (encoded == ENC_OPS(Reg, Reg, None)) { + opReg = static_cast(o1)->getRegIndex(); + rmReg = static_cast(o0)->getRegIndex(); + goto _EmitAvxR; + } + + if (encoded == ENC_OPS(Mem, Reg, None)) { + opReg = static_cast(o1)->getRegIndex(); + rmMem = static_cast(o0); + goto _EmitAvxM; + } + break; + + case kInstGroupAvxRvmMvr_P: + ADD_VEX_L(static_cast(o0)->isYmm() || static_cast(o1)->isYmm()); + // ... Fall through ... + + case kInstGroupAvxRvmMvr: + if (encoded == ENC_OPS(Reg, Reg, Reg)) { + opReg = static_cast(o0)->getRegIndex(); + rmReg = static_cast(o2)->getRegIndex(); + opX |= static_cast(o1)->getRegIndex() << kVexVVVVShift; + goto _EmitAvxR; + } + + if (encoded == ENC_OPS(Reg, Reg, Mem)) { + opReg = static_cast(o0)->getRegIndex(); + opX |= static_cast(o1)->getRegIndex() << kVexVVVVShift; + rmMem = static_cast(o2); + goto _EmitAvxM; + } + + // The following instruction uses the secondary opcode. + opCode &= kInstOpCode_L_Mask; + opCode |= info->_opCode[1]; + + if (encoded == ENC_OPS(Mem, Reg, Reg)) { + opReg = static_cast(o2)->getRegIndex(); + opX |= static_cast(o1)->getRegIndex() << kVexVVVVShift; + rmMem = static_cast(o0); + goto _EmitAvxM; + } + break; + + case kInstGroupAvxRvmVmi_P: + ADD_VEX_L(static_cast(o0)->isYmm() || static_cast(o1)->isYmm()); + // ... Fall through ... + + case kInstGroupAvxRvmVmi: + if (encoded == ENC_OPS(Reg, Reg, Reg)) { + opReg = static_cast(o0)->getRegIndex(); + rmReg = static_cast(o2)->getRegIndex(); + opX |= static_cast(o1)->getRegIndex() << kVexVVVVShift; + goto _EmitAvxR; + } + + if (encoded == ENC_OPS(Reg, Reg, Mem)) { + opReg = static_cast(o0)->getRegIndex(); + opX |= static_cast(o1)->getRegIndex() << kVexVVVVShift; + rmMem = static_cast(o2); + goto _EmitAvxM; + } + + // The following instruction uses the secondary opcode. 
+ opCode &= kInstOpCode_L_Mask; + opCode |= info->_opCode[1]; + opReg = opCode >> kInstOpCode_O_Shift; + + imVal = static_cast(o2)->getInt64(); + imLen = 1; + + if (encoded == ENC_OPS(Reg, Reg, Imm)) { + opX |= static_cast(o0)->getRegIndex() << kVexVVVVShift; + rmReg = static_cast(o1)->getRegIndex(); + goto _EmitAvxR; + } + + if (encoded == ENC_OPS(Reg, Mem, Imm)) { + opX |= static_cast(o0)->getRegIndex() << kVexVVVVShift; + rmMem = static_cast(o1); + goto _EmitAvxM; + } + break; + + case kInstGroupAvxVm: + if (encoded == ENC_OPS(Reg, Reg, None)) { + opX |= static_cast(o0)->getRegIndex() << kVexVVVVShift; + rmReg = static_cast(o1)->getRegIndex(); + goto _EmitAvxR; + } + + if (encoded == ENC_OPS(Reg, Mem, None)) { + opX |= static_cast(o0)->getRegIndex() << kVexVVVVShift; + rmMem = static_cast(o1); + goto _EmitAvxM; + } + break; + + case kInstGroupAvxVmi_P: + ADD_VEX_L(static_cast(o0)->isYmm() || static_cast(o1)->isYmm()); + // ... Fall through ... + + case kInstGroupAvxVmi: + imVal = static_cast(o3)->getInt64(); + imLen = 1; + + if (encoded == ENC_OPS(Reg, Reg, Imm)) { + opX |= static_cast(o0)->getRegIndex() << kVexVVVVShift; + rmReg = static_cast(o1)->getRegIndex(); + goto _EmitAvxR; + } + + if (encoded == ENC_OPS(Reg, Mem, Imm)) { + opX |= static_cast(o0)->getRegIndex() << kVexVVVVShift; + rmMem = static_cast(o1); + goto _EmitAvxM; + } + break; + + case kInstGroupAvxMovSsSd: + if (encoded == ENC_OPS(Reg, Reg, Reg)) { + goto _EmitAvxRvm; + } + + if (encoded == ENC_OPS(Reg, Mem, None)) { + opX |= static_cast(o0)->getRegIndex() << kVexVVVVShift; + rmMem = static_cast(o1); + goto _EmitAvxM; + } + + if (encoded == ENC_OPS(Mem, Reg, None)) { + opReg = static_cast(o1)->getRegIndex(); + rmMem = static_cast(o0); + goto _EmitAvxM; + } + break; + + case kInstGroupAvxGatherEx: + if (encoded == ENC_OPS(Reg, Mem, Reg)) { + opReg = static_cast(o0)->getRegIndex(); + opX |= static_cast(o2)->getRegIndex() << kVexVVVVShift; + rmMem = static_cast(o1); + + uint32_t vSib = rmMem->getVSib(); + if (vSib == kMemVSibGpz) + goto _IllegalInst; + + ADD_VEX_L(vSib == kMemVSibYmm); + goto _EmitAvxV; + } + break; + + case kInstGroupAvxGather: + if (encoded == ENC_OPS(Reg, Mem, Reg)) { + opReg = static_cast(o0)->getRegIndex(); + opX |= static_cast(o2)->getRegIndex() << kVexVVVVShift; + rmMem = static_cast(o1); + + uint32_t vSib = rmMem->getVSib(); + if (vSib == kMemVSibGpz) + goto _IllegalInst; + + ADD_VEX_L(static_cast(o0)->isYmm() || static_cast(o2)->isYmm()); + goto _EmitAvxV; + } + break; + } + + // -------------------------------------------------------------------------- + // [Illegal] + // -------------------------------------------------------------------------- + +_IllegalAddr: + self->setError(kErrorAssemblerIllegalAddr); +#if defined(ASMJIT_DEBUG) + assertIllegal = true; +#endif // ASMJIT_DEBUG + goto _EmitDone; + +_IllegalInst: + self->setError(kErrorAssemblerIllegalInst); +#if defined(ASMJIT_DEBUG) + assertIllegal = true; +#endif // ASMJIT_DEBUG + goto _EmitDone; + + // -------------------------------------------------------------------------- + // [Emit - X86] + // -------------------------------------------------------------------------- + +_EmitX86Op: + // Mandatory instruction prefix. + EMIT_PP(opCode); + + // Rex prefix (64-bit only). + if (Arch == kArchX64 && opX) { + opX |= 0x40; + EMIT_BYTE(opX); + if (opX >= kRexForbidden) + goto _IllegalInst; + } + + // Instruction opcodes. + EMIT_MM(opCode); + EMIT_OP(opCode); + goto _EmitDone; + +_EmitX86OpI: + // Mandatory instruction prefix. 
+  EMIT_PP(opCode);
+
+  // Rex prefix (64-bit only).
+  if (Arch == kArchX64 && opX) {
+    opX |= 0x40;
+    EMIT_BYTE(opX);
+    if (opX >= kRexForbidden)
+      goto _IllegalInst;
+  }
+
+  // Instruction opcodes.
+  EMIT_MM(opCode);
+  EMIT_OP(opCode);
+  goto _EmitImm;
+
+_EmitX86R:
+  // Mandatory instruction prefix.
+  EMIT_PP(opCode);
+
+  // Rex prefix (64-bit only).
+  if (Arch == kArchX64) {
+    opX += static_cast<uint32_t>(opReg & 0x08) >> 1; // Rex.R (0x04).
+    opX += static_cast<uint32_t>(rmReg) >> 3;        // Rex.B (0x01).
+
+    if (opX) {
+      opX |= 0x40;
+      EMIT_BYTE(opX);
+
+      if (opX >= kRexForbidden)
+        goto _IllegalInst;
+
+      opReg &= 0x7;
+      rmReg &= 0x7;
+    }
+  }
+
+  // Instruction opcodes.
+  EMIT_MM(opCode);
+  EMIT_OP(opCode);
+
+  // ModR.
+  EMIT_BYTE(x86EncodeMod(3, opReg, static_cast<uint32_t>(rmReg)));
+
+  if (imLen != 0)
+    goto _EmitImm;
+  else
+    goto _EmitDone;
+
+_EmitX86M:
+  ASMJIT_ASSERT(rmMem != NULL);
+  ASMJIT_ASSERT(rmMem->getOp() == kOperandTypeMem);
+
+  mBase = rmMem->getBase();
+  mIndex = rmMem->getIndex();
+
+  // Size override prefix.
+  if (Arch == kArchX86) {
+    if (!rmMem->hasGpdBase())
+      EMIT_BYTE(0x67);
+  }
+  else {
+    if (rmMem->hasGpdBase())
+      EMIT_BYTE(0x67);
+  }
+
+  // Segment override prefix.
+  if (rmMem->hasSegment()) {
+    EMIT_BYTE(x86SegmentPrefix[rmMem->getSegment()]);
+  }
+
+  // Mandatory instruction prefix.
+  EMIT_PP(opCode);
+
+  // Rex prefix (64-bit only).
+  if (Arch == kArchX64) {
+    opX += static_cast<uint32_t>(opReg & 8) >> 1;      // Rex.R (0x04).
+    opX += static_cast<uint32_t>(mIndex - 8 < 8) << 1; // Rex.X (0x02).
+    opX += static_cast<uint32_t>(mBase - 8 < 8);       // Rex.B (0x01).
+
+    if (opX) {
+      opX |= 0x40;
+      EMIT_BYTE(opX);
+
+      if (opX >= kRexForbidden)
+        goto _IllegalInst;
+
+      opReg &= 0x7;
+    }
+
+    mBase &= 0x7;
+  }
+
+  // Instruction opcodes.
+  EMIT_MM(opCode);
+  EMIT_OP(opCode);
+
+_EmitSib:
+  dispOffset = rmMem->getDisplacement();
+  if (rmMem->isBaseIndexType()) {
+    if (mIndex >= kInvalidReg) {
+      if (mBase == kRegIndexSp) {
+        if (dispOffset == 0) {
+          // [Esp/Rsp/R12].
+          EMIT_BYTE(x86EncodeMod(0, opReg, 4));
+          EMIT_BYTE(x86EncodeSib(0, 4, 4));
+        }
+        else if (IntUtil::isInt8(dispOffset)) {
+          // [Esp/Rsp/R12 + Disp8].
+          EMIT_BYTE(x86EncodeMod(1, opReg, 4));
+          EMIT_BYTE(x86EncodeSib(0, 4, 4));
+          EMIT_BYTE(static_cast<int8_t>(dispOffset));
+        }
+        else {
+          // [Esp/Rsp/R12 + Disp32].
+          EMIT_BYTE(x86EncodeMod(2, opReg, 4));
+          EMIT_BYTE(x86EncodeSib(0, 4, 4));
+          EMIT_DWORD(static_cast<int32_t>(dispOffset));
+        }
+      }
+      else if (mBase != kRegIndexBp && dispOffset == 0) {
+        // [Base].
+        EMIT_BYTE(x86EncodeMod(0, opReg, mBase));
+      }
+      else if (IntUtil::isInt8(dispOffset)) {
+        // [Base + Disp8].
+        EMIT_BYTE(x86EncodeMod(1, opReg, mBase));
+        EMIT_BYTE(static_cast<int8_t>(dispOffset));
+      }
+      else {
+        // [Base + Disp32].
+        EMIT_BYTE(x86EncodeMod(2, opReg, mBase));
+        EMIT_DWORD(static_cast<int32_t>(dispOffset));
+      }
+    }
+    else {
+      uint32_t shift = rmMem->getShift();
+
+      // Esp/Rsp/R12 register can't be used as an index.
+      if (Arch == kArchX64)
+        mIndex &= 0x7;
+      ASMJIT_ASSERT(mIndex != kRegIndexSp);
+
+      if (mBase != kRegIndexBp && dispOffset == 0) {
+        // [Base + Index * Scale].
+        EMIT_BYTE(x86EncodeMod(0, opReg, 4));
+        EMIT_BYTE(x86EncodeSib(shift, mIndex, mBase));
+      }
+      else if (IntUtil::isInt8(dispOffset)) {
+        // [Base + Index * Scale + Disp8].
+        EMIT_BYTE(x86EncodeMod(1, opReg, 4));
+        EMIT_BYTE(x86EncodeSib(shift, mIndex, mBase));
+        EMIT_BYTE(static_cast<int8_t>(dispOffset));
+      }
+      else {
+        // [Base + Index * Scale + Disp32].
+        EMIT_BYTE(x86EncodeMod(2, opReg, 4));
+        EMIT_BYTE(x86EncodeSib(shift, mIndex, mBase));
+        EMIT_DWORD(static_cast<int32_t>(dispOffset));
+      }
+    }
+  }
+  else if (Arch == kArchX86) {
+    if (mIndex >= kInvalidReg) {
+      // [Disp32].
+      EMIT_BYTE(x86EncodeMod(0, opReg, 5));
+    }
+    else {
+      // [Index * Scale + Disp32].
+      uint32_t shift = rmMem->getShift();
+      ASMJIT_ASSERT(mIndex != kRegIndexSp);
+
+      EMIT_BYTE(x86EncodeMod(0, opReg, 4));
+      EMIT_BYTE(x86EncodeSib(shift, mIndex, 5));
+    }
+
+    if (rmMem->getMemType() == kMemTypeLabel) {
+      // Relative->Absolute [x86 mode].
+      label = &self->_labels[rmMem->_vmem.base];
+      relocId = self->_relocData.getLength();
+
+      RelocData reloc;
+      reloc.type = kRelocRelToAbs;
+      reloc.size = 4;
+      reloc.from = static_cast<Ptr>((uintptr_t)(cursor - self->_buffer));
+      reloc.data = static_cast<Ptr>(dispOffset);
+
+      if (self->_relocData.append(reloc) != kErrorOk)
+        return self->setError(kErrorNoHeapMemory);
+
+      if (label->offset != -1) {
+        // Bound label.
+        reloc.data += static_cast<Ptr>(label->offset);
+        EMIT_DWORD(0);
+      }
+      else {
+        // Non-bound label.
+        dispOffset = -4 - imLen;
+        dispSize = 4;
+        goto _EmitDisplacement;
+      }
+    }
+    else {
+      // [Disp32].
+      EMIT_DWORD(static_cast<int32_t>(dispOffset));
+    }
+  }
+  else {
+    if (rmMem->getMemType() == kMemTypeLabel) {
+      // [RIP + Disp32].
+      label = &self->_labels[rmMem->_vmem.base];
+
+      // Indexing is invalid.
+      if (mIndex < kInvalidReg)
+        goto _IllegalAddr;
+
+      EMIT_BYTE(x86EncodeMod(0, opReg, 5));
+      dispOffset -= (4 + imLen);
+
+      if (label->offset != -1) {
+        // Bound label.
+        dispOffset += static_cast<int32_t>((intptr_t)(cursor - self->_buffer) - label->offset);
+        EMIT_DWORD(static_cast<int32_t>(dispOffset));
+      }
+      else {
+        // Non-bound label.
+        dispSize = 4;
+        relocId = -1;
+        goto _EmitDisplacement;
+      }
+    }
+    else {
+      EMIT_BYTE(x86EncodeMod(0, opReg, 4));
+      if (mIndex >= kInvalidReg) {
+        // [Disp32].
+        EMIT_BYTE(x86EncodeSib(0, 4, 5));
+      }
+      else {
+        // [Disp32 + Index * Scale].
+        mIndex &= 0x7;
+        ASMJIT_ASSERT(mIndex != kRegIndexSp);
+
+        uint32_t shift = rmMem->getShift();
+        EMIT_BYTE(x86EncodeSib(shift, mIndex, 5));
+      }
+
+      EMIT_DWORD(static_cast<int32_t>(dispOffset));
+    }
+  }
+
+  if (imLen == 0)
+    goto _EmitDone;
+
+  // --------------------------------------------------------------------------
+  // [Emit - Imm]
+  // --------------------------------------------------------------------------
+
+_EmitImm:
+  switch (imLen) {
+    case 1: EMIT_BYTE (imVal & 0x000000FF); break;
+    case 2: EMIT_WORD (imVal & 0x0000FFFF); break;
+    case 4: EMIT_DWORD(imVal & 0xFFFFFFFF); break;
+    case 8: EMIT_QWORD(imVal             ); break;
+
+    default:
+      ASMJIT_ASSERT(!"Reached");
+  }
+  goto _EmitDone;
+
+  // --------------------------------------------------------------------------
+  // [Emit - Fpu]
+  // --------------------------------------------------------------------------
+
+_EmitFpuOp:
+  // Mandatory instruction prefix.
+  EMIT_PP(opCode);
+
+  // Instruction opcodes.
+  EMIT_OP(opCode >> 8);
+  EMIT_OP(opCode);
+  goto _EmitDone;
+
+  // --------------------------------------------------------------------------
+  // [Emit - Avx]
+  // --------------------------------------------------------------------------
+
+#define EMIT_AVX_M \
+  ASMJIT_ASSERT(rmMem != NULL); \
+  ASMJIT_ASSERT(rmMem->getOp() == kOperandTypeMem); \
+  \
+  if (rmMem->hasSegment()) { \
+    EMIT_BYTE(x86SegmentPrefix[rmMem->getSegment()]); \
+  } \
+  \
+  mBase = rmMem->getBase(); \
+  mIndex = rmMem->getIndex(); \
+  \
+  { \
+    uint32_t vex_XvvvvLpp; \
+    uint32_t vex_rxbmmmmm; \
+    \
+    vex_XvvvvLpp  = (opCode >> (kInstOpCode_L_Shift - 2)) & 0x04; \
+    vex_XvvvvLpp += (opCode >> (kInstOpCode_PP_Shift)) & 0x03; \
+    vex_XvvvvLpp += (opX >> (kVexVVVVShift - 3)); \
+    vex_XvvvvLpp += (opX << 4) & 0x80; \
+    \
+    vex_rxbmmmmm  = (opCode >> kInstOpCode_MM_Shift) & 0x1F; \
+    vex_rxbmmmmm += static_cast<uint32_t>(mBase  - 8 < 8) << 5; \
+    vex_rxbmmmmm += static_cast<uint32_t>(mIndex - 8 < 8) << 6; \
+    \
+    if (vex_rxbmmmmm != 0x01 || vex_XvvvvLpp >= 0x80 || (options & kInstOptionVex3) != 0) { \
+      vex_rxbmmmmm |= static_cast<uint32_t>(opReg << 4) & 0x80; \
+      vex_rxbmmmmm ^= 0xE0; \
+      vex_XvvvvLpp ^= 0x78; \
+      \
+      EMIT_BYTE(kVex3Byte); \
+      EMIT_BYTE(vex_rxbmmmmm); \
+      EMIT_BYTE(vex_XvvvvLpp); \
+      EMIT_OP(opCode); \
+    } \
+    else { \
+      vex_XvvvvLpp |= static_cast<uint32_t>(opReg << 4) & 0x80; \
+      vex_XvvvvLpp ^= 0xF8; \
+      \
+      EMIT_BYTE(kVex2Byte); \
+      EMIT_BYTE(vex_XvvvvLpp); \
+      EMIT_OP(opCode); \
+    } \
+  } \
+  \
+  mBase &= 0x7; \
+  opReg &= 0x7;
+
+_EmitAvxOp:
+  {
+    uint32_t vex_XvvvvLpp;
+
+    vex_XvvvvLpp  = (opCode >> (kInstOpCode_L_Shift - 2)) & 0x04;
+    vex_XvvvvLpp |= (opCode >> (kInstOpCode_PP_Shift));
+    vex_XvvvvLpp |= 0xF8;
+
+    // Encode 3-byte VEX prefix only if specified in options.
+    if ((options & kInstOptionVex3) != 0) {
+      uint32_t vex_rxbmmmmm = (opCode >> kInstOpCode_MM_Shift) | 0xE0;
+
+      EMIT_BYTE(kVex3Byte);
+      EMIT_OP(vex_rxbmmmmm);
+      EMIT_OP(vex_XvvvvLpp);
+      EMIT_OP(opCode);
+    }
+    else {
+      EMIT_BYTE(kVex2Byte);
+      EMIT_OP(vex_XvvvvLpp);
+      EMIT_OP(opCode);
+    }
+  }
+  goto _EmitDone;
+
+_EmitAvxR:
+  {
+    uint32_t vex_XvvvvLpp;
+    uint32_t vex_rxbmmmmm;
+
+    vex_XvvvvLpp  = (opCode >> (kInstOpCode_L_Shift - 2)) & 0x04;
+    vex_XvvvvLpp |= (opCode >> (kInstOpCode_PP_Shift));
+    vex_XvvvvLpp |= (opX >> (kVexVVVVShift - 3));
+    vex_XvvvvLpp |= (opX << 4) & 0x80;
+
+    vex_rxbmmmmm  = (opCode >> kInstOpCode_MM_Shift) & 0x1F;
+    vex_rxbmmmmm |= (rmReg << 2) & 0x20;
+
+    if (vex_rxbmmmmm != 0x01 || vex_XvvvvLpp >= 0x80 || (options & kInstOptionVex3) != 0) {
+      vex_rxbmmmmm |= static_cast<uint32_t>(opReg & 0x08) << 4;
+      vex_rxbmmmmm ^= 0xE0;
+      vex_XvvvvLpp ^= 0x78;
+
+      EMIT_BYTE(kVex3Byte);
+      EMIT_OP(vex_rxbmmmmm);
+      EMIT_OP(vex_XvvvvLpp);
+      EMIT_OP(opCode);
+
+      rmReg &= 0x07;
+    }
+    else {
+      vex_XvvvvLpp += static_cast<uint32_t>(opReg & 0x08) << 4;
+      vex_XvvvvLpp ^= 0xF8;
+
+      EMIT_BYTE(kVex2Byte);
+      EMIT_OP(vex_XvvvvLpp);
+      EMIT_OP(opCode);
+    }
+  }
+
+  EMIT_BYTE(x86EncodeMod(3, opReg, static_cast<uint32_t>(rmReg)));
+
+  if (imLen == 0)
+    goto _EmitDone;
+
+  EMIT_BYTE(imVal & 0xFF);
+  goto _EmitDone;
+
+_EmitAvxM:
+  EMIT_AVX_M
+  goto _EmitSib;
+
+_EmitAvxV:
+  EMIT_AVX_M
+
+  if (mIndex >= kInvalidReg)
+    goto _IllegalInst;
+
+  if (Arch == kArchX64)
+    mIndex &= 0x7;
+
+  dispOffset = rmMem->getDisplacement();
+  if (rmMem->isBaseIndexType()) {
+    uint32_t shift = rmMem->getShift();
+
+    if (mBase != kRegIndexBp && dispOffset == 0) {
+      // [Base + Index * Scale].
+      EMIT_BYTE(x86EncodeMod(0, opReg, 4));
+      EMIT_BYTE(x86EncodeSib(shift, mIndex, mBase));
+    }
+    else if (IntUtil::isInt8(dispOffset)) {
+      // [Base + Index * Scale + Disp8].
+      EMIT_BYTE(x86EncodeMod(1, opReg, 4));
+      EMIT_BYTE(x86EncodeSib(shift, mIndex, mBase));
+      EMIT_BYTE(static_cast<int8_t>(dispOffset));
+    }
+    else {
+      // [Base + Index * Scale + Disp32].
+      EMIT_BYTE(x86EncodeMod(2, opReg, 4));
+      EMIT_BYTE(x86EncodeSib(shift, mIndex, mBase));
+      EMIT_DWORD(static_cast<int32_t>(dispOffset));
+    }
+  }
+  else {
+    // [Index * Scale + Disp32].
+    uint32_t shift = rmMem->getShift();
+
+    EMIT_BYTE(x86EncodeMod(0, opReg, 4));
+    EMIT_BYTE(x86EncodeSib(shift, mIndex, 5));
+
+    if (rmMem->getMemType() == kMemTypeLabel) {
+      if (Arch == kArchX64)
+        goto _IllegalAddr;
+
+      // Relative->Absolute [x86 mode].
+      label = &self->_labels[rmMem->_vmem.base];
+      relocId = self->_relocData.getLength();
+
+      RelocData reloc;
+      reloc.type = kRelocRelToAbs;
+      reloc.size = 4;
+      reloc.from = static_cast<Ptr>((uintptr_t)(cursor - self->_buffer));
+      reloc.data = static_cast<Ptr>(dispOffset);
+
+      if (self->_relocData.append(reloc) != kErrorOk)
+        return self->setError(kErrorNoHeapMemory);
+
+      if (label->offset != -1) {
+        // Bound label.
+        reloc.data += static_cast<Ptr>(label->offset);
+        EMIT_DWORD(0);
+      }
+      else {
+        // Non-bound label.
+        dispOffset = -4 - imLen;
+        dispSize = 4;
+        goto _EmitDisplacement;
+      }
+    }
+    else {
+      // [Disp32].
+      EMIT_DWORD(static_cast<int32_t>(dispOffset));
+    }
+  }
+  goto _EmitDone;
+
+  // --------------------------------------------------------------------------
+  // [Emit - Jump/Call to an Immediate]
+  // --------------------------------------------------------------------------
+
+  // Emit a relative relocation to the absolute pointer @a target. We also
+  // need to record which instruction emitted it, because in x64 mode the
+  // relative displacement can be impossible to encode, in which case a
+  // trampoline is used.
+_EmitJmpOrCallImm:
+  {
+    // The jmp and call instructions have a single-byte opcode.
+    EMIT_OP(opCode);
+
+    RelocData rd;
+    rd.type = kRelocTrampoline;
+    rd.size = 4;
+    rd.from = (intptr_t)(cursor - self->_buffer);
+    rd.data = static_cast<Ptr>(imVal);
+
+    if (self->_relocData.append(rd) != kErrorOk)
+      return self->setError(kErrorNoHeapMemory);
+
+    // Emit a dummy 32-bit integer; it will be overwritten by relocCode().
+    EMIT_DWORD(0);
+
+    if (Arch == kArchX64) {
+      // In 64-bit mode a trampoline is used when the relative jump is not
+      // encodable.
+      self->_trampolineSize += X64TrampolineWriter::kSizeTotal;
+    }
+  }
+  goto _EmitDone;
+
+  // --------------------------------------------------------------------------
+  // [Emit - Displacement]
+  // --------------------------------------------------------------------------
+
+_EmitDisplacement:
+  {
+    ASMJIT_ASSERT(label->offset == -1);
+    ASMJIT_ASSERT(dispSize == 1 || dispSize == 4);
+
+    // Chain with label.
+    LabelLink* link = self->_newLabelLink();
+    link->prev = label->links;
+    link->offset = (intptr_t)(cursor - self->_buffer);
+    link->displacement = dispOffset;
+    link->relocId = relocId;
+    label->links = link;
+
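+    // Note (descriptive): links to a not-yet-bound label are chained through
+    // LabelLink records; each recorded displacement is patched later, when
+    // the label is bound or when the code is relocated.
+
+    // Emit label size as dummy data.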
+ if (dispSize == 1) + EMIT_BYTE(0x01); + else // if (dispSize == 4) + EMIT_DWORD(0x04040404); + } + + // -------------------------------------------------------------------------- + // [Logging] + // -------------------------------------------------------------------------- + +_EmitDone: +#if defined(ASMJIT_DEBUG) + if (self->_logger || assertIllegal) { +#else + if (self->_logger) { +#endif // ASMJIT_DEBUG + StringBuilderT<512> sb; + uint32_t loggerOptions = 0; + + if (self->_logger) { + sb.appendString(self->_logger->getIndentation()); + loggerOptions = self->_logger->getOptions(); + } + + X86Assembler_dumpInstruction(sb, Arch, code, options, o0, o1, o2, loggerOptions); + + if ((loggerOptions & (1 << kLoggerOptionBinaryForm)) != 0) + X86Assembler_dumpComment(sb, sb.getLength(), self->_cursor, (intptr_t)(cursor - self->_cursor), dispSize, self->_comment); + else + X86Assembler_dumpComment(sb, sb.getLength(), NULL, 0, 0, self->_comment); + +#if defined(ASMJIT_DEBUG) + if (self->_logger) +#endif // ASMJIT_DEBUG + self->_logger->logString(kLoggerStyleDefault, sb.getData(), sb.getLength()); + +#if defined(ASMJIT_DEBUG) + // Raise an assertion failure, because this situation shouldn't happen. + if (assertIllegal) + assertionFailed(sb.getData(), __FILE__, __LINE__); +#endif // ASMJIT_DEBUG + } + + self->_comment = NULL; + self->setCursor(cursor); + + return kErrorOk; + +_UnknownInst: + self->_comment = NULL; + return self->setError(kErrorAssemblerUnknownInst); + +_GrowBuffer: + ASMJIT_PROPAGATE_ERROR(self->_grow(16)); + + cursor = self->getCursor(); + goto _Prepare; +} + +} // x86x64 namespace +} // asmjit namespace + +// ============================================================================ +// [asmjit::x86] +// ============================================================================ + +#if defined(ASMJIT_BUILD_X86) + +namespace asmjit { +namespace x86 { + +Assembler::Assembler(BaseRuntime* runtime) : X86X64Assembler(runtime) { + _arch = kArchX86; + _regSize = 4; +} + +Assembler::~Assembler() {} + +size_t Assembler::_relocCode(void* dst, Ptr base) const { + return X86X64Assembler_relocCode(this, dst, base); +} + +Error Assembler::_emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, const Operand& o3) { + return X86X64Assembler_emit(this, code, &o0, &o1, &o2, &o3); +} + +} // x86 namespace +} // asmjit namespace + +#endif // ASMJIT_BUILD_X86 + +// ============================================================================ +// [asmjit::x64] +// ============================================================================ + +#if defined(ASMJIT_BUILD_X64) + +namespace asmjit { +namespace x64 { + +Assembler::Assembler(BaseRuntime* runtime) : X86X64Assembler(runtime) { + _arch = kArchX64; + _regSize = 8; +} + +Assembler::~Assembler() {} + +size_t Assembler::_relocCode(void* dst, Ptr base) const { + return X86X64Assembler_relocCode(this, dst, base); +} + +Error Assembler::_emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, const Operand& o3) { + return X86X64Assembler_emit(this, code, &o0, &o1, &o2, &o3); +} + +} // x64 namespace +} // asmjit namespace + +#endif // ASMJIT_BUILD_X64 + +// [Api-End] +#include "../base/apiend.h" + +// [Guard] +#endif // ASMJIT_BUILD_X86 || ASMJIT_BUILD_X64 diff --git a/src/asmjit/x86/x86assembler.h b/src/asmjit/x86/x86assembler.h new file mode 100644 index 0000000..d67c8e7 --- /dev/null +++ b/src/asmjit/x86/x86assembler.h @@ -0,0 +1,5375 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. 
+// +// [License] +// Zlib - See LICENSE.md file in the package. + +// [Guard] +#ifndef _ASMJIT_X86_X86ASSEMBLER_H +#define _ASMJIT_X86_X86ASSEMBLER_H + +// [Dependencies - AsmJit] +#include "../base/assembler.h" +#include "../x86/x86defs.h" + +// [Api-Begin] +#include "../base/apibegin.h" + +namespace asmjit { +namespace x86x64 { + +//! @addtogroup asmjit_x86x64 +//! @{ + +// ============================================================================ +// [CodeGen-Begin] +// ============================================================================ + +#define INST_0x(_Inst_, _Code_) \ + ASMJIT_INLINE Error _Inst_() { \ + return emit(_Code_); \ + } + +#define INST_1x(_Inst_, _Code_, _Op0_) \ + ASMJIT_INLINE Error _Inst_(const _Op0_& o0) { \ + return emit(_Code_, o0); \ + } + +#define INST_1x_(_Inst_, _Code_, _Op0_, _Cond_) \ + ASMJIT_INLINE Error _Inst_(const _Op0_& o0) { \ + ASMJIT_ASSERT(_Cond_); \ + return emit(_Code_, o0); \ + } + +#define INST_1i(_Inst_, _Code_, _Op0_) \ + ASMJIT_INLINE Error _Inst_(const _Op0_& o0) { \ + return emit(_Code_, o0); \ + } \ + \ + /* @overload */ \ + ASMJIT_INLINE Error _Inst_(int o0) { \ + return emit(_Code_, o0); \ + } + +#define INST_1i_(_Inst_, _Code_, _Op0_, _Cond_) \ + ASMJIT_INLINE Error _Inst_(const _Op0_& o0) { \ + ASMJIT_ASSERT(_Cond_); \ + return emit(_Code_, o0); \ + } \ + \ + /* @overload */ \ + ASMJIT_INLINE Error _Inst_(int o0) { \ + ASMJIT_ASSERT(_Cond_); \ + return emit(_Code_, o0); \ + } + +#define INST_1cc(_Inst_, _Code_, _Translate_, _Op0_) \ + ASMJIT_INLINE Error _Inst_(uint32_t cc, const _Op0_& o0) { \ + return emit(_Translate_(cc), o0); \ + } \ + \ + ASMJIT_INLINE Error _Inst_##a(const _Op0_& o0) { return emit(_Code_##a, o0); } \ + ASMJIT_INLINE Error _Inst_##ae(const _Op0_& o0) { return emit(_Code_##ae, o0); } \ + ASMJIT_INLINE Error _Inst_##b(const _Op0_& o0) { return emit(_Code_##b, o0); } \ + ASMJIT_INLINE Error _Inst_##be(const _Op0_& o0) { return emit(_Code_##be, o0); } \ + ASMJIT_INLINE Error _Inst_##c(const _Op0_& o0) { return emit(_Code_##c, o0); } \ + ASMJIT_INLINE Error _Inst_##e(const _Op0_& o0) { return emit(_Code_##e, o0); } \ + ASMJIT_INLINE Error _Inst_##g(const _Op0_& o0) { return emit(_Code_##g, o0); } \ + ASMJIT_INLINE Error _Inst_##ge(const _Op0_& o0) { return emit(_Code_##ge, o0); } \ + ASMJIT_INLINE Error _Inst_##l(const _Op0_& o0) { return emit(_Code_##l, o0); } \ + ASMJIT_INLINE Error _Inst_##le(const _Op0_& o0) { return emit(_Code_##le, o0); } \ + ASMJIT_INLINE Error _Inst_##na(const _Op0_& o0) { return emit(_Code_##na, o0); } \ + ASMJIT_INLINE Error _Inst_##nae(const _Op0_& o0) { return emit(_Code_##nae, o0); } \ + ASMJIT_INLINE Error _Inst_##nb(const _Op0_& o0) { return emit(_Code_##nb, o0); } \ + ASMJIT_INLINE Error _Inst_##nbe(const _Op0_& o0) { return emit(_Code_##nbe, o0); } \ + ASMJIT_INLINE Error _Inst_##nc(const _Op0_& o0) { return emit(_Code_##nc, o0); } \ + ASMJIT_INLINE Error _Inst_##ne(const _Op0_& o0) { return emit(_Code_##ne, o0); } \ + ASMJIT_INLINE Error _Inst_##ng(const _Op0_& o0) { return emit(_Code_##ng, o0); } \ + ASMJIT_INLINE Error _Inst_##nge(const _Op0_& o0) { return emit(_Code_##nge, o0); } \ + ASMJIT_INLINE Error _Inst_##nl(const _Op0_& o0) { return emit(_Code_##nl, o0); } \ + ASMJIT_INLINE Error _Inst_##nle(const _Op0_& o0) { return emit(_Code_##nle, o0); } \ + ASMJIT_INLINE Error _Inst_##no(const _Op0_& o0) { return emit(_Code_##no, o0); } \ + ASMJIT_INLINE Error _Inst_##np(const _Op0_& o0) { return emit(_Code_##np, o0); } \ + ASMJIT_INLINE Error _Inst_##ns(const _Op0_& o0) { 
return emit(_Code_##ns, o0); } \ + ASMJIT_INLINE Error _Inst_##nz(const _Op0_& o0) { return emit(_Code_##nz, o0); } \ + ASMJIT_INLINE Error _Inst_##o(const _Op0_& o0) { return emit(_Code_##o, o0); } \ + ASMJIT_INLINE Error _Inst_##p(const _Op0_& o0) { return emit(_Code_##p, o0); } \ + ASMJIT_INLINE Error _Inst_##pe(const _Op0_& o0) { return emit(_Code_##pe, o0); } \ + ASMJIT_INLINE Error _Inst_##po(const _Op0_& o0) { return emit(_Code_##po, o0); } \ + ASMJIT_INLINE Error _Inst_##s(const _Op0_& o0) { return emit(_Code_##s, o0); } \ + ASMJIT_INLINE Error _Inst_##z(const _Op0_& o0) { return emit(_Code_##z, o0); } + +#define INST_2x(_Inst_, _Code_, _Op0_, _Op1_) \ + ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1) { \ + return emit(_Code_, o0, o1); \ + } + +#define INST_2x_(_Inst_, _Code_, _Op0_, _Op1_, _Cond_) \ + ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1) { \ + ASMJIT_ASSERT(_Cond_); \ + return emit(_Code_, o0, o1); \ + } + +#define INST_2i(_Inst_, _Code_, _Op0_, _Op1_) \ + ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1) { \ + return emit(_Code_, o0, o1); \ + } \ + \ + /* @overload */ \ + ASMJIT_INLINE Error _Inst_(const _Op0_& o0, int o1) { \ + return emit(_Code_, o0, o1); \ + } + +#define INST_2i_(_Inst_, _Code_, _Op0_, _Op1_, _Cond_) \ + ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1) { \ + ASMJIT_ASSERT(_Cond_); \ + return emit(_Code_, o0, o1); \ + } \ + \ + /* @overload */ \ + ASMJIT_INLINE Error _Inst_(const _Op0_& o0, int o1) { \ + ASMJIT_ASSERT(_Cond_); \ + return emit(_Code_, o0, o1); \ + } + +#define INST_2cc(_Inst_, _Code_, _Translate_, _Op0_, _Op1_) \ + ASMJIT_INLINE Error _Inst_(uint32_t cc, const _Op0_& o0, const _Op1_& o1) { \ + return emit(_Translate_(cc), o0, o1); \ + } \ + \ + ASMJIT_INLINE Error _Inst_##a(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##a, o0, o1); } \ + ASMJIT_INLINE Error _Inst_##ae(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##ae, o0, o1); } \ + ASMJIT_INLINE Error _Inst_##b(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##b, o0, o1); } \ + ASMJIT_INLINE Error _Inst_##be(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##be, o0, o1); } \ + ASMJIT_INLINE Error _Inst_##c(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##c, o0, o1); } \ + ASMJIT_INLINE Error _Inst_##e(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##e, o0, o1); } \ + ASMJIT_INLINE Error _Inst_##g(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##g, o0, o1); } \ + ASMJIT_INLINE Error _Inst_##ge(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##ge, o0, o1); } \ + ASMJIT_INLINE Error _Inst_##l(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##l, o0, o1); } \ + ASMJIT_INLINE Error _Inst_##le(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##le, o0, o1); } \ + ASMJIT_INLINE Error _Inst_##na(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##na, o0, o1); } \ + ASMJIT_INLINE Error _Inst_##nae(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##nae, o0, o1); } \ + ASMJIT_INLINE Error _Inst_##nb(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##nb, o0, o1); } \ + ASMJIT_INLINE Error _Inst_##nbe(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##nbe, o0, o1); } \ + ASMJIT_INLINE Error _Inst_##nc(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##nc, o0, o1); } \ + ASMJIT_INLINE Error _Inst_##ne(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##ne, o0, o1); } \ + ASMJIT_INLINE Error _Inst_##ng(const _Op0_& o0, const _Op1_& o1) { return 
emit(_Code_##ng, o0, o1); } \ + ASMJIT_INLINE Error _Inst_##nge(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##nge, o0, o1); } \ + ASMJIT_INLINE Error _Inst_##nl(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##nl, o0, o1); } \ + ASMJIT_INLINE Error _Inst_##nle(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##nle, o0, o1); } \ + ASMJIT_INLINE Error _Inst_##no(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##no, o0, o1); } \ + ASMJIT_INLINE Error _Inst_##np(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##np, o0, o1); } \ + ASMJIT_INLINE Error _Inst_##ns(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##ns, o0, o1); } \ + ASMJIT_INLINE Error _Inst_##nz(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##nz, o0, o1); } \ + ASMJIT_INLINE Error _Inst_##o(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##o, o0, o1); } \ + ASMJIT_INLINE Error _Inst_##p(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##p, o0, o1); } \ + ASMJIT_INLINE Error _Inst_##pe(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##pe, o0, o1); } \ + ASMJIT_INLINE Error _Inst_##po(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##po, o0, o1); } \ + ASMJIT_INLINE Error _Inst_##s(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##s, o0, o1); } \ + ASMJIT_INLINE Error _Inst_##z(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##z, o0, o1); } + +#define INST_3x(_Inst_, _Code_, _Op0_, _Op1_, _Op2_) \ + ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2) { \ + return emit(_Code_, o0, o1, o2); \ + } + +#define INST_3x_(_Inst_, _Code_, _Op0_, _Op1_, _Op2_, _Cond_) \ + ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2) { \ + ASMJIT_ASSERT(_Cond_); \ + return emit(_Code_, o0, o1, o2); \ + } + +#define INST_3i(_Inst_, _Code_, _Op0_, _Op1_, _Op2_) \ + ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2) { \ + return emit(_Code_, o0, o1, o2); \ + } \ + \ + /* @overload */ \ + ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1, int o2) { \ + return emit(_Code_, o0, o1, o2); \ + } + +#define INST_3i_(_Inst_, _Code_, _Op0_, _Op1_, _Op2_, _Cond_) \ + ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2) { \ + ASMJIT_ASSERT(_Cond_); \ + return emit(_Code_, o0, o1, o2); \ + } \ + \ + /* @overload */ \ + ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1, int o2) { \ + ASMJIT_ASSERT(_Cond_); \ + return emit(_Code_, o0, o1, o2); \ + } + + +#define INST_4x(_Inst_, _Code_, _Op0_, _Op1_, _Op2_, _Op3_) \ + ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2, const _Op3_& o3) { \ + return emit(_Code_, o0, o1, o2, o3); \ + } + +#define INST_4x_(_Inst_, _Code_, _Op0_, _Op1_, _Op2_, _Op3_, _Cond_) \ + ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2, const _Op3_& o3) { \ + ASMJIT_ASSERT(_Cond_); \ + return emit(_Code_, o0, o1, o2, o3); \ + } + +#define INST_4i(_Inst_, _Code_, _Op0_, _Op1_, _Op2_, _Op3_) \ + ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2, const _Op3_& o3) { \ + return emit(_Code_, o0, o1, o2, o3); \ + } \ + \ + /* @overload */ \ + ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2, int o3) { \ + return emit(_Code_, o0, o1, o2, o3); \ + } + +#define INST_4i_(_Inst_, _Code_, _Op0_, _Op1_, _Op2_, _Op3_, _Cond_) \ + ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2, const _Op3_& o3) { \ + ASMJIT_ASSERT(_Cond_); \ + 
return emit(_Code_, o0, o1, o2, o3); \
+  } \
+  \
+  /* @overload */ \
+  ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2, int o3) { \
+    ASMJIT_ASSERT(_Cond_); \
+    return emit(_Code_, o0, o1, o2, o3); \
+  }
+
+// ============================================================================
+// [asmjit::x86x64::X86X64Assembler]
+// ============================================================================
+
+//! @brief X86/X64 assembler.
+//!
+//! @ref Assembler is the main class in AsmJit that can encode instructions
+//! and their operands to a binary stream runnable by the CPU. It creates an
+//! internal buffer where the encoded instructions are stored, and it contains
+//! intrinsics that can be used to emit code in a convenient way. Code
+//! generation is in general safe, because the intrinsics use method
+//! overloading, so even as the code is emitted it can be checked by the C++
+//! compiler. It's nearly impossible to create an invalid instruction (for
+//! example mov [eax], [eax]), because such an overload doesn't exist.
+//!
+//! Each call to an assembler intrinsic function emits the instruction
+//! directly to the binary stream. There are also runtime checks that prevent
+//! invalid code from being emitted; such code will assert in debug mode and
+//! put the @ref Assembler instance into an error state in production mode.
+//!
+//! @section AsmJit_Assembler_CodeGeneration Code Generation
+//!
+//! To generate code you only need to create an instance of
+//! @c asmjit::Assembler and use the intrinsics. See this example:
+//!
+//! @code
+//! // Use asmjit namespace.
+//! using namespace asmjit;
+//! using namespace asmjit::host;
+//!
+//! // Create Assembler instance.
+//! Assembler a;
+//!
+//! // Prolog.
+//! a.push(ebp);
+//! a.mov(ebp, esp);
+//!
+//! // Mov 1024 to EAX, EAX is also the return value.
+//! a.mov(eax, 1024);
+//!
+//! // Epilog.
+//! a.mov(esp, ebp);
+//! a.pop(ebp);
+//!
+//! // Return.
+//! a.ret();
+//! @endcode
+//!
+//! You can see that the syntax is very close to the Intel one. The only
+//! difference is that you are calling functions that emit the binary code for
+//! you. All registers are in the @c asmjit namespace, so it's very comfortable
+//! to use them (look at the first line). There is also the method
+//! @c asmjit::imm() to create an immediate value; use @c asmjit::imm_u() to
+//! create an unsigned immediate value.
+//!
+//! It is also possible to use memory addresses and immediates. To build a
+//! memory address use @c ptr(), @c byte_ptr(), @c word_ptr(), @c dword_ptr()
+//! or other friend methods. In most cases you need only the @c ptr() method,
+//! but there are instructions where you must specify the address size,
+//! for example (a is an @c asmjit::Assembler instance):
+//!
+//! @code
+//! a.mov(ptr(eax), 0);   // mov ptr [eax], 0
+//! a.mov(ptr(eax), edx); // mov ptr [eax], edx
+//! @endcode
+//!
+//! But it's also possible to create complex addresses:
+//!
+//! @code
+//! // eax + ecx*x addresses
+//! a.mov(ptr(eax, ecx, 0), 0); // mov ptr [eax + ecx], 0
+//! a.mov(ptr(eax, ecx, 1), 0); // mov ptr [eax + ecx * 2], 0
+//! a.mov(ptr(eax, ecx, 2), 0); // mov ptr [eax + ecx * 4], 0
+//! a.mov(ptr(eax, ecx, 3), 0); // mov ptr [eax + ecx * 8], 0
+//! // eax + ecx*x + disp addresses
+//! a.mov(ptr(eax, ecx, 0, 4), 0);  // mov ptr [eax + ecx + 4], 0
+//! a.mov(ptr(eax, ecx, 1, 8), 0);  // mov ptr [eax + ecx * 2 + 8], 0
+//! a.mov(ptr(eax, ecx, 2, 12), 0); // mov ptr [eax + ecx * 4 + 12], 0
+//! a.mov(ptr(eax, ecx, 3, 16), 0); // mov ptr [eax + ecx * 8 + 16], 0
+//! @endcode
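+//!
+//! For illustration, immediate operands can also be created explicitly with
+//! the @c imm() and @c imm_u() helpers mentioned above (a minimal sketch,
+//! the values are arbitrary):
+//!
+//! @code
+//! a.mov(eax, imm(-1));           // Signed immediate, sign-extended.
+//! a.mov(eax, imm_u(0xFFFFFFFF)); // Unsigned immediate.
+//! @endcode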
+//!
+//! All addresses shown use @c asmjit::ptr() to build the memory operand.
+//! Some assembler instructions (single-operand ones) need the memory operand
+//! size to be specified. For example a.inc(ptr(eax)) can't be encoded;
+//! @c asmjit::Assembler::inc(), @c asmjit::Assembler::dec() and similar
+//! instructions can't be encoded without specifying the operand size. The
+//! next example shows how this works:
+//!
+//! @code
+//! // [byte] address
+//! a.inc(byte_ptr(eax));  // inc byte ptr [eax]
+//! a.dec(byte_ptr(eax));  // dec byte ptr [eax]
+//! // [word] address
+//! a.inc(word_ptr(eax));  // inc word ptr [eax]
+//! a.dec(word_ptr(eax));  // dec word ptr [eax]
+//! // [dword] address
+//! a.inc(dword_ptr(eax)); // inc dword ptr [eax]
+//! a.dec(dword_ptr(eax)); // dec dword ptr [eax]
+//! @endcode
+//!
+//! @section AsmJit_Assembler_CallingJitCode Calling JIT Code
+//!
+//! Once you are done emitting instructions, you can make your function using
+//! the @c asmjit::Assembler::make() method. This method uses a memory manager
+//! to allocate virtual memory and relocates the generated code into it. The
+//! global memory manager is used by default and the memory it returns is
+//! freeable; this default behavior can be overridden by specifying your own
+//! memory manager and allocation type. If you want to do something else with
+//! the code, you can always override the make() method.
+//!
+//! You can get the size of the generated code with the @c getCodeSize() or
+//! @c getOffset() methods. These methods return the code size (or more
+//! precisely the current code offset) in bytes. Use takeCode() to take
+//! ownership of the internal buffer (all pointers in the @c asmjit::Assembler
+//! instance are zeroed and the current buffer is returned). If you don't take
+//! it, the @c asmjit::Assembler destructor frees it automatically. To
+//! allocate and run code manually, don't use @c malloc()'ed memory; instead
+//! use @c asmjit::VMem::alloc() to get executable memory (specify
+//! @c canExecute as @c true), or @c asmjit::MemoryManager, which provides a
+//! more effective and comfortable way to allocate virtual memory.
+//!
+//! The next example shows how to allocate and run code created by
+//! @c asmjit::Assembler:
+//!
+//! @code
+//! using namespace asmjit;
+//!
+//! JitRuntime runtime;
+//! Assembler a(&runtime);
+//!
+//! // ... Your code generation ...
+//!
+//! // The function prototype.
+//! typedef void (*MyFunc)();
+//!
+//! // Make your function.
+//! MyFunc func = asmjit_cast<MyFunc>(a.make());
+//!
+//! // Call your function.
+//! func();
+//!
+//! // If you don't need your function anymore, free it.
+//! runtime.free(func);
+//! @endcode
+//!
+//! @note This was a very primitive example of how to call generated code. In
+//! production code you would never alloc/free for a single run; you would
+//! store the allocated function and free it when the application ends or when
+//! the JIT objects do their cleanup.
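+//!
+//! A function that takes arguments is called the same way. The sketch below
+//! assumes the generated code follows the host calling convention; the
+//! prototype and values are only illustrative:
+//!
+//! @code
+//! // Hypothetical prototype of the generated function.
+//! typedef int (*AddFunc)(int, int);
+//!
+//! AddFunc add = asmjit_cast<AddFunc>(a.make());
+//! int result = add(1, 2);
+//!
+//! runtime.free((void*)add);
+//! @endcode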
+//!
+//! @section AsmJit_Assembler_Labels Labels
+//!
+//! While generating assembler code, you will usually need to create complex
+//! code with labels. Labels are fully supported and you can use @c jmp,
+//! @c je and similar instructions with a label that is already bound or not
+//! yet bound. Each label is expected to be bound to an offset; to bind a
+//! label to a specific offset, use the @c bind() method.
+//!
+//! The next example contains complete code that creates a simple memory copy
+//! function (copying DWord entities):
+//!
+//! @code
+//! // Example: Usage of Label (32-bit code).
+//! //
+//! // Create simple DWord memory copy function:
+//! // ASMJIT_STDCALL void copy32(uint32_t* dst, const uint32_t* src, size_t count);
+//! using namespace asmjit;
+//!
+//! // Assembler instance.
+//! JitRuntime runtime;
+//! Assembler a(&runtime);
+//!
+//! // Constants.
+//! const int arg_offset = 8; // Arguments offset (STDCALL EBP).
+//! const int arg_size = 12;  // Arguments size.
+//!
+//! // Labels.
+//! Label L_Loop(a);
+//!
+//! // Prolog.
+//! a.push(ebp);
+//! a.mov(ebp, esp);
+//! a.push(esi);
+//! a.push(edi);
+//!
+//! // Fetch arguments.
+//! a.mov(edi, dword_ptr(ebp, arg_offset + 0)); // Get dst.
+//! a.mov(esi, dword_ptr(ebp, arg_offset + 4)); // Get src.
+//! a.mov(ecx, dword_ptr(ebp, arg_offset + 8)); // Get count.
+//!
+//! // Bind L_Loop label to here.
+//! a.bind(L_Loop);
+//!
+//! // Copy 4 bytes.
+//! a.mov(eax, dword_ptr(esi));
+//! a.mov(dword_ptr(edi), eax);
+//!
+//! // Increment pointers.
+//! a.add(esi, 4);
+//! a.add(edi, 4);
+//!
+//! // Repeat loop while (--ecx != 0).
+//! a.dec(ecx);
+//! a.jnz(L_Loop);
+//!
+//! // Epilog.
+//! a.pop(edi);
+//! a.pop(esi);
+//! a.mov(esp, ebp);
+//! a.pop(ebp);
+//!
+//! // Return: STDCALL convention is to pop the stack in the called function.
+//! a.ret(arg_size);
+//! @endcode
+//!
+//! If you need more abstraction when generating assembler code and you want
+//! to hide the calling-convention differences between 32-bit and 64-bit
+//! operating systems, look at the @c Compiler class, which is designed for
+//! higher-level code generation.
+//!
+//! @section AsmJit_Assembler_AdvancedCodeGeneration Advanced Code Generation
+//!
+//! This section describes some advanced code generation features of the
+//! @c Assembler class that are easily overlooked. The first thing that is
+//! very likely needed is generic register support. In the previous examples
+//! named registers were used. AsmJit contains functions that convert a
+//! register index into an operand and back.
+//!
+//! Let's define a function that generates some abstract code:
+//!
+//! @code
+//! // Simple function that generates a dword copy.
+//! void genCopyDWord(BaseAssembler& a, const GpReg& dst, const GpReg& src, const GpReg& tmp)
+//! {
+//!   a.mov(tmp, dword_ptr(src));
+//!   a.mov(dword_ptr(dst), tmp);
+//! }
+//! @endcode
+//!
+//! This function can be called like genCopyDWord(a, edi, esi, ebx) or by
+//! using existing @ref GpReg instances. This abstraction allows you to join
+//! more code sections together without rewriting each to use specific
+//! registers. You only need to take care of implicit registers that may be
+//! used by several instructions (like mul, imul, div, idiv and shifts).
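+//!
+//! For illustration, a widening multiply reads and writes implicit registers
+//! (a minimal sketch, the source register is arbitrary):
+//!
+//! @code
+//! a.mul(esi); // EDX:EAX <- EAX * ESI; EAX and EDX are implicit.
+//! @endcode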
+//!
+//! Next, a more advanced, but often needed technique, is building your own
+//! register allocator. The X86 architecture contains 8 general-purpose
+//! registers, 8 Mm registers and 8 Xmm registers. The X64 (AMD64)
+//! architecture extends the count of Gp and Xmm registers to 16. Use the
+//! @c kRegCountBase constant to get the count of Gp or Xmm registers, or the
+//! @c kRegCountGp, @c kRegCountMm and @c kRegCountXmm constants individually.
+//!
+//! To build a register from an index (a value from 0 inclusive to kRegNumXXX
+//! exclusive) use the @ref gpd(), @ref gpq() or @ref gpz() functions. To
+//! create an 8-bit or 16-bit register use @ref gpw(), @ref gpb_lo() or
+//! @ref gpb_hi(). Other registers have similar functions such as @ref mm(),
+//! @ref xmm() and @ref fp().
+//!
+//! So our function call to genCopyDWord can also be written like this:
+//!
+//! @code
+//! genCopyDWord(a, gpd(kRegIndexDi), gpd(kRegIndexSi), gpd(kRegIndexBx));
+//! @endcode
+//!
+//! The kRegIndexXXX constants are defined by the @ref kRegIndex enum. You can
+//! use your own register allocator (or register slot manager) to alloc/free
+//! registers, so the kRegIndexXXX values can be replaced by your own
+//! variables (0 to kRegNumXXX-1).
+//!
+//! @sa @ref Compiler.
+struct X86X64Assembler : public BaseAssembler {
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_API X86X64Assembler(BaseRuntime* runtime);
+  ASMJIT_API virtual ~X86X64Assembler();
+
+  // --------------------------------------------------------------------------
+  // [Label]
+  // --------------------------------------------------------------------------
+
+  //! @override.
+  ASMJIT_API virtual void _bind(const Label& label);
+
+  // --------------------------------------------------------------------------
+  // [Embed]
+  // --------------------------------------------------------------------------
+
+  //! @brief Add 8-bit integer data to the instruction stream.
+  ASMJIT_INLINE void db(uint8_t x) { embed(&x, 1); }
+  //! @brief Add 16-bit integer data to the instruction stream.
+  ASMJIT_INLINE void dw(uint16_t x) { embed(&x, 2); }
+  //! @brief Add 32-bit integer data to the instruction stream.
+  ASMJIT_INLINE void dd(uint32_t x) { embed(&x, 4); }
+  //! @brief Add 64-bit integer data to the instruction stream.
+  ASMJIT_INLINE void dq(uint64_t x) { embed(&x, 8); }
+
+  //! @brief Add 8-bit integer data to the instruction stream.
+  ASMJIT_INLINE void dint8(int8_t x) { embed(&x, sizeof(int8_t)); }
+  //! @brief Add 8-bit integer data to the instruction stream.
+  ASMJIT_INLINE void duint8(uint8_t x) { embed(&x, sizeof(uint8_t)); }
+
+  //! @brief Add 16-bit integer data to the instruction stream.
+  ASMJIT_INLINE void dint16(int16_t x) { embed(&x, sizeof(int16_t)); }
+  //! @brief Add 16-bit integer data to the instruction stream.
+  ASMJIT_INLINE void duint16(uint16_t x) { embed(&x, sizeof(uint16_t)); }
+
+  //! @brief Add 32-bit integer data to the instruction stream.
+  ASMJIT_INLINE void dint32(int32_t x) { embed(&x, sizeof(int32_t)); }
+  //! @brief Add 32-bit integer data to the instruction stream.
+  ASMJIT_INLINE void duint32(uint32_t x) { embed(&x, sizeof(uint32_t)); }
+
+  //! @brief Add 64-bit integer data to the instruction stream.
+  ASMJIT_INLINE void dint64(int64_t x) { embed(&x, sizeof(int64_t)); }
+  //! @brief Add 64-bit integer data to the instruction stream.
+  ASMJIT_INLINE void duint64(uint64_t x) { embed(&x, sizeof(uint64_t)); }
+
+  //! @brief Add float data to the instruction stream.
+  ASMJIT_INLINE void dfloat(float x) { embed(&x, sizeof(float)); }
+  //! @brief Add double data to the instruction stream.
+  ASMJIT_INLINE void ddouble(double x) { embed(&x, sizeof(double)); }
+
+  //! @brief Add pointer data to the instruction stream.
+  ASMJIT_INLINE void dptr(void* x) { embed(&x, sizeof(void*)); }
+
+  //! @brief Add Mm data to the instruction stream.
+  ASMJIT_INLINE void dmm(const MmData& x) { embed(&x, sizeof(MmData)); }
+  //! @brief Add Xmm data to the instruction stream.
+  ASMJIT_INLINE void dxmm(const XmmData& x) { embed(&x, sizeof(XmmData)); }
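+
+  // A minimal usage sketch of the embed helpers above (the label name is
+  // illustrative); these are typically used to emit small constant pools:
+  //
+  //   Label L_Const(a);
+  //   a.bind(L_Const);  // Bind a label at the start of the data.
+  //   a.dd(0x3F800000); // Embed 1.0f as a raw dword.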
+
+  //! @brief Add data in a given structure instance to the instruction stream.
+  template<typename T>
+  ASMJIT_INLINE void dstruct(const T& x) { embed(&x, static_cast<uint32_t>(sizeof(T))); }
+
+  //! @brief Embed absolute label pointer (4 or 8 bytes).
+  ASMJIT_API Error embedLabel(const Label& op);
+
+  // --------------------------------------------------------------------------
+  // [Align]
+  // --------------------------------------------------------------------------
+
+  //! @brief Align target buffer to @a m bytes.
+  //!
+  //! Typical usage of this is to align labels at the start of inner loops.
+  //!
+  //! Inserts @c nop() instructions or CPU optimized NOPs.
+  ASMJIT_API virtual Error _align(uint32_t m);
+
+  // --------------------------------------------------------------------------
+  // [Options]
+  // --------------------------------------------------------------------------
+
+  //! @brief Force short form of jmp/jcc/other instruction.
+  ASMJIT_INLINE X86X64Assembler& short_()
+  { _options |= kInstOptionShortForm; return *this; }
+
+  //! @brief Force long form of jmp/jcc/other instruction.
+  ASMJIT_INLINE X86X64Assembler& long_()
+  { _options |= kInstOptionLongForm; return *this; }
+
+  //! @brief Condition is likely to be taken.
+  ASMJIT_INLINE X86X64Assembler& taken()
+  { _options |= kInstOptionTaken; return *this; }
+
+  //! @brief Condition is unlikely to be taken.
+  ASMJIT_INLINE X86X64Assembler& notTaken()
+  { _options |= kInstOptionNotTaken; return *this; }
+
+  //! @brief Lock prefix.
+  ASMJIT_INLINE X86X64Assembler& lock()
+  { _options |= kInstOptionLock; return *this; }
+
+  // --------------------------------------------------------------------------
+  // [Base Instructions]
+  // --------------------------------------------------------------------------
+
+  //! @brief Add with Carry.
+  INST_2x(adc, kInstAdc, GpReg, GpReg)
+  //! @overload
+  INST_2x(adc, kInstAdc, GpReg, Mem)
+  //! @overload
+  INST_2i(adc, kInstAdc, GpReg, Imm)
+  //! @overload
+  INST_2x(adc, kInstAdc, Mem, GpReg)
+  //! @overload
+  INST_2i(adc, kInstAdc, Mem, Imm)
+
+  //! @brief Add.
+  INST_2x(add, kInstAdd, GpReg, GpReg)
+  //! @overload
+  INST_2x(add, kInstAdd, GpReg, Mem)
+  //! @overload
+  INST_2i(add, kInstAdd, GpReg, Imm)
+  //! @overload
+  INST_2x(add, kInstAdd, Mem, GpReg)
+  //! @overload
+  INST_2i(add, kInstAdd, Mem, Imm)
+
+  //! @brief And.
+  INST_2x(and_, kInstAnd, GpReg, GpReg)
+  //! @overload
+  INST_2x(and_, kInstAnd, GpReg, Mem)
+  //! @overload
+  INST_2i(and_, kInstAnd, GpReg, Imm)
+  //! @overload
+  INST_2x(and_, kInstAnd, Mem, GpReg)
+  //! @overload
+  INST_2i(and_, kInstAnd, Mem, Imm)
+
+  //! @brief Bit scan forward.
+  INST_2x_(bsf, kInstBsf, GpReg, GpReg, !o0.isGpb())
+  //! @overload
+  INST_2x_(bsf, kInstBsf, GpReg, Mem, !o0.isGpb())
+
+  //! @brief Bit scan reverse.
+  INST_2x_(bsr, kInstBsr, GpReg, GpReg, !o0.isGpb())
+  //! @overload
+  INST_2x_(bsr, kInstBsr, GpReg, Mem, !o0.isGpb())
+
+  //! @brief Byte swap (32-bit or 64-bit registers only) (i486).
+  INST_1x_(bswap, kInstBswap, GpReg, o0.getSize() >= 4)
+
+  //! @brief Bit test.
+  INST_2x(bt, kInstBt, GpReg, GpReg)
+  //! @overload
+  INST_2i(bt, kInstBt, GpReg, Imm)
+  //! @overload
+  INST_2x(bt, kInstBt, Mem, GpReg)
+  //! @overload
+  INST_2i(bt, kInstBt, Mem, Imm)
+
+  //! @brief Bit test and complement.
+  INST_2x(btc, kInstBtc, GpReg, GpReg)
+  //! @overload
+  INST_2i(btc, kInstBtc, GpReg, Imm)
+  //! @overload
+  INST_2x(btc, kInstBtc, Mem, GpReg)
+  //! @overload
+  INST_2i(btc, kInstBtc, Mem, Imm)
+
+  //! @brief Bit test and reset.
+  INST_2x(btr, kInstBtr, GpReg, GpReg)
+  //! @overload
+  INST_2i(btr, kInstBtr, GpReg, Imm)
+  //! 
@overload + INST_2x(btr, kInstBtr, Mem, GpReg) + //! @overload + INST_2i(btr, kInstBtr, Mem, Imm) + + //! @brief Bit test and set. + INST_2x(bts, kInstBts, GpReg, GpReg) + //! @overload + INST_2i(bts, kInstBts, GpReg, Imm) + //! @overload + INST_2x(bts, kInstBts, Mem, GpReg) + //! @overload + INST_2i(bts, kInstBts, Mem, Imm) + + //! @brief Call. + INST_1x(call, kInstCall, GpReg) + //! @overload + INST_1x(call, kInstCall, Mem) + //! @overload + INST_1x(call, kInstCall, Label) + //! @overload + INST_1x(call, kInstCall, Imm) + //! @overload + ASMJIT_INLINE Error call(void* dst) { return call(Imm((intptr_t)dst)); } + + //! @brief Clear carry flag. + INST_0x(clc, kInstClc) + //! @brief Clear direction flag. + INST_0x(cld, kInstCld) + //! @brief Complement carry flag. + INST_0x(cmc, kInstCmc) + + //! @brief Convert byte to word (AX <- Sign Extend AL). + INST_0x(cbw, kInstCbw) + //! @brief Convert word to dword (DX:AX <- Sign Extend AX). + INST_0x(cwd, kInstCwd) + //! @brief Convert word to dword (EAX <- Sign Extend AX). + INST_0x(cwde, kInstCwde) + //! @brief Convert dword to qword (EDX:EAX <- Sign Extend EAX). + INST_0x(cdq, kInstCdq) + + //! @brief Conditional move. + INST_2cc(cmov, kInstCmov, condToCmovcc, GpReg, GpReg) + //! @brief Conditional move. + INST_2cc(cmov, kInstCmov, condToCmovcc, GpReg, Mem) + + //! @brief Compare two operands. + INST_2x(cmp, kInstCmp, GpReg, GpReg) + //! @overload + INST_2x(cmp, kInstCmp, GpReg, Mem) + //! @overload + INST_2i(cmp, kInstCmp, GpReg, Imm) + //! @overload + INST_2x(cmp, kInstCmp, Mem, GpReg) + //! @overload + INST_2i(cmp, kInstCmp, Mem, Imm) + + //! @brief Compare and exchange (i486). + INST_2x(cmpxchg, kInstCmpxchg, GpReg, GpReg) + //! @overload + INST_2x(cmpxchg, kInstCmpxchg, Mem, GpReg) + + //! @brief Compares the 64-bit value in EDX:EAX with the memory operand (Pentium). + INST_1x(cmpxchg8b, kInstCmpxchg8b, Mem) + + //! @brief CPU identification (i486). + INST_0x(cpuid, kInstCpuid) + + //! @brief Accumulate crc32 value (polynomial 0x11EDC6F41) (SSE4.2). + INST_2x_(crc32, kInstCrc32, GpReg, GpReg, o0.isRegType(kRegTypeGpd) || o0.isRegType(kRegTypeGpq)) + //! @overload + INST_2x_(crc32, kInstCrc32, GpReg, Mem, o0.isRegType(kRegTypeGpd) || o0.isRegType(kRegTypeGpq)) + + //! @brief Decrement by 1. + INST_1x(dec, kInstDec, GpReg) + //! @overload + INST_1x(dec, kInstDec, Mem) + + //! @brief Unsigned divide (xDX:xAX <- xDX:xAX / o0). + INST_1x(div, kInstDiv, GpReg) + //! @overload + INST_1x(div, kInstDiv, Mem) + + //! @brief Make stack frame for procedure parameters. + INST_2x(enter, kInstEnter, Imm, Imm) + + //! @brief Signed divide (xDX:xAX <- xDX:xAX / op). + INST_1x(idiv, kInstIdiv, GpReg) + //! @overload + INST_1x(idiv, kInstIdiv, Mem) + + //! @brief Signed multiply (xDX:xAX <- xAX * o0). + INST_1x(imul, kInstImul, GpReg) + //! @overload + INST_1x(imul, kInstImul, Mem) + + //! @brief Signed multiply. + INST_2x(imul, kInstImul, GpReg, GpReg) + //! @overload + INST_2x(imul, kInstImul, GpReg, Mem) + //! @overload + INST_2i(imul, kInstImul, GpReg, Imm) + + //! @brief Signed multiply. + INST_3i(imul, kInstImul, GpReg, GpReg, Imm) + //! @overload + INST_3i(imul, kInstImul, GpReg, Mem, Imm) + + //! @brief Increment by 1. + INST_1x(inc, kInstInc, GpReg) + //! @overload + INST_1x(inc, kInstInc, Mem) + + //! @brief Interrupt. + INST_1i(int_, kInstInt, Imm) + //! @brief Interrupt 3 - trap to debugger. + ASMJIT_INLINE Error int3() { return int_(3); } + + //! @brief Jump to label @a label if condition @a cc is met. 
+  INST_1cc(j, kInstJ, condToJcc, Label)
+
+  //! @brief Jump.
+  INST_1x(jmp, kInstJmp, GpReg)
+  //! @overload
+  INST_1x(jmp, kInstJmp, Mem)
+  //! @overload
+  INST_1x(jmp, kInstJmp, Label)
+  //! @overload
+  INST_1x(jmp, kInstJmp, Imm)
+  //! @overload
+  ASMJIT_INLINE Error jmp(void* dst) { return jmp(Imm((intptr_t)dst)); }
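+
+  // Usage sketch (illustrative): both lines emit the same conditional jump;
+  // the generic form takes a condition code (the constant shown is only an
+  // assumed example):
+  //
+  //   a.jz(L_Exit);        // Condition-specific form.
+  //   a.j(kCondZ, L_Exit); // Generic form taking a condition code.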
+
+  //! @brief Load AH from flags.
+  INST_0x(lahf, kInstLahf)
+
+  //! @brief Load effective address.
+  INST_2x(lea, kInstLea, GpReg, Mem)
+
+  //! @brief High level procedure exit.
+  INST_0x(leave, kInstLeave)
+
+  //! @brief Move.
+  INST_2x(mov, kInstMov, GpReg, GpReg)
+  //! @overload
+  INST_2x(mov, kInstMov, GpReg, Mem)
+  //! @overload
+  INST_2i(mov, kInstMov, GpReg, Imm)
+  //! @overload
+  INST_2x(mov, kInstMov, Mem, GpReg)
+  //! @overload
+  INST_2i(mov, kInstMov, Mem, Imm)
+
+  //! @brief Move from segment register.
+  INST_2x(mov, kInstMov, GpReg, SegReg)
+  //! @overload
+  INST_2x(mov, kInstMov, Mem, SegReg)
+  //! @brief Move to segment register.
+  INST_2x(mov, kInstMov, SegReg, GpReg)
+  //! @overload
+  INST_2x(mov, kInstMov, SegReg, Mem)
+
+  //! @brief Move (AL|AX|EAX|RAX <- absolute address in immediate).
+  ASMJIT_INLINE Error mov_ptr(const GpReg& dst, void* src) {
+    ASMJIT_ASSERT(dst.getRegIndex() == 0);
+
+    Imm imm(static_cast<int64_t>((intptr_t)src));
+    return emit(kInstMovptr, dst, imm);
+  }
+
+  //! @brief Move (absolute address in immediate <- AL|AX|EAX|RAX).
+  ASMJIT_INLINE Error mov_ptr(void* dst, const GpReg& src) {
+    ASMJIT_ASSERT(src.getRegIndex() == 0);
+
+    Imm imm(static_cast<int64_t>((intptr_t)dst));
+    return emit(kInstMovptr, imm, src);
+  }
+
+  //! @brief Move data after swapping bytes (SSE3 - Atom).
+  INST_2x_(movbe, kInstMovbe, GpReg, Mem, !o0.isGpb());
+  //! @overload
+  INST_2x_(movbe, kInstMovbe, Mem, GpReg, !o1.isGpb());
+
+  //! @brief Move with sign-extension.
+  INST_2x(movsx, kInstMovsx, GpReg, GpReg)
+  //! @overload
+  INST_2x(movsx, kInstMovsx, GpReg, Mem)
+
+  //! @brief Move with zero-extension.
+  INST_2x(movzx, kInstMovzx, GpReg, GpReg)
+  //! @overload
+  INST_2x(movzx, kInstMovzx, GpReg, Mem)
+
+  //! @brief Unsigned multiply (xDX:xAX <- xAX * o0).
+  INST_1x(mul, kInstMul, GpReg)
+  //! @overload
+  INST_1x(mul, kInstMul, Mem)
+
+  //! @brief Two's complement negation.
+  INST_1x(neg, kInstNeg, GpReg)
+  //! @overload
+  INST_1x(neg, kInstNeg, Mem)
+
+  //! @brief No operation.
+  INST_0x(nop, kInstNop)
+
+  //! @brief One's complement negation.
+  INST_1x(not_, kInstNot, GpReg)
+  //! @overload
+  INST_1x(not_, kInstNot, Mem)
+
+  //! @brief Or.
+  INST_2x(or_, kInstOr, GpReg, GpReg)
+  //! @overload
+  INST_2x(or_, kInstOr, GpReg, Mem)
+  //! @overload
+  INST_2i(or_, kInstOr, GpReg, Imm)
+  //! @overload
+  INST_2x(or_, kInstOr, Mem, GpReg)
+  //! @overload
+  INST_2i(or_, kInstOr, Mem, Imm)
+
+  //! @brief Pop a value from the stack.
+  INST_1x_(pop, kInstPop, GpReg, o0.getSize() == 2 || o0.getSize() == _regSize)
+  //! @overload
+  INST_1x_(pop, kInstPop, Mem, o0.getSize() == 2 || o0.getSize() == _regSize)
+
+  //! @brief Pop a segment register from the stack.
+  //!
+  //! @note There is no instruction to pop a cs segment register.
+  INST_1x_(pop, kInstPop, SegReg, o0.getRegIndex() != kSegCs);
+
+  //! @brief Pop stack into EFLAGS register (32-bit or 64-bit).
+  INST_0x(popf, kInstPopf)
+
+  //! @brief Return the count of bits set to 1 (SSE4.2).
+  INST_2x_(popcnt, kInstPopcnt, GpReg, GpReg, !o0.isGpb() && o0.getRegType() == o1.getRegType())
+  //! @overload
+  INST_2x_(popcnt, kInstPopcnt, GpReg, Mem, !o0.isGpb())
+
+  //! @brief Push word/dword/qword on the stack.
+  INST_1x_(push, kInstPush, GpReg, o0.getSize() == 2 || o0.getSize() == _regSize)
+  //! @brief Push word/dword/qword on the stack.
+  INST_1x_(push, kInstPush, Mem, o0.getSize() == 2 || o0.getSize() == _regSize)
+  //! @brief Push segment register on the stack.
+  INST_1x(push, kInstPush, SegReg)
+  //! @brief Push word/dword/qword on the stack.
+  INST_1i(push, kInstPush, Imm)
+
+  //! @brief Push EFLAGS register (32-bit or 64-bit) on the stack.
+  INST_0x(pushf, kInstPushf)
+
+  //! @brief Rotate bits left.
+  //!
+  //! @note @a o1 register can be only @c cl.
+  INST_2x(rcl, kInstRcl, GpReg, GpReg)
+  //! @overload
+  INST_2x(rcl, kInstRcl, Mem, GpReg)
+  //! @brief Rotate bits left.
+  INST_2i(rcl, kInstRcl, GpReg, Imm)
+  //! @overload
+  INST_2i(rcl, kInstRcl, Mem, Imm)
+
+  //! @brief Rotate bits right.
+  //!
+  //! @note @a o1 register can be only @c cl.
+  INST_2x(rcr, kInstRcr, GpReg, GpReg)
+  //! @overload
+  INST_2x(rcr, kInstRcr, Mem, GpReg)
+  //! @brief Rotate bits right.
+  INST_2i(rcr, kInstRcr, GpReg, Imm)
+  //! @overload
+  INST_2i(rcr, kInstRcr, Mem, Imm)
+
+  //! @brief Read time-stamp counter (Pentium).
+  INST_0x(rdtsc, kInstRdtsc)
+  //! @brief Read time-stamp counter and processor id.
+  INST_0x(rdtscp, kInstRdtscp)
+
+  //! @brief Load ECX/RCX bytes from DS:[ESI/RSI] to AL.
+  INST_0x(rep_lodsb, kInstRepLodsb)
+  //! @brief Load ECX/RCX dwords from DS:[ESI/RSI] to EAX.
+  INST_0x(rep_lodsd, kInstRepLodsd)
+  //! @brief Load ECX/RCX words from DS:[ESI/RSI] to AX.
+  INST_0x(rep_lodsw, kInstRepLodsw)
+
+  //! @brief Move ECX/RCX bytes from DS:[ESI/RSI] to ES:[EDI/RDI].
+  INST_0x(rep_movsb, kInstRepMovsb)
+  //! @brief Move ECX/RCX dwords from DS:[ESI/RSI] to ES:[EDI/RDI].
+  INST_0x(rep_movsd, kInstRepMovsd)
+  //! @brief Move ECX/RCX words from DS:[ESI/RSI] to ES:[EDI/RDI].
+  INST_0x(rep_movsw, kInstRepMovsw)
+
+  //! @brief Fill ECX/RCX bytes at ES:[EDI/RDI] with AL.
+  INST_0x(rep_stosb, kInstRepStosb)
+  //! @brief Fill ECX/RCX dwords at ES:[EDI/RDI] with EAX.
+  INST_0x(rep_stosd, kInstRepStosd)
+  //! @brief Fill ECX/RCX words at ES:[EDI/RDI] with AX.
+  INST_0x(rep_stosw, kInstRepStosw)
+
+  //! @brief Repeated find nonmatching bytes in ES:[EDI/RDI] and DS:[ESI/RSI].
+  INST_0x(repe_cmpsb, kInstRepeCmpsb)
+  //! @brief Repeated find nonmatching dwords in ES:[EDI/RDI] and DS:[ESI/RSI].
+  INST_0x(repe_cmpsd, kInstRepeCmpsd)
+  //! @brief Repeated find nonmatching words in ES:[EDI/RDI] and DS:[ESI/RSI].
+  INST_0x(repe_cmpsw, kInstRepeCmpsw)
+
+  //! @brief Find non-AL byte starting at ES:[EDI/RDI].
+  INST_0x(repe_scasb, kInstRepeScasb)
+  //! @brief Find non-EAX dword starting at ES:[EDI/RDI].
+  INST_0x(repe_scasd, kInstRepeScasd)
+  //! @brief Find non-AX word starting at ES:[EDI/RDI].
+  INST_0x(repe_scasw, kInstRepeScasw)
+
+  //! @brief Repeated find nonmatching bytes in ES:[EDI/RDI] and DS:[ESI/RSI].
+  INST_0x(repne_cmpsb, kInstRepneCmpsb)
+  //! @brief Repeated find nonmatching dwords in ES:[EDI/RDI] and DS:[ESI/RSI].
+  INST_0x(repne_cmpsd, kInstRepneCmpsd)
+  //! @brief Repeated find nonmatching words in ES:[EDI/RDI] and DS:[ESI/RSI].
+  INST_0x(repne_cmpsw, kInstRepneCmpsw)
+
+  //! @brief Find AL, starting at ES:[EDI/RDI].
+  INST_0x(repne_scasb, kInstRepneScasb)
+  //! @brief Find EAX, starting at ES:[EDI/RDI].
+  INST_0x(repne_scasd, kInstRepneScasd)
+  //! @brief Find AX, starting at ES:[EDI/RDI].
+  INST_0x(repne_scasw, kInstRepneScasw)
+
+  //! @brief Return.
+  INST_0x(ret, kInstRet)
+  //! @overload
+  INST_1i(ret, kInstRet, Imm)
+
+  //! @brief Rotate bits left.
+  //!
+  //! 
@note @a o1 register can be only @c cl. + INST_2x(rol, kInstRol, GpReg, GpReg) + //! @overload + INST_2x(rol, kInstRol, Mem, GpReg) + //! @brief Rotate bits left. + INST_2i(rol, kInstRol, GpReg, Imm) + //! @overload + INST_2i(rol, kInstRol, Mem, Imm) + + //! @brief Rotate bits right. + //! + //! @note @a o1 register can be only @c cl. + INST_2x(ror, kInstRor, GpReg, GpReg) + //! @overload + INST_2x(ror, kInstRor, Mem, GpReg) + //! @brief Rotate bits right. + INST_2i(ror, kInstRor, GpReg, Imm) + //! @overload + INST_2i(ror, kInstRor, Mem, Imm) + + //! @brief Store AH into flags. + INST_0x(sahf, kInstSahf) + + //! @brief Integer subtraction with borrow. + INST_2x(sbb, kInstSbb, GpReg, GpReg) + //! @overload + INST_2x(sbb, kInstSbb, GpReg, Mem) + //! @overload + INST_2i(sbb, kInstSbb, GpReg, Imm) + //! @overload + INST_2x(sbb, kInstSbb, Mem, GpReg) + //! @overload + INST_2i(sbb, kInstSbb, Mem, Imm) + + //! @brief Shift bits left. + //! + //! @note @a o1 register can be only @c cl. + INST_2x(sal, kInstSal, GpReg, GpReg) + //! @overload + INST_2x(sal, kInstSal, Mem, GpReg) + //! @brief Shift bits left. + INST_2i(sal, kInstSal, GpReg, Imm) + //! @overload + INST_2i(sal, kInstSal, Mem, Imm) + + //! @brief Shift bits right. + //! + //! @note @a o1 register can be only @c cl. + INST_2x(sar, kInstSar, GpReg, GpReg) + //! @overload + INST_2x(sar, kInstSar, Mem, GpReg) + //! @brief Shift bits right. + INST_2i(sar, kInstSar, GpReg, Imm) + //! @overload + INST_2i(sar, kInstSar, Mem, Imm) + + //! @brief Set byte on condition. + INST_1cc(set, kInstSet, condToSetcc, GpReg) + //! @brief Set byte on condition. + INST_1cc(set, kInstSet, condToSetcc, Mem) + + //! @brief Shift bits left. + //! + //! @note @a o1 register can be only @c cl. + INST_2x(shl, kInstShl, GpReg, GpReg) + //! @overload + INST_2x(shl, kInstShl, Mem, GpReg) + //! @brief Shift bits left. + INST_2i(shl, kInstShl, GpReg, Imm) + //! @overload + INST_2i(shl, kInstShl, Mem, Imm) + + //! @brief Shift bits right. + //! + //! @note @a o1 register can be only @c cl. + INST_2x(shr, kInstShr, GpReg, GpReg) + //! @overload + INST_2x(shr, kInstShr, Mem, GpReg) + //! @brief Shift bits right. + INST_2i(shr, kInstShr, GpReg, Imm) + //! @overload + INST_2i(shr, kInstShr, Mem, Imm) + + //! @brief Double precision shift left. + //! + //! @note @a o2 register can be only @c cl register. + INST_3x(shld, kInstShld, GpReg, GpReg, GpReg) + //! @overload + INST_3x(shld, kInstShld, Mem, GpReg, GpReg) + //! @brief Double precision shift left. + INST_3i(shld, kInstShld, GpReg, GpReg, Imm) + //! @overload + INST_3i(shld, kInstShld, Mem, GpReg, Imm) + + //! @brief Double precision shift right. + //! + //! @note @a o2 register can be only @c cl register. + INST_3x(shrd, kInstShrd, GpReg, GpReg, GpReg) + //! @overload + INST_3x(shrd, kInstShrd, Mem, GpReg, GpReg) + //! @brief Double precision shift right. + INST_3i(shrd, kInstShrd, GpReg, GpReg, Imm) + //! @overload + INST_3i(shrd, kInstShrd, Mem, GpReg, Imm) + + //! @brief Set carry flag to 1. + INST_0x(stc, kInstStc) + //! @brief Set direction flag to 1. + INST_0x(std, kInstStd) + + //! @brief Subtract. + INST_2x(sub, kInstSub, GpReg, GpReg) + //! @overload + INST_2x(sub, kInstSub, GpReg, Mem) + //! @overload + INST_2i(sub, kInstSub, GpReg, Imm) + //! @overload + INST_2x(sub, kInstSub, Mem, GpReg) + //! @overload + INST_2i(sub, kInstSub, Mem, Imm) + + //! @brief Logical compare. + INST_2x(test, kInstTest, GpReg, GpReg) + //! @overload + INST_2i(test, kInstTest, GpReg, Imm) + //! 
@overload
+ INST_2x(test, kInstTest, Mem, GpReg)
+ //! @overload
+ INST_2i(test, kInstTest, Mem, Imm)
+
+ //! @brief Undefined instruction - Raise #UD exception.
+ INST_0x(ud2, kInstUd2)
+
+ //! @brief Exchange and add.
+ INST_2x(xadd, kInstXadd, GpReg, GpReg)
+ //! @overload
+ INST_2x(xadd, kInstXadd, Mem, GpReg)
+
+ //! @brief Exchange register/memory with register.
+ INST_2x(xchg, kInstXchg, GpReg, GpReg)
+ //! @overload
+ INST_2x(xchg, kInstXchg, Mem, GpReg)
+ //! @overload
+ INST_2x(xchg, kInstXchg, GpReg, Mem)
+
+ //! @brief Xor.
+ INST_2x(xor_, kInstXor, GpReg, GpReg)
+ //! @overload
+ INST_2x(xor_, kInstXor, GpReg, Mem)
+ //! @overload
+ INST_2i(xor_, kInstXor, GpReg, Imm)
+ //! @overload
+ INST_2x(xor_, kInstXor, Mem, GpReg)
+ //! @overload
+ INST_2i(xor_, kInstXor, Mem, Imm)
+
+ // --------------------------------------------------------------------------
+ // [Fpu]
+ // --------------------------------------------------------------------------
+
+ //! @brief Compute 2^x - 1 (FPU).
+ INST_0x(f2xm1, kInstF2xm1)
+
+ //! @brief Absolute value of fp0 (FPU).
+ INST_0x(fabs, kInstFabs)
+
+ //! @brief Add @a o1 to @a o0 and store result in @a o0 (FPU).
+ //!
+ //! @note One of @a o0 or @a o1 must be fp0.
+ INST_2x_(fadd, kInstFadd, FpReg, FpReg, o0.getRegIndex() == 0 || o1.getRegIndex() == 0)
+ //! @brief Add 4-byte or 8-byte FP @a o0 to fp0 and store result in fp0 (FPU).
+ INST_1x(fadd, kInstFadd, Mem)
+ //! @brief Add fp0 to @a o0 and pop register stack (FPU).
+ INST_1x(faddp, kInstFaddp, FpReg)
+
+ //! @brief Load binary coded decimal (FPU).
+ INST_1x(fbld, kInstFbld, Mem)
+ //! @brief Store BCD integer and pop (FPU).
+ INST_1x(fbstp, kInstFbstp, Mem)
+ //! @brief Change fp0 sign (FPU).
+ INST_0x(fchs, kInstFchs)
+
+ //! @brief Clear exceptions (FPU).
+ INST_0x(fclex, kInstFclex)
+
+ //! @brief FP conditional move if below (FPU).
+ INST_1x(fcmovb, kInstFcmovb, FpReg)
+ //! @brief FP conditional move if below or equal (FPU).
+ INST_1x(fcmovbe, kInstFcmovbe, FpReg)
+ //! @brief FP conditional move if equal (FPU).
+ INST_1x(fcmove, kInstFcmove, FpReg)
+ //! @brief FP conditional move if not below (FPU).
+ INST_1x(fcmovnb, kInstFcmovnb, FpReg)
+ //! @brief FP conditional move if not below or equal (FPU).
+ INST_1x(fcmovnbe, kInstFcmovnbe, FpReg)
+ //! @brief FP conditional move if not equal (FPU).
+ INST_1x(fcmovne, kInstFcmovne, FpReg)
+ //! @brief FP conditional move if not unordered (FPU).
+ INST_1x(fcmovnu, kInstFcmovnu, FpReg)
+ //! @brief FP conditional move if unordered (FPU).
+ INST_1x(fcmovu, kInstFcmovu, FpReg)
+
+ //! @brief Compare fp0 with @a o0 (FPU).
+ INST_1x(fcom, kInstFcom, FpReg)
+ //! @brief Compare fp0 with 4-byte or 8-byte FP at @a o0 (FPU).
+ INST_1x(fcom, kInstFcom, Mem)
+ //! @brief Compare fp0 with @a o0 and pop the stack (FPU).
+ INST_1x(fcomp, kInstFcomp, FpReg)
+ //! @brief Compare fp0 with 4-byte or 8-byte FP at @a o0 and pop the stack (FPU).
+ INST_1x(fcomp, kInstFcomp, Mem)
+ //! @brief Compare fp0 with fp1 and pop register stack twice (FPU).
+ INST_0x(fcompp, kInstFcompp)
+ //! @brief Compare fp0 and @a o0 and set EFLAGS (FPU).
+ INST_1x(fcomi, kInstFcomi, FpReg)
+ //! @brief Compare fp0 and @a o0 and set EFLAGS and pop the stack (FPU).
+ INST_1x(fcomip, kInstFcomip, FpReg)
+
+ //! @brief Calculate cosine of fp0 and store result in fp0 (FPU).
+ INST_0x(fcos, kInstFcos)
+
+ //! @brief Decrement stack-top pointer (FPU).
+ INST_0x(fdecstp, kInstFdecstp)
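+
+ // A minimal usage sketch of the fcomi/fcomip pattern above, assuming an
+ // assembler instance `a`, the fp-register operands named as in these docs
+ // and the jcc family declared earlier (illustrative, not normative):
+ //
+ //   a.fcomi(fp1);      // Compare fp0 with fp1; ZF/PF/CF are set directly.
+ //   a.ja(L_Greater);   // Branch on EFLAGS, no fnstsw/sahf detour needed.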
+
+ //! @brief Divide @a o0 by @a o1 (FPU).
+ //!
+ //! @note One of @a o0 or @a o1 register must be fp0.
+ INST_2x_(fdiv, kInstFdiv, FpReg, FpReg, o0.getRegIndex() == 0 || o1.getRegIndex() == 0)
+ //! @brief Divide fp0 by 32-bit or 64-bit FP value (FPU).
+ INST_1x(fdiv, kInstFdiv, Mem)
+ //! @brief Divide @a o0 by fp0 and pop register stack (FPU).
+ INST_1x(fdivp, kInstFdivp, FpReg)
+
+ //! @brief Reverse divide @a o0 by @a o1 (FPU).
+ //!
+ //! @note One of @a o0 or @a o1 register must be fp0.
+ INST_2x_(fdivr, kInstFdivr, FpReg, FpReg, o0.getRegIndex() == 0 || o1.getRegIndex() == 0)
+ //! @brief Reverse divide fp0 by 32-bit or 64-bit FP value (FPU).
+ INST_1x(fdivr, kInstFdivr, Mem)
+ //! @brief Reverse divide @a o0 by fp0 and pop register stack (FPU).
+ INST_1x(fdivrp, kInstFdivrp, FpReg)
+
+ //! @brief Free FP register (FPU).
+ //!
+ //! Sets the tag in the FPU tag register associated with register @a o0
+ //! to empty (11B). The contents of @a o0 and the FPU stack-top pointer
+ //! (TOP) are not affected.
+ INST_1x(ffree, kInstFfree, FpReg)
+
+ //! @brief Add 16-bit or 32-bit integer to fp0 (FPU).
+ INST_1x_(fiadd, kInstFiadd, Mem, o0.getSize() == 2 || o0.getSize() == 4)
+
+ //! @brief Compare fp0 with 16-bit or 32-bit integer (FPU).
+ INST_1x_(ficom, kInstFicom, Mem, o0.getSize() == 2 || o0.getSize() == 4)
+ //! @brief Compare fp0 with 16-bit or 32-bit integer and pop the stack (FPU).
+ INST_1x_(ficomp, kInstFicomp, Mem, o0.getSize() == 2 || o0.getSize() == 4)
+
+ //! @brief Divide fp0 by 16-bit or 32-bit integer (@a o0) (FPU).
+ INST_1x_(fidiv, kInstFidiv, Mem, o0.getSize() == 2 || o0.getSize() == 4)
+ //! @brief Reverse divide fp0 by 16-bit or 32-bit integer (@a o0) (FPU).
+ INST_1x_(fidivr, kInstFidivr, Mem, o0.getSize() == 2 || o0.getSize() == 4)
+
+ //! @brief Load 16-bit, 32-bit or 64-bit integer and push it to the stack (FPU).
+ INST_1x_(fild, kInstFild, Mem, o0.getSize() == 2 || o0.getSize() == 4 || o0.getSize() == 8)
+
+ //! @brief Multiply fp0 by 16-bit or 32-bit integer and store it to fp0 (FPU).
+ INST_1x_(fimul, kInstFimul, Mem, o0.getSize() == 2 || o0.getSize() == 4)
+
+ //! @brief Increment stack-top pointer (FPU).
+ INST_0x(fincstp, kInstFincstp)
+
+ //! @brief Initialize FPU (FPU).
+ INST_0x(finit, kInstFinit)
+
+ //! @brief Subtract 16-bit or 32-bit integer from fp0 and store result to fp0 (FPU).
+ INST_1x_(fisub, kInstFisub, Mem, o0.getSize() == 2 || o0.getSize() == 4)
+ //! @brief Reverse subtract 16-bit or 32-bit integer from fp0 and store result to fp0 (FPU).
+ INST_1x_(fisubr, kInstFisubr, Mem, o0.getSize() == 2 || o0.getSize() == 4)
+
+ //! @brief Initialize FPU without checking for pending unmasked exceptions (FPU).
+ INST_0x(fninit, kInstFninit)
+
+ //! @brief Store fp0 as 16-bit or 32-bit integer to @a o0 (FPU).
+ INST_1x_(fist, kInstFist, Mem, o0.getSize() == 2 || o0.getSize() == 4)
+ //! @brief Store fp0 as 16-bit, 32-bit or 64-bit integer to @a o0 and pop stack (FPU).
+ INST_1x_(fistp, kInstFistp, Mem, o0.getSize() == 2 || o0.getSize() == 4 || o0.getSize() == 8)
+
+ //! @brief Push 32-bit, 64-bit or 80-bit floating point value on the FPU register stack (FPU).
+ INST_1x_(fld, kInstFld, Mem, o0.getSize() == 4 || o0.getSize() == 8 || o0.getSize() == 10)
+ //! @brief Push @a o0 on the FPU register stack (FPU).
+ INST_1x(fld, kInstFld, FpReg)
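+
+ // A minimal sketch of the integer load/store pairs above (fild/fistp),
+ // assuming an assembler `a` and the dword_ptr() operand helper defined
+ // elsewhere in AsmJit (illustrative only):
+ //
+ //   a.fild(dword_ptr(esp));    // Push the 32-bit integer at [esp].
+ //   a.fistp(dword_ptr(esp));   // Round it back to [esp] and pop fp0.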
+
+ //! @brief Push +1.0 on the FPU register stack (FPU).
+ INST_0x(fld1, kInstFld1)
+ //! @brief Push log2(10) on the FPU register stack (FPU).
+ INST_0x(fldl2t, kInstFldl2t)
+ //! @brief Push log2(e) on the FPU register stack (FPU).
+ INST_0x(fldl2e, kInstFldl2e)
+ //! @brief Push pi on the FPU register stack (FPU).
+ INST_0x(fldpi, kInstFldpi)
+ //! @brief Push log10(2) on the FPU register stack (FPU).
+ INST_0x(fldlg2, kInstFldlg2)
+ //! @brief Push ln(2) on the FPU register stack (FPU).
+ INST_0x(fldln2, kInstFldln2)
+ //! @brief Push +0.0 on the FPU register stack (FPU).
+ INST_0x(fldz, kInstFldz)
+
+ //! @brief Load x87 FPU control word (2 bytes) (FPU).
+ INST_1x(fldcw, kInstFldcw, Mem)
+ //! @brief Load x87 FPU environment (14 or 28 bytes) (FPU).
+ INST_1x(fldenv, kInstFldenv, Mem)
+
+ //! @brief Multiply @a o0 by @a o1 and store result in @a o0 (FPU).
+ //!
+ //! @note One of @a o0 or @a o1 must be fp0.
+ INST_2x_(fmul, kInstFmul, FpReg, FpReg, o0.getRegIndex() == 0 || o1.getRegIndex() == 0)
+ //! @brief Multiply fp0 by 32-bit or 64-bit @a o0 and store result in fp0 (FPU).
+ INST_1x(fmul, kInstFmul, Mem)
+
+ //! @brief Multiply fp0 by @a o0 and pop register stack (FPU).
+ INST_1x(fmulp, kInstFmulp, FpReg)
+
+ //! @brief Clear exceptions (FPU).
+ INST_0x(fnclex, kInstFnclex)
+
+ //! @brief No operation (FPU).
+ INST_0x(fnop, kInstFnop)
+
+ //! @brief Save FPU state (FPU).
+ INST_1x(fnsave, kInstFnsave, Mem)
+
+ //! @brief Store x87 FPU environment (FPU).
+ INST_1x(fnstenv, kInstFnstenv, Mem)
+
+ //! @brief Store x87 FPU control word (FPU).
+ INST_1x(fnstcw, kInstFnstcw, Mem)
+
+ //! @brief Store x87 FPU status word to @a o0 (AX) (FPU).
+ INST_1x_(fnstsw, kInstFnstsw, GpReg, o0.isRegCode(kRegTypeGpw, kRegIndexAx))
+ //! @brief Store x87 FPU status word to @a o0 (2 bytes) (FPU).
+ INST_1x(fnstsw, kInstFnstsw, Mem)
+
+ //! @brief Calculate arctan(fp1 / fp0) and pop the register stack (FPU).
+ INST_0x(fpatan, kInstFpatan)
+
+ //! @brief Compute the partial remainder of fp0 / fp1 and store the result in fp0 (FPU).
+ INST_0x(fprem, kInstFprem)
+ //! @brief Compute the IEEE partial remainder of fp0 / fp1 and store the result in fp0 (FPU).
+ INST_0x(fprem1, kInstFprem1)
+
+ //! @brief Compute tan(fp0), store the result in fp0 and push 1.0 on the register stack (FPU).
+ INST_0x(fptan, kInstFptan)
+ //! @brief Round fp0 to integer (FPU).
+ INST_0x(frndint, kInstFrndint)
+
+ //! @brief Restore FPU state from @a o0 (94 or 108 bytes) (FPU).
+ INST_1x(frstor, kInstFrstor, Mem)
+
+ //! @brief Save FPU state to @a o0 (FPU).
+ //!
+ //! Store FPU state to @a o0 (94 or 108 bytes) after checking for
+ //! pending unmasked FP exceptions. Then reinitialize the FPU.
+ INST_1x(fsave, kInstFsave, Mem)
+
+ //! @brief Scale (FPU).
+ //!
+ //! Scale fp0 by fp1.
+ INST_0x(fscale, kInstFscale)
+
+ //! @brief Calculate sine of fp0 and store result in fp0 (FPU).
+ INST_0x(fsin, kInstFsin)
+
+ //! @brief Sine and cosine (FPU).
+ //!
+ //! Compute the sine and cosine of fp0; replace fp0 with the sine
+ //! and push the cosine on the register stack.
+ INST_0x(fsincos, kInstFsincos)
+
+ //! @brief Square root (FPU).
+ //!
+ //! Calculates square root of fp0 and stores the result in fp0.
+ INST_0x(fsqrt, kInstFsqrt)
+
+ //! @brief Store floating point value (FPU).
+ //!
+ //! Store fp0 as 32-bit or 64-bit floating point value to @a o0.
+ INST_1x_(fst, kInstFst, Mem, o0.getSize() == 4 || o0.getSize() == 8)
+
+ //! @brief Store floating point value to @a o0 (FPU).
+ INST_1x(fst, kInstFst, FpReg)
+
+ //! @brief Store floating point value and pop register stack (FPU).
+ //!
+ //! Store fp0 as 32-bit, 64-bit or 80-bit floating point value to @a o0
+ //! and pop register stack.
+ INST_1x_(fstp, kInstFstp, Mem, o0.getSize() == 4 || o0.getSize() == 8 || o0.getSize() == 10)
+
+ //! @brief Store floating point value and pop register stack (FPU).
+ //!
+ //! Store fp0 to @a o0 and pop register stack.
+ INST_1x(fstp, kInstFstp, FpReg)
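+
+ // A minimal sketch chaining the load/compute/store groups above (fld,
+ // fsqrt, fstp), assuming an assembler `a` and the qword_ptr() operand
+ // helper defined elsewhere in AsmJit (illustrative only):
+ //
+ //   a.fld(qword_ptr(esi));    // Push the double at [esi]; it becomes fp0.
+ //   a.fsqrt();                // fp0 = sqrt(fp0).
+ //   a.fstp(qword_ptr(edi));   // Store fp0 to [edi] and pop the stack.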
+
+ //! @brief Store x87 FPU control word (FPU).
+ //!
+ //! Store FPU control word to @a o0 (2 bytes) after checking for pending
+ //! unmasked floating-point exceptions.
+ INST_1x(fstcw, kInstFstcw, Mem)
+
+ //! @brief Store x87 FPU environment (FPU).
+ //!
+ //! Store FPU environment to @a o0 (14 or 28 bytes) after checking for
+ //! pending unmasked floating-point exceptions. Then mask all floating
+ //! point exceptions.
+ INST_1x(fstenv, kInstFstenv, Mem)
+
+ //! @brief Store x87 FPU status word (AX) (FPU).
+ INST_1x_(fstsw, kInstFstsw, GpReg, o0.isRegCode(kRegTypeGpw, kRegIndexAx))
+ //! @brief Store x87 FPU status word (2 bytes) (FPU).
+ INST_1x(fstsw, kInstFstsw, Mem)
+
+ //! @brief Subtract @a o1 from @a o0 and store result in @a o0 (FPU).
+ //!
+ //! @note One of @a o0 or @a o1 must be fp0.
+ INST_2x_(fsub, kInstFsub, FpReg, FpReg, o0.getRegIndex() == 0 || o1.getRegIndex() == 0)
+ //! @brief Subtract 32-bit or 64-bit @a o0 from fp0 and store result in fp0 (FPU).
+ INST_1x_(fsub, kInstFsub, Mem, o0.getSize() == 4 || o0.getSize() == 8)
+ //! @brief Subtract fp0 from @a o0 and pop register stack (FPU).
+ INST_1x(fsubp, kInstFsubp, FpReg)
+
+ //! @brief Reverse subtract @a o1 from @a o0 and store result in @a o0 (FPU).
+ //!
+ //! @note One of @a o0 or @a o1 must be fp0.
+ INST_2x_(fsubr, kInstFsubr, FpReg, FpReg, o0.getRegIndex() == 0 || o1.getRegIndex() == 0)
+ //! @brief Reverse subtract 32-bit or 64-bit @a o0 from fp0 and store result in fp0 (FPU).
+ INST_1x_(fsubr, kInstFsubr, Mem, o0.getSize() == 4 || o0.getSize() == 8)
+ //! @brief Reverse subtract fp0 from @a o0 and pop register stack (FPU).
+ INST_1x(fsubrp, kInstFsubrp, FpReg)
+
+ //! @brief Floating point test - compare fp0 with 0.0 (FPU).
+ INST_0x(ftst, kInstFtst)
+
+ //! @brief Unordered compare fp0 with @a o0 (FPU).
+ INST_1x(fucom, kInstFucom, FpReg)
+ //! @brief Unordered compare fp0 and @a o0, check for ordered values
+ //! and set EFLAGS (FPU).
+ INST_1x(fucomi, kInstFucomi, FpReg)
+ //! @brief Unordered compare fp0 and @a o0, check for ordered values
+ //! and set EFLAGS and pop the stack (FPU).
+ INST_1x(fucomip, kInstFucomip, FpReg)
+ //! @brief Unordered compare fp0 with @a o0 and pop register stack (FPU).
+ INST_1x(fucomp, kInstFucomp, FpReg)
+ //! @brief Unordered compare fp0 with fp1 and pop register stack twice (FPU).
+ INST_0x(fucompp, kInstFucompp)
+
+ //! @brief Check for and handle pending unmasked FP exceptions (FPU).
+ INST_0x(fwait, kInstFwait)
+
+ //! @brief Examine fp0 (FPU).
+ INST_0x(fxam, kInstFxam)
+
+ //! @brief Exchange register contents (FPU).
+ //!
+ //! Exchange content of fp0 with @a o0.
+ INST_1x(fxch, kInstFxch, FpReg)
+
+ //! @brief Restore FP, MMX and streaming SIMD extension states (FPU, MMX, SSE).
+ //!
+ //! Load FP and MMX technology and Streaming SIMD Extension state from
+ //! @a o0 (512 bytes).
+ INST_1x(fxrstor, kInstFxrstor, Mem)
+
+ //! @brief Store FP, MMX and streaming SIMD extension states (FPU, MMX, SSE).
+ //!
+ //! Store FP and MMX technology state and Streaming SIMD Extension state
+ //! to @a o0 (512 bytes).
+ INST_1x(fxsave, kInstFxsave, Mem)
+
+ //! @brief Extract exponent and significand (FPU).
+ //!
+ //! Separate value in fp0 into exponent and significand, store exponent
+ //! in fp0 and push the significand on the register stack.
+ INST_0x(fxtract, kInstFxtract)
+
+ //! @brief Compute y * log2(x).
+ //!
+ //! Replace fp1 with (fp1 * log2(fp0)) and pop the register stack.
+ INST_0x(fyl2x, kInstFyl2x)
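+
+ // The classic log2(x) idiom built from fld1 and fyl2x, as a short sketch
+ // (assembler `a` and qword_ptr() helper assumed, as above):
+ //
+ //   a.fld1();                 // Push y = +1.0.
+ //   a.fld(qword_ptr(esi));    // Push x; the stack is now x, 1.0.
+ //   a.fyl2x();                // fp0 = 1.0 * log2(x); one value popped.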
+
+ //! @brief Compute y * log2(x+1).
+ //!
+ //! Replace fp1 with (fp1 * log2(fp0 + 1)) and pop the register stack.
+ INST_0x(fyl2xp1, kInstFyl2xp1)
+
+ // --------------------------------------------------------------------------
+ // [MMX]
+ // --------------------------------------------------------------------------
+
+ //! @brief Move dword (MMX).
+ INST_2x(movd, kInstMovd, Mem, MmReg)
+ //! @overload
+ INST_2x(movd, kInstMovd, GpReg, MmReg)
+ //! @overload
+ INST_2x(movd, kInstMovd, MmReg, Mem)
+ //! @overload
+ INST_2x(movd, kInstMovd, MmReg, GpReg)
+
+ //! @brief Move qword (MMX).
+ INST_2x(movq, kInstMovq, MmReg, MmReg)
+ //! @overload
+ INST_2x(movq, kInstMovq, Mem, MmReg)
+ //! @overload
+ INST_2x(movq, kInstMovq, MmReg, Mem)
+
+ //! @brief Pack with signed saturation (MMX).
+ INST_2x(packsswb, kInstPacksswb, MmReg, MmReg)
+ //! @overload
+ INST_2x(packsswb, kInstPacksswb, MmReg, Mem)
+
+ //! @brief Pack with signed saturation (MMX).
+ INST_2x(packssdw, kInstPackssdw, MmReg, MmReg)
+ //! @overload
+ INST_2x(packssdw, kInstPackssdw, MmReg, Mem)
+
+ //! @brief Pack with unsigned saturation (MMX).
+ INST_2x(packuswb, kInstPackuswb, MmReg, MmReg)
+ //! @overload
+ INST_2x(packuswb, kInstPackuswb, MmReg, Mem)
+
+ //! @brief Packed byte add (MMX).
+ INST_2x(paddb, kInstPaddb, MmReg, MmReg)
+ //! @overload
+ INST_2x(paddb, kInstPaddb, MmReg, Mem)
+
+ //! @brief Packed word add (MMX).
+ INST_2x(paddw, kInstPaddw, MmReg, MmReg)
+ //! @overload
+ INST_2x(paddw, kInstPaddw, MmReg, Mem)
+
+ //! @brief Packed dword add (MMX).
+ INST_2x(paddd, kInstPaddd, MmReg, MmReg)
+ //! @overload
+ INST_2x(paddd, kInstPaddd, MmReg, Mem)
+
+ //! @brief Packed add with saturation (MMX).
+ INST_2x(paddsb, kInstPaddsb, MmReg, MmReg)
+ //! @overload
+ INST_2x(paddsb, kInstPaddsb, MmReg, Mem)
+
+ //! @brief Packed add with saturation (MMX).
+ INST_2x(paddsw, kInstPaddsw, MmReg, MmReg)
+ //! @overload
+ INST_2x(paddsw, kInstPaddsw, MmReg, Mem)
+
+ //! @brief Packed add unsigned with saturation (MMX).
+ INST_2x(paddusb, kInstPaddusb, MmReg, MmReg)
+ //! @overload
+ INST_2x(paddusb, kInstPaddusb, MmReg, Mem)
+
+ //! @brief Packed add unsigned with saturation (MMX).
+ INST_2x(paddusw, kInstPaddusw, MmReg, MmReg)
+ //! @overload
+ INST_2x(paddusw, kInstPaddusw, MmReg, Mem)
+
+ //! @brief And (MMX).
+ INST_2x(pand, kInstPand, MmReg, MmReg)
+ //! @overload
+ INST_2x(pand, kInstPand, MmReg, Mem)
+
+ //! @brief And-not (MMX).
+ INST_2x(pandn, kInstPandn, MmReg, MmReg)
+ //! @overload
+ INST_2x(pandn, kInstPandn, MmReg, Mem)
+
+ //! @brief Packed compare bytes for equal (MMX).
+ INST_2x(pcmpeqb, kInstPcmpeqb, MmReg, MmReg)
+ //! @overload
+ INST_2x(pcmpeqb, kInstPcmpeqb, MmReg, Mem)
+
+ //! @brief Packed compare words for equal (MMX).
+ INST_2x(pcmpeqw, kInstPcmpeqw, MmReg, MmReg)
+ //! @overload
+ INST_2x(pcmpeqw, kInstPcmpeqw, MmReg, Mem)
+
+ //! @brief Packed compare dwords for equal (MMX).
+ INST_2x(pcmpeqd, kInstPcmpeqd, MmReg, MmReg)
+ //! @overload
+ INST_2x(pcmpeqd, kInstPcmpeqd, MmReg, Mem)
+
+ //! @brief Packed compare bytes for greater than (MMX).
+ INST_2x(pcmpgtb, kInstPcmpgtb, MmReg, MmReg)
+ //! @overload
+ INST_2x(pcmpgtb, kInstPcmpgtb, MmReg, Mem)
+
+ //! @brief Packed compare words for greater than (MMX).
+ INST_2x(pcmpgtw, kInstPcmpgtw, MmReg, MmReg)
+ //! @overload
+ INST_2x(pcmpgtw, kInstPcmpgtw, MmReg, Mem)
+
+ //! @brief Packed compare dwords for greater than (MMX).
+ INST_2x(pcmpgtd, kInstPcmpgtd, MmReg, MmReg)
+ //! @overload
+ INST_2x(pcmpgtd, kInstPcmpgtd, MmReg, Mem)
+
+ //! @brief Packed multiply high (MMX).
+ INST_2x(pmulhw, kInstPmulhw, MmReg, MmReg)
+ //!
@overload + INST_2x(pmulhw, kInstPmulhw, MmReg, Mem) + + //! @brief Packed multiply low (MMX). + INST_2x(pmullw, kInstPmullw, MmReg, MmReg) + //! @overload + INST_2x(pmullw, kInstPmullw, MmReg, Mem) + + //! @brief Or (MMX). + INST_2x(por, kInstPor, MmReg, MmReg) + //! @overload + INST_2x(por, kInstPor, MmReg, Mem) + + //! @brief Packed multiply and add (MMX). + INST_2x(pmaddwd, kInstPmaddwd, MmReg, MmReg) + //! @overload + INST_2x(pmaddwd, kInstPmaddwd, MmReg, Mem) + + //! @brief Packed shift left logical (MMX). + INST_2x(pslld, kInstPslld, MmReg, MmReg) + //! @overload + INST_2x(pslld, kInstPslld, MmReg, Mem) + //! @overload + INST_2i(pslld, kInstPslld, MmReg, Imm) + + //! @brief Packed shift left logical (MMX). + INST_2x(psllq, kInstPsllq, MmReg, MmReg) + //! @overload + INST_2x(psllq, kInstPsllq, MmReg, Mem) + //! @overload + INST_2i(psllq, kInstPsllq, MmReg, Imm) + + //! @brief Packed shift left logical (MMX). + INST_2x(psllw, kInstPsllw, MmReg, MmReg) + //! @overload + INST_2x(psllw, kInstPsllw, MmReg, Mem) + //! @overload + INST_2i(psllw, kInstPsllw, MmReg, Imm) + + //! @brief Packed shift right arithmetic (MMX). + INST_2x(psrad, kInstPsrad, MmReg, MmReg) + //! @overload + INST_2x(psrad, kInstPsrad, MmReg, Mem) + //! @overload + INST_2i(psrad, kInstPsrad, MmReg, Imm) + + //! @brief Packed shift right arithmetic (MMX). + INST_2x(psraw, kInstPsraw, MmReg, MmReg) + //! @overload + INST_2x(psraw, kInstPsraw, MmReg, Mem) + //! @overload + INST_2i(psraw, kInstPsraw, MmReg, Imm) + + //! @brief Packed shift right logical (MMX). + INST_2x(psrld, kInstPsrld, MmReg, MmReg) + //! @overload + INST_2x(psrld, kInstPsrld, MmReg, Mem) + //! @overload + INST_2i(psrld, kInstPsrld, MmReg, Imm) + + //! @brief Packed shift right logical (MMX). + INST_2x(psrlq, kInstPsrlq, MmReg, MmReg) + //! @overload + INST_2x(psrlq, kInstPsrlq, MmReg, Mem) + //! @overload + INST_2i(psrlq, kInstPsrlq, MmReg, Imm) + + //! @brief Packed shift right logical (MMX). + INST_2x(psrlw, kInstPsrlw, MmReg, MmReg) + //! @overload + INST_2x(psrlw, kInstPsrlw, MmReg, Mem) + //! @overload + INST_2i(psrlw, kInstPsrlw, MmReg, Imm) + + //! @brief Packed subtract (MMX). + INST_2x(psubb, kInstPsubb, MmReg, MmReg) + //! @overload + INST_2x(psubb, kInstPsubb, MmReg, Mem) + + //! @brief Packed subtract (MMX). + INST_2x(psubw, kInstPsubw, MmReg, MmReg) + //! @overload + INST_2x(psubw, kInstPsubw, MmReg, Mem) + + //! @brief Packed subtract (MMX). + INST_2x(psubd, kInstPsubd, MmReg, MmReg) + //! @overload + INST_2x(psubd, kInstPsubd, MmReg, Mem) + + //! @brief Packed subtract with saturation (MMX). + INST_2x(psubsb, kInstPsubsb, MmReg, MmReg) + //! @overload + INST_2x(psubsb, kInstPsubsb, MmReg, Mem) + + //! @brief Packed subtract with saturation (MMX). + INST_2x(psubsw, kInstPsubsw, MmReg, MmReg) + //! @overload + INST_2x(psubsw, kInstPsubsw, MmReg, Mem) + + //! @brief Packed subtract with unsigned saturation (MMX). + INST_2x(psubusb, kInstPsubusb, MmReg, MmReg) + //! @overload + INST_2x(psubusb, kInstPsubusb, MmReg, Mem) + + //! @brief Packed subtract with unsigned saturation (MMX). + INST_2x(psubusw, kInstPsubusw, MmReg, MmReg) + //! @overload + INST_2x(psubusw, kInstPsubusw, MmReg, Mem) + + //! @brief Unpack high packed data (MMX). + INST_2x(punpckhbw, kInstPunpckhbw, MmReg, MmReg) + //! @overload + INST_2x(punpckhbw, kInstPunpckhbw, MmReg, Mem) + + //! @brief Unpack high packed data (MMX). + INST_2x(punpckhwd, kInstPunpckhwd, MmReg, MmReg) + //! @overload + INST_2x(punpckhwd, kInstPunpckhwd, MmReg, Mem) + + //! 
@brief Unpack high packed data (MMX).
+ INST_2x(punpckhdq, kInstPunpckhdq, MmReg, MmReg)
+ //! @overload
+ INST_2x(punpckhdq, kInstPunpckhdq, MmReg, Mem)
+
+ //! @brief Unpack low packed data (MMX).
+ INST_2x(punpcklbw, kInstPunpcklbw, MmReg, MmReg)
+ //! @overload
+ INST_2x(punpcklbw, kInstPunpcklbw, MmReg, Mem)
+
+ //! @brief Unpack low packed data (MMX).
+ INST_2x(punpcklwd, kInstPunpcklwd, MmReg, MmReg)
+ //! @overload
+ INST_2x(punpcklwd, kInstPunpcklwd, MmReg, Mem)
+
+ //! @brief Unpack low packed data (MMX).
+ INST_2x(punpckldq, kInstPunpckldq, MmReg, MmReg)
+ //! @overload
+ INST_2x(punpckldq, kInstPunpckldq, MmReg, Mem)
+
+ //! @brief Xor (MMX).
+ INST_2x(pxor, kInstPxor, MmReg, MmReg)
+ //! @overload
+ INST_2x(pxor, kInstPxor, MmReg, Mem)
+
+ //! @brief Empty MMX state.
+ INST_0x(emms, kInstEmms)
+
+ // --------------------------------------------------------------------------
+ // [3dNow]
+ // --------------------------------------------------------------------------
+
+ //! @brief Packed SP-FP to integer convert (3dNow!).
+ INST_2x(pf2id, kInstPf2id, MmReg, MmReg)
+ //! @overload
+ INST_2x(pf2id, kInstPf2id, MmReg, Mem)
+
+ //! @brief Packed SP-FP to integer word convert (3dNow!).
+ INST_2x(pf2iw, kInstPf2iw, MmReg, MmReg)
+ //! @overload
+ INST_2x(pf2iw, kInstPf2iw, MmReg, Mem)
+
+ //! @brief Packed SP-FP accumulate (3dNow!).
+ INST_2x(pfacc, kInstPfacc, MmReg, MmReg)
+ //! @overload
+ INST_2x(pfacc, kInstPfacc, MmReg, Mem)
+
+ //! @brief Packed SP-FP addition (3dNow!).
+ INST_2x(pfadd, kInstPfadd, MmReg, MmReg)
+ //! @overload
+ INST_2x(pfadd, kInstPfadd, MmReg, Mem)
+
+ //! @brief Packed SP-FP compare - dst == src (3dNow!).
+ INST_2x(pfcmpeq, kInstPfcmpeq, MmReg, MmReg)
+ //! @overload
+ INST_2x(pfcmpeq, kInstPfcmpeq, MmReg, Mem)
+
+ //! @brief Packed SP-FP compare - dst >= src (3dNow!).
+ INST_2x(pfcmpge, kInstPfcmpge, MmReg, MmReg)
+ //! @overload
+ INST_2x(pfcmpge, kInstPfcmpge, MmReg, Mem)
+
+ //! @brief Packed SP-FP compare - dst > src (3dNow!).
+ INST_2x(pfcmpgt, kInstPfcmpgt, MmReg, MmReg)
+ //! @overload
+ INST_2x(pfcmpgt, kInstPfcmpgt, MmReg, Mem)
+
+ //! @brief Packed SP-FP maximum (3dNow!).
+ INST_2x(pfmax, kInstPfmax, MmReg, MmReg)
+ //! @overload
+ INST_2x(pfmax, kInstPfmax, MmReg, Mem)
+
+ //! @brief Packed SP-FP minimum (3dNow!).
+ INST_2x(pfmin, kInstPfmin, MmReg, MmReg)
+ //! @overload
+ INST_2x(pfmin, kInstPfmin, MmReg, Mem)
+
+ //! @brief Packed SP-FP multiply (3dNow!).
+ INST_2x(pfmul, kInstPfmul, MmReg, MmReg)
+ //! @overload
+ INST_2x(pfmul, kInstPfmul, MmReg, Mem)
+
+ //! @brief Packed SP-FP negative accumulate (3dNow!).
+ INST_2x(pfnacc, kInstPfnacc, MmReg, MmReg)
+ //! @overload
+ INST_2x(pfnacc, kInstPfnacc, MmReg, Mem)
+
+ //! @brief Packed SP-FP mixed accumulate (3dNow!).
+ INST_2x(pfpnacc, kInstPfpnacc, MmReg, MmReg)
+ //! @overload
+ INST_2x(pfpnacc, kInstPfpnacc, MmReg, Mem)
+
+ //! @brief Packed SP-FP reciprocal approximation (3dNow!).
+ INST_2x(pfrcp, kInstPfrcp, MmReg, MmReg)
+ //! @overload
+ INST_2x(pfrcp, kInstPfrcp, MmReg, Mem)
+
+ //! @brief Packed SP-FP reciprocal, first iteration step (3dNow!).
+ INST_2x(pfrcpit1, kInstPfrcpit1, MmReg, MmReg)
+ //! @overload
+ INST_2x(pfrcpit1, kInstPfrcpit1, MmReg, Mem)
+
+ //! @brief Packed SP-FP reciprocal, second iteration step (3dNow!).
+ INST_2x(pfrcpit2, kInstPfrcpit2, MmReg, MmReg)
+ //! @overload
+ INST_2x(pfrcpit2, kInstPfrcpit2, MmReg, Mem)
+
+ //! @brief Packed SP-FP reciprocal square root, first iteration step (3dNow!).
+ INST_2x(pfrsqit1, kInstPfrsqit1, MmReg, MmReg)
+ //!
@overload + INST_2x(pfrsqit1, kInstPfrsqit1, MmReg, Mem) + + //! @brief Packed SP-FP reciprocal square root approximation (3dNow!). + INST_2x(pfrsqrt, kInstPfrsqrt, MmReg, MmReg) + //! @overload + INST_2x(pfrsqrt, kInstPfrsqrt, MmReg, Mem) + + //! @brief Packed SP-FP subtract (3dNow!). + INST_2x(pfsub, kInstPfsub, MmReg, MmReg) + //! @overload + INST_2x(pfsub, kInstPfsub, MmReg, Mem) + + //! @brief Packed SP-FP reverse subtract (3dNow!). + INST_2x(pfsubr, kInstPfsubr, MmReg, MmReg) + //! @overload + INST_2x(pfsubr, kInstPfsubr, MmReg, Mem) + + //! @brief Packed dwords to SP-FP (3dNow!). + INST_2x(pi2fd, kInstPi2fd, MmReg, MmReg) + //! @overload + INST_2x(pi2fd, kInstPi2fd, MmReg, Mem) + + //! @brief Packed words to SP-FP (3dNow!). + INST_2x(pi2fw, kInstPi2fw, MmReg, MmReg) + //! @overload + INST_2x(pi2fw, kInstPi2fw, MmReg, Mem) + + //! @brief Packed swap dword (3dNow!) + INST_2x(pswapd, kInstPswapd, MmReg, MmReg) + //! @overload + INST_2x(pswapd, kInstPswapd, MmReg, Mem) + + //! @brief Prefetch (3dNow!). + INST_1x(prefetch3dnow, kInstPrefetch3dNow, Mem) + + //! @brief Prefetch and set cache to modified (3dNow!). + INST_1x(prefetchw3dnow, kInstPrefetchw3dNow, Mem) + + //! @brief Faster EMMS (3dNow!). + INST_0x(femms, kInstFemms) + + // -------------------------------------------------------------------------- + // [SSE] + // -------------------------------------------------------------------------- + + //! @brief Packed SP-FP add (SSE). + INST_2x(addps, kInstAddps, XmmReg, XmmReg) + //! @overload + INST_2x(addps, kInstAddps, XmmReg, Mem) + + //! @brief Scalar SP-FP add (SSE). + INST_2x(addss, kInstAddss, XmmReg, XmmReg) + //! @overload + INST_2x(addss, kInstAddss, XmmReg, Mem) + + //! @brief And-not for SP-FP (SSE). + INST_2x(andnps, kInstAndnps, XmmReg, XmmReg) + //! @overload + INST_2x(andnps, kInstAndnps, XmmReg, Mem) + + //! @brief And for SP-FP (SSE). + INST_2x(andps, kInstAndps, XmmReg, XmmReg) + //! @overload + INST_2x(andps, kInstAndps, XmmReg, Mem) + + //! @brief Packed SP-FP compare (SSE). + INST_3i(cmpps, kInstCmpps, XmmReg, XmmReg, Imm) + //! @overload + INST_3i(cmpps, kInstCmpps, XmmReg, Mem, Imm) + + //! @brief Compare scalar SP-FP values (SSE). + INST_3i(cmpss, kInstCmpss, XmmReg, XmmReg, Imm) + //! @overload + INST_3i(cmpss, kInstCmpss, XmmReg, Mem, Imm) + + //! @brief Scalar ordered SP-FP compare and set EFLAGS (SSE). + INST_2x(comiss, kInstComiss, XmmReg, XmmReg) + //! @overload + INST_2x(comiss, kInstComiss, XmmReg, Mem) + + //! @brief Packed signed INT32 to packed SP-FP conversion (SSE). + INST_2x(cvtpi2ps, kInstCvtpi2ps, XmmReg, MmReg) + //! @overload + INST_2x(cvtpi2ps, kInstCvtpi2ps, XmmReg, Mem) + + //! @brief Packed SP-FP to packed INT32 conversion (SSE). + INST_2x(cvtps2pi, kInstCvtps2pi, MmReg, XmmReg) + //! @overload + INST_2x(cvtps2pi, kInstCvtps2pi, MmReg, Mem) + + //! @brief Scalar signed INT32 to SP-FP conversion (SSE). + INST_2x(cvtsi2ss, kInstCvtsi2ss, XmmReg, GpReg) + //! @overload + INST_2x(cvtsi2ss, kInstCvtsi2ss, XmmReg, Mem) + + //! @brief Scalar SP-FP to signed INT32 conversion (SSE). + INST_2x(cvtss2si, kInstCvtss2si, GpReg, XmmReg) + //! @overload + INST_2x(cvtss2si, kInstCvtss2si, GpReg, Mem) + + //! @brief Packed SP-FP to packed INT32 conversion (truncate) (SSE). + INST_2x(cvttps2pi, kInstCvttps2pi, MmReg, XmmReg) + //! @overload + INST_2x(cvttps2pi, kInstCvttps2pi, MmReg, Mem) + + //! @brief Scalar SP-FP to signed INT32 conversion (truncate) (SSE). + INST_2x(cvttss2si, kInstCvttss2si, GpReg, XmmReg) + //! 
@overload + INST_2x(cvttss2si, kInstCvttss2si, GpReg, Mem) + + //! @brief Packed SP-FP divide (SSE). + INST_2x(divps, kInstDivps, XmmReg, XmmReg) + //! @overload + INST_2x(divps, kInstDivps, XmmReg, Mem) + + //! @brief Scalar SP-FP divide (SSE). + INST_2x(divss, kInstDivss, XmmReg, XmmReg) + //! @overload + INST_2x(divss, kInstDivss, XmmReg, Mem) + + //! @brief Load streaming SIMD extension control/status (SSE). + INST_1x(ldmxcsr, kInstLdmxcsr, Mem) + + //! @brief Byte mask write (SSE). + //! + //! @note The default memory location is specified by DS:EDI. + INST_2x(maskmovq, kInstMaskmovq, MmReg, MmReg) + + //! @brief Packed SP-FP maximum (SSE). + INST_2x(maxps, kInstMaxps, XmmReg, XmmReg) + //! @overload + INST_2x(maxps, kInstMaxps, XmmReg, Mem) + + //! @brief Scalar SP-FP maximum (SSE). + INST_2x(maxss, kInstMaxss, XmmReg, XmmReg) + //! @overload + INST_2x(maxss, kInstMaxss, XmmReg, Mem) + + //! @brief Packed SP-FP minimum (SSE). + INST_2x(minps, kInstMinps, XmmReg, XmmReg) + //! @overload + INST_2x(minps, kInstMinps, XmmReg, Mem) + + //! @brief Scalar SP-FP minimum (SSE). + INST_2x(minss, kInstMinss, XmmReg, XmmReg) + //! @overload + INST_2x(minss, kInstMinss, XmmReg, Mem) + + //! @brief Move aligned packed SP-FP values (SSE). + INST_2x(movaps, kInstMovaps, XmmReg, XmmReg) + //! @overload + INST_2x(movaps, kInstMovaps, XmmReg, Mem) + //! @brief Move aligned packed SP-FP values (SSE). + INST_2x(movaps, kInstMovaps, Mem, XmmReg) + + //! @brief Move dword. + INST_2x(movd, kInstMovd, Mem, XmmReg) + //! @overload + INST_2x(movd, kInstMovd, GpReg, XmmReg) + //! @overload + INST_2x(movd, kInstMovd, XmmReg, Mem) + //! @overload + INST_2x(movd, kInstMovd, XmmReg, GpReg) + + //! @brief Move qword (SSE). + INST_2x(movq, kInstMovq, XmmReg, XmmReg) + //! @overload + INST_2x(movq, kInstMovq, Mem, XmmReg) + //! @overload + INST_2x(movq, kInstMovq, XmmReg, Mem) + + //! @brief Move 64 Bits non-temporal (SSE). + INST_2x(movntq, kInstMovntq, Mem, MmReg) + + //! @brief High to low packed SP-FP (SSE). + INST_2x(movhlps, kInstMovhlps, XmmReg, XmmReg) + + //! @brief Move high packed SP-FP (SSE). + INST_2x(movhps, kInstMovhps, XmmReg, Mem) + //! @brief Move high packed SP-FP (SSE). + INST_2x(movhps, kInstMovhps, Mem, XmmReg) + + //! @brief Move low to high packed SP-FP (SSE). + INST_2x(movlhps, kInstMovlhps, XmmReg, XmmReg) + + //! @brief Move low packed SP-FP (SSE). + INST_2x(movlps, kInstMovlps, XmmReg, Mem) + //! @brief Move low packed SP-FP (SSE). + INST_2x(movlps, kInstMovlps, Mem, XmmReg) + + //! @brief Move aligned four packed SP-FP non-temporal (SSE). + INST_2x(movntps, kInstMovntps, Mem, XmmReg) + + //! @brief Move scalar SP-FP (SSE). + INST_2x(movss, kInstMovss, XmmReg, XmmReg) + //! @overload + INST_2x(movss, kInstMovss, XmmReg, Mem) + //! @overload + INST_2x(movss, kInstMovss, Mem, XmmReg) + + //! @brief Move unaligned packed SP-FP values (SSE). + INST_2x(movups, kInstMovups, XmmReg, XmmReg) + //! @overload + INST_2x(movups, kInstMovups, XmmReg, Mem) + //! @overload + INST_2x(movups, kInstMovups, Mem, XmmReg) + + //! @brief Packed SP-FP multiply (SSE). + INST_2x(mulps, kInstMulps, XmmReg, XmmReg) + //! @overload + INST_2x(mulps, kInstMulps, XmmReg, Mem) + + //! @brief Scalar SP-FP multiply (SSE). + INST_2x(mulss, kInstMulss, XmmReg, XmmReg) + //! @overload + INST_2x(mulss, kInstMulss, XmmReg, Mem) + + //! @brief Or for SP-FP data (SSE). + INST_2x(orps, kInstOrps, XmmReg, XmmReg) + //! @overload + INST_2x(orps, kInstOrps, XmmReg, Mem) + + //! @brief Packed average (SSE). 
+ INST_2x(pavgb, kInstPavgb, MmReg, MmReg)
+ //! @overload
+ INST_2x(pavgb, kInstPavgb, MmReg, Mem)
+
+ //! @brief Packed average (SSE).
+ INST_2x(pavgw, kInstPavgw, MmReg, MmReg)
+ //! @overload
+ INST_2x(pavgw, kInstPavgw, MmReg, Mem)
+
+ //! @brief Extract word (SSE).
+ INST_3i(pextrw, kInstPextrw, GpReg, MmReg, Imm)
+
+ //! @brief Insert word (SSE).
+ INST_3i(pinsrw, kInstPinsrw, MmReg, GpReg, Imm)
+ //! @overload
+ INST_3i(pinsrw, kInstPinsrw, MmReg, Mem, Imm)
+
+ //! @brief Packed signed integer word maximum (SSE).
+ INST_2x(pmaxsw, kInstPmaxsw, MmReg, MmReg)
+ //! @overload
+ INST_2x(pmaxsw, kInstPmaxsw, MmReg, Mem)
+
+ //! @brief Packed unsigned integer byte maximum (SSE).
+ INST_2x(pmaxub, kInstPmaxub, MmReg, MmReg)
+ //! @overload
+ INST_2x(pmaxub, kInstPmaxub, MmReg, Mem)
+
+ //! @brief Packed signed integer word minimum (SSE).
+ INST_2x(pminsw, kInstPminsw, MmReg, MmReg)
+ //! @overload
+ INST_2x(pminsw, kInstPminsw, MmReg, Mem)
+
+ //! @brief Packed unsigned integer byte minimum (SSE).
+ INST_2x(pminub, kInstPminub, MmReg, MmReg)
+ //! @overload
+ INST_2x(pminub, kInstPminub, MmReg, Mem)
+
+ //! @brief Move byte mask to integer (SSE).
+ INST_2x(pmovmskb, kInstPmovmskb, GpReg, MmReg)
+
+ //! @brief Packed multiply high unsigned (SSE).
+ INST_2x(pmulhuw, kInstPmulhuw, MmReg, MmReg)
+ //! @overload
+ INST_2x(pmulhuw, kInstPmulhuw, MmReg, Mem)
+
+ //! @brief Packed sum of absolute differences (SSE).
+ INST_2x(psadbw, kInstPsadbw, MmReg, MmReg)
+ //! @overload
+ INST_2x(psadbw, kInstPsadbw, MmReg, Mem)
+
+ //! @brief Packed shuffle words (SSE).
+ INST_3i(pshufw, kInstPshufw, MmReg, MmReg, Imm)
+ //! @overload
+ INST_3i(pshufw, kInstPshufw, MmReg, Mem, Imm)
+
+ //! @brief Packed SP-FP reciprocal (SSE).
+ INST_2x(rcpps, kInstRcpps, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(rcpps, kInstRcpps, XmmReg, Mem)
+
+ //! @brief Scalar SP-FP reciprocal (SSE).
+ INST_2x(rcpss, kInstRcpss, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(rcpss, kInstRcpss, XmmReg, Mem)
+
+ //! @brief Prefetch (SSE).
+ INST_2i(prefetch, kInstPrefetch, Mem, Imm)
+
+ //! @brief Compute sum of absolute differences (SSE2).
+ INST_2x(psadbw, kInstPsadbw, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(psadbw, kInstPsadbw, XmmReg, Mem)
+
+ //! @brief Packed SP-FP square root reciprocal (SSE).
+ INST_2x(rsqrtps, kInstRsqrtps, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(rsqrtps, kInstRsqrtps, XmmReg, Mem)
+
+ //! @brief Scalar SP-FP square root reciprocal (SSE).
+ INST_2x(rsqrtss, kInstRsqrtss, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(rsqrtss, kInstRsqrtss, XmmReg, Mem)
+
+ //! @brief Store fence (SSE).
+ INST_0x(sfence, kInstSfence)
+
+ //! @brief Shuffle SP-FP (SSE).
+ INST_3i(shufps, kInstShufps, XmmReg, XmmReg, Imm)
+ //! @overload
+ INST_3i(shufps, kInstShufps, XmmReg, Mem, Imm)
+
+ //! @brief Packed SP-FP square root (SSE).
+ INST_2x(sqrtps, kInstSqrtps, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(sqrtps, kInstSqrtps, XmmReg, Mem)
+
+ //! @brief Scalar SP-FP square root (SSE).
+ INST_2x(sqrtss, kInstSqrtss, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(sqrtss, kInstSqrtss, XmmReg, Mem)
+
+ //! @brief Store streaming SIMD extension control/status (SSE).
+ INST_1x(stmxcsr, kInstStmxcsr, Mem)
+
+ //! @brief Packed SP-FP subtract (SSE).
+ INST_2x(subps, kInstSubps, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(subps, kInstSubps, XmmReg, Mem)
+
+ //! @brief Scalar SP-FP subtract (SSE).
+ INST_2x(subss, kInstSubss, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(subss, kInstSubss, XmmReg, Mem)
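+
+ // A minimal sketch of the scalar SSE pattern above (movss/sqrtss), assuming
+ // an assembler `a` plus the xmm0 register and dword_ptr() helper defined
+ // elsewhere in AsmJit (illustrative only):
+ //
+ //   a.movss(xmm0, dword_ptr(esi));  // Load one float.
+ //   a.sqrtss(xmm0, xmm0);           // xmm0[0] = sqrt(xmm0[0]).
+ //   a.movss(dword_ptr(edi), xmm0);  // Store it back.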
+
+ //! @brief Unordered scalar SP-FP compare and set EFLAGS (SSE).
+ INST_2x(ucomiss, kInstUcomiss, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(ucomiss, kInstUcomiss, XmmReg, Mem)
+
+ //! @brief Unpack high packed SP-FP data (SSE).
+ INST_2x(unpckhps, kInstUnpckhps, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(unpckhps, kInstUnpckhps, XmmReg, Mem)
+
+ //! @brief Unpack low packed SP-FP data (SSE).
+ INST_2x(unpcklps, kInstUnpcklps, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(unpcklps, kInstUnpcklps, XmmReg, Mem)
+
+ //! @brief Xor for SP-FP data (SSE).
+ INST_2x(xorps, kInstXorps, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(xorps, kInstXorps, XmmReg, Mem)
+
+ // --------------------------------------------------------------------------
+ // [SSE2]
+ // --------------------------------------------------------------------------
+
+ //! @brief Packed DP-FP add (SSE2).
+ INST_2x(addpd, kInstAddpd, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(addpd, kInstAddpd, XmmReg, Mem)
+
+ //! @brief Scalar DP-FP add (SSE2).
+ INST_2x(addsd, kInstAddsd, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(addsd, kInstAddsd, XmmReg, Mem)
+
+ //! @brief And-not for DP-FP (SSE2).
+ INST_2x(andnpd, kInstAndnpd, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(andnpd, kInstAndnpd, XmmReg, Mem)
+
+ //! @brief And for DP-FP (SSE2).
+ INST_2x(andpd, kInstAndpd, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(andpd, kInstAndpd, XmmReg, Mem)
+
+ //! @brief Flush cache line (SSE2).
+ INST_1x(clflush, kInstClflush, Mem)
+
+ //! @brief Packed DP-FP compare (SSE2).
+ INST_3i(cmppd, kInstCmppd, XmmReg, XmmReg, Imm)
+ //! @overload
+ INST_3i(cmppd, kInstCmppd, XmmReg, Mem, Imm)
+
+ //! @brief Compare scalar DP-FP values (SSE2).
+ INST_3i(cmpsd, kInstCmpsd, XmmReg, XmmReg, Imm)
+ //! @overload
+ INST_3i(cmpsd, kInstCmpsd, XmmReg, Mem, Imm)
+
+ //! @brief Scalar ordered DP-FP compare and set EFLAGS (SSE2).
+ INST_2x(comisd, kInstComisd, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(comisd, kInstComisd, XmmReg, Mem)
+
+ //! @brief Convert packed dword integers to packed DP-FP values (SSE2).
+ INST_2x(cvtdq2pd, kInstCvtdq2pd, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(cvtdq2pd, kInstCvtdq2pd, XmmReg, Mem)
+
+ //! @brief Convert packed dword integers to packed SP-FP values (SSE2).
+ INST_2x(cvtdq2ps, kInstCvtdq2ps, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(cvtdq2ps, kInstCvtdq2ps, XmmReg, Mem)
+
+ //! @brief Convert packed DP-FP values to packed dword integers (SSE2).
+ INST_2x(cvtpd2dq, kInstCvtpd2dq, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(cvtpd2dq, kInstCvtpd2dq, XmmReg, Mem)
+
+ //! @brief Convert packed DP-FP values to packed dword integers (SSE2).
+ INST_2x(cvtpd2pi, kInstCvtpd2pi, MmReg, XmmReg)
+ //! @overload
+ INST_2x(cvtpd2pi, kInstCvtpd2pi, MmReg, Mem)
+
+ //! @brief Convert packed DP-FP values to packed SP-FP values (SSE2).
+ INST_2x(cvtpd2ps, kInstCvtpd2ps, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(cvtpd2ps, kInstCvtpd2ps, XmmReg, Mem)
+
+ //! @brief Convert packed dword integers to packed DP-FP values (SSE2).
+ INST_2x(cvtpi2pd, kInstCvtpi2pd, XmmReg, MmReg)
+ //! @overload
+ INST_2x(cvtpi2pd, kInstCvtpi2pd, XmmReg, Mem)
+
+ //! @brief Convert packed SP-FP values to packed dword integers (SSE2).
+ INST_2x(cvtps2dq, kInstCvtps2dq, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(cvtps2dq, kInstCvtps2dq, XmmReg, Mem)
+
+ //! @brief Convert packed SP-FP values to packed DP-FP values (SSE2).
+ INST_2x(cvtps2pd, kInstCvtps2pd, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(cvtps2pd, kInstCvtps2pd, XmmReg, Mem)
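+
+ // A hedged sketch of the packed conversions above (cvtdq2pd and the
+ // truncating cvtt* family); `a` and xmm0/xmm1 assumed as before:
+ //
+ //   a.cvtdq2pd(xmm0, xmm1);   // Two low dwords of xmm1 -> two doubles.
+ //   a.cvttpd2dq(xmm1, xmm0);  // Back to dwords, truncating toward zero.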
+
+ //! @brief Convert scalar DP-FP value to dword integer (SSE2).
+ INST_2x(cvtsd2si, kInstCvtsd2si, GpReg, XmmReg)
+ //! @overload
+ INST_2x(cvtsd2si, kInstCvtsd2si, GpReg, Mem)
+
+ //! @brief Convert scalar DP-FP value to scalar SP-FP value (SSE2).
+ INST_2x(cvtsd2ss, kInstCvtsd2ss, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(cvtsd2ss, kInstCvtsd2ss, XmmReg, Mem)
+
+ //! @brief Convert dword integer to scalar DP-FP value (SSE2).
+ INST_2x(cvtsi2sd, kInstCvtsi2sd, XmmReg, GpReg)
+ //! @overload
+ INST_2x(cvtsi2sd, kInstCvtsi2sd, XmmReg, Mem)
+
+ //! @brief Convert scalar SP-FP value to scalar DP-FP value (SSE2).
+ INST_2x(cvtss2sd, kInstCvtss2sd, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(cvtss2sd, kInstCvtss2sd, XmmReg, Mem)
+
+ //! @brief Convert with truncation packed DP-FP values to packed dword integers (SSE2).
+ INST_2x(cvttpd2pi, kInstCvttpd2pi, MmReg, XmmReg)
+ //! @overload
+ INST_2x(cvttpd2pi, kInstCvttpd2pi, MmReg, Mem)
+
+ //! @brief Convert with truncation packed DP-FP values to packed dword integers (SSE2).
+ INST_2x(cvttpd2dq, kInstCvttpd2dq, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(cvttpd2dq, kInstCvttpd2dq, XmmReg, Mem)
+
+ //! @brief Convert with truncation packed SP-FP values to packed dword integers (SSE2).
+ INST_2x(cvttps2dq, kInstCvttps2dq, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(cvttps2dq, kInstCvttps2dq, XmmReg, Mem)
+
+ //! @brief Convert with truncation scalar DP-FP value to signed dword integer (SSE2).
+ INST_2x(cvttsd2si, kInstCvttsd2si, GpReg, XmmReg)
+ //! @overload
+ INST_2x(cvttsd2si, kInstCvttsd2si, GpReg, Mem)
+
+ //! @brief Packed DP-FP divide (SSE2).
+ INST_2x(divpd, kInstDivpd, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(divpd, kInstDivpd, XmmReg, Mem)
+
+ //! @brief Scalar DP-FP divide (SSE2).
+ INST_2x(divsd, kInstDivsd, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(divsd, kInstDivsd, XmmReg, Mem)
+
+ //! @brief Load fence (SSE2).
+ INST_0x(lfence, kInstLfence)
+
+ //! @brief Store selected bytes of oword (SSE2).
+ //!
+ //! @note Target is DS:EDI.
+ INST_2x(maskmovdqu, kInstMaskmovdqu, XmmReg, XmmReg)
+
+ //! @brief Return maximum packed DP-FP values (SSE2).
+ INST_2x(maxpd, kInstMaxpd, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(maxpd, kInstMaxpd, XmmReg, Mem)
+
+ //! @brief Return maximum scalar DP-FP value (SSE2).
+ INST_2x(maxsd, kInstMaxsd, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(maxsd, kInstMaxsd, XmmReg, Mem)
+
+ //! @brief Memory fence (SSE2).
+ INST_0x(mfence, kInstMfence)
+
+ //! @brief Return minimum packed DP-FP values (SSE2).
+ INST_2x(minpd, kInstMinpd, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(minpd, kInstMinpd, XmmReg, Mem)
+
+ //! @brief Return minimum scalar DP-FP value (SSE2).
+ INST_2x(minsd, kInstMinsd, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(minsd, kInstMinsd, XmmReg, Mem)
+
+ //! @brief Move aligned oword (SSE2).
+ INST_2x(movdqa, kInstMovdqa, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(movdqa, kInstMovdqa, XmmReg, Mem)
+ //! @overload
+ INST_2x(movdqa, kInstMovdqa, Mem, XmmReg)
+
+ //! @brief Move unaligned oword (SSE2).
+ INST_2x(movdqu, kInstMovdqu, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(movdqu, kInstMovdqu, XmmReg, Mem)
+ //! @overload
+ INST_2x(movdqu, kInstMovdqu, Mem, XmmReg)
+
+ //! @brief Extract packed SP-FP sign mask (SSE2).
+ INST_2x(movmskps, kInstMovmskps, GpReg, XmmReg)
+
+ //! @brief Extract packed DP-FP sign mask (SSE2).
+ INST_2x(movmskpd, kInstMovmskpd, GpReg, XmmReg)
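+
+ // A short sketch of the sign-mask extraction above (movmskpd), assuming
+ // an assembler `a` plus the eax/xmm0 register objects and imm() helper
+ // defined elsewhere in AsmJit (illustrative only):
+ //
+ //   a.movmskpd(eax, xmm0);  // Bit 0/1 of eax = sign bit of lane 0/1.
+ //   a.test(eax, imm(3));    // Any negative lane?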
+
+ //! @brief Move scalar DP-FP value (SSE2).
+ INST_2x(movsd, kInstMovsd, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(movsd, kInstMovsd, XmmReg, Mem)
+ //! @overload
+ INST_2x(movsd, kInstMovsd, Mem, XmmReg)
+
+ //! @brief Move aligned packed DP-FP values (SSE2).
+ INST_2x(movapd, kInstMovapd, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(movapd, kInstMovapd, XmmReg, Mem)
+ //! @overload
+ INST_2x(movapd, kInstMovapd, Mem, XmmReg)
+
+ //! @brief Move qword from Xmm to Mm register (SSE2).
+ INST_2x(movdq2q, kInstMovdq2q, MmReg, XmmReg)
+
+ //! @brief Move qword from Mm to Xmm register (SSE2).
+ INST_2x(movq2dq, kInstMovq2dq, XmmReg, MmReg)
+
+ //! @brief Move high packed DP-FP value (SSE2).
+ INST_2x(movhpd, kInstMovhpd, XmmReg, Mem)
+ //! @overload
+ INST_2x(movhpd, kInstMovhpd, Mem, XmmReg)
+
+ //! @brief Move low packed DP-FP value (SSE2).
+ INST_2x(movlpd, kInstMovlpd, XmmReg, Mem)
+ //! @overload
+ INST_2x(movlpd, kInstMovlpd, Mem, XmmReg)
+
+ //! @brief Store oword using non-temporal hint (SSE2).
+ INST_2x(movntdq, kInstMovntdq, Mem, XmmReg)
+
+ //! @brief Store dword using non-temporal hint (SSE2).
+ INST_2x(movnti, kInstMovnti, Mem, GpReg)
+
+ //! @brief Store packed DP-FP values using non-temporal hint (SSE2).
+ INST_2x(movntpd, kInstMovntpd, Mem, XmmReg)
+
+ //! @brief Move unaligned packed DP-FP values (SSE2).
+ INST_2x(movupd, kInstMovupd, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(movupd, kInstMovupd, XmmReg, Mem)
+ //! @overload
+ INST_2x(movupd, kInstMovupd, Mem, XmmReg)
+
+ //! @brief Packed DP-FP multiply (SSE2).
+ INST_2x(mulpd, kInstMulpd, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(mulpd, kInstMulpd, XmmReg, Mem)
+
+ //! @brief Scalar DP-FP multiply (SSE2).
+ INST_2x(mulsd, kInstMulsd, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(mulsd, kInstMulsd, XmmReg, Mem)
+
+ //! @brief Or for DP-FP data (SSE2).
+ INST_2x(orpd, kInstOrpd, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(orpd, kInstOrpd, XmmReg, Mem)
+
+ //! @brief Pack with signed saturation (SSE2).
+ INST_2x(packsswb, kInstPacksswb, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(packsswb, kInstPacksswb, XmmReg, Mem)
+
+ //! @brief Pack with signed saturation (SSE2).
+ INST_2x(packssdw, kInstPackssdw, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(packssdw, kInstPackssdw, XmmReg, Mem)
+
+ //! @brief Pack with unsigned saturation (SSE2).
+ INST_2x(packuswb, kInstPackuswb, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(packuswb, kInstPackuswb, XmmReg, Mem)
+
+ //! @brief Packed byte add (SSE2).
+ INST_2x(paddb, kInstPaddb, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(paddb, kInstPaddb, XmmReg, Mem)
+
+ //! @brief Packed word add (SSE2).
+ INST_2x(paddw, kInstPaddw, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(paddw, kInstPaddw, XmmReg, Mem)
+
+ //! @brief Packed dword add (SSE2).
+ INST_2x(paddd, kInstPaddd, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(paddd, kInstPaddd, XmmReg, Mem)
+
+ //! @brief Packed qword add (SSE2).
+ INST_2x(paddq, kInstPaddq, MmReg, MmReg)
+ //! @overload
+ INST_2x(paddq, kInstPaddq, MmReg, Mem)
+
+ //! @brief Packed qword add (SSE2).
+ INST_2x(paddq, kInstPaddq, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(paddq, kInstPaddq, XmmReg, Mem)
+
+ //! @brief Packed add with saturation (SSE2).
+ INST_2x(paddsb, kInstPaddsb, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(paddsb, kInstPaddsb, XmmReg, Mem)
+
+ //! @brief Packed add with saturation (SSE2).
+ INST_2x(paddsw, kInstPaddsw, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(paddsw, kInstPaddsw, XmmReg, Mem)
+
+ //! @brief Packed add unsigned with saturation (SSE2).
+ INST_2x(paddusb, kInstPaddusb, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(paddusb, kInstPaddusb, XmmReg, Mem)
+
+ //!
@brief Packed add unsigned with saturation (SSE2). + INST_2x(paddusw, kInstPaddusw, XmmReg, XmmReg) + //! @overload + INST_2x(paddusw, kInstPaddusw, XmmReg, Mem) + + //! @brief And (SSE2). + INST_2x(pand, kInstPand, XmmReg, XmmReg) + //! @overload + INST_2x(pand, kInstPand, XmmReg, Mem) + + //! @brief And-not (SSE2). + INST_2x(pandn, kInstPandn, XmmReg, XmmReg) + //! @overload + INST_2x(pandn, kInstPandn, XmmReg, Mem) + + //! @brief Spin loop hint (SSE2). + INST_0x(pause, kInstPause) + + //! @brief Packed average (SSE2). + INST_2x(pavgb, kInstPavgb, XmmReg, XmmReg) + //! @overload + INST_2x(pavgb, kInstPavgb, XmmReg, Mem) + + //! @brief Packed average (SSE2). + INST_2x(pavgw, kInstPavgw, XmmReg, XmmReg) + //! @overload + INST_2x(pavgw, kInstPavgw, XmmReg, Mem) + + //! @brief Packed compare bytes for equal (SSE2). + INST_2x(pcmpeqb, kInstPcmpeqb, XmmReg, XmmReg) + //! @overload + INST_2x(pcmpeqb, kInstPcmpeqb, XmmReg, Mem) + + //! @brief Packed compare words for equal (SSE2). + INST_2x(pcmpeqw, kInstPcmpeqw, XmmReg, XmmReg) + //! @overload + INST_2x(pcmpeqw, kInstPcmpeqw, XmmReg, Mem) + + //! @brief Packed compare dwords for equal (SSE2). + INST_2x(pcmpeqd, kInstPcmpeqd, XmmReg, XmmReg) + //! @overload + INST_2x(pcmpeqd, kInstPcmpeqd, XmmReg, Mem) + + //! @brief Packed compare bytes for greater than (SSE2). + INST_2x(pcmpgtb, kInstPcmpgtb, XmmReg, XmmReg) + //! @overload + INST_2x(pcmpgtb, kInstPcmpgtb, XmmReg, Mem) + + //! @brief Packed compare words for greater than (SSE2). + INST_2x(pcmpgtw, kInstPcmpgtw, XmmReg, XmmReg) + //! @overload + INST_2x(pcmpgtw, kInstPcmpgtw, XmmReg, Mem) + + //! @brief Packed compare dwords for greater than (SSE2). + INST_2x(pcmpgtd, kInstPcmpgtd, XmmReg, XmmReg) + //! @overload + INST_2x(pcmpgtd, kInstPcmpgtd, XmmReg, Mem) + + //! @brief Extract word (SSE2). + INST_3i(pextrw, kInstPextrw, GpReg, XmmReg, Imm) + + //! @brief Insert word (SSE2). + INST_3i(pinsrw, kInstPinsrw, XmmReg, GpReg, Imm) + //! @overload + INST_3i(pinsrw, kInstPinsrw, XmmReg, Mem, Imm) + + //! @brief Packed signed integer word maximum (SSE2). + INST_2x(pmaxsw, kInstPmaxsw, XmmReg, XmmReg) + //! @overload + INST_2x(pmaxsw, kInstPmaxsw, XmmReg, Mem) + + //! @brief Packed unsigned integer byte maximum (SSE2). + INST_2x(pmaxub, kInstPmaxub, XmmReg, XmmReg) + //! @overload + INST_2x(pmaxub, kInstPmaxub, XmmReg, Mem) + + //! @brief Packed signed integer word minimum (SSE2). + INST_2x(pminsw, kInstPminsw, XmmReg, XmmReg) + //! @overload + INST_2x(pminsw, kInstPminsw, XmmReg, Mem) + + //! @brief Packed unsigned integer byte minimum (SSE2). + INST_2x(pminub, kInstPminub, XmmReg, XmmReg) + //! @overload + INST_2x(pminub, kInstPminub, XmmReg, Mem) + + //! @brief Move byte mask (SSE2). + INST_2x(pmovmskb, kInstPmovmskb, GpReg, XmmReg) + + //! @brief Packed multiply high (SSE2). + INST_2x(pmulhw, kInstPmulhw, XmmReg, XmmReg) + //! @overload + INST_2x(pmulhw, kInstPmulhw, XmmReg, Mem) + + //! @brief Packed multiply high unsigned (SSE2). + INST_2x(pmulhuw, kInstPmulhuw, XmmReg, XmmReg) + //! @overload + INST_2x(pmulhuw, kInstPmulhuw, XmmReg, Mem) + + //! @brief Packed multiply low (SSE2). + INST_2x(pmullw, kInstPmullw, XmmReg, XmmReg) + //! @overload + INST_2x(pmullw, kInstPmullw, XmmReg, Mem) + + //! @brief Packed multiply to qword (SSE2). + INST_2x(pmuludq, kInstPmuludq, MmReg, MmReg) + //! @overload + INST_2x(pmuludq, kInstPmuludq, MmReg, Mem) + + //! @brief Packed multiply to qword (SSE2). + INST_2x(pmuludq, kInstPmuludq, XmmReg, XmmReg) + //! 
@overload
+ INST_2x(pmuludq, kInstPmuludq, XmmReg, Mem)
+
+ //! @brief Or (SSE2).
+ INST_2x(por, kInstPor, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(por, kInstPor, XmmReg, Mem)
+
+ //! @brief Packed shift left logical (SSE2).
+ INST_2x(pslld, kInstPslld, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(pslld, kInstPslld, XmmReg, Mem)
+ //! @overload
+ INST_2i(pslld, kInstPslld, XmmReg, Imm)
+
+ //! @brief Packed shift left logical (SSE2).
+ INST_2x(psllq, kInstPsllq, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(psllq, kInstPsllq, XmmReg, Mem)
+ //! @overload
+ INST_2i(psllq, kInstPsllq, XmmReg, Imm)
+
+ //! @brief Packed shift left logical (SSE2).
+ INST_2x(psllw, kInstPsllw, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(psllw, kInstPsllw, XmmReg, Mem)
+ //! @overload
+ INST_2i(psllw, kInstPsllw, XmmReg, Imm)
+
+ //! @brief Oword shift left logical (SSE2).
+ INST_2i(pslldq, kInstPslldq, XmmReg, Imm)
+
+ //! @brief Packed shift right arithmetic (SSE2).
+ INST_2x(psrad, kInstPsrad, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(psrad, kInstPsrad, XmmReg, Mem)
+ //! @overload
+ INST_2i(psrad, kInstPsrad, XmmReg, Imm)
+
+ //! @brief Packed shift right arithmetic (SSE2).
+ INST_2x(psraw, kInstPsraw, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(psraw, kInstPsraw, XmmReg, Mem)
+ //! @overload
+ INST_2i(psraw, kInstPsraw, XmmReg, Imm)
+
+ //! @brief Packed subtract (SSE2).
+ INST_2x(psubb, kInstPsubb, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(psubb, kInstPsubb, XmmReg, Mem)
+
+ //! @brief Packed subtract (SSE2).
+ INST_2x(psubw, kInstPsubw, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(psubw, kInstPsubw, XmmReg, Mem)
+
+ //! @brief Packed subtract (SSE2).
+ INST_2x(psubd, kInstPsubd, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(psubd, kInstPsubd, XmmReg, Mem)
+
+ //! @brief Packed subtract (SSE2).
+ INST_2x(psubq, kInstPsubq, MmReg, MmReg)
+ //! @overload
+ INST_2x(psubq, kInstPsubq, MmReg, Mem)
+
+ //! @brief Packed subtract (SSE2).
+ INST_2x(psubq, kInstPsubq, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(psubq, kInstPsubq, XmmReg, Mem)
+
+ //! @brief Packed multiply and add (SSE2).
+ INST_2x(pmaddwd, kInstPmaddwd, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(pmaddwd, kInstPmaddwd, XmmReg, Mem)
+
+ //! @brief Shuffle packed dwords (SSE2).
+ INST_3i(pshufd, kInstPshufd, XmmReg, XmmReg, Imm)
+ //! @overload
+ INST_3i(pshufd, kInstPshufd, XmmReg, Mem, Imm)
+
+ //! @brief Shuffle packed high words (SSE2).
+ INST_3i(pshufhw, kInstPshufhw, XmmReg, XmmReg, Imm)
+ //! @overload
+ INST_3i(pshufhw, kInstPshufhw, XmmReg, Mem, Imm)
+
+ //! @brief Shuffle packed low words (SSE2).
+ INST_3i(pshuflw, kInstPshuflw, XmmReg, XmmReg, Imm)
+ //! @overload
+ INST_3i(pshuflw, kInstPshuflw, XmmReg, Mem, Imm)
+
+ //! @brief Packed shift right logical (SSE2).
+ INST_2x(psrld, kInstPsrld, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(psrld, kInstPsrld, XmmReg, Mem)
+ //! @overload
+ INST_2i(psrld, kInstPsrld, XmmReg, Imm)
+
+ //! @brief Packed shift right logical (SSE2).
+ INST_2x(psrlq, kInstPsrlq, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(psrlq, kInstPsrlq, XmmReg, Mem)
+ //! @overload
+ INST_2i(psrlq, kInstPsrlq, XmmReg, Imm)
+
+ //! @brief Oword shift right logical (SSE2).
+ INST_2i(psrldq, kInstPsrldq, XmmReg, Imm)
+
+ //! @brief Packed shift right logical (SSE2).
+ INST_2x(psrlw, kInstPsrlw, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(psrlw, kInstPsrlw, XmmReg, Mem)
+ //! @overload
+ INST_2i(psrlw, kInstPsrlw, XmmReg, Imm)
+
+ //! @brief Packed subtract with saturation (SSE2).
+ INST_2x(psubsb, kInstPsubsb, XmmReg, XmmReg)
+ //!
@overload + INST_2x(psubsb, kInstPsubsb, XmmReg, Mem) + + //! @brief Packed subtract with saturation (SSE2). + INST_2x(psubsw, kInstPsubsw, XmmReg, XmmReg) + //! @overload + INST_2x(psubsw, kInstPsubsw, XmmReg, Mem) + + //! @brief Packed subtract with unsigned saturation (SSE2). + INST_2x(psubusb, kInstPsubusb, XmmReg, XmmReg) + //! @overload + INST_2x(psubusb, kInstPsubusb, XmmReg, Mem) + + //! @brief Packed subtract with unsigned saturation (SSE2). + INST_2x(psubusw, kInstPsubusw, XmmReg, XmmReg) + //! @overload + INST_2x(psubusw, kInstPsubusw, XmmReg, Mem) + + //! @brief Unpack high data (SSE2). + INST_2x(punpckhbw, kInstPunpckhbw, XmmReg, XmmReg) + //! @overload + INST_2x(punpckhbw, kInstPunpckhbw, XmmReg, Mem) + + //! @brief Unpack high data (SSE2). + INST_2x(punpckhwd, kInstPunpckhwd, XmmReg, XmmReg) + //! @overload + INST_2x(punpckhwd, kInstPunpckhwd, XmmReg, Mem) + + //! @brief Unpack high data (SSE2). + INST_2x(punpckhdq, kInstPunpckhdq, XmmReg, XmmReg) + //! @overload + INST_2x(punpckhdq, kInstPunpckhdq, XmmReg, Mem) + + //! @brief Unpack high data (SSE2). + INST_2x(punpckhqdq, kInstPunpckhqdq, XmmReg, XmmReg) + //! @overload + INST_2x(punpckhqdq, kInstPunpckhqdq, XmmReg, Mem) + + //! @brief Unpack low data (SSE2). + INST_2x(punpcklbw, kInstPunpcklbw, XmmReg, XmmReg) + //! @overload + INST_2x(punpcklbw, kInstPunpcklbw, XmmReg, Mem) + + //! @brief Unpack low data (SSE2). + INST_2x(punpcklwd, kInstPunpcklwd, XmmReg, XmmReg) + //! @overload + INST_2x(punpcklwd, kInstPunpcklwd, XmmReg, Mem) + + //! @brief Unpack low data (SSE2). + INST_2x(punpckldq, kInstPunpckldq, XmmReg, XmmReg) + //! @overload + INST_2x(punpckldq, kInstPunpckldq, XmmReg, Mem) + + //! @brief Unpack low data (SSE2). + INST_2x(punpcklqdq, kInstPunpcklqdq, XmmReg, XmmReg) + //! @overload + INST_2x(punpcklqdq, kInstPunpcklqdq, XmmReg, Mem) + + //! @brief Xor (SSE2). + INST_2x(pxor, kInstPxor, XmmReg, XmmReg) + //! @overload + INST_2x(pxor, kInstPxor, XmmReg, Mem) + + //! @brief Shuffle DP-FP (SSE2). + INST_3i(shufpd, kInstShufpd, XmmReg, XmmReg, Imm) + //! @overload + INST_3i(shufpd, kInstShufpd, XmmReg, Mem, Imm) + + //! @brief Compute square roots of packed DP-FP Values (SSE2). + INST_2x(sqrtpd, kInstSqrtpd, XmmReg, XmmReg) + //! @overload + INST_2x(sqrtpd, kInstSqrtpd, XmmReg, Mem) + + //! @brief Compute square root of scalar DP-FP value (SSE2). + INST_2x(sqrtsd, kInstSqrtsd, XmmReg, XmmReg) + //! @overload + INST_2x(sqrtsd, kInstSqrtsd, XmmReg, Mem) + + //! @brief Packed DP-FP subtract (SSE2). + INST_2x(subpd, kInstSubpd, XmmReg, XmmReg) + //! @overload + INST_2x(subpd, kInstSubpd, XmmReg, Mem) + + //! @brief Scalar DP-FP subtract (SSE2). + INST_2x(subsd, kInstSubsd, XmmReg, XmmReg) + //! @overload + INST_2x(subsd, kInstSubsd, XmmReg, Mem) + + //! @brief Scalar unordered DP-FP compare and set EFLAGS (SSE2). + INST_2x(ucomisd, kInstUcomisd, XmmReg, XmmReg) + //! @overload + INST_2x(ucomisd, kInstUcomisd, XmmReg, Mem) + + //! @brief Unpack and interleave high packed DP-FP values (SSE2). + INST_2x(unpckhpd, kInstUnpckhpd, XmmReg, XmmReg) + //! @overload + INST_2x(unpckhpd, kInstUnpckhpd, XmmReg, Mem) + + //! @brief Unpack and interleave low packed DP-FP values (SSE2). + INST_2x(unpcklpd, kInstUnpcklpd, XmmReg, XmmReg) + //! @overload + INST_2x(unpcklpd, kInstUnpcklpd, XmmReg, Mem) + + //! @brief Xor for DP-FP data (SSE2). + INST_2x(xorpd, kInstXorpd, XmmReg, XmmReg) + //! 
@overload + INST_2x(xorpd, kInstXorpd, XmmReg, Mem) + + // -------------------------------------------------------------------------- + // [SSE3] + // -------------------------------------------------------------------------- + + //! @brief Packed DP-FP add/subtract (SSE3). + INST_2x(addsubpd, kInstAddsubpd, XmmReg, XmmReg) + //! @overload + INST_2x(addsubpd, kInstAddsubpd, XmmReg, Mem) + + //! @brief Packed SP-FP add/subtract (SSE3). + INST_2x(addsubps, kInstAddsubps, XmmReg, XmmReg) + //! @overload + INST_2x(addsubps, kInstAddsubps, XmmReg, Mem) + + //! @brief Store integer with truncation (SSE3). + INST_1x(fisttp, kInstFisttp, Mem) + + //! @brief Packed DP-FP horizontal add (SSE3). + INST_2x(haddpd, kInstHaddpd, XmmReg, XmmReg) + //! @overload + INST_2x(haddpd, kInstHaddpd, XmmReg, Mem) + + //! @brief Packed SP-FP horizontal add (SSE3). + INST_2x(haddps, kInstHaddps, XmmReg, XmmReg) + //! @overload + INST_2x(haddps, kInstHaddps, XmmReg, Mem) + + //! @brief Packed DP-FP horizontal subtract (SSE3). + INST_2x(hsubpd, kInstHsubpd, XmmReg, XmmReg) + //! @overload + INST_2x(hsubpd, kInstHsubpd, XmmReg, Mem) + + //! @brief Packed SP-FP horizontal subtract (SSE3). + INST_2x(hsubps, kInstHsubps, XmmReg, XmmReg) + //! @overload + INST_2x(hsubps, kInstHsubps, XmmReg, Mem) + + //! @brief Load unaligned integer 128 bits (SSE3). + INST_2x(lddqu, kInstLddqu, XmmReg, Mem) + + //! @brief Setup monitor address (SSE3). + INST_0x(monitor, kInstMonitor) + + //! @brief Move one DP-FP and duplicate (SSE3). + INST_2x(movddup, kInstMovddup, XmmReg, XmmReg) + //! @overload + INST_2x(movddup, kInstMovddup, XmmReg, Mem) + + //! @brief Move packed SP-FP high and duplicate (SSE3). + INST_2x(movshdup, kInstMovshdup, XmmReg, XmmReg) + //! @overload + INST_2x(movshdup, kInstMovshdup, XmmReg, Mem) + + //! @brief Move packed SP-FP low and duplicate (SSE3). + INST_2x(movsldup, kInstMovsldup, XmmReg, XmmReg) + //! @overload + INST_2x(movsldup, kInstMovsldup, XmmReg, Mem) + + //! @brief Monitor wait (SSE3). + INST_0x(mwait, kInstMwait) + + // -------------------------------------------------------------------------- + // [SSSE3] + // -------------------------------------------------------------------------- + + //! @brief Packed sign (SSSE3). + INST_2x(psignb, kInstPsignb, MmReg, MmReg) + //! @overload + INST_2x(psignb, kInstPsignb, MmReg, Mem) + + //! @brief Packed sign (SSSE3). + INST_2x(psignb, kInstPsignb, XmmReg, XmmReg) + //! @overload + INST_2x(psignb, kInstPsignb, XmmReg, Mem) + + //! @brief Packed sign (SSSE3). + INST_2x(psignw, kInstPsignw, MmReg, MmReg) + //! @overload + INST_2x(psignw, kInstPsignw, MmReg, Mem) + + //! @brief Packed sign (SSSE3). + INST_2x(psignw, kInstPsignw, XmmReg, XmmReg) + //! @overload + INST_2x(psignw, kInstPsignw, XmmReg, Mem) + + //! @brief Packed sign (SSSE3). + INST_2x(psignd, kInstPsignd, MmReg, MmReg) + //! @overload + INST_2x(psignd, kInstPsignd, MmReg, Mem) + + //! @brief Packed sign (SSSE3). + INST_2x(psignd, kInstPsignd, XmmReg, XmmReg) + //! @overload + INST_2x(psignd, kInstPsignd, XmmReg, Mem) + + //! @brief Packed horizontal add (SSSE3). + INST_2x(phaddw, kInstPhaddw, MmReg, MmReg) + //! @overload + INST_2x(phaddw, kInstPhaddw, MmReg, Mem) + + //! @brief Packed horizontal add (SSSE3). + INST_2x(phaddw, kInstPhaddw, XmmReg, XmmReg) + //! @overload + INST_2x(phaddw, kInstPhaddw, XmmReg, Mem) + + //! @brief Packed horizontal add (SSSE3). + INST_2x(phaddd, kInstPhaddd, MmReg, MmReg) + //! @overload + INST_2x(phaddd, kInstPhaddd, MmReg, Mem) + + //! 
@brief Packed horizontal add (SSSE3).
+ INST_2x(phaddd, kInstPhaddd, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(phaddd, kInstPhaddd, XmmReg, Mem)
+
+ //! @brief Packed horizontal add and saturate (SSSE3).
+ INST_2x(phaddsw, kInstPhaddsw, MmReg, MmReg)
+ //! @overload
+ INST_2x(phaddsw, kInstPhaddsw, MmReg, Mem)
+
+ //! @brief Packed horizontal add and saturate (SSSE3).
+ INST_2x(phaddsw, kInstPhaddsw, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(phaddsw, kInstPhaddsw, XmmReg, Mem)
+
+ //! @brief Packed horizontal subtract (SSSE3).
+ INST_2x(phsubw, kInstPhsubw, MmReg, MmReg)
+ //! @overload
+ INST_2x(phsubw, kInstPhsubw, MmReg, Mem)
+
+ //! @brief Packed horizontal subtract (SSSE3).
+ INST_2x(phsubw, kInstPhsubw, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(phsubw, kInstPhsubw, XmmReg, Mem)
+
+ //! @brief Packed horizontal subtract (SSSE3).
+ INST_2x(phsubd, kInstPhsubd, MmReg, MmReg)
+ //! @overload
+ INST_2x(phsubd, kInstPhsubd, MmReg, Mem)
+
+ //! @brief Packed horizontal subtract (SSSE3).
+ INST_2x(phsubd, kInstPhsubd, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(phsubd, kInstPhsubd, XmmReg, Mem)
+
+ //! @brief Packed horizontal subtract and saturate (SSSE3).
+ INST_2x(phsubsw, kInstPhsubsw, MmReg, MmReg)
+ //! @overload
+ INST_2x(phsubsw, kInstPhsubsw, MmReg, Mem)
+
+ //! @brief Packed horizontal subtract and saturate (SSSE3).
+ INST_2x(phsubsw, kInstPhsubsw, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(phsubsw, kInstPhsubsw, XmmReg, Mem)
+
+ //! @brief Multiply and add packed signed and unsigned bytes (SSSE3).
+ INST_2x(pmaddubsw, kInstPmaddubsw, MmReg, MmReg)
+ //! @overload
+ INST_2x(pmaddubsw, kInstPmaddubsw, MmReg, Mem)
+
+ //! @brief Multiply and add packed signed and unsigned bytes (SSSE3).
+ INST_2x(pmaddubsw, kInstPmaddubsw, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(pmaddubsw, kInstPmaddubsw, XmmReg, Mem)
+
+ //! @brief Packed absolute value (SSSE3).
+ INST_2x(pabsb, kInstPabsb, MmReg, MmReg)
+ //! @overload
+ INST_2x(pabsb, kInstPabsb, MmReg, Mem)
+
+ //! @brief Packed absolute value (SSSE3).
+ INST_2x(pabsb, kInstPabsb, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(pabsb, kInstPabsb, XmmReg, Mem)
+
+ //! @brief Packed absolute value (SSSE3).
+ INST_2x(pabsw, kInstPabsw, MmReg, MmReg)
+ //! @overload
+ INST_2x(pabsw, kInstPabsw, MmReg, Mem)
+
+ //! @brief Packed absolute value (SSSE3).
+ INST_2x(pabsw, kInstPabsw, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(pabsw, kInstPabsw, XmmReg, Mem)
+
+ //! @brief Packed absolute value (SSSE3).
+ INST_2x(pabsd, kInstPabsd, MmReg, MmReg)
+ //! @overload
+ INST_2x(pabsd, kInstPabsd, MmReg, Mem)
+
+ //! @brief Packed absolute value (SSSE3).
+ INST_2x(pabsd, kInstPabsd, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(pabsd, kInstPabsd, XmmReg, Mem)
+
+ //! @brief Packed multiply high with round and scale (SSSE3).
+ INST_2x(pmulhrsw, kInstPmulhrsw, MmReg, MmReg)
+ //! @overload
+ INST_2x(pmulhrsw, kInstPmulhrsw, MmReg, Mem)
+
+ //! @brief Packed multiply high with round and scale (SSSE3).
+ INST_2x(pmulhrsw, kInstPmulhrsw, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(pmulhrsw, kInstPmulhrsw, XmmReg, Mem)
+
+ //! @brief Packed shuffle bytes (SSSE3).
+ INST_2x(pshufb, kInstPshufb, MmReg, MmReg)
+ //! @overload
+ INST_2x(pshufb, kInstPshufb, MmReg, Mem)
+
+ //! @brief Packed shuffle bytes (SSSE3).
+ INST_2x(pshufb, kInstPshufb, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(pshufb, kInstPshufb, XmmReg, Mem)
+
+ //! @brief Packed align right (SSSE3).
+ INST_3i(palignr, kInstPalignr, MmReg, MmReg, Imm)
+ //! @overload
+ INST_3i(palignr, kInstPalignr, MmReg, Mem, Imm)
+
+ //!
@brief Packed align right (SSSE3).
+ INST_3i(palignr, kInstPalignr, XmmReg, XmmReg, Imm)
+ //! @overload
+ INST_3i(palignr, kInstPalignr, XmmReg, Mem, Imm)
+
+ // --------------------------------------------------------------------------
+ // [SSE4.1]
+ // --------------------------------------------------------------------------
+
+ //! @brief Blend packed DP-FP values (SSE4.1).
+ INST_3i(blendpd, kInstBlendpd, XmmReg, XmmReg, Imm)
+ //! @overload
+ INST_3i(blendpd, kInstBlendpd, XmmReg, Mem, Imm)
+
+ //! @brief Blend packed SP-FP values (SSE4.1).
+ INST_3i(blendps, kInstBlendps, XmmReg, XmmReg, Imm)
+ //! @overload
+ INST_3i(blendps, kInstBlendps, XmmReg, Mem, Imm)
+
+ //! @brief Variable blend packed DP-FP values (SSE4.1).
+ INST_2x(blendvpd, kInstBlendvpd, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(blendvpd, kInstBlendvpd, XmmReg, Mem)
+
+ //! @brief Variable blend packed SP-FP values (SSE4.1).
+ INST_2x(blendvps, kInstBlendvps, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(blendvps, kInstBlendvps, XmmReg, Mem)
+
+ //! @brief Dot product of packed DP-FP values (SSE4.1).
+ INST_3i(dppd, kInstDppd, XmmReg, XmmReg, Imm)
+ //! @overload
+ INST_3i(dppd, kInstDppd, XmmReg, Mem, Imm)
+
+ //! @brief Dot product of packed SP-FP values (SSE4.1).
+ INST_3i(dpps, kInstDpps, XmmReg, XmmReg, Imm)
+ //! @overload
+ INST_3i(dpps, kInstDpps, XmmReg, Mem, Imm)
+
+ //! @brief Extract packed SP-FP value (SSE4.1).
+ INST_3i(extractps, kInstExtractps, GpReg, XmmReg, Imm)
+ //! @overload
+ INST_3i(extractps, kInstExtractps, Mem, XmmReg, Imm)
+
+ //! @brief Load oword non-temporal aligned hint (SSE4.1).
+ INST_2x(movntdqa, kInstMovntdqa, XmmReg, Mem)
+
+ //! @brief Compute multiple packed sums of absolute difference (SSE4.1).
+ INST_3i(mpsadbw, kInstMpsadbw, XmmReg, XmmReg, Imm)
+ //! @overload
+ INST_3i(mpsadbw, kInstMpsadbw, XmmReg, Mem, Imm)
+
+ //! @brief Pack with unsigned saturation (SSE4.1).
+ INST_2x(packusdw, kInstPackusdw, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(packusdw, kInstPackusdw, XmmReg, Mem)
+
+ //! @brief Variable blend packed bytes (SSE4.1).
+ INST_2x(pblendvb, kInstPblendvb, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(pblendvb, kInstPblendvb, XmmReg, Mem)
+
+ //! @brief Blend packed words (SSE4.1).
+ INST_3i(pblendw, kInstPblendw, XmmReg, XmmReg, Imm)
+ //! @overload
+ INST_3i(pblendw, kInstPblendw, XmmReg, Mem, Imm)
+
+ //! @brief Compare packed qword data for equal (SSE4.1).
+ INST_2x(pcmpeqq, kInstPcmpeqq, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(pcmpeqq, kInstPcmpeqq, XmmReg, Mem)
+
+ //! @brief Extract byte (SSE4.1).
+ INST_3i(pextrb, kInstPextrb, GpReg, XmmReg, Imm)
+ //! @overload
+ INST_3i(pextrb, kInstPextrb, Mem, XmmReg, Imm)
+
+ //! @brief Extract dword (SSE4.1).
+ INST_3i(pextrd, kInstPextrd, GpReg, XmmReg, Imm)
+ //! @overload
+ INST_3i(pextrd, kInstPextrd, Mem, XmmReg, Imm)
+
+ //! @brief Extract qword (SSE4.1).
+ INST_3i(pextrq, kInstPextrq, GpReg, XmmReg, Imm)
+ //! @overload
+ INST_3i(pextrq, kInstPextrq, Mem, XmmReg, Imm)
+
+ //! @brief Extract word (SSE4.1).
+ INST_3i(pextrw, kInstPextrw, Mem, XmmReg, Imm)
+
+ //! @brief Packed horizontal word minimum (SSE4.1).
+ INST_2x(phminposuw, kInstPhminposuw, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(phminposuw, kInstPhminposuw, XmmReg, Mem)
+
+ //! @brief Insert byte (SSE4.1).
+ INST_3i(pinsrb, kInstPinsrb, XmmReg, GpReg, Imm)
+ //! @overload
+ INST_3i(pinsrb, kInstPinsrb, XmmReg, Mem, Imm)
+
+ //! @brief Insert dword (SSE4.1).
+ INST_3i(pinsrd, kInstPinsrd, XmmReg, GpReg, Imm)
+ //!
@overload
+ INST_3i(pinsrd, kInstPinsrd, XmmReg, Mem, Imm)
+
+ //! @brief Insert qword (SSE4.1).
+ INST_3i(pinsrq, kInstPinsrq, XmmReg, GpReg, Imm)
+ //! @overload
+ INST_3i(pinsrq, kInstPinsrq, XmmReg, Mem, Imm)
+
+ //! @brief Maximum of packed unsigned word integers (SSE4.1).
+ INST_2x(pmaxuw, kInstPmaxuw, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(pmaxuw, kInstPmaxuw, XmmReg, Mem)
+
+ //! @brief Maximum of packed signed byte integers (SSE4.1).
+ INST_2x(pmaxsb, kInstPmaxsb, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(pmaxsb, kInstPmaxsb, XmmReg, Mem)
+
+ //! @brief Maximum of packed signed dword integers (SSE4.1).
+ INST_2x(pmaxsd, kInstPmaxsd, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(pmaxsd, kInstPmaxsd, XmmReg, Mem)
+
+ //! @brief Maximum of packed unsigned dword integers (SSE4.1).
+ INST_2x(pmaxud, kInstPmaxud, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(pmaxud, kInstPmaxud, XmmReg, Mem)
+
+ //! @brief Minimum of packed signed byte integers (SSE4.1).
+ INST_2x(pminsb, kInstPminsb, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(pminsb, kInstPminsb, XmmReg, Mem)
+
+ //! @brief Minimum of packed unsigned word integers (SSE4.1).
+ INST_2x(pminuw, kInstPminuw, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(pminuw, kInstPminuw, XmmReg, Mem)
+
+ //! @brief Minimum of packed unsigned dword integers (SSE4.1).
+ INST_2x(pminud, kInstPminud, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(pminud, kInstPminud, XmmReg, Mem)
+
+ //! @brief Minimum of packed signed dword integers (SSE4.1).
+ INST_2x(pminsd, kInstPminsd, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(pminsd, kInstPminsd, XmmReg, Mem)
+
+ //! @brief Packed move with sign extend (SSE4.1).
+ INST_2x(pmovsxbw, kInstPmovsxbw, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(pmovsxbw, kInstPmovsxbw, XmmReg, Mem)
+
+ //! @brief Packed move with sign extend (SSE4.1).
+ INST_2x(pmovsxbd, kInstPmovsxbd, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(pmovsxbd, kInstPmovsxbd, XmmReg, Mem)
+
+ //! @brief Packed move with sign extend (SSE4.1).
+ INST_2x(pmovsxbq, kInstPmovsxbq, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(pmovsxbq, kInstPmovsxbq, XmmReg, Mem)
+
+ //! @brief Packed move with sign extend (SSE4.1).
+ INST_2x(pmovsxwd, kInstPmovsxwd, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(pmovsxwd, kInstPmovsxwd, XmmReg, Mem)
+
+ //! @brief Packed move with sign extend (SSE4.1).
+ INST_2x(pmovsxwq, kInstPmovsxwq, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(pmovsxwq, kInstPmovsxwq, XmmReg, Mem)
+
+ //! @brief Packed move with sign extend (SSE4.1).
+ INST_2x(pmovsxdq, kInstPmovsxdq, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(pmovsxdq, kInstPmovsxdq, XmmReg, Mem)
+
+ //! @brief Packed move with zero extend (SSE4.1).
+ INST_2x(pmovzxbw, kInstPmovzxbw, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(pmovzxbw, kInstPmovzxbw, XmmReg, Mem)
+
+ //! @brief Packed move with zero extend (SSE4.1).
+ INST_2x(pmovzxbd, kInstPmovzxbd, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(pmovzxbd, kInstPmovzxbd, XmmReg, Mem)
+
+ //! @brief Packed move with zero extend (SSE4.1).
+ INST_2x(pmovzxbq, kInstPmovzxbq, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(pmovzxbq, kInstPmovzxbq, XmmReg, Mem)
+
+ //! @brief Packed move with zero extend (SSE4.1).
+ INST_2x(pmovzxwd, kInstPmovzxwd, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(pmovzxwd, kInstPmovzxwd, XmmReg, Mem)
+
+ //! @brief Packed move with zero extend (SSE4.1).
+ INST_2x(pmovzxwq, kInstPmovzxwq, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(pmovzxwq, kInstPmovzxwq, XmmReg, Mem)
+
+ //! @brief Packed move with zero extend (SSE4.1).
+ INST_2x(pmovzxdq, kInstPmovzxdq, XmmReg, XmmReg)
+ //! @overload
+ INST_2x(pmovzxdq, kInstPmovzxdq, XmmReg, Mem)
+
+ //! @brief Multiply packed signed dword integers and store qword result (SSE4.1).
+ INST_2x(pmuldq, kInstPmuldq, XmmReg, XmmReg) + //! @overload + INST_2x(pmuldq, kInstPmuldq, XmmReg, Mem) + + //! @brief Multiply packed signed integers and store low result (SSE4.1). + INST_2x(pmulld, kInstPmulld, XmmReg, XmmReg) + //! @overload + INST_2x(pmulld, kInstPmulld, XmmReg, Mem) + + //! @brief Logical compare (SSE4.1). + INST_2x(ptest, kInstPtest, XmmReg, XmmReg) + //! @overload + INST_2x(ptest, kInstPtest, XmmReg, Mem) + + //! @brief Round packed SP-FP values (SSE4.1). + INST_3i(roundps, kInstRoundps, XmmReg, XmmReg, Imm) + //! @overload + INST_3i(roundps, kInstRoundps, XmmReg, Mem, Imm) + + //! @brief Round scalar SP-FP values (SSE4.1). + INST_3i(roundss, kInstRoundss, XmmReg, XmmReg, Imm) + //! @overload + INST_3i(roundss, kInstRoundss, XmmReg, Mem, Imm) + + //! @brief Round packed DP-FP values (SSE4.1). + INST_3i(roundpd, kInstRoundpd, XmmReg, XmmReg, Imm) + //! @overload + INST_3i(roundpd, kInstRoundpd, XmmReg, Mem, Imm) + + //! @brief Round scalar DP-FP values (SSE4.1). + INST_3i(roundsd, kInstRoundsd, XmmReg, XmmReg, Imm) + //! @overload + INST_3i(roundsd, kInstRoundsd, XmmReg, Mem, Imm) + + // -------------------------------------------------------------------------- + // [SSE4.2] + // -------------------------------------------------------------------------- + + //! @brief Packed compare explicit length strings, return index (SSE4.2). + INST_3i(pcmpestri, kInstPcmpestri, XmmReg, XmmReg, Imm) + //! @overload + INST_3i(pcmpestri, kInstPcmpestri, XmmReg, Mem, Imm) + + //! @brief Packed compare explicit length strings, return mask (SSE4.2). + INST_3i(pcmpestrm, kInstPcmpestrm, XmmReg, XmmReg, Imm) + //! @overload + INST_3i(pcmpestrm, kInstPcmpestrm, XmmReg, Mem, Imm) + + //! @brief Packed compare implicit length strings, return index (SSE4.2). + INST_3i(pcmpistri, kInstPcmpistri, XmmReg, XmmReg, Imm) + //! @overload + INST_3i(pcmpistri, kInstPcmpistri, XmmReg, Mem, Imm) + + //! @brief Packed compare implicit length strings, return mask (SSE4.2). + INST_3i(pcmpistrm, kInstPcmpistrm, XmmReg, XmmReg, Imm) + //! @overload + INST_3i(pcmpistrm, kInstPcmpistrm, XmmReg, Mem, Imm) + + //! @brief Compare packed data for greater than (SSE4.2). + INST_2x(pcmpgtq, kInstPcmpgtq, XmmReg, XmmReg) + //! @overload + INST_2x(pcmpgtq, kInstPcmpgtq, XmmReg, Mem) + + // -------------------------------------------------------------------------- + // [AESNI] + // -------------------------------------------------------------------------- + + //! @brief Perform a single round of the AES decryption flow. + INST_2x(aesdec, kInstAesdec, XmmReg, XmmReg) + //! @overload + INST_2x(aesdec, kInstAesdec, XmmReg, Mem) + + //! @brief Perform the last round of the AES decryption flow. + INST_2x(aesdeclast, kInstAesdeclast, XmmReg, XmmReg) + //! @overload + INST_2x(aesdeclast, kInstAesdeclast, XmmReg, Mem) + + //! @brief Perform a single round of the AES encryption flow. + INST_2x(aesenc, kInstAesenc, XmmReg, XmmReg) + //! @overload + INST_2x(aesenc, kInstAesenc, XmmReg, Mem) + + //! @brief Perform the last round of the AES encryption flow. + INST_2x(aesenclast, kInstAesenclast, XmmReg, XmmReg) + //! @overload + INST_2x(aesenclast, kInstAesenclast, XmmReg, Mem) + + //! @brief Perform the InvMixColumns transformation. + INST_2x(aesimc, kInstAesimc, XmmReg, XmmReg) + //! @overload + INST_2x(aesimc, kInstAesimc, XmmReg, Mem) + + //! @brief Assist in expanding the AES cipher key. + INST_3i(aeskeygenassist, kInstAeskeygenassist, XmmReg, XmmReg, Imm) + //! 
@overload + INST_3i(aeskeygenassist, kInstAeskeygenassist, XmmReg, Mem, Imm) + + // -------------------------------------------------------------------------- + // [PCLMULQDQ] + // -------------------------------------------------------------------------- + + //! @brief Carry-less multiplication quadword. + INST_3i(pclmulqdq, kInstPclmulqdq, XmmReg, XmmReg, Imm) + //! @overload + INST_3i(pclmulqdq, kInstPclmulqdq, XmmReg, Mem, Imm) + + // -------------------------------------------------------------------------- + // [AVX] + // -------------------------------------------------------------------------- + + INST_3x(vaddpd, kInstVaddpd, XmmReg, XmmReg, XmmReg) + INST_3x(vaddpd, kInstVaddpd, XmmReg, XmmReg, Mem) + INST_3x(vaddpd, kInstVaddpd, YmmReg, YmmReg, YmmReg) + INST_3x(vaddpd, kInstVaddpd, YmmReg, YmmReg, Mem) + + INST_3x(vaddps, kInstVaddps, XmmReg, XmmReg, XmmReg) + INST_3x(vaddps, kInstVaddps, XmmReg, XmmReg, Mem) + INST_3x(vaddps, kInstVaddps, YmmReg, YmmReg, YmmReg) + INST_3x(vaddps, kInstVaddps, YmmReg, YmmReg, Mem) + + INST_3x(vaddsd, kInstVaddsd, XmmReg, XmmReg, XmmReg) + INST_3x(vaddsd, kInstVaddsd, XmmReg, XmmReg, Mem) + + INST_3x(vaddss, kInstVaddss, XmmReg, XmmReg, XmmReg) + INST_3x(vaddss, kInstVaddss, XmmReg, XmmReg, Mem) + + INST_3x(vaddsubpd, kInstVaddsubpd, XmmReg, XmmReg, XmmReg) + INST_3x(vaddsubpd, kInstVaddsubpd, XmmReg, XmmReg, Mem) + INST_3x(vaddsubpd, kInstVaddsubpd, YmmReg, YmmReg, YmmReg) + INST_3x(vaddsubpd, kInstVaddsubpd, YmmReg, YmmReg, Mem) + + INST_3x(vaddsubps, kInstVaddsubps, XmmReg, XmmReg, XmmReg) + INST_3x(vaddsubps, kInstVaddsubps, XmmReg, XmmReg, Mem) + INST_3x(vaddsubps, kInstVaddsubps, YmmReg, YmmReg, YmmReg) + INST_3x(vaddsubps, kInstVaddsubps, YmmReg, YmmReg, Mem) + + INST_3x(vandpd, kInstVandpd, XmmReg, XmmReg, XmmReg) + INST_3x(vandpd, kInstVandpd, XmmReg, XmmReg, Mem) + INST_3x(vandpd, kInstVandpd, YmmReg, YmmReg, YmmReg) + INST_3x(vandpd, kInstVandpd, YmmReg, YmmReg, Mem) + + INST_3x(vandps, kInstVandps, XmmReg, XmmReg, XmmReg) + INST_3x(vandps, kInstVandps, XmmReg, XmmReg, Mem) + INST_3x(vandps, kInstVandps, YmmReg, YmmReg, YmmReg) + INST_3x(vandps, kInstVandps, YmmReg, YmmReg, Mem) + + INST_3x(vandnpd, kInstVandnpd, XmmReg, XmmReg, XmmReg) + INST_3x(vandnpd, kInstVandnpd, XmmReg, XmmReg, Mem) + INST_3x(vandnpd, kInstVandnpd, YmmReg, YmmReg, YmmReg) + INST_3x(vandnpd, kInstVandnpd, YmmReg, YmmReg, Mem) + + INST_3x(vandnps, kInstVandnps, XmmReg, XmmReg, XmmReg) + INST_3x(vandnps, kInstVandnps, XmmReg, XmmReg, Mem) + INST_3x(vandnps, kInstVandnps, YmmReg, YmmReg, YmmReg) + INST_3x(vandnps, kInstVandnps, YmmReg, YmmReg, Mem) + + INST_4i(vblendpd, kInstVblendpd, XmmReg, XmmReg, XmmReg, Imm) + INST_4i(vblendpd, kInstVblendpd, XmmReg, XmmReg, Mem, Imm) + INST_4i(vblendpd, kInstVblendpd, YmmReg, YmmReg, YmmReg, Imm) + INST_4i(vblendpd, kInstVblendpd, YmmReg, YmmReg, Mem, Imm) + + INST_4i(vblendps, kInstVblendps, XmmReg, XmmReg, XmmReg, Imm) + INST_4i(vblendps, kInstVblendps, XmmReg, XmmReg, Mem, Imm) + INST_4i(vblendps, kInstVblendps, YmmReg, YmmReg, YmmReg, Imm) + INST_4i(vblendps, kInstVblendps, YmmReg, YmmReg, Mem, Imm) + + INST_4x(vblendvpd, kInstVblendvpd, XmmReg, XmmReg, XmmReg, XmmReg) + INST_4x(vblendvpd, kInstVblendvpd, XmmReg, XmmReg, Mem, XmmReg) + INST_4x(vblendvpd, kInstVblendvpd, YmmReg, YmmReg, YmmReg, YmmReg) + INST_4x(vblendvpd, kInstVblendvpd, YmmReg, YmmReg, Mem, YmmReg) + + INST_4x(vblendvps, kInstVblendvps, XmmReg, XmmReg, XmmReg, XmmReg) + INST_4x(vblendvps, kInstVblendvps, XmmReg, XmmReg, Mem, XmmReg) + INST_4x(vblendvps, 
kInstVblendvps, YmmReg, YmmReg, YmmReg, YmmReg) + INST_4x(vblendvps, kInstVblendvps, YmmReg, YmmReg, Mem, YmmReg) + + INST_2x(vbroadcastf128, kInstVbroadcastf128, YmmReg, Mem) + + INST_2x(vbroadcastsd, kInstVbroadcastsd, YmmReg, Mem) + + INST_2x(vbroadcastss, kInstVbroadcastss, XmmReg, Mem) + INST_2x(vbroadcastss, kInstVbroadcastss, YmmReg, Mem) + + INST_4i(vcmppd, kInstVcmppd, XmmReg, XmmReg, XmmReg, Imm) + INST_4i(vcmppd, kInstVcmppd, XmmReg, XmmReg, Mem, Imm) + INST_4i(vcmppd, kInstVcmppd, YmmReg, YmmReg, YmmReg, Imm) + INST_4i(vcmppd, kInstVcmppd, YmmReg, YmmReg, Mem, Imm) + + INST_4i(vcmpps, kInstVcmpps, XmmReg, XmmReg, XmmReg, Imm) + INST_4i(vcmpps, kInstVcmpps, XmmReg, XmmReg, Mem, Imm) + INST_4i(vcmpps, kInstVcmpps, YmmReg, YmmReg, YmmReg, Imm) + INST_4i(vcmpps, kInstVcmpps, YmmReg, YmmReg, Mem, Imm) + + INST_4i(vcmpsd, kInstVcmpsd, XmmReg, XmmReg, XmmReg, Imm) + INST_4i(vcmpsd, kInstVcmpsd, XmmReg, XmmReg, Mem, Imm) + + INST_4i(vcmpss, kInstVcmpss, XmmReg, XmmReg, XmmReg, Imm) + INST_4i(vcmpss, kInstVcmpss, XmmReg, XmmReg, Mem, Imm) + + INST_2x(vcomisd, kInstVcomisd, XmmReg, XmmReg) + INST_2x(vcomisd, kInstVcomisd, XmmReg, Mem) + + INST_2x(vcomiss, kInstVcomiss, XmmReg, XmmReg) + INST_2x(vcomiss, kInstVcomiss, XmmReg, Mem) + + INST_2x(vcvtdq2pd, kInstVcvtdq2pd, XmmReg, XmmReg) + INST_2x(vcvtdq2pd, kInstVcvtdq2pd, XmmReg, Mem) + INST_2x(vcvtdq2pd, kInstVcvtdq2pd, YmmReg, XmmReg) + INST_2x(vcvtdq2pd, kInstVcvtdq2pd, YmmReg, Mem) + + INST_2x(vcvtdq2ps, kInstVcvtdq2ps, XmmReg, XmmReg) + INST_2x(vcvtdq2ps, kInstVcvtdq2ps, XmmReg, Mem) + INST_2x(vcvtdq2ps, kInstVcvtdq2ps, YmmReg, YmmReg) + INST_2x(vcvtdq2ps, kInstVcvtdq2ps, YmmReg, Mem) + + INST_2x(vcvtpd2dq, kInstVcvtpd2dq, XmmReg, XmmReg) + INST_2x(vcvtpd2dq, kInstVcvtpd2dq, XmmReg, YmmReg) + INST_2x(vcvtpd2dq, kInstVcvtpd2dq, XmmReg, Mem) + + INST_2x(vcvtpd2ps, kInstVcvtpd2ps, XmmReg, XmmReg) + INST_2x(vcvtpd2ps, kInstVcvtpd2ps, XmmReg, YmmReg) + INST_2x(vcvtpd2ps, kInstVcvtpd2ps, XmmReg, Mem) + + INST_2x(vcvtps2dq, kInstVcvtps2dq, XmmReg, XmmReg) + INST_2x(vcvtps2dq, kInstVcvtps2dq, XmmReg, Mem) + INST_2x(vcvtps2dq, kInstVcvtps2dq, YmmReg, YmmReg) + INST_2x(vcvtps2dq, kInstVcvtps2dq, YmmReg, Mem) + + INST_2x(vcvtps2pd, kInstVcvtps2pd, XmmReg, XmmReg) + INST_2x(vcvtps2pd, kInstVcvtps2pd, XmmReg, Mem) + INST_2x(vcvtps2pd, kInstVcvtps2pd, YmmReg, XmmReg) + INST_2x(vcvtps2pd, kInstVcvtps2pd, YmmReg, Mem) + + INST_2x(vcvtsd2si, kInstVcvtsd2si, GpReg, XmmReg) + INST_2x(vcvtsd2si, kInstVcvtsd2si, GpReg, Mem) + + INST_3x(vcvtsd2ss, kInstVcvtsd2ss, XmmReg, XmmReg, XmmReg) + INST_3x(vcvtsd2ss, kInstVcvtsd2ss, XmmReg, XmmReg, Mem) + + INST_3x(vcvtsi2sd, kInstVcvtsi2sd, XmmReg, XmmReg, GpReg) + INST_3x(vcvtsi2sd, kInstVcvtsi2sd, XmmReg, XmmReg, Mem) + + INST_3x(vcvtsi2ss, kInstVcvtsi2ss, XmmReg, XmmReg, GpReg) + INST_3x(vcvtsi2ss, kInstVcvtsi2ss, XmmReg, XmmReg, Mem) + + INST_3x(vcvtss2sd, kInstVcvtss2sd, XmmReg, XmmReg, XmmReg) + INST_3x(vcvtss2sd, kInstVcvtss2sd, XmmReg, XmmReg, Mem) + + INST_2x(vcvtss2si, kInstVcvtss2si, GpReg, XmmReg) + INST_2x(vcvtss2si, kInstVcvtss2si, GpReg, Mem) + + INST_2x(vcvttpd2dq, kInstVcvttpd2dq, XmmReg, XmmReg) + INST_2x(vcvttpd2dq, kInstVcvttpd2dq, XmmReg, YmmReg) + INST_2x(vcvttpd2dq, kInstVcvttpd2dq, XmmReg, Mem) + + INST_2x(vcvttps2dq, kInstVcvttps2dq, XmmReg, XmmReg) + INST_2x(vcvttps2dq, kInstVcvttps2dq, XmmReg, Mem) + INST_2x(vcvttps2dq, kInstVcvttps2dq, YmmReg, YmmReg) + INST_2x(vcvttps2dq, kInstVcvttps2dq, YmmReg, Mem) + + INST_2x(vcvttsd2si, kInstVcvttsd2si, GpReg, XmmReg) + INST_2x(vcvttsd2si, 
kInstVcvttsd2si, GpReg, Mem) + + INST_2x(vcvttss2si, kInstVcvttss2si, GpReg, XmmReg) + INST_2x(vcvttss2si, kInstVcvttss2si, GpReg, Mem) + + INST_3x(vdivpd, kInstVdivpd, XmmReg, XmmReg, XmmReg) + INST_3x(vdivpd, kInstVdivpd, XmmReg, XmmReg, Mem) + INST_3x(vdivpd, kInstVdivpd, YmmReg, YmmReg, YmmReg) + INST_3x(vdivpd, kInstVdivpd, YmmReg, YmmReg, Mem) + + INST_3x(vdivps, kInstVdivps, XmmReg, XmmReg, XmmReg) + INST_3x(vdivps, kInstVdivps, XmmReg, XmmReg, Mem) + INST_3x(vdivps, kInstVdivps, YmmReg, YmmReg, YmmReg) + INST_3x(vdivps, kInstVdivps, YmmReg, YmmReg, Mem) + + INST_3x(vdivsd, kInstVdivsd, XmmReg, XmmReg, XmmReg) + INST_3x(vdivsd, kInstVdivsd, XmmReg, XmmReg, Mem) + + INST_3x(vdivss, kInstVdivss, XmmReg, XmmReg, XmmReg) + INST_3x(vdivss, kInstVdivss, XmmReg, XmmReg, Mem) + + INST_4i(vdppd, kInstVdppd, XmmReg, XmmReg, XmmReg, Imm) + INST_4i(vdppd, kInstVdppd, XmmReg, XmmReg, Mem, Imm) + + INST_4i(vdpps, kInstVdpps, XmmReg, XmmReg, XmmReg, Imm) + INST_4i(vdpps, kInstVdpps, XmmReg, XmmReg, Mem, Imm) + INST_4i(vdpps, kInstVdpps, YmmReg, YmmReg, YmmReg, Imm) + INST_4i(vdpps, kInstVdpps, YmmReg, YmmReg, Mem, Imm) + + INST_3i(vextractf128, kInstVextractf128, XmmReg, YmmReg, Imm) + INST_3i(vextractf128, kInstVextractf128, Mem, YmmReg, Imm) + + INST_3i(vextractps, kInstVextractps, GpReg, XmmReg, Imm) + INST_3i(vextractps, kInstVextractps, Mem, XmmReg, Imm) + + INST_3x(vhaddpd, kInstVhaddpd, XmmReg, XmmReg, XmmReg) + INST_3x(vhaddpd, kInstVhaddpd, XmmReg, XmmReg, Mem) + INST_3x(vhaddpd, kInstVhaddpd, YmmReg, YmmReg, YmmReg) + INST_3x(vhaddpd, kInstVhaddpd, YmmReg, YmmReg, Mem) + + INST_3x(vhaddps, kInstVhaddps, XmmReg, XmmReg, XmmReg) + INST_3x(vhaddps, kInstVhaddps, XmmReg, XmmReg, Mem) + INST_3x(vhaddps, kInstVhaddps, YmmReg, YmmReg, YmmReg) + INST_3x(vhaddps, kInstVhaddps, YmmReg, YmmReg, Mem) + + INST_3x(vhsubpd, kInstVhsubpd, XmmReg, XmmReg, XmmReg) + INST_3x(vhsubpd, kInstVhsubpd, XmmReg, XmmReg, Mem) + INST_3x(vhsubpd, kInstVhsubpd, YmmReg, YmmReg, YmmReg) + INST_3x(vhsubpd, kInstVhsubpd, YmmReg, YmmReg, Mem) + + INST_3x(vhsubps, kInstVhsubps, XmmReg, XmmReg, XmmReg) + INST_3x(vhsubps, kInstVhsubps, XmmReg, XmmReg, Mem) + INST_3x(vhsubps, kInstVhsubps, YmmReg, YmmReg, YmmReg) + INST_3x(vhsubps, kInstVhsubps, YmmReg, YmmReg, Mem) + + INST_4i(vinsertf128, kInstVinsertf128, YmmReg, YmmReg, XmmReg, Imm) + INST_4i(vinsertf128, kInstVinsertf128, YmmReg, YmmReg, Mem, Imm) + + INST_4i(vinsertps, kInstVinsertps, XmmReg, XmmReg, XmmReg, Imm) + INST_4i(vinsertps, kInstVinsertps, XmmReg, XmmReg, Mem, Imm) + + INST_2x(vlddqu, kInstVlddqu, XmmReg, Mem) + INST_2x(vlddqu, kInstVlddqu, YmmReg, Mem) + + INST_1x(vldmxcsr, kInstVldmxcsr, Mem) + + INST_2x(vmaskmovdqu, kInstVmaskmovdqu, XmmReg, XmmReg) + + INST_3x(vmaskmovps, kInstVmaskmovps, XmmReg, XmmReg, Mem) + INST_3x(vmaskmovps, kInstVmaskmovps, YmmReg, YmmReg, Mem) + + INST_3x(vmaskmovpd, kInstVmaskmovpd, XmmReg, XmmReg, Mem) + INST_3x(vmaskmovpd, kInstVmaskmovpd, YmmReg, YmmReg, Mem) + + INST_3x(vmaskmovps, kInstVmaskmovps, Mem, XmmReg, XmmReg) + INST_3x(vmaskmovps, kInstVmaskmovps, Mem, YmmReg, YmmReg) + + INST_3x(vmaskmovpd, kInstVmaskmovpd, Mem, XmmReg, XmmReg) + INST_3x(vmaskmovpd, kInstVmaskmovpd, Mem, YmmReg, YmmReg) + + INST_3x(vmaxpd, kInstVmaxpd, XmmReg, XmmReg, XmmReg) + INST_3x(vmaxpd, kInstVmaxpd, XmmReg, XmmReg, Mem) + INST_3x(vmaxpd, kInstVmaxpd, YmmReg, YmmReg, YmmReg) + INST_3x(vmaxpd, kInstVmaxpd, YmmReg, YmmReg, Mem) + + INST_3x(vmaxps, kInstVmaxps, XmmReg, XmmReg, XmmReg) + INST_3x(vmaxps, kInstVmaxps, XmmReg, XmmReg, Mem) + 
INST_3x(vmaxps, kInstVmaxps, YmmReg, YmmReg, YmmReg) + INST_3x(vmaxps, kInstVmaxps, YmmReg, YmmReg, Mem) + + INST_3x(vmaxsd, kInstVmaxsd, XmmReg, XmmReg, XmmReg) + INST_3x(vmaxsd, kInstVmaxsd, XmmReg, XmmReg, Mem) + + INST_3x(vmaxss, kInstVmaxss, XmmReg, XmmReg, XmmReg) + INST_3x(vmaxss, kInstVmaxss, XmmReg, XmmReg, Mem) + + INST_3x(vminpd, kInstVminpd, XmmReg, XmmReg, XmmReg) + INST_3x(vminpd, kInstVminpd, XmmReg, XmmReg, Mem) + INST_3x(vminpd, kInstVminpd, YmmReg, YmmReg, YmmReg) + INST_3x(vminpd, kInstVminpd, YmmReg, YmmReg, Mem) + + INST_3x(vminps, kInstVminps, XmmReg, XmmReg, XmmReg) + INST_3x(vminps, kInstVminps, XmmReg, XmmReg, Mem) + INST_3x(vminps, kInstVminps, YmmReg, YmmReg, YmmReg) + INST_3x(vminps, kInstVminps, YmmReg, YmmReg, Mem) + + INST_3x(vminsd, kInstVminsd, XmmReg, XmmReg, XmmReg) + INST_3x(vminsd, kInstVminsd, XmmReg, XmmReg, Mem) + + INST_3x(vminss, kInstVminss, XmmReg, XmmReg, XmmReg) + INST_3x(vminss, kInstVminss, XmmReg, XmmReg, Mem) + + INST_2x(vmovapd, kInstVmovapd, XmmReg, XmmReg) + INST_2x(vmovapd, kInstVmovapd, XmmReg, Mem) + INST_2x(vmovapd, kInstVmovapd, Mem, XmmReg) + INST_2x(vmovapd, kInstVmovapd, YmmReg, YmmReg) + INST_2x(vmovapd, kInstVmovapd, YmmReg, Mem) + INST_2x(vmovapd, kInstVmovapd, Mem, YmmReg) + + INST_2x(vmovaps, kInstVmovaps, XmmReg, XmmReg) + INST_2x(vmovaps, kInstVmovaps, XmmReg, Mem) + INST_2x(vmovaps, kInstVmovaps, Mem, XmmReg) + INST_2x(vmovaps, kInstVmovaps, YmmReg, YmmReg) + INST_2x(vmovaps, kInstVmovaps, YmmReg, Mem) + INST_2x(vmovaps, kInstVmovaps, Mem, YmmReg) + + INST_2x(vmovd, kInstVmovd, XmmReg, GpReg) + INST_2x(vmovd, kInstVmovd, XmmReg, Mem) + INST_2x(vmovd, kInstVmovd, GpReg, XmmReg) + INST_2x(vmovd, kInstVmovd, Mem, XmmReg) + INST_2x(vmovq, kInstVmovq, XmmReg, XmmReg) + INST_2x(vmovq, kInstVmovq, XmmReg, Mem) + INST_2x(vmovq, kInstVmovq, Mem, XmmReg) + + INST_2x(vmovddup, kInstVmovddup, XmmReg, XmmReg) + INST_2x(vmovddup, kInstVmovddup, XmmReg, Mem) + INST_2x(vmovddup, kInstVmovddup, YmmReg, YmmReg) + INST_2x(vmovddup, kInstVmovddup, YmmReg, Mem) + + INST_2x(vmovdqa, kInstVmovdqa, XmmReg, XmmReg) + INST_2x(vmovdqa, kInstVmovdqa, XmmReg, Mem) + INST_2x(vmovdqa, kInstVmovdqa, Mem, XmmReg) + INST_2x(vmovdqa, kInstVmovdqa, YmmReg, YmmReg) + INST_2x(vmovdqa, kInstVmovdqa, YmmReg, Mem) + INST_2x(vmovdqa, kInstVmovdqa, Mem, YmmReg) + + INST_2x(vmovdqu, kInstVmovdqu, XmmReg, XmmReg) + INST_2x(vmovdqu, kInstVmovdqu, XmmReg, Mem) + INST_2x(vmovdqu, kInstVmovdqu, Mem, XmmReg) + INST_2x(vmovdqu, kInstVmovdqu, YmmReg, YmmReg) + INST_2x(vmovdqu, kInstVmovdqu, YmmReg, Mem) + INST_2x(vmovdqu, kInstVmovdqu, Mem, YmmReg) + + INST_3x(vmovhlps, kInstVmovhlps, XmmReg, XmmReg, XmmReg) + + INST_3x(vmovhpd, kInstVmovhpd, XmmReg, XmmReg, Mem) + INST_2x(vmovhpd, kInstVmovhpd, Mem, XmmReg) + + INST_3x(vmovhps, kInstVmovhps, XmmReg, XmmReg, Mem) + INST_2x(vmovhps, kInstVmovhps, Mem, XmmReg) + + INST_3x(vmovlhps, kInstVmovlhps, XmmReg, XmmReg, XmmReg) + + INST_3x(vmovlpd, kInstVmovlpd, XmmReg, XmmReg, Mem) + INST_2x(vmovlpd, kInstVmovlpd, Mem, XmmReg) + + INST_3x(vmovlps, kInstVmovlps, XmmReg, XmmReg, Mem) + INST_2x(vmovlps, kInstVmovlps, Mem, XmmReg) + + INST_2x(vmovmskpd, kInstVmovmskpd, GpReg, XmmReg) + INST_2x(vmovmskpd, kInstVmovmskpd, GpReg, YmmReg) + + INST_2x(vmovmskps, kInstVmovmskps, GpReg, XmmReg) + INST_2x(vmovmskps, kInstVmovmskps, GpReg, YmmReg) + + INST_2x(vmovntdq, kInstVmovntdq, Mem, XmmReg) + INST_2x(vmovntdq, kInstVmovntdq, Mem, YmmReg) + + INST_2x(vmovntdqa, kInstVmovntdqa, XmmReg, Mem) + + INST_2x(vmovntpd, kInstVmovntpd, Mem, XmmReg) 
+ INST_2x(vmovntpd, kInstVmovntpd, Mem, YmmReg) + + INST_2x(vmovntps, kInstVmovntps, Mem, XmmReg) + INST_2x(vmovntps, kInstVmovntps, Mem, YmmReg) + + INST_3x(vmovsd, kInstVmovsd, XmmReg, XmmReg, XmmReg) + INST_2x(vmovsd, kInstVmovsd, XmmReg, Mem) + INST_2x(vmovsd, kInstVmovsd, Mem, XmmReg) + + INST_2x(vmovshdup, kInstVmovshdup, XmmReg, XmmReg) + INST_2x(vmovshdup, kInstVmovshdup, XmmReg, Mem) + INST_2x(vmovshdup, kInstVmovshdup, YmmReg, YmmReg) + INST_2x(vmovshdup, kInstVmovshdup, YmmReg, Mem) + + INST_2x(vmovsldup, kInstVmovsldup, XmmReg, XmmReg) + INST_2x(vmovsldup, kInstVmovsldup, XmmReg, Mem) + INST_2x(vmovsldup, kInstVmovsldup, YmmReg, YmmReg) + INST_2x(vmovsldup, kInstVmovsldup, YmmReg, Mem) + + INST_3x(vmovss, kInstVmovss, XmmReg, XmmReg, XmmReg) + INST_2x(vmovss, kInstVmovss, XmmReg, Mem) + INST_2x(vmovss, kInstVmovss, Mem, XmmReg) + + INST_2x(vmovupd, kInstVmovupd, XmmReg, XmmReg) + INST_2x(vmovupd, kInstVmovupd, XmmReg, Mem) + INST_2x(vmovupd, kInstVmovupd, Mem, XmmReg) + INST_2x(vmovupd, kInstVmovupd, YmmReg, YmmReg) + INST_2x(vmovupd, kInstVmovupd, YmmReg, Mem) + INST_2x(vmovupd, kInstVmovupd, Mem, YmmReg) + + INST_2x(vmovups, kInstVmovups, XmmReg, XmmReg) + INST_2x(vmovups, kInstVmovups, XmmReg, Mem) + INST_2x(vmovups, kInstVmovups, Mem, XmmReg) + INST_2x(vmovups, kInstVmovups, YmmReg, YmmReg) + INST_2x(vmovups, kInstVmovups, YmmReg, Mem) + INST_2x(vmovups, kInstVmovups, Mem, YmmReg) + + INST_4i(vmpsadbw, kInstVmpsadbw, XmmReg, XmmReg, XmmReg, Imm) + INST_4i(vmpsadbw, kInstVmpsadbw, XmmReg, XmmReg, Mem, Imm) + + INST_3x(vmulpd, kInstVmulpd, XmmReg, XmmReg, XmmReg) + INST_3x(vmulpd, kInstVmulpd, XmmReg, XmmReg, Mem) + INST_3x(vmulpd, kInstVmulpd, YmmReg, YmmReg, YmmReg) + INST_3x(vmulpd, kInstVmulpd, YmmReg, YmmReg, Mem) + + INST_3x(vmulps, kInstVmulps, XmmReg, XmmReg, XmmReg) + INST_3x(vmulps, kInstVmulps, XmmReg, XmmReg, Mem) + INST_3x(vmulps, kInstVmulps, YmmReg, YmmReg, YmmReg) + INST_3x(vmulps, kInstVmulps, YmmReg, YmmReg, Mem) + + INST_3x(vmulsd, kInstVmulsd, XmmReg, XmmReg, XmmReg) + INST_3x(vmulsd, kInstVmulsd, XmmReg, XmmReg, Mem) + + INST_3x(vmulss, kInstVmulss, XmmReg, XmmReg, XmmReg) + INST_3x(vmulss, kInstVmulss, XmmReg, XmmReg, Mem) + + INST_3x(vorpd, kInstVorpd, XmmReg, XmmReg, XmmReg) + INST_3x(vorpd, kInstVorpd, XmmReg, XmmReg, Mem) + INST_3x(vorpd, kInstVorpd, YmmReg, YmmReg, YmmReg) + INST_3x(vorpd, kInstVorpd, YmmReg, YmmReg, Mem) + + INST_3x(vorps, kInstVorps, XmmReg, XmmReg, XmmReg) + INST_3x(vorps, kInstVorps, XmmReg, XmmReg, Mem) + INST_3x(vorps, kInstVorps, YmmReg, YmmReg, YmmReg) + INST_3x(vorps, kInstVorps, YmmReg, YmmReg, Mem) + + INST_2x(vpabsb, kInstVpabsb, XmmReg, XmmReg) + INST_2x(vpabsb, kInstVpabsb, XmmReg, Mem) + + INST_2x(vpabsd, kInstVpabsd, XmmReg, XmmReg) + INST_2x(vpabsd, kInstVpabsd, XmmReg, Mem) + + INST_2x(vpabsw, kInstVpabsw, XmmReg, XmmReg) + INST_2x(vpabsw, kInstVpabsw, XmmReg, Mem) + + INST_3x(vpackssdw, kInstVpackssdw, XmmReg, XmmReg, XmmReg) + INST_3x(vpackssdw, kInstVpackssdw, XmmReg, XmmReg, Mem) + + INST_3x(vpacksswb, kInstVpacksswb, XmmReg, XmmReg, XmmReg) + INST_3x(vpacksswb, kInstVpacksswb, XmmReg, XmmReg, Mem) + + INST_3x(vpackusdw, kInstVpackusdw, XmmReg, XmmReg, XmmReg) + INST_3x(vpackusdw, kInstVpackusdw, XmmReg, XmmReg, Mem) + + INST_3x(vpackuswb, kInstVpackuswb, XmmReg, XmmReg, XmmReg) + INST_3x(vpackuswb, kInstVpackuswb, XmmReg, XmmReg, Mem) + + INST_3x(vpaddb, kInstVpaddb, XmmReg, XmmReg, XmmReg) + INST_3x(vpaddb, kInstVpaddb, XmmReg, XmmReg, Mem) + + INST_3x(vpaddd, kInstVpaddd, XmmReg, XmmReg, XmmReg) + 
INST_3x(vpaddd, kInstVpaddd, XmmReg, XmmReg, Mem) + + INST_3x(vpaddq, kInstVpaddq, XmmReg, XmmReg, XmmReg) + INST_3x(vpaddq, kInstVpaddq, XmmReg, XmmReg, Mem) + + INST_3x(vpaddw, kInstVpaddw, XmmReg, XmmReg, XmmReg) + INST_3x(vpaddw, kInstVpaddw, XmmReg, XmmReg, Mem) + + INST_3x(vpaddsb, kInstVpaddsb, XmmReg, XmmReg, XmmReg) + INST_3x(vpaddsb, kInstVpaddsb, XmmReg, XmmReg, Mem) + + INST_3x(vpaddsw, kInstVpaddsw, XmmReg, XmmReg, XmmReg) + INST_3x(vpaddsw, kInstVpaddsw, XmmReg, XmmReg, Mem) + + INST_3x(vpaddusb, kInstVpaddusb, XmmReg, XmmReg, XmmReg) + INST_3x(vpaddusb, kInstVpaddusb, XmmReg, XmmReg, Mem) + + INST_3x(vpaddusw, kInstVpaddusw, XmmReg, XmmReg, XmmReg) + INST_3x(vpaddusw, kInstVpaddusw, XmmReg, XmmReg, Mem) + + INST_4i(vpalignr, kInstVpalignr, XmmReg, XmmReg, XmmReg, Imm) + INST_4i(vpalignr, kInstVpalignr, XmmReg, XmmReg, Mem, Imm) + + INST_3x(vpand, kInstVpand, XmmReg, XmmReg, XmmReg) + INST_3x(vpand, kInstVpand, XmmReg, XmmReg, Mem) + + INST_3x(vpandn, kInstVpandn, XmmReg, XmmReg, XmmReg) + INST_3x(vpandn, kInstVpandn, XmmReg, XmmReg, Mem) + + INST_3x(vpavgb, kInstVpavgb, XmmReg, XmmReg, XmmReg) + INST_3x(vpavgb, kInstVpavgb, XmmReg, XmmReg, Mem) + + INST_3x(vpavgw, kInstVpavgw, XmmReg, XmmReg, XmmReg) + INST_3x(vpavgw, kInstVpavgw, XmmReg, XmmReg, Mem) + + INST_4x(vpblendvb, kInstVpblendvb, XmmReg, XmmReg, XmmReg, XmmReg) + INST_4x(vpblendvb, kInstVpblendvb, XmmReg, XmmReg, Mem, XmmReg) + + INST_4i(vpblendw, kInstVpblendw, XmmReg, XmmReg, XmmReg, Imm) + INST_4i(vpblendw, kInstVpblendw, XmmReg, XmmReg, Mem, Imm) + + INST_3x(vpcmpeqb, kInstVpcmpeqb, XmmReg, XmmReg, XmmReg) + INST_3x(vpcmpeqb, kInstVpcmpeqb, XmmReg, XmmReg, Mem) + + INST_3x(vpcmpeqd, kInstVpcmpeqd, XmmReg, XmmReg, XmmReg) + INST_3x(vpcmpeqd, kInstVpcmpeqd, XmmReg, XmmReg, Mem) + + INST_3x(vpcmpeqq, kInstVpcmpeqq, XmmReg, XmmReg, XmmReg) + INST_3x(vpcmpeqq, kInstVpcmpeqq, XmmReg, XmmReg, Mem) + + INST_3x(vpcmpeqw, kInstVpcmpeqw, XmmReg, XmmReg, XmmReg) + INST_3x(vpcmpeqw, kInstVpcmpeqw, XmmReg, XmmReg, Mem) + + INST_3x(vpcmpgtb, kInstVpcmpgtb, XmmReg, XmmReg, XmmReg) + INST_3x(vpcmpgtb, kInstVpcmpgtb, XmmReg, XmmReg, Mem) + + INST_3x(vpcmpgtd, kInstVpcmpgtd, XmmReg, XmmReg, XmmReg) + INST_3x(vpcmpgtd, kInstVpcmpgtd, XmmReg, XmmReg, Mem) + + INST_3x(vpcmpgtq, kInstVpcmpgtq, XmmReg, XmmReg, XmmReg) + INST_3x(vpcmpgtq, kInstVpcmpgtq, XmmReg, XmmReg, Mem) + + INST_3x(vpcmpgtw, kInstVpcmpgtw, XmmReg, XmmReg, XmmReg) + INST_3x(vpcmpgtw, kInstVpcmpgtw, XmmReg, XmmReg, Mem) + + INST_3i(vpcmpestri, kInstVpcmpestri, XmmReg, XmmReg, Imm) + INST_3i(vpcmpestri, kInstVpcmpestri, XmmReg, Mem, Imm) + + INST_3i(vpcmpestrm, kInstVpcmpestrm, XmmReg, XmmReg, Imm) + INST_3i(vpcmpestrm, kInstVpcmpestrm, XmmReg, Mem, Imm) + + INST_3i(vpcmpistri, kInstVpcmpistri, XmmReg, XmmReg, Imm) + INST_3i(vpcmpistri, kInstVpcmpistri, XmmReg, Mem, Imm) + + INST_3i(vpcmpistrm, kInstVpcmpistrm, XmmReg, XmmReg, Imm) + INST_3i(vpcmpistrm, kInstVpcmpistrm, XmmReg, Mem, Imm) + + INST_3x(vpermilpd, kInstVpermilpd, XmmReg, XmmReg, XmmReg) + INST_3x(vpermilpd, kInstVpermilpd, XmmReg, XmmReg, Mem) + INST_3x(vpermilpd, kInstVpermilpd, YmmReg, YmmReg, YmmReg) + INST_3x(vpermilpd, kInstVpermilpd, YmmReg, YmmReg, Mem) + INST_3i(vpermilpd, kInstVpermilpd, XmmReg, XmmReg, Imm) + INST_3i(vpermilpd, kInstVpermilpd, XmmReg, Mem, Imm) + INST_3i(vpermilpd, kInstVpermilpd, YmmReg, YmmReg, Imm) + INST_3i(vpermilpd, kInstVpermilpd, YmmReg, Mem, Imm) + + INST_3x(vpermilps, kInstVpermilps, XmmReg, XmmReg, XmmReg) + INST_3x(vpermilps, kInstVpermilps, XmmReg, XmmReg, Mem) + 
INST_3x(vpermilps, kInstVpermilps, YmmReg, YmmReg, YmmReg) + INST_3x(vpermilps, kInstVpermilps, YmmReg, YmmReg, Mem) + INST_3i(vpermilps, kInstVpermilps, XmmReg, XmmReg, Imm) + INST_3i(vpermilps, kInstVpermilps, XmmReg, Mem, Imm) + INST_3i(vpermilps, kInstVpermilps, YmmReg, YmmReg, Imm) + INST_3i(vpermilps, kInstVpermilps, YmmReg, Mem, Imm) + + INST_4i(vperm2f128, kInstVperm2f128, YmmReg, YmmReg, YmmReg, Imm) + INST_4i(vperm2f128, kInstVperm2f128, YmmReg, YmmReg, Mem, Imm) + + INST_3i(vpextrb, kInstVpextrb, GpReg, XmmReg, Imm) + INST_3i(vpextrb, kInstVpextrb, Mem, XmmReg, Imm) + + INST_3i(vpextrd, kInstVpextrd, GpReg, XmmReg, Imm) + INST_3i(vpextrd, kInstVpextrd, Mem, XmmReg, Imm) + + INST_3i(vpextrw, kInstVpextrw, GpReg, XmmReg, Imm) + INST_3i(vpextrw, kInstVpextrw, Mem, XmmReg, Imm) + + INST_3x(vphaddd, kInstVphaddd, XmmReg, XmmReg, XmmReg) + INST_3x(vphaddd, kInstVphaddd, XmmReg, XmmReg, Mem) + + INST_3x(vphaddsw, kInstVphaddsw, XmmReg, XmmReg, XmmReg) + INST_3x(vphaddsw, kInstVphaddsw, XmmReg, XmmReg, Mem) + + INST_3x(vphaddw, kInstVphaddw, XmmReg, XmmReg, XmmReg) + INST_3x(vphaddw, kInstVphaddw, XmmReg, XmmReg, Mem) + + INST_2x(vphminposuw, kInstVphminposuw, XmmReg, XmmReg) + INST_2x(vphminposuw, kInstVphminposuw, XmmReg, Mem) + + INST_3x(vphsubd, kInstVphsubd, XmmReg, XmmReg, XmmReg) + INST_3x(vphsubd, kInstVphsubd, XmmReg, XmmReg, Mem) + + INST_3x(vphsubsw, kInstVphsubsw, XmmReg, XmmReg, XmmReg) + INST_3x(vphsubsw, kInstVphsubsw, XmmReg, XmmReg, Mem) + + INST_3x(vphsubw, kInstVphsubw, XmmReg, XmmReg, XmmReg) + INST_3x(vphsubw, kInstVphsubw, XmmReg, XmmReg, Mem) + + INST_4i(vpinsrb, kInstVpinsrb, XmmReg, XmmReg, GpReg, Imm) + INST_4i(vpinsrb, kInstVpinsrb, XmmReg, XmmReg, Mem, Imm) + + INST_4i(vpinsrd, kInstVpinsrd, XmmReg, XmmReg, GpReg, Imm) + INST_4i(vpinsrd, kInstVpinsrd, XmmReg, XmmReg, Mem, Imm) + + INST_4i(vpinsrw, kInstVpinsrw, XmmReg, XmmReg, GpReg, Imm) + INST_4i(vpinsrw, kInstVpinsrw, XmmReg, XmmReg, Mem, Imm) + + INST_3x(vpmaddubsw, kInstVpmaddubsw, XmmReg, XmmReg, XmmReg) + INST_3x(vpmaddubsw, kInstVpmaddubsw, XmmReg, XmmReg, Mem) + + INST_3x(vpmaddwd, kInstVpmaddwd, XmmReg, XmmReg, XmmReg) + INST_3x(vpmaddwd, kInstVpmaddwd, XmmReg, XmmReg, Mem) + + INST_3x(vpmaxsb, kInstVpmaxsb, XmmReg, XmmReg, XmmReg) + INST_3x(vpmaxsb, kInstVpmaxsb, XmmReg, XmmReg, Mem) + + INST_3x(vpmaxsd, kInstVpmaxsd, XmmReg, XmmReg, XmmReg) + INST_3x(vpmaxsd, kInstVpmaxsd, XmmReg, XmmReg, Mem) + + INST_3x(vpmaxsw, kInstVpmaxsw, XmmReg, XmmReg, XmmReg) + INST_3x(vpmaxsw, kInstVpmaxsw, XmmReg, XmmReg, Mem) + + INST_3x(vpmaxub, kInstVpmaxub, XmmReg, XmmReg, XmmReg) + INST_3x(vpmaxub, kInstVpmaxub, XmmReg, XmmReg, Mem) + + INST_3x(vpmaxud, kInstVpmaxud, XmmReg, XmmReg, XmmReg) + INST_3x(vpmaxud, kInstVpmaxud, XmmReg, XmmReg, Mem) + + INST_3x(vpmaxuw, kInstVpmaxuw, XmmReg, XmmReg, XmmReg) + INST_3x(vpmaxuw, kInstVpmaxuw, XmmReg, XmmReg, Mem) + + INST_3x(vpminsb, kInstVpminsb, XmmReg, XmmReg, XmmReg) + INST_3x(vpminsb, kInstVpminsb, XmmReg, XmmReg, Mem) + + INST_3x(vpminsd, kInstVpminsd, XmmReg, XmmReg, XmmReg) + INST_3x(vpminsd, kInstVpminsd, XmmReg, XmmReg, Mem) + + INST_3x(vpminsw, kInstVpminsw, XmmReg, XmmReg, XmmReg) + INST_3x(vpminsw, kInstVpminsw, XmmReg, XmmReg, Mem) + + INST_3x(vpminub, kInstVpminub, XmmReg, XmmReg, XmmReg) + INST_3x(vpminub, kInstVpminub, XmmReg, XmmReg, Mem) + + INST_3x(vpminud, kInstVpminud, XmmReg, XmmReg, XmmReg) + INST_3x(vpminud, kInstVpminud, XmmReg, XmmReg, Mem) + + INST_3x(vpminuw, kInstVpminuw, XmmReg, XmmReg, XmmReg) + INST_3x(vpminuw, kInstVpminuw, XmmReg, XmmReg, 
Mem) + + INST_2x(vpmovmskb, kInstVpmovmskb, GpReg, XmmReg) + + INST_2x(vpmovsxbd, kInstVpmovsxbd, XmmReg, XmmReg) + INST_2x(vpmovsxbd, kInstVpmovsxbd, XmmReg, Mem) + + INST_2x(vpmovsxbq, kInstVpmovsxbq, XmmReg, XmmReg) + INST_2x(vpmovsxbq, kInstVpmovsxbq, XmmReg, Mem) + + INST_2x(vpmovsxbw, kInstVpmovsxbw, XmmReg, XmmReg) + INST_2x(vpmovsxbw, kInstVpmovsxbw, XmmReg, Mem) + + INST_2x(vpmovsxdq, kInstVpmovsxdq, XmmReg, XmmReg) + INST_2x(vpmovsxdq, kInstVpmovsxdq, XmmReg, Mem) + + INST_2x(vpmovsxwd, kInstVpmovsxwd, XmmReg, XmmReg) + INST_2x(vpmovsxwd, kInstVpmovsxwd, XmmReg, Mem) + + INST_2x(vpmovsxwq, kInstVpmovsxwq, XmmReg, XmmReg) + INST_2x(vpmovsxwq, kInstVpmovsxwq, XmmReg, Mem) + + INST_2x(vpmovzxbd, kInstVpmovzxbd, XmmReg, XmmReg) + INST_2x(vpmovzxbd, kInstVpmovzxbd, XmmReg, Mem) + + INST_2x(vpmovzxbq, kInstVpmovzxbq, XmmReg, XmmReg) + INST_2x(vpmovzxbq, kInstVpmovzxbq, XmmReg, Mem) + + INST_2x(vpmovzxbw, kInstVpmovzxbw, XmmReg, XmmReg) + INST_2x(vpmovzxbw, kInstVpmovzxbw, XmmReg, Mem) + + INST_2x(vpmovzxdq, kInstVpmovzxdq, XmmReg, XmmReg) + INST_2x(vpmovzxdq, kInstVpmovzxdq, XmmReg, Mem) + + INST_2x(vpmovzxwd, kInstVpmovzxwd, XmmReg, XmmReg) + INST_2x(vpmovzxwd, kInstVpmovzxwd, XmmReg, Mem) + + INST_2x(vpmovzxwq, kInstVpmovzxwq, XmmReg, XmmReg) + INST_2x(vpmovzxwq, kInstVpmovzxwq, XmmReg, Mem) + + INST_3x(vpmuldq, kInstVpmuldq, XmmReg, XmmReg, XmmReg) + INST_3x(vpmuldq, kInstVpmuldq, XmmReg, XmmReg, Mem) + + INST_3x(vpmulhrsw, kInstVpmulhrsw, XmmReg, XmmReg, XmmReg) + INST_3x(vpmulhrsw, kInstVpmulhrsw, XmmReg, XmmReg, Mem) + + INST_3x(vpmulhuw, kInstVpmulhuw, XmmReg, XmmReg, XmmReg) + INST_3x(vpmulhuw, kInstVpmulhuw, XmmReg, XmmReg, Mem) + + INST_3x(vpmulhw, kInstVpmulhw, XmmReg, XmmReg, XmmReg) + INST_3x(vpmulhw, kInstVpmulhw, XmmReg, XmmReg, Mem) + + INST_3x(vpmulld, kInstVpmulld, XmmReg, XmmReg, XmmReg) + INST_3x(vpmulld, kInstVpmulld, XmmReg, XmmReg, Mem) + + INST_3x(vpmullw, kInstVpmullw, XmmReg, XmmReg, XmmReg) + INST_3x(vpmullw, kInstVpmullw, XmmReg, XmmReg, Mem) + + INST_3x(vpmuludq, kInstVpmuludq, XmmReg, XmmReg, XmmReg) + INST_3x(vpmuludq, kInstVpmuludq, XmmReg, XmmReg, Mem) + + INST_3x(vpor, kInstVpor, XmmReg, XmmReg, XmmReg) + INST_3x(vpor, kInstVpor, XmmReg, XmmReg, Mem) + + INST_3x(vpsadbw, kInstVpsadbw, XmmReg, XmmReg, XmmReg) + INST_3x(vpsadbw, kInstVpsadbw, XmmReg, XmmReg, Mem) + + INST_3x(vpshufb, kInstVpshufb, XmmReg, XmmReg, XmmReg) + INST_3x(vpshufb, kInstVpshufb, XmmReg, XmmReg, Mem) + + INST_3i(vpshufd, kInstVpshufd, XmmReg, XmmReg, Imm) + INST_3i(vpshufd, kInstVpshufd, XmmReg, Mem, Imm) + + INST_3i(vpshufhw, kInstVpshufhw, XmmReg, XmmReg, Imm) + INST_3i(vpshufhw, kInstVpshufhw, XmmReg, Mem, Imm) + + INST_3i(vpshuflw, kInstVpshuflw, XmmReg, XmmReg, Imm) + INST_3i(vpshuflw, kInstVpshuflw, XmmReg, Mem, Imm) + + INST_3x(vpsignb, kInstVpsignb, XmmReg, XmmReg, XmmReg) + INST_3x(vpsignb, kInstVpsignb, XmmReg, XmmReg, Mem) + + INST_3x(vpsignd, kInstVpsignd, XmmReg, XmmReg, XmmReg) + INST_3x(vpsignd, kInstVpsignd, XmmReg, XmmReg, Mem) + + INST_3x(vpsignw, kInstVpsignw, XmmReg, XmmReg, XmmReg) + INST_3x(vpsignw, kInstVpsignw, XmmReg, XmmReg, Mem) + + INST_3x(vpslld, kInstVpslld, XmmReg, XmmReg, XmmReg) + INST_3x(vpslld, kInstVpslld, XmmReg, XmmReg, Mem) + INST_3i(vpslld, kInstVpslld, XmmReg, XmmReg, Imm) + + INST_3i(vpslldq, kInstVpslldq, XmmReg, XmmReg, Imm) + + INST_3x(vpsllq, kInstVpsllq, XmmReg, XmmReg, XmmReg) + INST_3x(vpsllq, kInstVpsllq, XmmReg, XmmReg, Mem) + INST_3i(vpsllq, kInstVpsllq, XmmReg, XmmReg, Imm) + + INST_3x(vpsllw, kInstVpsllw, XmmReg, XmmReg, 
XmmReg) + INST_3x(vpsllw, kInstVpsllw, XmmReg, XmmReg, Mem) + INST_3i(vpsllw, kInstVpsllw, XmmReg, XmmReg, Imm) + + INST_3x(vpsrad, kInstVpsrad, XmmReg, XmmReg, XmmReg) + INST_3x(vpsrad, kInstVpsrad, XmmReg, XmmReg, Mem) + INST_3i(vpsrad, kInstVpsrad, XmmReg, XmmReg, Imm) + + INST_3x(vpsraw, kInstVpsraw, XmmReg, XmmReg, XmmReg) + INST_3x(vpsraw, kInstVpsraw, XmmReg, XmmReg, Mem) + INST_3i(vpsraw, kInstVpsraw, XmmReg, XmmReg, Imm) + + INST_3x(vpsrld, kInstVpsrld, XmmReg, XmmReg, XmmReg) + INST_3x(vpsrld, kInstVpsrld, XmmReg, XmmReg, Mem) + INST_3i(vpsrld, kInstVpsrld, XmmReg, XmmReg, Imm) + + INST_3i(vpsrldq, kInstVpsrldq, XmmReg, XmmReg, Imm) + + INST_3x(vpsrlq, kInstVpsrlq, XmmReg, XmmReg, XmmReg) + INST_3x(vpsrlq, kInstVpsrlq, XmmReg, XmmReg, Mem) + INST_3i(vpsrlq, kInstVpsrlq, XmmReg, XmmReg, Imm) + + INST_3x(vpsrlw, kInstVpsrlw, XmmReg, XmmReg, XmmReg) + INST_3x(vpsrlw, kInstVpsrlw, XmmReg, XmmReg, Mem) + INST_3i(vpsrlw, kInstVpsrlw, XmmReg, XmmReg, Imm) + + INST_3x(vpsubb, kInstVpsubb, XmmReg, XmmReg, XmmReg) + INST_3x(vpsubb, kInstVpsubb, XmmReg, XmmReg, Mem) + + INST_3x(vpsubd, kInstVpsubd, XmmReg, XmmReg, XmmReg) + INST_3x(vpsubd, kInstVpsubd, XmmReg, XmmReg, Mem) + + INST_3x(vpsubq, kInstVpsubq, XmmReg, XmmReg, XmmReg) + INST_3x(vpsubq, kInstVpsubq, XmmReg, XmmReg, Mem) + + INST_3x(vpsubw, kInstVpsubw, XmmReg, XmmReg, XmmReg) + INST_3x(vpsubw, kInstVpsubw, XmmReg, XmmReg, Mem) + + INST_3x(vpsubsb, kInstVpsubsb, XmmReg, XmmReg, XmmReg) + INST_3x(vpsubsb, kInstVpsubsb, XmmReg, XmmReg, Mem) + + INST_3x(vpsubsw, kInstVpsubsw, XmmReg, XmmReg, XmmReg) + INST_3x(vpsubsw, kInstVpsubsw, XmmReg, XmmReg, Mem) + + INST_3x(vpsubusb, kInstVpsubusb, XmmReg, XmmReg, XmmReg) + INST_3x(vpsubusb, kInstVpsubusb, XmmReg, XmmReg, Mem) + + INST_3x(vpsubusw, kInstVpsubusw, XmmReg, XmmReg, XmmReg) + INST_3x(vpsubusw, kInstVpsubusw, XmmReg, XmmReg, Mem) + + INST_2x(vptest, kInstVptest, XmmReg, XmmReg) + INST_2x(vptest, kInstVptest, XmmReg, Mem) + INST_2x(vptest, kInstVptest, YmmReg, YmmReg) + INST_2x(vptest, kInstVptest, YmmReg, Mem) + + INST_3x(vpunpckhbw, kInstVpunpckhbw, XmmReg, XmmReg, XmmReg) + INST_3x(vpunpckhbw, kInstVpunpckhbw, XmmReg, XmmReg, Mem) + + INST_3x(vpunpckhdq, kInstVpunpckhdq, XmmReg, XmmReg, XmmReg) + INST_3x(vpunpckhdq, kInstVpunpckhdq, XmmReg, XmmReg, Mem) + + INST_3x(vpunpckhqdq, kInstVpunpckhqdq, XmmReg, XmmReg, XmmReg) + INST_3x(vpunpckhqdq, kInstVpunpckhqdq, XmmReg, XmmReg, Mem) + + INST_3x(vpunpckhwd, kInstVpunpckhwd, XmmReg, XmmReg, XmmReg) + INST_3x(vpunpckhwd, kInstVpunpckhwd, XmmReg, XmmReg, Mem) + + INST_3x(vpunpcklbw, kInstVpunpcklbw, XmmReg, XmmReg, XmmReg) + INST_3x(vpunpcklbw, kInstVpunpcklbw, XmmReg, XmmReg, Mem) + + INST_3x(vpunpckldq, kInstVpunpckldq, XmmReg, XmmReg, XmmReg) + INST_3x(vpunpckldq, kInstVpunpckldq, XmmReg, XmmReg, Mem) + + INST_3x(vpunpcklqdq, kInstVpunpcklqdq, XmmReg, XmmReg, XmmReg) + INST_3x(vpunpcklqdq, kInstVpunpcklqdq, XmmReg, XmmReg, Mem) + + INST_3x(vpunpcklwd, kInstVpunpcklwd, XmmReg, XmmReg, XmmReg) + INST_3x(vpunpcklwd, kInstVpunpcklwd, XmmReg, XmmReg, Mem) + + INST_3x(vpxor, kInstVpxor, XmmReg, XmmReg, XmmReg) + INST_3x(vpxor, kInstVpxor, XmmReg, XmmReg, Mem) + + INST_2x(vrcpps, kInstVrcpps, XmmReg, XmmReg) + INST_2x(vrcpps, kInstVrcpps, XmmReg, Mem) + INST_2x(vrcpps, kInstVrcpps, YmmReg, YmmReg) + INST_2x(vrcpps, kInstVrcpps, YmmReg, Mem) + + INST_3x(vrcpss, kInstVrcpss, XmmReg, XmmReg, XmmReg) + INST_3x(vrcpss, kInstVrcpss, XmmReg, XmmReg, Mem) + + INST_2x(vrsqrtps, kInstVrsqrtps, XmmReg, XmmReg) + INST_2x(vrsqrtps, kInstVrsqrtps, XmmReg, 
Mem) + INST_2x(vrsqrtps, kInstVrsqrtps, YmmReg, YmmReg) + INST_2x(vrsqrtps, kInstVrsqrtps, YmmReg, Mem) + + INST_3x(vrsqrtss, kInstVrsqrtss, XmmReg, XmmReg, XmmReg) + INST_3x(vrsqrtss, kInstVrsqrtss, XmmReg, XmmReg, Mem) + + INST_3i(vroundpd, kInstVroundpd, XmmReg, XmmReg, Imm) + INST_3i(vroundpd, kInstVroundpd, XmmReg, Mem, Imm) + INST_3i(vroundpd, kInstVroundpd, YmmReg, YmmReg, Imm) + INST_3i(vroundpd, kInstVroundpd, YmmReg, Mem, Imm) + + INST_3i(vroundps, kInstVroundps, XmmReg, XmmReg, Imm) + INST_3i(vroundps, kInstVroundps, XmmReg, Mem, Imm) + INST_3i(vroundps, kInstVroundps, YmmReg, YmmReg, Imm) + INST_3i(vroundps, kInstVroundps, YmmReg, Mem, Imm) + + INST_4i(vroundsd, kInstVroundsd, XmmReg, XmmReg, XmmReg, Imm) + INST_4i(vroundsd, kInstVroundsd, XmmReg, XmmReg, Mem, Imm) + + INST_4i(vroundss, kInstVroundss, XmmReg, XmmReg, XmmReg, Imm) + INST_4i(vroundss, kInstVroundss, XmmReg, XmmReg, Mem, Imm) + + INST_4i(vshufpd, kInstVshufpd, XmmReg, XmmReg, XmmReg, Imm) + INST_4i(vshufpd, kInstVshufpd, XmmReg, XmmReg, Mem, Imm) + INST_4i(vshufpd, kInstVshufpd, YmmReg, YmmReg, YmmReg, Imm) + INST_4i(vshufpd, kInstVshufpd, YmmReg, YmmReg, Mem, Imm) + + INST_4i(vshufps, kInstVshufps, XmmReg, XmmReg, XmmReg, Imm) + INST_4i(vshufps, kInstVshufps, XmmReg, XmmReg, Mem, Imm) + INST_4i(vshufps, kInstVshufps, YmmReg, YmmReg, YmmReg, Imm) + INST_4i(vshufps, kInstVshufps, YmmReg, YmmReg, Mem, Imm) + + INST_2x(vsqrtpd, kInstVsqrtpd, XmmReg, XmmReg) + INST_2x(vsqrtpd, kInstVsqrtpd, XmmReg, Mem) + INST_2x(vsqrtpd, kInstVsqrtpd, YmmReg, YmmReg) + INST_2x(vsqrtpd, kInstVsqrtpd, YmmReg, Mem) + + INST_2x(vsqrtps, kInstVsqrtps, XmmReg, XmmReg) + INST_2x(vsqrtps, kInstVsqrtps, XmmReg, Mem) + INST_2x(vsqrtps, kInstVsqrtps, YmmReg, YmmReg) + INST_2x(vsqrtps, kInstVsqrtps, YmmReg, Mem) + + INST_3x(vsqrtsd, kInstVsqrtsd, XmmReg, XmmReg, XmmReg) + INST_3x(vsqrtsd, kInstVsqrtsd, XmmReg, XmmReg, Mem) + + INST_3x(vsqrtss, kInstVsqrtss, XmmReg, XmmReg, XmmReg) + INST_3x(vsqrtss, kInstVsqrtss, XmmReg, XmmReg, Mem) + + INST_1x(vstmxcsr, kInstVstmxcsr, Mem) + + INST_3x(vsubpd, kInstVsubpd, XmmReg, XmmReg, XmmReg) + INST_3x(vsubpd, kInstVsubpd, XmmReg, XmmReg, Mem) + INST_3x(vsubpd, kInstVsubpd, YmmReg, YmmReg, YmmReg) + INST_3x(vsubpd, kInstVsubpd, YmmReg, YmmReg, Mem) + + INST_3x(vsubps, kInstVsubps, XmmReg, XmmReg, XmmReg) + INST_3x(vsubps, kInstVsubps, XmmReg, XmmReg, Mem) + INST_3x(vsubps, kInstVsubps, YmmReg, YmmReg, YmmReg) + INST_3x(vsubps, kInstVsubps, YmmReg, YmmReg, Mem) + + INST_3x(vsubsd, kInstVsubsd, XmmReg, XmmReg, XmmReg) + INST_3x(vsubsd, kInstVsubsd, XmmReg, XmmReg, Mem) + + INST_3x(vsubss, kInstVsubss, XmmReg, XmmReg, XmmReg) + INST_3x(vsubss, kInstVsubss, XmmReg, XmmReg, Mem) + + INST_2x(vtestps, kInstVtestps, XmmReg, XmmReg) + INST_2x(vtestps, kInstVtestps, XmmReg, Mem) + INST_2x(vtestps, kInstVtestps, YmmReg, YmmReg) + INST_2x(vtestps, kInstVtestps, YmmReg, Mem) + + INST_2x(vtestpd, kInstVtestpd, XmmReg, XmmReg) + INST_2x(vtestpd, kInstVtestpd, XmmReg, Mem) + INST_2x(vtestpd, kInstVtestpd, YmmReg, YmmReg) + INST_2x(vtestpd, kInstVtestpd, YmmReg, Mem) + + INST_2x(vucomisd, kInstVucomisd, XmmReg, XmmReg) + INST_2x(vucomisd, kInstVucomisd, XmmReg, Mem) + + INST_2x(vucomiss, kInstVucomiss, XmmReg, XmmReg) + INST_2x(vucomiss, kInstVucomiss, XmmReg, Mem) + + INST_3x(vunpckhpd, kInstVunpckhpd, XmmReg, XmmReg, XmmReg) + INST_3x(vunpckhpd, kInstVunpckhpd, XmmReg, XmmReg, Mem) + INST_3x(vunpckhpd, kInstVunpckhpd, YmmReg, YmmReg, YmmReg) + INST_3x(vunpckhpd, kInstVunpckhpd, YmmReg, YmmReg, Mem) + + INST_3x(vunpckhps, 
kInstVunpckhps, XmmReg, XmmReg, XmmReg) + INST_3x(vunpckhps, kInstVunpckhps, XmmReg, XmmReg, Mem) + INST_3x(vunpckhps, kInstVunpckhps, YmmReg, YmmReg, YmmReg) + INST_3x(vunpckhps, kInstVunpckhps, YmmReg, YmmReg, Mem) + + INST_3x(vunpcklpd, kInstVunpcklpd, XmmReg, XmmReg, XmmReg) + INST_3x(vunpcklpd, kInstVunpcklpd, XmmReg, XmmReg, Mem) + INST_3x(vunpcklpd, kInstVunpcklpd, YmmReg, YmmReg, YmmReg) + INST_3x(vunpcklpd, kInstVunpcklpd, YmmReg, YmmReg, Mem) + + INST_3x(vunpcklps, kInstVunpcklps, XmmReg, XmmReg, XmmReg) + INST_3x(vunpcklps, kInstVunpcklps, XmmReg, XmmReg, Mem) + INST_3x(vunpcklps, kInstVunpcklps, YmmReg, YmmReg, YmmReg) + INST_3x(vunpcklps, kInstVunpcklps, YmmReg, YmmReg, Mem) + + INST_3x(vxorpd, kInstVxorpd, XmmReg, XmmReg, XmmReg) + INST_3x(vxorpd, kInstVxorpd, XmmReg, XmmReg, Mem) + INST_3x(vxorpd, kInstVxorpd, YmmReg, YmmReg, YmmReg) + INST_3x(vxorpd, kInstVxorpd, YmmReg, YmmReg, Mem) + + INST_3x(vxorps, kInstVxorps, XmmReg, XmmReg, XmmReg) + INST_3x(vxorps, kInstVxorps, XmmReg, XmmReg, Mem) + INST_3x(vxorps, kInstVxorps, YmmReg, YmmReg, YmmReg) + INST_3x(vxorps, kInstVxorps, YmmReg, YmmReg, Mem) + + INST_0x(vzeroall, kInstVzeroall) + INST_0x(vzeroupper, kInstVzeroupper) + + // -------------------------------------------------------------------------- + // [AVX+AESNI] + // -------------------------------------------------------------------------- + + //! @brief Perform a single round of the AES decryption flow (AVX+AESNI). + INST_3x(vaesdec, kInstVaesdec, XmmReg, XmmReg, XmmReg) + //! @overload + INST_3x(vaesdec, kInstVaesdec, XmmReg, XmmReg, Mem) + + //! @brief Perform the last round of the AES decryption flow (AVX+AESNI). + INST_3x(vaesdeclast, kInstVaesdeclast, XmmReg, XmmReg, XmmReg) + //! @overload + INST_3x(vaesdeclast, kInstVaesdeclast, XmmReg, XmmReg, Mem) + + //! @brief Perform a single round of the AES encryption flow (AVX+AESNI). + INST_3x(vaesenc, kInstVaesenc, XmmReg, XmmReg, XmmReg) + //! @overload + INST_3x(vaesenc, kInstVaesenc, XmmReg, XmmReg, Mem) + + //! @brief Perform the last round of the AES encryption flow (AVX+AESNI). + INST_3x(vaesenclast, kInstVaesenclast, XmmReg, XmmReg, XmmReg) + //! @overload + INST_3x(vaesenclast, kInstVaesenclast, XmmReg, XmmReg, Mem) + + //! @brief Perform the InvMixColumns transformation (AVX+AESNI). + INST_2x(vaesimc, kInstVaesimc, XmmReg, XmmReg) + //! @overload + INST_2x(vaesimc, kInstVaesimc, XmmReg, Mem) + + //! @brief Assist in expanding the AES cipher key (AVX+AESNI). + INST_3i(vaeskeygenassist, kInstVaeskeygenassist, XmmReg, XmmReg, Imm) + //! 
@overload + INST_3i(vaeskeygenassist, kInstVaeskeygenassist, XmmReg, Mem, Imm) + + // -------------------------------------------------------------------------- + // [AVX+PCLMULQDQ] + // -------------------------------------------------------------------------- + + INST_4i(vpclmulqdq, kInstVpclmulqdq, XmmReg, XmmReg, XmmReg, Imm) + INST_4i(vpclmulqdq, kInstVpclmulqdq, XmmReg, XmmReg, Mem, Imm) + + // -------------------------------------------------------------------------- + // [AVX2] + // -------------------------------------------------------------------------- + + INST_2x(vbroadcasti128, kInstVbroadcasti128, YmmReg, Mem) + + INST_2x(vbroadcastsd, kInstVbroadcastsd, YmmReg, XmmReg) + + INST_2x(vbroadcastss, kInstVbroadcastss, XmmReg, XmmReg) + INST_2x(vbroadcastss, kInstVbroadcastss, YmmReg, XmmReg) + + INST_3i(vextracti128, kInstVextracti128, XmmReg, YmmReg, Imm) + INST_3i(vextracti128, kInstVextracti128, Mem, YmmReg, Imm) + + INST_3x(vgatherdpd, kInstVgatherdpd, XmmReg, Mem, XmmReg) + INST_3x(vgatherdpd, kInstVgatherdpd, YmmReg, Mem, YmmReg) + + INST_3x(vgatherdps, kInstVgatherdps, XmmReg, Mem, XmmReg) + INST_3x(vgatherdps, kInstVgatherdps, YmmReg, Mem, YmmReg) + + INST_3x(vgatherqpd, kInstVgatherqpd, XmmReg, Mem, XmmReg) + INST_3x(vgatherqpd, kInstVgatherqpd, YmmReg, Mem, YmmReg) + + INST_3x(vgatherqps, kInstVgatherqps, XmmReg, Mem, XmmReg) + + INST_4i(vinserti128, kInstVinserti128, YmmReg, YmmReg, XmmReg, Imm) + INST_4i(vinserti128, kInstVinserti128, YmmReg, YmmReg, Mem, Imm) + + INST_2x(vmovntdqa, kInstVmovntdqa, YmmReg, Mem) + + INST_4i(vmpsadbw, kInstVmpsadbw, YmmReg, YmmReg, YmmReg, Imm) + INST_4i(vmpsadbw, kInstVmpsadbw, YmmReg, YmmReg, Mem, Imm) + + INST_2x(vpabsb, kInstVpabsb, YmmReg, YmmReg) + INST_2x(vpabsb, kInstVpabsb, YmmReg, Mem) + + INST_2x(vpabsd, kInstVpabsd, YmmReg, YmmReg) + INST_2x(vpabsd, kInstVpabsd, YmmReg, Mem) + + INST_2x(vpabsw, kInstVpabsw, YmmReg, YmmReg) + INST_2x(vpabsw, kInstVpabsw, YmmReg, Mem) + + INST_3x(vpackssdw, kInstVpackssdw, YmmReg, YmmReg, YmmReg) + INST_3x(vpackssdw, kInstVpackssdw, YmmReg, YmmReg, Mem) + + INST_3x(vpacksswb, kInstVpacksswb, YmmReg, YmmReg, YmmReg) + INST_3x(vpacksswb, kInstVpacksswb, YmmReg, YmmReg, Mem) + + INST_3x(vpackusdw, kInstVpackusdw, YmmReg, YmmReg, YmmReg) + INST_3x(vpackusdw, kInstVpackusdw, YmmReg, YmmReg, Mem) + + INST_3x(vpackuswb, kInstVpackuswb, YmmReg, YmmReg, YmmReg) + INST_3x(vpackuswb, kInstVpackuswb, YmmReg, YmmReg, Mem) + + INST_3x(vpaddb, kInstVpaddb, YmmReg, YmmReg, YmmReg) + INST_3x(vpaddb, kInstVpaddb, YmmReg, YmmReg, Mem) + + INST_3x(vpaddd, kInstVpaddd, YmmReg, YmmReg, YmmReg) + INST_3x(vpaddd, kInstVpaddd, YmmReg, YmmReg, Mem) + + INST_3x(vpaddq, kInstVpaddq, YmmReg, YmmReg, YmmReg) + INST_3x(vpaddq, kInstVpaddq, YmmReg, YmmReg, Mem) + + INST_3x(vpaddw, kInstVpaddw, YmmReg, YmmReg, YmmReg) + INST_3x(vpaddw, kInstVpaddw, YmmReg, YmmReg, Mem) + + INST_3x(vpaddsb, kInstVpaddsb, YmmReg, YmmReg, YmmReg) + INST_3x(vpaddsb, kInstVpaddsb, YmmReg, YmmReg, Mem) + + INST_3x(vpaddsw, kInstVpaddsw, YmmReg, YmmReg, YmmReg) + INST_3x(vpaddsw, kInstVpaddsw, YmmReg, YmmReg, Mem) + + INST_3x(vpaddusb, kInstVpaddusb, YmmReg, YmmReg, YmmReg) + INST_3x(vpaddusb, kInstVpaddusb, YmmReg, YmmReg, Mem) + + INST_3x(vpaddusw, kInstVpaddusw, YmmReg, YmmReg, YmmReg) + INST_3x(vpaddusw, kInstVpaddusw, YmmReg, YmmReg, Mem) + + INST_4i(vpalignr, kInstVpalignr, YmmReg, YmmReg, YmmReg, Imm) + INST_4i(vpalignr, kInstVpalignr, YmmReg, YmmReg, Mem, Imm) + + INST_3x(vpand, kInstVpand, YmmReg, YmmReg, YmmReg) + INST_3x(vpand, 
kInstVpand, YmmReg, YmmReg, Mem) + + INST_3x(vpandn, kInstVpandn, YmmReg, YmmReg, YmmReg) + INST_3x(vpandn, kInstVpandn, YmmReg, YmmReg, Mem) + + INST_3x(vpavgb, kInstVpavgb, YmmReg, YmmReg, YmmReg) + INST_3x(vpavgb, kInstVpavgb, YmmReg, YmmReg, Mem) + + INST_3x(vpavgw, kInstVpavgw, YmmReg, YmmReg, YmmReg) + INST_3x(vpavgw, kInstVpavgw, YmmReg, YmmReg, Mem) + + INST_4i(vpblendd, kInstVpblendd, XmmReg, XmmReg, XmmReg, Imm) + INST_4i(vpblendd, kInstVpblendd, XmmReg, XmmReg, Mem, Imm) + INST_4i(vpblendd, kInstVpblendd, YmmReg, YmmReg, YmmReg, Imm) + INST_4i(vpblendd, kInstVpblendd, YmmReg, YmmReg, Mem, Imm) + + INST_4x(vpblendvb, kInstVpblendvb, YmmReg, YmmReg, YmmReg, YmmReg) + INST_4x(vpblendvb, kInstVpblendvb, YmmReg, YmmReg, Mem, YmmReg) + + INST_4i(vpblendw, kInstVpblendw, YmmReg, YmmReg, YmmReg, Imm) + INST_4i(vpblendw, kInstVpblendw, YmmReg, YmmReg, Mem, Imm) + + INST_2x(vpbroadcastb, kInstVpbroadcastb, XmmReg, XmmReg) + INST_2x(vpbroadcastb, kInstVpbroadcastb, XmmReg, Mem) + INST_2x(vpbroadcastb, kInstVpbroadcastb, YmmReg, XmmReg) + INST_2x(vpbroadcastb, kInstVpbroadcastb, YmmReg, Mem) + + INST_2x(vpbroadcastd, kInstVpbroadcastd, XmmReg, XmmReg) + INST_2x(vpbroadcastd, kInstVpbroadcastd, XmmReg, Mem) + INST_2x(vpbroadcastd, kInstVpbroadcastd, YmmReg, XmmReg) + INST_2x(vpbroadcastd, kInstVpbroadcastd, YmmReg, Mem) + + INST_2x(vpbroadcastq, kInstVpbroadcastq, XmmReg, XmmReg) + INST_2x(vpbroadcastq, kInstVpbroadcastq, XmmReg, Mem) + INST_2x(vpbroadcastq, kInstVpbroadcastq, YmmReg, XmmReg) + INST_2x(vpbroadcastq, kInstVpbroadcastq, YmmReg, Mem) + + INST_2x(vpbroadcastw, kInstVpbroadcastw, XmmReg, XmmReg) + INST_2x(vpbroadcastw, kInstVpbroadcastw, XmmReg, Mem) + INST_2x(vpbroadcastw, kInstVpbroadcastw, YmmReg, XmmReg) + INST_2x(vpbroadcastw, kInstVpbroadcastw, YmmReg, Mem) + + INST_3x(vpcmpeqb, kInstVpcmpeqb, YmmReg, YmmReg, YmmReg) + INST_3x(vpcmpeqb, kInstVpcmpeqb, YmmReg, YmmReg, Mem) + + INST_3x(vpcmpeqd, kInstVpcmpeqd, YmmReg, YmmReg, YmmReg) + INST_3x(vpcmpeqd, kInstVpcmpeqd, YmmReg, YmmReg, Mem) + + INST_3x(vpcmpeqq, kInstVpcmpeqq, YmmReg, YmmReg, YmmReg) + INST_3x(vpcmpeqq, kInstVpcmpeqq, YmmReg, YmmReg, Mem) + + INST_3x(vpcmpeqw, kInstVpcmpeqw, YmmReg, YmmReg, YmmReg) + INST_3x(vpcmpeqw, kInstVpcmpeqw, YmmReg, YmmReg, Mem) + + INST_3x(vpcmpgtb, kInstVpcmpgtb, YmmReg, YmmReg, YmmReg) + INST_3x(vpcmpgtb, kInstVpcmpgtb, YmmReg, YmmReg, Mem) + + INST_3x(vpcmpgtd, kInstVpcmpgtd, YmmReg, YmmReg, YmmReg) + INST_3x(vpcmpgtd, kInstVpcmpgtd, YmmReg, YmmReg, Mem) + + INST_3x(vpcmpgtq, kInstVpcmpgtq, YmmReg, YmmReg, YmmReg) + INST_3x(vpcmpgtq, kInstVpcmpgtq, YmmReg, YmmReg, Mem) + + INST_3x(vpcmpgtw, kInstVpcmpgtw, YmmReg, YmmReg, YmmReg) + INST_3x(vpcmpgtw, kInstVpcmpgtw, YmmReg, YmmReg, Mem) + + INST_4i(vperm2i128, kInstVperm2i128, YmmReg, YmmReg, YmmReg, Imm) + INST_4i(vperm2i128, kInstVperm2i128, YmmReg, YmmReg, Mem, Imm) + + INST_3x(vpermd, kInstVpermd, YmmReg, YmmReg, YmmReg) + INST_3x(vpermd, kInstVpermd, YmmReg, YmmReg, Mem) + + INST_3x(vpermps, kInstVpermps, YmmReg, YmmReg, YmmReg) + INST_3x(vpermps, kInstVpermps, YmmReg, YmmReg, Mem) + + INST_3i(vpermpd, kInstVpermpd, YmmReg, YmmReg, Imm) + INST_3i(vpermpd, kInstVpermpd, YmmReg, Mem, Imm) + + INST_3i(vpermq, kInstVpermq, YmmReg, YmmReg, Imm) + INST_3i(vpermq, kInstVpermq, YmmReg, Mem, Imm) + + INST_3x(vpgatherdd, kInstVpgatherdd, XmmReg, Mem, XmmReg) + INST_3x(vpgatherdd, kInstVpgatherdd, YmmReg, Mem, YmmReg) + + INST_3x(vpgatherdq, kInstVpgatherdq, XmmReg, Mem, XmmReg) + INST_3x(vpgatherdq, kInstVpgatherdq, YmmReg, Mem, YmmReg) + + 
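//! @note The AVX2 gathers (vpgatherdd/vpgatherdq/vpgatherqd/vpgatherqq) use a
+ //! vector of indices (VSIB addressing) and take a mask register whose element
+ //! sign bits select which elements are loaded; the mask is cleared to zero
+ //! when the instruction completes. A minimal usage sketch, assuming `a` is an
+ //! assembler instance and `vsib` is a hypothetical VSIB memory operand built
+ //! from a base register and an index vector:
+ //!
+ //! @code
+ //! a.vpgatherdd(xmm0, vsib, xmm2); // Load dwords selected by xmm2, zero xmm2.
+ //! @endcode
+
+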
INST_3x(vpgatherqd, kInstVpgatherqd, XmmReg, Mem, XmmReg) + + INST_3x(vpgatherqq, kInstVpgatherqq, XmmReg, Mem, XmmReg) + INST_3x(vpgatherqq, kInstVpgatherqq, YmmReg, Mem, YmmReg) + + INST_2x(vpmovmskb, kInstVpmovmskb, GpReg, YmmReg) + + INST_2x(vpmovsxbd, kInstVpmovsxbd, YmmReg, Mem) + INST_2x(vpmovsxbd, kInstVpmovsxbd, YmmReg, XmmReg) + + INST_2x(vpmovsxbq, kInstVpmovsxbq, YmmReg, Mem) + INST_2x(vpmovsxbq, kInstVpmovsxbq, YmmReg, XmmReg) + + INST_2x(vpmovsxbw, kInstVpmovsxbw, YmmReg, Mem) + INST_2x(vpmovsxbw, kInstVpmovsxbw, YmmReg, XmmReg) + + INST_2x(vpmovsxdq, kInstVpmovsxdq, YmmReg, Mem) + INST_2x(vpmovsxdq, kInstVpmovsxdq, YmmReg, XmmReg) + + INST_2x(vpmovsxwd, kInstVpmovsxwd, YmmReg, Mem) + INST_2x(vpmovsxwd, kInstVpmovsxwd, YmmReg, XmmReg) + + INST_2x(vpmovsxwq, kInstVpmovsxwq, YmmReg, Mem) + INST_2x(vpmovsxwq, kInstVpmovsxwq, YmmReg, XmmReg) + + INST_2x(vpmovzxbd, kInstVpmovzxbd, YmmReg, Mem) + INST_2x(vpmovzxbd, kInstVpmovzxbd, YmmReg, XmmReg) + + INST_2x(vpmovzxbq, kInstVpmovzxbq, YmmReg, Mem) + INST_2x(vpmovzxbq, kInstVpmovzxbq, YmmReg, XmmReg) + + INST_2x(vpmovzxbw, kInstVpmovzxbw, YmmReg, Mem) + INST_2x(vpmovzxbw, kInstVpmovzxbw, YmmReg, XmmReg) + + INST_2x(vpmovzxdq, kInstVpmovzxdq, YmmReg, Mem) + INST_2x(vpmovzxdq, kInstVpmovzxdq, YmmReg, XmmReg) + + INST_2x(vpmovzxwd, kInstVpmovzxwd, YmmReg, Mem) + INST_2x(vpmovzxwd, kInstVpmovzxwd, YmmReg, XmmReg) + + INST_2x(vpmovzxwq, kInstVpmovzxwq, YmmReg, Mem) + INST_2x(vpmovzxwq, kInstVpmovzxwq, YmmReg, XmmReg) + + INST_3i(vpshufd, kInstVpshufd, YmmReg, Mem, Imm) + INST_3i(vpshufd, kInstVpshufd, YmmReg, YmmReg, Imm) + + INST_3i(vpshufhw, kInstVpshufhw, YmmReg, Mem, Imm) + INST_3i(vpshufhw, kInstVpshufhw, YmmReg, YmmReg, Imm) + + INST_3i(vpshuflw, kInstVpshuflw, YmmReg, Mem, Imm) + INST_3i(vpshuflw, kInstVpshuflw, YmmReg, YmmReg, Imm) + + INST_3i(vpslld, kInstVpslld, YmmReg, YmmReg, Imm) + + INST_3i(vpslldq, kInstVpslldq, YmmReg, YmmReg, Imm) + + INST_3i(vpsllq, kInstVpsllq, YmmReg, YmmReg, Imm) + + INST_3i(vpsllw, kInstVpsllw, YmmReg, YmmReg, Imm) + + INST_3i(vpsrad, kInstVpsrad, YmmReg, YmmReg, Imm) + + INST_3i(vpsraw, kInstVpsraw, YmmReg, YmmReg, Imm) + + INST_3i(vpsrld, kInstVpsrld, YmmReg, YmmReg, Imm) + + INST_3i(vpsrldq, kInstVpsrldq, YmmReg, YmmReg, Imm) + + INST_3i(vpsrlq, kInstVpsrlq, YmmReg, YmmReg, Imm) + + INST_3i(vpsrlw, kInstVpsrlw, YmmReg, YmmReg, Imm) + + INST_3x(vphaddd, kInstVphaddd, YmmReg, YmmReg, Mem) + INST_3x(vphaddd, kInstVphaddd, YmmReg, YmmReg, YmmReg) + + INST_3x(vphaddsw, kInstVphaddsw, YmmReg, YmmReg, Mem) + INST_3x(vphaddsw, kInstVphaddsw, YmmReg, YmmReg, YmmReg) + + INST_3x(vphaddw, kInstVphaddw, YmmReg, YmmReg, Mem) + INST_3x(vphaddw, kInstVphaddw, YmmReg, YmmReg, YmmReg) + + INST_3x(vphsubd, kInstVphsubd, YmmReg, YmmReg, Mem) + INST_3x(vphsubd, kInstVphsubd, YmmReg, YmmReg, YmmReg) + + INST_3x(vphsubsw, kInstVphsubsw, YmmReg, YmmReg, Mem) + INST_3x(vphsubsw, kInstVphsubsw, YmmReg, YmmReg, YmmReg) + + INST_3x(vphsubw, kInstVphsubw, YmmReg, YmmReg, Mem) + INST_3x(vphsubw, kInstVphsubw, YmmReg, YmmReg, YmmReg) + + INST_3x(vpmaddubsw, kInstVpmaddubsw, YmmReg, YmmReg, Mem) + INST_3x(vpmaddubsw, kInstVpmaddubsw, YmmReg, YmmReg, YmmReg) + + INST_3x(vpmaddwd, kInstVpmaddwd, YmmReg, YmmReg, Mem) + INST_3x(vpmaddwd, kInstVpmaddwd, YmmReg, YmmReg, YmmReg) + + INST_3x(vpmaskmovd, kInstVpmaskmovd, Mem, XmmReg, XmmReg) + INST_3x(vpmaskmovd, kInstVpmaskmovd, Mem, YmmReg, YmmReg) + INST_3x(vpmaskmovd, kInstVpmaskmovd, XmmReg, XmmReg, Mem) + INST_3x(vpmaskmovd, kInstVpmaskmovd, YmmReg, YmmReg, Mem) + + 
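+  // Note: the vpsll*/vpsrl*/vpsra* shifts are declared twice by design: the
+  // INST_3i forms above shift by an immediate, while the INST_3x forms below
+  // take the shift count from an XmmReg or Mem operand.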
INST_3x(vpmaskmovq, kInstVpmaskmovq, Mem, XmmReg, XmmReg) + INST_3x(vpmaskmovq, kInstVpmaskmovq, Mem, YmmReg, YmmReg) + INST_3x(vpmaskmovq, kInstVpmaskmovq, XmmReg, XmmReg, Mem) + INST_3x(vpmaskmovq, kInstVpmaskmovq, YmmReg, YmmReg, Mem) + + INST_3x(vpmaxsb, kInstVpmaxsb, YmmReg, YmmReg, Mem) + INST_3x(vpmaxsb, kInstVpmaxsb, YmmReg, YmmReg, YmmReg) + + INST_3x(vpmaxsd, kInstVpmaxsd, YmmReg, YmmReg, Mem) + INST_3x(vpmaxsd, kInstVpmaxsd, YmmReg, YmmReg, YmmReg) + + INST_3x(vpmaxsw, kInstVpmaxsw, YmmReg, YmmReg, Mem) + INST_3x(vpmaxsw, kInstVpmaxsw, YmmReg, YmmReg, YmmReg) + + INST_3x(vpmaxub, kInstVpmaxub, YmmReg, YmmReg, Mem) + INST_3x(vpmaxub, kInstVpmaxub, YmmReg, YmmReg, YmmReg) + + INST_3x(vpmaxud, kInstVpmaxud, YmmReg, YmmReg, Mem) + INST_3x(vpmaxud, kInstVpmaxud, YmmReg, YmmReg, YmmReg) + + INST_3x(vpmaxuw, kInstVpmaxuw, YmmReg, YmmReg, Mem) + INST_3x(vpmaxuw, kInstVpmaxuw, YmmReg, YmmReg, YmmReg) + + INST_3x(vpminsb, kInstVpminsb, YmmReg, YmmReg, Mem) + INST_3x(vpminsb, kInstVpminsb, YmmReg, YmmReg, YmmReg) + + INST_3x(vpminsd, kInstVpminsd, YmmReg, YmmReg, Mem) + INST_3x(vpminsd, kInstVpminsd, YmmReg, YmmReg, YmmReg) + + INST_3x(vpminsw, kInstVpminsw, YmmReg, YmmReg, Mem) + INST_3x(vpminsw, kInstVpminsw, YmmReg, YmmReg, YmmReg) + + INST_3x(vpminub, kInstVpminub, YmmReg, YmmReg, Mem) + INST_3x(vpminub, kInstVpminub, YmmReg, YmmReg, YmmReg) + + INST_3x(vpminud, kInstVpminud, YmmReg, YmmReg, Mem) + INST_3x(vpminud, kInstVpminud, YmmReg, YmmReg, YmmReg) + + INST_3x(vpminuw, kInstVpminuw, YmmReg, YmmReg, Mem) + INST_3x(vpminuw, kInstVpminuw, YmmReg, YmmReg, YmmReg) + + INST_3x(vpmuldq, kInstVpmuldq, YmmReg, YmmReg, Mem) + INST_3x(vpmuldq, kInstVpmuldq, YmmReg, YmmReg, YmmReg) + + INST_3x(vpmulhrsw, kInstVpmulhrsw, YmmReg, YmmReg, Mem) + INST_3x(vpmulhrsw, kInstVpmulhrsw, YmmReg, YmmReg, YmmReg) + + INST_3x(vpmulhuw, kInstVpmulhuw, YmmReg, YmmReg, Mem) + INST_3x(vpmulhuw, kInstVpmulhuw, YmmReg, YmmReg, YmmReg) + + INST_3x(vpmulhw, kInstVpmulhw, YmmReg, YmmReg, Mem) + INST_3x(vpmulhw, kInstVpmulhw, YmmReg, YmmReg, YmmReg) + + INST_3x(vpmulld, kInstVpmulld, YmmReg, YmmReg, Mem) + INST_3x(vpmulld, kInstVpmulld, YmmReg, YmmReg, YmmReg) + + INST_3x(vpmullw, kInstVpmullw, YmmReg, YmmReg, Mem) + INST_3x(vpmullw, kInstVpmullw, YmmReg, YmmReg, YmmReg) + + INST_3x(vpmuludq, kInstVpmuludq, YmmReg, YmmReg, Mem) + INST_3x(vpmuludq, kInstVpmuludq, YmmReg, YmmReg, YmmReg) + + INST_3x(vpor, kInstVpor, YmmReg, YmmReg, Mem) + INST_3x(vpor, kInstVpor, YmmReg, YmmReg, YmmReg) + + INST_3x(vpsadbw, kInstVpsadbw, YmmReg, YmmReg, Mem) + INST_3x(vpsadbw, kInstVpsadbw, YmmReg, YmmReg, YmmReg) + + INST_3x(vpshufb, kInstVpshufb, YmmReg, YmmReg, Mem) + INST_3x(vpshufb, kInstVpshufb, YmmReg, YmmReg, YmmReg) + + INST_3x(vpsignb, kInstVpsignb, YmmReg, YmmReg, Mem) + INST_3x(vpsignb, kInstVpsignb, YmmReg, YmmReg, YmmReg) + + INST_3x(vpsignd, kInstVpsignd, YmmReg, YmmReg, Mem) + INST_3x(vpsignd, kInstVpsignd, YmmReg, YmmReg, YmmReg) + + INST_3x(vpsignw, kInstVpsignw, YmmReg, YmmReg, Mem) + INST_3x(vpsignw, kInstVpsignw, YmmReg, YmmReg, YmmReg) + + INST_3x(vpslld, kInstVpslld, YmmReg, YmmReg, Mem) + INST_3x(vpslld, kInstVpslld, YmmReg, YmmReg, XmmReg) + + INST_3x(vpsllq, kInstVpsllq, YmmReg, YmmReg, Mem) + INST_3x(vpsllq, kInstVpsllq, YmmReg, YmmReg, XmmReg) + + INST_3x(vpsllvd, kInstVpsllvd, XmmReg, XmmReg, Mem) + INST_3x(vpsllvd, kInstVpsllvd, XmmReg, XmmReg, XmmReg) + INST_3x(vpsllvd, kInstVpsllvd, YmmReg, YmmReg, Mem) + INST_3x(vpsllvd, kInstVpsllvd, YmmReg, YmmReg, YmmReg) + + INST_3x(vpsllvq, kInstVpsllvq, XmmReg, 
XmmReg, Mem) + INST_3x(vpsllvq, kInstVpsllvq, XmmReg, XmmReg, XmmReg) + INST_3x(vpsllvq, kInstVpsllvq, YmmReg, YmmReg, Mem) + INST_3x(vpsllvq, kInstVpsllvq, YmmReg, YmmReg, YmmReg) + + INST_3x(vpsllw, kInstVpsllw, YmmReg, YmmReg, Mem) + INST_3x(vpsllw, kInstVpsllw, YmmReg, YmmReg, XmmReg) + + INST_3x(vpsrad, kInstVpsrad, YmmReg, YmmReg, Mem) + INST_3x(vpsrad, kInstVpsrad, YmmReg, YmmReg, XmmReg) + + INST_3x(vpsravd, kInstVpsravd, XmmReg, XmmReg, Mem) + INST_3x(vpsravd, kInstVpsravd, XmmReg, XmmReg, XmmReg) + INST_3x(vpsravd, kInstVpsravd, YmmReg, YmmReg, Mem) + INST_3x(vpsravd, kInstVpsravd, YmmReg, YmmReg, YmmReg) + + INST_3x(vpsraw, kInstVpsraw, YmmReg, YmmReg, Mem) + INST_3x(vpsraw, kInstVpsraw, YmmReg, YmmReg, XmmReg) + + INST_3x(vpsrld, kInstVpsrld, YmmReg, YmmReg, Mem) + INST_3x(vpsrld, kInstVpsrld, YmmReg, YmmReg, XmmReg) + + INST_3x(vpsrlq, kInstVpsrlq, YmmReg, YmmReg, Mem) + INST_3x(vpsrlq, kInstVpsrlq, YmmReg, YmmReg, XmmReg) + + INST_3x(vpsrlvd, kInstVpsrlvd, XmmReg, XmmReg, Mem) + INST_3x(vpsrlvd, kInstVpsrlvd, XmmReg, XmmReg, XmmReg) + INST_3x(vpsrlvd, kInstVpsrlvd, YmmReg, YmmReg, Mem) + INST_3x(vpsrlvd, kInstVpsrlvd, YmmReg, YmmReg, YmmReg) + + INST_3x(vpsrlvq, kInstVpsrlvq, XmmReg, XmmReg, Mem) + INST_3x(vpsrlvq, kInstVpsrlvq, XmmReg, XmmReg, XmmReg) + INST_3x(vpsrlvq, kInstVpsrlvq, YmmReg, YmmReg, Mem) + INST_3x(vpsrlvq, kInstVpsrlvq, YmmReg, YmmReg, YmmReg) + + INST_3x(vpsrlw, kInstVpsrlw, YmmReg, YmmReg, Mem) + INST_3x(vpsrlw, kInstVpsrlw, YmmReg, YmmReg, XmmReg) + + INST_3x(vpsubb, kInstVpsubb, YmmReg, YmmReg, Mem) + INST_3x(vpsubb, kInstVpsubb, YmmReg, YmmReg, YmmReg) + + INST_3x(vpsubd, kInstVpsubd, YmmReg, YmmReg, Mem) + INST_3x(vpsubd, kInstVpsubd, YmmReg, YmmReg, YmmReg) + + INST_3x(vpsubq, kInstVpsubq, YmmReg, YmmReg, Mem) + INST_3x(vpsubq, kInstVpsubq, YmmReg, YmmReg, YmmReg) + + INST_3x(vpsubsb, kInstVpsubsb, YmmReg, YmmReg, Mem) + INST_3x(vpsubsb, kInstVpsubsb, YmmReg, YmmReg, YmmReg) + + INST_3x(vpsubsw, kInstVpsubsw, YmmReg, YmmReg, Mem) + INST_3x(vpsubsw, kInstVpsubsw, YmmReg, YmmReg, YmmReg) + + INST_3x(vpsubusb, kInstVpsubusb, YmmReg, YmmReg, Mem) + INST_3x(vpsubusb, kInstVpsubusb, YmmReg, YmmReg, YmmReg) + + INST_3x(vpsubusw, kInstVpsubusw, YmmReg, YmmReg, Mem) + INST_3x(vpsubusw, kInstVpsubusw, YmmReg, YmmReg, YmmReg) + + INST_3x(vpsubw, kInstVpsubw, YmmReg, YmmReg, Mem) + INST_3x(vpsubw, kInstVpsubw, YmmReg, YmmReg, YmmReg) + + INST_3x(vpunpckhbw, kInstVpunpckhbw, YmmReg, YmmReg, Mem) + INST_3x(vpunpckhbw, kInstVpunpckhbw, YmmReg, YmmReg, YmmReg) + + INST_3x(vpunpckhdq, kInstVpunpckhdq, YmmReg, YmmReg, Mem) + INST_3x(vpunpckhdq, kInstVpunpckhdq, YmmReg, YmmReg, YmmReg) + + INST_3x(vpunpckhqdq, kInstVpunpckhqdq, YmmReg, YmmReg, Mem) + INST_3x(vpunpckhqdq, kInstVpunpckhqdq, YmmReg, YmmReg, YmmReg) + + INST_3x(vpunpckhwd, kInstVpunpckhwd, YmmReg, YmmReg, Mem) + INST_3x(vpunpckhwd, kInstVpunpckhwd, YmmReg, YmmReg, YmmReg) + + INST_3x(vpunpcklbw, kInstVpunpcklbw, YmmReg, YmmReg, Mem) + INST_3x(vpunpcklbw, kInstVpunpcklbw, YmmReg, YmmReg, YmmReg) + + INST_3x(vpunpckldq, kInstVpunpckldq, YmmReg, YmmReg, Mem) + INST_3x(vpunpckldq, kInstVpunpckldq, YmmReg, YmmReg, YmmReg) + + INST_3x(vpunpcklqdq, kInstVpunpcklqdq, YmmReg, YmmReg, Mem) + INST_3x(vpunpcklqdq, kInstVpunpcklqdq, YmmReg, YmmReg, YmmReg) + + INST_3x(vpunpcklwd, kInstVpunpcklwd, YmmReg, YmmReg, Mem) + INST_3x(vpunpcklwd, kInstVpunpcklwd, YmmReg, YmmReg, YmmReg) + + INST_3x(vpxor, kInstVpxor, YmmReg, YmmReg, Mem) + INST_3x(vpxor, kInstVpxor, YmmReg, YmmReg, YmmReg) + + // 
-------------------------------------------------------------------------- + // [FMA3] + // -------------------------------------------------------------------------- + + INST_3x(vfmadd132pd, kInstVfmadd132pd, XmmReg, XmmReg, Mem) + INST_3x(vfmadd132pd, kInstVfmadd132pd, XmmReg, XmmReg, XmmReg) + INST_3x(vfmadd132pd, kInstVfmadd132pd, YmmReg, YmmReg, Mem) + INST_3x(vfmadd132pd, kInstVfmadd132pd, YmmReg, YmmReg, YmmReg) + + INST_3x(vfmadd132ps, kInstVfmadd132ps, XmmReg, XmmReg, Mem) + INST_3x(vfmadd132ps, kInstVfmadd132ps, XmmReg, XmmReg, XmmReg) + INST_3x(vfmadd132ps, kInstVfmadd132ps, YmmReg, YmmReg, Mem) + INST_3x(vfmadd132ps, kInstVfmadd132ps, YmmReg, YmmReg, YmmReg) + + INST_3x(vfmadd132sd, kInstVfmadd132sd, XmmReg, XmmReg, Mem) + INST_3x(vfmadd132sd, kInstVfmadd132sd, XmmReg, XmmReg, XmmReg) + + INST_3x(vfmadd132ss, kInstVfmadd132ss, XmmReg, XmmReg, Mem) + INST_3x(vfmadd132ss, kInstVfmadd132ss, XmmReg, XmmReg, XmmReg) + + INST_3x(vfmadd213pd, kInstVfmadd213pd, XmmReg, XmmReg, Mem) + INST_3x(vfmadd213pd, kInstVfmadd213pd, XmmReg, XmmReg, XmmReg) + INST_3x(vfmadd213pd, kInstVfmadd213pd, YmmReg, YmmReg, Mem) + INST_3x(vfmadd213pd, kInstVfmadd213pd, YmmReg, YmmReg, YmmReg) + + INST_3x(vfmadd213ps, kInstVfmadd213ps, XmmReg, XmmReg, Mem) + INST_3x(vfmadd213ps, kInstVfmadd213ps, XmmReg, XmmReg, XmmReg) + INST_3x(vfmadd213ps, kInstVfmadd213ps, YmmReg, YmmReg, Mem) + INST_3x(vfmadd213ps, kInstVfmadd213ps, YmmReg, YmmReg, YmmReg) + + INST_3x(vfmadd213sd, kInstVfmadd213sd, XmmReg, XmmReg, Mem) + INST_3x(vfmadd213sd, kInstVfmadd213sd, XmmReg, XmmReg, XmmReg) + + INST_3x(vfmadd213ss, kInstVfmadd213ss, XmmReg, XmmReg, Mem) + INST_3x(vfmadd213ss, kInstVfmadd213ss, XmmReg, XmmReg, XmmReg) + + INST_3x(vfmadd231pd, kInstVfmadd231pd, XmmReg, XmmReg, Mem) + INST_3x(vfmadd231pd, kInstVfmadd231pd, XmmReg, XmmReg, XmmReg) + INST_3x(vfmadd231pd, kInstVfmadd231pd, YmmReg, YmmReg, Mem) + INST_3x(vfmadd231pd, kInstVfmadd231pd, YmmReg, YmmReg, YmmReg) + + INST_3x(vfmadd231ps, kInstVfmadd231ps, XmmReg, XmmReg, Mem) + INST_3x(vfmadd231ps, kInstVfmadd231ps, XmmReg, XmmReg, XmmReg) + INST_3x(vfmadd231ps, kInstVfmadd231ps, YmmReg, YmmReg, Mem) + INST_3x(vfmadd231ps, kInstVfmadd231ps, YmmReg, YmmReg, YmmReg) + + INST_3x(vfmadd231sd, kInstVfmadd231sd, XmmReg, XmmReg, Mem) + INST_3x(vfmadd231sd, kInstVfmadd231sd, XmmReg, XmmReg, XmmReg) + + INST_3x(vfmadd231ss, kInstVfmadd231ss, XmmReg, XmmReg, Mem) + INST_3x(vfmadd231ss, kInstVfmadd231ss, XmmReg, XmmReg, XmmReg) + + INST_3x(vfmaddsub132pd, kInstVfmaddsub132pd, XmmReg, XmmReg, Mem) + INST_3x(vfmaddsub132pd, kInstVfmaddsub132pd, XmmReg, XmmReg, XmmReg) + INST_3x(vfmaddsub132pd, kInstVfmaddsub132pd, YmmReg, YmmReg, Mem) + INST_3x(vfmaddsub132pd, kInstVfmaddsub132pd, YmmReg, YmmReg, YmmReg) + + INST_3x(vfmaddsub132ps, kInstVfmaddsub132ps, XmmReg, XmmReg, Mem) + INST_3x(vfmaddsub132ps, kInstVfmaddsub132ps, XmmReg, XmmReg, XmmReg) + INST_3x(vfmaddsub132ps, kInstVfmaddsub132ps, YmmReg, YmmReg, Mem) + INST_3x(vfmaddsub132ps, kInstVfmaddsub132ps, YmmReg, YmmReg, YmmReg) + + INST_3x(vfmaddsub213pd, kInstVfmaddsub213pd, XmmReg, XmmReg, Mem) + INST_3x(vfmaddsub213pd, kInstVfmaddsub213pd, XmmReg, XmmReg, XmmReg) + INST_3x(vfmaddsub213pd, kInstVfmaddsub213pd, YmmReg, YmmReg, Mem) + INST_3x(vfmaddsub213pd, kInstVfmaddsub213pd, YmmReg, YmmReg, YmmReg) + + INST_3x(vfmaddsub213ps, kInstVfmaddsub213ps, XmmReg, XmmReg, Mem) + INST_3x(vfmaddsub213ps, kInstVfmaddsub213ps, XmmReg, XmmReg, XmmReg) + INST_3x(vfmaddsub213ps, kInstVfmaddsub213ps, YmmReg, YmmReg, Mem) + INST_3x(vfmaddsub213ps, 
kInstVfmaddsub213ps, YmmReg, YmmReg, YmmReg) + + INST_3x(vfmaddsub231pd, kInstVfmaddsub231pd, XmmReg, XmmReg, Mem) + INST_3x(vfmaddsub231pd, kInstVfmaddsub231pd, XmmReg, XmmReg, XmmReg) + INST_3x(vfmaddsub231pd, kInstVfmaddsub231pd, YmmReg, YmmReg, Mem) + INST_3x(vfmaddsub231pd, kInstVfmaddsub231pd, YmmReg, YmmReg, YmmReg) + + INST_3x(vfmaddsub231ps, kInstVfmaddsub231ps, XmmReg, XmmReg, Mem) + INST_3x(vfmaddsub231ps, kInstVfmaddsub231ps, XmmReg, XmmReg, XmmReg) + INST_3x(vfmaddsub231ps, kInstVfmaddsub231ps, YmmReg, YmmReg, Mem) + INST_3x(vfmaddsub231ps, kInstVfmaddsub231ps, YmmReg, YmmReg, YmmReg) + + INST_3x(vfmsub132pd, kInstVfmsub132pd, XmmReg, XmmReg, Mem) + INST_3x(vfmsub132pd, kInstVfmsub132pd, XmmReg, XmmReg, XmmReg) + INST_3x(vfmsub132pd, kInstVfmsub132pd, YmmReg, YmmReg, Mem) + INST_3x(vfmsub132pd, kInstVfmsub132pd, YmmReg, YmmReg, YmmReg) + + INST_3x(vfmsub132ps, kInstVfmsub132ps, XmmReg, XmmReg, Mem) + INST_3x(vfmsub132ps, kInstVfmsub132ps, XmmReg, XmmReg, XmmReg) + INST_3x(vfmsub132ps, kInstVfmsub132ps, YmmReg, YmmReg, Mem) + INST_3x(vfmsub132ps, kInstVfmsub132ps, YmmReg, YmmReg, YmmReg) + + INST_3x(vfmsub132sd, kInstVfmsub132sd, XmmReg, XmmReg, Mem) + INST_3x(vfmsub132sd, kInstVfmsub132sd, XmmReg, XmmReg, XmmReg) + + INST_3x(vfmsub132ss, kInstVfmsub132ss, XmmReg, XmmReg, Mem) + INST_3x(vfmsub132ss, kInstVfmsub132ss, XmmReg, XmmReg, XmmReg) + + INST_3x(vfmsub213pd, kInstVfmsub213pd, XmmReg, XmmReg, Mem) + INST_3x(vfmsub213pd, kInstVfmsub213pd, XmmReg, XmmReg, XmmReg) + INST_3x(vfmsub213pd, kInstVfmsub213pd, YmmReg, YmmReg, Mem) + INST_3x(vfmsub213pd, kInstVfmsub213pd, YmmReg, YmmReg, YmmReg) + + INST_3x(vfmsub213ps, kInstVfmsub213ps, XmmReg, XmmReg, Mem) + INST_3x(vfmsub213ps, kInstVfmsub213ps, XmmReg, XmmReg, XmmReg) + INST_3x(vfmsub213ps, kInstVfmsub213ps, YmmReg, YmmReg, Mem) + INST_3x(vfmsub213ps, kInstVfmsub213ps, YmmReg, YmmReg, YmmReg) + + INST_3x(vfmsub213sd, kInstVfmsub213sd, XmmReg, XmmReg, Mem) + INST_3x(vfmsub213sd, kInstVfmsub213sd, XmmReg, XmmReg, XmmReg) + + INST_3x(vfmsub213ss, kInstVfmsub213ss, XmmReg, XmmReg, Mem) + INST_3x(vfmsub213ss, kInstVfmsub213ss, XmmReg, XmmReg, XmmReg) + + INST_3x(vfmsub231pd, kInstVfmsub231pd, XmmReg, XmmReg, Mem) + INST_3x(vfmsub231pd, kInstVfmsub231pd, XmmReg, XmmReg, XmmReg) + INST_3x(vfmsub231pd, kInstVfmsub231pd, YmmReg, YmmReg, Mem) + INST_3x(vfmsub231pd, kInstVfmsub231pd, YmmReg, YmmReg, YmmReg) + + INST_3x(vfmsub231ps, kInstVfmsub231ps, XmmReg, XmmReg, Mem) + INST_3x(vfmsub231ps, kInstVfmsub231ps, XmmReg, XmmReg, XmmReg) + INST_3x(vfmsub231ps, kInstVfmsub231ps, YmmReg, YmmReg, Mem) + INST_3x(vfmsub231ps, kInstVfmsub231ps, YmmReg, YmmReg, YmmReg) + + INST_3x(vfmsub231sd, kInstVfmsub231sd, XmmReg, XmmReg, Mem) + INST_3x(vfmsub231sd, kInstVfmsub231sd, XmmReg, XmmReg, XmmReg) + + INST_3x(vfmsub231ss, kInstVfmsub231ss, XmmReg, XmmReg, Mem) + INST_3x(vfmsub231ss, kInstVfmsub231ss, XmmReg, XmmReg, XmmReg) + + INST_3x(vfmsubadd132pd, kInstVfmsubadd132pd, XmmReg, XmmReg, Mem) + INST_3x(vfmsubadd132pd, kInstVfmsubadd132pd, XmmReg, XmmReg, XmmReg) + INST_3x(vfmsubadd132pd, kInstVfmsubadd132pd, YmmReg, YmmReg, Mem) + INST_3x(vfmsubadd132pd, kInstVfmsubadd132pd, YmmReg, YmmReg, YmmReg) + + INST_3x(vfmsubadd132ps, kInstVfmsubadd132ps, XmmReg, XmmReg, Mem) + INST_3x(vfmsubadd132ps, kInstVfmsubadd132ps, XmmReg, XmmReg, XmmReg) + INST_3x(vfmsubadd132ps, kInstVfmsubadd132ps, YmmReg, YmmReg, Mem) + INST_3x(vfmsubadd132ps, kInstVfmsubadd132ps, YmmReg, YmmReg, YmmReg) + + INST_3x(vfmsubadd213pd, kInstVfmsubadd213pd, XmmReg, XmmReg, Mem) + 
INST_3x(vfmsubadd213pd, kInstVfmsubadd213pd, XmmReg, XmmReg, XmmReg) + INST_3x(vfmsubadd213pd, kInstVfmsubadd213pd, YmmReg, YmmReg, Mem) + INST_3x(vfmsubadd213pd, kInstVfmsubadd213pd, YmmReg, YmmReg, YmmReg) + + INST_3x(vfmsubadd213ps, kInstVfmsubadd213ps, XmmReg, XmmReg, Mem) + INST_3x(vfmsubadd213ps, kInstVfmsubadd213ps, XmmReg, XmmReg, XmmReg) + INST_3x(vfmsubadd213ps, kInstVfmsubadd213ps, YmmReg, YmmReg, Mem) + INST_3x(vfmsubadd213ps, kInstVfmsubadd213ps, YmmReg, YmmReg, YmmReg) + + INST_3x(vfmsubadd231pd, kInstVfmsubadd231pd, XmmReg, XmmReg, Mem) + INST_3x(vfmsubadd231pd, kInstVfmsubadd231pd, XmmReg, XmmReg, XmmReg) + INST_3x(vfmsubadd231pd, kInstVfmsubadd231pd, YmmReg, YmmReg, Mem) + INST_3x(vfmsubadd231pd, kInstVfmsubadd231pd, YmmReg, YmmReg, YmmReg) + + INST_3x(vfmsubadd231ps, kInstVfmsubadd231ps, XmmReg, XmmReg, Mem) + INST_3x(vfmsubadd231ps, kInstVfmsubadd231ps, XmmReg, XmmReg, XmmReg) + INST_3x(vfmsubadd231ps, kInstVfmsubadd231ps, YmmReg, YmmReg, Mem) + INST_3x(vfmsubadd231ps, kInstVfmsubadd231ps, YmmReg, YmmReg, YmmReg) + + INST_3x(vfnmadd132pd, kInstVfnmadd132pd, XmmReg, XmmReg, Mem) + INST_3x(vfnmadd132pd, kInstVfnmadd132pd, XmmReg, XmmReg, XmmReg) + INST_3x(vfnmadd132pd, kInstVfnmadd132pd, YmmReg, YmmReg, Mem) + INST_3x(vfnmadd132pd, kInstVfnmadd132pd, YmmReg, YmmReg, YmmReg) + + INST_3x(vfnmadd132ps, kInstVfnmadd132ps, XmmReg, XmmReg, Mem) + INST_3x(vfnmadd132ps, kInstVfnmadd132ps, XmmReg, XmmReg, XmmReg) + INST_3x(vfnmadd132ps, kInstVfnmadd132ps, YmmReg, YmmReg, Mem) + INST_3x(vfnmadd132ps, kInstVfnmadd132ps, YmmReg, YmmReg, YmmReg) + + INST_3x(vfnmadd132sd, kInstVfnmadd132sd, XmmReg, XmmReg, Mem) + INST_3x(vfnmadd132sd, kInstVfnmadd132sd, XmmReg, XmmReg, XmmReg) + + INST_3x(vfnmadd132ss, kInstVfnmadd132ss, XmmReg, XmmReg, Mem) + INST_3x(vfnmadd132ss, kInstVfnmadd132ss, XmmReg, XmmReg, XmmReg) + + INST_3x(vfnmadd213pd, kInstVfnmadd213pd, XmmReg, XmmReg, Mem) + INST_3x(vfnmadd213pd, kInstVfnmadd213pd, XmmReg, XmmReg, XmmReg) + INST_3x(vfnmadd213pd, kInstVfnmadd213pd, YmmReg, YmmReg, Mem) + INST_3x(vfnmadd213pd, kInstVfnmadd213pd, YmmReg, YmmReg, YmmReg) + + INST_3x(vfnmadd213ps, kInstVfnmadd213ps, XmmReg, XmmReg, Mem) + INST_3x(vfnmadd213ps, kInstVfnmadd213ps, XmmReg, XmmReg, XmmReg) + INST_3x(vfnmadd213ps, kInstVfnmadd213ps, YmmReg, YmmReg, Mem) + INST_3x(vfnmadd213ps, kInstVfnmadd213ps, YmmReg, YmmReg, YmmReg) + + INST_3x(vfnmadd213sd, kInstVfnmadd213sd, XmmReg, XmmReg, Mem) + INST_3x(vfnmadd213sd, kInstVfnmadd213sd, XmmReg, XmmReg, XmmReg) + + INST_3x(vfnmadd213ss, kInstVfnmadd213ss, XmmReg, XmmReg, Mem) + INST_3x(vfnmadd213ss, kInstVfnmadd213ss, XmmReg, XmmReg, XmmReg) + + INST_3x(vfnmadd231pd, kInstVfnmadd231pd, XmmReg, XmmReg, Mem) + INST_3x(vfnmadd231pd, kInstVfnmadd231pd, XmmReg, XmmReg, XmmReg) + INST_3x(vfnmadd231pd, kInstVfnmadd231pd, YmmReg, YmmReg, Mem) + INST_3x(vfnmadd231pd, kInstVfnmadd231pd, YmmReg, YmmReg, YmmReg) + + INST_3x(vfnmadd231ps, kInstVfnmadd231ps, XmmReg, XmmReg, Mem) + INST_3x(vfnmadd231ps, kInstVfnmadd231ps, XmmReg, XmmReg, XmmReg) + INST_3x(vfnmadd231ps, kInstVfnmadd231ps, YmmReg, YmmReg, Mem) + INST_3x(vfnmadd231ps, kInstVfnmadd231ps, YmmReg, YmmReg, YmmReg) + + INST_3x(vfnmadd231sd, kInstVfnmadd231sd, XmmReg, XmmReg, Mem) + INST_3x(vfnmadd231sd, kInstVfnmadd231sd, XmmReg, XmmReg, XmmReg) + + INST_3x(vfnmadd231ss, kInstVfnmadd231ss, XmmReg, XmmReg, Mem) + INST_3x(vfnmadd231ss, kInstVfnmadd231ss, XmmReg, XmmReg, XmmReg) + + INST_3x(vfnmsub132pd, kInstVfnmsub132pd, XmmReg, XmmReg, Mem) + INST_3x(vfnmsub132pd, kInstVfnmsub132pd, XmmReg, XmmReg, 
XmmReg) + INST_3x(vfnmsub132pd, kInstVfnmsub132pd, YmmReg, YmmReg, Mem) + INST_3x(vfnmsub132pd, kInstVfnmsub132pd, YmmReg, YmmReg, YmmReg) + + INST_3x(vfnmsub132ps, kInstVfnmsub132ps, XmmReg, XmmReg, Mem) + INST_3x(vfnmsub132ps, kInstVfnmsub132ps, XmmReg, XmmReg, XmmReg) + INST_3x(vfnmsub132ps, kInstVfnmsub132ps, YmmReg, YmmReg, Mem) + INST_3x(vfnmsub132ps, kInstVfnmsub132ps, YmmReg, YmmReg, YmmReg) + + INST_3x(vfnmsub132sd, kInstVfnmsub132sd, XmmReg, XmmReg, Mem) + INST_3x(vfnmsub132sd, kInstVfnmsub132sd, XmmReg, XmmReg, XmmReg) + + INST_3x(vfnmsub132ss, kInstVfnmsub132ss, XmmReg, XmmReg, Mem) + INST_3x(vfnmsub132ss, kInstVfnmsub132ss, XmmReg, XmmReg, XmmReg) + + INST_3x(vfnmsub213pd, kInstVfnmsub213pd, XmmReg, XmmReg, Mem) + INST_3x(vfnmsub213pd, kInstVfnmsub213pd, XmmReg, XmmReg, XmmReg) + INST_3x(vfnmsub213pd, kInstVfnmsub213pd, YmmReg, YmmReg, Mem) + INST_3x(vfnmsub213pd, kInstVfnmsub213pd, YmmReg, YmmReg, YmmReg) + + INST_3x(vfnmsub213ps, kInstVfnmsub213ps, XmmReg, XmmReg, Mem) + INST_3x(vfnmsub213ps, kInstVfnmsub213ps, XmmReg, XmmReg, XmmReg) + INST_3x(vfnmsub213ps, kInstVfnmsub213ps, YmmReg, YmmReg, Mem) + INST_3x(vfnmsub213ps, kInstVfnmsub213ps, YmmReg, YmmReg, YmmReg) + + INST_3x(vfnmsub213sd, kInstVfnmsub213sd, XmmReg, XmmReg, Mem) + INST_3x(vfnmsub213sd, kInstVfnmsub213sd, XmmReg, XmmReg, XmmReg) + + INST_3x(vfnmsub213ss, kInstVfnmsub213ss, XmmReg, XmmReg, Mem) + INST_3x(vfnmsub213ss, kInstVfnmsub213ss, XmmReg, XmmReg, XmmReg) + + INST_3x(vfnmsub231pd, kInstVfnmsub231pd, XmmReg, XmmReg, Mem) + INST_3x(vfnmsub231pd, kInstVfnmsub231pd, XmmReg, XmmReg, XmmReg) + INST_3x(vfnmsub231pd, kInstVfnmsub231pd, YmmReg, YmmReg, Mem) + INST_3x(vfnmsub231pd, kInstVfnmsub231pd, YmmReg, YmmReg, YmmReg) + + INST_3x(vfnmsub231ps, kInstVfnmsub231ps, XmmReg, XmmReg, Mem) + INST_3x(vfnmsub231ps, kInstVfnmsub231ps, XmmReg, XmmReg, XmmReg) + INST_3x(vfnmsub231ps, kInstVfnmsub231ps, YmmReg, YmmReg, Mem) + INST_3x(vfnmsub231ps, kInstVfnmsub231ps, YmmReg, YmmReg, YmmReg) + + INST_3x(vfnmsub231sd, kInstVfnmsub231sd, XmmReg, XmmReg, Mem) + INST_3x(vfnmsub231sd, kInstVfnmsub231sd, XmmReg, XmmReg, XmmReg) + + INST_3x(vfnmsub231ss, kInstVfnmsub231ss, XmmReg, XmmReg, Mem) + INST_3x(vfnmsub231ss, kInstVfnmsub231ss, XmmReg, XmmReg, XmmReg) + + // -------------------------------------------------------------------------- + // [BMI] + // -------------------------------------------------------------------------- + + INST_3x(andn, kInstAndn, GpReg, GpReg, GpReg) + INST_3x(andn, kInstAndn, GpReg, GpReg, Mem) + + INST_3x(bextr, kInstBextr, GpReg, GpReg, GpReg) + INST_3x(bextr, kInstBextr, GpReg, Mem, GpReg) + + INST_2x(blsi, kInstBlsi, GpReg, GpReg) + INST_2x(blsi, kInstBlsi, GpReg, Mem) + + INST_2x(blsmsk, kInstBlsmsk, GpReg, GpReg) + INST_2x(blsmsk, kInstBlsmsk, GpReg, Mem) + + INST_2x(blsr, kInstBlsr, GpReg, GpReg) + INST_2x(blsr, kInstBlsr, GpReg, Mem) + + INST_2x(tzcnt, kInstTzcnt, GpReg, GpReg) + INST_2x(tzcnt, kInstTzcnt, GpReg, Mem) + + // -------------------------------------------------------------------------- + // [LZCNT] + // -------------------------------------------------------------------------- + + INST_2x(lzcnt, kInstLzcnt, GpReg, GpReg) + INST_2x(lzcnt, kInstLzcnt, GpReg, Mem) + + // -------------------------------------------------------------------------- + // [BMI2] + // -------------------------------------------------------------------------- + + INST_3x(bzhi, kInstBzhi, GpReg, GpReg, GpReg) + INST_3x(bzhi, kInstBzhi, GpReg, Mem, GpReg) + + INST_3x(mulx, kInstMulx, GpReg, GpReg, GpReg) + 
+  INST_3x(mulx, kInstMulx, GpReg, GpReg, Mem)
+
+  INST_3x(pdep, kInstPdep, GpReg, GpReg, GpReg)
+  INST_3x(pdep, kInstPdep, GpReg, GpReg, Mem)
+
+  INST_3x(pext, kInstPext, GpReg, GpReg, GpReg)
+  INST_3x(pext, kInstPext, GpReg, GpReg, Mem)
+
+  INST_3i(rorx, kInstRorx, GpReg, GpReg, Imm)
+  INST_3i(rorx, kInstRorx, GpReg, Mem, Imm)
+
+  INST_3x(sarx, kInstSarx, GpReg, GpReg, GpReg)
+  INST_3x(sarx, kInstSarx, GpReg, Mem, GpReg)
+
+  INST_3x(shlx, kInstShlx, GpReg, GpReg, GpReg)
+  INST_3x(shlx, kInstShlx, GpReg, Mem, GpReg)
+
+  INST_3x(shrx, kInstShrx, GpReg, GpReg, GpReg)
+  INST_3x(shrx, kInstShrx, GpReg, Mem, GpReg)
+
+  // --------------------------------------------------------------------------
+  // [RDRAND]
+  // --------------------------------------------------------------------------
+
+  INST_1x(rdrand, kInstRdrand, GpReg)
+
+  // --------------------------------------------------------------------------
+  // [F16C]
+  // --------------------------------------------------------------------------
+
+  INST_2x(vcvtph2ps, kInstVcvtph2ps, XmmReg, XmmReg)
+  INST_2x(vcvtph2ps, kInstVcvtph2ps, XmmReg, Mem)
+  INST_2x(vcvtph2ps, kInstVcvtph2ps, YmmReg, XmmReg)
+  INST_2x(vcvtph2ps, kInstVcvtph2ps, YmmReg, Mem)
+
+  INST_3i(vcvtps2ph, kInstVcvtps2ph, XmmReg, XmmReg, Imm)
+  INST_3i(vcvtps2ph, kInstVcvtps2ph, Mem, XmmReg, Imm)
+  INST_3i(vcvtps2ph, kInstVcvtps2ph, XmmReg, YmmReg, Imm)
+  INST_3i(vcvtps2ph, kInstVcvtps2ph, Mem, YmmReg, Imm)
+};
+
+//! @}
+
+} // x86x64 namespace
+} // asmjit namespace
+
+// ============================================================================
+// [asmjit::x86]
+// ============================================================================
+
+#if defined(ASMJIT_BUILD_X86)
+
+namespace asmjit {
+namespace x86 {
+
+//! @addtogroup asmjit_x86x64
+//! @{
+
+//! @brief X86-only assembler.
+struct Assembler : public X86X64Assembler {
+  ASMJIT_NO_COPY(Assembler)
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_API Assembler(BaseRuntime* runtime);
+  ASMJIT_API virtual ~Assembler();
+
+  // --------------------------------------------------------------------------
+  // [Reloc]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_API virtual size_t _relocCode(void* dst, Ptr base) const;
+
+  // --------------------------------------------------------------------------
+  // [Emit]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_API virtual Error _emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, const Operand& o3);
+
+  // --------------------------------------------------------------------------
+  // [Options]
+  // --------------------------------------------------------------------------
+
+  //! @overload
+  ASMJIT_INLINE Assembler& short_()
+  { _options |= kInstOptionShortForm; return *this; }
+
+  //! @overload
+  ASMJIT_INLINE Assembler& long_()
+  { _options |= kInstOptionLongForm; return *this; }
+
+  //! @overload
+  ASMJIT_INLINE Assembler& taken()
+  { _options |= kInstOptionTaken; return *this; }
+
+  //! @overload
+  ASMJIT_INLINE Assembler& notTaken()
+  { _options |= kInstOptionNotTaken; return *this; }
+
+  //! @overload
+  ASMJIT_INLINE Assembler& lock()
+  { _options |= kInstOptionLock; return *this; }
+
+  //! @brief Force 3-byte VEX prefix.
+  ASMJIT_INLINE Assembler& vex3()
+  { _options |= kInstOptionVex3; return *this; }
+
+  // --------------------------------------------------------------------------
+  // [X86-Only Instructions]
+  // --------------------------------------------------------------------------
+
+  //! @brief Decimal adjust AL after addition (32-bit).
+  INST_0x(daa, kInstDaa)
+  //! @brief Decimal adjust AL after subtraction (32-bit).
+  INST_0x(das, kInstDas)
+
+  //! @brief Pop all Gp registers (EDI|ESI|EBP|EBX|EDX|ECX|EAX).
+  INST_0x(popa, kInstPopa)
+
+  //! @brief Push all Gp registers (EAX|ECX|EDX|EBX|original ESP|EBP|ESI|EDI).
+  INST_0x(pusha, kInstPusha)
+};
+
+//! @}
+
+} // x86 namespace
+} // asmjit namespace
+
+#endif // ASMJIT_BUILD_X86
+
+// ============================================================================
+// [asmjit::x64]
+// ============================================================================
+
+#if defined(ASMJIT_BUILD_X64)
+
+namespace asmjit {
+namespace x64 {
+
+//! @addtogroup asmjit_x86x64
+//! @{
+
+//! @brief X64-only assembler.
+struct Assembler : public X86X64Assembler {
+  ASMJIT_NO_COPY(Assembler)
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_API Assembler(BaseRuntime* runtime);
+  ASMJIT_API virtual ~Assembler();
+
+  // --------------------------------------------------------------------------
+  // [Reloc]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_API virtual size_t _relocCode(void* dst, Ptr base) const;
+
+  // --------------------------------------------------------------------------
+  // [Emit]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_API virtual Error _emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, const Operand& o3);
+
+  // --------------------------------------------------------------------------
+  // [Options]
+  // --------------------------------------------------------------------------
+
+  //! @overload
+  ASMJIT_INLINE Assembler& short_()
+  { _options |= kInstOptionShortForm; return *this; }
+
+  //! @overload
+  ASMJIT_INLINE Assembler& long_()
+  { _options |= kInstOptionLongForm; return *this; }
+
+  //! @overload
+  ASMJIT_INLINE Assembler& taken()
+  { _options |= kInstOptionTaken; return *this; }
+
+  //! @overload
+  ASMJIT_INLINE Assembler& notTaken()
+  { _options |= kInstOptionNotTaken; return *this; }
+
+  //! @overload
+  ASMJIT_INLINE Assembler& lock()
+  { _options |= kInstOptionLock; return *this; }
+
+  //! @brief Force rex prefix.
+  ASMJIT_INLINE Assembler& rex()
+  { _options |= kInstOptionRex; return *this; }
+
+  //! @brief Force 3-byte VEX prefix.
+  ASMJIT_INLINE Assembler& vex3()
+  { _options |= kInstOptionVex3; return *this; }
+
+  // --------------------------------------------------------------------------
+  // [X64-Only Instructions]
+  // --------------------------------------------------------------------------
+
+  //! @brief Convert dword to qword (RAX <- Sign Extend EAX).
+  INST_0x(cdqe, kInstCdqe)
+  //! @brief Convert qword to oword (RDX:RAX <- Sign Extend RAX).
+  INST_0x(cqo, kInstCqo)
+
+  //! @brief Compare RDX:RAX with the 128-bit memory operand (64-bit mode only).
+  INST_1x(cmpxchg16b, kInstCmpxchg16b, Mem)
+
+  //! @brief Move dword to qword with sign-extension.
+  INST_2x(movsxd, kInstMovsxd, GpReg, GpReg)
+  //! @overload
+  INST_2x(movsxd, kInstMovsxd, GpReg, Mem)
+
+  //! @brief Load ECX/RCX qwords from DS:[ESI/RSI] to RAX.
+  INST_0x(rep_lodsq, kInstRepLodsq)
+
+  //! @brief Move ECX/RCX qwords from DS:[ESI/RSI] to ES:[EDI/RDI].
+  INST_0x(rep_movsq, kInstRepMovsq)
+
+  //! @brief Fill ECX/RCX qwords at ES:[EDI/RDI] with RAX.
+  INST_0x(rep_stosq, kInstRepStosq)
+
+  //! @brief Repeated find nonmatching qwords in ES:[EDI/RDI] and DS:[ESI/RSI].
+  INST_0x(repe_cmpsq, kInstRepeCmpsq)
+
+  //! @brief Find non-RAX qword starting at ES:[EDI/RDI].
+  INST_0x(repe_scasq, kInstRepeScasq)
+
+  //! @brief Repeated find matching qwords in ES:[EDI/RDI] and DS:[ESI/RSI].
+  INST_0x(repne_cmpsq, kInstRepneCmpsq)
+
+  //! @brief Find RAX qword starting at ES:[EDI/RDI].
+  INST_0x(repne_scasq, kInstRepneScasq)
+
+  using X86X64Assembler::movq;
+
+  //! @overload
+  INST_2x(movq, kInstMovq, GpReg, MmReg)
+  //! @overload
+  INST_2x(movq, kInstMovq, MmReg, GpReg)
+
+  //! @overload
+  INST_2x(movq, kInstMovq, GpReg, XmmReg)
+  //! @overload
+  INST_2x(movq, kInstMovq, XmmReg, GpReg)
+
+  // --------------------------------------------------------------------------
+  // [AVX]
+  // --------------------------------------------------------------------------
+
+  INST_2x(vmovq, kInstVmovq, XmmReg, GpReg)
+  INST_2x(vmovq, kInstVmovq, GpReg, XmmReg)
+
+  INST_3i(vpextrq, kInstVpextrq, GpReg, XmmReg, Imm)
+  INST_3i(vpextrq, kInstVpextrq, Mem, XmmReg, Imm)
+
+  INST_4i(vpinsrq, kInstVpinsrq, XmmReg, XmmReg, GpReg, Imm)
+  INST_4i(vpinsrq, kInstVpinsrq, XmmReg, XmmReg, Mem, Imm)
+
+  // --------------------------------------------------------------------------
+  // [FSGSBASE]
+  // --------------------------------------------------------------------------
+
+  INST_1x(rdfsbase, kInstRdfsbase, GpReg)
+  INST_1x(rdgsbase, kInstRdgsbase, GpReg)
+  INST_1x(wrfsbase, kInstWrfsbase, GpReg)
+  INST_1x(wrgsbase, kInstWrgsbase, GpReg)
+};
+
+//! @}
+
+} // x64 namespace
+} // asmjit namespace
+
+#endif // ASMJIT_BUILD_X64
+
+// ============================================================================
+// [CodeGen-End]
+// ============================================================================
+
+#undef INST_0x
+
+#undef INST_1x
+#undef INST_1x_
+#undef INST_1i
+#undef INST_1i_
+#undef INST_1cc
+
+#undef INST_2x
+#undef INST_2x_
+#undef INST_2i
+#undef INST_2i_
+#undef INST_2cc
+
+#undef INST_3x
+#undef INST_3x_
+#undef INST_3i
+#undef INST_3i_
+
+// [Api-End]
+#include "../base/apiend.h"
+
+// [Guard]
+#endif // _ASMJIT_X86_X86ASSEMBLER_H
diff --git a/src/asmjit/x86/x86compiler.cpp b/src/asmjit/x86/x86compiler.cpp
new file mode 100644
index 0000000..20e1d63
--- /dev/null
+++ b/src/asmjit/x86/x86compiler.cpp
@@ -0,0 +1,627 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Export]
+#define ASMJIT_EXPORTS
+
+// [Guard]
+#include "../build.h"
+#if defined(ASMJIT_BUILD_X86) || defined(ASMJIT_BUILD_X64)
+
+// [Dependencies - AsmJit]
+#include "../base/intutil.h"
+#include "../base/string.h"
+#include "../x86/x86assembler.h"
+#include "../x86/x86compiler.h"
+#include "../x86/x86context_p.h"
+
+// [Api-Begin]
+#include "../base/apibegin.h"
+
+namespace asmjit {
+namespace x86x64 {
+
+// ============================================================================
+// [asmjit::x86x64::X86X64CallNode - Prototype]
+// ============================================================================
+
+Error X86X64CallNode::setPrototype(uint32_t conv, const FuncPrototype& p) {
+  return _x86Decl.setPrototype(conv, p);
+}
+
+// ============================================================================
+// [asmjit::x86x64::X86X64CallNode - Arg / Ret]
+// ============================================================================
+
+bool X86X64CallNode::_setArg(uint32_t i, const Operand& op) {
+  if ((i & ~kFuncArgHi) >= _x86Decl.getArgCount())
+    return false;
+
+  _args[i] = op;
+  return true;
+}
+
+bool X86X64CallNode::_setRet(uint32_t i, const Operand& op) {
+  if (i >= 2)
+    return false;
+
+  _ret[i] = op;
+  return true;
+}
+
+// ============================================================================
+// [asmjit::x86x64::X86X64Compiler - Construction / Destruction]
+// ============================================================================
+
+X86X64Compiler::X86X64Compiler(BaseRuntime* runtime) : BaseCompiler(runtime) {}
+X86X64Compiler::~X86X64Compiler() {}
+
+// ============================================================================
+// [asmjit::x86x64::X86X64Compiler - Inst]
+// ============================================================================
+
+//! @brief Get the compiler instruction item size, without operands assigned.
+static ASMJIT_INLINE size_t X86X64Compiler_getInstSize(uint32_t code) {
+  return (IntUtil::inInterval(code, _kInstJbegin, _kInstJend)) ? sizeof(JumpNode) : sizeof(InstNode);
+}
+
+static InstNode* X86X64Compiler_newInst(X86X64Compiler* self, void* p, uint32_t code, uint32_t options, Operand* opList, uint32_t opCount) {
+  if (IntUtil::inInterval(code, _kInstJbegin, _kInstJend)) {
+    JumpNode* node = new(p) JumpNode(self, code, options, opList, opCount);
+    TargetNode* jTarget = self->getTargetById(opList[0].getId());
+
+    node->addFlags(code == kInstJmp ? kNodeFlagIsJmp | kNodeFlagIsTaken : kNodeFlagIsJcc);
+    node->_target = jTarget;
+    node->_jumpNext = static_cast<JumpNode*>(jTarget->_from);
+
+    jTarget->_from = node;
+    jTarget->addNumRefs();
+
+    // An unconditional 'jmp' is always taken; a conditional jump can carry a
+    // hint, which is detected here.
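+    // Only kInstOptionTaken is inspected; kInstOptionNotTaken stays in the
+    // node's options (presumably consumed later when the jump is encoded).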
+    if (code == kInstJmp)
+      node->addFlags(kNodeFlagIsTaken);
+    else if (options & kInstOptionTaken)
+      node->addFlags(kNodeFlagIsTaken);
+
+    node->addOptions(options);
+    return node;
+  }
+  else {
+    InstNode* node = new(p) InstNode(self, code, options, opList, opCount);
+    node->addOptions(options);
+    return node;
+  }
+}
+
+InstNode* X86X64Compiler::newInst(uint32_t code) {
+  size_t size = X86X64Compiler_getInstSize(code);
+  InstNode* inst = static_cast<InstNode*>(_zoneAllocator.alloc(size));
+
+  if (inst == NULL)
+    goto _NoMemory;
+
+  return X86X64Compiler_newInst(this, inst, code, getOptionsAndClear(), NULL, 0);
+
+_NoMemory:
+  setError(kErrorNoHeapMemory);
+  return NULL;
+}
+
+InstNode* X86X64Compiler::newInst(uint32_t code, const Operand& o0) {
+  size_t size = X86X64Compiler_getInstSize(code);
+  InstNode* inst = static_cast<InstNode*>(_zoneAllocator.alloc(size + 1 * sizeof(Operand)));
+
+  if (inst == NULL)
+    goto _NoMemory;
+
+  {
+    Operand* opList = reinterpret_cast<Operand*>(reinterpret_cast<uint8_t*>(inst) + size);
+    opList[0] = o0;
+    return X86X64Compiler_newInst(this, inst, code, getOptionsAndClear(), opList, 1);
+  }
+
+_NoMemory:
+  setError(kErrorNoHeapMemory);
+  return NULL;
+}
+
+InstNode* X86X64Compiler::newInst(uint32_t code, const Operand& o0, const Operand& o1) {
+  size_t size = X86X64Compiler_getInstSize(code);
+  InstNode* inst = static_cast<InstNode*>(_zoneAllocator.alloc(size + 2 * sizeof(Operand)));
+
+  if (inst == NULL)
+    goto _NoMemory;
+
+  {
+    Operand* opList = reinterpret_cast<Operand*>(reinterpret_cast<uint8_t*>(inst) + size);
+    opList[0] = o0;
+    opList[1] = o1;
+    return X86X64Compiler_newInst(this, inst, code, getOptionsAndClear(), opList, 2);
+  }
+
+_NoMemory:
+  setError(kErrorNoHeapMemory);
+  return NULL;
+}
+
+InstNode* X86X64Compiler::newInst(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2) {
+  size_t size = X86X64Compiler_getInstSize(code);
+  InstNode* inst = static_cast<InstNode*>(_zoneAllocator.alloc(size + 3 * sizeof(Operand)));
+
+  if (inst == NULL)
+    goto _NoMemory;
+
+  {
+    Operand* opList = reinterpret_cast<Operand*>(reinterpret_cast<uint8_t*>(inst) + size);
+    opList[0] = o0;
+    opList[1] = o1;
+    opList[2] = o2;
+    return X86X64Compiler_newInst(this, inst, code, getOptionsAndClear(), opList, 3);
+  }
+
+_NoMemory:
+  setError(kErrorNoHeapMemory);
+  return NULL;
+}
+
+InstNode* X86X64Compiler::newInst(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, const Operand& o3) {
+  size_t size = X86X64Compiler_getInstSize(code);
+  InstNode* inst = static_cast<InstNode*>(_zoneAllocator.alloc(size + 4 * sizeof(Operand)));
+
+  if (inst == NULL)
+    goto _NoMemory;
+
+  {
+    Operand* opList = reinterpret_cast<Operand*>(reinterpret_cast<uint8_t*>(inst) + size);
+    opList[0] = o0;
+    opList[1] = o1;
+    opList[2] = o2;
+    opList[3] = o3;
+    return X86X64Compiler_newInst(this, inst, code, getOptionsAndClear(), opList, 4);
+  }
+
+_NoMemory:
+  setError(kErrorNoHeapMemory);
+  return NULL;
+}
+
+InstNode* X86X64Compiler::newInst(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, const Operand& o3, const Operand& o4) {
+  size_t size = X86X64Compiler_getInstSize(code);
+  InstNode* inst = static_cast<InstNode*>(_zoneAllocator.alloc(size + 5 * sizeof(Operand)));
+
+  if (inst == NULL)
+    goto _NoMemory;
+
+  {
+    Operand* opList = reinterpret_cast<Operand*>(reinterpret_cast<uint8_t*>(inst) + size);
+    opList[0] = o0;
+    opList[1] = o1;
+    opList[2] = o2;
+    opList[3] = o3;
+    opList[4] = o4;
+    return X86X64Compiler_newInst(this, inst, code, getOptionsAndClear(), opList, 5);
+  }
+
+_NoMemory:
+  setError(kErrorNoHeapMemory);
+  return NULL;
+}
+
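+// Memory layout produced by the newInst() overloads above: the operand array
+// is carved out of the same zone allocation, immediately after the node:
+//
+//   +------------------------+----+----+-----+
+//   | InstNode (or JumpNode) | o0 | o1 | ... |
+//   +------------------------+----+----+-----+
+//
+// This is why opList is computed as the node address plus the node size.
+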
+InstNode* X86X64Compiler::emit(uint32_t code) {
+  InstNode* node = newInst(code);
+  if (node == NULL)
+    return NULL;
+  return static_cast<InstNode*>(addNode(node));
+}
+
+InstNode* X86X64Compiler::emit(uint32_t code, const Operand& o0) {
+  InstNode* node = newInst(code, o0);
+  if (node == NULL)
+    return NULL;
+  return static_cast<InstNode*>(addNode(node));
+}
+
+InstNode* X86X64Compiler::emit(uint32_t code, const Operand& o0, const Operand& o1) {
+  InstNode* node = newInst(code, o0, o1);
+  if (node == NULL)
+    return NULL;
+  return static_cast<InstNode*>(addNode(node));
+}
+
+InstNode* X86X64Compiler::emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2) {
+  InstNode* node = newInst(code, o0, o1, o2);
+  if (node == NULL)
+    return NULL;
+  return static_cast<InstNode*>(addNode(node));
+}
+
+InstNode* X86X64Compiler::emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, const Operand& o3) {
+  InstNode* node = newInst(code, o0, o1, o2, o3);
+  if (node == NULL)
+    return NULL;
+  return static_cast<InstNode*>(addNode(node));
+}
+
+InstNode* X86X64Compiler::emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, const Operand& o3, const Operand& o4) {
+  InstNode* node = newInst(code, o0, o1, o2, o3, o4);
+  if (node == NULL)
+    return NULL;
+  return static_cast<InstNode*>(addNode(node));
+}
+
+InstNode* X86X64Compiler::emit(uint32_t code, int o0_) {
+  Imm o0(o0_);
+  InstNode* node = newInst(code, o0);
+  if (node == NULL)
+    return NULL;
+  return static_cast<InstNode*>(addNode(node));
+}
+
+InstNode* X86X64Compiler::emit(uint32_t code, const Operand& o0, int o1_) {
+  Imm o1(o1_);
+  InstNode* node = newInst(code, o0, o1);
+  if (node == NULL)
+    return NULL;
+  return static_cast<InstNode*>(addNode(node));
+}
+
+InstNode* X86X64Compiler::emit(uint32_t code, const Operand& o0, const Operand& o1, int o2_) {
+  Imm o2(o2_);
+  InstNode* node = newInst(code, o0, o1, o2);
+  if (node == NULL)
+    return NULL;
+  return static_cast<InstNode*>(addNode(node));
+}
+
+// ============================================================================
+// [asmjit::x86x64::X86X64Compiler - Func]
+// ============================================================================
+
+X86X64FuncNode* X86X64Compiler::newFunc(uint32_t conv, const FuncPrototype& p) {
+  X86X64FuncNode* func = newNode<X86X64FuncNode>();
+  Error error;
+
+  if (func == NULL)
+    goto _NoMemory;
+
+  // Create helper nodes.
+  func->_entryNode = newTarget();
+  func->_exitNode = newTarget();
+  func->_end = newNode<EndNode>();
+
+  if (func->_entryNode == NULL || func->_exitNode == NULL || func->_end == NULL)
+    goto _NoMemory;
+
+  // Emit the push/pop sequence by default.
+  func->_funcHints |= IntUtil::mask(kFuncHintPushPop);
+
+  // Function prototype.
+  if ((error = func->_x86Decl.setPrototype(conv, p)) != kErrorOk) {
+    setError(error);
+    return NULL;
+  }
+
+  // Function argument stack size. Since the function requires _argStackSize to
+  // be set, copy it from X86X64FuncDecl.
+  func->_argStackSize = func->_x86Decl.getArgStackSize();
+  func->_redZoneSize = static_cast<uint16_t>(func->_x86Decl.getRedZoneSize());
+  func->_spillZoneSize = static_cast<uint16_t>(func->_x86Decl.getSpillZoneSize());
+
+  // Expected/Required stack alignment.
+  func->_expectedStackAlignment = getRuntime()->getStackAlignment();
+  func->_requiredStackAlignment = 0;
+
+  // Allocate space for function arguments.
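+  // (The list is indexed by argument position; each entry stays NULL until
+  // X86X64Compiler::setArg() binds a variable to that argument.)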
+  func->_argList = NULL;
+  if (func->getArgCount() != 0) {
+    func->_argList = _zoneAllocator.allocT<VarData*>(func->getArgCount() * sizeof(VarData*));
+    if (func->_argList == NULL)
+      goto _NoMemory;
+    ::memset(func->_argList, 0, func->getArgCount() * sizeof(VarData*));
+  }
+
+  return func;
+
+_NoMemory:
+  setError(kErrorNoHeapMemory);
+  return NULL;
+}
+
+X86X64FuncNode* X86X64Compiler::addFunc(uint32_t conv, const FuncPrototype& p) {
+  X86X64FuncNode* func = newFunc(conv, p);
+
+  if (func == NULL) {
+    setError(kErrorNoHeapMemory);
+    return NULL;
+  }
+
+  ASMJIT_ASSERT(_func == NULL);
+  _func = func;
+
+  addNode(func);
+  addNode(func->getEntryNode());
+
+  return func;
+}
+
+EndNode* X86X64Compiler::endFunc() {
+  X86X64FuncNode* func = getFunc();
+  ASMJIT_ASSERT(func != NULL);
+
+  addNode(func->getExitNode());
+  addNode(func->getEnd());
+
+  func->addFuncFlags(kFuncFlagIsFinished);
+  _func = NULL;
+
+  return func->getEnd();
+}
+
+// ============================================================================
+// [asmjit::x86x64::X86X64Compiler - Ret]
+// ============================================================================
+
+RetNode* X86X64Compiler::newRet(const Operand& o0, const Operand& o1) {
+  RetNode* node = newNode<RetNode>(o0, o1);
+  if (node == NULL)
+    goto _NoMemory;
+  return node;
+
+_NoMemory:
+  setError(kErrorNoHeapMemory);
+  return NULL;
+}
+
+RetNode* X86X64Compiler::addRet(const Operand& o0, const Operand& o1) {
+  RetNode* node = newRet(o0, o1);
+  if (node == NULL)
+    return node;
+  return static_cast<RetNode*>(addNode(node));
+}
+
+// ============================================================================
+// [asmjit::x86x64::X86X64Compiler - Call]
+// ============================================================================
+
+X86X64CallNode* X86X64Compiler::newCall(const Operand& o0, uint32_t conv, const FuncPrototype& p) {
+  X86X64CallNode* node = newNode<X86X64CallNode>(o0);
+  Error error;
+  uint32_t nArgs;
+
+  if (node == NULL)
+    goto _NoMemory;
+
+  if ((error = node->_x86Decl.setPrototype(conv, p)) != kErrorOk) {
+    setError(error);
+    return NULL;
+  }
+
+  // If there are no arguments, skip the allocation.
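+  // Otherwise the operand array is zero-initialized below so that unassigned
+  // arguments read as empty operands until _setArg() fills them in.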
+  if ((nArgs = p.getArgCount()) == 0)
+    return node;
+
+  node->_args = static_cast<Operand*>(_zoneAllocator.alloc(nArgs * sizeof(Operand)));
+  if (node->_args == NULL)
+    goto _NoMemory;
+
+  ::memset(node->_args, 0, nArgs * sizeof(Operand));
+  return node;
+
+_NoMemory:
+  setError(kErrorNoHeapMemory);
+  return NULL;
+}
+
+X86X64CallNode* X86X64Compiler::addCall(const Operand& o0, uint32_t conv, const FuncPrototype& p) {
+  X86X64CallNode* node = newCall(o0, conv, p);
+  if (node == NULL)
+    return NULL;
+  return static_cast<X86X64CallNode*>(addNode(node));
+}
+
+// ============================================================================
+// [asmjit::x86x64::X86X64Compiler - Vars]
+// ============================================================================
+
+Error X86X64Compiler::setArg(uint32_t argIndex, BaseVar& var) {
+  X86X64FuncNode* func = getFunc();
+
+  if (func == NULL)
+    return kErrorInvalidArgument;
+
+  if (!isVarCreated(var))
+    return kErrorInvalidState;
+
+  VarData* vd = getVd(var);
+  func->setArg(argIndex, vd);
+
+  return kErrorOk;
+}
+
+Error X86X64Compiler::_newVar(BaseVar* var, uint32_t vType, const char* name) {
+  ASMJIT_ASSERT(vType < kVarTypeCount);
+
+  vType = _targetVarMapping[vType];
+  const VarInfo& vInfo = _varInfo[vType];
+
+  VarData* vd = _newVd(vType, vInfo.getSize(), vInfo.getClass(), name);
+  if (vd == NULL) {
+    static_cast<Var*>(var)->reset();
+    return getError();
+  }
+
+  var->_init_packed_op_sz_w0_id(kOperandTypeVar, vd->getSize(), vInfo.getReg() << 8, vd->getId());
+  var->_vreg.vType = vType;
+  return kErrorOk;
+}
+
+// ============================================================================
+// [asmjit::x86x64::X86X64Compiler - Stack]
+// ============================================================================
+
+Error X86X64Compiler::_newStack(BaseMem* mem, uint32_t size, uint32_t alignment, const char* name) {
+  if (size == 0)
+    return kErrorInvalidArgument;
+
+  if (alignment > 64)
+    alignment = 64;
+
+  VarData* vd = _newVd(kVarTypeInvalid, size, kRegClassInvalid, name);
+  if (vd == NULL) {
+    static_cast<Mem*>(mem)->reset();
+    return getError();
+  }
+
+  vd->_isStack = true;
+  vd->_alignment = static_cast<uint8_t>(alignment);
+
+  static_cast<Mem*>(mem)->_init(kMemTypeStackIndex, vd->getId(), 0, 0);
+  return kErrorOk;
+}
+
+// ============================================================================
+// [asmjit::x86x64::X86X64Compiler - Make]
+// ============================================================================
+
+template<typename Assembler>
+static ASMJIT_INLINE void* X86X64Compiler_make(X86X64Compiler* self) {
+  Assembler assembler(self->_runtime);
+  BaseLogger* logger = self->_logger;
+
+  if (logger) {
+    assembler.setLogger(logger);
+  }
+
+  assembler._features = self->_features;
+
+  if (self->serialize(assembler) != kErrorOk) {
+    return NULL;
+  }
+
+  if (assembler.getError() != kErrorOk) {
+    self->setError(assembler.getError());
+    return NULL;
+  }
+
+  void* result = assembler.make();
+  if (logger) {
+    logger->logFormat(kLoggerStyleComment,
+      "*** COMPILER SUCCESS - Wrote %u bytes, code: %u, trampolines: %u.\n\n",
+      static_cast<unsigned int>(assembler.getCodeSize()),
+      static_cast<unsigned int>(assembler.getOffset()),
+      static_cast<unsigned int>(assembler.getTrampolineSize()));
+  }
+
+  return result;
+}
+
+void* X86X64Compiler::make() {
+#if defined(ASMJIT_BUILD_X86) && !defined(ASMJIT_BUILD_X64)
+  return X86X64Compiler_make<x86::Assembler>(this);
+#elif !defined(ASMJIT_BUILD_X86) && defined(ASMJIT_BUILD_X64)
+  return X86X64Compiler_make<x64::Assembler>(this);
+#else
+  if (_arch == kArchX86)
+    return X86X64Compiler_make<x86::Assembler>(this);
+  else
+    return X86X64Compiler_make<x64::Assembler>(this);
+#endif // ASMJIT_BUILD_X86 && ASMJIT_BUILD_X64
+}
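+
+// A minimal, hypothetical driver for the make() path (sketch only: the names
+// JitRuntime, kFuncConvHost and FuncBuilder0 are assumptions based on this
+// commit's runtime and function headers, not verified against them):
+//
+//   JitRuntime runtime;
+//   x86::Compiler c(&runtime);
+//   c.addFunc(kFuncConvHost, FuncBuilder0<int>());
+//   // ... build the function body through the INST_* interface ...
+//   c.endFunc();
+//   void* fn = c.make(); // Serializes through an Assembler, then relocates.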
+
+// ============================================================================
+// [asmjit::x86x64::X86X64Compiler - Assemble]
+// ============================================================================
+
+Error X86X64Compiler::serialize(BaseAssembler& assembler) {
+  if (_firstNode == NULL)
+    return kErrorOk;
+
+  X86X64Context context(this);
+  Error error = kErrorOk;
+
+  BaseNode* node = _firstNode;
+  BaseNode* start;
+
+  // Find each function and use the context to translate/emit it.
+  do {
+    start = node;
+
+    if (node->getType() == kNodeTypeFunc) {
+      node = static_cast<X86X64FuncNode*>(start)->getEnd();
+      error = context.compile(static_cast<X86X64FuncNode*>(start));
+
+      if (error != kErrorOk)
+        goto _Error;
+    }
+
+    do {
+      node = node->getNext();
+    } while (node != NULL && node->getType() != kNodeTypeFunc);
+
+    error = context.serialize(&assembler, start, node);
+    if (error != kErrorOk)
+      goto _Error;
+    context.cleanup();
+  } while (node != NULL);
+  return kErrorOk;
+
+_Error:
+  context.cleanup();
+  return error;
+}
+
+} // x86x64 namespace
+} // asmjit namespace
+
+// ============================================================================
+// [asmjit::x86]
+// ============================================================================
+
+#if defined(ASMJIT_BUILD_X86)
+
+namespace asmjit {
+namespace x86 {
+
+Compiler::Compiler(BaseRuntime* runtime) : X86X64Compiler(runtime) {
+  _arch = kArchX86;
+  _regSize = 4;
+  _targetVarMapping = _varMapping;
+}
+
+Compiler::~Compiler() {}
+
+} // x86 namespace
+} // asmjit namespace
+
+#endif // ASMJIT_BUILD_X86
+
+// ============================================================================
+// [asmjit::x64]
+// ============================================================================
+
+#if defined(ASMJIT_BUILD_X64)
+
+namespace asmjit {
+namespace x64 {
+
+Compiler::Compiler(BaseRuntime* runtime) : X86X64Compiler(runtime) {
+  _arch = kArchX64;
+  _regSize = 8;
+  _targetVarMapping = _varMapping;
+}
+
+Compiler::~Compiler() {}
+
+} // x64 namespace
+} // asmjit namespace
+
+#endif // ASMJIT_BUILD_X64
+
+// [Api-End]
+#include "../base/apiend.h"
+
+// [Guard]
+#endif // ASMJIT_BUILD_X86 || ASMJIT_BUILD_X64
diff --git a/src/asmjit/x86/x86compiler.h b/src/asmjit/x86/x86compiler.h
new file mode 100644
index 0000000..f122fa7
--- /dev/null
+++ b/src/asmjit/x86/x86compiler.h
@@ -0,0 +1,4201 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Guard]
+#ifndef _ASMJIT_X86_X86COMPILER_H
+#define _ASMJIT_X86_X86COMPILER_H
+
+// [Dependencies - AsmJit]
+#include "../base/compiler.h"
+#include "../x86/x86assembler.h"
+#include "../x86/x86defs.h"
+#include "../x86/x86func.h"
+
+// [Api-Begin]
+#include "../base/apibegin.h"
+
+namespace asmjit {
+namespace x86x64 {
+
+//! @addtogroup asmjit_x86x64
+//!
@{ + +// ============================================================================ +// [CodeGen-Begin] +// ============================================================================ + +#define INST_0x(_Inst_, _Code_) \ + ASMJIT_INLINE InstNode* _Inst_() { \ + return emit(_Code_); \ + } + +#define INST_1x(_Inst_, _Code_, _Op0_) \ + ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0) { \ + return emit(_Code_, o0); \ + } + +#define INST_1x_(_Inst_, _Code_, _Op0_, _Cond_) \ + ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0) { \ + ASMJIT_ASSERT(_Cond_); \ + return emit(_Code_, o0); \ + } + +#define INST_1i(_Inst_, _Code_, _Op0_) \ + ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0) { \ + return emit(_Code_, o0); \ + } \ + /* @overload */ \ + ASMJIT_INLINE InstNode* _Inst_(int o0) { \ + return emit(_Code_, o0); \ + } + +#define INST_1i_(_Inst_, _Code_, _Op0_, _Cond_) \ + ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0) { \ + ASMJIT_ASSERT(_Cond_); \ + return emit(_Code_, o0); \ + } \ + /* @overload */ \ + ASMJIT_INLINE InstNode* _Inst_(int o0) { \ + ASMJIT_ASSERT(_Cond_); \ + return emit(_Code_, o0); \ + } + +#define INST_1cc(_Inst_, _Code_, _Translate_, _Op0_) \ + ASMJIT_INLINE InstNode* _Inst_(uint32_t cc, const _Op0_& o0) { \ + return emit(_Translate_(cc), o0); \ + } \ + \ + ASMJIT_INLINE InstNode* _Inst_##a(const _Op0_& o0) { return emit(_Code_##a, o0); } \ + ASMJIT_INLINE InstNode* _Inst_##ae(const _Op0_& o0) { return emit(_Code_##ae, o0); } \ + ASMJIT_INLINE InstNode* _Inst_##b(const _Op0_& o0) { return emit(_Code_##b, o0); } \ + ASMJIT_INLINE InstNode* _Inst_##be(const _Op0_& o0) { return emit(_Code_##be, o0); } \ + ASMJIT_INLINE InstNode* _Inst_##c(const _Op0_& o0) { return emit(_Code_##c, o0); } \ + ASMJIT_INLINE InstNode* _Inst_##e(const _Op0_& o0) { return emit(_Code_##e, o0); } \ + ASMJIT_INLINE InstNode* _Inst_##g(const _Op0_& o0) { return emit(_Code_##g, o0); } \ + ASMJIT_INLINE InstNode* _Inst_##ge(const _Op0_& o0) { return emit(_Code_##ge, o0); } \ + ASMJIT_INLINE InstNode* _Inst_##l(const _Op0_& o0) { return emit(_Code_##l, o0); } \ + ASMJIT_INLINE InstNode* _Inst_##le(const _Op0_& o0) { return emit(_Code_##le, o0); } \ + ASMJIT_INLINE InstNode* _Inst_##na(const _Op0_& o0) { return emit(_Code_##na, o0); } \ + ASMJIT_INLINE InstNode* _Inst_##nae(const _Op0_& o0) { return emit(_Code_##nae, o0); } \ + ASMJIT_INLINE InstNode* _Inst_##nb(const _Op0_& o0) { return emit(_Code_##nb, o0); } \ + ASMJIT_INLINE InstNode* _Inst_##nbe(const _Op0_& o0) { return emit(_Code_##nbe, o0); } \ + ASMJIT_INLINE InstNode* _Inst_##nc(const _Op0_& o0) { return emit(_Code_##nc, o0); } \ + ASMJIT_INLINE InstNode* _Inst_##ne(const _Op0_& o0) { return emit(_Code_##ne, o0); } \ + ASMJIT_INLINE InstNode* _Inst_##ng(const _Op0_& o0) { return emit(_Code_##ng, o0); } \ + ASMJIT_INLINE InstNode* _Inst_##nge(const _Op0_& o0) { return emit(_Code_##nge, o0); } \ + ASMJIT_INLINE InstNode* _Inst_##nl(const _Op0_& o0) { return emit(_Code_##nl, o0); } \ + ASMJIT_INLINE InstNode* _Inst_##nle(const _Op0_& o0) { return emit(_Code_##nle, o0); } \ + ASMJIT_INLINE InstNode* _Inst_##no(const _Op0_& o0) { return emit(_Code_##no, o0); } \ + ASMJIT_INLINE InstNode* _Inst_##np(const _Op0_& o0) { return emit(_Code_##np, o0); } \ + ASMJIT_INLINE InstNode* _Inst_##ns(const _Op0_& o0) { return emit(_Code_##ns, o0); } \ + ASMJIT_INLINE InstNode* _Inst_##nz(const _Op0_& o0) { return emit(_Code_##nz, o0); } \ + ASMJIT_INLINE InstNode* _Inst_##o(const _Op0_& o0) { return emit(_Code_##o, o0); } \ + ASMJIT_INLINE InstNode* _Inst_##p(const 
_Op0_& o0) { return emit(_Code_##p, o0); } \ + ASMJIT_INLINE InstNode* _Inst_##pe(const _Op0_& o0) { return emit(_Code_##pe, o0); } \ + ASMJIT_INLINE InstNode* _Inst_##po(const _Op0_& o0) { return emit(_Code_##po, o0); } \ + ASMJIT_INLINE InstNode* _Inst_##s(const _Op0_& o0) { return emit(_Code_##s, o0); } \ + ASMJIT_INLINE InstNode* _Inst_##z(const _Op0_& o0) { return emit(_Code_##z, o0); } + +#define INST_2x(_Inst_, _Code_, _Op0_, _Op1_) \ + ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, const _Op1_& o1) { \ + return emit(_Code_, o0, o1); \ + } + +#define INST_2x_(_Inst_, _Code_, _Op0_, _Op1_, _Cond_) \ + ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, const _Op1_& o1) { \ + ASMJIT_ASSERT(_Cond_); \ + return emit(_Code_, o0, o1); \ + } + +#define INST_2i(_Inst_, _Code_, _Op0_, _Op1_) \ + ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, const _Op1_& o1) { \ + return emit(_Code_, o0, o1); \ + } \ + /* @overload */ \ + ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, int o1) { \ + return emit(_Code_, o0, o1); \ + } + +#define INST_2i_(_Inst_, _Code_, _Op0_, _Op1_, _Cond_) \ + ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, const _Op1_& o1) { \ + ASMJIT_ASSERT(_Cond_); \ + return emit(_Code_, o0, o1); \ + } \ + /* @overload */ \ + ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, int o1) { \ + ASMJIT_ASSERT(_Cond_); \ + return emit(_Code_, o0, o1); \ + } + +#define INST_2cc(_Inst_, _Code_, _Translate_, _Op0_, _Op1_) \ + ASMJIT_INLINE InstNode* _Inst_(uint32_t cc, const _Op0_& o0, const _Op1_& o1) { \ + return emit(_Translate_(cc), o0, o1); \ + } \ + \ + ASMJIT_INLINE InstNode* _Inst_##a(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##a, o0, o1); } \ + ASMJIT_INLINE InstNode* _Inst_##ae(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##ae, o0, o1); } \ + ASMJIT_INLINE InstNode* _Inst_##b(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##b, o0, o1); } \ + ASMJIT_INLINE InstNode* _Inst_##be(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##be, o0, o1); } \ + ASMJIT_INLINE InstNode* _Inst_##c(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##c, o0, o1); } \ + ASMJIT_INLINE InstNode* _Inst_##e(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##e, o0, o1); } \ + ASMJIT_INLINE InstNode* _Inst_##g(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##g, o0, o1); } \ + ASMJIT_INLINE InstNode* _Inst_##ge(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##ge, o0, o1); } \ + ASMJIT_INLINE InstNode* _Inst_##l(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##l, o0, o1); } \ + ASMJIT_INLINE InstNode* _Inst_##le(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##le, o0, o1); } \ + ASMJIT_INLINE InstNode* _Inst_##na(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##na, o0, o1); } \ + ASMJIT_INLINE InstNode* _Inst_##nae(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##nae, o0, o1); } \ + ASMJIT_INLINE InstNode* _Inst_##nb(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##nb, o0, o1); } \ + ASMJIT_INLINE InstNode* _Inst_##nbe(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##nbe, o0, o1); } \ + ASMJIT_INLINE InstNode* _Inst_##nc(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##nc, o0, o1); } \ + ASMJIT_INLINE InstNode* _Inst_##ne(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##ne, o0, o1); } \ + ASMJIT_INLINE InstNode* _Inst_##ng(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##ng, o0, o1); } \ + ASMJIT_INLINE InstNode* _Inst_##nge(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##nge, o0, o1); } \ + 
ASMJIT_INLINE InstNode* _Inst_##nl(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##nl, o0, o1); } \ + ASMJIT_INLINE InstNode* _Inst_##nle(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##nle, o0, o1); } \ + ASMJIT_INLINE InstNode* _Inst_##no(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##no, o0, o1); } \ + ASMJIT_INLINE InstNode* _Inst_##np(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##np, o0, o1); } \ + ASMJIT_INLINE InstNode* _Inst_##ns(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##ns, o0, o1); } \ + ASMJIT_INLINE InstNode* _Inst_##nz(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##nz, o0, o1); } \ + ASMJIT_INLINE InstNode* _Inst_##o(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##o, o0, o1); } \ + ASMJIT_INLINE InstNode* _Inst_##p(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##p, o0, o1); } \ + ASMJIT_INLINE InstNode* _Inst_##pe(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##pe, o0, o1); } \ + ASMJIT_INLINE InstNode* _Inst_##po(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##po, o0, o1); } \ + ASMJIT_INLINE InstNode* _Inst_##s(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##s, o0, o1); } \ + ASMJIT_INLINE InstNode* _Inst_##z(const _Op0_& o0, const _Op1_& o1) { return emit(_Code_##z, o0, o1); } + +#define INST_3x(_Inst_, _Code_, _Op0_, _Op1_, _Op2_) \ + ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2) { \ + return emit(_Code_, o0, o1, o2); \ + } + +#define INST_3x_(_Inst_, _Code_, _Op0_, _Op1_, _Op2_, _Cond_) \ + ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2) { \ + ASMJIT_ASSERT(_Cond_); \ + return emit(_Code_, o0, o1, o2); \ + } + +#define INST_3i(_Inst_, _Code_, _Op0_, _Op1_, _Op2_) \ + ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2) { \ + return emit(_Code_, o0, o1, o2); \ + } \ + \ + /* @overload */ \ + ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, const _Op1_& o1, int o2) { \ + return emit(_Code_, o0, o1, o2); \ + } + +#define INST_3i_(_Inst_, _Code_, _Op0_, _Op1_, _Op2_, _Cond_) \ + ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2) { \ + ASMJIT_ASSERT(_Cond_); \ + return emit(_Code_, o0, o1, o2); \ + } \ + /* @overload */ \ + ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, const _Op1_& o1, int o2) { \ + ASMJIT_ASSERT(_Cond_); \ + return emit(_Code_, o0, o1, o2); \ + } + +// ============================================================================ +// [Forward Declarations] +// ============================================================================ + +struct X86X64CallNode; +struct X86X64FuncNode; +struct VarState; + +// ============================================================================ +// [asmjit::x86x64::kVarAttr] +// ============================================================================ + +//! @brief X86/X64 VarAttr flags. +ASMJIT_ENUM(kVarAttr) { + kVarAttrGpbLo = 0x10000000, + kVarAttrGpbHi = 0x20000000 +}; + +// ============================================================================ +// [asmjit::x86x64::VarInst] +// ============================================================================ + +struct VarInst : public BaseVarInst { + // -------------------------------------------------------------------------- + // [Accessors] + // -------------------------------------------------------------------------- + + //! @brief Get variable-attributes list as VarAttr data. 
+  ASMJIT_INLINE VarAttr* getVaList() const
+  { return const_cast<VarAttr*>(_list); }
+
+  //! @brief Get variable-attributes list as VarAttr data (by class).
+  ASMJIT_INLINE VarAttr* getVaListByClass(uint32_t c) const
+  { return const_cast<VarAttr*>(_list) + _start.get(c); }
+
+  //! @brief Get position of variables (by class).
+  ASMJIT_INLINE uint32_t getVaStart(uint32_t c) const
+  { return _start.get(c); }
+
+  //! @brief Get count of variables (all).
+  ASMJIT_INLINE uint32_t getVaCount() const
+  { return _vaCount; }
+
+  //! @brief Get count of variables (by class).
+  ASMJIT_INLINE uint32_t getVaCountByClass(uint32_t c) const
+  { return _count.get(c); }
+
+  //! @brief Get VarAttr at @a index.
+  ASMJIT_INLINE VarAttr* getVa(uint32_t index) const
+  {
+    ASMJIT_ASSERT(index < _vaCount);
+    return getVaList() + index;
+  }
+
+  //! @brief Get VarAttr of @a c class at @a index.
+  ASMJIT_INLINE VarAttr* getVaByClass(uint32_t c, uint32_t index) const
+  {
+    ASMJIT_ASSERT(index < _count._regs[c]);
+    return getVaListByClass(c) + index;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Utils]
+  // --------------------------------------------------------------------------
+
+  //! @brief Find VarAttr.
+  ASMJIT_INLINE VarAttr* findVa(VarData* vd) const
+  {
+    VarAttr* list = getVaList();
+    uint32_t count = getVaCount();
+
+    for (uint32_t i = 0; i < count; i++)
+      if (list[i].getVd() == vd)
+        return &list[i];
+
+    return NULL;
+  }
+
+  //! @brief Find VarAttr (by class).
+  ASMJIT_INLINE VarAttr* findVaByClass(uint32_t c, VarData* vd) const
+  {
+    VarAttr* list = getVaListByClass(c);
+    uint32_t count = getVaCountByClass(c);
+
+    for (uint32_t i = 0; i < count; i++)
+      if (list[i].getVd() == vd)
+        return &list[i];
+
+    return NULL;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! @brief Variables count.
+  uint32_t _vaCount;
+
+  //! @brief Special registers on input.
+  //!
+  //! Special register(s) restricted to one or more physical register. If there
+  //! is more than one special register it means that we have to duplicate the
+  //! variable content to all of them (it means that the same variable was used
+  //! by two or more operands). We forget about duplicates after the register
+  //! allocation finishes and marks all duplicates as non-assigned.
+  RegMask _inRegs;
+
+  //! @brief Special registers on output.
+  //!
+  //! Special register(s) used on output. Each variable can have only one
+  //! special register on the output, 'VarInst' contains all registers from
+  //! all 'VarAttr's.
+  RegMask _outRegs;
+
+  //! @brief Clobbered registers (by a function call).
+  RegMask _clobberedRegs;
+
+  //! @brief Start indexes of variables per register class.
+  RegCount _start;
+  //! @brief Count of variables per register class.
+  RegCount _count;
+
+  //! @brief VarAttr list.
+  VarAttr _list[1];
+};
+
+// ============================================================================
+// [asmjit::x86x64::StateCell]
+// ============================================================================
+
+//! @brief X86/X64 state-cell.
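+//!
+//! A cell packs a 2-bit variable state into a single byte. A minimal usage
+//! sketch (assuming @c kVarStateUnused, @c kVarStateReg and @c kVarStateMem
+//! are the 2-bit state values, with @c kVarStateUnused being zero):
+//!
+//! @code
+//! StateCell cell;
+//! cell.reset();                 // _packed == 0, state is kVarStateUnused.
+//! cell.setState(kVarStateReg);  // Variable is allocated in a register.
+//! uint32_t s = cell.getState(); // Reads the 2-bit state back.
+//! @endcode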
+union StateCell {
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE uint32_t getState() const { return _state; }
+  ASMJIT_INLINE void setState(uint32_t state) { _state = static_cast<uint8_t>(state); }
+
+  // --------------------------------------------------------------------------
+  // [Reset]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE void reset() { _packed = 0; }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  uint8_t _packed;
+
+  struct {
+    uint8_t _state : 2;
+    uint8_t _unused : 6;
+  };
+};
+
+// ============================================================================
+// [asmjit::x86x64::VarState]
+// ============================================================================
+
+//! @brief X86/X64 state.
+struct VarState : BaseVarState {
+  enum {
+    //! @brief Base index for Gp registers.
+    kGpIndex = 0,
+    //! @brief Count of Gp registers.
+    kGpCount = 16,
+
+    //! @brief Base index for Mm registers.
+    kMmIndex = kGpIndex + kGpCount,
+    //! @brief Count of Mm registers.
+    kMmCount = 8,
+
+    //! @brief Base index for Xmm registers.
+    kXmmIndex = kMmIndex + kMmCount,
+    //! @brief Count of Xmm registers.
+    kXmmCount = 16,
+
+    //! @brief Count of all registers in @ref VarState.
+    kAllCount = kXmmIndex + kXmmCount
+  };
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE VarData** getList()
+  { return _list; }
+
+  ASMJIT_INLINE VarData** getListByClass(uint32_t c)
+  {
+    switch (c) {
+      case kRegClassGp: return _listGp;
+      case kRegClassMm: return _listMm;
+      case kRegClassXy: return _listXmm;
+
+      default:
+        return NULL;
+    }
+  }
+
+  // --------------------------------------------------------------------------
+  // [Clear]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE void reset(size_t numCells)
+  {
+    ::memset(this, 0, kAllCount * sizeof(VarData* ) +
+                      2         * sizeof(RegMask  ) +
+                      numCells  * sizeof(StateCell));
+  }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  union {
+    //! @brief List of all allocated variables in one array.
+    VarData* _list[kAllCount];
+
+    struct {
+      //! @brief Allocated Gp registers.
+      VarData* _listGp[kGpCount];
+      //! @brief Allocated Mm registers.
+      VarData* _listMm[kMmCount];
+      //! @brief Allocated Xmm registers.
+      VarData* _listXmm[kXmmCount];
+    };
+  };
+
+  //! @brief Occupied registers (mask).
+  RegMask _occupied;
+  //! @brief Modified registers (mask).
+  RegMask _modified;
+
+  //! @brief Variables data (count of variables is stored in @ref Context).
+  StateCell _cells[1];
+};
+
+// ============================================================================
+// [asmjit::X86X64FuncNode]
+// ============================================================================
+
+//! @brief X86/X64 function node.
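+//!
+//! Nodes of this type are created by @c X86X64Compiler::newFunc() and
+//! @c X86X64Compiler::addFunc() rather than constructed directly. A minimal
+//! sketch (assuming @c c is an X86X64Compiler instance and that
+//! @c FuncBuilder0<Void> builds a prototype taking and returning nothing):
+//!
+//! @code
+//! X86X64FuncNode* func = c.addFunc(kFuncConvHost, FuncBuilder0<Void>());
+//! @endcode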
+struct X86X64FuncNode : public FuncNode {
+  ASMJIT_NO_COPY(X86X64FuncNode)
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a new @ref X86X64FuncNode instance.
+  ASMJIT_INLINE X86X64FuncNode(BaseCompiler* compiler) : FuncNode(compiler) {
+    _decl = &_x86Decl;
+    _saveRestoreRegs.reset();
+
+    _alignStackSize = 0;
+    _alignedMemStackSize = 0;
+    _pushPopStackSize = 0;
+    _moveStackSize = 0;
+    _extraStackSize = 0;
+
+    _stackFrameRegIndex = kInvalidReg;
+    _isStackFrameRegPreserved = false;
+    _stackFrameCopyGpIndex[0] = kInvalidReg;
+    _stackFrameCopyGpIndex[1] = kInvalidReg;
+    _stackFrameCopyGpIndex[2] = kInvalidReg;
+    _stackFrameCopyGpIndex[3] = kInvalidReg;
+    _stackFrameCopyGpIndex[4] = kInvalidReg;
+    _stackFrameCopyGpIndex[5] = kInvalidReg;
+  }
+
+  //! @brief Destroy the @ref X86X64FuncNode instance.
+  ASMJIT_INLINE ~X86X64FuncNode() {}
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get function declaration as @ref X86X64FuncDecl.
+  ASMJIT_INLINE X86X64FuncDecl* getDecl() const {
+    return const_cast<X86X64FuncDecl*>(&_x86Decl);
+  }
+
+  //! @brief Get argument.
+  ASMJIT_INLINE VarData* getArg(uint32_t i) const {
+    ASMJIT_ASSERT(i < _x86Decl.getArgCount());
+    return static_cast<VarData*>(_argList[i]);
+  }
+
+  //! @brief Get registers which have to be saved in prolog/epilog.
+  ASMJIT_INLINE uint32_t getSaveRestoreRegs(uint32_t c) { return _saveRestoreRegs.get(c); }
+
+  //! @brief Get stack size needed to align the stack back to its natural alignment.
+  ASMJIT_INLINE uint32_t getAlignStackSize() const { return _alignStackSize; }
+  //! @brief Set stack size needed to align the stack back to its natural alignment.
+  ASMJIT_INLINE void setAlignStackSize(uint32_t s) { _alignStackSize = s; }
+
+  //! @brief Get aligned stack size used by variables and memory allocated on the stack.
+  ASMJIT_INLINE uint32_t getAlignedMemStackSize() const { return _alignedMemStackSize; }
+
+  //! @brief Get stack size used by push/pop sequences in prolog/epilog.
+  ASMJIT_INLINE uint32_t getPushPopStackSize() const { return _pushPopStackSize; }
+  //! @brief Set stack size used by push/pop sequences in prolog/epilog.
+  ASMJIT_INLINE void setPushPopStackSize(uint32_t s) { _pushPopStackSize = s; }
+
+  //! @brief Get stack size used by mov sequences in prolog/epilog.
+  ASMJIT_INLINE uint32_t getMoveStackSize() const { return _moveStackSize; }
+  //! @brief Set stack size used by mov sequences in prolog/epilog.
+  ASMJIT_INLINE void setMoveStackSize(uint32_t s) { _moveStackSize = s; }
+
+  //! @brief Get extra stack size.
+  ASMJIT_INLINE uint32_t getExtraStackSize() const { return _extraStackSize; }
+  //! @brief Set extra stack size.
+  ASMJIT_INLINE void setExtraStackSize(uint32_t s) { _extraStackSize = s; }
+
+  //! @brief Get whether the function has a stack frame register.
+  //!
+  //! @note The stack frame register can be used for both aligning purposes
+  //! and generating a standard prolog/epilog sequence.
+  //!
+  //! @note Used only when the stack is misaligned.
+  ASMJIT_INLINE bool hasStackFrameReg() const { return _stackFrameRegIndex != kInvalidReg; }
+  //! @brief Get stack frame register index.
+  //!
+  //! @note Used only when the stack is misaligned.
+  ASMJIT_INLINE uint32_t getStackFrameRegIndex() const { return _stackFrameRegIndex; }
+  //! @brief Get whether the stack frame register is preserved.
+  //!
+  //! @note Used only when the stack is misaligned.
+  ASMJIT_INLINE bool isStackFrameRegPreserved() const { return static_cast<bool>(_isStackFrameRegPreserved); }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! @brief X86 function decl.
+  X86X64FuncDecl _x86Decl;
+  //! @brief Registers which must be saved/restored in prolog/epilog.
+  RegMask _saveRestoreRegs;
+
+  //! @brief Stack size needed to align the function back to its natural alignment.
+  uint32_t _alignStackSize;
+  //! @brief Like @ref _memStackSize, but aligned.
+  uint32_t _alignedMemStackSize;
+
+  //! @brief Stack required for push/pop in prolog/epilog (X86/X64 specific).
+  uint32_t _pushPopStackSize;
+  //! @brief Stack required for movs in prolog/epilog (X86/X64 specific).
+  uint32_t _moveStackSize;
+
+  //! @brief Stack required to put extra data (for example function arguments
+  //! when manually aligning to requested alignment).
+  uint32_t _extraStackSize;
+
+  //! @brief Stack frame register.
+  uint8_t _stackFrameRegIndex;
+  //! @brief Whether the stack frame register is preserved.
+  uint8_t _isStackFrameRegPreserved;
+  //! @brief Gp register indexes that can be used to copy function arguments
+  //! to a new location in case we are doing manual stack alignment.
+  uint8_t _stackFrameCopyGpIndex[6];
+};
+
+// ============================================================================
+// [asmjit::X86X64CallNode]
+// ============================================================================
+
+//! @brief X86/X64 function-call node.
+struct X86X64CallNode : public CallNode {
+  ASMJIT_NO_COPY(X86X64CallNode)
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a new @ref X86X64CallNode instance.
+  ASMJIT_INLINE X86X64CallNode(BaseCompiler* compiler, const Operand& target) : CallNode(compiler, target) {
+    _decl = &_x86Decl;
+    _usedArgs.reset();
+  }
+
+  //! @brief Destroy the @ref X86X64CallNode instance.
+  ASMJIT_INLINE ~X86X64CallNode() {}
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get function prototype.
+  ASMJIT_INLINE X86X64FuncDecl* getDecl() const {
+    return const_cast<X86X64FuncDecl*>(&_x86Decl);
+  }
+
+  // --------------------------------------------------------------------------
+  // [Prototype]
+  // --------------------------------------------------------------------------
+
+  //! @brief Set function prototype.
+  ASMJIT_API Error setPrototype(uint32_t conv, const FuncPrototype& p);
+
+  // --------------------------------------------------------------------------
+  // [Arg / Ret]
+  // --------------------------------------------------------------------------
+
+  //! @brief Set argument at @a i to @a op.
+  ASMJIT_API bool _setArg(uint32_t i, const Operand& op);
+  //! @brief Set return at @a i to @a op.
+  ASMJIT_API bool _setRet(uint32_t i, const Operand& op);
+
+  //! @brief Set argument at @a i to @a var.
+  ASMJIT_INLINE bool setArg(uint32_t i, const BaseVar& var) { return _setArg(i, var); }
+  //! @brief Set argument at @a i to @a imm.
+  ASMJIT_INLINE bool setArg(uint32_t i, const Imm& imm) { return _setArg(i, imm); }
+  //! @brief Set return at @a i to @a var.
+  ASMJIT_INLINE bool setRet(uint32_t i, const BaseVar& var) { return _setRet(i, var); }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! @brief X86 declaration.
+  X86X64FuncDecl _x86Decl;
+  //! @brief Mask of all registers actually used to pass function arguments.
+  //!
+  //! @note This bit-mask is not the same as @c X86X64Func::_passed. It contains
+  //! only the registers actually used to do the call, while the
+  //! X86X64Func::_passed mask contains all registers for all function
+  //! prototype combinations.
+  RegMask _usedArgs;
+};
+
+// ============================================================================
+// [asmjit::x86x64::X86X64Compiler]
+// ============================================================================
+
+//! @brief X86/X64 compiler.
+//!
+//! This class stores an instruction stream and allows to modify it on the
+//! fly. It uses a different concept than the @c asmjit::Assembler class; in
+//! fact @c asmjit::Assembler is only used as a backend. The compiler never
+//! emits machine code directly; each instruction you use is stored in an
+//! instruction array instead. This allows the instruction stream to be
+//! modified later, for example to reorder instructions for better
+//! performance.
+//!
+//! @ref asmjit::X86X64Compiler moves code generation to a higher level.
+//! Higher level constructs allow writing more abstract and extensible code
+//! that is not possible with the pure @c asmjit::Assembler class. Because
+//! @c asmjit::Compiler needs to create many objects whose lifetime is short
+//! (the same as the @c asmjit::Compiler lifetime itself) it uses a very fast
+//! memory management model. This model allows creating object instances in
+//! nearly zero time (compared to the @c malloc() or @c new() operators) so
+//! the overhead of creating machine code by @c asmjit::Compiler is minimized.
+//!
+//! @section asmjit_compiler_introduction The Story
+//!
+//! Before telling you how Compiler works I'd like to write a story. I'd like
+//! to cover the reasons why this class was created and why I'm recommending
+//! to use it. When I released the first version of AsmJit (0.1) it was a toy.
+//! The first function I wrote is still available as testjit and simply
+//! returns 1024. The reason this function works in both 32-bit and 64-bit
+//! mode and for Windows/Unix specific calling conventions is luck: it uses no
+//! arguments and no registers except the return value in EAX/RAX.
+//!
+//! Then I started a project called BlitJit which was targeted at generating
+//! JIT code for computer graphics. After writing some lines I decided that I
+//! can't join pieces of code together without abstraction: should the source
+//! pixel pointer be in ESI/RSI or EDI/RDI, or is it completely irrelevant?
+//! What about the destination pointer and the SSE2 register for reading
+//! input pixels? The simple answer might be "just pick one and use it".
+//!
+//! Another reason for abstraction is function calling-conventions. It's
+//! really not easy to write assembler code for 32-bit and 64-bit platforms
+//! supporting three calling conventions (32-bit is similar between Windows
+//! and Unix, but the 64-bit calling conventions are different).
+//!
+//! At this time I realized that I can't write code which uses named
+//! registers; I need to abstract it.
+//! In most cases you don't need a specific register; you need to emit an
+//! instruction that does something with 'virtual' register(s), memory, an
+//! immediate or a label.
+//!
+//! The first version of AsmJit with Compiler was 0.5 (or 0.6?, I can't
+//! remember). There was support for 32-bit and 64-bit modes and function
+//! calling conventions, but when emitting instructions the developer needed
+//! to decide which registers are changed, which are only read and which are
+//! completely overwritten. This model helped a lot when generating code,
+//! especially when joining more code-sections together, but there was also a
+//! small possibility for mistakes. Simply put, the first version of Compiler
+//! was a great improvement over the low-level Assembler class, but the API
+//! design wasn't perfect.
+//!
+//! The second version of Compiler, completely rewritten and based on
+//! different goals, is part of AsmJit starting at version 1.0. This version
+//! was designed after the first one and contains serious improvements over
+//! it. The first improvement is that you just use instructions with virtual
+//! registers - called variables. When using the compiler there is no way to
+//! use native registers; there are variables instead. AsmJit is smarter than
+//! before and knows whether a register is needed only for read (r),
+//! read/write (w) or overwrite (x). Also supported are instructions which
+//! use some registers in an implicit way (these registers are not part of
+//! the instruction definition in string form). For example, to use the CPUID
+//! instruction you must give it four variables which will be automatically
+//! allocated in the input/output registers (EAX, EBX, ECX, EDX).
+//!
+//! Another improvement is the algorithm used by the register allocator. In
+//! the first version the registers were allocated when creating the
+//! instruction stream. In the new version registers are allocated after
+//! calling @c Compiler::make(), thus the register allocator has information
+//! about the scope of all variables and statistics of their usage. The
+//! algorithm to allocate registers is very simple and is commonly called a
+//! 'linear scan register allocator'. When you run out of registers all
+//! possible variables are scored and the worst is spilled. Of course the
+//! algorithm ignores the variables used by the current instruction.
+//!
+//! In addition, because registers are allocated after the code stream is
+//! generated, the state switches between jumps are handled by Compiler too.
+//! You don't need to worry about jumps; the compiler always does this dirty
+//! work for you.
+//!
+//! Nearly the last thing I'd like to present is calling other functions from
+//! the generated code. AsmJit uses a @c FuncPrototype class to hold function
+//! parameters, their position in the stack (or register index) and the
+//! return value. This class is used internally, but it can be used to create
+//! your own function calling-convention. All standard function calling
+//! conventions are implemented.
+//!
+//! Please enjoy the new version of Compiler. It was created for writing
+//! low-level code using a high-level API, leaving the developer to
+//! concentrate on real problems and not on solving a register puzzle.
+//!
+//! @section asmjit_compiler_codegeneration Code Generation
+//!
+//! The first thing to know about the compiler is that it never emits machine
+//! code itself. It's used as a middleware between @c asmjit::Assembler and
+//! your code. There is also a convenience method @c make() that allows
+//! generating machine code directly without creating an @c asmjit::Assembler
+//! instance.
+//!
+//! Comparison of generating machine code through @c Assembler and directly
+//! by @c Compiler:
+//!
+//! @code
+//! // Assembler instance is a low level code generation class that emits
+//! // machine code.
+//! Assembler a;
+//!
+//! // Compiler instance is a high level code generation class that stores all
+//! // instructions in an internal representation.
+//! Compiler c;
+//!
+//! // ... put your code here ...
+//!
+//! // Final step - generate code. asmjit::Compiler::serialize() sends all
+//! // instructions to the Assembler, which generates the real machine code.
+//! c.serialize(a);
+//!
+//! // Your function.
+//! void* fn = a.make();
+//! @endcode
+//!
+//! Example how to generate machine code using only @c Compiler (preferred):
+//!
+//! @code
+//! // Compiler instance is enough.
+//! Compiler c;
+//!
+//! // ... put your code here ...
+//!
+//! // Your function.
+//! void* fn = c.make();
+//! @endcode
+//!
+//! You can see that there is an @c asmjit::Compiler::serialize() function
+//! that emits instructions into @c asmjit::Assembler. This layered
+//! architecture means that each class is used for something different and
+//! there is no code duplication. For convenience there is also the
+//! @c asmjit::Compiler::make() method that can create your function using an
+//! @c asmjit::Assembler internally (this is the preferred behavior when
+//! using @c asmjit::Compiler).
+//!
+//! The @c make() method allocates memory using the @ref BaseRuntime instance
+//! passed into the @c Compiler constructor. If the code generator is used to
+//! create a JIT function then virtual memory allocated by @c MemoryManager
+//! is used. To get the global memory manager use
+//! @c MemoryManager::getGlobal().
+//!
+//! @code
+//! // Compiler instance is enough.
+//! Compiler c;
+//!
+//! // ... put your code using the Compiler instance ...
+//!
+//! // Your function.
+//! void* fn = c.make();
+//!
+//! // Free it if you don't want it anymore
+//! // (using the global memory manager instance).
+//! MemoryManager::getGlobal()->free(fn);
+//! @endcode
+//!
+//! @section asmjit_compiler_Functions Functions
+//!
+//! To build functions with @c Compiler, see the
+//! @c asmjit::Compiler::addFunc() method.
+//!
+//! @section asmjit_compiler_Variables Variables
+//!
+//! Compiler is able to manage variables and function arguments. Function
+//! arguments are moved to variables by using the @c setArg() method, where
+//! the first parameter is the argument index and the second parameter is the
+//! variable instance. To declare a variable use the @c newGpVar(),
+//! @c newMmVar() and @c newXmmVar() methods. The @c newXXX() methods also
+//! accept a parameter describing the variable type. For example the
+//! @c newGpVar() method always creates a variable whose size matches the
+//! target architecture size (for a 32-bit target a 32-bit variable is
+//! created, for a 64-bit target the variable size is 64-bit). To override
+//! this behavior the variable type must be specified.
+//!
+//! @code
+//! // Compiler and function declaration - void f(int*);
+//! Compiler c;
+//! GpVar a0(c, kVarTypeIntPtr);
+//!
+//! c.addFunc(kFuncConvHost, BuildFunction1<int*>());
+//! c.setArg(0, a0);
+//!
+//! // Create your variables.
+//! GpVar x0(c, kVarTypeInt32);
+//! GpVar x1(c, kVarTypeInt32);
+//!
+//! // Init your variables.
+//! c.mov(x0, 1);
+//! c.mov(x1, 2);
+//!
+//! // ... your code ...
+//! c.add(x0, x1);
+//! // ... your code ...
+//!
+//! // Store the result to the pointer given as the first argument.
+//! c.mov(dword_ptr(a0), x0);
+//!
+//! // End of function body.
+//! c.endFunc();
+//!
+//! // Make the function.
+//! typedef void (*MyFunc)(int*);
+//! MyFunc func = asmjit_cast<MyFunc>(c.make());
+//! @endcode
+//!
+//! This code snippet needs to be explained. You can see that there are more
+//! variable types that can be used by @c Compiler. The most useful variables
+//! can be allocated using general purpose registers (@c GpVar), MMX registers
+//! (@c MmVar) or SSE/SSE2 registers (@c XmmVar).
+//!
+//! X86/X64 variable types:
+//!
+//! - @c kVarTypeInt8 - Signed 8-bit integer, mapped to Gpd register (eax, ebx, ...).
+//! - @c kVarTypeUInt8 - Unsigned 8-bit integer, mapped to Gpd register (eax, ebx, ...).
+//!
+//! - @c kVarTypeInt16 - Signed 16-bit integer, mapped to Gpd register (eax, ebx, ...).
+//! - @c kVarTypeUInt16 - Unsigned 16-bit integer, mapped to Gpd register (eax, ebx, ...).
+//!
+//! - @c kVarTypeInt32 - Signed 32-bit integer, mapped to Gpd register (eax, ebx, ...).
+//! - @c kVarTypeUInt32 - Unsigned 32-bit integer, mapped to Gpd register (eax, ebx, ...).
+//!
+//! - @c kVarTypeInt64 - Signed 64-bit integer, mapped to Gpq register (rax, rbx, ...).
+//! - @c kVarTypeUInt64 - Unsigned 64-bit integer, mapped to Gpq register (rax, rbx, ...).
+//!
+//! - @c kVarTypeIntPtr - intptr_t, mapped to Gpd/Gpq register; depends on target, not host!
+//! - @c kVarTypeUIntPtr - uintptr_t, mapped to Gpd/Gpq register; depends on target, not host!
+//!
+//! - @c kVarTypeFp32 - 32-bit floating point register (fp0, fp1, ...).
+//! - @c kVarTypeFp64 - 64-bit floating point register (fp0, fp1, ...).
+//! - @c kVarTypeFpEx - 80-bit floating point register (fp0, fp1, ...).
+//!
+//! - @c kVarTypeMm - 64-bit Mm register (mm0, mm1, ...).
+//!
+//! - @c kVarTypeXmm - 128-bit SSE register.
+//! - @c kVarTypeXmmSs - 128-bit SSE register that contains a scalar 32-bit SP-FP value.
+//! - @c kVarTypeXmmSd - 128-bit SSE register that contains a scalar 64-bit DP-FP value.
+//! - @c kVarTypeXmmPs - 128-bit SSE register that contains 4 packed 32-bit SP-FP values.
+//! - @c kVarTypeXmmPd - 128-bit SSE register that contains 2 packed 64-bit DP-FP values.
+//!
+//! - @c kVarTypeYmm - 256-bit AVX register.
+//! - @c kVarTypeYmmPs - 256-bit AVX register that contains 8 packed 32-bit SP-FP values.
+//! - @c kVarTypeYmmPd - 256-bit AVX register that contains 4 packed 64-bit DP-FP values.
+//!
+//! Variable states:
+//!
+//! - @c kVarStateUnused - State that is assigned to newly created
+//!   variables or to unused variables (dereferenced to zero).
+//! - @c kVarStateReg - State that means that the variable is currently
+//!   allocated in a register.
+//! - @c kVarStateMem - State that means that the variable is currently
+//!   only in a memory location.
+//!
+//! When you create a new variable, its initial state is always
+//! @c kVarStateUnused; allocating it to a register or spilling it to memory
+//! changes this state to @c kVarStateReg or @c kVarStateMem, respectively.
+//! During a variable's lifetime it's usual that its state is changed
+//! multiple times. To generate better code, you can control allocation and
+//! spilling using the methods below.
+//!
+//! Explicit variable allocating / spilling methods:
+//!
+//! - @c Compiler::alloc() - Explicit method to allocate a variable into a
+//!   register. You can use this before loops or code blocks.
+//!
+//! - @c Compiler::spill() - Explicit method to spill a variable. If the
+//!   variable is in a register and you call this method, it's moved to its
+//!   home memory location. If the variable is not in a register, no
+//!   operation is performed.
+//!
+//! - @c Compiler::unuse() - Unuse a variable (you can use this to end the
+//!   variable scope or sub-scope).
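+//!
+//! A short sketch of explicit allocation control (assuming the @c alloc(),
+//! @c spill() and @c unuse() overloads that take a single variable):
+//!
+//! @code
+//! Compiler c;
+//! GpVar tmp(c, kVarTypeInt32);
+//!
+//! c.alloc(tmp);   // Keep tmp in a register across the hot code below.
+//! // ... code using tmp ...
+//! c.spill(tmp);   // Move tmp back to its home memory location.
+//! c.unuse(tmp);   // End of tmp's scope.
+//! @endcode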
+//!
+//! Please see the AsmJit tutorials (testcompiler.cpp and testvariables.cpp)
+//! for more complete examples.
+//!
+//! @section asmjit_compiler_MemoryManagement Memory Management
+//!
+//! @c Compiler memory management follows these rules:
+//!
+//! - Everything created by @c Compiler is always freed by @c Compiler.
+//! - To get decent performance, the compiler always uses a larger memory
+//!   buffer to allocate objects from, and when the compiler instance is
+//!   destroyed, this buffer is freed. Destructors of active objects are
+//!   called when destroying the compiler instance. Destructors of abandoned
+//!   compiler objects are called immediately after abandoning them.
+//! - This type of memory management is called 'zone memory management'.
+//!
+//! This means that you can't use any @c Compiler object after destructing it;
+//! it also means that each object like @c Label, @c BaseVar and others is
+//! created and managed by @c BaseCompiler itself. These objects contain an ID
+//! which is used internally by the Compiler to store additional information
+//! about them.
+//!
+//! @section asmjit_compiler_StateManagement Control-Flow and State Management
+//!
+//! The @c Compiler automatically manages the state of the variables when
+//! using control flow instructions like jumps, conditional jumps and calls.
+//! A minimal heuristic chooses the method by which the state is saved or
+//! restored.
+//!
+//! Generally the state can be changed only when using a jump or conditional
+//! jump instruction. When using an unconditional jump the state change is
+//! embedded into the instruction stream before the jump. When using a
+//! conditional jump the @c Compiler decides whether to restore the state
+//! before the jump or whether to use another block where the state is
+//! restored. The last case is that no code has to be emitted and there is no
+//! state change (this is of course ideal).
+//!
+//! Choosing whether to embed the 'restore-state' section before a conditional
+//! jump is quite simple. If the jump is likely to be 'taken' then the code is
+//! embedded; if the jump is unlikely to be taken then a small code section
+//! for the state-switch is generated instead.
+//!
+//! The next example is the situation where the extended code block is used to
+//! do the state-change:
+//!
+//! @code
+//! Compiler c;
+//!
+//! c.addFunc(kFuncConvHost, FuncBuilder0<Void>());
+//!
+//! // Labels.
+//! Label L0(c);
+//!
+//! // Variables.
+//! GpVar var0(c, kVarTypeInt32);
+//! GpVar var1(c, kVarTypeInt32);
+//!
+//! // Cleanup. After these two lines, var0 and var1 will always be stored
+//! // in registers. Our example is very small, but in larger code var0 could
+//! // be spilled by xor(var1, var1).
+//! c.xor_(var0, var0);
+//! c.xor_(var1, var1);
+//! c.cmp(var0, var1);
+//! // State:
+//! //   var0 - register.
+//! //   var1 - register.
+//!
+//! // We manually spill these variables.
+//! c.spill(var0);
+//! c.spill(var1);
+//! // State:
+//! //   var0 - memory.
+//! //   var1 - memory.
+//!
+//! // Conditional jump to L0. It will always be taken, but the compiler
+//! // thinks that it is unlikely taken so it will embed the state change
+//! // code somewhere else.
+//! c.je(L0);
+//!
+//! // Do something. The variables var0 and var1 will be allocated again.
+//! c.add(var0, 1);
+//! c.add(var1, 2);
+//! // State:
+//! //   var0 - register.
+//! //   var1 - register.
+//!
+//! // Bind the label here; the state is not changed.
+//! c.bind(L0);
+//! // State:
+//! //   var0 - register.
+//! //   var1 - register.
+//!
+//! // We need to use var0 and var1, because if the compiler detects that the
+//! // variables are out of scope then it optimizes the state-change away.
+//! c.sub(var0, var1);
+//! // State:
+//! //   var0 - register.
+//! //   var1 - register.
+//!
+//! c.endFunc();
+//! @endcode
+//!
+//! The output:
+//!
+//! @verbatim
+//! xor eax, eax        ; xor var_0, var_0
+//! xor ecx, ecx        ; xor var_1, var_1
+//! cmp eax, ecx        ; cmp var_0, var_1
+//! mov [esp - 24], eax ; spill var_0
+//! mov [esp - 28], ecx ; spill var_1
+//! je L0_Switch
+//! mov eax, [esp - 24] ; alloc var_0
+//! add eax, 1          ; add var_0, 1
+//! mov ecx, [esp - 28] ; alloc var_1
+//! add ecx, 2          ; add var_1, 2
+//! L0:
+//! sub eax, ecx        ; sub var_0, var_1
+//! ret
+//!
+//! ; state-switch begin
+//! L0_Switch:
+//! mov eax, [esp - 24] ; alloc var_0
+//! mov ecx, [esp - 28] ; alloc var_1
+//! jmp short L0
+//! ; state-switch end
+//! @endverbatim
+//!
+//! You can see that the state-switch section was generated (see L0_Switch).
+//! The compiler is unable to restore the state immediately when emitting the
+//! forward jump (the code is generated from the first to the last
+//! instruction and the target state is simply not known at this time).
+//!
+//! To tell the @c Compiler that you want to embed the state-switch code
+//! before the jump you need to create a backward jump (where the processor
+//! also expects it to be taken). To demonstrate the possibility to embed the
+//! state-switch before a jump we use slightly modified code:
+//!
+//! @code
+//! Compiler c;
+//!
+//! c.addFunc(kFuncConvHost, FuncBuilder0<Void>());
+//!
+//! // Labels.
+//! Label L0(c);
+//!
+//! // Variables.
+//! GpVar var0(c, kVarTypeInt32);
+//! GpVar var1(c, kVarTypeInt32);
+//!
+//! // Cleanup. After these two lines, var0 and var1 will always be stored
+//! // in registers. Our example is very small, but in larger code var0 could
+//! // be spilled by xor(var1, var1).
+//! c.xor_(var0, var0);
+//! c.xor_(var1, var1);
+//! // State:
+//! //   var0 - register.
+//! //   var1 - register.
+//!
+//! // We manually spill these variables.
+//! c.spill(var0);
+//! c.spill(var1);
+//! // State:
+//! //   var0 - memory.
+//! //   var1 - memory.
+//!
+//! // Bind our label here.
+//! c.bind(L0);
+//!
+//! // Do something; the variables will be allocated again.
+//! c.add(var0, 1);
+//! c.add(var1, 2);
+//! // State:
+//! //   var0 - register.
+//! //   var1 - register.
+//!
+//! // Backward conditional jump to L0. The default behavior is that it is
+//! // taken, so the state-change code will be embedded here.
+//! c.je(L0);
+//!
+//! c.endFunc();
+//! @endcode
+//!
+//! The output:
+//!
+//! @verbatim
+//! xor ecx, ecx        ; xor var_0, var_0
+//! xor edx, edx        ; xor var_1, var_1
+//! mov [esp - 24], ecx ; spill var_0
+//! mov [esp - 28], edx ; spill var_1
+//! L2:
+//! mov ecx, [esp - 24] ; alloc var_0
+//! add ecx, 1          ; add var_0, 1
+//! mov edx, [esp - 28] ; alloc var_1
+//! add edx, 2          ; add var_1, 2
+//!
+//! ; state-switch begin
+//! mov [esp - 24], ecx ; spill var_0
+//! mov [esp - 28], edx ; spill var_1
+//! ; state-switch end
+//!
+//! je short L2
+//! ret
+//! @endverbatim
+//!
+//! Please notice where the state-switch section is located. The @c Compiler
+//! decided that the jump is likely to be taken so the state change is
+//! embedded before the conditional jump.
+//! To change this behavior to the previous case, add an instruction option
+//! (@c kInstOptionTaken / @c kInstOptionNotTaken). Replacing c.je(L0) by
+//! c.taken(); c.je(L0) will generate code like this:
+//!
+//! @verbatim
+//! xor ecx, ecx        ; xor var_0, var_0
+//! xor edx, edx        ; xor var_1, var_1
+//! mov [esp - 24], ecx ; spill var_0
+//! mov [esp - 28], edx ; spill var_1
+//! L0:
+//! mov ecx, [esp - 24] ; alloc var_0
+//! add ecx, 1          ; add var_0, 1
+//! mov edx, [esp - 28] ; alloc var_1
+//! add edx, 2          ; add var_1, 2
+//! je L0_Switch
+//! ret
+//!
+//! ; state-switch begin
+//! L0_Switch:
+//! mov [esp - 24], ecx ; spill var_0
+//! mov [esp - 28], edx ; spill var_1
+//! jmp short L0
+//! ; state-switch end
+//! @endverbatim
+//!
+//! This section provided information about how the state-change works. The
+//! behavior is deterministic and it can be overridden.
+//!
+//! @section asmjit_compiler_AdvancedCodeGeneration Advanced Code Generation
+//!
+//! This section describes an advanced method of code generation available to
+//! @c Compiler (but also to @c Assembler). When emitting code to the
+//! instruction stream, methods like @c mov(), @c add() and @c sub() can be
+//! called directly (the advantage is static type checking performed by the
+//! C++ compiler) or indirectly using the @c emit() method. The @c emit()
+//! method needs only the instruction code and operands.
+//!
+//! Example of code generated by the standard type-safe API:
+//!
+//! @code
+//! Compiler c;
+//!
+//! GpVar var0(c, kVarTypeInt32);
+//! GpVar var1(c, kVarTypeInt32);
+//!
+//! ...
+//!
+//! c.mov(var0, 0);
+//! c.add(var0, var1);
+//! c.sub(var0, var1);
+//! @endcode
+//!
+//! The code above can be rewritten as:
+//!
+//! @code
+//! Compiler c;
+//!
+//! GpVar var0(c, kVarTypeInt32);
+//! GpVar var1(c, kVarTypeInt32);
+//!
+//! ...
+//!
+//! c.emit(kInstMov, var0, 0);
+//! c.emit(kInstAdd, var0, var1);
+//! c.emit(kInstSub, var0, var1);
+//! @endcode
+//!
+//! The advantage of the first snippet is a very friendly, type-safe API
+//! checked by the C++ compiler. The advantage of the second snippet is the
+//! ability to choose or generate the instruction code in different places.
+//! The next example shows how the @c emit() method can be used to generate
+//! abstract code.
+//!
+//! Use case:
+//!
+//! @code
+//! bool emitArithmetic(Compiler& c, XmmVar& var0, XmmVar& var1, const char* op)
+//! {
+//!   uint32_t code = kInstNone;
+//!
+//!   if (strcmp(op, "ADD") == 0)
+//!     code = kInstAddss;
+//!   else if (strcmp(op, "SUBTRACT") == 0)
+//!     code = kInstSubss;
+//!   else if (strcmp(op, "MULTIPLY") == 0)
+//!     code = kInstMulss;
+//!   else if (strcmp(op, "DIVIDE") == 0)
+//!     code = kInstDivss;
+//!   else
+//!     // Invalid parameter?
+//!     return false;
+//!
+//!   c.emit(code, var0, var1);
+//!   return true;
+//! }
+//! @endcode
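+//!
+//! A usage sketch of the helper above (assuming @c x0 and @c x1 are XmmVar
+//! variables created elsewhere):
+//!
+//! @code
+//! emitArithmetic(c, x0, x1, "MULTIPLY"); // Emits 'mulss x0, x1'.
+//! @endcode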
+//!
+//! Other use cases are waiting for you! Be sure that the instruction you are
+//! emitting is correct and encodable, because if it's not, the Assembler
+//! will set its status code to @c kErrorAssemblerUnknownInst.
+struct X86X64Compiler : public BaseCompiler {
+  ASMJIT_NO_COPY(X86X64Compiler)
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a @ref X86X64Compiler instance.
+  ASMJIT_API X86X64Compiler(BaseRuntime* runtime);
+  //! @brief Destroy the @ref X86X64Compiler instance.
+  ASMJIT_API ~X86X64Compiler();
+
+  // --------------------------------------------------------------------------
+  // [Inst / Emit]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a new @ref InstNode.
+  ASMJIT_API InstNode* newInst(uint32_t code);
+  //! @overload
+  ASMJIT_API InstNode* newInst(uint32_t code, const Operand& o0);
+  //! @overload
+  ASMJIT_API InstNode* newInst(uint32_t code, const Operand& o0, const Operand& o1);
+  //! @overload
+  ASMJIT_API InstNode* newInst(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2);
+  //! @overload
+  ASMJIT_API InstNode* newInst(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, const Operand& o3);
+  //! @overload
+  ASMJIT_API InstNode* newInst(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, const Operand& o3, const Operand& o4);
+
+  //! @brief Add a new @ref InstNode.
+  ASMJIT_API InstNode* emit(uint32_t code);
+  //! @overload
+  ASMJIT_API InstNode* emit(uint32_t code, const Operand& o0);
+  //! @overload
+  ASMJIT_API InstNode* emit(uint32_t code, const Operand& o0, const Operand& o1);
+  //! @overload
+  ASMJIT_API InstNode* emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2);
+  //! @overload
+  ASMJIT_API InstNode* emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, const Operand& o3);
+  //! @overload
+  ASMJIT_API InstNode* emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, const Operand& o3, const Operand& o4);
+
+  //! @overload
+  ASMJIT_API InstNode* emit(uint32_t code, int o0);
+  //! @overload
+  ASMJIT_API InstNode* emit(uint32_t code, const Operand& o0, int o1);
+  //! @overload
+  ASMJIT_API InstNode* emit(uint32_t code, const Operand& o0, const Operand& o1, int o2);
+
+  // --------------------------------------------------------------------------
+  // [Func]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a new @ref X86X64FuncNode.
+  ASMJIT_API X86X64FuncNode* newFunc(uint32_t conv, const FuncPrototype& p);
+
+  //! @brief Add a new function.
+  //!
+  //! @param conv Calling convention to use (see the @c kFuncConv enum).
+  //! @param p Function arguments prototype.
+  //!
+  //! This method is usually used as the first step when generating functions
+  //! by @c Compiler. The first parameter @a conv specifies the function
+  //! calling convention to use. The second parameter @a p specifies the
+  //! function arguments. Function arguments are created using the
+  //! @c BuildFunction0<...>, @c BuildFunction1<...>, @c BuildFunction2<...>,
+  //! etc... templates.
+  //!
+  //! Templates with the BuildFunction prefix are used to generate argument
+  //! IDs based on real C++ types. See the next example how to generate a
+  //! function with two 32-bit integer arguments.
+  //!
+  //! @code
+  //! // Building a function using asmjit::Compiler example.
+  //!
+  //! // Compiler instance.
+  //! Compiler c;
+  //!
+  //! // Begin of function (also emits function @c Prolog).
+  //! c.addFunc(
+  //!   // Default calling convention (32-bit cdecl or the host 64-bit convention).
+  //!   kFuncConvHost,
+  //!   // Using the function builder to generate the arguments list.
+  //!   BuildFunction2<int, int>());
+  //!
+  //! // End of function (also emits function @c Epilog).
+  //! c.endFunc();
+  //! @endcode
+  //!
+  //! You can see that building functions is really easy. The previous code
+  //! snippet will generate code for a function with two 32-bit integer
+  //! arguments.
+  //! You can access arguments by the @c asmjit::Function::argument() method.
+  //! Arguments are indexed from 0 (like everything in C).
+  //!
+  //! @code
+  //! // Accessing function arguments through asmjit::Function example.
+  //!
+  //! // Compiler instance.
+  //! Compiler c;
+  //! GpVar a0(c, kVarTypeInt32);
+  //! GpVar a1(c, kVarTypeInt32);
+  //!
+  //! // Begin of function (also emits function @c Prolog).
+  //! c.addFunc(
+  //!   // Default calling convention (32-bit cdecl or the host 64-bit convention).
+  //!   kFuncConvHost,
+  //!   // Using the function builder to generate the arguments list.
+  //!   BuildFunction2<int, int>());
+  //!
+  //! c.setArg(0, a0);
+  //! c.setArg(1, a1);
+  //!
+  //! // Use them.
+  //! c.add(a0, a1);
+  //!
+  //! // End of function - emits function epilog and return instruction.
+  //! c.endFunc();
+  //! @endcode
+  //!
+  //! Arguments are like variables. How to manipulate variables is documented
+  //! in @c asmjit::Compiler, variables section.
+  //!
+  //! @note To get the current function use the @c currentFunction() method or
+  //! save the pointer to @c asmjit::Function returned by the
+  //! @c asmjit::Compiler::addFunc<> method. Saving the pointer is
+  //! recommended.
+  //!
+  //! @sa @c BuildFunction0, @c BuildFunction1, @c BuildFunction2, ...
+  ASMJIT_API X86X64FuncNode* addFunc(uint32_t conv, const FuncPrototype& p);
+
+  //! @brief End of current function.
+  ASMJIT_API EndNode* endFunc();
+
+  //! @brief Get the current function as @ref X86X64FuncNode.
+  //!
+  //! This method can be called within the @c addFunc() and @c endFunc()
+  //! block to get the current function you are working with. It's
+  //! recommended to store the @c asmjit::Function pointer returned by the
+  //! @c addFunc<> method, because this allows you to implement function
+  //! sections outside of the function itself in the future (yeah, this is
+  //! possible!).
+  ASMJIT_INLINE X86X64FuncNode* getFunc() const { return static_cast<X86X64FuncNode*>(_func); }
+
+  // --------------------------------------------------------------------------
+  // [Ret]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a new @ref RetNode.
+  ASMJIT_API RetNode* newRet(const Operand& o0, const Operand& o1);
+  //! @brief Add a new @ref RetNode.
+  ASMJIT_API RetNode* addRet(const Operand& o0, const Operand& o1);
+
+  // --------------------------------------------------------------------------
+  // [Call]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a new @ref X86X64CallNode.
+  ASMJIT_API X86X64CallNode* newCall(const Operand& o0, uint32_t conv, const FuncPrototype& p);
+  //! @brief Add a new @ref X86X64CallNode.
+  ASMJIT_API X86X64CallNode* addCall(const Operand& o0, uint32_t conv, const FuncPrototype& p);
+
+  // --------------------------------------------------------------------------
+  // [Vars]
+  // --------------------------------------------------------------------------
+
+  //! @brief Set function argument to @a var.
+  ASMJIT_API Error setArg(uint32_t argIndex, BaseVar& var);
+
+  //! @overridden
+  ASMJIT_API virtual Error _newVar(BaseVar* var, uint32_t type, const char* name);
+
+  //! @brief Create a new Gp variable.
+  ASMJIT_INLINE GpVar newGpVar(uint32_t vType = kVarTypeIntPtr, const char* name = NULL) {
+    ASMJIT_ASSERT(vType < kVarTypeCount);
+    ASMJIT_ASSERT(IntUtil::inInterval(vType, _kVarTypeIntStart, _kVarTypeIntEnd));
+
+    GpVar var(DontInitialize);
+    _newVar(&var, vType, name);
+    return var;
+  }
+
+  //! @brief Create a new Mm variable.
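+  //!
+  //! A minimal usage sketch (the signature is directly below):
+  //!
+  //! @code
+  //! MmVar m0 = c.newMmVar();                 // Defaults to kVarTypeMm.
+  //! MmVar m1 = c.newMmVar(kVarTypeMm, "m1"); // Optionally named.
+  //! @endcode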
+  ASMJIT_INLINE MmVar newMmVar(uint32_t vType = kVarTypeMm, const char* name = NULL) {
+    ASMJIT_ASSERT(vType < kVarTypeCount);
+    ASMJIT_ASSERT(IntUtil::inInterval(vType, _kVarTypeMmStart, _kVarTypeMmEnd));
+
+    MmVar var(DontInitialize);
+    _newVar(&var, vType, name);
+    return var;
+  }
+
+  //! @brief Create a new Xmm variable.
+  ASMJIT_INLINE XmmVar newXmmVar(uint32_t vType = kVarTypeXmm, const char* name = NULL) {
+    ASMJIT_ASSERT(vType < kVarTypeCount);
+    ASMJIT_ASSERT(IntUtil::inInterval(vType, _kVarTypeXmmStart, _kVarTypeXmmEnd));
+
+    XmmVar var(DontInitialize);
+    _newVar(&var, vType, name);
+    return var;
+  }
+
+  //! @brief Create a new Ymm variable.
+  ASMJIT_INLINE YmmVar newYmmVar(uint32_t vType = kVarTypeYmm, const char* name = NULL) {
+    ASMJIT_ASSERT(vType < kVarTypeCount);
+    ASMJIT_ASSERT(IntUtil::inInterval(vType, _kVarTypeYmmStart, _kVarTypeYmmEnd));
+
+    YmmVar var(DontInitialize);
+    _newVar(&var, vType, name);
+    return var;
+  }
+
+  //! @brief Get the memory home of variable @a var.
+  ASMJIT_API void getMemoryHome(BaseVar& var, GpVar* home, int* displacement = NULL);
+
+  //! @brief Set the memory home of variable @a var.
+  //!
+  //! The default memory home location is on the stack (ESP/RSP), but when
+  //! needed the behavior can be changed by this method.
+  //!
+  //! It is an error to chain memory home locations. For example the given
+  //! code is invalid:
+  //!
+  //! @code
+  //! Compiler c;
+  //!
+  //! ...
+  //!
+  //! GpVar v0(c, kVarTypeIntPtr);
+  //! GpVar v1(c, kVarTypeIntPtr);
+  //! GpVar v2(c, kVarTypeIntPtr);
+  //! GpVar v3(c, kVarTypeIntPtr);
+  //!
+  //! c.setMemoryHome(v1, v0, 0); // Allowed, [v0+0] is the memory home for v1.
+  //! c.setMemoryHome(v2, v0, 4); // Allowed, [v0+4] is the memory home for v2.
+  //! c.setMemoryHome(v3, v2);    // CHAINING, NOT ALLOWED!
+  //! @endcode
+  ASMJIT_API void setMemoryHome(BaseVar& var, const GpVar& home, int displacement = 0);
+
+  // --------------------------------------------------------------------------
+  // [Stack]
+  // --------------------------------------------------------------------------
+
+  //! @overridden
+  ASMJIT_API virtual Error _newStack(BaseMem* mem, uint32_t size, uint32_t alignment, const char* name);
+
+  //! @brief Create a new memory chunk allocated on the stack.
+  ASMJIT_INLINE Mem newStack(uint32_t size, uint32_t alignment, const char* name = NULL) {
+    Mem m(DontInitialize);
+    _newStack(&m, size, alignment, name);
+    return m;
+  }
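+
+  // A sketch of stack allocation usage: reserve a 16-byte, 16-byte-aligned
+  // scratch area and take its address into a hypothetical GpVar 'tmp':
+  //
+  //   Mem scratch = c.newStack(16, 16);
+  //   c.lea(tmp, scratch);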
+
+  // --------------------------------------------------------------------------
+  // [Embed]
+  // --------------------------------------------------------------------------
+
+  //! @brief Add 8-bit integer data to the instruction stream.
+  ASMJIT_INLINE EmbedNode* db(uint8_t x) { return embed(&x, 1); }
+  //! @brief Add 16-bit integer data to the instruction stream.
+  ASMJIT_INLINE EmbedNode* dw(uint16_t x) { return embed(&x, 2); }
+  //! @brief Add 32-bit integer data to the instruction stream.
+  ASMJIT_INLINE EmbedNode* dd(uint32_t x) { return embed(&x, 4); }
+  //! @brief Add 64-bit integer data to the instruction stream.
+  ASMJIT_INLINE EmbedNode* dq(uint64_t x) { return embed(&x, 8); }
+
+  //! @brief Add 8-bit integer data to the instruction stream.
+  ASMJIT_INLINE EmbedNode* dint8(int8_t x) { return embed(&x, static_cast<uint32_t>(sizeof(int8_t))); }
+  //! @brief Add 8-bit integer data to the instruction stream.
+  ASMJIT_INLINE EmbedNode* duint8(uint8_t x) { return embed(&x, static_cast<uint32_t>(sizeof(uint8_t))); }
+
+  //! @brief Add 16-bit integer data to the instruction stream.
+  ASMJIT_INLINE EmbedNode* dint16(int16_t x) { return embed(&x, static_cast<uint32_t>(sizeof(int16_t))); }
+  //! @brief Add 16-bit integer data to the instruction stream.
+  ASMJIT_INLINE EmbedNode* duint16(uint16_t x) { return embed(&x, static_cast<uint32_t>(sizeof(uint16_t))); }
+
+  //! @brief Add 32-bit integer data to the instruction stream.
+  ASMJIT_INLINE EmbedNode* dint32(int32_t x) { return embed(&x, static_cast<uint32_t>(sizeof(int32_t))); }
+  //! @brief Add 32-bit integer data to the instruction stream.
+  ASMJIT_INLINE EmbedNode* duint32(uint32_t x) { return embed(&x, static_cast<uint32_t>(sizeof(uint32_t))); }
+
+  //! @brief Add 64-bit integer data to the instruction stream.
+  ASMJIT_INLINE EmbedNode* dint64(int64_t x) { return embed(&x, static_cast<uint32_t>(sizeof(int64_t))); }
+  //! @brief Add 64-bit integer data to the instruction stream.
+  ASMJIT_INLINE EmbedNode* duint64(uint64_t x) { return embed(&x, static_cast<uint32_t>(sizeof(uint64_t))); }
+
+  //! @brief Add float data to the instruction stream.
+  ASMJIT_INLINE EmbedNode* dfloat(float x) { return embed(&x, static_cast<uint32_t>(sizeof(float))); }
+  //! @brief Add double data to the instruction stream.
+  ASMJIT_INLINE EmbedNode* ddouble(double x) { return embed(&x, static_cast<uint32_t>(sizeof(double))); }
+
+  //! @brief Add pointer data to the instruction stream.
+  ASMJIT_INLINE EmbedNode* dptr(void* x) { return embed(&x, static_cast<uint32_t>(sizeof(void*))); }
+
+  //! @brief Add Mm data to the instruction stream.
+  ASMJIT_INLINE EmbedNode* dmm(const MmData& x) { return embed(&x, static_cast<uint32_t>(sizeof(MmData))); }
+  //! @brief Add Xmm data to the instruction stream.
+  ASMJIT_INLINE EmbedNode* dxmm(const XmmData& x) { return embed(&x, static_cast<uint32_t>(sizeof(XmmData))); }
+
+  //! @brief Add data in a given structure instance to the instruction stream.
+  template<typename T>
+  ASMJIT_INLINE EmbedNode* dstruct(const T& x) { return embed(&x, static_cast<uint32_t>(sizeof(T))); }
+
+  // --------------------------------------------------------------------------
+  // [Make]
+  // --------------------------------------------------------------------------
+
+  //! @overridden
+  ASMJIT_API virtual void* make();
+
+  // --------------------------------------------------------------------------
+  // [Serialize]
+  // --------------------------------------------------------------------------
+
+  //! @overridden
+  ASMJIT_API virtual Error serialize(BaseAssembler& assembler);
+
+  // --------------------------------------------------------------------------
+  // [Options]
+  // --------------------------------------------------------------------------
+
+  //! @brief Force short form of jmp/jcc/other instruction.
+  ASMJIT_INLINE X86X64Compiler& short_()
+  { _options |= kInstOptionShortForm; return *this; }
+
+  //! @brief Force long form of jmp/jcc/other instruction.
+  ASMJIT_INLINE X86X64Compiler& long_()
+  { _options |= kInstOptionLongForm; return *this; }
+
+  //! @brief Condition is likely to be taken.
+  ASMJIT_INLINE X86X64Compiler& taken()
+  { _options |= kInstOptionTaken; return *this; }
+
+  //! @brief Condition is unlikely to be taken.
+  ASMJIT_INLINE X86X64Compiler& notTaken()
+  { _options |= kInstOptionNotTaken; return *this; }
+
+  //! @brief Lock prefix.
+  ASMJIT_INLINE X86X64Compiler& lock()
+  { _options |= kInstOptionLock; return *this; }
+
+  // --------------------------------------------------------------------------
+  // [X86 Instructions]
+  // --------------------------------------------------------------------------
+
+  //! @brief Add with carry.
+  INST_2x(adc, kInstAdc, GpVar, GpVar)
+  //! @overload
+  INST_2x(adc, kInstAdc, GpVar, Mem)
+  //!
@overload + INST_2i(adc, kInstAdc, GpVar, Imm) + //! @overload + INST_2x(adc, kInstAdc, Mem, GpVar) + //! @overload + INST_2i(adc, kInstAdc, Mem, Imm) + + //! @brief Add. + INST_2x(add, kInstAdd, GpVar, GpVar) + //! @overload + INST_2x(add, kInstAdd, GpVar, Mem) + //! @overload + INST_2i(add, kInstAdd, GpVar, Imm) + //! @overload + INST_2x(add, kInstAdd, Mem, GpVar) + //! @overload + INST_2i(add, kInstAdd, Mem, Imm) + + //! @brief And. + INST_2x(and_, kInstAnd, GpVar, GpVar) + //! @overload + INST_2x(and_, kInstAnd, GpVar, Mem) + //! @overload + INST_2i(and_, kInstAnd, GpVar, Imm) + //! @overload + INST_2x(and_, kInstAnd, Mem, GpVar) + //! @overload + INST_2i(and_, kInstAnd, Mem, Imm) + + //! @brief Bit scan forward. + INST_2x_(bsf, kInstBsf, GpVar, GpVar, !o0.isGpb()) + //! @overload + INST_2x_(bsf, kInstBsf, GpVar, Mem, !o0.isGpb()) + + //! @brief Bit scan reverse. + INST_2x_(bsr, kInstBsr, GpVar, GpVar, !o0.isGpb()) + //! @overload + INST_2x_(bsr, kInstBsr, GpVar, Mem, !o0.isGpb()) + + //! @brief Byte swap (32-bit or 64-bit registers only) (i486). + INST_1x_(bswap, kInstBswap, GpVar, o0.getSize() >= 4) + + //! @brief Bit test. + INST_2x(bt, kInstBt, GpVar, GpVar) + //! @overload + INST_2i(bt, kInstBt, GpVar, Imm) + //! @overload + INST_2x(bt, kInstBt, Mem, GpVar) + //! @overload + INST_2i(bt, kInstBt, Mem, Imm) + + //! @brief Bit test and complement. + INST_2x(btc, kInstBtc, GpVar, GpVar) + //! @overload + INST_2i(btc, kInstBtc, GpVar, Imm) + //! @overload + INST_2x(btc, kInstBtc, Mem, GpVar) + //! @overload + INST_2i(btc, kInstBtc, Mem, Imm) + + //! @brief Bit test and reset. + INST_2x(btr, kInstBtr, GpVar, GpVar) + //! @overload + INST_2i(btr, kInstBtr, GpVar, Imm) + //! @overload + INST_2x(btr, kInstBtr, Mem, GpVar) + //! @overload + INST_2i(btr, kInstBtr, Mem, Imm) + + //! @brief Bit test and set. + INST_2x(bts, kInstBts, GpVar, GpVar) + //! @overload + INST_2i(bts, kInstBts, GpVar, Imm) + //! @overload + INST_2x(bts, kInstBts, Mem, GpVar) + //! @overload + INST_2i(bts, kInstBts, Mem, Imm) + + //! @brief Call. + ASMJIT_INLINE X86X64CallNode* call(const GpVar& dst, uint32_t conv, const FuncPrototype& p) + { return addCall(dst, conv, p); } + //! @overload + ASMJIT_INLINE X86X64CallNode* call(const Mem& dst, uint32_t conv, const FuncPrototype& p) + { return addCall(dst, conv, p); } + //! @overload + ASMJIT_INLINE X86X64CallNode* call(const Imm& dst, uint32_t conv, const FuncPrototype& p) + { return addCall(dst, conv, p); } + //! @overload + ASMJIT_INLINE X86X64CallNode* call(void* dst, uint32_t conv, const FuncPrototype& p) { + Imm imm((intptr_t)dst); + return addCall(imm, conv, p); + } + //! @overload + ASMJIT_INLINE X86X64CallNode* call(const Label& label, uint32_t conv, const FuncPrototype& p) + { return addCall(label, conv, p); } + + //! @brief Clear carry flag + INST_0x(clc, kInstClc) + //! @brief Clear direction flag + INST_0x(cld, kInstCld) + //! @brief Complement carry Flag. + INST_0x(cmc, kInstCmc) + + //! @brief Convert byte to word (AX <- Sign Extend AL). + INST_1x(cbw, kInstCbw, GpVar /* al */) + //! @brief Convert word to dword (DX:AX <- Sign Extend AX). + INST_2x(cwd, kInstCwd, GpVar /* dx */, GpVar /* ax */) + //! @brief Convert word to dword (EAX <- Sign Extend AX). + INST_1x(cwde, kInstCwde, GpVar /* eax */) + //! @brief Convert dword to qword (EDX:EAX <- Sign Extend EAX). + INST_2x(cdq, kInstCdq, GpVar /* edx */, GpVar /* eax */) + + //! @brief Conditional move. + INST_2cc(cmov, kInstCmov, condToCmovcc, GpVar, GpVar) + //! @brief Conditional move. 
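+  //!
+  //! @c INST_2cc also generates the suffixed forms, so both spellings are
+  //! available (a sketch; assuming a @c kCondZ condition-code constant and
+  //! GpVar variables dst/src):
+  //!
+  //! @code
+  //! c.cmovz(dst, src);        // Move if the zero flag is set.
+  //! c.cmov(kCondZ, dst, src); // Same, with a runtime condition code.
+  //! @endcode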
+ INST_2cc(cmov, kInstCmov, condToCmovcc, GpVar, Mem) + + //! @brief Compare two operands. + INST_2x(cmp, kInstCmp, GpVar, GpVar) + //! @overload + INST_2x(cmp, kInstCmp, GpVar, Mem) + //! @overload + INST_2i(cmp, kInstCmp, GpVar, Imm) + //! @overload + INST_2x(cmp, kInstCmp, Mem, GpVar) + //! @overload + INST_2i(cmp, kInstCmp, Mem, Imm) + + //! @brief Compare and exchange (i486). + INST_3x(cmpxchg, kInstCmpxchg, GpVar /* eax */, GpVar, GpVar) + //! @overload + INST_3x(cmpxchg, kInstCmpxchg, GpVar /* eax */, Mem, GpVar) + + //! @brief Compares the 64-bit value in EDX:EAX with the memory operand (Pentium). + ASMJIT_INLINE InstNode* cmpxchg8b( + const GpVar& cmp_edx, const GpVar& cmp_eax, + const GpVar& cmp_ecx, const GpVar& cmp_ebx, + const Mem& dst) { + + return emit(kInstCmpxchg8b, cmp_edx, cmp_eax, cmp_ecx, cmp_ebx, dst); + } + + //! @brief CPU identification (i486). + ASMJIT_INLINE InstNode* cpuid( + const GpVar& inout_eax, + const GpVar& out_ebx, + const GpVar& out_ecx, + const GpVar& out_edx) { + + // Destination variables must be different. + ASMJIT_ASSERT(inout_eax.getId() != out_ebx.getId() && + out_ebx.getId() != out_ecx.getId() && + out_ecx.getId() != out_edx.getId()); + + return emit(kInstCpuid, inout_eax, out_ebx, out_ecx, out_edx); + } + + //! @brief Accumulate crc32 value (polynomial 0x11EDC6F41) (SSE4.2). + INST_2x_(crc32, kInstCrc32, GpVar, GpVar, o0.isRegType(kRegTypeGpd) || o0.isRegType(kRegTypeGpq)) + //! @overload + INST_2x_(crc32, kInstCrc32, GpVar, Mem, o0.isRegType(kRegTypeGpd) || o0.isRegType(kRegTypeGpq)) + + //! @brief Decrement by 1. + INST_1x(dec, kInstDec, GpVar) + //! @overload + INST_1x(dec, kInstDec, Mem) + + //! @brief Unsigned divide (o0:o1 <- o0:o1 / o2). + //! + //! Remainder is stored in @a o0, quotient is stored in @a o1. + INST_3x_(div, kInstDiv, GpVar, GpVar, GpVar, o0.getId() != o1.getId()) + //! @overload + INST_3x_(div, kInstDiv, GpVar, GpVar, Mem, o0.getId() != o1.getId()) + + //! @brief Signed divide (o0:o1 <- o0:o1 / o2). + //! + //! Remainder is stored in @a o0, quotient is stored in @a o1. + INST_3x_(idiv, kInstIdiv, GpVar, GpVar, GpVar, o0.getId() != o1.getId()) + //! @overload + INST_3x_(idiv, kInstIdiv, GpVar, GpVar, Mem, o0.getId() != o1.getId()) + + //! @brief Signed multiply (o0:o1 <- o1 * o2). + //! + //! Hi value is stored in @a o0, lo value is stored in @a o1. + INST_3x_(imul, kInstImul, GpVar, GpVar, GpVar, o0.getId() != o1.getId()) + //! @overload + INST_3x_(imul, kInstImul, GpVar, GpVar, Mem, o0.getId() != o1.getId()) + + //! @brief Signed multiply. + INST_2x(imul, kInstImul, GpVar, GpVar) + //! @overload + INST_2x(imul, kInstImul, GpVar, Mem) + //! @overload + INST_2i(imul, kInstImul, GpVar, Imm) + + //! @brief Signed multiply. + INST_3i(imul, kInstImul, GpVar, GpVar, Imm) + //! @overload + INST_3i(imul, kInstImul, GpVar, Mem, Imm) + + //! @brief Increment by 1. + INST_1x(inc, kInstInc, GpVar) + //! @overload + INST_1x(inc, kInstInc, Mem) + + //! @brief Interrupt. + INST_1i(int_, kInstInt, Imm) + //! @brief Interrupt 3 - trap to debugger. + ASMJIT_INLINE InstNode* int3() { return int_(3); } + + //! @brief Jump to label @a label if condition @a cc is met. + INST_1cc(j, kInstJ, condToJcc, Label) + + //! @brief Jump. + INST_1x(jmp, kInstJmp, GpVar) + //! @overload + INST_1x(jmp, kInstJmp, Mem) + //! @overload + INST_1x(jmp, kInstJmp, Label) + //! @overload + INST_1x(jmp, kInstJmp, Imm) + //! @overload + ASMJIT_INLINE InstNode* jmp(void* dst) { return jmp(Imm((intptr_t)dst)); } + + //! @brief Load AH from flags. 
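+  //!
+  //! @note A hedged sketch of pairing lahf with sahf to save and restore the
+  //! low byte of EFLAGS (assumes an `X86X64Compiler c`; the variable-creation
+  //! call is illustrative, not taken from this header):
+  //!
+  //! @code
+  //! GpVar f(c.newGpVar());   // Creation API assumed for illustration.
+  //! c.lahf(f);               // AH <- SF|ZF|AF|PF|CF.
+  //! // ... flag-clobbering code ...
+  //! c.sahf(f);               // SF|ZF|AF|PF|CF <- AH.
+  //! @endcode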
+  INST_1x(lahf, kInstLahf, GpVar)
+
+  //! @brief Load effective address.
+  INST_2x(lea, kInstLea, GpVar, Mem)
+
+  //! @brief Move.
+  INST_2x(mov, kInstMov, GpVar, GpVar)
+  //! @overload
+  INST_2x(mov, kInstMov, GpVar, Mem)
+  //! @overload
+  INST_2i(mov, kInstMov, GpVar, Imm)
+  //! @overload
+  INST_2x(mov, kInstMov, Mem, GpVar)
+  //! @overload
+  INST_2i(mov, kInstMov, Mem, Imm)
+
+  //! @brief Move from segment register.
+  INST_2x(mov, kInstMov, GpVar, SegReg)
+  //! @overload
+  INST_2x(mov, kInstMov, Mem, SegReg)
+  //! @brief Move to segment register.
+  INST_2x(mov, kInstMov, SegReg, GpVar)
+  //! @overload
+  INST_2x(mov, kInstMov, SegReg, Mem)
+
+  //! @brief Move (AL|AX|EAX|RAX <- absolute address in immediate).
+  ASMJIT_INLINE InstNode* mov_ptr(const GpVar& dst, void* src) {
+    Imm imm(static_cast<int64_t>((intptr_t)src));
+    return emit(kInstMovptr, dst, imm);
+  }
+  //! @brief Move (absolute address in immediate <- AL|AX|EAX|RAX).
+  ASMJIT_INLINE InstNode* mov_ptr(void* dst, const GpVar& src) {
+    Imm imm(static_cast<int64_t>((intptr_t)dst));
+    return emit(kInstMovptr, imm, src);
+  }
+
+  //! @brief Move data after swapping bytes (SSE3 - Atom).
+  INST_2x_(movbe, kInstMovbe, GpVar, Mem, !o0.isGpb());
+  //! @overload
+  INST_2x_(movbe, kInstMovbe, Mem, GpVar, !o1.isGpb());
+
+  //! @brief Move with sign-extension.
+  INST_2x(movsx, kInstMovsx, GpVar, GpVar)
+  //! @overload
+  INST_2x(movsx, kInstMovsx, GpVar, Mem)
+
+  //! @brief Move with zero-extension.
+  INST_2x(movzx, kInstMovzx, GpVar, GpVar)
+  //! @overload
+  INST_2x(movzx, kInstMovzx, GpVar, Mem)
+
+  //! @brief Unsigned multiply (o0:o1 <- o1 * o2).
+  INST_3x_(mul, kInstMul, GpVar, GpVar, GpVar, o0.getId() != o1.getId())
+  //! @overload
+  INST_3x_(mul, kInstMul, GpVar, GpVar, Mem, o0.getId() != o1.getId())
+
+  //! @brief Two's complement negation.
+  INST_1x(neg, kInstNeg, GpVar)
+  //! @overload
+  INST_1x(neg, kInstNeg, Mem)
+
+  //! @brief No operation.
+  INST_0x(nop, kInstNop)
+
+  //! @brief One's complement negation.
+  INST_1x(not_, kInstNot, GpVar)
+  //! @overload
+  INST_1x(not_, kInstNot, Mem)
+
+  //! @brief Or.
+  INST_2x(or_, kInstOr, GpVar, GpVar)
+  //! @overload
+  INST_2x(or_, kInstOr, GpVar, Mem)
+  //! @overload
+  INST_2i(or_, kInstOr, GpVar, Imm)
+  //! @overload
+  INST_2x(or_, kInstOr, Mem, GpVar)
+  //! @overload
+  INST_2i(or_, kInstOr, Mem, Imm)
+
+  //! @brief Pop a value from the stack.
+  INST_1x_(pop, kInstPop, GpVar, o0.getSize() == 2 || o0.getSize() == _regSize)
+  //! @overload
+  INST_1x_(pop, kInstPop, Mem, o0.getSize() == 2 || o0.getSize() == _regSize)
+
+  //! @brief Pop stack into EFLAGS register (32-bit or 64-bit).
+  INST_0x(popf, kInstPopf)
+
+  //! @brief Return the number of bits set to 1 (SSE4.2).
+  INST_2x_(popcnt, kInstPopcnt, GpVar, GpVar, !o0.isGpb() && o0.getSize() == o1.getSize())
+  //! @overload
+  INST_2x_(popcnt, kInstPopcnt, GpVar, Mem, !o0.isGpb())
+
+  //! @brief Push word/dword/qword on the stack.
+  INST_1x_(push, kInstPush, GpVar, o0.getSize() == 2 || o0.getSize() == _regSize)
+  //! @brief Push word/dword/qword on the stack.
+  INST_1x_(push, kInstPush, Mem, o0.getSize() == 2 || o0.getSize() == _regSize)
+  //! @brief Push segment register on the stack.
+  INST_1x(push, kInstPush, SegReg)
+  //! @brief Push word/dword/qword on the stack.
+  INST_1i(push, kInstPush, Imm)
+
+  //! @brief Push EFLAGS register (32-bit or 64-bit) on the stack.
+  INST_0x(pushf, kInstPushf)
+
+  //! @brief Rotate bits left through carry.
+  INST_2x(rcl, kInstRcl, GpVar, GpVar)
+  //! @overload
+  INST_2x(rcl, kInstRcl, Mem, GpVar)
+  //! @brief Rotate bits left through carry.
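+  //!
+  //! @note Example sketch (assumes an `X86X64Compiler c` and a GpVar `x`;
+  //! illustrative only): rotating by one moves the old MSB into CF while the
+  //! old CF enters bit 0, an effective (size+1)-bit rotation.
+  //!
+  //! @code
+  //! c.rcl(x, Imm(1));   // CF <- old MSB of x; bit 0 of x <- old CF.
+  //! @endcode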
+  INST_2i(rcl, kInstRcl, GpVar, Imm)
+  //! @overload
+  INST_2i(rcl, kInstRcl, Mem, Imm)
+
+  //! @brief Rotate bits right through carry.
+  INST_2x(rcr, kInstRcr, GpVar, GpVar)
+  //! @overload
+  INST_2x(rcr, kInstRcr, Mem, GpVar)
+  //! @brief Rotate bits right through carry.
+  INST_2i(rcr, kInstRcr, GpVar, Imm)
+  //! @overload
+  INST_2i(rcr, kInstRcr, Mem, Imm)
+
+  //! @brief Read time-stamp counter (Pentium).
+  INST_2x_(rdtsc, kInstRdtsc, GpVar, GpVar, o0.getId() != o1.getId())
+  //! @brief Read time-stamp counter and processor id (Pentium).
+  INST_3x_(rdtscp, kInstRdtscp, GpVar, GpVar, GpVar, o0.getId() != o1.getId() && o1.getId() != o2.getId())
+
+  //! @brief Load ECX/RCX bytes from DS:[ESI/RSI] to AL.
+  INST_3x_(rep_lodsb, kInstRepLodsb, GpVar, GpVar, GpVar, o0.getId() != o1.getId() && o1.getId() != o2.getId())
+  //! @brief Load ECX/RCX dwords from DS:[ESI/RSI] to EAX.
+  INST_3x_(rep_lodsd, kInstRepLodsd, GpVar, GpVar, GpVar, o0.getId() != o1.getId() && o1.getId() != o2.getId())
+  //! @brief Load ECX/RCX words from DS:[ESI/RSI] to AX.
+  INST_3x_(rep_lodsw, kInstRepLodsw, GpVar, GpVar, GpVar, o0.getId() != o1.getId() && o1.getId() != o2.getId())
+
+  //! @brief Move ECX/RCX bytes from DS:[ESI/RSI] to ES:[EDI/RDI].
+  INST_3x_(rep_movsb, kInstRepMovsb, GpVar, GpVar, GpVar, o0.getId() != o1.getId() && o1.getId() != o2.getId())
+  //! @brief Move ECX/RCX dwords from DS:[ESI/RSI] to ES:[EDI/RDI].
+  INST_3x_(rep_movsd, kInstRepMovsd, GpVar, GpVar, GpVar, o0.getId() != o1.getId() && o1.getId() != o2.getId())
+  //! @brief Move ECX/RCX words from DS:[ESI/RSI] to ES:[EDI/RDI].
+  INST_3x_(rep_movsw, kInstRepMovsw, GpVar, GpVar, GpVar, o0.getId() != o1.getId() && o1.getId() != o2.getId())
+
+  //! @brief Fill ECX/RCX bytes at ES:[EDI/RDI] with AL.
+  INST_3x_(rep_stosb, kInstRepStosb, GpVar, GpVar, GpVar, o0.getId() != o1.getId() && o1.getId() != o2.getId())
+  //! @brief Fill ECX/RCX dwords at ES:[EDI/RDI] with EAX.
+  INST_3x_(rep_stosd, kInstRepStosd, GpVar, GpVar, GpVar, o0.getId() != o1.getId() && o1.getId() != o2.getId())
+  //! @brief Fill ECX/RCX words at ES:[EDI/RDI] with AX.
+  INST_3x_(rep_stosw, kInstRepStosw, GpVar, GpVar, GpVar, o0.getId() != o1.getId() && o1.getId() != o2.getId())
+
+  //! @brief Repeated find nonmatching bytes in ES:[EDI/RDI] and DS:[ESI/RSI].
+  INST_3x_(repe_cmpsb, kInstRepeCmpsb, GpVar, GpVar, GpVar, o0.getId() != o1.getId() && o1.getId() != o2.getId())
+  //! @brief Repeated find nonmatching dwords in ES:[EDI/RDI] and DS:[ESI/RSI].
+  INST_3x_(repe_cmpsd, kInstRepeCmpsd, GpVar, GpVar, GpVar, o0.getId() != o1.getId() && o1.getId() != o2.getId())
+  //! @brief Repeated find nonmatching words in ES:[EDI/RDI] and DS:[ESI/RSI].
+  INST_3x_(repe_cmpsw, kInstRepeCmpsw, GpVar, GpVar, GpVar, o0.getId() != o1.getId() && o1.getId() != o2.getId())
+
+  //! @brief Find non-AL byte starting at ES:[EDI/RDI].
+  INST_3x_(repe_scasb, kInstRepeScasb, GpVar, GpVar, GpVar, o0.getId() != o1.getId() && o1.getId() != o2.getId())
+  //! @brief Find non-EAX dword starting at ES:[EDI/RDI].
+  INST_3x_(repe_scasd, kInstRepeScasd, GpVar, GpVar, GpVar, o0.getId() != o1.getId() && o1.getId() != o2.getId())
+  //! @brief Find non-AX word starting at ES:[EDI/RDI].
+  INST_3x_(repe_scasw, kInstRepeScasw, GpVar, GpVar, GpVar, o0.getId() != o1.getId() && o1.getId() != o2.getId())
+
+  //! @brief Find matching bytes in ES:[EDI/RDI] and DS:[ESI/RSI].
+  INST_3x_(repne_cmpsb, kInstRepneCmpsb, GpVar, GpVar, GpVar, o0.getId() != o1.getId() && o1.getId() != o2.getId())
+  //! @brief Find matching dwords in ES:[EDI/RDI] and DS:[ESI/RSI].
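+  //!
+  //! @note The three GpVar operands of the rep-prefixed wrappers must be
+  //! distinct variables, as enforced by the `o0.getId() != o1.getId()`
+  //! conditions above. A hedged sketch with illustrative names (the operand
+  //! roles are assumptions, not documented by this header):
+  //!
+  //! @code
+  //! c.repne_cmpsd(cnt, p1, p2);   // All three GpVars must differ.
+  //! @endcode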
+ INST_3x_(repne_cmpsd, kInstRepneCmpsd, GpVar, GpVar, GpVar, o0.getId() != o1.getId() && o1.getId() != o2.getId()) + //! @brief Find matching words in [RDI] and [RSI]. + INST_3x_(repne_cmpsw, kInstRepneCmpsw, GpVar, GpVar, GpVar, o0.getId() != o1.getId() && o1.getId() != o2.getId()) + + //! @brief Find AL, starting at ES:[EDI/RDI]. + INST_3x_(repne_scasb, kInstRepneScasb, GpVar, GpVar, GpVar, o0.getId() != o1.getId() && o1.getId() != o2.getId()) + //! @brief Find EAX, starting at ES:[EDI/RDI]. + INST_3x_(repne_scasd, kInstRepneScasd, GpVar, GpVar, GpVar, o0.getId() != o1.getId() && o1.getId() != o2.getId()) + //! @brief Find AX, starting at ES:[EDI/RDI]. + INST_3x_(repne_scasw, kInstRepneScasw, GpVar, GpVar, GpVar, o0.getId() != o1.getId() && o1.getId() != o2.getId()) + + //! @brief Return. + ASMJIT_INLINE RetNode* ret() { return addRet(noOperand, noOperand); } + //! @overload + ASMJIT_INLINE RetNode* ret(const GpVar& o0) { return addRet(o0, noOperand); } + //! @overload + ASMJIT_INLINE RetNode* ret(const GpVar& o0, const GpVar& o1) { return addRet(o0, o1); } + //! @overload + ASMJIT_INLINE RetNode* ret(const XmmVar& o0) { return addRet(o0, noOperand); } + //! @overload + ASMJIT_INLINE RetNode* ret(const XmmVar& o0, const XmmVar& o1) { return addRet(o0, o1); } + + //! @brief Rotate bits left. + INST_2x(rol, kInstRol, GpVar, GpVar) + //! @overload + INST_2x(rol, kInstRol, Mem, GpVar) + //! @brief Rotate bits left. + INST_2i(rol, kInstRol, GpVar, Imm) + //! @overload + INST_2i(rol, kInstRol, Mem, Imm) + + //! @brief Rotate bits right. + INST_2x(ror, kInstRor, GpVar, GpVar) + //! @overload + INST_2x(ror, kInstRor, Mem, GpVar) + //! @brief Rotate bits right. + INST_2i(ror, kInstRor, GpVar, Imm) + //! @overload + INST_2i(ror, kInstRor, Mem, Imm) + + //! @brief Store @a var (allocated in AH/AX/EAX/RAX) into Flags. + INST_1x(sahf, kInstSahf, GpVar) + + //! @brief Integer subtraction with borrow. + INST_2x(sbb, kInstSbb, GpVar, GpVar) + //! @overload + INST_2x(sbb, kInstSbb, GpVar, Mem) + //! @overload + INST_2i(sbb, kInstSbb, GpVar, Imm) + //! @overload + INST_2x(sbb, kInstSbb, Mem, GpVar) + //! @overload + INST_2i(sbb, kInstSbb, Mem, Imm) + + //! @brief Shift bits left. + INST_2x(sal, kInstSal, GpVar, GpVar) + //! @overload + INST_2x(sal, kInstSal, Mem, GpVar) + //! @brief Shift bits left. + INST_2i(sal, kInstSal, GpVar, Imm) + //! @overload + INST_2i(sal, kInstSal, Mem, Imm) + + //! @brief Shift bits right. + INST_2x(sar, kInstSar, GpVar, GpVar) + //! @overload + INST_2x(sar, kInstSar, Mem, GpVar) + //! @brief Shift bits right. + INST_2i(sar, kInstSar, GpVar, Imm) + //! @overload + INST_2i(sar, kInstSar, Mem, Imm) + + //! @brief Set byte on condition. + INST_1cc(set, kInstSet, condToSetcc, GpVar) + //! @brief Set byte on condition. + INST_1cc(set, kInstSet, condToSetcc, Mem) + + //! @brief Shift bits left. + INST_2x(shl, kInstShl, GpVar, GpVar) + //! @overload + INST_2x(shl, kInstShl, Mem, GpVar) + //! @brief Shift bits left. + INST_2i(shl, kInstShl, GpVar, Imm) + //! @overload + INST_2i(shl, kInstShl, Mem, Imm) + + //! @brief Shift bits right. + INST_2x(shr, kInstShr, GpVar, GpVar) + //! @overload + INST_2x(shr, kInstShr, Mem, GpVar) + //! @brief Shift bits right. + INST_2i(shr, kInstShr, GpVar, Imm) + //! @overload + INST_2i(shr, kInstShr, Mem, Imm) + + //! @brief Double precision shift left. + INST_3x(shld, kInstShld, GpVar, GpVar, GpVar) + //! @overload + INST_3x(shld, kInstShld, Mem, GpVar, GpVar) + //! @brief Double precision shift left. 
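+  //!
+  //! @note Sketch (illustrative, assuming GpVars `hi` and `lo`): shift `hi`
+  //! left by 8 bits while filling the vacated low bits from the top of `lo`.
+  //!
+  //! @code
+  //! c.shld(hi, lo, Imm(8));   // hi = (hi << 8) | (lo >> (size - 8)).
+  //! @endcode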
+ INST_3i(shld, kInstShld, GpVar, GpVar, Imm) + //! @overload + INST_3i(shld, kInstShld, Mem, GpVar, Imm) + + //! @brief Double precision shift right. + INST_3x(shrd, kInstShrd, GpVar, GpVar, GpVar) + //! @overload + INST_3x(shrd, kInstShrd, Mem, GpVar, GpVar) + //! @brief Double precision shift right. + INST_3i(shrd, kInstShrd, GpVar, GpVar, Imm) + //! @overload + INST_3i(shrd, kInstShrd, Mem, GpVar, Imm) + + //! @brief Set carry flag to 1. + INST_0x(stc, kInstStc) + //! @brief Set direction flag to 1. + INST_0x(std, kInstStd) + + //! @brief Subtract. + INST_2x(sub, kInstSub, GpVar, GpVar) + //! @overload + INST_2x(sub, kInstSub, GpVar, Mem) + //! @overload + INST_2i(sub, kInstSub, GpVar, Imm) + //! @overload + INST_2x(sub, kInstSub, Mem, GpVar) + //! @overload + INST_2i(sub, kInstSub, Mem, Imm) + + //! @brief Logical compare. + INST_2x(test, kInstTest, GpVar, GpVar) + //! @overload + INST_2i(test, kInstTest, GpVar, Imm) + //! @overload + INST_2x(test, kInstTest, Mem, GpVar) + //! @overload + INST_2i(test, kInstTest, Mem, Imm) + + //! @brief Undefined instruction - Raise #UD exception. + INST_0x(ud2, kInstUd2) + + //! @brief Exchange and add. + INST_2x(xadd, kInstXadd, GpVar, GpVar) + //! @overload + INST_2x(xadd, kInstXadd, Mem, GpVar) + + //! @brief Exchange register/memory with register. + INST_2x(xchg, kInstXchg, GpVar, GpVar) + //! @overload + INST_2x(xchg, kInstXchg, Mem, GpVar) + //! @overload + INST_2x(xchg, kInstXchg, GpVar, Mem) + + //! @brief Xor. + INST_2x(xor_, kInstXor, GpVar, GpVar) + //! @overload + INST_2x(xor_, kInstXor, GpVar, Mem) + //! @overload + INST_2i(xor_, kInstXor, GpVar, Imm) + //! @overload + INST_2x(xor_, kInstXor, Mem, GpVar) + //! @overload + INST_2i(xor_, kInstXor, Mem, Imm) + + // -------------------------------------------------------------------------- + // [MMX] + // -------------------------------------------------------------------------- + + //! @brief Move dword (MMX). + INST_2x(movd, kInstMovd, Mem, MmVar) + //! @overload + INST_2x(movd, kInstMovd, GpVar, MmVar) + //! @overload + INST_2x(movd, kInstMovd, MmVar, Mem) + //! @overload + INST_2x(movd, kInstMovd, MmVar, GpVar) + + //! @brief Move qword (MMX). + INST_2x(movq, kInstMovq, MmVar, MmVar) + //! @overload + INST_2x(movq, kInstMovq, Mem, MmVar) + //! @overload + INST_2x(movq, kInstMovq, MmVar, Mem) + + //! @brief Pack with signed saturation (MMX). + INST_2x(packsswb, kInstPacksswb, MmVar, MmVar) + //! @overload + INST_2x(packsswb, kInstPacksswb, MmVar, Mem) + + //! @brief Pack with signed saturation (MMX). + INST_2x(packssdw, kInstPackssdw, MmVar, MmVar) + //! @overload + INST_2x(packssdw, kInstPackssdw, MmVar, Mem) + + //! @brief Pack with unsigned saturation (MMX). + INST_2x(packuswb, kInstPackuswb, MmVar, MmVar) + //! @overload + INST_2x(packuswb, kInstPackuswb, MmVar, Mem) + + //! @brief Packed byte add (MMX). + INST_2x(paddb, kInstPaddb, MmVar, MmVar) + //! @overload + INST_2x(paddb, kInstPaddb, MmVar, Mem) + + //! @brief Packed word add (MMX). + INST_2x(paddw, kInstPaddw, MmVar, MmVar) + //! @overload + INST_2x(paddw, kInstPaddw, MmVar, Mem) + + //! @brief Packed dword add (MMX). + INST_2x(paddd, kInstPaddd, MmVar, MmVar) + //! @overload + INST_2x(paddd, kInstPaddd, MmVar, Mem) + + //! @brief Packed add with saturation (MMX). + INST_2x(paddsb, kInstPaddsb, MmVar, MmVar) + //! @overload + INST_2x(paddsb, kInstPaddsb, MmVar, Mem) + + //! @brief Packed add with saturation (MMX). + INST_2x(paddsw, kInstPaddsw, MmVar, MmVar) + //! @overload + INST_2x(paddsw, kInstPaddsw, MmVar, Mem) + + //! 
@brief Packed add unsigned with saturation (MMX). + INST_2x(paddusb, kInstPaddusb, MmVar, MmVar) + //! @overload + INST_2x(paddusb, kInstPaddusb, MmVar, Mem) + + //! @brief Packed add unsigned with saturation (MMX). + INST_2x(paddusw, kInstPaddusw, MmVar, MmVar) + //! @overload + INST_2x(paddusw, kInstPaddusw, MmVar, Mem) + + //! @brief And (MMX). + INST_2x(pand, kInstPand, MmVar, MmVar) + //! @overload + INST_2x(pand, kInstPand, MmVar, Mem) + + //! @brief And-not (MMX). + INST_2x(pandn, kInstPandn, MmVar, MmVar) + //! @overload + INST_2x(pandn, kInstPandn, MmVar, Mem) + + //! @brief Packed compare bytes for equal (MMX). + INST_2x(pcmpeqb, kInstPcmpeqb, MmVar, MmVar) + //! @overload + INST_2x(pcmpeqb, kInstPcmpeqb, MmVar, Mem) + + //! @brief Packed compare words for equal (MMX). + INST_2x(pcmpeqw, kInstPcmpeqw, MmVar, MmVar) + //! @overload + INST_2x(pcmpeqw, kInstPcmpeqw, MmVar, Mem) + + //! @brief Packed compare dwords for equal (MMX). + INST_2x(pcmpeqd, kInstPcmpeqd, MmVar, MmVar) + //! @overload + INST_2x(pcmpeqd, kInstPcmpeqd, MmVar, Mem) + + //! @brief Packed compare bytes for greater than (MMX). + INST_2x(pcmpgtb, kInstPcmpgtb, MmVar, MmVar) + //! @overload + INST_2x(pcmpgtb, kInstPcmpgtb, MmVar, Mem) + + //! @brief Packed compare words for greater than (MMX). + INST_2x(pcmpgtw, kInstPcmpgtw, MmVar, MmVar) + //! @overload + INST_2x(pcmpgtw, kInstPcmpgtw, MmVar, Mem) + + //! @brief Packed compare dwords for greater than (MMX). + INST_2x(pcmpgtd, kInstPcmpgtd, MmVar, MmVar) + //! @overload + INST_2x(pcmpgtd, kInstPcmpgtd, MmVar, Mem) + + //! @brief Packed multiply high (MMX). + INST_2x(pmulhw, kInstPmulhw, MmVar, MmVar) + //! @overload + INST_2x(pmulhw, kInstPmulhw, MmVar, Mem) + + //! @brief Packed multiply low (MMX). + INST_2x(pmullw, kInstPmullw, MmVar, MmVar) + //! @overload + INST_2x(pmullw, kInstPmullw, MmVar, Mem) + + //! @brief Bitwise logical or (MMX). + INST_2x(por, kInstPor, MmVar, MmVar) + //! @overload + INST_2x(por, kInstPor, MmVar, Mem) + + //! @brief Packed multiply and add (MMX). + INST_2x(pmaddwd, kInstPmaddwd, MmVar, MmVar) + //! @overload + INST_2x(pmaddwd, kInstPmaddwd, MmVar, Mem) + + //! @brief Packed shift left logical (MMX). + INST_2x(pslld, kInstPslld, MmVar, MmVar) + //! @overload + INST_2x(pslld, kInstPslld, MmVar, Mem) + //! @overload + INST_2i(pslld, kInstPslld, MmVar, Imm) + + //! @brief Packed shift left logical (MMX). + INST_2x(psllq, kInstPsllq, MmVar, MmVar) + //! @overload + INST_2x(psllq, kInstPsllq, MmVar, Mem) + //! @overload + INST_2i(psllq, kInstPsllq, MmVar, Imm) + + //! @brief Packed shift left logical (MMX). + INST_2x(psllw, kInstPsllw, MmVar, MmVar) + //! @overload + INST_2x(psllw, kInstPsllw, MmVar, Mem) + //! @overload + INST_2i(psllw, kInstPsllw, MmVar, Imm) + + //! @brief Packed shift right arithmetic (MMX). + INST_2x(psrad, kInstPsrad, MmVar, MmVar) + //! @overload + INST_2x(psrad, kInstPsrad, MmVar, Mem) + //! @overload + INST_2i(psrad, kInstPsrad, MmVar, Imm) + + //! @brief Packed shift right arithmetic (MMX). + INST_2x(psraw, kInstPsraw, MmVar, MmVar) + //! @overload + INST_2x(psraw, kInstPsraw, MmVar, Mem) + //! @overload + INST_2i(psraw, kInstPsraw, MmVar, Imm) + + //! @brief Packed shift right logical (MMX). + INST_2x(psrld, kInstPsrld, MmVar, MmVar) + //! @overload + INST_2x(psrld, kInstPsrld, MmVar, Mem) + //! @overload + INST_2i(psrld, kInstPsrld, MmVar, Imm) + + //! @brief Packed shift right logical (MMX). + INST_2x(psrlq, kInstPsrlq, MmVar, MmVar) + //! @overload + INST_2x(psrlq, kInstPsrlq, MmVar, Mem) + //! 
@overload
+  INST_2i(psrlq, kInstPsrlq, MmVar, Imm)
+
+  //! @brief Packed shift right logical (MMX).
+  INST_2x(psrlw, kInstPsrlw, MmVar, MmVar)
+  //! @overload
+  INST_2x(psrlw, kInstPsrlw, MmVar, Mem)
+  //! @overload
+  INST_2i(psrlw, kInstPsrlw, MmVar, Imm)
+
+  //! @brief Packed subtract (MMX).
+  INST_2x(psubb, kInstPsubb, MmVar, MmVar)
+  //! @overload
+  INST_2x(psubb, kInstPsubb, MmVar, Mem)
+
+  //! @brief Packed subtract (MMX).
+  INST_2x(psubw, kInstPsubw, MmVar, MmVar)
+  //! @overload
+  INST_2x(psubw, kInstPsubw, MmVar, Mem)
+
+  //! @brief Packed subtract (MMX).
+  INST_2x(psubd, kInstPsubd, MmVar, MmVar)
+  //! @overload
+  INST_2x(psubd, kInstPsubd, MmVar, Mem)
+
+  //! @brief Packed subtract with saturation (MMX).
+  INST_2x(psubsb, kInstPsubsb, MmVar, MmVar)
+  //! @overload
+  INST_2x(psubsb, kInstPsubsb, MmVar, Mem)
+
+  //! @brief Packed subtract with saturation (MMX).
+  INST_2x(psubsw, kInstPsubsw, MmVar, MmVar)
+  //! @overload
+  INST_2x(psubsw, kInstPsubsw, MmVar, Mem)
+
+  //! @brief Packed subtract with unsigned saturation (MMX).
+  INST_2x(psubusb, kInstPsubusb, MmVar, MmVar)
+  //! @overload
+  INST_2x(psubusb, kInstPsubusb, MmVar, Mem)
+
+  //! @brief Packed subtract with unsigned saturation (MMX).
+  INST_2x(psubusw, kInstPsubusw, MmVar, MmVar)
+  //! @overload
+  INST_2x(psubusw, kInstPsubusw, MmVar, Mem)
+
+  //! @brief Unpack high packed data (MMX).
+  INST_2x(punpckhbw, kInstPunpckhbw, MmVar, MmVar)
+  //! @overload
+  INST_2x(punpckhbw, kInstPunpckhbw, MmVar, Mem)
+
+  //! @brief Unpack high packed data (MMX).
+  INST_2x(punpckhwd, kInstPunpckhwd, MmVar, MmVar)
+  //! @overload
+  INST_2x(punpckhwd, kInstPunpckhwd, MmVar, Mem)
+
+  //! @brief Unpack high packed data (MMX).
+  INST_2x(punpckhdq, kInstPunpckhdq, MmVar, MmVar)
+  //! @overload
+  INST_2x(punpckhdq, kInstPunpckhdq, MmVar, Mem)
+
+  //! @brief Unpack low packed data (MMX).
+  INST_2x(punpcklbw, kInstPunpcklbw, MmVar, MmVar)
+  //! @overload
+  INST_2x(punpcklbw, kInstPunpcklbw, MmVar, Mem)
+
+  //! @brief Unpack low packed data (MMX).
+  INST_2x(punpcklwd, kInstPunpcklwd, MmVar, MmVar)
+  //! @overload
+  INST_2x(punpcklwd, kInstPunpcklwd, MmVar, Mem)
+
+  //! @brief Unpack low packed data (MMX).
+  INST_2x(punpckldq, kInstPunpckldq, MmVar, MmVar)
+  //! @overload
+  INST_2x(punpckldq, kInstPunpckldq, MmVar, Mem)
+
+  //! @brief Xor (MMX).
+  INST_2x(pxor, kInstPxor, MmVar, MmVar)
+  //! @overload
+  INST_2x(pxor, kInstPxor, MmVar, Mem)
+
+  //! @brief Empty MMX state.
+  INST_0x(emms, kInstEmms)
+
+  // --------------------------------------------------------------------------
+  // [3dNow]
+  // --------------------------------------------------------------------------
+
+  //! @brief Packed SP-FP to integer convert (3dNow!).
+  INST_2x(pf2id, kInstPf2id, MmVar, MmVar)
+  //! @overload
+  INST_2x(pf2id, kInstPf2id, MmVar, Mem)
+
+  //! @brief Packed SP-FP to integer word convert (3dNow!).
+  INST_2x(pf2iw, kInstPf2iw, MmVar, MmVar)
+  //! @overload
+  INST_2x(pf2iw, kInstPf2iw, MmVar, Mem)
+
+  //! @brief Packed SP-FP accumulate (3dNow!).
+  INST_2x(pfacc, kInstPfacc, MmVar, MmVar)
+  //! @overload
+  INST_2x(pfacc, kInstPfacc, MmVar, Mem)
+
+  //! @brief Packed SP-FP addition (3dNow!).
+  INST_2x(pfadd, kInstPfadd, MmVar, MmVar)
+  //! @overload
+  INST_2x(pfadd, kInstPfadd, MmVar, Mem)
+
+  //! @brief Packed SP-FP compare - dst == src (3dNow!).
+  INST_2x(pfcmpeq, kInstPfcmpeq, MmVar, MmVar)
+  //! @overload
+  INST_2x(pfcmpeq, kInstPfcmpeq, MmVar, Mem)
+
+  //! @brief Packed SP-FP compare - dst >= src (3dNow!).
+  INST_2x(pfcmpge, kInstPfcmpge, MmVar, MmVar)
+  //!
@overload + INST_2x(pfcmpge, kInstPfcmpge, MmVar, Mem) + + //! @brief Packed SP-FP compare - dst > src (3dNow!). + INST_2x(pfcmpgt, kInstPfcmpgt, MmVar, MmVar) + //! @overload + INST_2x(pfcmpgt, kInstPfcmpgt, MmVar, Mem) + + //! @brief Packed SP-FP maximum (3dNow!). + INST_2x(pfmax, kInstPfmax, MmVar, MmVar) + //! @overload + INST_2x(pfmax, kInstPfmax, MmVar, Mem) + + //! @brief Packed SP-FP minimum (3dNow!). + INST_2x(pfmin, kInstPfmin, MmVar, MmVar) + //! @overload + INST_2x(pfmin, kInstPfmin, MmVar, Mem) + + //! @brief Packed SP-FP multiply (3dNow!). + INST_2x(pfmul, kInstPfmul, MmVar, MmVar) + //! @overload + INST_2x(pfmul, kInstPfmul, MmVar, Mem) + + //! @brief Packed SP-FP negative accumulate (3dNow!). + INST_2x(pfnacc, kInstPfnacc, MmVar, MmVar) + //! @overload + INST_2x(pfnacc, kInstPfnacc, MmVar, Mem) + + //! @brief Packed SP-FP mixed accumulate (3dNow!). + INST_2x(pfpnacc, kInstPfpnacc, MmVar, MmVar) + //! @overload + INST_2x(pfpnacc, kInstPfpnacc, MmVar, Mem) + + //! @brief Packed SP-FP reciprocal approximation (3dNow!). + INST_2x(pfrcp, kInstPfrcp, MmVar, MmVar) + //! @overload + INST_2x(pfrcp, kInstPfrcp, MmVar, Mem) + + //! @brief Packed SP-FP reciprocal, first iteration step (3dNow!). + INST_2x(pfrcpit1, kInstPfrcpit1, MmVar, MmVar) + //! @overload + INST_2x(pfrcpit1, kInstPfrcpit1, MmVar, Mem) + + //! @brief Packed SP-FP reciprocal, second iteration step (3dNow!). + INST_2x(pfrcpit2, kInstPfrcpit2, MmVar, MmVar) + //! @overload + INST_2x(pfrcpit2, kInstPfrcpit2, MmVar, Mem) + + //! @brief Packed SP-FP reciprocal square root, first iteration step (3dNow!). + INST_2x(pfrsqit1, kInstPfrsqit1, MmVar, MmVar) + //! @overload + INST_2x(pfrsqit1, kInstPfrsqit1, MmVar, Mem) + + //! @brief Packed SP-FP reciprocal square root approximation (3dNow!). + INST_2x(pfrsqrt, kInstPfrsqrt, MmVar, MmVar) + //! @overload + INST_2x(pfrsqrt, kInstPfrsqrt, MmVar, Mem) + + //! @brief Packed SP-FP subtract (3dNow!). + INST_2x(pfsub, kInstPfsub, MmVar, MmVar) + //! @overload + INST_2x(pfsub, kInstPfsub, MmVar, Mem) + + //! @brief Packed SP-FP reverse subtract (3dNow!). + INST_2x(pfsubr, kInstPfsubr, MmVar, MmVar) + //! @overload + INST_2x(pfsubr, kInstPfsubr, MmVar, Mem) + + //! @brief Packed dwords to SP-FP (3dNow!). + INST_2x(pi2fd, kInstPi2fd, MmVar, MmVar) + //! @overload + INST_2x(pi2fd, kInstPi2fd, MmVar, Mem) + + //! @brief Packed words to SP-FP (3dNow!). + INST_2x(pi2fw, kInstPi2fw, MmVar, MmVar) + //! @overload + INST_2x(pi2fw, kInstPi2fw, MmVar, Mem) + + //! @brief Packed swap dword (3dNow!) + INST_2x(pswapd, kInstPswapd, MmVar, MmVar) + //! @overload + INST_2x(pswapd, kInstPswapd, MmVar, Mem) + + //! @brief Prefetch (3dNow!). + INST_1x(prefetch_3dnow, kInstPrefetch3dNow, Mem) + + //! @brief Prefetch and set cache to modified (3dNow!). + INST_1x(prefetchw_3dnow, kInstPrefetchw3dNow, Mem) + + //! @brief Faster EMMS (3dNow!). + INST_0x(femms, kInstFemms) + + // -------------------------------------------------------------------------- + // [SSE] + // -------------------------------------------------------------------------- + + //! @brief Packed SP-FP add (SSE). + INST_2x(addps, kInstAddps, XmmVar, XmmVar) + //! @overload + INST_2x(addps, kInstAddps, XmmVar, Mem) + + //! @brief Scalar SP-FP add (SSE). + INST_2x(addss, kInstAddss, XmmVar, XmmVar) + //! @overload + INST_2x(addss, kInstAddss, XmmVar, Mem) + + //! @brief And-not For SP-FP (SSE). + INST_2x(andnps, kInstAndnps, XmmVar, XmmVar) + //! @overload + INST_2x(andnps, kInstAndnps, XmmVar, Mem) + + //! @brief And for SP-FP (SSE). 
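+  //!
+  //! @note Hedged sketch of the common SSE bitwise idioms built from these
+  //! wrappers (assumes XmmVars `x` and `mask` created by the caller):
+  //!
+  //! @code
+  //! c.xorps(x, x);      // Zero all four SP-FP lanes of x.
+  //! c.andps(x, mask);   // Keep only the bits selected by mask.
+  //! @endcode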
+ INST_2x(andps, kInstAndps, XmmVar, XmmVar) + //! @overload + INST_2x(andps, kInstAndps, XmmVar, Mem) + + //! @brief Packed SP-FP compare (SSE). + INST_3i(cmpps, kInstCmpps, XmmVar, XmmVar, Imm) + //! @overload + INST_3i(cmpps, kInstCmpps, XmmVar, Mem, Imm) + + //! @brief Compare scalar SP-FP Values (SSE). + INST_3i(cmpss, kInstCmpss, XmmVar, XmmVar, Imm) + //! @overload + INST_3i(cmpss, kInstCmpss, XmmVar, Mem, Imm) + + //! @brief Scalar ordered SP-FP compare and set EFLAGS (SSE). + INST_2x(comiss, kInstComiss, XmmVar, XmmVar) + //! @overload + INST_2x(comiss, kInstComiss, XmmVar, Mem) + + //! @brief Packed signed INT32 to packed SP-FP conversion (SSE). + INST_2x(cvtpi2ps, kInstCvtpi2ps, XmmVar, MmVar) + //! @overload + INST_2x(cvtpi2ps, kInstCvtpi2ps, XmmVar, Mem) + + //! @brief Packed SP-FP to packed INT32 conversion (SSE). + INST_2x(cvtps2pi, kInstCvtps2pi, MmVar, XmmVar) + //! @overload + INST_2x(cvtps2pi, kInstCvtps2pi, MmVar, Mem) + + //! @brief Scalar signed INT32 to SP-FP conversion (SSE). + INST_2x(cvtsi2ss, kInstCvtsi2ss, XmmVar, GpVar) + //! @overload + INST_2x(cvtsi2ss, kInstCvtsi2ss, XmmVar, Mem) + + //! @brief Scalar SP-FP to Signed INT32 conversion (SSE). + INST_2x(cvtss2si, kInstCvtss2si, GpVar, XmmVar) + //! @overload + INST_2x(cvtss2si, kInstCvtss2si, GpVar, Mem) + + //! @brief Packed SP-FP to packed INT32 conversion (truncate) (SSE). + INST_2x(cvttps2pi, kInstCvttps2pi, MmVar, XmmVar) + //! @overload + INST_2x(cvttps2pi, kInstCvttps2pi, MmVar, Mem) + + //! @brief Scalar SP-FP to signed INT32 conversion (truncate) (SSE). + INST_2x(cvttss2si, kInstCvttss2si, GpVar, XmmVar) + //! @overload + INST_2x(cvttss2si, kInstCvttss2si, GpVar, Mem) + + //! @brief Packed SP-FP divide (SSE). + INST_2x(divps, kInstDivps, XmmVar, XmmVar) + //! @overload + INST_2x(divps, kInstDivps, XmmVar, Mem) + + //! @brief Scalar SP-FP divide (SSE). + INST_2x(divss, kInstDivss, XmmVar, XmmVar) + //! @overload + INST_2x(divss, kInstDivss, XmmVar, Mem) + + //! @brief Load streaming SIMD extension control/status (SSE). + INST_1x(ldmxcsr, kInstLdmxcsr, Mem) + + //! @brief Byte mask write (SSE). + //! + //! @note The default memory location is specified by DS:EDI. + INST_2x(maskmovq, kInstMaskmovq, MmVar, MmVar) + + //! @brief Packed SP-FP maximum (SSE). + INST_2x(maxps, kInstMaxps, XmmVar, XmmVar) + //! @overload + INST_2x(maxps, kInstMaxps, XmmVar, Mem) + + //! @brief Scalar SP-FP maximum (SSE). + INST_2x(maxss, kInstMaxss, XmmVar, XmmVar) + //! @overload + INST_2x(maxss, kInstMaxss, XmmVar, Mem) + + //! @brief Packed SP-FP minimum (SSE). + INST_2x(minps, kInstMinps, XmmVar, XmmVar) + //! @overload + INST_2x(minps, kInstMinps, XmmVar, Mem) + + //! @brief Scalar SP-FP minimum (SSE). + INST_2x(minss, kInstMinss, XmmVar, XmmVar) + //! @overload + INST_2x(minss, kInstMinss, XmmVar, Mem) + + //! @brief Move aligned packed SP-FP values (SSE). + INST_2x(movaps, kInstMovaps, XmmVar, XmmVar) + //! @overload + INST_2x(movaps, kInstMovaps, XmmVar, Mem) + //! @brief Move aligned packed SP-FP values (SSE). + INST_2x(movaps, kInstMovaps, Mem, XmmVar) + + //! @brief Move dword. + INST_2x(movd, kInstMovd, Mem, XmmVar) + //! @overload + INST_2x(movd, kInstMovd, GpVar, XmmVar) + //! @overload + INST_2x(movd, kInstMovd, XmmVar, Mem) + //! @overload + INST_2x(movd, kInstMovd, XmmVar, GpVar) + + //! @brief Move qword (SSE). + INST_2x(movq, kInstMovq, XmmVar, XmmVar) + //! @overload + INST_2x(movq, kInstMovq, Mem, XmmVar) + //! @overload + INST_2x(movq, kInstMovq, XmmVar, Mem) + + //! @brief Move 64 bits non-temporal (SSE). 
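+  //!
+  //! @note Non-temporal stores bypass the cache hierarchy and are typically
+  //! ordered with sfence before the data is consumed elsewhere. A hedged
+  //! sketch (`dst` is an illustrative Mem operand, `m0` an MmVar):
+  //!
+  //! @code
+  //! c.movntq(dst, m0);   // Stream the qword in m0 to memory.
+  //! c.sfence();          // Order the streaming store.
+  //! @endcode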
+ INST_2x(movntq, kInstMovntq, Mem, MmVar) + + //! @brief High to low packed SP-FP (SSE). + INST_2x(movhlps, kInstMovhlps, XmmVar, XmmVar) + + //! @brief Move high packed SP-FP (SSE). + INST_2x(movhps, kInstMovhps, XmmVar, Mem) + //! @brief Move high packed SP-FP (SSE). + INST_2x(movhps, kInstMovhps, Mem, XmmVar) + + //! @brief Move low to high packed SP-FP (SSE). + INST_2x(movlhps, kInstMovlhps, XmmVar, XmmVar) + + //! @brief Move low packed SP-FP (SSE). + INST_2x(movlps, kInstMovlps, XmmVar, Mem) + //! @brief Move low packed SP-FP (SSE). + INST_2x(movlps, kInstMovlps, Mem, XmmVar) + + //! @brief Move aligned four packed SP-FP non-temporal (SSE). + INST_2x(movntps, kInstMovntps, Mem, XmmVar) + + //! @brief Move scalar SP-FP (SSE). + INST_2x(movss, kInstMovss, XmmVar, XmmVar) + //! @overload + INST_2x(movss, kInstMovss, XmmVar, Mem) + //! @overload + INST_2x(movss, kInstMovss, Mem, XmmVar) + + //! @brief Move unaligned packed SP-FP values (SSE). + INST_2x(movups, kInstMovups, XmmVar, XmmVar) + //! @overload + INST_2x(movups, kInstMovups, XmmVar, Mem) + //! @overload + INST_2x(movups, kInstMovups, Mem, XmmVar) + + //! @brief Packed SP-FP multiply (SSE). + INST_2x(mulps, kInstMulps, XmmVar, XmmVar) + //! @overload + INST_2x(mulps, kInstMulps, XmmVar, Mem) + + //! @brief Scalar SP-FP multiply (SSE). + INST_2x(mulss, kInstMulss, XmmVar, XmmVar) + //! @overload + INST_2x(mulss, kInstMulss, XmmVar, Mem) + + //! @brief Or for SP-FP data (SSE). + INST_2x(orps, kInstOrps, XmmVar, XmmVar) + //! @overload + INST_2x(orps, kInstOrps, XmmVar, Mem) + + //! @brief Packed average (SSE). + INST_2x(pavgb, kInstPavgb, MmVar, MmVar) + //! @overload + INST_2x(pavgb, kInstPavgb, MmVar, Mem) + + //! @brief Packed average (SSE). + INST_2x(pavgw, kInstPavgw, MmVar, MmVar) + //! @overload + INST_2x(pavgw, kInstPavgw, MmVar, Mem) + + //! @brief Extract word (SSE). + INST_3i(pextrw, kInstPextrw, GpVar, MmVar, Imm) + + //! @brief Insert word (SSE). + INST_3i(pinsrw, kInstPinsrw, MmVar, GpVar, Imm) + //! @overload + INST_3i(pinsrw, kInstPinsrw, MmVar, Mem, Imm) + + //! @brief Packed signed integer word maximum (SSE). + INST_2x(pmaxsw, kInstPmaxsw, MmVar, MmVar) + //! @overload + INST_2x(pmaxsw, kInstPmaxsw, MmVar, Mem) + + //! @brief Packed unsigned integer byte maximum (SSE). + INST_2x(pmaxub, kInstPmaxub, MmVar, MmVar) + //! @overload + INST_2x(pmaxub, kInstPmaxub, MmVar, Mem) + + //! @brief Packed signed integer word minimum (SSE). + INST_2x(pminsw, kInstPminsw, MmVar, MmVar) + //! @overload + INST_2x(pminsw, kInstPminsw, MmVar, Mem) + + //! @brief Packed unsigned integer byte minimum (SSE). + INST_2x(pminub, kInstPminub, MmVar, MmVar) + //! @overload + INST_2x(pminub, kInstPminub, MmVar, Mem) + + //! @brief Move byte mask to integer (SSE). + INST_2x(pmovmskb, kInstPmovmskb, GpVar, MmVar) + + //! @brief Packed multiply high unsigned (SSE). + INST_2x(pmulhuw, kInstPmulhuw, MmVar, MmVar) + //! @overload + INST_2x(pmulhuw, kInstPmulhuw, MmVar, Mem) + + //! @brief Packed sum of absolute differences (SSE). + INST_2x(psadbw, kInstPsadbw, MmVar, MmVar) + //! @overload + INST_2x(psadbw, kInstPsadbw, MmVar, Mem) + + //! @brief Packed shuffle word (SSE). + INST_3i(pshufw, kInstPshufw, MmVar, MmVar, Imm) + //! @overload + INST_3i(pshufw, kInstPshufw, MmVar, Mem, Imm) + + //! @brief Packed SP-FP reciprocal (SSE). + INST_2x(rcpps, kInstRcpps, XmmVar, XmmVar) + //! @overload + INST_2x(rcpps, kInstRcpps, XmmVar, Mem) + + //! @brief Scalar SP-FP reciprocal (SSE). + INST_2x(rcpss, kInstRcpss, XmmVar, XmmVar) + //! 
@overload
+  INST_2x(rcpss, kInstRcpss, XmmVar, Mem)
+
+  //! @brief Prefetch (SSE).
+  INST_2i(prefetch, kInstPrefetch, Mem, Imm)
+
+  //! @brief Compute sum of absolute differences (SSE2).
+  INST_2x(psadbw, kInstPsadbw, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(psadbw, kInstPsadbw, XmmVar, Mem)
+
+  //! @brief Packed SP-FP square root reciprocal (SSE).
+  INST_2x(rsqrtps, kInstRsqrtps, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(rsqrtps, kInstRsqrtps, XmmVar, Mem)
+
+  //! @brief Scalar SP-FP square root reciprocal (SSE).
+  INST_2x(rsqrtss, kInstRsqrtss, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(rsqrtss, kInstRsqrtss, XmmVar, Mem)
+
+  //! @brief Store fence (SSE).
+  INST_0x(sfence, kInstSfence)
+
+  //! @brief Shuffle SP-FP (SSE).
+  INST_3i(shufps, kInstShufps, XmmVar, XmmVar, Imm)
+  //! @overload
+  INST_3i(shufps, kInstShufps, XmmVar, Mem, Imm)
+
+  //! @brief Packed SP-FP square root (SSE).
+  INST_2x(sqrtps, kInstSqrtps, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(sqrtps, kInstSqrtps, XmmVar, Mem)
+
+  //! @brief Scalar SP-FP square root (SSE).
+  INST_2x(sqrtss, kInstSqrtss, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(sqrtss, kInstSqrtss, XmmVar, Mem)
+
+  //! @brief Store streaming SIMD extension control/status (SSE).
+  INST_1x(stmxcsr, kInstStmxcsr, Mem)
+
+  //! @brief Packed SP-FP subtract (SSE).
+  INST_2x(subps, kInstSubps, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(subps, kInstSubps, XmmVar, Mem)
+
+  //! @brief Scalar SP-FP subtract (SSE).
+  INST_2x(subss, kInstSubss, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(subss, kInstSubss, XmmVar, Mem)
+
+  //! @brief Unordered scalar SP-FP compare and set EFLAGS (SSE).
+  INST_2x(ucomiss, kInstUcomiss, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(ucomiss, kInstUcomiss, XmmVar, Mem)
+
+  //! @brief Unpack high packed SP-FP data (SSE).
+  INST_2x(unpckhps, kInstUnpckhps, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(unpckhps, kInstUnpckhps, XmmVar, Mem)
+
+  //! @brief Unpack low packed SP-FP data (SSE).
+  INST_2x(unpcklps, kInstUnpcklps, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(unpcklps, kInstUnpcklps, XmmVar, Mem)
+
+  //! @brief Xor for SP-FP data (SSE).
+  INST_2x(xorps, kInstXorps, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(xorps, kInstXorps, XmmVar, Mem)
+
+  // --------------------------------------------------------------------------
+  // [SSE2]
+  // --------------------------------------------------------------------------
+
+  //! @brief Packed DP-FP add (SSE2).
+  INST_2x(addpd, kInstAddpd, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(addpd, kInstAddpd, XmmVar, Mem)
+
+  //! @brief Scalar DP-FP add (SSE2).
+  INST_2x(addsd, kInstAddsd, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(addsd, kInstAddsd, XmmVar, Mem)
+
+  //! @brief And-not for DP-FP (SSE2).
+  INST_2x(andnpd, kInstAndnpd, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(andnpd, kInstAndnpd, XmmVar, Mem)
+
+  //! @brief And for DP-FP (SSE2).
+  INST_2x(andpd, kInstAndpd, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(andpd, kInstAndpd, XmmVar, Mem)
+
+  //! @brief Flush cache line (SSE2).
+  INST_1x(clflush, kInstClflush, Mem)
+
+  //! @brief Packed DP-FP compare (SSE2).
+  INST_3i(cmppd, kInstCmppd, XmmVar, XmmVar, Imm)
+  //! @overload
+  INST_3i(cmppd, kInstCmppd, XmmVar, Mem, Imm)
+
+  //! @brief Compare scalar DP-FP values (SSE2).
+  INST_3i(cmpsd, kInstCmpsd, XmmVar, XmmVar, Imm)
+  //! @overload
+  INST_3i(cmpsd, kInstCmpsd, XmmVar, Mem, Imm)
+
+  //! @brief Scalar ordered DP-FP compare and set EFLAGS (SSE2).
+  INST_2x(comisd, kInstComisd, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(comisd, kInstComisd, XmmVar, Mem)
+
+  //! @brief Convert packed dword integers to packed DP-FP values (SSE2).
+  INST_2x(cvtdq2pd, kInstCvtdq2pd, XmmVar, XmmVar)
+  //!
@overload
+  INST_2x(cvtdq2pd, kInstCvtdq2pd, XmmVar, Mem)
+
+  //! @brief Convert packed dword integers to packed SP-FP values (SSE2).
+  INST_2x(cvtdq2ps, kInstCvtdq2ps, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(cvtdq2ps, kInstCvtdq2ps, XmmVar, Mem)
+
+  //! @brief Convert packed DP-FP values to packed dword integers (SSE2).
+  INST_2x(cvtpd2dq, kInstCvtpd2dq, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(cvtpd2dq, kInstCvtpd2dq, XmmVar, Mem)
+
+  //! @brief Convert packed DP-FP values to packed dword integers (SSE2).
+  INST_2x(cvtpd2pi, kInstCvtpd2pi, MmVar, XmmVar)
+  //! @overload
+  INST_2x(cvtpd2pi, kInstCvtpd2pi, MmVar, Mem)
+
+  //! @brief Convert packed DP-FP values to packed SP-FP values (SSE2).
+  INST_2x(cvtpd2ps, kInstCvtpd2ps, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(cvtpd2ps, kInstCvtpd2ps, XmmVar, Mem)
+
+  //! @brief Convert packed dword integers to packed DP-FP values (SSE2).
+  INST_2x(cvtpi2pd, kInstCvtpi2pd, XmmVar, MmVar)
+  //! @overload
+  INST_2x(cvtpi2pd, kInstCvtpi2pd, XmmVar, Mem)
+
+  //! @brief Convert packed SP-FP values to packed dword integers (SSE2).
+  INST_2x(cvtps2dq, kInstCvtps2dq, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(cvtps2dq, kInstCvtps2dq, XmmVar, Mem)
+
+  //! @brief Convert packed SP-FP values to packed DP-FP values (SSE2).
+  INST_2x(cvtps2pd, kInstCvtps2pd, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(cvtps2pd, kInstCvtps2pd, XmmVar, Mem)
+
+  //! @brief Convert scalar DP-FP value to dword integer (SSE2).
+  INST_2x(cvtsd2si, kInstCvtsd2si, GpVar, XmmVar)
+  //! @overload
+  INST_2x(cvtsd2si, kInstCvtsd2si, GpVar, Mem)
+
+  //! @brief Convert scalar DP-FP value to scalar SP-FP value (SSE2).
+  INST_2x(cvtsd2ss, kInstCvtsd2ss, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(cvtsd2ss, kInstCvtsd2ss, XmmVar, Mem)
+
+  //! @brief Convert dword integer to scalar DP-FP value (SSE2).
+  INST_2x(cvtsi2sd, kInstCvtsi2sd, XmmVar, GpVar)
+  //! @overload
+  INST_2x(cvtsi2sd, kInstCvtsi2sd, XmmVar, Mem)
+
+  //! @brief Convert scalar SP-FP value to scalar DP-FP value (SSE2).
+  INST_2x(cvtss2sd, kInstCvtss2sd, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(cvtss2sd, kInstCvtss2sd, XmmVar, Mem)
+
+  //! @brief Convert with truncation packed DP-FP values to packed dword integers (SSE2).
+  INST_2x(cvttpd2pi, kInstCvttpd2pi, MmVar, XmmVar)
+  //! @overload
+  INST_2x(cvttpd2pi, kInstCvttpd2pi, MmVar, Mem)
+
+  //! @brief Convert with truncation packed DP-FP values to packed dword integers (SSE2).
+  INST_2x(cvttpd2dq, kInstCvttpd2dq, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(cvttpd2dq, kInstCvttpd2dq, XmmVar, Mem)
+
+  //! @brief Convert with truncation packed SP-FP values to packed dword integers (SSE2).
+  INST_2x(cvttps2dq, kInstCvttps2dq, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(cvttps2dq, kInstCvttps2dq, XmmVar, Mem)
+
+  //! @brief Convert with truncation scalar DP-FP value to signed dword integer (SSE2).
+  INST_2x(cvttsd2si, kInstCvttsd2si, GpVar, XmmVar)
+  //! @overload
+  INST_2x(cvttsd2si, kInstCvttsd2si, GpVar, Mem)
+
+  //! @brief Packed DP-FP divide (SSE2).
+  INST_2x(divpd, kInstDivpd, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(divpd, kInstDivpd, XmmVar, Mem)
+
+  //! @brief Scalar DP-FP divide (SSE2).
+  INST_2x(divsd, kInstDivsd, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(divsd, kInstDivsd, XmmVar, Mem)
+
+  //! @brief Load fence (SSE2).
+  INST_0x(lfence, kInstLfence)
+
+  //! @brief Store selected bytes of oword (SSE2).
+  //!
+  //! @note Target is DS:EDI.
+  INST_2x(maskmovdqu, kInstMaskmovdqu, XmmVar, XmmVar)
+
+  //! @brief Return maximum packed DP-FP values (SSE2).
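+  //!
+  //! @note maxpd and minpd combine into a branchless clamp. A hedged sketch
+  //! (XmmVars `x`, `lo` and `hi` are illustrative):
+  //!
+  //! @code
+  //! c.maxpd(x, lo);   // x = max(x, lo).
+  //! c.minpd(x, hi);   // x = min(x, hi), so lo <= x <= hi per lane.
+  //! @endcode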
+ INST_2x(maxpd, kInstMaxpd, XmmVar, XmmVar) + //! @overload + INST_2x(maxpd, kInstMaxpd, XmmVar, Mem) + + //! @brief Return maximum scalar DP-FP value (SSE2). + INST_2x(maxsd, kInstMaxsd, XmmVar, XmmVar) + //! @overload + INST_2x(maxsd, kInstMaxsd, XmmVar, Mem) + + //! @brief Memory fence (SSE2). + INST_0x(mfence, kInstMfence) + + //! @brief Return minimum packed DP-FP Values (SSE2). + INST_2x(minpd, kInstMinpd, XmmVar, XmmVar) + //! @overload + INST_2x(minpd, kInstMinpd, XmmVar, Mem) + + //! @brief Return minimum scalar DP-FP value (SSE2). + INST_2x(minsd, kInstMinsd, XmmVar, XmmVar) + //! @overload + INST_2x(minsd, kInstMinsd, XmmVar, Mem) + + //! @brief Move aligned oword (SSE2). + INST_2x(movdqa, kInstMovdqa, XmmVar, XmmVar) + //! @overload + INST_2x(movdqa, kInstMovdqa, XmmVar, Mem) + //! @overload + INST_2x(movdqa, kInstMovdqa, Mem, XmmVar) + + //! @brief Move unaligned oword (SSE2). + INST_2x(movdqu, kInstMovdqu, XmmVar, XmmVar) + //! @overload + INST_2x(movdqu, kInstMovdqu, XmmVar, Mem) + //! @overload + INST_2x(movdqu, kInstMovdqu, Mem, XmmVar) + + //! @brief Extract packed SP-FP sign mask (SSE2). + INST_2x(movmskps, kInstMovmskps, GpVar, XmmVar) + + //! @brief Extract packed DP-FP sign mask (SSE2). + INST_2x(movmskpd, kInstMovmskpd, GpVar, XmmVar) + + //! @brief Move scalar DP-FP value (SSE2). + INST_2x(movsd, kInstMovsd, XmmVar, XmmVar) + //! @overload + INST_2x(movsd, kInstMovsd, XmmVar, Mem) + //! @overload + INST_2x(movsd, kInstMovsd, Mem, XmmVar) + + //! @brief Move aligned packed DP-FP values (SSE2). + INST_2x(movapd, kInstMovapd, XmmVar, XmmVar) + //! @overload + INST_2x(movapd, kInstMovapd, XmmVar, Mem) + //! @overload + INST_2x(movapd, kInstMovapd, Mem, XmmVar) + + //! @brief Move qword from Xmm to Mm register (SSE2). + INST_2x(movdq2q, kInstMovdq2q, MmVar, XmmVar) + + //! @brief Move qword from Mm to Xmm register (SSE2). + INST_2x(movq2dq, kInstMovq2dq, XmmVar, MmVar) + + //! @brief Move high packed DP-FP value (SSE2). + INST_2x(movhpd, kInstMovhpd, XmmVar, Mem) + //! @overload + INST_2x(movhpd, kInstMovhpd, Mem, XmmVar) + + //! @brief Move low packed DP-FP value (SSE2). + INST_2x(movlpd, kInstMovlpd, XmmVar, Mem) + //! @overload + INST_2x(movlpd, kInstMovlpd, Mem, XmmVar) + + //! @brief Store oword using non-temporal hint (SSE2). + INST_2x(movntdq, kInstMovntdq, Mem, XmmVar) + + //! @brief Store dword using non-temporal hint (SSE2). + INST_2x(movnti, kInstMovnti, Mem, GpVar) + + //! @brief Store packed DP-FP values using non-temporal hint (SSE2). + INST_2x(movntpd, kInstMovntpd, Mem, XmmVar) + + //! @brief Move unaligned packed DP-FP values (SSE2). + INST_2x(movupd, kInstMovupd, XmmVar, XmmVar) + //! @overload + INST_2x(movupd, kInstMovupd, XmmVar, Mem) + //! @overload + INST_2x(movupd, kInstMovupd, Mem, XmmVar) + + //! @brief Packed DP-FP multiply (SSE2). + INST_2x(mulpd, kInstMulpd, XmmVar, XmmVar) + //! @overload + INST_2x(mulpd, kInstMulpd, XmmVar, Mem) + + //! @brief Scalar DP-FP multiply (SSE2). + INST_2x(mulsd, kInstMulsd, XmmVar, XmmVar) + //! @overload + INST_2x(mulsd, kInstMulsd, XmmVar, Mem) + + //! @brief Or for DP-FP data (SSE2). + INST_2x(orpd, kInstOrpd, XmmVar, XmmVar) + //! @overload + INST_2x(orpd, kInstOrpd, XmmVar, Mem) + + //! @brief Pack with signed saturation (SSE2). + INST_2x(packsswb, kInstPacksswb, XmmVar, XmmVar) + //! @overload + INST_2x(packsswb, kInstPacksswb, XmmVar, Mem) + + //! @brief Pack with signed saturation (SSE2). + INST_2x(packssdw, kInstPackssdw, XmmVar, XmmVar) + //! 
@overload + INST_2x(packssdw, kInstPackssdw, XmmVar, Mem) + + //! @brief Pack with unsigned saturation (SSE2). + INST_2x(packuswb, kInstPackuswb, XmmVar, XmmVar) + //! @overload + INST_2x(packuswb, kInstPackuswb, XmmVar, Mem) + + //! @brief Packed byte add (SSE2). + INST_2x(paddb, kInstPaddb, XmmVar, XmmVar) + //! @overload + INST_2x(paddb, kInstPaddb, XmmVar, Mem) + + //! @brief Packed word add (SSE2). + INST_2x(paddw, kInstPaddw, XmmVar, XmmVar) + //! @overload + INST_2x(paddw, kInstPaddw, XmmVar, Mem) + + //! @brief Packed dword add (SSE2). + INST_2x(paddd, kInstPaddd, XmmVar, XmmVar) + //! @overload + INST_2x(paddd, kInstPaddd, XmmVar, Mem) + + //! @brief Packed qword add (SSE2). + INST_2x(paddq, kInstPaddq, MmVar, MmVar) + //! @overload + INST_2x(paddq, kInstPaddq, MmVar, Mem) + + //! @brief Packed qword add (SSE2). + INST_2x(paddq, kInstPaddq, XmmVar, XmmVar) + //! @overload + INST_2x(paddq, kInstPaddq, XmmVar, Mem) + + //! @brief Packed add with saturation (SSE2). + INST_2x(paddsb, kInstPaddsb, XmmVar, XmmVar) + //! @overload + INST_2x(paddsb, kInstPaddsb, XmmVar, Mem) + + //! @brief Packed add with saturation (SSE2). + INST_2x(paddsw, kInstPaddsw, XmmVar, XmmVar) + //! @overload + INST_2x(paddsw, kInstPaddsw, XmmVar, Mem) + + //! @brief Packed add unsigned with saturation (SSE2). + INST_2x(paddusb, kInstPaddusb, XmmVar, XmmVar) + //! @overload + INST_2x(paddusb, kInstPaddusb, XmmVar, Mem) + + //! @brief Packed add unsigned with saturation (SSE2). + INST_2x(paddusw, kInstPaddusw, XmmVar, XmmVar) + //! @overload + INST_2x(paddusw, kInstPaddusw, XmmVar, Mem) + + //! @brief And (SSE2). + INST_2x(pand, kInstPand, XmmVar, XmmVar) + //! @overload + INST_2x(pand, kInstPand, XmmVar, Mem) + + //! @brief And-not (SSE2). + INST_2x(pandn, kInstPandn, XmmVar, XmmVar) + //! @overload + INST_2x(pandn, kInstPandn, XmmVar, Mem) + + //! @brief Spin loop hint (SSE2). + INST_0x(pause, kInstPause) + + //! @brief Packed average (SSE2). + INST_2x(pavgb, kInstPavgb, XmmVar, XmmVar) + //! @overload + INST_2x(pavgb, kInstPavgb, XmmVar, Mem) + + //! @brief Packed average (SSE2). + INST_2x(pavgw, kInstPavgw, XmmVar, XmmVar) + //! @overload + INST_2x(pavgw, kInstPavgw, XmmVar, Mem) + + //! @brief Packed compare bytes for equal (SSE2). + INST_2x(pcmpeqb, kInstPcmpeqb, XmmVar, XmmVar) + //! @overload + INST_2x(pcmpeqb, kInstPcmpeqb, XmmVar, Mem) + + //! @brief Packed compare words for equal (SSE2). + INST_2x(pcmpeqw, kInstPcmpeqw, XmmVar, XmmVar) + //! @overload + INST_2x(pcmpeqw, kInstPcmpeqw, XmmVar, Mem) + + //! @brief Packed compare dwords for equal (SSE2). + INST_2x(pcmpeqd, kInstPcmpeqd, XmmVar, XmmVar) + //! @overload + INST_2x(pcmpeqd, kInstPcmpeqd, XmmVar, Mem) + + //! @brief Packed compare bytes for greater than (SSE2). + INST_2x(pcmpgtb, kInstPcmpgtb, XmmVar, XmmVar) + //! @overload + INST_2x(pcmpgtb, kInstPcmpgtb, XmmVar, Mem) + + //! @brief Packed compare words for greater than (SSE2). + INST_2x(pcmpgtw, kInstPcmpgtw, XmmVar, XmmVar) + //! @overload + INST_2x(pcmpgtw, kInstPcmpgtw, XmmVar, Mem) + + //! @brief Packed compare dwords for greater than (SSE2). + INST_2x(pcmpgtd, kInstPcmpgtd, XmmVar, XmmVar) + //! @overload + INST_2x(pcmpgtd, kInstPcmpgtd, XmmVar, Mem) + + //! @brief Extract word (SSE2). + INST_3i(pextrw, kInstPextrw, GpVar, XmmVar, Imm) + + //! @brief Insert word (SSE2). + INST_3i(pinsrw, kInstPinsrw, XmmVar, GpVar, Imm) + //! @overload + INST_3i(pinsrw, kInstPinsrw, XmmVar, Mem, Imm) + + //! @brief Packed signed integer word maximum (SSE2). 
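+  //!
+  //! @note Sketch (illustrative XmmVars `a` and `b`): a per-lane signed
+  //! 16-bit maximum, a branchless substitute for eight word-wise compares.
+  //!
+  //! @code
+  //! c.pmaxsw(a, b);   // a[i] = max(a[i], b[i]) for each of the 8 words.
+  //! @endcode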
+ INST_2x(pmaxsw, kInstPmaxsw, XmmVar, XmmVar) + //! @overload + INST_2x(pmaxsw, kInstPmaxsw, XmmVar, Mem) + + //! @brief Packed unsigned integer byte maximum (SSE2). + INST_2x(pmaxub, kInstPmaxub, XmmVar, XmmVar) + //! @overload + INST_2x(pmaxub, kInstPmaxub, XmmVar, Mem) + + //! @brief Packed signed integer word minimum (SSE2). + INST_2x(pminsw, kInstPminsw, XmmVar, XmmVar) + //! @overload + INST_2x(pminsw, kInstPminsw, XmmVar, Mem) + + //! @brief Packed unsigned integer byte minimum (SSE2). + INST_2x(pminub, kInstPminub, XmmVar, XmmVar) + //! @overload + INST_2x(pminub, kInstPminub, XmmVar, Mem) + + //! @brief Move byte mask (SSE2). + INST_2x(pmovmskb, kInstPmovmskb, GpVar, XmmVar) + + //! @brief Packed multiply high (SSE2). + INST_2x(pmulhw, kInstPmulhw, XmmVar, XmmVar) + //! @overload + INST_2x(pmulhw, kInstPmulhw, XmmVar, Mem) + + //! @brief Packed multiply high Unsigned (SSE2). + INST_2x(pmulhuw, kInstPmulhuw, XmmVar, XmmVar) + //! @overload + INST_2x(pmulhuw, kInstPmulhuw, XmmVar, Mem) + + //! @brief Packed multiply low (SSE2). + INST_2x(pmullw, kInstPmullw, XmmVar, XmmVar) + //! @overload + INST_2x(pmullw, kInstPmullw, XmmVar, Mem) + + //! @brief Packed multiply to QWORD (SSE2). + INST_2x(pmuludq, kInstPmuludq, MmVar, MmVar) + //! @overload + INST_2x(pmuludq, kInstPmuludq, MmVar, Mem) + + //! @brief Packed multiply to QWORD (SSE2). + INST_2x(pmuludq, kInstPmuludq, XmmVar, XmmVar) + //! @overload + INST_2x(pmuludq, kInstPmuludq, XmmVar, Mem) + + //! @brief Or (SSE2). + INST_2x(por, kInstPor, XmmVar, XmmVar) + //! @overload + INST_2x(por, kInstPor, XmmVar, Mem) + + //! @brief Packed shift left logical (SSE2). + INST_2x(pslld, kInstPslld, XmmVar, XmmVar) + //! @overload + INST_2x(pslld, kInstPslld, XmmVar, Mem) + //! @overload + INST_2i(pslld, kInstPslld, XmmVar, Imm) + + //! @brief Packed shift left logical (SSE2). + INST_2x(psllq, kInstPsllq, XmmVar, XmmVar) + //! @overload + INST_2x(psllq, kInstPsllq, XmmVar, Mem) + //! @overload + INST_2i(psllq, kInstPsllq, XmmVar, Imm) + + //! @brief Packed shift left logical (SSE2). + INST_2x(psllw, kInstPsllw, XmmVar, XmmVar) + //! @overload + INST_2x(psllw, kInstPsllw, XmmVar, Mem) + //! @overload + INST_2i(psllw, kInstPsllw, XmmVar, Imm) + + //! @brief Packed shift left logical (SSE2). + INST_2i(pslldq, kInstPslldq, XmmVar, Imm) + + //! @brief Packed shift right arithmetic (SSE2). + INST_2x(psrad, kInstPsrad, XmmVar, XmmVar) + //! @overload + INST_2x(psrad, kInstPsrad, XmmVar, Mem) + //! @overload + INST_2i(psrad, kInstPsrad, XmmVar, Imm) + + //! @brief Packed shift right arithmetic (SSE2). + INST_2x(psraw, kInstPsraw, XmmVar, XmmVar) + //! @overload + INST_2x(psraw, kInstPsraw, XmmVar, Mem) + //! @overload + INST_2i(psraw, kInstPsraw, XmmVar, Imm) + + //! @brief Packed subtract (SSE2). + INST_2x(psubb, kInstPsubb, XmmVar, XmmVar) + //! @overload + INST_2x(psubb, kInstPsubb, XmmVar, Mem) + + //! @brief Packed subtract (SSE2). + INST_2x(psubw, kInstPsubw, XmmVar, XmmVar) + //! @overload + INST_2x(psubw, kInstPsubw, XmmVar, Mem) + + //! @brief Packed subtract (SSE2). + INST_2x(psubd, kInstPsubd, XmmVar, XmmVar) + //! @overload + INST_2x(psubd, kInstPsubd, XmmVar, Mem) + + //! @brief Packed subtract (SSE2). + INST_2x(psubq, kInstPsubq, MmVar, MmVar) + //! @overload + INST_2x(psubq, kInstPsubq, MmVar, Mem) + + //! @brief Packed subtract (SSE2). + INST_2x(psubq, kInstPsubq, XmmVar, XmmVar) + //! @overload + INST_2x(psubq, kInstPsubq, XmmVar, Mem) + + //! @brief Packed multiply and Add (SSE2). + INST_2x(pmaddwd, kInstPmaddwd, XmmVar, XmmVar) + //! 
@overload + INST_2x(pmaddwd, kInstPmaddwd, XmmVar, Mem) + + //! @brief Shuffle packed dwords (SSE2). + INST_3i(pshufd, kInstPshufd, XmmVar, XmmVar, Imm) + //! @overload + INST_3i(pshufd, kInstPshufd, XmmVar, Mem, Imm) + + //! @brief Shuffle packed high words (SSE2). + INST_3i(pshufhw, kInstPshufhw, XmmVar, XmmVar, Imm) + //! @overload + INST_3i(pshufhw, kInstPshufhw, XmmVar, Mem, Imm) + + //! @brief Shuffle packed low words (SSE2). + INST_3i(pshuflw, kInstPshuflw, XmmVar, XmmVar, Imm) + //! @overload + INST_3i(pshuflw, kInstPshuflw, XmmVar, Mem, Imm) + + //! @brief Packed shift right logical (SSE2). + INST_2x(psrld, kInstPsrld, XmmVar, XmmVar) + //! @overload + INST_2x(psrld, kInstPsrld, XmmVar, Mem) + //! @overload + INST_2i(psrld, kInstPsrld, XmmVar, Imm) + + //! @brief Packed shift right logical (SSE2). + INST_2x(psrlq, kInstPsrlq, XmmVar, XmmVar) + //! @overload + INST_2x(psrlq, kInstPsrlq, XmmVar, Mem) + //! @overload + INST_2i(psrlq, kInstPsrlq, XmmVar, Imm) + + //! @brief Oword shift right logical (SSE2). + INST_2i(psrldq, kInstPsrldq, XmmVar, Imm) + + //! @brief Packed shift right logical (SSE2). + INST_2x(psrlw, kInstPsrlw, XmmVar, XmmVar) + //! @overload + INST_2x(psrlw, kInstPsrlw, XmmVar, Mem) + //! @overload + INST_2i(psrlw, kInstPsrlw, XmmVar, Imm) + + //! @brief Packed subtract with saturation (SSE2). + INST_2x(psubsb, kInstPsubsb, XmmVar, XmmVar) + //! @overload + INST_2x(psubsb, kInstPsubsb, XmmVar, Mem) + + //! @brief Packed subtract with saturation (SSE2). + INST_2x(psubsw, kInstPsubsw, XmmVar, XmmVar) + //! @overload + INST_2x(psubsw, kInstPsubsw, XmmVar, Mem) + + //! @brief Packed subtract with unsigned saturation (SSE2). + INST_2x(psubusb, kInstPsubusb, XmmVar, XmmVar) + //! @overload + INST_2x(psubusb, kInstPsubusb, XmmVar, Mem) + + //! @brief Packed subtract with unsigned saturation (SSE2). + INST_2x(psubusw, kInstPsubusw, XmmVar, XmmVar) + //! @overload + INST_2x(psubusw, kInstPsubusw, XmmVar, Mem) + + //! @brief Unpack high data (SSE2). + INST_2x(punpckhbw, kInstPunpckhbw, XmmVar, XmmVar) + //! @overload + INST_2x(punpckhbw, kInstPunpckhbw, XmmVar, Mem) + + //! @brief Unpack high data (SSE2). + INST_2x(punpckhwd, kInstPunpckhwd, XmmVar, XmmVar) + //! @overload + INST_2x(punpckhwd, kInstPunpckhwd, XmmVar, Mem) + + //! @brief Unpack high data (SSE2). + INST_2x(punpckhdq, kInstPunpckhdq, XmmVar, XmmVar) + //! @overload + INST_2x(punpckhdq, kInstPunpckhdq, XmmVar, Mem) + + //! @brief Unpack high data (SSE2). + INST_2x(punpckhqdq, kInstPunpckhqdq, XmmVar, XmmVar) + //! @overload + INST_2x(punpckhqdq, kInstPunpckhqdq, XmmVar, Mem) + + //! @brief Unpack low data (SSE2). + INST_2x(punpcklbw, kInstPunpcklbw, XmmVar, XmmVar) + //! @overload + INST_2x(punpcklbw, kInstPunpcklbw, XmmVar, Mem) + + //! @brief Unpack low data (SSE2). + INST_2x(punpcklwd, kInstPunpcklwd, XmmVar, XmmVar) + //! @overload + INST_2x(punpcklwd, kInstPunpcklwd, XmmVar, Mem) + + //! @brief Unpack low data (SSE2). + INST_2x(punpckldq, kInstPunpckldq, XmmVar, XmmVar) + //! @overload + INST_2x(punpckldq, kInstPunpckldq, XmmVar, Mem) + + //! @brief Unpack low data (SSE2). + INST_2x(punpcklqdq, kInstPunpcklqdq, XmmVar, XmmVar) + //! @overload + INST_2x(punpcklqdq, kInstPunpcklqdq, XmmVar, Mem) + + //! @brief Xor (SSE2). + INST_2x(pxor, kInstPxor, XmmVar, XmmVar) + //! @overload + INST_2x(pxor, kInstPxor, XmmVar, Mem) + + //! @brief Shuffle DP-FP (SSE2). + INST_3i(shufpd, kInstShufpd, XmmVar, XmmVar, Imm) + //! @overload + INST_3i(shufpd, kInstShufpd, XmmVar, Mem, Imm) + + //! 
@brief Compute square roots of packed DP-FP values (SSE2).
+  INST_2x(sqrtpd, kInstSqrtpd, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(sqrtpd, kInstSqrtpd, XmmVar, Mem)
+
+  //! @brief Compute square root of scalar DP-FP value (SSE2).
+  INST_2x(sqrtsd, kInstSqrtsd, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(sqrtsd, kInstSqrtsd, XmmVar, Mem)
+
+  //! @brief Packed DP-FP subtract (SSE2).
+  INST_2x(subpd, kInstSubpd, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(subpd, kInstSubpd, XmmVar, Mem)
+
+  //! @brief Scalar DP-FP subtract (SSE2).
+  INST_2x(subsd, kInstSubsd, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(subsd, kInstSubsd, XmmVar, Mem)
+
+  //! @brief Scalar unordered DP-FP compare and set EFLAGS (SSE2).
+  INST_2x(ucomisd, kInstUcomisd, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(ucomisd, kInstUcomisd, XmmVar, Mem)
+
+  //! @brief Unpack and interleave high packed DP-FP values (SSE2).
+  INST_2x(unpckhpd, kInstUnpckhpd, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(unpckhpd, kInstUnpckhpd, XmmVar, Mem)
+
+  //! @brief Unpack and interleave low packed DP-FP values (SSE2).
+  INST_2x(unpcklpd, kInstUnpcklpd, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(unpcklpd, kInstUnpcklpd, XmmVar, Mem)
+
+  //! @brief Xor for DP-FP data (SSE2).
+  INST_2x(xorpd, kInstXorpd, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(xorpd, kInstXorpd, XmmVar, Mem)
+
+  // --------------------------------------------------------------------------
+  // [SSE3]
+  // --------------------------------------------------------------------------
+
+  //! @brief Packed DP-FP add/subtract (SSE3).
+  INST_2x(addsubpd, kInstAddsubpd, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(addsubpd, kInstAddsubpd, XmmVar, Mem)
+
+  //! @brief Packed SP-FP add/subtract (SSE3).
+  INST_2x(addsubps, kInstAddsubps, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(addsubps, kInstAddsubps, XmmVar, Mem)
+
+  // //! @brief Store integer with truncation (SSE3).
+  // INST_1x(fisttp, kInstFisttp, Mem)
+
+  //! @brief Packed DP-FP horizontal add (SSE3).
+  INST_2x(haddpd, kInstHaddpd, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(haddpd, kInstHaddpd, XmmVar, Mem)
+
+  //! @brief Packed SP-FP horizontal add (SSE3).
+  INST_2x(haddps, kInstHaddps, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(haddps, kInstHaddps, XmmVar, Mem)
+
+  //! @brief Packed DP-FP horizontal subtract (SSE3).
+  INST_2x(hsubpd, kInstHsubpd, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(hsubpd, kInstHsubpd, XmmVar, Mem)
+
+  //! @brief Packed SP-FP horizontal subtract (SSE3).
+  INST_2x(hsubps, kInstHsubps, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(hsubps, kInstHsubps, XmmVar, Mem)
+
+  //! @brief Load unaligned 128-bit integer data (SSE3).
+  INST_2x(lddqu, kInstLddqu, XmmVar, Mem)
+
+  // //! @brief Setup monitor address (SSE3).
+  // INST_0x(monitor, kInstMonitor)
+
+  //! @brief Move one DP-FP and duplicate (SSE3).
+  INST_2x(movddup, kInstMovddup, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(movddup, kInstMovddup, XmmVar, Mem)
+
+  //! @brief Move packed SP-FP high and duplicate (SSE3).
+  INST_2x(movshdup, kInstMovshdup, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(movshdup, kInstMovshdup, XmmVar, Mem)
+
+  //! @brief Move packed SP-FP low and duplicate (SSE3).
+  INST_2x(movsldup, kInstMovsldup, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(movsldup, kInstMovsldup, XmmVar, Mem)
+
+  // //! @brief Monitor wait (SSE3).
+  // INST_0x(mwait, kInstMwait)
+
+  // --------------------------------------------------------------------------
+  // [SSSE3]
+  // --------------------------------------------------------------------------
+
+  //! @brief Packed sign (SSSE3).
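+  //!
+  //! @note psign copies, negates, or zeroes each lane of the destination
+  //! according to the sign of the corresponding source lane. A hedged sketch
+  //! (illustrative MmVars `v` and `s`):
+  //!
+  //! @code
+  //! c.psignb(v, s);   // v[i] = s[i] < 0 ? -v[i] : (s[i] == 0 ? 0 : v[i]).
+  //! @endcode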
+ INST_2x(psignb, kInstPsignb, MmVar, MmVar) + //! @overload + INST_2x(psignb, kInstPsignb, MmVar, Mem) + + //! @brief Packed sign (SSSE3). + INST_2x(psignb, kInstPsignb, XmmVar, XmmVar) + //! @overload + INST_2x(psignb, kInstPsignb, XmmVar, Mem) + + //! @brief Packed sign (SSSE3). + INST_2x(psignw, kInstPsignw, MmVar, MmVar) + //! @overload + INST_2x(psignw, kInstPsignw, MmVar, Mem) + + //! @brief Packed sign (SSSE3). + INST_2x(psignw, kInstPsignw, XmmVar, XmmVar) + //! @overload + INST_2x(psignw, kInstPsignw, XmmVar, Mem) + + //! @brief Packed sign (SSSE3). + INST_2x(psignd, kInstPsignd, MmVar, MmVar) + //! @overload + INST_2x(psignd, kInstPsignd, MmVar, Mem) + + //! @brief Packed sign (SSSE3). + INST_2x(psignd, kInstPsignd, XmmVar, XmmVar) + //! @overload + INST_2x(psignd, kInstPsignd, XmmVar, Mem) + + //! @brief Packed horizontal add (SSSE3). + INST_2x(phaddw, kInstPhaddw, MmVar, MmVar) + //! @overload + INST_2x(phaddw, kInstPhaddw, MmVar, Mem) + + //! @brief Packed horizontal add (SSSE3). + INST_2x(phaddw, kInstPhaddw, XmmVar, XmmVar) + //! @overload + INST_2x(phaddw, kInstPhaddw, XmmVar, Mem) + + //! @brief Packed horizontal add (SSSE3). + INST_2x(phaddd, kInstPhaddd, MmVar, MmVar) + //! @overload + INST_2x(phaddd, kInstPhaddd, MmVar, Mem) + + //! @brief Packed horizontal add (SSSE3). + INST_2x(phaddd, kInstPhaddd, XmmVar, XmmVar) + //! @overload + INST_2x(phaddd, kInstPhaddd, XmmVar, Mem) + + //! @brief Packed horizontal add and saturate (SSSE3). + INST_2x(phaddsw, kInstPhaddsw, MmVar, MmVar) + //! @overload + INST_2x(phaddsw, kInstPhaddsw, MmVar, Mem) + + //! @brief Packed horizontal add and saturate (SSSE3). + INST_2x(phaddsw, kInstPhaddsw, XmmVar, XmmVar) + //! @overload + INST_2x(phaddsw, kInstPhaddsw, XmmVar, Mem) + + //! @brief Packed horizontal subtract (SSSE3). + INST_2x(phsubw, kInstPhsubw, MmVar, MmVar) + //! @overload + INST_2x(phsubw, kInstPhsubw, MmVar, Mem) + + //! @brief Packed horizontal subtract (SSSE3). + INST_2x(phsubw, kInstPhsubw, XmmVar, XmmVar) + //! @overload + INST_2x(phsubw, kInstPhsubw, XmmVar, Mem) + + //! @brief Packed horizontal subtract (SSSE3). + INST_2x(phsubd, kInstPhsubd, MmVar, MmVar) + //! @overload + INST_2x(phsubd, kInstPhsubd, MmVar, Mem) + + //! @brief Packed horizontal subtract (SSSE3). + INST_2x(phsubd, kInstPhsubd, XmmVar, XmmVar) + //! @overload + INST_2x(phsubd, kInstPhsubd, XmmVar, Mem) + + //! @brief Packed horizontal subtract and saturate (SSSE3). + INST_2x(phsubsw, kInstPhsubsw, MmVar, MmVar) + //! @overload + INST_2x(phsubsw, kInstPhsubsw, MmVar, Mem) + + //! @brief Packed horizontal subtract and saturate (SSSE3). + INST_2x(phsubsw, kInstPhsubsw, XmmVar, XmmVar) + //! @overload + INST_2x(phsubsw, kInstPhsubsw, XmmVar, Mem) + + //! @brief Multiply and add packed signed and unsigned bytes (SSSE3). + INST_2x(pmaddubsw, kInstPmaddubsw, MmVar, MmVar) + //! @overload + INST_2x(pmaddubsw, kInstPmaddubsw, MmVar, Mem) + + //! @brief Multiply and add packed signed and unsigned bytes (SSSE3). + INST_2x(pmaddubsw, kInstPmaddubsw, XmmVar, XmmVar) + //! @overload + INST_2x(pmaddubsw, kInstPmaddubsw, XmmVar, Mem) + + //! @brief Packed absolute value (SSSE3). + INST_2x(pabsb, kInstPabsb, MmVar, MmVar) + //! @overload + INST_2x(pabsb, kInstPabsb, MmVar, Mem) + + //! @brief Packed absolute value (SSSE3). + INST_2x(pabsb, kInstPabsb, XmmVar, XmmVar) + //! @overload + INST_2x(pabsb, kInstPabsb, XmmVar, Mem) + + //! @brief Packed absolute value (SSSE3). + INST_2x(pabsw, kInstPabsw, MmVar, MmVar) + //! 
@overload
+  INST_2x(pabsw, kInstPabsw, MmVar, Mem)
+
+  //! @brief Packed absolute value (SSSE3).
+  INST_2x(pabsw, kInstPabsw, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(pabsw, kInstPabsw, XmmVar, Mem)
+
+  //! @brief Packed absolute value (SSSE3).
+  INST_2x(pabsd, kInstPabsd, MmVar, MmVar)
+  //! @overload
+  INST_2x(pabsd, kInstPabsd, MmVar, Mem)
+
+  //! @brief Packed absolute value (SSSE3).
+  INST_2x(pabsd, kInstPabsd, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(pabsd, kInstPabsd, XmmVar, Mem)
+
+  //! @brief Packed multiply high with round and scale (SSSE3).
+  INST_2x(pmulhrsw, kInstPmulhrsw, MmVar, MmVar)
+  //! @overload
+  INST_2x(pmulhrsw, kInstPmulhrsw, MmVar, Mem)
+
+  //! @brief Packed multiply high with round and scale (SSSE3).
+  INST_2x(pmulhrsw, kInstPmulhrsw, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(pmulhrsw, kInstPmulhrsw, XmmVar, Mem)
+
+  //! @brief Packed shuffle bytes (SSSE3).
+  INST_2x(pshufb, kInstPshufb, MmVar, MmVar)
+  //! @overload
+  INST_2x(pshufb, kInstPshufb, MmVar, Mem)
+
+  //! @brief Packed shuffle bytes (SSSE3).
+  INST_2x(pshufb, kInstPshufb, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(pshufb, kInstPshufb, XmmVar, Mem)
+
+  //! @brief Packed align right (SSSE3).
+  INST_3i(palignr, kInstPalignr, MmVar, MmVar, Imm)
+  //! @overload
+  INST_3i(palignr, kInstPalignr, MmVar, Mem, Imm)
+
+  //! @brief Packed align right (SSSE3).
+  INST_3i(palignr, kInstPalignr, XmmVar, XmmVar, Imm)
+  //! @overload
+  INST_3i(palignr, kInstPalignr, XmmVar, Mem, Imm)
+
+  // --------------------------------------------------------------------------
+  // [SSE4.1]
+  // --------------------------------------------------------------------------
+
+  //! @brief Blend packed DP-FP values (SSE4.1).
+  INST_3i(blendpd, kInstBlendpd, XmmVar, XmmVar, Imm)
+  //! @overload
+  INST_3i(blendpd, kInstBlendpd, XmmVar, Mem, Imm)
+
+  //! @brief Blend packed SP-FP values (SSE4.1).
+  INST_3i(blendps, kInstBlendps, XmmVar, XmmVar, Imm)
+  //! @overload
+  INST_3i(blendps, kInstBlendps, XmmVar, Mem, Imm)
+
+  //! @brief Variable blend packed DP-FP values (SSE4.1).
+  INST_3x(blendvpd, kInstBlendvpd, XmmVar, XmmVar, XmmVar)
+  //! @overload
+  INST_3x(blendvpd, kInstBlendvpd, XmmVar, Mem, XmmVar)
+
+  //! @brief Variable blend packed SP-FP values (SSE4.1).
+  INST_3x(blendvps, kInstBlendvps, XmmVar, XmmVar, XmmVar)
+  //! @overload
+  INST_3x(blendvps, kInstBlendvps, XmmVar, Mem, XmmVar)
+
+  //! @brief Dot product of packed DP-FP values (SSE4.1).
+  INST_3i(dppd, kInstDppd, XmmVar, XmmVar, Imm)
+  //! @overload
+  INST_3i(dppd, kInstDppd, XmmVar, Mem, Imm)
+
+  //! @brief Dot product of packed SP-FP values (SSE4.1).
+  INST_3i(dpps, kInstDpps, XmmVar, XmmVar, Imm)
+  //! @overload
+  INST_3i(dpps, kInstDpps, XmmVar, Mem, Imm)
+
+  //! @brief Extract packed SP-FP value (SSE4.1).
+  INST_3i(extractps, kInstExtractps, GpVar, XmmVar, Imm)
+  //! @overload
+  INST_3i(extractps, kInstExtractps, Mem, XmmVar, Imm)
+
+  //! @brief Load oword non-temporal aligned hint (SSE4.1).
+  INST_2x(movntdqa, kInstMovntdqa, XmmVar, Mem)
+
+  //! @brief Compute multiple packed sums of absolute difference (SSE4.1).
+  INST_3i(mpsadbw, kInstMpsadbw, XmmVar, XmmVar, Imm)
+  //! @overload
+  INST_3i(mpsadbw, kInstMpsadbw, XmmVar, Mem, Imm)
+
+  //! @brief Pack with unsigned saturation (SSE4.1).
+  INST_2x(packusdw, kInstPackusdw, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(packusdw, kInstPackusdw, XmmVar, Mem)
+
+  //! @brief Variable blend packed bytes (SSE4.1).
+  INST_3x(pblendvb, kInstPblendvb, XmmVar, XmmVar, XmmVar)
+  //! @overload
+  INST_3x(pblendvb, kInstPblendvb, XmmVar, Mem, XmmVar)
+
+  //! @brief Blend packed words (SSE4.1).
+  INST_3i(pblendw, kInstPblendw, XmmVar, XmmVar, Imm)
+  //! @overload
+  INST_3i(pblendw, kInstPblendw, XmmVar, Mem, Imm)
+
+  //! @brief Compare packed qword data for equal (SSE4.1).
+  INST_2x(pcmpeqq, kInstPcmpeqq, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(pcmpeqq, kInstPcmpeqq, XmmVar, Mem)
+
+  //! @brief Extract byte (SSE4.1).
+  INST_3i(pextrb, kInstPextrb, GpVar, XmmVar, Imm)
+  //! @overload
+  INST_3i(pextrb, kInstPextrb, Mem, XmmVar, Imm)
+
+  //! @brief Extract dword (SSE4.1).
+  INST_3i(pextrd, kInstPextrd, GpVar, XmmVar, Imm)
+  //! @overload
+  INST_3i(pextrd, kInstPextrd, Mem, XmmVar, Imm)
+
+  //! @brief Extract qword (SSE4.1).
+  INST_3i(pextrq, kInstPextrq, GpVar, XmmVar, Imm)
+  //! @overload
+  INST_3i(pextrq, kInstPextrq, Mem, XmmVar, Imm)
+
+  //! @brief Extract word (SSE4.1).
+  INST_3i(pextrw, kInstPextrw, Mem, XmmVar, Imm)
+
+  //! @brief Packed horizontal word minimum (SSE4.1).
+  INST_2x(phminposuw, kInstPhminposuw, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(phminposuw, kInstPhminposuw, XmmVar, Mem)
+
+  //! @brief Insert byte (SSE4.1).
+  INST_3i(pinsrb, kInstPinsrb, XmmVar, GpVar, Imm)
+  //! @overload
+  INST_3i(pinsrb, kInstPinsrb, XmmVar, Mem, Imm)
+
+  //! @brief Insert dword (SSE4.1).
+  INST_3i(pinsrd, kInstPinsrd, XmmVar, GpVar, Imm)
+  //! @overload
+  INST_3i(pinsrd, kInstPinsrd, XmmVar, Mem, Imm)
+
+  //! @brief Insert qword (SSE4.1).
+  INST_3i(pinsrq, kInstPinsrq, XmmVar, GpVar, Imm)
+  //! @overload
+  INST_3i(pinsrq, kInstPinsrq, XmmVar, Mem, Imm)
+
+  //! @brief Maximum of packed unsigned word integers (SSE4.1).
+  INST_2x(pmaxuw, kInstPmaxuw, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(pmaxuw, kInstPmaxuw, XmmVar, Mem)
+
+  //! @brief Maximum of packed signed byte integers (SSE4.1).
+  INST_2x(pmaxsb, kInstPmaxsb, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(pmaxsb, kInstPmaxsb, XmmVar, Mem)
+
+  //! @brief Maximum of packed signed dword integers (SSE4.1).
+  INST_2x(pmaxsd, kInstPmaxsd, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(pmaxsd, kInstPmaxsd, XmmVar, Mem)
+
+  //! @brief Maximum of packed unsigned dword integers (SSE4.1).
+  INST_2x(pmaxud, kInstPmaxud, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(pmaxud, kInstPmaxud, XmmVar, Mem)
+
+  //! @brief Minimum of packed signed byte integers (SSE4.1).
+  INST_2x(pminsb, kInstPminsb, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(pminsb, kInstPminsb, XmmVar, Mem)
+
+  //! @brief Minimum of packed unsigned word integers (SSE4.1).
+  INST_2x(pminuw, kInstPminuw, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(pminuw, kInstPminuw, XmmVar, Mem)
+
+  //! @brief Minimum of packed unsigned dword integers (SSE4.1).
+  INST_2x(pminud, kInstPminud, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(pminud, kInstPminud, XmmVar, Mem)
+
+  //! @brief Minimum of packed signed dword integers (SSE4.1).
+  INST_2x(pminsd, kInstPminsd, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(pminsd, kInstPminsd, XmmVar, Mem)
+
+  //! @brief Packed move with sign extend (SSE4.1).
+  INST_2x(pmovsxbw, kInstPmovsxbw, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(pmovsxbw, kInstPmovsxbw, XmmVar, Mem)
+
+  //! @brief Packed move with sign extend (SSE4.1).
+  INST_2x(pmovsxbd, kInstPmovsxbd, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(pmovsxbd, kInstPmovsxbd, XmmVar, Mem)
+
+  //! @brief Packed move with sign extend (SSE4.1).
+  INST_2x(pmovsxbq, kInstPmovsxbq, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(pmovsxbq, kInstPmovsxbq, XmmVar, Mem)
+
+  //! @brief Packed move with sign extend (SSE4.1).
+  INST_2x(pmovsxwd, kInstPmovsxwd, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(pmovsxwd, kInstPmovsxwd, XmmVar, Mem)
+
+  //! @brief Packed move with sign extend (SSE4.1).
+  INST_2x(pmovsxwq, kInstPmovsxwq, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(pmovsxwq, kInstPmovsxwq, XmmVar, Mem)
+
+  //! @brief Packed move with sign extend (SSE4.1).
+  INST_2x(pmovsxdq, kInstPmovsxdq, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(pmovsxdq, kInstPmovsxdq, XmmVar, Mem)
+
+  //! @brief Packed move with zero extend (SSE4.1).
+  INST_2x(pmovzxbw, kInstPmovzxbw, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(pmovzxbw, kInstPmovzxbw, XmmVar, Mem)
+
+  //! @brief Packed move with zero extend (SSE4.1).
+  INST_2x(pmovzxbd, kInstPmovzxbd, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(pmovzxbd, kInstPmovzxbd, XmmVar, Mem)
+
+  //! @brief Packed move with zero extend (SSE4.1).
+  INST_2x(pmovzxbq, kInstPmovzxbq, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(pmovzxbq, kInstPmovzxbq, XmmVar, Mem)
+
+  //! @brief Packed move with zero extend (SSE4.1).
+  INST_2x(pmovzxwd, kInstPmovzxwd, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(pmovzxwd, kInstPmovzxwd, XmmVar, Mem)
+
+  //! @brief Packed move with zero extend (SSE4.1).
+  INST_2x(pmovzxwq, kInstPmovzxwq, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(pmovzxwq, kInstPmovzxwq, XmmVar, Mem)
+
+  //! @brief Packed move with zero extend (SSE4.1).
+  INST_2x(pmovzxdq, kInstPmovzxdq, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(pmovzxdq, kInstPmovzxdq, XmmVar, Mem)
+
+  //! @brief Multiply packed signed dword integers (SSE4.1).
+  INST_2x(pmuldq, kInstPmuldq, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(pmuldq, kInstPmuldq, XmmVar, Mem)
+
+  //! @brief Multiply packed signed integers and store low result (SSE4.1).
+  INST_2x(pmulld, kInstPmulld, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(pmulld, kInstPmulld, XmmVar, Mem)
+
+  //! @brief Logical compare (SSE4.1).
+  INST_2x(ptest, kInstPtest, XmmVar, XmmVar)
+  //! @overload
+  INST_2x(ptest, kInstPtest, XmmVar, Mem)
+
+  //! @brief Round packed SP-FP values (SSE4.1).
+  INST_3i(roundps, kInstRoundps, XmmVar, XmmVar, Imm)
+  //! @overload
+  INST_3i(roundps, kInstRoundps, XmmVar, Mem, Imm)
+
+  //! @brief Round scalar SP-FP value (SSE4.1).
+  INST_3i(roundss, kInstRoundss, XmmVar, XmmVar, Imm)
+  //! @overload
+  INST_3i(roundss, kInstRoundss, XmmVar, Mem, Imm)
+
+  //! @brief Round packed DP-FP values (SSE4.1).
+  INST_3i(roundpd, kInstRoundpd, XmmVar, XmmVar, Imm)
+  //! @overload
+  INST_3i(roundpd, kInstRoundpd, XmmVar, Mem, Imm)
+
+  //! @brief Round scalar DP-FP value (SSE4.1).
+  INST_3i(roundsd, kInstRoundsd, XmmVar, XmmVar, Imm)
+  //! @overload
+  INST_3i(roundsd, kInstRoundsd, XmmVar, Mem, Imm)
+
+  // --------------------------------------------------------------------------
+  // [SSE4.2]
+  // --------------------------------------------------------------------------
+
+  //! @brief Packed compare explicit length strings, return index (SSE4.2).
+  INST_3i(pcmpestri, kInstPcmpestri, XmmVar, XmmVar, Imm)
+  //! @overload
+  INST_3i(pcmpestri, kInstPcmpestri, XmmVar, Mem, Imm)
+
+  //! @brief Packed compare explicit length strings, return mask (SSE4.2).
+  INST_3i(pcmpestrm, kInstPcmpestrm, XmmVar, XmmVar, Imm)
+  //! @overload
+  INST_3i(pcmpestrm, kInstPcmpestrm, XmmVar, Mem, Imm)
+
+  //! @brief Packed compare implicit length strings, return index (SSE4.2).
+  INST_3i(pcmpistri, kInstPcmpistri, XmmVar, XmmVar, Imm)
+  //! @overload
+  INST_3i(pcmpistri, kInstPcmpistri, XmmVar, Mem, Imm)
+
+  //! @brief Packed compare implicit length strings, return mask (SSE4.2).
+  INST_3i(pcmpistrm, kInstPcmpistrm, XmmVar, XmmVar, Imm)
+  //! @overload
+  INST_3i(pcmpistrm, kInstPcmpistrm, XmmVar, Mem, Imm)
+
+  //!
@brief Compare packed data for greater than (SSE4.2). + INST_2x(pcmpgtq, kInstPcmpgtq, XmmVar, XmmVar) + //! @overload + INST_2x(pcmpgtq, kInstPcmpgtq, XmmVar, Mem) + + // -------------------------------------------------------------------------- + // [AESNI] + // -------------------------------------------------------------------------- + + //! @brief Perform a single round of the AES decryption flow. + INST_2x(aesdec, kInstAesdec, XmmVar, XmmVar) + //! @overload + INST_2x(aesdec, kInstAesdec, XmmVar, Mem) + + //! @brief Perform the last round of the AES decryption flow. + INST_2x(aesdeclast, kInstAesdeclast, XmmVar, XmmVar) + //! @overload + INST_2x(aesdeclast, kInstAesdeclast, XmmVar, Mem) + + //! @brief Perform a single round of the AES encryption flow. + INST_2x(aesenc, kInstAesenc, XmmVar, XmmVar) + //! @overload + INST_2x(aesenc, kInstAesenc, XmmVar, Mem) + + //! @brief Perform the last round of the AES encryption flow. + INST_2x(aesenclast, kInstAesenclast, XmmVar, XmmVar) + //! @overload + INST_2x(aesenclast, kInstAesenclast, XmmVar, Mem) + + //! @brief Perform the InvMixColumns transformation. + INST_2x(aesimc, kInstAesimc, XmmVar, XmmVar) + //! @overload + INST_2x(aesimc, kInstAesimc, XmmVar, Mem) + + //! @brief Assist in expanding the AES cipher key. + INST_3i(aeskeygenassist, kInstAeskeygenassist, XmmVar, XmmVar, Imm) + //! @overload + INST_3i(aeskeygenassist, kInstAeskeygenassist, XmmVar, Mem, Imm) + + // -------------------------------------------------------------------------- + // [PCLMULQDQ] + // -------------------------------------------------------------------------- + + //! @brief Carry-less multiplication quadword. + INST_3i(pclmulqdq, kInstPclmulqdq, XmmVar, XmmVar, Imm); + //! @overload + INST_3i(pclmulqdq, kInstPclmulqdq, XmmVar, Mem, Imm); +}; + +//! @} + +} // x86x64 namespace +} // asmjit namespace + +// ============================================================================ +// [asmjit::x86] +// ============================================================================ + +#if defined(ASMJIT_BUILD_X86) + +namespace asmjit { +namespace x86 { + +//! @addtogroup asmjit_x86x64 +//! @{ + +struct Compiler : public X86X64Compiler { + ASMJIT_NO_COPY(Compiler) + + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! @brief Create a @ref Compiler instance. + ASMJIT_API Compiler(BaseRuntime* runtime); + //! @brief Destroy the @ref Compiler instance. + ASMJIT_API ~Compiler(); + + // ------------------------------------------------------------------------- + // [Options] + // ------------------------------------------------------------------------- + + //! @overload + ASMJIT_INLINE Compiler& short_() + { _options |= kInstOptionShortForm; return *this; } + + //! @overload + ASMJIT_INLINE Compiler& long_() + { _options |= kInstOptionLongForm; return *this; } + + //! @overload + ASMJIT_INLINE Compiler& taken() + { _options |= kInstOptionTaken; return *this; } + + //! @overload + ASMJIT_INLINE Compiler& notTaken() + { _options |= kInstOptionNotTaken; return *this; } + + //! @overload + ASMJIT_INLINE Compiler& lock() + { _options |= kInstOptionLock; return *this; } + + // -------------------------------------------------------------------------- + // [X86-Only Instructions] + // -------------------------------------------------------------------------- + + //! @brief Decimal adjust AL after addition (32-bit). 
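+  //! @note Not encodable in 64-bit mode; available only through this x86 Compiler.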
+  INST_1x(daa, kInstDaa, GpVar)
+  //! @brief Decimal adjust AL after subtraction (32-bit).
+  INST_1x(das, kInstDas, GpVar)
+
+  //! @brief Pop all Gp registers (EDI|ESI|EBP|EBX|EDX|ECX|EAX).
+  INST_0x(popa, kInstPopa)
+
+  //! @brief Push all Gp registers (EAX|ECX|EDX|EBX|original ESP|EBP|ESI|EDI).
+  INST_0x(pusha, kInstPusha)
+};
+
+//! @}
+
+} // x86 namespace
+} // asmjit namespace
+
+#endif // ASMJIT_BUILD_X86
+
+// ============================================================================
+// [asmjit::x64]
+// ============================================================================
+
+#if defined(ASMJIT_BUILD_X64)
+
+namespace asmjit {
+namespace x64 {
+
+//! @addtogroup asmjit_x86x64
+//! @{
+
+struct Compiler : public X86X64Compiler {
+  ASMJIT_NO_COPY(Compiler)
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a @ref Compiler instance.
+  ASMJIT_API Compiler(BaseRuntime* runtime);
+  //! @brief Destroy the @ref Compiler instance.
+  ASMJIT_API ~Compiler();
+
+  // --------------------------------------------------------------------------
+  // [Options]
+  // --------------------------------------------------------------------------
+
+  //! @overload
+  ASMJIT_INLINE Compiler& short_()
+  { _options |= kInstOptionShortForm; return *this; }
+
+  //! @overload
+  ASMJIT_INLINE Compiler& long_()
+  { _options |= kInstOptionLongForm; return *this; }
+
+  //! @overload
+  ASMJIT_INLINE Compiler& taken()
+  { _options |= kInstOptionTaken; return *this; }
+
+  //! @overload
+  ASMJIT_INLINE Compiler& notTaken()
+  { _options |= kInstOptionNotTaken; return *this; }
+
+  //! @overload
+  ASMJIT_INLINE Compiler& lock()
+  { _options |= kInstOptionLock; return *this; }
+
+  //! @brief Force rex prefix.
+  ASMJIT_INLINE Compiler& rex()
+  { _options |= kInstOptionRex; return *this; }
+
+  // --------------------------------------------------------------------------
+  // [X64-Only Instructions]
+  // --------------------------------------------------------------------------
+
+  //! @brief Convert dword to qword (RAX <- Sign Extend EAX).
+  INST_1x(cdqe, kInstCdqe, GpVar /* eax */)
+  //! @brief Convert qword to oword (RDX:RAX <- Sign Extend RAX).
+  INST_2x(cqo, kInstCqo, GpVar /* rdx */, GpVar /* rax */)
+
+  //! @brief Compares the 128-bit value in RDX:RAX with the memory operand (X64).
+  ASMJIT_INLINE InstNode* cmpxchg16b(
+    const GpVar& cmp_edx, const GpVar& cmp_eax,
+    const GpVar& cmp_ecx, const GpVar& cmp_ebx,
+    const Mem& dst)
+  { return emit(kInstCmpxchg16b, cmp_edx, cmp_eax, cmp_ecx, cmp_ebx, dst); }
+
+  //! @brief Move dword to qword with sign-extension.
+  INST_2x(movsxd, kInstMovsxd, GpVar, GpVar)
+  //! @overload
+  INST_2x(movsxd, kInstMovsxd, GpVar, Mem)
+
+  //! @brief Load ECX/RCX QWORDs from DS:[ESI/RSI] to RAX.
+  INST_3x_(rep_lodsq, kInstRepLodsq, GpVar, GpVar, GpVar, o0.getId() != o1.getId() && o1.getId() != o2.getId())
+
+  //! @brief Move ECX/RCX QWORDs from DS:[ESI/RSI] to ES:[EDI/RDI].
+  INST_3x_(rep_movsq, kInstRepMovsq, GpVar, GpVar, GpVar, o0.getId() != o1.getId() && o1.getId() != o2.getId())
+
+  //! @brief Fill ECX/RCX QWORDs at ES:[EDI/RDI] with RAX.
+  INST_3x_(rep_stosq, kInstRepStosq, GpVar, GpVar, GpVar, o0.getId() != o1.getId() && o1.getId() != o2.getId())
+
+  //! @brief Repeated find nonmatching QWORDs in ES:[EDI/RDI] and DS:[ESI/RSI].
+  INST_3x_(repe_cmpsq, kInstRepeCmpsq, GpVar, GpVar, GpVar, o0.getId() != o1.getId() && o1.getId() != o2.getId())
+
+  //! @brief Find non-RAX QWORD starting at ES:[EDI/RDI].
+  INST_3x_(repe_scasq, kInstRepeScasq, GpVar, GpVar, GpVar, o0.getId() != o1.getId() && o1.getId() != o2.getId())
+
+  //! @brief Find matching QWORDs in ES:[EDI/RDI] and DS:[ESI/RSI].
+  INST_3x_(repne_cmpsq, kInstRepneCmpsq, GpVar, GpVar, GpVar, o0.getId() != o1.getId() && o1.getId() != o2.getId())
+
+  //! @brief Find RAX starting at ES:[EDI/RDI].
+  INST_3x_(repne_scasq, kInstRepneScasq, GpVar, GpVar, GpVar, o0.getId() != o1.getId() && o1.getId() != o2.getId())
+
+  using X86X64Compiler::movq;
+
+  //! @overload
+  INST_2x(movq, kInstMovq, GpVar, MmVar)
+  //! @overload
+  INST_2x(movq, kInstMovq, MmVar, GpVar)
+
+  //! @overload
+  INST_2x(movq, kInstMovq, GpVar, XmmVar)
+  //! @overload
+  INST_2x(movq, kInstMovq, XmmVar, GpVar)
+};
+
+//! @}
+
+} // x64 namespace
+} // asmjit namespace
+
+#endif // ASMJIT_BUILD_X64
+
+// ============================================================================
+// [CodeGen-End]
+// ============================================================================
+
+#undef INST_0x
+
+#undef INST_1x
+#undef INST_1x_
+#undef INST_1i
+#undef INST_1i_
+#undef INST_1cc
+
+#undef INST_2x
+#undef INST_2x_
+#undef INST_2i
+#undef INST_2i_
+#undef INST_2cc
+
+#undef INST_3x
+#undef INST_3x_
+#undef INST_3i
+#undef INST_3i_
+
+// [Api-End]
+#include "../base/apiend.h"
+
+// [Guard]
+#endif // _ASMJIT_X86_X86COMPILER_H
diff --git a/src/asmjit/x86/x86context.cpp b/src/asmjit/x86/x86context.cpp
new file mode 100644
index 0000000..bfe191a
--- /dev/null
+++ b/src/asmjit/x86/x86context.cpp
@@ -0,0 +1,5159 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Export]
+#define ASMJIT_EXPORTS
+
+// [Guard]
+#include "../build.h"
+#if defined(ASMJIT_BUILD_X86) || defined(ASMJIT_BUILD_X64)
+
+// [Dependencies - AsmJit]
+#include "../base/intutil.h"
+#include "../base/string.h"
+#include "../x86/x86assembler.h"
+#include "../x86/x86compiler.h"
+#include "../x86/x86context_p.h"
+#include "../x86/x86cpu.h"
+#include "../x86/x86func.h"
+
+// [Api-Begin]
+#include "../base/apibegin.h"
+
+namespace asmjit {
+namespace x86x64 {
+
+// ============================================================================
+// [Forward Declarations]
+// ============================================================================
+
+static Error X86X64Context_translateOperands(X86X64Context* self, Operand* opList, uint32_t opCount);
+
+// ============================================================================
+// [asmjit::x86x64::X86X64Context - Construction / Destruction]
+// ============================================================================
+
+X86X64Context::X86X64Context(X86X64Compiler* compiler) : BaseContext(compiler) {
+  // Setup x86 specific data.
+#if defined(ASMJIT_BUILD_X86)
+  if (compiler->getArch() == kArchX86) {
+    _zsp = x86::esp;
+    _zbp = x86::ebp;
+    _memSlot._vmem.type = kMemTypeStackIndex;
+    _memSlot.setGpdBase(true);
+    _baseRegsCount = x86::kRegCountGp;
+  }
+#endif // ASMJIT_BUILD_X86
+
+  // Setup x64 specific data.
+#if defined(ASMJIT_BUILD_X64) + if (compiler->getArch() == kArchX64) { + _zsp = x64::rsp; + _zbp = x64::rbp; + _memSlot._vmem.type = kMemTypeStackIndex; + _memSlot.setGpdBase(false); + _baseRegsCount = x64::kRegCountGp; + } +#endif // ASMJIT_BUILD_X64 + + _state = &_x86State; + _emitComments = compiler->getLogger() != NULL; + + reset(); +} + +X86X64Context::~X86X64Context() {} + +// ============================================================================ +// [asmjit::x86x64::X86X64Context - Reset] +// ============================================================================ + +void X86X64Context::reset() { + BaseContext::reset(); + + _x86State.reset(0); + _clobberedRegs.reset(); + + _stackFrameCell = NULL; + _gaRegs[kRegClassGp] = IntUtil::bits(_baseRegsCount) & ~IntUtil::mask(kRegIndexSp); + _gaRegs[kRegClassFp] = IntUtil::bits(kRegCountFp); + _gaRegs[kRegClassMm] = IntUtil::bits(kRegCountMm); + _gaRegs[kRegClassXy] = IntUtil::bits(_baseRegsCount); + + _argBaseReg = kInvalidReg; // Used by patcher. + _varBaseReg = kInvalidReg; // Used by patcher. + + _argBaseOffset = 0; // Used by patcher. + _varBaseOffset = 0; // Used by patcher. + + _argActualDisp = 0; // Used by translator. + _varActualDisp = 0; // Used by translator. +} + +// ============================================================================ +// [asmjit::x86x64::X86X64SpecialInst] +// ============================================================================ + +struct X86X64SpecialInst { + uint8_t inReg; + uint8_t outReg; + uint16_t flags; +}; + +static const X86X64SpecialInst x86SpecialInstCpuid[] = { + { kRegIndexAx, kRegIndexAx, kVarAttrInOutReg }, + { kInvalidReg, kRegIndexBx, kVarAttrOutReg }, + { kInvalidReg, kRegIndexCx, kVarAttrOutReg }, + { kInvalidReg, kRegIndexDx, kVarAttrOutReg } +}; + +static const X86X64SpecialInst x86SpecialInstCbwCdqeCwde[] = { + { kRegIndexAx, kRegIndexAx, kVarAttrInOutReg } +}; + +static const X86X64SpecialInst x86SpecialInstCdqCwdCqo[] = { + { kInvalidReg, kRegIndexDx, kVarAttrOutReg }, + { kRegIndexAx, kInvalidReg, kVarAttrInReg } +}; + +static const X86X64SpecialInst x86SpecialInstCmpxchg[] = { + { kRegIndexAx, kRegIndexAx, kVarAttrInOutReg }, + { kInvalidReg, kInvalidReg, kVarAttrInOutReg }, + { kInvalidReg, kInvalidReg, kVarAttrInReg } +}; + +static const X86X64SpecialInst x86SpecialInstCmpxchg8b16b[] = { + { kRegIndexDx, kRegIndexDx, kVarAttrInOutReg }, + { kRegIndexAx, kRegIndexAx, kVarAttrInOutReg }, + { kRegIndexCx, kInvalidReg, kVarAttrInReg }, + { kRegIndexBx, kInvalidReg, kVarAttrInReg } +}; + +static const X86X64SpecialInst x86SpecialInstDaaDas[] = { + { kRegIndexAx, kRegIndexAx, kVarAttrInOutReg } +}; + +static const X86X64SpecialInst x86SpecialInstDiv[] = { + { kInvalidReg, kRegIndexDx, kVarAttrInOutReg }, + { kRegIndexAx, kRegIndexAx, kVarAttrInOutReg }, + { kInvalidReg, kInvalidReg, kVarAttrInReg } +}; + +static const X86X64SpecialInst x86SpecialInstMul[] = { + { kInvalidReg, kRegIndexDx, kVarAttrOutReg }, + { kRegIndexAx, kRegIndexAx, kVarAttrInOutReg }, + { kInvalidReg, kInvalidReg, kVarAttrInReg } +}; + +static const X86X64SpecialInst x86SpecialInstMovPtr[] = { + { kInvalidReg, kRegIndexAx, kVarAttrOutReg }, + { kRegIndexAx, kInvalidReg, kVarAttrInReg } +}; + +static const X86X64SpecialInst x86SpecialInstLahf[] = { + { kInvalidReg, kRegIndexAx, kVarAttrOutReg } +}; + +static const X86X64SpecialInst x86SpecialInstSahf[] = { + { kRegIndexAx, kInvalidReg, kVarAttrInReg } +}; + +static const X86X64SpecialInst x86SpecialInstMaskmovqMaskmovdqu[] = { + { kInvalidReg, 
kRegIndexDi, kVarAttrInReg }, + { kInvalidReg, kInvalidReg, kVarAttrInReg }, + { kInvalidReg, kInvalidReg, kVarAttrInReg } +}; + +static const X86X64SpecialInst x86SpecialInstRot[] = { + { kInvalidReg, kInvalidReg, kVarAttrInOutReg }, + { kRegIndexCx, kInvalidReg, kVarAttrInReg } +}; + +static const X86X64SpecialInst x86SpecialInstShlrd[] = { + { kInvalidReg, kInvalidReg, kVarAttrInOutReg }, + { kInvalidReg, kInvalidReg, kVarAttrInReg }, + { kRegIndexCx, kInvalidReg, kVarAttrInReg } +}; + +static const X86X64SpecialInst x86SpecialInstRdtscRdtscp[] = { + { kInvalidReg, kRegIndexDx, kVarAttrOutReg }, + { kInvalidReg, kRegIndexAx, kVarAttrOutReg }, + { kInvalidReg, kRegIndexCx, kVarAttrOutReg } +}; + +static const X86X64SpecialInst x86SpecialInstRepLod[] = { + { kInvalidReg, kRegIndexAx, kVarAttrOutReg }, + { kRegIndexSi, kInvalidReg, kVarAttrInReg }, + { kRegIndexCx, kRegIndexCx, kVarAttrInOutReg } +}; + +static const X86X64SpecialInst x86SpecialInstRepMovCmp[] = { + { kRegIndexDi, kInvalidReg, kVarAttrInReg }, + { kRegIndexSi, kInvalidReg, kVarAttrInReg }, + { kRegIndexCx, kRegIndexCx, kVarAttrInOutReg } +}; + +static const X86X64SpecialInst x86SpecialInstRepSto[] = { + { kRegIndexDi, kInvalidReg, kVarAttrInReg }, + { kRegIndexAx, kInvalidReg, kVarAttrInReg }, + { kRegIndexCx, kRegIndexCx, kVarAttrInOutReg } +}; + +static const X86X64SpecialInst x86SpecialInstRepSca[] = { + { kRegIndexDi, kInvalidReg, kVarAttrInReg }, + { kRegIndexAx, kInvalidReg, kVarAttrInReg }, + { kRegIndexCx, kRegIndexCx, kVarAttrInOutReg } +}; + +static const X86X64SpecialInst x86SpecialInstBlend[] = { + { kInvalidReg, kInvalidReg, kVarAttrOutReg }, + { kInvalidReg, kInvalidReg, kVarAttrInReg }, + { 0 , kInvalidReg, kVarAttrInReg } +}; + +static ASMJIT_INLINE const X86X64SpecialInst* X86X64SpecialInst_get(uint32_t code, const Operand* opList, uint32_t opCount) { + switch (code) { + case kInstCpuid: + return x86SpecialInstCpuid; + + case kInstCbw: + case kInstCdqe: + case kInstCwde: + return x86SpecialInstCbwCdqeCwde; + + case kInstCdq: + case kInstCwd: + case kInstCqo: + return x86SpecialInstCdqCwdCqo; + + case kInstCmpxchg: + return x86SpecialInstCmpxchg; + + case kInstCmpxchg8b: + case kInstCmpxchg16b: + return x86SpecialInstCmpxchg8b16b; + + case kInstDaa: + case kInstDas: + return x86SpecialInstDaaDas; + + case kInstIdiv: + case kInstDiv: + return x86SpecialInstDiv; + + case kInstImul: + if (opCount == 2) + return NULL; + if (opCount == 3 && !(opList[0].isVar() && opList[1].isVar() && opList[2].isVarOrMem())) + return NULL; + // ... Fall through ... + case kInstMul: + return x86SpecialInstMul; + + case kInstMovptr: + return x86SpecialInstMovPtr; + + case kInstLahf: + return x86SpecialInstLahf; + + case kInstSahf: + return x86SpecialInstSahf; + + case kInstMaskmovq: + case kInstMaskmovdqu: + return x86SpecialInstMaskmovqMaskmovdqu; + + // Not supported. + case kInstEnter: + case kInstLeave: + return NULL; + + // Not supported. + case kInstRet: + return NULL; + + case kInstMonitor: + case kInstMwait: + // TODO: [COMPILER] Monitor/MWait. + return NULL; + + case kInstPop: + // TODO: [COMPILER] Pop. + return NULL; + + // Not supported. + case kInstPopa: + case kInstPopf: + return NULL; + + case kInstPush: + // TODO: [COMPILER] Push. + return NULL; + + // Not supported. + case kInstPusha: + case kInstPushf: + return NULL; + + // Rot instruction is special only if the last operand is a variable. 
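+    // (x86 encodes a variable rotate/shift count only in CL, which is why
+    // x86SpecialInstRot pins the count operand to kRegIndexCx.)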
+ case kInstRcl: + case kInstRcr: + case kInstRol: + case kInstRor: + case kInstSal: + case kInstSar: + case kInstShl: + case kInstShr: + if (!opList[1].isVar()) + return NULL; + return x86SpecialInstRot; + + // Shld/Shrd instruction is special only if the last operand is a variable. + case kInstShld: + case kInstShrd: + if (!opList[2].isVar()) + return NULL; + return x86SpecialInstShlrd; + + case kInstRdtsc: + case kInstRdtscp: + return x86SpecialInstRdtscRdtscp; + + case kInstRepLodsb: + case kInstRepLodsd: + case kInstRepLodsq: + case kInstRepLodsw: + return x86SpecialInstRepLod; + + case kInstRepMovsb: + case kInstRepMovsd: + case kInstRepMovsq: + case kInstRepMovsw: + return x86SpecialInstRepMovCmp; + + case kInstRepeCmpsb: + case kInstRepeCmpsd: + case kInstRepeCmpsq: + case kInstRepeCmpsw: + return x86SpecialInstRepMovCmp; + + case kInstRepneCmpsb: + case kInstRepneCmpsd: + case kInstRepneCmpsq: + case kInstRepneCmpsw: + return x86SpecialInstRepMovCmp; + + case kInstRepStosb: + case kInstRepStosd: + case kInstRepStosq: + case kInstRepStosw: + return x86SpecialInstRepSto; + + case kInstRepeScasb: + case kInstRepeScasd: + case kInstRepeScasq: + case kInstRepeScasw: + return x86SpecialInstRepSca; + + case kInstRepneScasb: + case kInstRepneScasd: + case kInstRepneScasq: + case kInstRepneScasw: + return x86SpecialInstRepSca; + + case kInstBlendvpd: + case kInstBlendvps: + case kInstPblendvb: + return x86SpecialInstBlend; + + default: + return NULL; + } +} + +// ============================================================================ +// [asmjit::x86x64::X86X64Context - EmitLoad] +// ============================================================================ + +void X86X64Context::emitLoad(VarData* vd, uint32_t regIndex, const char* reason) { + ASMJIT_ASSERT(regIndex != kInvalidReg); + + X86X64Compiler* compiler = getCompiler(); + Mem m = getVarMem(vd); + + BaseNode* node = NULL; + bool comment = _emitComments; + + switch (vd->getType()) { + case kVarTypeInt8: + case kVarTypeUInt8: + node = compiler->emit(kInstMov, gpb_lo(regIndex), m); + if (comment) goto _Comment; + break; + + case kVarTypeInt16: + case kVarTypeUInt16: + node = compiler->emit(kInstMov, gpw(regIndex), m); + if (comment) goto _Comment; + break; + + case kVarTypeInt32: + case kVarTypeUInt32: + node = compiler->emit(kInstMov, gpd(regIndex), m); + if (comment) goto _Comment; + break; + +#if defined(ASMJIT_BUILD_X64) + case kVarTypeInt64: + case kVarTypeUInt64: + node = compiler->emit(kInstMov, x64::gpq(regIndex), m); + if (comment) goto _Comment; + break; +#endif // ASMJIT_BUILD_X64 + + case kVarTypeFp32: + case kVarTypeFp64: + case kVarTypeFpEx: + // TODO: [COMPILER] FPU. 
+ break; + + case kVarTypeMm: + node = compiler->emit(kInstMovq, mm(regIndex), m); + if (comment) goto _Comment; + break; + + case kVarTypeXmm: + node = compiler->emit(kInstMovdqa, xmm(regIndex), m); + if (comment) goto _Comment; + break; + + case kVarTypeXmmSs: + node = compiler->emit(kInstMovss, xmm(regIndex), m); + if (comment) goto _Comment; + break; + + case kVarTypeXmmSd: + node = compiler->emit(kInstMovsd, xmm(regIndex), m); + if (comment) goto _Comment; + break; + + case kVarTypeXmmPs: + node = compiler->emit(kInstMovaps, xmm(regIndex), m); + if (comment) goto _Comment; + break; + + case kVarTypeXmmPd: + node = compiler->emit(kInstMovapd, xmm(regIndex), m); + if (comment) goto _Comment; + break; + } + return; + +_Comment: + node->setComment(compiler->_stringAllocator.sformat("[%s] %s", reason, vd->getName())); +} + +// ============================================================================ +// [asmjit::x86x64::X86X64Context - EmitSave] +// ============================================================================ + +void X86X64Context::emitSave(VarData* vd, uint32_t regIndex, const char* reason) { + ASMJIT_ASSERT(regIndex != kInvalidReg); + + X86X64Compiler* compiler = getCompiler(); + Mem m = getVarMem(vd); + + BaseNode* node = NULL; + bool comment = _emitComments; + + switch (vd->getType()) { + case kVarTypeInt8: + case kVarTypeUInt8: + node = compiler->emit(kInstMov, m, gpb_lo(regIndex)); + if (comment) goto _Comment; + break; + + case kVarTypeInt16: + case kVarTypeUInt16: + node = compiler->emit(kInstMov, m, gpw(regIndex)); + if (comment) goto _Comment; + break; + + case kVarTypeInt32: + case kVarTypeUInt32: + node = compiler->emit(kInstMov, m, gpd(regIndex)); + if (comment) goto _Comment; + break; + +#if defined(ASMJIT_BUILD_X64) + case kVarTypeInt64: + case kVarTypeUInt64: + node = compiler->emit(kInstMov, m, x64::gpq(regIndex)); + if (comment) goto _Comment; + break; +#endif // ASMJIT_BUILD_X64 + + case kVarTypeFp32: + case kVarTypeFp64: + case kVarTypeFpEx: + // TODO: [COMPILER] FPU. 
+ break; + + case kVarTypeMm: + node = compiler->emit(kInstMovq, m, mm(regIndex)); + if (comment) goto _Comment; + break; + + case kVarTypeXmm: + node = compiler->emit(kInstMovdqa, m, xmm(regIndex)); + if (comment) goto _Comment; + break; + + case kVarTypeXmmSs: + node = compiler->emit(kInstMovss, m, xmm(regIndex)); + if (comment) goto _Comment; + break; + + case kVarTypeXmmSd: + node = compiler->emit(kInstMovsd, m, xmm(regIndex)); + if (comment) goto _Comment; + break; + + case kVarTypeXmmPs: + node = compiler->emit(kInstMovaps, m, xmm(regIndex)); + if (comment) goto _Comment; + break; + + case kVarTypeXmmPd: + node = compiler->emit(kInstMovapd, m, xmm(regIndex)); + if (comment) goto _Comment; + break; + } + return; + +_Comment: + node->setComment(compiler->_stringAllocator.sformat("[%s] %s", reason, vd->getName())); +} + +// ============================================================================ +// [asmjit::x86x64::X86X64Context - EmitMove] +// ============================================================================ + +void X86X64Context::emitMove(VarData* vd, uint32_t toRegIndex, uint32_t fromRegIndex, const char* reason) { + ASMJIT_ASSERT(toRegIndex != kInvalidReg); + ASMJIT_ASSERT(fromRegIndex != kInvalidReg); + + X86X64Compiler* compiler = getCompiler(); + + BaseNode* node = NULL; + bool comment = _emitComments; + + switch (vd->getType()) { + case kVarTypeInt8: + case kVarTypeUInt8: + case kVarTypeInt16: + case kVarTypeUInt16: + case kVarTypeInt32: + case kVarTypeUInt32: + node = compiler->emit(kInstMov, gpd(toRegIndex), gpd(fromRegIndex)); + if (comment) goto _Comment; + break; + +#if defined(ASMJIT_BUILD_X64) + case kVarTypeInt64: + case kVarTypeUInt64: + node = compiler->emit(kInstMov, x64::gpq(toRegIndex), x64::gpq(fromRegIndex)); + if (comment) goto _Comment; + break; +#endif // ASMJIT_BUILD_X64 + + case kVarTypeFp32: + case kVarTypeFp64: + case kVarTypeFpEx: + // TODO: [COMPILER] FPU. 
+      break;
+
+    case kVarTypeMm:
+      node = compiler->emit(kInstMovq, mm(toRegIndex), mm(fromRegIndex));
+      if (comment) goto _Comment;
+      break;
+
+    case kVarTypeXmm:
+      node = compiler->emit(kInstMovdqa, xmm(toRegIndex), xmm(fromRegIndex));
+      if (comment) goto _Comment;
+      break;
+
+    case kVarTypeXmmSs:
+      node = compiler->emit(kInstMovss, xmm(toRegIndex), xmm(fromRegIndex));
+      if (comment) goto _Comment;
+      break;
+
+    case kVarTypeXmmSd:
+      node = compiler->emit(kInstMovsd, xmm(toRegIndex), xmm(fromRegIndex));
+      if (comment) goto _Comment;
+      break;
+
+    case kVarTypeXmmPs:
+      node = compiler->emit(kInstMovaps, xmm(toRegIndex), xmm(fromRegIndex));
+      if (comment) goto _Comment;
+      break;
+
+    case kVarTypeXmmPd:
+      node = compiler->emit(kInstMovapd, xmm(toRegIndex), xmm(fromRegIndex));
+      if (comment) goto _Comment;
+      break;
+  }
+  return;
+
+_Comment:
+  node->setComment(compiler->_stringAllocator.sformat("[%s] %s", reason, vd->getName()));
+}
+
+// ============================================================================
+// [asmjit::x86x64::X86X64Context - EmitSwap]
+// ============================================================================
+
+void X86X64Context::emitSwapGp(VarData* aVd, VarData* bVd, uint32_t aIndex, uint32_t bIndex, const char* reason) {
+  ASMJIT_ASSERT(aIndex != kInvalidReg);
+  ASMJIT_ASSERT(bIndex != kInvalidReg);
+
+  X86X64Compiler* compiler = getCompiler();
+
+  BaseNode* node = NULL;
+  bool comment = _emitComments;
+
+#if defined(ASMJIT_BUILD_X64)
+  uint32_t vType = IntUtil::iMax(aVd->getType(), bVd->getType());
+
+  if (vType == kVarTypeInt64 || vType == kVarTypeUInt64) {
+    node = compiler->emit(kInstXchg, x64::gpq(aIndex), x64::gpq(bIndex));
+    if (comment) goto _Comment;
+    return;
+  }
+#endif // ASMJIT_BUILD_X64
+
+  node = compiler->emit(kInstXchg, gpd(aIndex), gpd(bIndex));
+  if (comment) goto _Comment;
+  return;
+
+_Comment:
+  node->setComment(compiler->_stringAllocator.sformat("[%s] %s, %s", reason, aVd->getName(), bVd->getName()));
+}
+
+// ============================================================================
+// [asmjit::x86x64::X86X64Context - EmitPushSequence / EmitPopSequence]
+// ============================================================================
+
+void X86X64Context::emitPushSequence(uint32_t regs) {
+  X86X64Compiler* compiler = getCompiler();
+  uint32_t i = 0;
+
+  GpReg gpReg(_zsp);
+  while (regs != 0) {
+    ASMJIT_ASSERT(i < _baseRegsCount);
+    if ((regs & 0x1) != 0)
+      compiler->emit(kInstPush, gpReg.setIndex(i));
+    i++;
+    regs >>= 1;
+  }
+}
+
+void X86X64Context::emitPopSequence(uint32_t regs) {
+  X86X64Compiler* compiler = getCompiler();
+  int32_t i;
+  uint32_t mask;
+
+  if (regs == 0)
+    return;
+
+  GpReg gpReg(_zsp);
+  for (i = _baseRegsCount - 1, mask = 0x1 << static_cast<uint32_t>(i); i >= 0; i--, mask >>= 1) {
+    if ((regs & mask) == 0)
+      continue;
+    compiler->emit(kInstPop, gpReg.setIndex(i));
+  }
+}
+
+// ============================================================================
+// [asmjit::x86x64::X86X64Context - EmitMoveArgOnStack / EmitMoveImmOnStack]
+// ============================================================================
+
+void X86X64Context::emitMoveVarOnStack(
+  uint32_t dstType, const Mem* dst,
+  uint32_t srcType, uint32_t srcIndex) {
+
+  ASMJIT_ASSERT(srcIndex != kInvalidReg);
+  X86X64Compiler* compiler = getCompiler();
+
+  Mem m0(*dst);
+  X86Reg r0;
+  X86Reg r1;
+
+  uint32_t regSize = compiler->getRegSize();
+  uint32_t instCode;
+
+  switch (dstType) {
+    case kVarTypeInt8:
+    case kVarTypeUInt8:
+      // Move DWORD (Gp).
+ if (IntUtil::inInterval(srcType, kVarTypeInt8, kVarTypeUInt64)) + goto _MovGpD; + + // Move DWORD (Mm). + if (IntUtil::inInterval(srcType, kVarTypeMm, kVarTypeMm)) + goto _MovMmD; + + // Move DWORD (Xmm). + if (IntUtil::inInterval(srcType, kVarTypeXmm, kVarTypeXmmPd)) + goto _MovXmmD; + + break; + + case kVarTypeInt16: + case kVarTypeUInt16: + // Extend BYTE->WORD (Gp). + if (IntUtil::inInterval(srcType, kVarTypeInt8, kVarTypeUInt8)) { + r1.setSize(1); + r1.setCode(kRegTypeGpbLo, srcIndex); + + instCode = (dstType == kVarTypeInt16 && srcType == kVarTypeInt8) ? kInstMovsx : kInstMovzx; + goto _ExtendMovGpD; + } + + // Move DWORD (Gp). + if (IntUtil::inInterval(srcType, kVarTypeInt16, kVarTypeUInt64)) + goto _MovGpD; + + // Move DWORD (Mm). + if (IntUtil::inInterval(srcType, kVarTypeMm, kVarTypeMm)) + goto _MovMmD; + + // Move DWORD (Xmm). + if (IntUtil::inInterval(srcType, kVarTypeXmm, kVarTypeXmmPd)) + goto _MovXmmD; + + break; + + case kVarTypeInt32: + case kVarTypeUInt32: + // Extend BYTE->DWORD (Gp). + if (IntUtil::inInterval(srcType, kVarTypeInt8, kVarTypeUInt8)) { + r1.setSize(1); + r1.setCode(kRegTypeGpbLo, srcIndex); + + instCode = (dstType == kVarTypeInt32 && srcType == kVarTypeInt8) ? kInstMovsx : kInstMovzx; + goto _ExtendMovGpD; + } + + // Extend WORD->DWORD (Gp). + if (IntUtil::inInterval(srcType, kVarTypeInt16, kVarTypeUInt16)) { + r1.setSize(2); + r1.setCode(kRegTypeGpw, srcIndex); + + instCode = (dstType == kVarTypeInt32 && srcType == kVarTypeInt16) ? kInstMovsx : kInstMovzx; + goto _ExtendMovGpD; + } + + // Move DWORD (Gp). + if (IntUtil::inInterval(srcType, kVarTypeInt32, kVarTypeUInt64)) + goto _MovGpD; + + // Move DWORD (Mm). + if (IntUtil::inInterval(srcType, kVarTypeMm, kVarTypeMm)) + goto _MovMmD; + + // Move DWORD (Xmm). + if (IntUtil::inInterval(srcType, kVarTypeXmm, kVarTypeXmmPd)) + goto _MovXmmD; + break; + + case kVarTypeInt64: + case kVarTypeUInt64: + // Extend BYTE->QWORD (Gp). + if (IntUtil::inInterval(srcType, kVarTypeInt8, kVarTypeUInt8)) { + r1.setSize(1); + r1.setCode(kRegTypeGpbLo, srcIndex); + + instCode = (dstType == kVarTypeInt64 && srcType == kVarTypeInt8) ? kInstMovsx : kInstMovzx; + goto _ExtendMovGpXQ; + } + + // Extend WORD->QWORD (Gp). + if (IntUtil::inInterval(srcType, kVarTypeInt16, kVarTypeUInt16)) { + r1.setSize(2); + r1.setCode(kRegTypeGpw, srcIndex); + + instCode = (dstType == kVarTypeInt64 && srcType == kVarTypeInt16) ? kInstMovsx : kInstMovzx; + goto _ExtendMovGpXQ; + } + + // Extend DWORD->QWORD (Gp). + if (IntUtil::inInterval(srcType, kVarTypeInt32, kVarTypeUInt32)) { + r1.setSize(4); + r1.setCode(kRegTypeGpd, srcIndex); + + instCode = kInstMovsxd; + if (dstType == kVarTypeInt64 && srcType == kVarTypeInt32) + goto _ExtendMovGpXQ; + else + goto _ZeroExtendGpDQ; + } + + // Move QWORD (Gp). + if (IntUtil::inInterval(srcType, kVarTypeInt64, kVarTypeUInt64)) + goto _MovGpQ; + + // Move QWORD (Mm). + if (IntUtil::inInterval(srcType, kVarTypeMm, kVarTypeMm)) + goto _MovMmQ; + + // Move QWORD (Xmm). + if (IntUtil::inInterval(srcType, kVarTypeXmm, kVarTypeXmmPd)) + goto _MovXmmQ; + break; + + case kVarTypeMm: + // Extend BYTE->QWORD (Gp). + if (IntUtil::inInterval(srcType, kVarTypeInt8, kVarTypeUInt8)) { + r1.setSize(1); + r1.setCode(kRegTypeGpbLo, srcIndex); + + instCode = kInstMovzx; + goto _ExtendMovGpXQ; + } + + // Extend WORD->QWORD (Gp). 
+ if (IntUtil::inInterval(srcType, kVarTypeInt16, kVarTypeUInt16)) { + r1.setSize(2); + r1.setCode(kRegTypeGpw, srcIndex); + + instCode = kInstMovzx; + goto _ExtendMovGpXQ; + } + + // Extend DWORD->QWORD (Gp). + if (IntUtil::inInterval(srcType, kVarTypeInt32, kVarTypeUInt32)) + goto _ExtendMovGpDQ; + + // Move QWORD (Gp). + if (IntUtil::inInterval(srcType, kVarTypeInt64, kVarTypeUInt64)) + goto _MovGpQ; + + // Move QWORD (Mm). + if (IntUtil::inInterval(srcType, kVarTypeMm, kVarTypeMm)) + goto _MovMmQ; + + // Move QWORD (Xmm). + if (IntUtil::inInterval(srcType, kVarTypeXmm, kVarTypeXmmPd)) + goto _MovXmmQ; + break; + + case kVarTypeXmm: + case kVarTypeXmmPs: + case kVarTypeXmmPd: + // TODO: [COMPILER]. + break; + + case kVarTypeXmmSs: + // TODO: [COMPILER]. + break; + + case kVarTypeXmmSd: + // TODO: [COMPILER]. + break; + } + return; + + // Extend+Move Gp. +_ExtendMovGpD: + m0.setSize(4); + r0.setSize(4); + r0.setCode(kRegTypeGpd, srcIndex); + + compiler->emit(instCode, r0, r1); + compiler->emit(kInstMov, m0, r0); + return; + +_ExtendMovGpXQ: + if (regSize == 8) { + m0.setSize(8); + r0.setSize(8); + r0.setCode(kRegTypeGpq, srcIndex); + + compiler->emit(instCode, r0, r1); + compiler->emit(kInstMov, m0, r0); + } + else { + m0.setSize(4); + r0.setSize(4); + r0.setCode(kRegTypeGpd, srcIndex); + + compiler->emit(instCode, r0, r1); + +_ExtendMovGpDQ: + compiler->emit(kInstMov, m0, r0); + m0.adjust(4); + compiler->emit(kInstAnd, m0, 0); + } + return; + +_ZeroExtendGpDQ: + m0.setSize(4); + r0.setSize(4); + r0.setCode(kRegTypeGpd, srcIndex); + goto _ExtendMovGpDQ; + + // Move Gp. +_MovGpD: + m0.setSize(4); + r0.setSize(4); + r0.setCode(kRegTypeGpd, srcIndex); + compiler->emit(kInstMov, m0, r0); + return; + +_MovGpQ: + m0.setSize(8); + r0.setSize(8); + r0.setCode(kRegTypeGpq, srcIndex); + compiler->emit(kInstMov, m0, r0); + return; + + // Move Mm. +_MovMmD: + m0.setSize(4); + r0.setSize(8); + r0.setCode(kRegTypeMm, srcIndex); + compiler->emit(kInstMovd, m0, r0); + return; + +_MovMmQ: + m0.setSize(8); + r0.setSize(8); + r0.setCode(kRegTypeMm, srcIndex); + compiler->emit(kInstMovq, m0, r0); + return; + + // Move Xmm. +_MovXmmD: + m0.setSize(4); + r0.setSize(16); + r0.setCode(kRegTypeXmm, srcIndex); + compiler->emit(kInstMovd, m0, r0); + return; + +_MovXmmQ: + m0.setSize(8); + r0.setSize(16); + r0.setCode(kRegTypeXmm, srcIndex); + compiler->emit(kInstMovq, m0, r0); +} + +void X86X64Context::emitMoveImmOnStack(uint32_t dstType, const Mem* dst, const Imm* src) { + X86X64Compiler* compiler = getCompiler(); + + Mem mem(*dst); + Imm imm(*src); + + uint32_t regSize = compiler->getRegSize(); + + // One stack entry is equal to the native register size. That means that if + // we want to move 32-bit integer on the stack, we need to extend it to 64-bit + // integer. + mem.setSize(regSize); + + switch (dstType) { + case kVarTypeInt8: + case kVarTypeUInt8: + imm.truncateTo8Bits(); + compiler->emit(kInstMov, mem, imm); + break; + + case kVarTypeInt16: + case kVarTypeUInt16: + imm.truncateTo16Bits(); + compiler->emit(kInstMov, mem, imm); + break; + + case kVarTypeInt32: + case kVarTypeUInt32: +_Move32: + imm.truncateTo32Bits(); + compiler->emit(kInstMov, mem, imm); + break; + + case kVarTypeInt64: + case kVarTypeUInt64: +_Move64: + if (regSize == 4) { + uint32_t hi = imm.getUInt32Hi(); + + // Lo-Part. + imm.truncateTo32Bits(); + compiler->emit(kInstMov, mem, imm); + + // Hi-Part. 
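+        // (A QWORD immediate cannot be stored by a single mov on a 32-bit
+        // target, so the upper DWORD is written separately at [mem + 4].)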
+ mem.adjust(regSize); + imm.setUInt32(hi); + compiler->emit(kInstMov, mem, imm); + } + else { + compiler->emit(kInstMov, mem, imm); + } + break; + + case kVarTypeFp32: + goto _Move32; + + case kVarTypeFp64: + goto _Move64; + + case kVarTypeFpEx: + // Not supported. + ASMJIT_ASSERT(!"Reached"); + break; + + case kVarTypeMm: + goto _Move64; + + case kVarTypeXmm: + case kVarTypeXmmSs: + case kVarTypeXmmPs: + case kVarTypeXmmSd: + case kVarTypeXmmPd: + if (regSize == 4) { + uint32_t hi = imm.getUInt32Hi(); + + // Lo-Part. + imm.truncateTo32Bits(); + compiler->emit(kInstMov, mem, imm); + + // Hi-Part. + mem.adjust(regSize); + imm.setUInt32(hi); + compiler->emit(kInstMov, mem, imm); + + // Zero part - performing AND should generate shorter code, because + // 8-bit immediate can be used instead of 32-bit immediate required + // by MOV instruction. + mem.adjust(regSize); + imm.setUInt32(0); + compiler->emit(kInstAnd, mem, imm); + + mem.adjust(regSize); + compiler->emit(kInstAnd, mem, imm); + } + else { + // Lo-Hi parts. + compiler->emit(kInstMov, mem, imm); + + // Zero part. + mem.adjust(regSize); + imm.setUInt32(0); + compiler->emit(kInstAnd, mem, imm); + } + break; + + default: + ASMJIT_ASSERT(!"Reached"); + break; + } +} + +// ============================================================================ +// [asmjit::x86x64::X86X64Context - EmitMoveImmToReg] +// ============================================================================ + +void X86X64Context::emitMoveImmToReg(uint32_t dstType, uint32_t dstIndex, const Imm* src) { + ASMJIT_ASSERT(dstIndex != kInvalidReg); + X86X64Compiler* compiler = getCompiler(); + + X86Reg r0; + Imm imm(*src); + + switch (dstType) { + case kVarTypeInt8: + case kVarTypeUInt8: + imm.truncateTo8Bits(); + goto _Move32; + + case kVarTypeInt16: + case kVarTypeUInt16: + imm.truncateTo16Bits(); + goto _Move32; + + case kVarTypeInt32: + case kVarTypeUInt32: +_Move32Truncate: + imm.truncateTo32Bits(); +_Move32: + r0.setSize(4); + r0.setCode(kRegTypeGpd, dstIndex); + compiler->emit(kInstMov, r0, imm); + break; + + case kVarTypeInt64: + case kVarTypeUInt64: + // Move to GPD register will clear the HI-DWORD of GPQ register in 64-bit + // mode. + if (imm.isUInt32()) + goto _Move32Truncate; + + r0.setSize(8); + r0.setCode(kRegTypeGpq, dstIndex); + compiler->emit(kInstMov, r0, imm); + break; + + case kVarTypeFp32: + case kVarTypeFp64: + case kVarTypeFpEx: + // TODO: [COMPILER] EmitMoveImmToReg. + break; + + case kVarTypeMm: + // TODO: [COMPILER] EmitMoveImmToReg. + break; + + case kVarTypeXmm: + case kVarTypeXmmSs: + case kVarTypeXmmSd: + case kVarTypeXmmPs: + case kVarTypeXmmPd: + // TODO: [COMPILER] EmitMoveImmToReg. 
+      break;
+  }
+}
+
+// ============================================================================
+// [asmjit::x86x64::X86X64Context - Register Management]
+// ============================================================================
+
+#if defined(ASMJIT_DEBUG)
+template<int C>
+static ASMJIT_INLINE void X86X64Context_checkStateVars(X86X64Context* self) {
+  VarState* state = self->getState();
+  VarData** sVars = state->getListByClass(C);
+
+  uint32_t regIndex;
+  uint32_t regMask;
+  uint32_t regCount = self->getRegsCount(C);
+
+  uint32_t occupied = state->_occupied.get(C);
+  uint32_t modified = state->_modified.get(C);
+
+  for (regIndex = 0, regMask = 1; regIndex < regCount; regIndex++, regMask <<= 1) {
+    VarData* vd = sVars[regIndex];
+
+    if (vd == NULL) {
+      ASMJIT_ASSERT((occupied & regMask) == 0);
+      ASMJIT_ASSERT((modified & regMask) == 0);
+    }
+    else {
+      ASMJIT_ASSERT((occupied & regMask) != 0);
+      ASMJIT_ASSERT((modified & regMask) == (static_cast<uint32_t>(vd->isModified()) << regIndex));
+
+      ASMJIT_ASSERT(vd->getClass() == C);
+      ASMJIT_ASSERT(vd->getState() == kVarStateReg);
+      ASMJIT_ASSERT(vd->getRegIndex() == regIndex);
+    }
+  }
+}
+
+void X86X64Context::_checkState() {
+  X86X64Context_checkStateVars<kRegClassGp>(this);
+  X86X64Context_checkStateVars<kRegClassMm>(this);
+  X86X64Context_checkStateVars<kRegClassXy>(this);
+}
+#else
+void X86X64Context::_checkState() {}
+#endif // ASMJIT_DEBUG
+
+// ============================================================================
+// [asmjit::x86x64::X86X64Context - State - Load]
+// ============================================================================
+
+template<int C>
+static ASMJIT_INLINE void X86X64Context_loadStateVars(X86X64Context* self, VarState* target) {
+  VarState* state = self->getState();
+
+  VarData** sVars = state->getListByClass(C);
+  VarData** tVars = target->getListByClass(C);
+
+  uint32_t regIndex;
+  uint32_t modified = target->_modified.get(C);
+  uint32_t regCount = self->getRegsCount(C);
+
+  for (regIndex = 0; regIndex < regCount; regIndex++, modified >>= 1) {
+    VarData* vd = tVars[regIndex];
+    sVars[regIndex] = vd;
+
+    if (vd == NULL)
+      continue;
+
+    vd->setState(kVarStateReg);
+    vd->setRegIndex(regIndex);
+    vd->setModified(modified & 0x1);
+  }
+}
+
+void X86X64Context::loadState(BaseVarState* target_) {
+  VarState* state = getState();
+  VarState* target = static_cast<VarState*>(target_);
+
+  VarData** vdArray = _contextVd.getData();
+  uint32_t vdCount = static_cast<uint32_t>(_contextVd.getLength());
+
+  // Load allocated variables.
+  X86X64Context_loadStateVars<kRegClassGp>(this, target);
+  X86X64Context_loadStateVars<kRegClassMm>(this, target);
+  X86X64Context_loadStateVars<kRegClassXy>(this, target);
+
+  // Load masks.
+  state->_occupied = target->_occupied;
+  state->_modified = target->_modified;
+
+  // Load states of other variables and clear their 'Modified' flags.
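+  // (Register-resident variables were synchronized by the per-class loops
+  // above; here only the mem/unused states are copied from the saved cells.)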
+  for (uint32_t i = 0; i < vdCount; i++) {
+    uint32_t vState = target->_cells[i].getState();
+
+    if (vState != kVarStateReg) {
+      vdArray[i]->setState(vState);
+      vdArray[i]->setModified(false);
+    }
+  }
+
+  ASMJIT_CONTEXT_CHECK_STATE
+}
+
+// ============================================================================
+// [asmjit::x86x64::X86X64Context - State - Save]
+// ============================================================================
+
+BaseVarState* X86X64Context::saveState() {
+  VarData** vdArray = _contextVd.getData();
+  uint32_t vdCount = static_cast<uint32_t>(_contextVd.getLength());
+
+  size_t size = IntUtil::alignTo(
+    sizeof(VarState) + vdCount * sizeof(StateCell), sizeof(void*));
+
+  VarState* cur = getState();
+  VarState* dst = _zoneAllocator.allocT<VarState>(size);
+
+  if (dst == NULL)
+    return NULL;
+
+  // Store links.
+  ::memcpy(dst->_list, cur->_list, VarState::kAllCount * sizeof(VarData*));
+
+  // Store masks.
+  dst->_occupied = cur->_occupied;
+  dst->_modified = cur->_modified;
+
+  // Store cells.
+  for (uint32_t i = 0; i < vdCount; i++) {
+    VarData* vd = static_cast<VarData*>(vdArray[i]);
+    StateCell& cell = dst->_cells[i];
+
+    cell.reset();
+    cell.setState(vd->getState());
+  }
+
+  return dst;
+}
+
+// ============================================================================
+// [asmjit::x86x64::X86X64Context - State - Switch]
+// ============================================================================
+
+template<int C>
+static ASMJIT_INLINE void X86X64Context_switchStateVars(X86X64Context* self, VarState* src) {
+  VarState* dst = self->getState();
+
+  VarData** dstVars = dst->getListByClass(C);
+  VarData** srcVars = src->getListByClass(C);
+
+  uint32_t regIndex;
+  uint32_t regMask;
+  uint32_t regCount = self->getRegsCount(C);
+
+  StateCell* cells = src->_cells;
+
+  bool didWork;
+  do {
+    didWork = false;
+
+    for (regIndex = 0, regMask = 0x1; regIndex < regCount; regIndex++, regMask <<= 1) {
+      VarData* dVd = dstVars[regIndex];
+      VarData* sVd = srcVars[regIndex];
+
+      if (dVd == sVd)
+        continue;
+
+      if (dVd != NULL) {
+        StateCell& cell = cells[dVd->getContextId()];
+
+        if (cell.getState() != kVarStateReg) {
+          if (cell.getState() == kVarStateMem)
+            self->spill(dVd);
+          else
+            self->unuse(dVd);
+
+          dVd = NULL;
+          didWork = true;
+
+          if (sVd == NULL)
+            continue;
+        }
+      }
+
+      if (dVd == NULL && sVd != NULL) {
+        if (sVd->getRegIndex() != kInvalidReg)
+          self->move(sVd, regIndex);
+        else
+          self->load(sVd, regIndex);
+
+        didWork = true;
+        continue;
+      }
+
+      if (dVd != NULL && sVd == NULL) {
+        StateCell& cell = cells[dVd->getContextId()];
+        if (cell.getState() == kVarStateReg)
+          continue;
+
+        if (cell.getState() == kVarStateMem)
+          self->spill(dVd);
+        else
+          self->unuse(dVd);
+
+        didWork = true;
+        continue;
+      }
+
+      if (C == kRegClassGp) {
+        self->swapGp(dVd, sVd);
+
+        didWork = true;
+        continue;
+      }
+      else {
+        self->spill(dVd);
+        self->move(sVd, regIndex);
+
+        didWork = true;
+        continue;
+      }
+    }
+  } while (didWork);
+
+  uint32_t dstModified = dst->_modified.get(C);
+  uint32_t srcModified = src->_modified.get(C);
+
+  if (dstModified != srcModified) {
+    for (regIndex = 0, regMask = 0x1; regIndex < regCount; regIndex++, regMask <<= 1) {
+      VarData* vd = dstVars[regIndex];
+
+      if (vd == NULL)
+        continue;
+
+      if ((dstModified & regMask) && !(srcModified & regMask))
+        self->save(vd);
+      else if (!(dstModified & regMask) && (srcModified & regMask))
+        self->modify(vd);
+    }
+  }
+}
+
+void X86X64Context::switchState(BaseVarState* src_) {
+  VarState* cur = getState();
+  VarState* src = static_cast<VarState*>(src_);
+
+  // Ignore if both states are equal.
if both states are equal. + if (cur == src) + return; + + // Switch variables. + X86X64Context_switchStateVars(this, src); + X86X64Context_switchStateVars(this, src); + X86X64Context_switchStateVars(this, src); + + // Copy occupied mask. + // TODO: Review. + // cur->_occupied = src->_occupied; + // cur->_modified = src->_modified; + + // Calculate changed state. + VarData** vdArray = _contextVd.getData(); + uint32_t vdCount = static_cast(_contextVd.getLength()); + + StateCell* cells = src->_cells; + for (uint32_t i = 0; i < vdCount; i++) { + VarData* vd = static_cast(vdArray[i]); + StateCell& cell = cells[i]; + + uint32_t vState = cell.getState(); + if (vState != kVarStateReg) { + vd->setState(vState); + vd->setModified(false); + } + } + + ASMJIT_CONTEXT_CHECK_STATE +} + +// ============================================================================ +// [asmjit::x86x64::X86X64Context - State - Intersect] +// ============================================================================ + +void X86X64Context::intersectStates(BaseVarState* a_, BaseVarState* b_) { + VarState* aState = static_cast(a_); + VarState* bState = static_cast(b_); + + // TODO: [COMPILER] Intersect states. + + ASMJIT_CONTEXT_CHECK_STATE +} + +// ============================================================================ +// [asmjit::x86x64::X86X64Context - GetJccFlow / GetOppositeJccFlow] +// ============================================================================ + +//! @internal +static ASMJIT_INLINE BaseNode* X86X64Context_getJccFlow(JumpNode* jNode) { + if (jNode->isTaken()) + return jNode->getTarget(); + else + return jNode->getNext(); +} + +//! @internal +static ASMJIT_INLINE BaseNode* X86X64Context_getOppositeJccFlow(JumpNode* jNode) { + if (jNode->isTaken()) + return jNode->getNext(); + else + return jNode->getTarget(); +} + +// ============================================================================ +// [asmjit::x86x64::X86X64Context - Prepare - SingleVarInst] +// ============================================================================ + +//! @internal +static void X86X64Context_prepareSingleVarInst(uint32_t code, VarAttr* va) { + switch (code) { + // - andn reg, reg ; Set all bits in reg to 0. + // - xor/pxor reg, reg ; Set all bits in reg to 0. + // - sub/psub reg, reg ; Set all bits in reg to 0. + // - pcmpgt reg, reg ; Set all bits in reg to 0. + // - pcmpeq reg, reg ; Set all bits in reg to 1. + case kInstPandn : + case kInstXor : case kInstXorpd : case kInstXorps : case kInstPxor : + case kInstSub: + case kInstPsubb : case kInstPsubw : case kInstPsubd : case kInstPsubq : + case kInstPsubsb : case kInstPsubsw : case kInstPsubusb : case kInstPsubusw : + case kInstPcmpeqb : case kInstPcmpeqw : case kInstPcmpeqd : case kInstPcmpeqq : + case kInstPcmpgtb : case kInstPcmpgtw : case kInstPcmpgtd : case kInstPcmpgtq : + va->delFlags(kVarAttrInReg); + break; + + // - and reg, reg ; Nop. + // - or reg, reg ; Nop. + // - xchg reg, reg ; Nop. + case kInstAnd : case kInstAndpd : case kInstAndps : case kInstPand : + case kInstOr : case kInstOrpd : case kInstOrps : case kInstPor : + case kInstXchg : + va->delFlags(kVarAttrOutReg); + break; + } +} + +// ============================================================================ +// [asmjit::x86x64::X86X64Context - Prepare] +// ============================================================================ + +//! @internal +//! +//! @brief Add unreachable-flow data to the unreachable flow list. 
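The prepareSingleVarInst() helper above encodes a classic peephole fact: when both operands are the same register, some instructions stop depending on the input (xor/sub/pcmpgt zero the register, pcmpeq fills it with ones) and some become no-ops (and/or/xchg). A minimal self-contained sketch of the same idea, using toy names (Inst, kAttrRead/kAttrWrite) rather than asmjit's types:

#include <cstdint>
#include <cstdio>

enum Inst { kXor, kSub, kAnd, kOr };
enum AttrFlags : uint32_t { kAttrRead = 0x1, kAttrWrite = 0x2 };

// For 'inst r, r': zeroing idioms no longer read r, no-op idioms no longer
// write it, so the register allocator can drop the corresponding dependency.
static uint32_t adjustSameOperandFlags(Inst inst, uint32_t flags) {
  switch (inst) {
    case kXor: case kSub: return flags & ~kAttrRead;  // result is always 0
    case kAnd: case kOr:  return flags & ~kAttrWrite; // value is unchanged
  }
  return flags;
}

int main() {
  uint32_t rw = kAttrRead | kAttrWrite;
  std::printf("xor r,r -> 0x%X\n", adjustSameOperandFlags(kXor, rw)); // 0x2
  std::printf("and r,r -> 0x%X\n", adjustSameOperandFlags(kAnd, rw)); // 0x1
  return 0;
}

+// ============================================================================
+// [asmjit::x86x64::X86X64Context - Prepare]
+// ============================================================================
+
+//! @internal
+//!
+//! @brief Add unreachable-flow data to the unreachable flow list.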
+static ASMJIT_INLINE Error X86X64Context_prepareAddUnreachableNode(X86X64Context* self, BaseNode* node) { + PodList::Link* link = self->_zoneAllocator.allocT::Link>(); + if (link == NULL) + return self->setError(kErrorNoHeapMemory); + + link->setValue(node); + self->_unreachableList.append(link); + + return kErrorOk; +} + +//! @internal +//! +//! @brief Add jump-flow data to the jcc flow list. +static ASMJIT_INLINE Error X86X64Context_prepareAddJccNode(X86X64Context* self, BaseNode* node) { + PodList::Link* link = self->_zoneAllocator.allocT::Link>(); + + if (link == NULL) + ASMJIT_PROPAGATE_ERROR(self->setError(kErrorNoHeapMemory)); + + link->setValue(node); + self->_jccList.append(link); + + return kErrorOk; +} + +//! @internal +//! +//! @brief Get mask of all registers actually used to pass function arguments. +static ASMJIT_INLINE RegMask X86X64Context_getUsedArgs(X86X64Context* self, X86X64CallNode* node, X86X64FuncDecl* decl) { + RegMask regs; + regs.reset(); + + uint32_t i; + uint32_t argCount = decl->getArgCount(); + + for (i = 0; i < argCount; i++) { + const FuncInOut& arg = decl->getArg(i); + if (!arg.hasRegIndex()) + continue; + regs.add(x86VarTypeToClass(arg.getVarType()), IntUtil::mask(arg.getRegIndex())); + } + + return regs; +} + +// ============================================================================ +// [Helpers] +// ============================================================================ + +static ASMJIT_INLINE SArgNode* X86X64Context_insertSArgNode( + X86X64Context* self, + X86X64Compiler* compiler, + X86X64CallNode* call, + VarData* vd, const uint32_t* gaRegs) { + + uint32_t vType = vd->getType(); + const VarInfo& vInfo = _varInfo[vType]; + uint32_t c = vInfo.getClass(); + + SArgNode* sArg = compiler->newNode(vd, call); + if (sArg == NULL) + return NULL; + + VarInst* vi = self->newVarInst(1); + if (vi == NULL) + return NULL; + + vi->_vaCount = 1; + vi->_count.reset(); + vi->_count.add(c); + vi->_start.reset(); + vi->_inRegs.reset(); + vi->_outRegs.reset(); + vi->_clobberedRegs.reset(); + vi->_list[0].setup(vd, kVarAttrInReg, 0, gaRegs[c]); + + sArg->setVarInst(vi); + + compiler->addNodeBefore(sArg, call); + return sArg; +} + +//! @internal +//! +//! @brief Prepare the given function @a func. +//! +//! For each node: +//! - Create and assign groupId and flowId. +//! - Collect all variables and merge them to vaList. +Error X86X64Context::fetch() { + X86X64Compiler* compiler = getCompiler(); + X86X64FuncNode* func = getFunc(); + + uint32_t arch = compiler->getArch(); + + BaseNode* node_ = func; + BaseNode* next = NULL; + BaseNode* stop = getStop(); + + uint32_t groupId = 1; + uint32_t flowId = 0; + + VarAttr vaTmpList[80]; + PodList::Link* jLink = NULL; + + // Function flags. + func->clearFuncFlags( + kFuncFlagIsNaked | + kFuncFlagPushPop | + kFuncFlagEmms | + kFuncFlagSFence | + kFuncFlagLFence ); + + if (func->getHint(kFuncHintNaked ) != 0) func->addFuncFlags(kFuncFlagIsNaked); + if (func->getHint(kFuncHintCompact) != 0) func->addFuncFlags(kFuncFlagPushPop | kFuncFlagEnter | kFuncFlagLeave); + if (func->getHint(kFuncHintPushPop) != 0) func->addFuncFlags(kFuncFlagPushPop); + if (func->getHint(kFuncHintEmms ) != 0) func->addFuncFlags(kFuncFlagEmms ); + if (func->getHint(kFuncHintSFence ) != 0) func->addFuncFlags(kFuncFlagSFence ); + if (func->getHint(kFuncHintLFence ) != 0) func->addFuncFlags(kFuncFlagLFence ); + + // Global allocable registers. 
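The masks initialized below drive every later allocation decision: gaRegs holds, per register class, the set of registers the allocator may touch at all, and indexMask restricts which Gp registers may serve as a memory index. A tiny stand-alone illustration of the same mask arithmetic; mask() and bits() are stand-ins for IntUtil's helpers, not asmjit's API:

#include <cstdint>
#include <cstdio>

static inline uint32_t mask(uint32_t i) { return 1u << i; }
static inline uint32_t bits(uint32_t n) { return n < 32 ? (1u << n) - 1 : ~0u; }

int main() {
  const uint32_t kIndexSp = 4, kIndexBp = 5, kIndexR12 = 12;

  // 16 Gp registers on x64; a function that builds a stack frame pins rBP,
  // so it is removed much like the '!isNaked' branch below does.
  uint32_t gaRegs = bits(16) & ~mask(kIndexBp);

  // rSP can never be encoded as an index register, and this allocator also
  // steers clear of r12 (see the index-register exclusions below).
  uint32_t indexMask = bits(16) & ~(mask(kIndexSp) | mask(kIndexR12));

  std::printf("gaRegs=0x%04X indexMask=0x%04X\n", gaRegs, indexMask);
  return 0;
}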
+ uint32_t* gaRegs = _gaRegs; + + if (!func->hasFuncFlag(kFuncFlagIsNaked)) + gaRegs[kRegClassGp] &= ~IntUtil::mask(kRegIndexBp); + + // Allowed index registers (Gp/Xmm/Ymm). + const uint32_t indexMask = IntUtil::bits(_baseRegsCount) & ~(IntUtil::mask(4, 12)); + + // -------------------------------------------------------------------------- + // [VI Macros] + // -------------------------------------------------------------------------- + +#define VI_BEGIN() \ + do { \ + uint32_t vaCount = 0; \ + RegCount regCount; \ + \ + RegMask inRegs; \ + RegMask outRegs; \ + RegMask clobberedRegs; \ + \ + regCount.reset(); \ + inRegs.reset(); \ + outRegs.reset(); \ + clobberedRegs.reset() + +#define VI_END(_Node_) \ + if (vaCount == 0 && clobberedRegs.isEmpty()) \ + break; \ + \ + VarInst* vi = newVarInst(vaCount); \ + if (vi == NULL) \ + goto _NoMemory; \ + \ + RegCount vaIndex; \ + vaIndex.makeIndex(regCount); \ + \ + vi->_vaCount = vaCount; \ + vi->_count = regCount; \ + vi->_start = vaIndex; \ + \ + vi->_inRegs = inRegs; \ + vi->_outRegs = outRegs; \ + vi->_clobberedRegs = clobberedRegs; \ + \ + VarAttr* va = vaTmpList; \ + while (vaCount) { \ + VarData* vd = va->getVd(); \ + \ + uint32_t class_ = vd->getClass(); \ + uint32_t index = vaIndex.get(class_); \ + \ + vaIndex.add(class_); \ + \ + if (va->_inRegs) \ + va->_allocableRegs = va->_inRegs; \ + else if (va->_outRegIndex != kInvalidReg) \ + va->_allocableRegs = IntUtil::mask(va->_outRegIndex); \ + else \ + va->_allocableRegs &= ~inRegs._regs[class_]; \ + \ + vd->_va = NULL; \ + vi->getVa(index)[0] = va[0]; \ + \ + va++; \ + vaCount--; \ + } \ + \ + _Node_->setVarInst(vi); \ + } while (0) + +#define VI_UPDATE_CID(_Vd_) \ + do { \ + if (!_Vd_->hasContextId()) { \ + _Vd_->setContextId(static_cast(_contextVd.getLength())); \ + if (_contextVd.append(_Vd_) != kErrorOk) \ + goto _NoMemory; \ + } \ + } while (0) + +#define VI_ADD_VAR(_Vd_, _Va_, _Flags_, _NewAllocable_) \ + do { \ + ASMJIT_ASSERT(_Vd_->_va == NULL); \ + \ + _Va_ = &vaTmpList[vaCount++]; \ + _Va_->setup(_Vd_, _Flags_, 0, _NewAllocable_); \ + _Va_->addVarCount(1); \ + _Vd_->setVa(_Va_); \ + \ + VI_UPDATE_CID(_Vd_); \ + regCount.add(_Vd_->getClass()); \ + } while (0) + +#define VI_MERGE_VAR(_Vd_, _Va_, _Flags_, _NewAllocable_) \ + do { \ + _Va_ = _Vd_->getVa(); \ + \ + if (_Va_ == NULL) { \ + _Va_ = &vaTmpList[vaCount++]; \ + _Va_->setup(_Vd_, 0, 0, _NewAllocable_); \ + _Vd_->setVa(_Va_); \ + \ + VI_UPDATE_CID(_Vd_); \ + regCount.add(_Vd_->getClass()); \ + } \ + \ + _Va_->addFlags(_Flags_); \ + _Va_->addVarCount(1); \ + } while (0) + + // -------------------------------------------------------------------------- + // [Loop] + // -------------------------------------------------------------------------- + + do { +_Do: + while (node_->isFetched()) { +_NextGroup: + if (jLink == NULL) + jLink = _jccList.getFirst(); + else + jLink = jLink->getNext(); + + if (jLink == NULL) + goto _Done; + node_ = X86X64Context_getOppositeJccFlow(static_cast(jLink->getValue())); + } + + flowId++; + + next = node_->getNext(); + node_->setFlowId(flowId); + + switch (node_->getType()) { + // ---------------------------------------------------------------------- + // [Align/Embed] + // ---------------------------------------------------------------------- + + case kNodeTypeAlign: + case kNodeTypeEmbed: + break; + + // ---------------------------------------------------------------------- + // [Hint] + // ---------------------------------------------------------------------- + + case kNodeTypeHint: { + HintNode* node 
= static_cast(node_); + VI_BEGIN(); + + if (node->getHint() == kVarHintAlloc) { + HintNode* cur = node; + + uint32_t remain[kRegClassCount]; + RegMask inRegs; + + remain[kRegClassGp] = _baseRegsCount - 1 - func->hasFuncFlag(kFuncFlagIsNaked); + remain[kRegClassFp] = kRegCountFp; + remain[kRegClassMm] = kRegCountMm; + remain[kRegClassXy] = _baseRegsCount; + inRegs.reset(); + + // Merge as many alloc-hints as possible. + for (;;) { + VarData* vd = static_cast(cur->getVd()); + VarAttr* va = vd->getVa(); + + uint32_t regClass = vd->getClass(); + uint32_t regIndex = cur->getValue(); + uint32_t regMask = 0; + + // We handle both kInvalidReg and kInvalidValue. + if (regIndex < kInvalidReg) + regMask = IntUtil::mask(regIndex); + + if (va == NULL) { + if ((inRegs._regs[regClass] & regMask) != 0) + break; + if (remain[regClass] == 0) + break; + VI_ADD_VAR(vd, va, kVarAttrInReg, gaRegs[regClass]); + + if (regMask != 0) { + inRegs._regs[regClass] ^= static_cast(regMask); + va->setInRegs(regMask); + va->setInRegIndex(regIndex); + } + + remain[regClass]--; + } + else if (regMask != 0) { + if ((inRegs._regs[regClass] & regMask) != 0 && va->getInRegs() != regMask) + break; + + inRegs._regs[regClass] ^= static_cast(va->getInRegs() | regMask); + va->setInRegs(regMask); + va->setInRegIndex(regIndex); + } + + if (cur != node) + compiler->removeNode(cur); + + cur = static_cast(node->getNext()); + if (cur == NULL || cur->getType() != kNodeTypeHint || cur->getHint() != kVarHintAlloc) + break; + } + + next = node->getNext(); + } + else { + VarData* vd = static_cast(node->getVd()); + VarAttr* va; + + uint32_t flags = 0; + + switch (node->getHint()) { + case kVarHintSpill: + flags = kVarAttrInMem; + break; + case kVarHintSave: + flags = kVarAttrInMem; + break; + case kVarHintSaveAndUnuse: + flags = kVarAttrInMem | kVarAttrUnuse; + break; + case kVarHintUnuse: + flags = kVarAttrUnuse; + break; + } + + VI_ADD_VAR(vd, va, flags, 0); + } + + VI_END(node_); + break; + } + + // ---------------------------------------------------------------------- + // [Target] + // ---------------------------------------------------------------------- + + case kNodeTypeTarget: { + break; + } + + // ---------------------------------------------------------------------- + // [Inst] + // ---------------------------------------------------------------------- + + case kNodeTypeInst: { + InstNode* node = static_cast(node_); + + uint32_t code = node->getCode(); + uint32_t flags = node->getFlags(); + + Operand* opList = node->getOpList(); + uint32_t opCount = node->getOpCount(); + + if (opCount) { + const InstInfo* info = &_instInfo[code]; + const X86X64SpecialInst* special = NULL; + VI_BEGIN(); + + // Collect instruction flags and merge all 'VarAttr's. + if (info->isFp()) + flags |= kNodeFlagIsFp; + + if (info->isSpecial() && (special = X86X64SpecialInst_get(code, opList, opCount)) != NULL) + flags |= kNodeFlagIsSpecial; + + uint32_t gpAllowedMask = 0xFFFFFFFF; + + for (uint32_t i = 0; i < opCount; i++) { + Operand* op = &opList[i]; + VarData* vd; + VarAttr* va; + + if (op->isVar()) { + vd = compiler->getVdById(op->getId()); + VI_MERGE_VAR(vd, va, 0, gaRegs[vd->getClass()] & gpAllowedMask); + + if (static_cast(op)->isGpb()) { + va->addFlags(static_cast(op)->isGpbLo() ? kVarAttrGpbLo : kVarAttrGpbHi); + if (arch == kArchX86) { + // If a byte register is accessed in 32-bit mode we have to limit + // all allocable registers for that variable to eax/ebx/ecx/edx. + // Other variables are not affected. 
+ va->_allocableRegs &= 0x0F; + } + else { + // It's fine if lo-byte register is accessed in 64-bit mode; + // however, hi-byte has to be checked and if it's used all + // registers (Gp/Xmm) could be only allocated in the lower eight + // half. To do that, we patch 'allocableRegs' of all variables + // we collected until now and change the allocable restriction + // for variables that come after. + if (static_cast(op)->isGpbHi()) { + va->_allocableRegs &= 0x0F; + + if (gpAllowedMask != 0xFF) { + for (uint32_t j = 0; j < i; j++) + vaTmpList[j]._allocableRegs &= vaTmpList[j].hasFlag(kVarAttrGpbHi) ? 0x0F : 0xFF; + gpAllowedMask = 0xFF; + } + } + } + } + + if (special != NULL) { + uint32_t inReg = special[i].inReg; + uint32_t outReg = special[i].outReg; + uint32_t c; + + if (static_cast(op)->isGp()) + c = kRegClassGp; + else + c = kRegClassXy; + + if (inReg != kInvalidReg) { + uint32_t mask = IntUtil::mask(inReg); + inRegs.add(c, mask); + va->addInRegs(mask); + } + + if (outReg != kInvalidReg) { + uint32_t mask = IntUtil::mask(outReg); + outRegs.add(c, mask); + va->setOutRegIndex(outReg); + } + + va->addFlags(special[i].flags); + } + else { + uint32_t inFlags = kVarAttrInReg; + uint32_t outFlags = kVarAttrOutReg; + uint32_t combinedFlags; + + if (i == 0) { + // Default for the first operand. + combinedFlags = inFlags | outFlags; + + // Comparison/Test instructions never modify the source operand. + if (info->isTest()) { + combinedFlags = inFlags; + } + // Move instructions typically overwrite the first operand, but + // there are some exceptions based on the operands' size and type. + else if (info->isMove()) { + // Cvttsd2si/Cvttss2si. In 32-bit mode the whole destination is replaced. + // In 64-bit mode we need to check whether the destination operand size + // is 64-bits. + if (code == kInstCvttsd2si || code == kInstCvttss2si) + combinedFlags = vd->getSize() > 4 ? (op->isRegType(kRegTypeGpq) ? outFlags : inFlags | outFlags) : outFlags; + // Movss/Movsd. These instructions won't overwrite the whole register if move + // is between two registers. + else if (code == kInstMovss || code == kInstMovsd) + combinedFlags = opList[1].isMem() ? outFlags : inFlags | outFlags; + else + combinedFlags = outFlags; + } + // Imul. + else if (code == kInstImul && opCount == 3) { + combinedFlags = outFlags; + } + } + else { + // Default for secon/third operands. + combinedFlags = inFlags; + + // Xchg/Xadd/Imul/Idiv. + if (info->isXchg() || (code == kInstImul && opCount == 3 && i == 1)) + combinedFlags = inFlags | outFlags; + } + va->addFlags(combinedFlags); + } + } + else if (op->isMem()) { + Mem* m = static_cast(op); + node->setMemOpIndex(i); + + if (OperandUtil::isVarId(m->getBase()) && m->isBaseIndexType()) { + vd = compiler->getVdById(m->getBase()); + if (!vd->isStack()) { + VI_MERGE_VAR(vd, va, 0, gaRegs[vd->getClass()] & gpAllowedMask); + if (m->getMemType() == kMemTypeBaseIndex) { + va->addFlags(kVarAttrInReg); + } + else { + uint32_t inFlags = kVarAttrInMem; + uint32_t outFlags = kVarAttrOutMem; + uint32_t combinedFlags; + + if (i == 0) { + // Default for the first operand. + combinedFlags = inFlags | outFlags; + + // Comparison/Test instructions never modify the source operand. + if (info->isTest()) { + combinedFlags = inFlags; + } + // Move instructions typically overwrite the first operand, but + // there are some exceptions based on the operands' size and type. + else if (info->isMove()) { + // Movss. + if (code == kInstMovss) + combinedFlags = vd->getSize() == 4 ? 
outFlags : inFlags | outFlags; + // Movsd. + else if (code == kInstMovsd) + combinedFlags = vd->getSize() == 8 ? outFlags : inFlags | outFlags; + else + combinedFlags = outFlags; + } + } + else { + // Default for the second operand. + combinedFlags = inFlags; + if (info->isXchg()) + combinedFlags = inFlags | outFlags; + } + + va->addFlags(combinedFlags); + } + } + } + + if (OperandUtil::isVarId(m->getIndex())) { + // Restrict allocation to all registers except ESP/RSP/R12. + vd = compiler->getVdById(m->getIndex()); + VI_MERGE_VAR(vd, va, 0, gaRegs[kRegClassGp] & gpAllowedMask); + va->andAllocableRegs(indexMask); + va->addFlags(kVarAttrInReg); + } + } + } + + node->setFlags(flags); + if (vaCount) { + // Handle instructions which result in zeros/ones or nop if used with the + // same destination and source operand. + if (vaCount == 1 && opCount >= 2 && opList[0].isVar() && opList[1].isVar() && !node->hasMemOp()) + X86X64Context_prepareSingleVarInst(code, &vaTmpList[0]); + } + + VI_END(node_); + } + + // Handle conditional/unconditional jump. + if (node->isJmpOrJcc()) { + JumpNode* jNode = static_cast(node); + + BaseNode* jNext = jNode->getNext(); + TargetNode* jTarget = jNode->getTarget(); + + // If this jump is unconditional we put next node to unreachable node + // list so we can eliminate possible dead code. We have to do this in + // all cases since we are unable to translate without fetch() step. + // + // We also advance our node pointer to the target node to simulate + // natural flow of the function. + if (jNode->isJmp()) { + if (!jNext->isFetched()) + ASMJIT_PROPAGATE_ERROR(X86X64Context_prepareAddUnreachableNode(this, jNext)); + + node_ = jTarget; + goto _Do; + } + else { + if (jTarget->isFetched()) { + uint32_t jTargetFlowId = jTarget->getFlowId(); + + // Update kNodeFlagIsTaken flag to true if this is a conditional + // backward jump. This behavior can be overridden by using + // kCondHintUnlikely when the instruction is created. + if (!jNode->isTaken() && opCount == 1 && jTargetFlowId <= flowId) { + jNode->addFlags(kNodeFlagIsTaken); + } + } + else if (jNext->isFetched()) { + node_ = jTarget; + goto _Do; + } + else { + ASMJIT_PROPAGATE_ERROR(X86X64Context_prepareAddJccNode(this, jNode)); + + node_ = X86X64Context_getJccFlow(jNode); + goto _Do; + } + } + } + break; + } + + // ---------------------------------------------------------------------- + // [Func] + // ---------------------------------------------------------------------- + + case kNodeTypeFunc: { + ASMJIT_ASSERT(node_ == func); + X86X64FuncDecl* decl = func->getDecl(); + + VI_BEGIN(); + for (uint32_t i = 0, argCount = decl->getArgCount(); i < argCount; i++) { + const FuncInOut& arg = decl->getArg(i); + + VarData* vd = func->getArg(i); + VarAttr* va; + + if (vd == NULL) + continue; + + // Overlapped function arguments. + if (vd->getVa() != NULL) + return compiler->setError(kErrorCompilerOverlappedArgs); + VI_ADD_VAR(vd, va, 0, 0); + + if (x86VarTypeToClass(arg.getVarType()) == vd->getClass()) { + if (arg.hasRegIndex()) { + va->addFlags(kVarAttrOutReg); + va->setOutRegIndex(arg.getRegIndex()); + } + else { + va->addFlags(kVarAttrOutMem); + } + } + else { + // TODO: [COMPILER] Function Argument Conversion. 
+ va->addFlags(kVarAttrOutDecide | kVarAttrOutConv); + } + } + VI_END(node_); + break; + } + + // ---------------------------------------------------------------------- + // [End] + // ---------------------------------------------------------------------- + + case kNodeTypeEnd: { + goto _NextGroup; + } + + // ---------------------------------------------------------------------- + // [Ret] + // ---------------------------------------------------------------------- + + case kNodeTypeRet: { + RetNode* node = static_cast(node_); + X86X64FuncDecl* decl = func->getDecl(); + + if (decl->hasRet()) { + const FuncInOut& ret = decl->getRet(0); + uint32_t retClass = x86VarTypeToClass(ret.getVarType()); + + VI_BEGIN(); + for (uint32_t i = 0; i < 2; i++) { + Operand* op = &node->_ret[i]; + + if (op->isVar()) { + VarData* vd = compiler->getVdById(op->getId()); + VarAttr* va; + + if (vd->getClass() == retClass) { + // TODO: [COMPILER] Fix RetNode fetch. + VI_MERGE_VAR(vd, va, 0, 0); + va->setInRegs(i == 0 ? IntUtil::mask(kRegIndexAx) : IntUtil::mask(kRegIndexDx)); + va->addFlags(kVarAttrInReg); + inRegs.add(retClass, va->getInRegs()); + } + } + } + VI_END(node_); + } + break; + } + + // ---------------------------------------------------------------------- + // [Call] + // ---------------------------------------------------------------------- + + case kNodeTypeCall: { + X86X64CallNode* node = static_cast(node_); + X86X64FuncDecl* decl = node->getDecl(); + + Operand* target = &node->_target; + Operand* argList = node->_args; + Operand* retList = node->_ret; + + func->addFuncFlags(kFuncFlagIsCaller); + func->mergeCallStackSize(node->_x86Decl.getArgStackSize()); + node->_usedArgs = X86X64Context_getUsedArgs(this, node, decl); + + uint32_t i; + uint32_t argCount = decl->getArgCount(); + uint32_t gpAllocableMask = gaRegs[kRegClassGp] & ~node->_usedArgs.get(kRegClassGp); + + VarData* vd; + VarAttr* va; + + VI_BEGIN(); + + // Function-call operand. + if (target->isVar()) { + vd = compiler->getVdById(target->getId()); + VI_MERGE_VAR(vd, va, 0, 0); + + va->addFlags(kVarAttrInReg | kVarAttrInCall); + if (va->getInRegs() == 0) + va->addAllocableRegs(gpAllocableMask); + } + else if (target->isMem()) { + Mem* m = static_cast(target); + + if (OperandUtil::isVarId(m->getBase()) && m->isBaseIndexType()) { + vd = compiler->getVdById(m->getBase()); + if (!vd->isStack()) { + VI_MERGE_VAR(vd, va, 0, 0); + if (m->getMemType() == kMemTypeBaseIndex) { + va->addFlags(kVarAttrInReg | kVarAttrInCall); + if (va->getInRegs() == 0) + va->addAllocableRegs(gpAllocableMask); + } + else { + va->addFlags(kVarAttrInMem | kVarAttrInCall); + } + } + } + + if (OperandUtil::isVarId(m->getIndex())) { + // Restrict allocation to all registers except ESP/RSP/R12. + vd = compiler->getVdById(m->getIndex()); + VI_MERGE_VAR(vd, va, 0, 0); + + va->addFlags(kVarAttrInReg | kVarAttrInCall); + if ((va->getInRegs() & ~indexMask) == 0) + va->andAllocableRegs(gpAllocableMask & indexMask); + } + } + + // Function-call arguments. 
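The loop that follows decides, per argument, whether the calling convention supplies a register (the variable is then pinned to it via addInRegs) or a stack slot (it is then marked kVarAttrInStack and later materialized by an SArgNode). A hedged stand-alone sketch of that decision using a Win64-like convention; Assignment and kArgRegs are illustrative names, not asmjit's:

#include <cstdint>
#include <cstdio>

struct Assignment { bool inReg; uint32_t regIndex; int32_t stackOffset; };

int main() {
  // Win64 passes the first four integer arguments in rcx, rdx, r8, r9;
  // later arguments live on the stack (8 bytes each, after shadow space).
  const uint32_t kArgRegs[] = { 1, 2, 8, 9 };
  const uint32_t kArgRegCount = 4;

  for (uint32_t i = 0; i < 6; i++) {
    Assignment a;
    if (i < kArgRegCount) {
      a = Assignment{ true, kArgRegs[i], 0 };
      std::printf("arg %u -> reg %u\n", unsigned(i), unsigned(a.regIndex));
    }
    else {
      a = Assignment{ false, 0, int32_t(i * 8) };
      std::printf("arg %u -> [rsp+%d]\n", unsigned(i), int(a.stackOffset));
    }
  }
  return 0;
}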
+ for (i = 0; i < argCount; i++) { + Operand* op = &argList[i]; + if (!op->isVar()) + continue; + + vd = compiler->getVdById(op->getId()); + VI_MERGE_VAR(vd, va, 0, 0); + + const FuncInOut& arg = decl->getArg(i); + if (arg.hasRegIndex()) { + uint32_t argType = arg.getVarType(); + uint32_t argClass = x86VarTypeToClass(argType); + + if (vd->getClass() == argClass) { + va->addInRegs(IntUtil::mask(arg.getRegIndex())); + va->addFlags(kVarAttrInReg | kVarAttrInArg); + } + else { + va->addFlags(kVarAttrInConv | kVarAttrInArg); + } + } + else { + va->addArgStackCount(); + va->addFlags(kVarAttrInStack | kVarAttrInArg); + } + } + + // Function-call return(s). + for (i = 0; i < 2; i++) { + Operand* op = &retList[i]; + if (!op->isVar()) + continue; + + const FuncInOut& ret = decl->getRet(i); + if (ret.hasRegIndex()) { + uint32_t retType = ret.getVarType(); + uint32_t retClass = x86VarTypeToClass(retType); + + vd = compiler->getVdById(op->getId()); + VI_MERGE_VAR(vd, va, 0, 0); + + if (vd->getClass() == retClass) { + va->setOutRegIndex(ret.getRegIndex()); + va->addFlags(kVarAttrOutReg | kVarAttrOutRet); + } + else { + va->addFlags(kVarAttrOutConv | kVarAttrOutRet); + } + } + } + + // Init clobbered. + clobberedRegs.set(kRegClassGp, IntUtil::bits(_baseRegsCount) & (~decl->getPreserved(kRegClassGp))); + clobberedRegs.set(kRegClassFp, IntUtil::bits(kRegCountFp ) ); + clobberedRegs.set(kRegClassMm, IntUtil::bits(kRegCountMm ) & (~decl->getPreserved(kRegClassMm))); + clobberedRegs.set(kRegClassXy, IntUtil::bits(_baseRegsCount) & (~decl->getPreserved(kRegClassXy))); + + // Split all variables allocated in stack-only (i.e. if the variable is + // only passed in stack; it doesn't matter how many times) and create + // extra nodes having only stack moves. It improves x86 code, because + // arguments can be moved on stack right after they are ready. + for (i = 0; i < vaCount; i++) { + VarAttr* va = &vaTmpList[i]; + + if ((va->getFlags() & kVarAttrInAll) == (kVarAttrInArg | kVarAttrInStack)) { + if (!X86X64Context_insertSArgNode(this, compiler, node, va->getVd(), gaRegs)) + goto _NoMemory; + va->delFlags(kVarAttrInAll); + } + } + + VI_END(node_); + break; + } + + default: + break; + } + + node_ = next; + } while (node_ != stop); + +_Done: + return kErrorOk; + + // -------------------------------------------------------------------------- + // [Failure] + // -------------------------------------------------------------------------- + +_NoMemory: + return compiler->setError(kErrorNoHeapMemory); +} + +// ============================================================================ +// [asmjit::x86x64::X86X64Context - AnalyzeFunc] +// ============================================================================ + +//! @internal +struct LivenessTarget { + //! @brief Previous. + LivenessTarget* prev; + + //! @brief Target node. + TargetNode* node; + //! @brief Jumped from. + JumpNode* from; +}; + +Error X86X64Context::analyze() { + FuncNode* func = getFunc(); + + BaseNode* node = func->getEnd(); + JumpNode* from = NULL; + + uint32_t bLen = static_cast( + ((_contextVd.getLength() + VarBits::kEntityBits - 1) / VarBits::kEntityBits)); + + LivenessTarget* ltCur = NULL; + LivenessTarget* ltUnused = NULL; + + // No variables. + if (bLen == 0) + return kErrorOk; + + VarBits* bCur = newBits(bLen); + if (bCur == NULL) + goto _NoMemory; + + // Allocate bits for code visited first time. 
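The walk beginning at _OnVisit below is a backward liveness pass over bit vectors: every write kills a variable, every read makes it live, and results are merged at labels until a fixed point is reached. A compact, self-contained model of the per-node update; ToyNode and the plain 64-bit mask are stand-ins for asmjit's VarBits machinery:

#include <cstdint>
#include <cstdio>
#include <vector>

struct ToyNode { uint64_t use, def, liveIn; };

int main() {
  // v0 = ...; v1 = f(v0); return v1   (bit N represents variable N)
  std::vector<ToyNode> nodes = {
    { 0u,      1u << 0, 0u },
    { 1u << 0, 1u << 1, 0u },
    { 1u << 1, 0u,      0u },
  };

  uint64_t live = 0; // nothing is live past the last node
  for (size_t i = nodes.size(); i-- > 0; ) {
    live = (live & ~nodes[i].def) | nodes[i].use; // kill writes, add reads
    nodes[i].liveIn = live;
    std::printf("node %zu live-in = 0x%llX\n", i, (unsigned long long)live);
  }
  return 0;
}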
+_OnVisit: + for (;;) { + if (node->hasLiveness()) { + if (bCur->_addBitsDelSource(node->getLiveness(), bCur, bLen)) + goto _OnPatch; + else + goto _OnDone; + } + + VarBits* bTmp = copyBits(bCur, bLen); + VarInst* vi = node->getVarInst(); + + if (bTmp == NULL) + goto _NoMemory; + node->setLiveness(bTmp); + + if (vi != NULL) { + uint32_t vaCount = vi->getVaCount(); + for (uint32_t i = 0; i < vaCount; i++) { + VarAttr* va = vi->getVa(i); + VarData* vd = va->getVd(); + + uint32_t flags = va->getFlags(); + uint32_t ctxId = vd->getContextId(); + + if ((flags & kVarAttrOutAll) && !(flags & kVarAttrInAll)) { + // Write-Only. + bTmp->setBit(ctxId); + bCur->delBit(ctxId); + } + else { + // Read-Only or Read/Write. + bTmp->setBit(ctxId); + bCur->setBit(ctxId); + } + } + } + + if (node->getType() == kNodeTypeTarget) + goto _OnTarget; + + if (node == func) + goto _OnDone; + node = node->getPrev(); + } + + // Patch already generated liveness bits. +_OnPatch: + for (;;) { + ASMJIT_ASSERT(node->hasLiveness()); + VarBits* bNode = node->getLiveness(); + + if (!bNode->_addBitsDelSource(bCur, bLen)) + goto _OnDone; + + if (node->getType() == kNodeTypeTarget) + goto _OnTarget; + + if (node == func) + goto _OnDone; + + node = node->getPrev(); + } + +_OnTarget: + if (static_cast(node)->getNumRefs() != 0) { + // Push a new LivenessTarget on the stack if needed. + if (ltCur == NULL || ltCur->node != node) { + LivenessTarget* ltTmp = ltUnused; + + if (ltTmp != NULL) { + ltUnused = ltUnused->prev; + } + else { + ltTmp = _zoneAllocator.allocT( + sizeof(LivenessTarget) - sizeof(VarBits) + bLen * sizeof(uintptr_t)); + + if (ltTmp == NULL) + goto _NoMemory; + } + + ltTmp->prev = ltCur; + ltTmp->node = static_cast(node); + ltCur = ltTmp; + + from = static_cast(node)->getFrom(); + ASMJIT_ASSERT(from != NULL); + } + else { + from = ltCur->from; + goto _OnJumpNext; + } + + // Visit/Patch. + do { + ltCur->from = from; + bCur->copyBits(node->getLiveness(), bLen); + + if (!from->hasLiveness()) { + node = from; + goto _OnVisit; + } + + if (bCur->delBits(from->getLiveness(), bLen)) { + node = from; + goto _OnPatch; + } + +_OnJumpNext: + from = from->getJumpNext(); + } while (from != NULL); + + // Pop the current LivenessTarget from the stack. 
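The pop that follows retires the finished LivenessTarget onto a freelist rather than freeing it; zone-allocated records are never released one by one, so retired nodes are recycled by later pushes. The same push/pop-with-freelist discipline in isolation (Rec is a toy record, heap-allocated here instead of zone-allocated):

#include <cstdio>

struct Rec { Rec* prev; int payload; };

static Rec* stackTop = nullptr;
static Rec* freeList = nullptr;

static void push(int payload) {
  Rec* r = freeList;
  if (r) freeList = freeList->prev; // reuse a retired record if possible
  else   r = new Rec();             // the real code bumps a zone allocator
  r->prev = stackTop; r->payload = payload; stackTop = r;
}

static void pop() {
  Rec* r = stackTop;
  stackTop = r->prev;
  r->prev = freeList; freeList = r; // retire, don't free
}

int main() {
  push(1); push(2); pop(); push(3); // push(3) recycles the record of '2'
  std::printf("top payload = %d\n", stackTop->payload); // 3
  while (stackTop) pop();
  while (freeList) { Rec* r = freeList; freeList = r->prev; delete r; }
  return 0;
}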
+ { + LivenessTarget* ltTmp = ltCur; + + ltCur = ltCur->prev; + ltTmp->prev = ltUnused; + ltUnused = ltTmp; + } + } + + bCur->copyBits(node->getLiveness(), bLen); + node = node->getPrev(); + + if (node->isJmp() || !node->isFetched()) + goto _OnDone; + + if (!node->hasLiveness()) + goto _OnVisit; + + if (bCur->delBits(node->getLiveness(), bLen)) + goto _OnPatch; + +_OnDone: + if (ltCur != NULL) { + node = ltCur->node; + from = ltCur->from; + + goto _OnJumpNext; + } + return kErrorOk; + +_NoMemory: + return setError(kErrorNoHeapMemory); +} + +// ============================================================================ +// [asmjit::x86x64::X86X64BaseAlloc] +// ============================================================================ + +struct X86X64BaseAlloc { + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE X86X64BaseAlloc(X86X64Context* context) { + _context = context; + _compiler = context->getCompiler(); + } + ASMJIT_INLINE ~X86X64BaseAlloc() {} + + // -------------------------------------------------------------------------- + // [Accessors] + // -------------------------------------------------------------------------- + + //! @brief Get the context. + ASMJIT_INLINE X86X64Context* getContext() const { return _context; } + //! @brief Get the current state (always the same instance as X86X64Context::_x86State). + ASMJIT_INLINE VarState* getState() const { return _context->getState(); } + + //! @brief Get the node. + ASMJIT_INLINE BaseNode* getNode() const { return _node; } + + //! @brief Get VarAttr list (all). + ASMJIT_INLINE VarAttr* getVaList() const { return _vaList[0]; } + //! @brief Get VarAttr list (per class). + ASMJIT_INLINE VarAttr* getVaListByClass(uint32_t c) const { return _vaList[c]; } + + //! @brief Get VarAttr count (all). + ASMJIT_INLINE uint32_t getVaCount() const { return _vaCount; } + //! @brief Get VarAttr count (per class). + ASMJIT_INLINE uint32_t getVaCountByClass(uint32_t c) const { return _count.get(c); } + + //! @brief Get whether all variables of class @a c are done. + ASMJIT_INLINE bool isVaDone(uint32_t c) const { return _done.get(c) == _count.get(c); } + + //! @brief Get how many variables have been allocated. + ASMJIT_INLINE uint32_t getVaDone(uint32_t c) const { return _done.get(c); } + + ASMJIT_INLINE void addVaDone(uint32_t c, uint32_t n = 1) { _done.add(c, n); } + + //! @brief Get number of allocable registers per class. + ASMJIT_INLINE uint32_t getGaRegs(uint32_t c) const { + return _context->_gaRegs[c]; + } + + // -------------------------------------------------------------------------- + // [Init / Cleanup] + // -------------------------------------------------------------------------- + +protected: + // Just to prevent calling these methods by X86X64Context::translate(). + + ASMJIT_INLINE void init(BaseNode* node, VarInst* vi); + ASMJIT_INLINE void cleanup(); + + // -------------------------------------------------------------------------- + // [Unuse] + // -------------------------------------------------------------------------- + + template + ASMJIT_INLINE void unuseBefore(); + + template + ASMJIT_INLINE void unuseAfter(); + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + + //! @brief Context. + X86X64Context* _context; + //! @brief Compiler. 
+ X86X64Compiler* _compiler; + + //! @brief Node. + BaseNode* _node; + + //! @brief Variable instructions. + VarInst* _vi; + //! @brief VarAttr list (per register class). + VarAttr* _vaList[4]; + + //! @brief Count of all VarAttr's. + uint32_t _vaCount; + + //! @brief VarAttr's total counter. + RegCount _count; + //! @brief VarAttr's done counter. + RegCount _done; +}; + +// ============================================================================ +// [asmjit::x86x64::X86X64BaseAlloc - Init / Cleanup] +// ============================================================================ + +ASMJIT_INLINE void X86X64BaseAlloc::init(BaseNode* node, VarInst* vi) { + _node = node; + _vi = vi; + + // We have to set the correct cursor in case any instruction is emitted + // during the allocation phase; it has to be emitted before the current + // instruction. + _compiler->_setCursor(node->getPrev()); + + // Setup the lists of variables. + { + VarAttr* va = vi->getVaList(); + _vaList[kRegClassGp] = va; + _vaList[kRegClassFp] = va + vi->getVaStart(kRegClassFp); + _vaList[kRegClassMm] = va + vi->getVaStart(kRegClassMm); + _vaList[kRegClassXy] = va + vi->getVaStart(kRegClassXy); + } + + // Setup counters. + _vaCount = vi->getVaCount(); + + _count = vi->_count; + _done.reset(); + + // Connect Vd->Va. + for (uint32_t i = 0; i < _vaCount; i++) { + VarAttr* va = &_vaList[0][i]; + VarData* vd = va->getVd(); + + vd->setVa(va); + } +} + +ASMJIT_INLINE void X86X64BaseAlloc::cleanup() { + // Disconnect Vd->Va. + for (uint32_t i = 0; i < _vaCount; i++) { + VarAttr* va = &_vaList[0][i]; + VarData* vd = va->getVd(); + + vd->setVa(NULL); + } +} + +// ============================================================================ +// [asmjit::x86x64::X86X64BaseAlloc - Unuse] +// ============================================================================ + +template +ASMJIT_INLINE void X86X64BaseAlloc::unuseBefore() { + VarAttr* list = getVaListByClass(C); + uint32_t count = getVaCountByClass(C); + + const uint32_t checkFlags = + kVarAttrInOutReg | + kVarAttrInMem | + kVarAttrInArg | + kVarAttrInStack | + kVarAttrInCall | + kVarAttrInConv ; + + for (uint32_t i = 0; i < count; i++) { + VarAttr* va = &list[i]; + + if ((va->getFlags() & checkFlags) == kVarAttrOutReg) { + _context->unuse(va->getVd()); + } + } +} + +template +ASMJIT_INLINE void X86X64BaseAlloc::unuseAfter() { + VarAttr* list = getVaListByClass(C); + uint32_t count = getVaCountByClass(C); + + for (uint32_t i = 0; i < count; i++) { + VarAttr* va = &list[i]; + + if (va->getFlags() & kVarAttrUnuse) + _context->unuse(va->getVd()); + } +} + +// ============================================================================ +// [asmjit::x86x64::X86X64VarAlloc] +// ============================================================================ + +//! @internal +//! +//! @brief Register allocator context (asm instructions). 
+struct X86X64VarAlloc : public X86X64BaseAlloc { + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE X86X64VarAlloc(X86X64Context* context) : X86X64BaseAlloc(context) {} + ASMJIT_INLINE ~X86X64VarAlloc() {} + + // -------------------------------------------------------------------------- + // [Run] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE Error run(BaseNode* node); + + // -------------------------------------------------------------------------- + // [Init / Cleanup] + // -------------------------------------------------------------------------- + +protected: + // Just to prevent calling these methods by X86X64Context::translate(). + + ASMJIT_INLINE void init(BaseNode* node, VarInst* vi); + ASMJIT_INLINE void cleanup(); + + // -------------------------------------------------------------------------- + // [Plan / Spill / Alloc] + // -------------------------------------------------------------------------- + + template + ASMJIT_INLINE void plan(); + + template + ASMJIT_INLINE void spill(); + + template + ASMJIT_INLINE void alloc(); + + // -------------------------------------------------------------------------- + // [GuessAlloc / GuessSpill] + // -------------------------------------------------------------------------- + + //! @brief Guess which register is the best candidate for 'vd' from + //! 'allocableRegs'. + //! + //! The guess is based on looking ahead and inspecting register allocator + //! instructions. The main reason is to prevent allocation to a register + //! which is needed by next instruction(s). The guess look tries to go as far + //! as possible, after the remaining registers are zero, the mask of previous + //! registers (called 'safeRegs') is returned. + template + ASMJIT_INLINE uint32_t guessAlloc(VarData* vd, uint32_t allocableRegs); + + //! @brief Guess whether to move the given 'vd' instead of spill. + template + ASMJIT_INLINE uint32_t guessSpill(VarData* vd, uint32_t allocableRegs); + + // -------------------------------------------------------------------------- + // [Modified] + // -------------------------------------------------------------------------- + + template + ASMJIT_INLINE void modified(); + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + + //! @brief Will alloc to these registers. + RegMask _willAlloc; + //! @brief Will spill these registers. + RegMask _willSpill; +}; + +// ============================================================================ +// [asmjit::X86X64VarAlloc - Run] +// ============================================================================ + +ASMJIT_INLINE Error X86X64VarAlloc::run(BaseNode* node_) { + // Initialize. + VarInst* vi = node_->getVarInst(); + if (vi == NULL) + return kErrorOk; + + // Initialize the allocator; connect Vd->Va. + init(node_, vi); + + // Unuse overwritten variables. + unuseBefore(); + unuseBefore(); + unuseBefore(); + + // Plan the allocation. Planner assigns input/output registers for each + // variable and decides whether to allocate it in register or stack. + plan(); + plan(); + plan(); + + // Spill all variables marked by plan(). + spill(); + spill(); + spill(); + + // Alloc all variables marked by plan(). 
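run() above is strictly phased: plan() only decides, spill() then frees the registers the plan needs, and alloc() finally moves values in, so no phase can invalidate an earlier one's bookkeeping. The key choice plan() makes further below is which register to hand a variable that has no mandatory one; the order it tries is sketched stand-alone here (pickRegister and this findFirstBit are illustrative, not asmjit's API):

#include <cstdint>
#include <cstdio>

static uint32_t findFirstBit(uint32_t m) {
  uint32_t i = 0;
  while (!(m & 1u)) { m >>= 1; i++; }
  return i; // caller guarantees m != 0
}

// Prefer a free register; else an occupied-but-clean one (dropping a clean
// value costs nothing); only as a last resort a dirty one, forcing a spill.
static uint32_t pickRegister(uint32_t allocable, uint32_t occupied, uint32_t modified) {
  uint32_t candidates = allocable & ~occupied;
  if (candidates == 0) candidates = allocable & occupied & ~modified;
  if (candidates == 0) candidates = allocable;
  return findFirstBit(candidates);
}

int main() {
  std::printf("%u\n", pickRegister(0x7, 0x6, 0x2)); // 0: reg0 is free
  std::printf("%u\n", pickRegister(0x6, 0x6, 0x2)); // 2: reg2 is clean
  std::printf("%u\n", pickRegister(0x2, 0x2, 0x2)); // 1: dirty, must spill
  return 0;
}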
+ alloc(); + alloc(); + alloc(); + + // Translate node operands. + if (node_->getType() == kNodeTypeInst) { + InstNode* node = static_cast(node_); + ASMJIT_PROPAGATE_ERROR(X86X64Context_translateOperands(_context, node->getOpList(), node->getOpCount())); + } + else if (node_->getType() == kNodeTypeSArg) { + SArgNode* node = static_cast(node_); + VarData* vd = node->getVd(); + + X86X64CallNode* call = static_cast(node->getCall()); + X86X64FuncDecl* decl = call->getDecl(); + + uint32_t argCount = decl->getArgCount(); + for (uint32_t i = 0; i < argCount; i++) { + Operand& op = call->getArg(i); + FuncInOut& arg = decl->getArg(i); + + if (!op.isVar() || op.getId() != vd->getId()) + continue; + + Mem dst = ptr(_context->_zsp, -static_cast(_context->getRegSize()) + arg.getStackOffset()); + _context->emitMoveVarOnStack(arg.getVarType(), &dst, vd->getType(), vd->getRegIndex()); + } + } + + // Mark variables as modified. + modified(); + modified(); + modified(); + + // Cleanup; disconnect Vd->Va. + cleanup(); + + // Update clobbered mask. + _context->_clobberedRegs.add(_willAlloc); + _context->_clobberedRegs.add(vi->_clobberedRegs); + + // Unuse. + unuseAfter(); + unuseAfter(); + unuseAfter(); + + return kErrorOk; +} + +// ============================================================================ +// [asmjit::x86x64::X86X64VarAlloc - Init / Cleanup] +// ============================================================================ + +ASMJIT_INLINE void X86X64VarAlloc::init(BaseNode* node, VarInst* vi) { + X86X64BaseAlloc::init(node, vi); + + // These will block planner from assigning them during planning. Planner will + // add more registers when assigning registers to variables that don't need + // any specific register. + _willAlloc = vi->_inRegs; + _willAlloc.add(vi->_outRegs); + _willSpill.reset(); +} + +ASMJIT_INLINE void X86X64VarAlloc::cleanup() { + X86X64BaseAlloc::cleanup(); +} + +// ============================================================================ +// [asmjit::x86x64::X86X64VarAlloc - Plan / Spill / Alloc] +// ============================================================================ + +template +ASMJIT_INLINE void X86X64VarAlloc::plan() { + if (isVaDone(C)) + return; + + uint32_t i; + + uint32_t willAlloc = _willAlloc.get(C); + uint32_t willFree = 0; + + VarAttr* list = getVaListByClass(C); + uint32_t count = getVaCountByClass(C); + + VarState* state = getState(); + VarData** sVars = state->getListByClass(C); + + // Calculate 'willAlloc' and 'willFree' masks based on mandatory masks. + for (i = 0; i < count; i++) { + VarAttr* va = &list[i]; + VarData* vd = va->getVd(); + + uint32_t vaFlags = va->getFlags(); + uint32_t regIndex = vd->getRegIndex(); + uint32_t regMask = (regIndex != kInvalidReg) ? IntUtil::mask(regIndex) : 0; + + if ((vaFlags & kVarAttrInOutReg) != 0) { + // Planning register allocation. First check whether the variable is + // already allocated in register and if it can stay allocated there. + // + // The following conditions may happen: + // + // a) Allocated register is one of the mandatoryRegs. + // b) Allocated register is one of the allocableRegs. + uint32_t mandatoryRegs = va->getInRegs(); + uint32_t allocableRegs = va->getAllocableRegs(); + + if (regMask != 0) { + // Special path for planning output-only registers. + if ((vaFlags & kVarAttrInOutReg) == kVarAttrOutReg) { + uint32_t outRegIndex = va->getOutRegIndex(); + mandatoryRegs = (outRegIndex != kInvalidReg) ? 
IntUtil::mask(outRegIndex) : 0; + + if ((mandatoryRegs | allocableRegs) & regMask) { + va->setOutRegIndex(regIndex); + va->addFlags(kVarAttrAllocOutDone); + + if (mandatoryRegs & regMask) { + // Case 'a' - 'willAlloc' contains initially all inRegs from all VarAttr's. + ASMJIT_ASSERT((willAlloc & regMask) != 0); + } + else { + // Case 'b'. + va->setOutRegIndex(regIndex); + willAlloc |= regMask; + } + + addVaDone(C); + continue; + } + } + else { + if ((mandatoryRegs | allocableRegs) & regMask) { + va->setInRegIndex(regIndex); + va->addFlags(kVarAttrAllocInDone); + + if (mandatoryRegs & regMask) { + // Case 'a' - 'willAlloc' contains initially all inRegs from all VarAttr's. + ASMJIT_ASSERT((willAlloc & regMask) != 0); + } + else { + // Case 'b'. + va->addInRegs(regMask); + willAlloc |= regMask; + } + + addVaDone(C); + continue; + } + } + } + + // Variable is not allocated or allocated in register that doesn't + // match inRegs or allocableRegs. The next step is to pick the best + // register for this variable. If inRegs contains any register the + // decision is simple - we have to follow, in other case will use + // the advantage of guessAlloc() to find a register (or registers) + // by looking ahead. But the best way to find a good register is not + // here since now we have no information about the registers that + // will be freed. So instead of finding register here, we just mark + // the current register (if variable is allocated) as 'willFree' so + // the planner can use this information in second step to plan other + // allocation of other variables. + willFree |= regMask; + continue; + } + else { + // Memory access - if variable is allocated it has to be freed. + if (regMask != 0) { + willFree |= regMask; + continue; + } + else { + va->addFlags(kVarAttrAllocInDone); + addVaDone(C); + continue; + } + } + } + + // Occupied registers without 'willFree' registers; contains basically + // all the registers we can use to allocate variables without inRegs + // speficied. + uint32_t occupied = state->_occupied.get(C) & ~willFree; + uint32_t willSpill = 0; + + // Find the best registers for variables that are not allocated yet. + for (i = 0; i < count; i++) { + VarAttr* va = &list[i]; + VarData* vd = va->getVd(); + + uint32_t vaFlags = va->getFlags(); + + if ((vaFlags & kVarAttrInOutReg) != 0) { + if ((vaFlags & kVarAttrInOutReg) == kVarAttrOutReg) { + if (vaFlags & kVarAttrAllocOutDone) + continue; + + // We skip all registers that have assigned outRegIndex. The only + // important thing is to not forget to spill it if occupied. + if (va->hasOutRegIndex()) { + uint32_t outRegs = IntUtil::mask(va->getOutRegIndex()); + willSpill |= occupied & outRegs; + continue; + } + } + else { + if (vaFlags & kVarAttrAllocInDone) + continue; + + // We skip all registers that have assigned inRegIndex (it indicates that + // the register to allocate into is known). 
+ if (va->hasInRegIndex()) { + uint32_t inRegs = va->getInRegs(); + willSpill |= occupied & inRegs; + continue; + } + } + + uint32_t m = va->getInRegs(); + if (va->hasOutRegIndex()) + m |= IntUtil::mask(va->getOutRegIndex()); + + m = va->getAllocableRegs() & ~(willAlloc ^ m); + m = guessAlloc(vd, m); + ASMJIT_ASSERT(m != 0); + + uint32_t candidateRegs = m & ~occupied; + uint32_t regIndex; + uint32_t regMask; + + if (candidateRegs == 0) { + candidateRegs = m & occupied & ~state->_modified.get(C); + if (candidateRegs == 0) + candidateRegs = m; + } + + regIndex = IntUtil::findFirstBit(candidateRegs); + regMask = IntUtil::mask(regIndex); + + if ((vaFlags & kVarAttrInOutReg) == kVarAttrOutReg) { + va->setOutRegIndex(regIndex); + } + else { + va->setInRegIndex(regIndex); + va->setInRegs(regMask); + } + + willAlloc |= regMask; + willSpill |= regMask & occupied; + willFree &= ~regMask; + occupied |= regMask; + continue; + } + } + + // Set calculated masks back to the allocator; needed by spill() and alloc(). + _willSpill.set(C, willSpill); + _willAlloc.set(C, willAlloc); +} + +template +ASMJIT_INLINE void X86X64VarAlloc::spill() { + uint32_t m = _willSpill.get(C); + uint32_t i = static_cast(0) - 1; + + if (m == 0) + return; + + VarState* state = getState(); + VarData** sVars = state->getListByClass(C); + + // Available registers for decision if move has any benefit over spill. + uint32_t availableRegs = getGaRegs(C) & ~(state->_occupied.get(C) | m | _willAlloc.get(C)); + + do { + // We always advance one more to destroy the bit that we have found. + uint32_t bitIndex = IntUtil::findFirstBit(m) + 1; + + i += bitIndex; + m >>= bitIndex; + + VarData* vd = sVars[i]; + ASMJIT_ASSERT(vd != NULL); + ASMJIT_ASSERT(vd->getVa() == NULL); + + if (vd->isModified() && availableRegs) { + uint32_t m = guessSpill(vd, availableRegs); + + if (m != 0) { + uint32_t regIndex = IntUtil::findFirstBit(m); + uint32_t regMask = IntUtil::mask(regIndex); + + _context->move(vd, regIndex); + availableRegs ^= regMask; + continue; + } + } + + _context->spill(vd); + } while (m != 0); +} + +template +ASMJIT_INLINE void X86X64VarAlloc::alloc() { + if (isVaDone(C)) + return; + + VarAttr* list = getVaListByClass(C); + uint32_t count = getVaCountByClass(C); + + VarState* state = getState(); + VarData** sVars = state->getListByClass(C); + + uint32_t i; + bool didWork; + + // Alloc 'in' regs. + do { + didWork = false; + for (i = 0; i < count; i++) { + VarAttr* aVa = &list[i]; + VarData* aVd = aVa->getVd(); + + if ((aVa->getFlags() & (kVarAttrInReg | kVarAttrAllocInDone)) != kVarAttrInReg) + continue; + + uint32_t aIndex = aVd->getRegIndex(); + uint32_t bIndex = aVa->getInRegIndex(); + + // Shouldn't be the same. + ASMJIT_ASSERT(aIndex != bIndex); + + VarData* bVd = getState()->getListByClass(C)[bIndex]; + if (bVd != NULL) { + // Gp registers only - Swap two registers if we can solve two + // allocation tasks by a single 'xchg' instruction, swapping + // two registers required by the instruction/node or one register + // required with another non-required. + if (C == kRegClassGp && aIndex != kInvalidReg) { + VarAttr* bVa = bVd->getVa(); + _context->swapGp(aVd, bVd); + + aVa->addFlags(kVarAttrAllocInDone); + addVaDone(C); + + // Doublehit, two registers allocated by a single swap. 
+ if (bVa != NULL && bVa->getInRegIndex() == aIndex) { + bVa->addFlags(kVarAttrAllocInDone); + addVaDone(C); + } + + didWork = true; + continue; + } + } + else if (aIndex != kInvalidReg) { + _context->move(aVd, bIndex); + + aVa->addFlags(kVarAttrAllocInDone); + addVaDone(C); + + didWork = true; + continue; + } + else { + _context->alloc(aVd, bIndex); + + aVa->addFlags(kVarAttrAllocInDone); + addVaDone(C); + + didWork = true; + continue; + } + } + } while (didWork); + + // Alloc 'out' regs. + for (i = 0; i < count; i++) { + VarAttr* va = &list[i]; + VarData* vd = va->getVd(); + + if ((va->getFlags() & (kVarAttrInOutReg | kVarAttrAllocOutDone)) != kVarAttrOutReg) + continue; + + uint32_t regIndex = va->getOutRegIndex(); + ASMJIT_ASSERT(regIndex != kInvalidReg); + + if (vd->getRegIndex() != regIndex) { + ASMJIT_ASSERT(sVars[regIndex] == NULL); + _context->attach(vd, regIndex, false); + } + + va->addFlags(kVarAttrAllocOutDone); + addVaDone(C); + } +} + +// ============================================================================ +// [asmjit::x86x64::X86X64VarAlloc - GuessAlloc / GuessSpill] +// ============================================================================ + +template +ASMJIT_INLINE uint32_t X86X64VarAlloc::guessAlloc(VarData* vd, uint32_t allocableRegs) { + ASMJIT_ASSERT(allocableRegs != 0); + + // Stop now if there is only one bit (register) set in 'allocableRegs' mask. + if (IntUtil::isPowerOf2(allocableRegs)) + return allocableRegs; + + uint32_t i; + uint32_t safeRegs = allocableRegs; + uint32_t maxLookAhead = _compiler->getMaxLookAhead(); + + // Look ahead and calculate mask of special registers on both - input/output. + BaseNode* node = _node; + for (i = 0; i < maxLookAhead; i++) { + // Stop on 'RetNode' and 'EndNode. + if (node->hasFlag(kNodeFlagIsRet)) + break; + + // Stop on conditional jump, we don't follow them. + if (node->hasFlag(kNodeFlagIsJcc)) + break; + + // Advance on non-conditional jump. 
+ if (node->hasFlag(kNodeFlagIsJmp)) + node = static_cast(node)->getTarget(); + + node = node->getNext(); + ASMJIT_ASSERT(node != NULL); + + VarInst* vi = node->getVarInst(); + if (vi != NULL) { + VarAttr* va = vi->findVaByClass(C, vd); + if (va != NULL) { + uint32_t inRegs = va->getInRegs(); + if (inRegs != 0) { + safeRegs = allocableRegs; + allocableRegs &= inRegs; + + if (allocableRegs == 0) + goto _UseSafeRegs; + else + return allocableRegs; + } + } + + safeRegs = allocableRegs; + allocableRegs &= ~(vi->_inRegs.get(C) | vi->_outRegs.get(C) | vi->_clobberedRegs.get(C)); + + if (allocableRegs == 0) + break; + } + } + +_UseSafeRegs: + return safeRegs; +} + +template +ASMJIT_INLINE uint32_t X86X64VarAlloc::guessSpill(VarData* vd, uint32_t allocableRegs) { + ASMJIT_ASSERT(allocableRegs != 0); + + return 0; +} + +// ============================================================================ +// [asmjit::x86x64::X86X64VarAlloc - Modified] +// ============================================================================ + +template +ASMJIT_INLINE void X86X64VarAlloc::modified() { + VarAttr* list = getVaListByClass(C); + uint32_t count = getVaCountByClass(C); + + for (uint32_t i = 0; i < count; i++) { + VarAttr* va = &list[i]; + + if (va->hasFlag(kVarAttrOutReg)) { + VarData* vd = va->getVd(); + + uint32_t regIndex = vd->getRegIndex(); + uint32_t regMask = IntUtil::mask(regIndex); + + vd->setModified(true); + _context->_x86State._modified.add(C, regMask); + } + } +} + +// ============================================================================ +// [asmjit::x86x64::X86X64CallAlloc] +// ============================================================================ + +//! @internal +//! +//! @brief Register allocator context (function call). +struct X86X64CallAlloc : public X86X64BaseAlloc { + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE X86X64CallAlloc(X86X64Context* context) : X86X64BaseAlloc(context) {} + ASMJIT_INLINE ~X86X64CallAlloc() {} + + // -------------------------------------------------------------------------- + // [Accessors] + // -------------------------------------------------------------------------- + + //! @brief Get the node. + ASMJIT_INLINE X86X64CallNode* getNode() const { return static_cast(_node); } + + // -------------------------------------------------------------------------- + // [Run] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE Error run(X86X64CallNode* node); + + // -------------------------------------------------------------------------- + // [Init / Cleanup] + // -------------------------------------------------------------------------- + +protected: + // Just to prevent calling these methods from X86X64Context::translate(). 
+ + ASMJIT_INLINE void init(X86X64CallNode* node, VarInst* vi); + ASMJIT_INLINE void cleanup(); + + // -------------------------------------------------------------------------- + // [Plan / Alloc / Spill / Move] + // -------------------------------------------------------------------------- + + template + ASMJIT_INLINE void plan(); + + template + ASMJIT_INLINE void spill(); + + template + ASMJIT_INLINE void alloc(); + + // -------------------------------------------------------------------------- + // [AllocVars/Imms] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE void allocVarsOnStack(); + ASMJIT_INLINE void allocImmsOnStack(); + + // -------------------------------------------------------------------------- + // [GuessAlloc / GuessSpill] + // -------------------------------------------------------------------------- + + template + ASMJIT_INLINE uint32_t guessAlloc(VarData* vd, uint32_t allocableRegs); + + template + ASMJIT_INLINE uint32_t guessSpill(VarData* vd, uint32_t allocableRegs); + + // -------------------------------------------------------------------------- + // [Save] + // -------------------------------------------------------------------------- + + template + ASMJIT_INLINE void save(); + + // -------------------------------------------------------------------------- + // [Clobber] + // -------------------------------------------------------------------------- + + template + ASMJIT_INLINE void clobber(); + + // -------------------------------------------------------------------------- + // [Ret] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE void ret(); + + // -------------------------------------------------------------------------- + // [Utils] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE void stackArgDone(uint32_t argMask) { + ASMJIT_ASSERT(_stackArgsMask & argMask); + _stackArgsMask ^= argMask; + } + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + + //! @brief Will alloc to these registers. + RegMask _willAlloc; + //! @brief Will spill these registers. + RegMask _willSpill; + + //! @brief Pending stack-arguments mask. + uint32_t _stackArgsMask; + + //! @brief Argument index to VarAttr mapping. + VarAttr* _argToVa[kFuncArgCountLoHi]; +}; + +// ============================================================================ +// [asmjit::X86X64CallAlloc - Run] +// ============================================================================ + +ASMJIT_INLINE Error X86X64CallAlloc::run(X86X64CallNode* node) { + // Initialize. + VarInst* vi = node->getVarInst(); + if (vi == NULL) + return kErrorOk; + + // Initialize the allocator; prepare basics and connect Vd->Va. + init(node, vi); + + // Move whatever can be moved on the stack. + allocVarsOnStack(); + + // Plan register allocation. Planner is only able to assign one register per + // variable. If any variable is used multiple times it will be handled later. + plan(); + plan(); + plan(); + + // Spill. + spill(); + spill(); + spill(); + + // Alloc. + alloc(); + alloc(); + alloc(); + + // Move the remaining variables on the stack. + allocVarsOnStack(); + + // Unuse clobbered registers that are not used to pass function arguments and + // save variables used to pass function arguments that will be reused later on. 
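In mask form, the save/clobber steps below reduce to a little set arithmetic: anything live in a register the callee may destroy has to be written back if modified, and can simply be dropped if clean. A small worked example; the three input masks are illustrative values, not asmjit state:

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t occupied  = 0x0F; // variables live in regs 0..3
  uint32_t modified  = 0x05; // regs 0 and 2 hold unsaved values
  uint32_t preserved = 0x08; // the callee saves/restores reg 3

  uint32_t clobbered = occupied & ~preserved; // 0x07: regs 0..2 are at risk
  uint32_t mustStore = clobbered & modified;  // 0x05: write back regs 0 and 2

  // Reg 1 is clobbered but clean, so it is simply unused; reg 3 survives.
  std::printf("clobbered=0x%X mustStore=0x%X\n", clobbered, mustStore);
  return 0;
}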
+  save<kRegClassGp>();
+  save<kRegClassMm>();
+  save<kRegClassXy>();
+
+  // Allocate immediates in registers and on the stack.
+  allocImmsOnStack();
+
+  // Duplicate/Convert.
+  // TODO:
+
+  // Translate call operand.
+  ASMJIT_PROPAGATE_ERROR(X86X64Context_translateOperands(_context, &node->_target, 1));
+
+  // Clobber.
+  clobber<kRegClassGp>();
+  clobber<kRegClassMm>();
+  clobber<kRegClassXy>();
+
+  // If any instruction has to be emitted to properly handle function return,
+  // it has to be emitted right after the call; thus the cursor has to be changed.
+  _compiler->_setCursor(node);
+
+  // Return.
+  ret();
+
+  // Unuse.
+  unuseAfter<kRegClassGp>();
+  unuseAfter<kRegClassMm>();
+  unuseAfter<kRegClassXy>();
+
+  // Cleanup; disconnect Vd->Va.
+  cleanup();
+
+  return kErrorOk;
+}
+
+// ============================================================================
+// [asmjit::X86X64CallAlloc - Init / Cleanup]
+// ============================================================================
+
+ASMJIT_INLINE void X86X64CallAlloc::init(X86X64CallNode* node, VarInst* vi) {
+  X86X64BaseAlloc::init(node, vi);
+
+  // Create mask of all registers that will be used to pass function arguments.
+  _willAlloc = node->_usedArgs;
+  _willSpill.reset();
+
+  // Initialize argToVa[] array and pending stack-args mask/count.
+  X86X64FuncDecl* decl = node->getDecl();
+  uint32_t i, mask;
+
+  Operand* argList = node->_args;
+  uint32_t argCount = decl->getArgCount();
+
+  _stackArgsMask = 0;
+  ::memset(_argToVa, 0, kFuncArgCountLoHi * sizeof(VarAttr*));
+
+  for (i = 0, mask = 1; i < argCount; i++, mask <<= 1) {
+    Operand* op = &argList[i];
+
+    if (!op->isVar())
+      continue;
+
+    VarData* vd = _compiler->getVdById(op->getId());
+    VarAttr* va = vd->getVa();
+    _argToVa[i] = va;
+
+    const FuncInOut& arg = decl->getArg(i);
+    if (!arg.hasStackOffset())
+      continue;
+
+    if ((va->getFlags() & kVarAttrInAll) == 0)
+      continue;
+    _stackArgsMask |= mask;
+  }
+}
+
+ASMJIT_INLINE void X86X64CallAlloc::cleanup() {
+  X86X64BaseAlloc::cleanup();
+}
+
+// ============================================================================
+// [asmjit::X86X64CallAlloc - Plan / Spill / Alloc]
+// ============================================================================
+
+template<int C>
+ASMJIT_INLINE void X86X64CallAlloc::plan() {
+  uint32_t i;
+  uint32_t clobbered = _vi->_clobberedRegs.get(C);
+
+  uint32_t willAlloc = _willAlloc.get(C);
+  uint32_t willFree = clobbered & ~willAlloc;
+
+  VarAttr* list = getVaListByClass(C);
+  uint32_t count = getVaCountByClass(C);
+
+  VarState* state = getState();
+  VarData** sVars = state->getListByClass(C);
+
+  // Calculate 'willAlloc' and 'willFree' masks based on mandatory masks.
+  for (i = 0; i < count; i++) {
+    VarAttr* va = &list[i];
+    VarData* vd = va->getVd();
+
+    uint32_t vaFlags = va->getFlags();
+    uint32_t regIndex = vd->getRegIndex();
+    uint32_t regMask = (regIndex != kInvalidReg) ? IntUtil::mask(regIndex) : 0;
+
+    if ((vaFlags & kVarAttrInReg) != 0) {
+      // Planning register allocation. First check whether the variable is
+      // already allocated in a register and whether it can stay there. Function
+      // arguments are passed either in a specific register or on the stack, so
+      // we mostly care about mandatory registers.
+      uint32_t inRegs = va->getInRegs();
+
+      if (inRegs == 0) {
+        inRegs = va->getAllocableRegs();
+      }
+
+      if (regMask & inRegs) {
+        va->setInRegIndex(regIndex);
+        va->addFlags(kVarAttrAllocInDone);
+        addVaDone(C);
+      }
+      else {
+        willFree |= regMask;
+      }
+    }
+    else {
+      // Memory access - if variable is allocated it has to be freed.
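+      // (Illustrative: an argument passed at [zsp+N] whose variable currently
+      // sits in a register contributes that register to 'willFree'; a variable
+      // with no register assigned needs no freeing and is done immediately.)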
+      if (regMask != 0) {
+        willFree |= regMask;
+      }
+      else {
+        va->addFlags(kVarAttrAllocInDone);
+        addVaDone(C);
+      }
+    }
+  }
+
+  // Occupied registers without 'willFree' registers; contains basically
+  // all the registers we can use to allocate variables without inRegs
+  // specified.
+  uint32_t occupied = state->_occupied.get(C) & ~willFree;
+  uint32_t willSpill = 0;
+
+  // Find the best registers for variables that are not allocated yet. Only
+  // useful for Gp registers used as call operand.
+  for (i = 0; i < count; i++) {
+    VarAttr* va = &list[i];
+    VarData* vd = va->getVd();
+
+    uint32_t vaFlags = va->getFlags();
+    if ((vaFlags & kVarAttrAllocInDone) != 0 || (vaFlags & kVarAttrInReg) == 0)
+      continue;
+
+    // All registers except Gp used by call itself must have inRegIndex.
+    uint32_t m = va->getInRegs();
+    if (C != kRegClassGp || m) {
+      ASMJIT_ASSERT(m != 0);
+      va->setInRegIndex(IntUtil::findFirstBit(m));
+      willSpill |= occupied & m;
+      continue;
+    }
+
+    m = va->getAllocableRegs() & ~(willAlloc ^ m);
+    m = guessAlloc<C>(vd, m);
+    ASMJIT_ASSERT(m != 0);
+
+    uint32_t candidateRegs = m & ~occupied;
+    if (candidateRegs == 0) {
+      candidateRegs = m & occupied & ~state->_modified.get(C);
+      if (candidateRegs == 0)
+        candidateRegs = m;
+    }
+
+    if (!(vaFlags & (kVarAttrOutReg | kVarAttrUnuse)) && (candidateRegs & ~clobbered))
+      candidateRegs &= ~clobbered;
+
+    uint32_t regIndex = IntUtil::findFirstBit(candidateRegs);
+    uint32_t regMask = IntUtil::mask(regIndex);
+
+    va->setInRegIndex(regIndex);
+    va->setInRegs(regMask);
+
+    willAlloc |= regMask;
+    willSpill |= regMask & occupied;
+    willFree &= ~regMask;
+
+    occupied |= regMask;
+    continue;
+  }
+
+  // Set calculated masks back to the allocator; needed by spill() and alloc().
+  _willSpill.set(C, willSpill);
+  _willAlloc.set(C, willAlloc);
+}
+
+template<int C>
+ASMJIT_INLINE void X86X64CallAlloc::spill() {
+  uint32_t m = _willSpill.get(C);
+  uint32_t i = static_cast<uint32_t>(0) - 1;
+
+  if (m == 0)
+    return;
+
+  VarState* state = getState();
+  VarData** sVars = state->getListByClass(C);
+
+  // Available registers for decision if move has any benefit over spill.
+  uint32_t availableRegs = getGaRegs(C) & ~(state->_occupied.get(C) | m | _willAlloc.get(C));
+
+  do {
+    // We always advance one more to destroy the bit that we have found.
+    uint32_t bitIndex = IntUtil::findFirstBit(m) + 1;
+
+    i += bitIndex;
+    m >>= bitIndex;
+
+    VarData* vd = sVars[i];
+    ASMJIT_ASSERT(vd != NULL);
+    ASMJIT_ASSERT(vd->getVa() == NULL);
+
+    if (vd->isModified() && availableRegs) {
+      uint32_t m = guessSpill<C>(vd, availableRegs);
+
+      if (m != 0) {
+        uint32_t regIndex = IntUtil::findFirstBit(m);
+        uint32_t regMask = IntUtil::mask(regIndex);
+
+        _context->move<C>(vd, regIndex);
+        availableRegs ^= regMask;
+        continue;
+      }
+    }
+
+    _context->spill<C>(vd);
+  } while (m != 0);
+}
+
+template<int C>
+ASMJIT_INLINE void X86X64CallAlloc::alloc() {
+  if (isVaDone(C))
+    return;
+
+  VarAttr* list = getVaListByClass(C);
+  uint32_t count = getVaCountByClass(C);
+
+  VarState* state = getState();
+  VarData** sVars = state->getListByClass(C);
+
+  uint32_t i;
+  bool didWork;
+
+  do {
+    didWork = false;
+    for (i = 0; i < count; i++) {
+      VarAttr* aVa = &list[i];
+      VarData* aVd = aVa->getVd();
+
+      if ((aVa->getFlags() & (kVarAttrInReg | kVarAttrAllocInDone)) != kVarAttrInReg)
+        continue;
+
+      uint32_t aIndex = aVd->getRegIndex();
+      uint32_t bIndex = aVa->getInRegIndex();
+
+      // Shouldn't be the same.
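+      // (A variable already sitting in its required register was marked
+      // kVarAttrAllocInDone by plan() and skipped above, so any VarAttr that
+      // reaches this point needs a swap, move, or fresh alloc.)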
+ ASMJIT_ASSERT(aIndex != bIndex); + + VarData* bVd = getState()->getListByClass(C)[bIndex]; + if (bVd != NULL) { + VarAttr* bVa = bVd->getVa(); + + // Gp registers only - Swap two registers if we can solve two + // allocation tasks by a single 'xchg' instruction, swapping + // two registers required by the instruction/node or one register + // required with another non-required. + if (C == kRegClassGp) { + _context->swapGp(aVd, bVd); + + aVa->addFlags(kVarAttrAllocInDone); + addVaDone(C); + + // Doublehit, two registers allocated by a single swap. + if (bVa != NULL && bVa->getInRegIndex() == aIndex) { + bVa->addFlags(kVarAttrAllocInDone); + addVaDone(C); + } + + didWork = true; + continue; + } + } + else if (aIndex != kInvalidReg) { + _context->move(aVd, bIndex); + + aVa->addFlags(kVarAttrAllocInDone); + addVaDone(C); + + didWork = true; + continue; + } + else { + _context->alloc(aVd, bIndex); + + aVa->addFlags(kVarAttrAllocInDone); + addVaDone(C); + + didWork = true; + continue; + } + } + } while (didWork); +} + +// ============================================================================ +// [asmjit::x86x64::X86X64CallAlloc - AllocVars/Imms] +// ============================================================================ + +ASMJIT_INLINE void X86X64CallAlloc::allocVarsOnStack() { + if (_stackArgsMask == 0) + return; + + X86X64CallNode* node = getNode(); + X86X64FuncDecl* decl = node->getDecl(); + + uint32_t i; + uint32_t mask; + + uint32_t argCount = decl->getArgCount(); + Operand* argList = node->_args; + + for (i = 0, mask = 1; i < argCount; i++, mask <<= 1) { + if ((_stackArgsMask & mask) == 0) + continue; + + VarAttr* va = _argToVa[i]; + ASMJIT_ASSERT(va != NULL); + ASMJIT_ASSERT(va->getArgStackCount() != 0); + + VarData* vd = va->getVd(); + uint32_t regIndex = vd->getRegIndex(); + + if (regIndex == kInvalidReg) + continue; + + const FuncInOut& arg = decl->getArg(i); + Mem dst = ptr(_context->_zsp, -static_cast(_context->getRegSize()) + arg.getStackOffset()); + + _context->emitMoveVarOnStack(arg.getVarType(), &dst, vd->getType(), regIndex); + stackArgDone(mask); + } +} + +ASMJIT_INLINE void X86X64CallAlloc::allocImmsOnStack() { + X86X64CallNode* node = getNode(); + X86X64FuncDecl* decl = node->getDecl(); + + uint32_t argCount = decl->getArgCount(); + Operand* argList = node->_args; + + for (uint32_t i = 0; i < argCount; i++) { + VarAttr* va = _argToVa[i]; + if (va != NULL) + continue; + + const Imm& imm = static_cast(node->getArg(i)); + const FuncInOut& arg = decl->getArg(i); + + if (arg.hasStackOffset()) { + Mem dst = ptr(_context->_zsp, -static_cast(_context->getRegSize()) + arg.getStackOffset()); + _context->emitMoveImmOnStack(arg.getVarType(), &dst, &imm); + } + else { + _context->emitMoveImmToReg(arg.getVarType(), arg.getRegIndex(), &imm); + } + } +} + +// ============================================================================ +// [asmjit::x86x64::X86X64CallAlloc - GuessAlloc / GuessSpill] +// ============================================================================ + +template +ASMJIT_INLINE uint32_t X86X64CallAlloc::guessAlloc(VarData* vd, uint32_t allocableRegs) { + ASMJIT_ASSERT(allocableRegs != 0); + + // Stop now if there is only one bit (register) set in 'allocableRegs' mask. + if (IntUtil::isPowerOf2(allocableRegs)) + return allocableRegs; + + uint32_t i; + uint32_t safeRegs = allocableRegs; + uint32_t maxLookAhead = _compiler->getMaxLookAhead(); + + // Look ahead and calculate mask of special registers on both - input/output. 
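+  //
+  // Illustrative sketch (not from the original sources): with allocableRegs =
+  // 0b1110 (ecx|edx|ebx) and a node ahead that requires the variable in ecx
+  // (inRegs = 0b0010), the intersection 0b0010 is returned immediately; if a
+  // later node instead clobbers ecx|edx, the mask narrows to 0b1000 (ebx).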
+ BaseNode* node = _node; + for (i = 0; i < maxLookAhead; i++) { + // Stop on 'RetNode' and 'EndNode. + if (node->hasFlag(kNodeFlagIsRet)) + break; + + // Stop on conditional jump, we don't follow them. + if (node->hasFlag(kNodeFlagIsJcc)) + break; + + // Advance on non-conditional jump. + if (node->hasFlag(kNodeFlagIsJmp)) + node = static_cast(node)->getTarget(); + + node = node->getNext(); + ASMJIT_ASSERT(node != NULL); + + VarInst* vi = node->getVarInst(); + if (vi != NULL) { + VarAttr* va = vi->findVaByClass(C, vd); + if (va != NULL) { + uint32_t inRegs = va->getInRegs(); + if (inRegs != 0) { + safeRegs = allocableRegs; + allocableRegs &= inRegs; + + if (allocableRegs == 0) + goto _UseSafeRegs; + else + return allocableRegs; + } + } + + safeRegs = allocableRegs; + allocableRegs &= ~(vi->_inRegs.get(C) | vi->_outRegs.get(C) | vi->_clobberedRegs.get(C)); + + if (allocableRegs == 0) + break; + } + } + +_UseSafeRegs: + return safeRegs; +} + +template +ASMJIT_INLINE uint32_t X86X64CallAlloc::guessSpill(VarData* vd, uint32_t allocableRegs) { + ASMJIT_ASSERT(allocableRegs != 0); + + return 0; +} + +// ============================================================================ +// [asmjit::x86x64::X86X64CallAlloc - Save] +// ============================================================================ + +template +ASMJIT_INLINE void X86X64CallAlloc::save() { + VarState* state = getState(); + VarData** sVars = state->getListByClass(C); + + uint32_t i; + uint32_t affected = _vi->_clobberedRegs.get(C) & state->_occupied.get(C) & state->_modified.get(C); + + for (i = 0; affected != 0; i++, affected >>= 1) { + if (affected & 0x1) { + VarData* vd = sVars[i]; + ASMJIT_ASSERT(vd != NULL); + ASMJIT_ASSERT(vd->isModified()); + + VarAttr* va = vd->getVa(); + if (va == NULL || !(va->getFlags() & kVarAttrInAll)) { + _context->save(vd); + } + } + } +} + +// ============================================================================ +// [asmjit::x86x64::X86X64CallAlloc - Clobber] +// ============================================================================ + +template +ASMJIT_INLINE void X86X64CallAlloc::clobber() { + VarState* state = getState(); + VarData** sVars = state->getListByClass(C); + + uint32_t i; + uint32_t affected = _vi->_clobberedRegs.get(C) & state->_occupied.get(C); + + for (i = 0; affected != 0; i++, affected >>= 1) { + if (affected & 0x1) { + VarData* vd = sVars[i]; + ASMJIT_ASSERT(vd != NULL); + + VarAttr* va = vd->getVa(); + uint32_t vdState = kVarStateUnused; + + if (!vd->isModified() || (va != NULL && (va->getFlags() & (kVarAttrOutAll | kVarAttrUnuse)) != 0)) { + vdState = kVarStateMem; + } + + _context->unuse(vd, vdState); + } + } +} + +// ============================================================================ +// [asmjit::x86x64::X86X64CallAlloc - Ret] +// ============================================================================ + +ASMJIT_INLINE void X86X64CallAlloc::ret() { + X86X64CallNode* node = getNode(); + X86X64FuncDecl* decl = node->getDecl(); + + uint32_t i; + Operand* retList = node->_ret; + + for (i = 0; i < 2; i++) { + const FuncInOut& ret = decl->getRet(i); + Operand* op = &retList[i]; + + if (!ret.hasRegIndex() || !op->isVar()) + continue; + + VarData* vd = _compiler->getVdById(op->getId()); + uint32_t regIndex = ret.getRegIndex(); + + switch (vd->getClass()) { + case kRegClassGp: + if (vd->getRegIndex() != kInvalidReg) + _context->unuse(vd); + _context->attach(vd, regIndex, true); + break; + case kRegClassMm: + if (vd->getRegIndex() != kInvalidReg) + 
_context->unuse<kRegClassMm>(vd);
+        _context->attach<kRegClassMm>(vd, regIndex, true);
+        break;
+      case kRegClassXy:
+        if (vd->getRegIndex() != kInvalidReg)
+          _context->unuse<kRegClassXy>(vd);
+        _context->attach<kRegClassXy>(vd, regIndex, true);
+        break;
+    }
+  }
+}
+
+// ============================================================================
+// [asmjit::x86x64::X86X64Context - TranslateOperands]
+// ============================================================================
+
+//! @internal
+static Error X86X64Context_translateOperands(X86X64Context* self, Operand* opList, uint32_t opCount) {
+  X86X64Compiler* compiler = self->getCompiler();
+  const VarInfo* varInfo = _varInfo;
+
+  uint32_t hasGpdBase = compiler->getRegSize() == 4;
+
+  // Translate variables into registers.
+  for (uint32_t i = 0; i < opCount; i++) {
+    Operand* op = &opList[i];
+
+    if (op->isVar()) {
+      VarData* vd = compiler->getVdById(op->getId());
+      ASMJIT_ASSERT(vd != NULL);
+      ASMJIT_ASSERT(vd->getRegIndex() != kInvalidReg);
+
+      op->_vreg.op = kOperandTypeReg;
+      op->_vreg.index = vd->getRegIndex();
+    }
+    else if (op->isMem()) {
+      Mem* m = static_cast<Mem*>(op);
+
+      if (m->isBaseIndexType() && OperandUtil::isVarId(m->getBase())) {
+        VarData* vd = compiler->getVdById(m->getBase());
+
+        if (m->getMemType() == kMemTypeBaseIndex) {
+          ASMJIT_ASSERT(vd->getRegIndex() != kInvalidReg);
+          op->_vmem.base = vd->getRegIndex();
+        }
+        else {
+          if (!vd->isMemArg())
+            self->getVarCell(vd);
+
+          // Offset will be patched later by X86X64Context_patchFuncMem().
+          m->setGpdBase(hasGpdBase);
+          m->adjust(vd->isMemArg() ? self->_argActualDisp : self->_varActualDisp);
+        }
+      }
+
+      if (OperandUtil::isVarId(m->getIndex())) {
+        VarData* vd = compiler->getVdById(m->getIndex());
+        ASMJIT_ASSERT(vd->getRegIndex() != kInvalidReg);
+        ASMJIT_ASSERT(vd->getRegIndex() != kRegIndexR12);
+        op->_vmem.index = vd->getRegIndex();
+      }
+    }
+  }
+
+  return kErrorOk;
+}
+
+// ============================================================================
+// [asmjit::x86x64::X86X64Context - TranslatePrologEpilog]
+// ============================================================================
+
+//! @internal
+static Error X86X64Context_initFunc(X86X64Context* self, X86X64FuncNode* func) {
+  X86X64Compiler* compiler = self->getCompiler();
+  X86X64FuncDecl* decl = func->getDecl();
+
+  RegMask& clobberedRegs = self->_clobberedRegs;
+  uint32_t regSize = compiler->getRegSize();
+
+  // Setup "Save-Restore" registers.
+  func->_saveRestoreRegs.set(kRegClassGp, clobberedRegs.get(kRegClassGp) & decl->getPreserved(kRegClassGp));
+  func->_saveRestoreRegs.set(kRegClassFp, 0);
+  func->_saveRestoreRegs.set(kRegClassMm, clobberedRegs.get(kRegClassMm) & decl->getPreserved(kRegClassMm));
+  func->_saveRestoreRegs.set(kRegClassXy, clobberedRegs.get(kRegClassXy) & decl->getPreserved(kRegClassXy));
+
+  ASMJIT_ASSERT(!func->_saveRestoreRegs.has(kRegClassGp, IntUtil::mask(kRegIndexSp)));
+
+  // Setup required stack alignment and kFuncFlagIsStackMisaligned.
+  {
+    uint32_t requiredStackAlignment = IntUtil::iMax(self->_memMaxAlign, self->getRegSize());
+
+    if (requiredStackAlignment < 16) {
+      // Require 16-byte alignment if 8-byte vars are used.
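+      // (A hedged note on why 16: on x86 the incoming stack may be only
+      // 4-byte aligned, and both 8-byte variables and the aligned `movaps`
+      // save/restore of Xmm registers emitted by the prolog need a realigned
+      // frame; `movaps` faults on unaligned addresses.)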
+ if (self->_mem8ByteVarsUsed) + requiredStackAlignment = 16; + else if (func->_saveRestoreRegs.get(kRegClassMm) || func->_saveRestoreRegs.get(kRegClassXy)) + requiredStackAlignment = 16; + else if (IntUtil::inInterval(func->getRequiredStackAlignment(), 8, 16)) + requiredStackAlignment = 16; + } + + if (func->getRequiredStackAlignment() < requiredStackAlignment) + func->setRequiredStackAlignment(requiredStackAlignment); + + func->updateRequiredStackAlignment(); + } + + // Adjust stack pointer if function is caller. + if (func->isCaller()) { + func->addFuncFlags(kFuncFlagIsStackAdjusted); + } + + // Adjust stack pointer if manual stack alignment is needed. + if (func->isStackMisaligned() && func->isNaked()) { + // Get a memory cell where the original stack frame will be stored. + MemCell* cell = self->_newStackCell(regSize, regSize); + if (cell == NULL) + return self->getError(); + + func->addFuncFlags(kFuncFlagIsStackAdjusted); + self->_stackFrameCell = cell; + + if (decl->getArgStackSize() > 0) { + func->addFuncFlags(kFuncFlagMoveArgs); + func->setExtraStackSize(decl->getArgStackSize()); + } + + // Get temporary register which will be used to align the stack frame. + uint32_t fRegMask = IntUtil::bits(self->_baseRegsCount); + uint32_t stackFrameCopyRegs; + + fRegMask &= ~(decl->getUsed(kRegClassGp) | IntUtil::mask(kRegIndexSp)); + stackFrameCopyRegs = fRegMask; + + // Try to remove modified registers from the mask. + uint32_t tRegMask = fRegMask & ~self->getClobberedRegs(kRegClassGp); + if (tRegMask != 0) + fRegMask = tRegMask; + + // Try to remove preserved registers from the mask. + tRegMask = fRegMask & decl->getPreserved(kRegClassGp); + if (tRegMask != 0) + fRegMask = tRegMask; + + ASMJIT_ASSERT(fRegMask != 0); + + uint32_t fRegIndex = IntUtil::findFirstBit(fRegMask); + func->_stackFrameRegIndex = static_cast(fRegIndex); + + // We have to save the register on the stack (it will be the part of prolog + // and epilog), however we shouldn't save it twice, so we will remove it + // from '_saveRestoreRegs' in case that it is preserved. + fRegMask = IntUtil::mask(fRegIndex); + if ((fRegMask & decl->getPreserved(kRegClassGp)) != 0) { + func->_saveRestoreRegs.del(kRegClassGp, fRegMask); + func->_isStackFrameRegPreserved = true; + } + + if (func->hasFuncFlag(kFuncFlagMoveArgs)) { + uint32_t maxRegs = (func->getArgStackSize() + regSize - 1) / regSize; + stackFrameCopyRegs &= ~fRegMask; + + tRegMask = stackFrameCopyRegs & self->getClobberedRegs(kRegClassGp); + uint32_t tRegCnt = IntUtil::bitCount(tRegMask); + + if (tRegCnt > 1 || (tRegCnt > 0 && tRegCnt <= maxRegs)) + stackFrameCopyRegs = tRegMask; + else + stackFrameCopyRegs = IntUtil::keepNOnesFromRight(stackFrameCopyRegs, IntUtil::iMin(maxRegs, 2)); + + func->_saveRestoreRegs.add(kRegClassGp, stackFrameCopyRegs & decl->getPreserved(kRegClassGp)); + IntUtil::indexNOnesFromRight(func->_stackFrameCopyGpIndex, stackFrameCopyRegs, maxRegs); + } + } + // If function is not naked we generate standard "EBP/RBP" stack frame. + else if (!func->isNaked()) { + uint32_t fRegIndex = kRegIndexBp; + + func->_stackFrameRegIndex = static_cast(fRegIndex); + func->_isStackFrameRegPreserved = true; + } + + ASMJIT_PROPAGATE_ERROR(self->resolveCellOffsets()); + + // Adjust stack pointer if requested memory can't fit into "Red Zone" or "Spill Zone". + if (self->_memAllTotal > IntUtil::iMax(func->getRedZoneSize(), func->getSpillZoneSize())) { + func->addFuncFlags(kFuncFlagIsStackAdjusted); + } + + // Setup stack size used to save preserved registers. 
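+  // (Worked example, illustrative only: saving 3 Gp regs with regSize=8 and
+  // 2 Xmm regs gives memGpSize=24 and memXmmSize=32; with kFuncFlagPushPop the
+  // Gp part moves to _pushPopStackSize and the Xmm part is kept 16-byte
+  // aligned in _moveStackSize, exactly as computed below.)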
+ { + uint32_t memGpSize = IntUtil::bitCount(func->_saveRestoreRegs.get(kRegClassGp)) * regSize; + uint32_t memMmSize = IntUtil::bitCount(func->_saveRestoreRegs.get(kRegClassMm)) * 8; + uint32_t memXmmSize = IntUtil::bitCount(func->_saveRestoreRegs.get(kRegClassXy)) * 16; + + if (func->hasFuncFlag(kFuncFlagPushPop)) { + func->_pushPopStackSize = memGpSize; + func->_moveStackSize = memXmmSize + IntUtil::alignTo(memMmSize, 16); + } + else { + func->_pushPopStackSize = 0; + func->_moveStackSize = memXmmSize + IntUtil::alignTo(memMmSize + memGpSize, 16); + } + } + + // Setup adjusted stack size. + if (func->isStackMisaligned()) { + func->_alignStackSize = 0; + } + else { + // If function is aligned, the RETURN address is stored to the aligned + // [ZSP - PtrSize] which makes current ZSP unaligned. + int32_t v = regSize; + + // If we have to store function frame pointer we have to count it as well, + // because it is the first thing pushed on the stack. + if (func->hasStackFrameReg() && func->isStackFrameRegPreserved()) + v += regSize; + + // Count push/pop sequence. + v += func->getPushPopStackSize(); + + // Calculate the final offset to keep stack alignment. + func->_alignStackSize = IntUtil::deltaTo(v, func->getRequiredStackAlignment()); + } + + // Memory stack size. + func->_memStackSize = self->_memAllTotal; + func->_alignedMemStackSize = IntUtil::alignTo(func->_memStackSize, func->_requiredStackAlignment); + + if (func->isNaked()) { + self->_argBaseReg = kRegIndexSp; + + if (func->isStackAdjusted()) { + if (func->isStackMisaligned()) { + self->_argBaseOffset = static_cast( + func->getCallStackSize() + + func->getAlignedMemStackSize() + + func->getMoveStackSize() + + func->getAlignStackSize()); + self->_argBaseOffset -= regSize; + } + else { + self->_argBaseOffset = static_cast( + func->getCallStackSize() + + func->getAlignedMemStackSize() + + func->getMoveStackSize() + + func->getPushPopStackSize() + + func->getExtraStackSize() + + func->getAlignStackSize()); + } + } + else { + self->_argBaseOffset = func->getPushPopStackSize(); + } + } + else { + self->_argBaseReg = kRegIndexBp; + self->_argBaseOffset = regSize; // Caused by "push zbp". + } + + self->_varBaseReg = kRegIndexSp; + self->_varBaseOffset = func->getCallStackSize(); + + if (!func->isStackAdjusted()) { + self->_varBaseOffset = -static_cast( + func->_alignStackSize + + func->_alignedMemStackSize + + func->_moveStackSize); + } + + return kErrorOk; +} + +//! @internal +static Error X86X64Context_patchFuncMem(X86X64Context* self, X86X64FuncNode* func, BaseNode* stop) { + X86X64Compiler* compiler = self->getCompiler(); + BaseNode* node = func; + + do { + if (node->getType() == kNodeTypeInst) { + InstNode* iNode = static_cast(node); + + if (iNode->hasMemOp()) { + Mem* m = iNode->getMemOp(); + + if (m->getMemType() == kMemTypeStackIndex && OperandUtil::isVarId(m->getBase())) { + VarData* vd = compiler->getVdById(m->getBase()); + ASMJIT_ASSERT(vd != NULL); + + if (vd->isMemArg()) { + m->_vmem.base = self->_argBaseReg; + m->_vmem.displacement += vd->getMemOffset(); + m->_vmem.displacement += self->_argBaseOffset; + } + else { + MemCell* cell = vd->getMemCell(); + ASMJIT_ASSERT(cell != NULL); + + m->_vmem.base = self->_varBaseReg; + m->_vmem.displacement += cell->getOffset(); + m->_vmem.displacement += self->_varBaseOffset; + } + } + } + } + + node = node->getNext(); + } while (node != stop); + + return kErrorOk; +} + +//! 
@internal +static Error X86X64Context_translatePrologEpilog(X86X64Context* self, X86X64FuncNode* func) { + X86X64Compiler* compiler = self->getCompiler(); + X86X64FuncDecl* decl = func->getDecl(); + + uint32_t regSize = compiler->getRegSize(); + + int32_t stackSize = static_cast( + func->getAlignStackSize() + + func->getCallStackSize() + + func->getAlignedMemStackSize() + + func->getMoveStackSize() + + func->getExtraStackSize()); + int32_t stackAlignment = func->getRequiredStackAlignment(); + + int32_t stackBase; + int32_t stackPtr; + + if (func->isStackAdjusted()) { + stackBase = static_cast( + func->getCallStackSize() + + func->getAlignedMemStackSize()); + } + else { + stackBase = -static_cast( + func->getAlignedMemStackSize() + + func->getAlignStackSize() + + func->getExtraStackSize()); + } + + uint32_t i, mask; + uint32_t regsGp = func->getSaveRestoreRegs(kRegClassGp); + uint32_t regsMm = func->getSaveRestoreRegs(kRegClassMm); + uint32_t regsXmm = func->getSaveRestoreRegs(kRegClassXy); + + bool earlyPushPop = false; + bool useLeaEpilog = false; + + GpReg gpReg(self->_zsp); + GpReg fpReg(self->_zbp); + + Mem fpOffset; + + // -------------------------------------------------------------------------- + // [Prolog] + // -------------------------------------------------------------------------- + + compiler->_setCursor(func->getEntryNode()); + + if (compiler->getLogger()) + compiler->comment("Prolog"); + + // Entry. + if (func->isNaked()) { + if (func->isStackMisaligned()) { + fpReg.setIndex(func->getStackFrameRegIndex()); + fpOffset = ptr(self->_zsp, static_cast(self->_stackFrameCell->getOffset())); + + earlyPushPop = func->hasFuncFlag(kFuncFlagPushPop); + if (earlyPushPop) + self->emitPushSequence(regsGp); + + if (func->isStackFrameRegPreserved()) + compiler->emit(kInstPush, fpReg); + + compiler->emit(kInstMov, fpReg, self->_zsp); + } + } + else { + compiler->emit(kInstPush, fpReg); + compiler->emit(kInstMov, fpReg, self->_zsp); + } + + if (func->hasFuncFlag(kFuncFlagPushPop) && !earlyPushPop) { + self->emitPushSequence(regsGp); + if (func->isStackMisaligned() && regsGp != 0) + useLeaEpilog = true; + } + + // Adjust stack pointer. + if (func->isStackAdjusted()) { + stackBase = static_cast(func->getAlignedMemStackSize() + func->getCallStackSize()); + + if (stackSize) + compiler->emit(kInstSub, self->_zsp, stackSize); + + if (func->isStackMisaligned()) + compiler->emit(kInstAnd, self->_zsp, -stackAlignment); + + if (func->isStackMisaligned() && func->isNaked()) + compiler->emit(kInstMov, fpOffset, fpReg); + } + else { + stackBase = -static_cast(func->getAlignStackSize() + func->getMoveStackSize()); + } + + // Save Xmm/Mm/Gp (Mov). 
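+  // (Illustrative shape of the emitted sequence, assuming two Xmm and one Gp
+  // register to preserve without push/pop:
+  //   movaps [esp+stackBase+ 0], xmm6
+  //   movaps [esp+stackBase+16], xmm7
+  //   mov    [esp+stackBase+32], ebx
+  // Offsets advance by 16/8/regSize exactly as the loops below do.)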
+ stackPtr = stackBase; + for (i = 0, mask = regsXmm; mask != 0; i++, mask >>= 1) { + if (mask & 0x1) { + compiler->emit(kInstMovaps, oword_ptr(self->_zsp, stackPtr), xmm(i)); + stackPtr += 16; + } + } + + for (i = 0, mask = regsMm; mask != 0; i++, mask >>= 1) { + if (mask & 0x1) { + compiler->emit(kInstMovq, qword_ptr(self->_zsp, stackPtr), mm(i)); + stackPtr += 8; + } + } + + if (!func->hasFuncFlag(kFuncFlagPushPop)) { + for (i = 0, mask = regsGp; mask != 0; i++, mask >>= 1) { + if (mask & 0x1) { + compiler->emit(kInstMov, ptr(self->_zsp, stackPtr), gpReg.setIndex(i)); + stackPtr += regSize; + } + } + } + + // -------------------------------------------------------------------------- + // [Copy-Args] + // -------------------------------------------------------------------------- + + if (func->hasFuncFlag(kFuncFlagMoveArgs)) { + uint32_t argStackPos = 0; + uint32_t argStackSize = decl->getArgStackSize(); + + uint32_t moveIndex = 0; + uint32_t moveCount = (argStackSize + regSize - 1) / regSize; + + GpReg r[8]; + uint32_t numRegs = 0; + + for (i = 0; i < 6; i++) + if (func->_stackFrameCopyGpIndex[i] != kInvalidReg) + r[numRegs++] = gpReg.setIndex(func->_stackFrameCopyGpIndex[i]); + + int32_t dSrc = func->getPushPopStackSize() + regSize; + int32_t dDst = func->getAlignStackSize() + + func->getCallStackSize() + + func->getAlignedMemStackSize() + + func->getMoveStackSize(); + + if (func->isStackFrameRegPreserved()) + dSrc += regSize; + + Mem mSrc = ptr(fpReg, dSrc); + Mem mDst = ptr(self->_zsp, dDst); + + while (moveIndex < moveCount) { + uint32_t numMovs = IntUtil::iMin(moveCount - moveIndex, numRegs); + + for (i = 0; i < numMovs; i++) + compiler->emit(kInstMov, r[i], mSrc.adjusted((moveIndex + i) * regSize)); + for (i = 0; i < numMovs; i++) + compiler->emit(kInstMov, mDst.adjusted((moveIndex + i) * regSize), r[i]); + + argStackPos += numMovs * regSize; + moveIndex += numMovs; + } + } + + if (compiler->getLogger()) + compiler->comment("Body"); + + // -------------------------------------------------------------------------- + // [Epilog] + // -------------------------------------------------------------------------- + + compiler->_setCursor(func->getExitNode()); + + if (compiler->getLogger()) + compiler->comment("Epilog"); + + // Restore Xmm/Mm/Gp (Mov). + stackPtr = stackBase; + for (i = 0, mask = regsXmm; mask != 0; i++, mask >>= 1) { + if (mask & 0x1) { + compiler->emit(kInstMovaps, xmm(i), oword_ptr(self->_zsp, stackPtr)); + stackPtr += 16; + } + } + + for (i = 0, mask = regsMm; mask != 0; i++, mask >>= 1) { + if (mask & 0x1) { + compiler->emit(kInstMovq, mm(i), qword_ptr(self->_zsp, stackPtr)); + stackPtr += 8; + } + } + + if (!func->hasFuncFlag(kFuncFlagPushPop)) { + for (i = 0, mask = regsGp; mask != 0; i++, mask >>= 1) { + if (mask & 0x1) { + compiler->emit(kInstMov, gpReg.setIndex(i), ptr(self->_zsp, stackPtr)); + stackPtr += regSize; + } + } + } + + // Adjust stack. + if (useLeaEpilog) { + compiler->emit(kInstLea, self->_zsp, ptr(fpReg, -static_cast(func->getPushPopStackSize()))); + } + else if (!func->isStackMisaligned()) { + if (func->isStackAdjusted() && stackSize != 0) + compiler->emit(kInstAdd, self->_zsp, stackSize); + } + + // Restore Gp (Push/Pop). + if (func->hasFuncFlag(kFuncFlagPushPop) && !earlyPushPop) + self->emitPopSequence(regsGp); + + // Emms. + if (func->hasFuncFlag(kFuncFlagEmms)) + compiler->emit(kInstEmms); + + // MFence/SFence/LFence. 
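+  // (Both fence flags together collapse into a single `mfence`, which orders
+  // both loads and stores; emitting `sfence` and `lfence` separately would be
+  // two instructions for the same guarantee.)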
+ if (func->hasFuncFlag(kFuncFlagSFence) & func->hasFuncFlag(kFuncFlagLFence)) + compiler->emit(kInstMfence); + else if (func->hasFuncFlag(kFuncFlagSFence)) + compiler->emit(kInstSfence); + else if (func->hasFuncFlag(kFuncFlagLFence)) + compiler->emit(kInstLfence); + + // Leave. + if (func->isNaked()) { + if (func->isStackMisaligned()) { + compiler->emit(kInstMov, self->_zsp, fpOffset); + + if (func->isStackFrameRegPreserved()) + compiler->emit(kInstPop, fpReg); + + if (earlyPushPop) + self->emitPopSequence(regsGp); + } + } + else { + if (useLeaEpilog) { + compiler->emit(kInstPop, fpReg); + } + else if (func->hasFuncFlag(kFuncFlagLeave)) { + compiler->emit(kInstLeave); + } + else { + compiler->emit(kInstMov, self->_zsp, fpReg); + compiler->emit(kInstPop, fpReg); + } + } + + // Emit return. + if (decl->getCalleePopsStack()) + compiler->emit(kInstRet, static_cast(decl->getArgStackSize())); + else + compiler->emit(kInstRet); + + return kErrorOk; +} + +// ============================================================================ +// [asmjit::x86x64::X86X64Context - TranslateJump] +// ============================================================================ + +//! @internal +static void X86X64Context_translateJump(X86X64Context* self, JumpNode* jNode, TargetNode* jTarget) { + X86X64Compiler* compiler = self->getCompiler(); + BaseNode* extNode = self->getExtraBlock(); + + // TODO: [COMPILER] State Change. + compiler->_setCursor(extNode); + self->switchState(jTarget->getState()); + + // If any instruction was added during switchState() we have to wrap the + // generated code in a block. + if (compiler->getCursor() != extNode) { + TargetNode* jTrampolineTarget = compiler->newTarget(); + + // Add the jump to the target. + compiler->jmp(jTarget->getLabel()); + + // Add the trampoline-label we jump to change the state. + extNode = compiler->setCursor(extNode); + compiler->addNode(jTrampolineTarget); + + // Finally, patch the jump target. + ASMJIT_ASSERT(jNode->getOpCount() > 0); + jNode->_opList[0] = jTrampolineTarget->getLabel(); + jNode->_target = jTrampolineTarget; + } + + // Store the extNode and load the state back. + self->setExtraBlock(extNode); + self->loadState(jNode->_state); +} + +// ============================================================================ +// [asmjit::x86x64::X86X64Context - TranslateRet] +// ============================================================================ + +static Error X86X64Context_translateRet(X86X64Context* self, RetNode* rNode, TargetNode* exitTarget) { + BaseNode* node = rNode->getNext(); + + while (node != NULL) { + switch (node->getType()) { + // If we have found an exit label we just return, there is no need to + // emit jump to that. + case kNodeTypeTarget: + if (static_cast(node) == exitTarget) + return kErrorOk; + goto _EmitRet; + + case kNodeTypeEmbed: + case kNodeTypeInst: + case kNodeTypeCall: + case kNodeTypeRet: + goto _EmitRet; + + // Continue iterating. + case kNodeTypeComment: + case kNodeTypeAlign: + case kNodeTypeHint: + break; + + // Invalid node to be here. + case kNodeTypeFunc: + return self->getCompiler()->setError(kErrorInvalidState); + + // We can't go forward from here. 
+ case kNodeTypeEnd: + return kErrorOk; + } + + node = node->getNext(); + } + +_EmitRet: + { + X86X64Compiler* compiler = self->getCompiler(); + + compiler->_setCursor(rNode); + compiler->jmp(exitTarget->getLabel()); + } + return kErrorOk; +} + +// ============================================================================ +// [asmjit::x86x64::X86X64Context - Translate - Func] +// ============================================================================ + +Error X86X64Context::translate() { + X86X64Compiler* compiler = getCompiler(); + X86X64FuncNode* func = getFunc(); + + // Register allocator contexts. + X86X64VarAlloc vAlloc(this); + X86X64CallAlloc cAlloc(this); + + // Flow. + BaseNode* node_ = func; + BaseNode* next = NULL; + BaseNode* stop = getStop(); + + PodList::Link* jLink = _jccList.getFirst(); + + for (;;) { + while (node_->isTranslated()) { + // Switch state if we went to the already translated node. + if (node_->getType() == kNodeTypeTarget) { + TargetNode* node = static_cast(node_); + compiler->_setCursor(node->getPrev()); + switchState(node->getState()); + } + +_NextGroup: + if (jLink == NULL) { + goto _Done; + } + else { + JumpNode* jNode = static_cast(jLink->getValue()); + jLink = jLink->getNext(); + + BaseNode* jFlow = X86X64Context_getOppositeJccFlow(jNode); + loadState(jNode->getState()); + + // TODO: + if (jNode->getNext() == jFlow) { + } + else { + X86X64Context_translateJump(this, jNode, static_cast(jFlow)); + } + + node_ = jFlow; + if (node_->isTranslated()) + goto _NextGroup; + } + } + + next = node_->getNext(); + node_->addFlags(kNodeFlagIsTranslated); + + switch (node_->getType()) { + // ---------------------------------------------------------------------- + // [Align / Embed] + // ---------------------------------------------------------------------- + + case kNodeTypeAlign: + case kNodeTypeEmbed: + break; + + // ---------------------------------------------------------------------- + // [Target] + // ---------------------------------------------------------------------- + + case kNodeTypeTarget: { + TargetNode* node = static_cast(node_); + ASMJIT_ASSERT(!node->hasState()); + node->setState(saveState()); + break; + } + + // ---------------------------------------------------------------------- + // [Inst/Call/SArg/Ret] + // ---------------------------------------------------------------------- + + case kNodeTypeInst: + case kNodeTypeCall: + case kNodeTypeSArg: + // Update VarAttr's unuse flags based on liveness of the next node. + if (!node_->isJcc()) { + VarInst* vi = static_cast(node_->getVarInst()); + VarBits* liveness = next->getLiveness(); + + if (vi != NULL && liveness != NULL) { + VarAttr* vaList = vi->getVaList(); + uint32_t vaCount = vi->getVaCount(); + + for (uint32_t i = 0; i < vaCount; i++) { + VarAttr* va = &vaList[i]; + VarData* vd = va->getVd(); + + if (!liveness->getBit(vd->getContextId())) + va->addFlags(kVarAttrUnuse); + } + } + } + + if (node_->getType() == kNodeTypeCall) { + ASMJIT_PROPAGATE_ERROR(cAlloc.run(static_cast(node_))); + break; + } + // ... Fall through ... + + case kNodeTypeHint: + case kNodeTypeRet: { + ASMJIT_PROPAGATE_ERROR(vAlloc.run(node_)); + + // Handle conditional/unconditional jump. 
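+        // (Summary of the cases below: an unconditional jmp either switches to
+        // the target's saved state or continues translation at the target; a
+        // jcc saves the current state and follows one arm, queueing the other
+        // through the _jccList processed in _NextGroup.)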
+ if (node_->isJmpOrJcc()) { + JumpNode* node = static_cast(node_); + TargetNode* jTarget = node->getTarget(); + + if (node->isJmp()) { + if (jTarget->hasState()) { + compiler->_setCursor(node->getPrev()); + switchState(jTarget->getState()); + + goto _NextGroup; + } + else { + next = jTarget; + } + } + else { + BaseNode* jNext = node->getNext(); + + if (jTarget->isTranslated()) { + if (jNext->isTranslated()) { + ASMJIT_ASSERT(jNext->getType() == kNodeTypeTarget); + // TODO: [COMPILER] State - Do intersection of two states if possible. + } + + BaseVarState* savedState = saveState(); + node->setState(savedState); + + X86X64Context_translateJump(this, node, jTarget); + next = jNext; + } + else if (jNext->isTranslated()) { + ASMJIT_ASSERT(jNext->getType() == kNodeTypeTarget); + + BaseVarState* savedState = saveState(); + node->setState(savedState); + + compiler->_setCursor(node); + switchState(static_cast(jNext)->getState()); + + next = jTarget; + } + else { + node->setState(saveState()); + next = X86X64Context_getJccFlow(node); + } + } + } + else if (node_->isRet()) { + ASMJIT_PROPAGATE_ERROR( + X86X64Context_translateRet(this, static_cast(node_), func->getExitNode())); + } + break; + } + + // ---------------------------------------------------------------------- + // [Func] + // ---------------------------------------------------------------------- + + case kNodeTypeFunc: { + ASMJIT_ASSERT(node_ == func); + + X86X64FuncDecl* decl = func->getDecl(); + VarInst* vi = func->getVarInst(); + + if (vi != NULL) { + uint32_t i; + uint32_t argCount = func->_x86Decl.getArgCount(); + + for (i = 0; i < argCount; i++) { + const FuncInOut& arg = decl->getArg(i); + + VarData* vd = func->getArg(i); + VarAttr* va = vi->findVa(vd); + ASMJIT_ASSERT(va != NULL); + + if (vd->getFlags() & kVarAttrUnuse) + continue; + + // Special means that the argument is passed in register. + uint32_t regIndex = va->getOutRegIndex(); + if (regIndex != kInvalidReg && (va->getFlags() & kVarAttrOutConv) == 0) { + switch (vd->getClass()) { + case kRegClassGp: attach(vd, regIndex, true); break; + case kRegClassMm: attach(vd, regIndex, true); break; + case kRegClassXy: attach(vd, regIndex, true); break; + } + } + else if (va->hasFlag(kVarAttrOutConv)) { + // TODO: [COMPILER] Function Argument Conversion. 
+ } + else { + vd->_isMemArg = true; + vd->setMemOffset(arg.getStackOffset()); + vd->setState(kVarStateMem); + } + } + } + break; + } + + // ---------------------------------------------------------------------- + // [End] + // ---------------------------------------------------------------------- + + case kNodeTypeEnd: { + goto _NextGroup; + } + + default: + break; + } + + if (next == stop) + goto _NextGroup; + node_ = next; + } + +_Done: + ASMJIT_PROPAGATE_ERROR(X86X64Context_initFunc(this, func)); + ASMJIT_PROPAGATE_ERROR(X86X64Context_patchFuncMem(this, func, stop)); + ASMJIT_PROPAGATE_ERROR(X86X64Context_translatePrologEpilog(this, func)); + + return kErrorOk; +} + +// ============================================================================ +// [asmjit::x86x64::X86X64Context - Serialize] +// ============================================================================ + +template +static ASMJIT_INLINE Error X86X64Context_serialize(X86X64Context* self, X86X64Assembler* assembler, BaseNode* start, BaseNode* stop) { + BaseNode* node_ = start; + StringBuilder& sb = self->_stringBuilder; + + BaseLogger* logger; + const char* comment; + + if (LoggingEnabled) { + logger = assembler->getLogger(); + } + + // Create labels on Assembler side. + ASMJIT_PROPAGATE_ERROR( + assembler->_registerIndexedLabels(self->getCompiler()->_targets.getLength())); + + do { + if (LoggingEnabled) { + comment = node_->getComment(); + + if (node_->hasLiveness()) { + uint32_t i; + uint32_t vdCount = static_cast(self->_contextVd.getLength()); + + VarBits* liveness = node_->getLiveness(); + VarInst* vi = static_cast(node_->getVarInst()); + + sb.clear(); + sb.appendChars(' ', vdCount); + + for (i = 0; i < vdCount; i++) { + if (liveness->getBit(i)) + sb.getData()[i] = '.'; + } + + if (vi != NULL) { + uint32_t vaCount = vi->getVaCount(); + + for (i = 0; i < vaCount; i++) { + VarAttr* va = vi->getVa(i); + VarData* vd = va->getVd(); + + uint32_t flags = va->getFlags(); + char c = 'u'; + + if ( (flags & kVarAttrInAll) && !(flags & kVarAttrOutAll)) c = 'r'; + if (!(flags & kVarAttrInAll) && (flags & kVarAttrOutAll)) c = 'w'; + if ( (flags & kVarAttrInAll) && (flags & kVarAttrOutAll)) c = 'x'; + + if ((flags & kVarAttrUnuse)) + c -= 'a' - 'A'; + + sb.getData()[vd->getContextId()] = c; + } + } + + assembler->_comment = sb.getData(); + } + else { + assembler->_comment = comment; + } + } + + switch (node_->getType()) { + case kNodeTypeAlign: { + AlignNode* node = static_cast(node_); + assembler->align(node->getSize()); + break; + } + + case kNodeTypeEmbed: { + EmbedNode* node = static_cast(node_); + assembler->embed(node->getData(), node->getSize()); + break; + } + + case kNodeTypeComment: { + CommentNode* node = static_cast(node_); + if (LoggingEnabled) { + logger->logFormat(kLoggerStyleComment, + "%s; %s\n", logger->getIndentation(), node->getComment()); + } + break; + } + + case kNodeTypeHint: { + break; + } + + case kNodeTypeTarget: { + TargetNode* node = static_cast(node_); + assembler->bind(node->getLabel()); + break; + } + + case kNodeTypeInst: { + InstNode* node = static_cast(node_); + + uint32_t code = node->getCode(); + uint32_t opCount = node->getOpCount(); + + const Operand* opList = node->getOpList(); + assembler->_options = node->getOptions(); + + const Operand* o0 = &noOperand; + const Operand* o1 = &noOperand; + const Operand* o2 = &noOperand; + + if (node->isSpecial()) { + switch (code) { + case kInstCpuid: + break; + + case kInstCbw: + case kInstCdq: + case kInstCdqe: + case kInstCwd: + case kInstCwde: + case 
kInstCqo: + break; + + case kInstCmpxchg: + o0 = &opList[1]; + o1 = &opList[2]; + break; + + case kInstCmpxchg8b : + case kInstCmpxchg16b: + o0 = &opList[4]; + break; + + case kInstDaa: + case kInstDas: + break; + + case kInstImul: + case kInstMul: + case kInstIdiv: + case kInstDiv: + // We assume "Mul/Div dst_hi (implicit), dst_lo (implicit), src (explicit)". + ASMJIT_ASSERT(opCount == 3); + o0 = &opList[2]; + break; + + case kInstMovptr: + break; + + case kInstLahf: + case kInstSahf: + break; + + case kInstMaskmovq: + case kInstMaskmovdqu: + o0 = &opList[1]; + o1 = &opList[2]; + break; + + case kInstEnter: + o0 = &opList[0]; + o1 = &opList[1]; + break; + + case kInstLeave: + break; + + case kInstRet: + if (opCount > 0) + o0 = &opList[0]; + break; + + case kInstMonitor: + case kInstMwait: + break; + + case kInstPop: + o0 = &opList[0]; + break; + + case kInstPopa: + case kInstPopf: + break; + + case kInstPush: + o0 = &opList[0]; + break; + + case kInstPusha: + case kInstPushf: + break; + + case kInstRcl: + case kInstRcr: + case kInstRol: + case kInstRor: + case kInstSal: + case kInstSar: + case kInstShl: + case kInstShr: + o0 = &opList[0]; + o1 = &cl; + break; + + case kInstShld: + case kInstShrd: + o0 = &opList[0]; + o1 = &opList[1]; + o2 = &cl; + break; + + case kInstRdtsc: + case kInstRdtscp: + break; + + case kInstRepLodsb : case kInstRepLodsd : case kInstRepLodsq : case kInstRepLodsw : + case kInstRepMovsb : case kInstRepMovsd : case kInstRepMovsq : case kInstRepMovsw : + case kInstRepStosb : case kInstRepStosd : case kInstRepStosq : case kInstRepStosw : + case kInstRepeCmpsb : case kInstRepeCmpsd : case kInstRepeCmpsq : case kInstRepeCmpsw : + case kInstRepeScasb : case kInstRepeScasd : case kInstRepeScasq : case kInstRepeScasw : + case kInstRepneCmpsb: case kInstRepneCmpsd: case kInstRepneCmpsq: case kInstRepneCmpsw: + case kInstRepneScasb: case kInstRepneScasd: case kInstRepneScasq: case kInstRepneScasw: + break; + + default: + ASMJIT_ASSERT(!"Reached"); + } + } + else { + if (opCount > 0) o0 = &opList[0]; + if (opCount > 1) o1 = &opList[1]; + if (opCount > 2) o2 = &opList[2]; + } + + // We use this form, because it is the main one. + assembler->emit(code, *o0, *o1, *o2); + break; + } + + // Function scope and return is translated to another nodes, no special + // handling is required at this point. + case kNodeTypeFunc: + case kNodeTypeEnd: + case kNodeTypeRet: { + break; + } + + // Function call adds nodes before and after, but it's required to emit + // the call instruction by itself. + case kNodeTypeCall: { + X86X64CallNode* node = static_cast(node_); + assembler->emit(kInstCall, node->_target, noOperand, noOperand); + break; + } + + default: + break; + } + + node_ = node_->getNext(); + } while (node_ != stop); + + return kErrorOk; +} + +Error X86X64Context::serialize(BaseAssembler* assembler, BaseNode* start, BaseNode* stop) { + if (!assembler->hasLogger()) + return X86X64Context_serialize<0>(this, static_cast(assembler), start, stop); + else + return X86X64Context_serialize<1>(this, static_cast(assembler), start, stop); +} + +} // x86x64 namespace +} // asmjit namespace + +// [Api-End] +#include "../base/apiend.h" + +// [Guard] +#endif // ASMJIT_BUILD_X86 || ASMJIT_BUILD_X64 diff --git a/src/asmjit/x86/x86context_p.h b/src/asmjit/x86/x86context_p.h new file mode 100644 index 0000000..62350fe --- /dev/null +++ b/src/asmjit/x86/x86context_p.h @@ -0,0 +1,498 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. 
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.

+// [Guard]
+#ifndef _ASMJIT_X86_X86CONTEXT_P_H
+#define _ASMJIT_X86_X86CONTEXT_P_H
+
+// [Dependencies - AsmJit]
+#include "../base/compiler.h"
+#include "../base/context_p.h"
+#include "../base/intutil.h"
+#include "../x86/x86assembler.h"
+#include "../x86/x86compiler.h"
+#include "../x86/x86defs.h"
+
+// [Api-Begin]
+#include "../base/apibegin.h"
+
+namespace asmjit {
+namespace x86x64 {
+
+//! @addtogroup asmjit_x86x64
+//! @{
+
+// ============================================================================
+// [asmjit::Context]
+// ============================================================================
+
+//! @internal
+//!
+//! @brief Compiler context is used by @ref X86X64Compiler.
+//!
+//! Compiler context is used during compilation and normally the developer
+//! doesn't need access to it. The context is used per function (it's reset
+//! after each function is generated).
+struct X86X64Context : public BaseContext {
+  ASMJIT_NO_COPY(X86X64Context)
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a new @ref Context instance.
+  X86X64Context(X86X64Compiler* compiler);
+  //! @brief Destroy the @ref Context instance.
+  virtual ~X86X64Context();
+
+  // --------------------------------------------------------------------------
+  // [Reset]
+  // --------------------------------------------------------------------------
+
+  virtual void reset();
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get compiler as @ref X86X64Compiler.
+  ASMJIT_INLINE X86X64Compiler* getCompiler() const { return static_cast<X86X64Compiler*>(_compiler); }
+  //! @brief Get function as @ref X86X64FuncNode.
+  ASMJIT_INLINE X86X64FuncNode* getFunc() const { return reinterpret_cast<X86X64FuncNode*>(_func); }
+
+  ASMJIT_INLINE bool isX64() const { return _baseRegsCount == 16; }
+
+  //! @brief Get clobbered registers (global).
+ ASMJIT_INLINE uint32_t getClobberedRegs(uint32_t c) { return _clobberedRegs.get(c); } + + // -------------------------------------------------------------------------- + // [Helpers] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE VarInst* newVarInst(uint32_t vaCount) { + return static_cast( + _zoneAllocator.alloc(sizeof(VarInst) + vaCount * sizeof(VarAttr))); + } + + // -------------------------------------------------------------------------- + // [Emit] + // -------------------------------------------------------------------------- + + void emitLoad(VarData* vd, uint32_t regIndex, const char* reason); + void emitSave(VarData* vd, uint32_t regIndex, const char* reason); + void emitMove(VarData* vd, uint32_t toRegIndex, uint32_t fromRegIndex, const char* reason); + void emitSwapGp(VarData* aVd, VarData* bVd, uint32_t aIndex, uint32_t bIndex, const char* reason); + + void emitPushSequence(uint32_t regs); + void emitPopSequence(uint32_t regs); + + void emitMoveVarOnStack(uint32_t dstType, const Mem* dst, uint32_t srcType, uint32_t srcIndex); + void emitMoveImmOnStack(uint32_t dstType, const Mem* dst, const Imm* src); + + void emitMoveImmToReg(uint32_t dstType, uint32_t dstIndex, const Imm* src); + + // -------------------------------------------------------------------------- + // [Register Management] + // -------------------------------------------------------------------------- + + void _checkState(); + +#if defined(ASMJIT_DEBUG) +#define ASMJIT_CONTEXT_CHECK_STATE _checkState(); +#else +#define ASMJIT_CONTEXT_CHECK_STATE +#endif // ASMJIT_DEBUG + + ASMJIT_INLINE uint32_t getRegsCount(uint32_t c) const { + if (c == kRegClassGp || c == kRegClassXy) + return _baseRegsCount; + else + return 8; + } + + ASMJIT_INLINE uint32_t getRegSize() const { + return _zsp.getSize(); + } + + // -------------------------------------------------------------------------- + // [Attach / Detach] + // -------------------------------------------------------------------------- + + //! @brief Attach. + //! + //! Attach a register to the 'VarData', changing 'VarData' members to show + //! that the variable is currently alive and linking variable with the + //! current 'VarState'. + template + ASMJIT_INLINE void attach(VarData* vd, uint32_t regIndex, bool modified) { + ASMJIT_ASSERT(vd->getClass() == C); + ASMJIT_ASSERT(regIndex != kInvalidReg); + + // Prevent Esp allocation if C==Gp. + ASMJIT_ASSERT(C != kRegClassGp || regIndex != kRegIndexSp); + + uint32_t regMask = IntUtil::mask(regIndex); + + vd->setState(kVarStateReg); + vd->setRegIndex(regIndex); + vd->setModified(modified); + + _x86State.getListByClass(C)[regIndex] = vd; + _x86State._occupied.add(C, regMask); + _x86State._modified.add(C, static_cast(modified) << regIndex); + + ASMJIT_CONTEXT_CHECK_STATE + } + + //! @brief Detach. + //! + //! The opposite of 'Attach'. Detach resets the members in 'VarData' + //! (regIndex, state and changed flags) and unlinks the variable with the + //! current 'VarState'. 
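+  //!
+  //! Illustrative use (not from the original sources; assumes kRegIndexCx):
+  //!
+  //! @code
+  //! // Put 'vd' into ecx and mark it clean (not modified yet).
+  //! attach<kRegClassGp>(vd, kRegIndexCx, false);
+  //! @endcode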
+ template + ASMJIT_INLINE void detach(VarData* vd, uint32_t regIndex, uint32_t vState) { + ASMJIT_ASSERT(vd->getClass() == C); + ASMJIT_ASSERT(vd->getRegIndex() == regIndex); + ASMJIT_ASSERT(vState != kVarStateReg); + + uint32_t regMask = IntUtil::mask(regIndex); + + vd->setState(vState); + vd->resetRegIndex(); + vd->setModified(false); + + _x86State.getListByClass(C)[regIndex] = NULL; + _x86State._occupied.del(C, regMask); + _x86State._modified.del(C, regMask); + + ASMJIT_CONTEXT_CHECK_STATE + } + + // -------------------------------------------------------------------------- + // [Rebase] + // -------------------------------------------------------------------------- + + //! @brief Rebase. + //! + //! Change the register of the 'VarData' changing also the current 'VarState'. + //! Rebase is nearly identical to 'Detach' and 'Attach' sequence, but doesn't + // change the 'VarData' modified flag. + template + ASMJIT_INLINE void rebase(VarData* vd, uint32_t newRegIndex, uint32_t oldRegIndex) { + ASMJIT_ASSERT(vd->getClass() == C); + + uint32_t newRegMask = IntUtil::mask(newRegIndex); + uint32_t oldRegMask = IntUtil::mask(oldRegIndex); + uint32_t bothRegMask = newRegMask ^ oldRegMask; + + vd->setRegIndex(newRegIndex); + + _x86State.getListByClass(C)[oldRegIndex] = NULL; + _x86State.getListByClass(C)[newRegIndex] = vd; + + _x86State._occupied.xor_(C, bothRegMask); + _x86State._modified.xor_(C, bothRegMask & -static_cast(vd->isModified())); + + ASMJIT_CONTEXT_CHECK_STATE + } + + // -------------------------------------------------------------------------- + // [Load / Save] + // -------------------------------------------------------------------------- + + //! @brief Load. + //! + //! Load variable from its memory slot to a register, emitting 'Load' + //! instruction and changing the variable state to allocated. + template + ASMJIT_INLINE void load(VarData* vd, uint32_t regIndex) { + // Can be only called if variable is not allocated. + ASMJIT_ASSERT(vd->getClass() == C); + ASMJIT_ASSERT(vd->getState() != kVarStateReg); + ASMJIT_ASSERT(vd->getRegIndex() == kInvalidReg); + + emitLoad(vd, regIndex, "Load"); + attach(vd, regIndex, false); + + ASMJIT_CONTEXT_CHECK_STATE + } + + //! @brief Save. + //! + //! Save the variable into its home location, but keep it as allocated. + template + ASMJIT_INLINE void save(VarData* vd) { + ASMJIT_ASSERT(vd->getClass() == C); + ASMJIT_ASSERT(vd->getState() == kVarStateReg); + ASMJIT_ASSERT(vd->getRegIndex() != kInvalidReg); + + uint32_t regIndex = vd->getRegIndex(); + uint32_t regMask = IntUtil::mask(regIndex); + + emitSave(vd, regIndex, "Save"); + + vd->setModified(false); + _x86State._modified.del(C, regMask); + + ASMJIT_CONTEXT_CHECK_STATE + } + + // -------------------------------------------------------------------------- + // [Move / Swap] + // -------------------------------------------------------------------------- + + //! @brief Move a register. + //! + //! Move register from one index to another, emitting 'Move' if needed. This + //! function does nothing if register is already at the given index. + template + ASMJIT_INLINE void move(VarData* vd, uint32_t regIndex) { + ASMJIT_ASSERT(vd->getClass() == C); + ASMJIT_ASSERT(vd->getState() == kVarStateReg); + ASMJIT_ASSERT(vd->getRegIndex() != kInvalidReg); + + uint32_t oldIndex = vd->getRegIndex(); + if (regIndex == oldIndex) + return; + + emitMove(vd, regIndex, oldIndex, "Move"); + rebase(vd, regIndex, oldIndex); + + ASMJIT_CONTEXT_CHECK_STATE + } + + //! @brief Swap two registers + //! + //! 
It's only possible to swap Gp registers. + ASMJIT_INLINE void swapGp(VarData* aVd, VarData* bVd) { + ASMJIT_ASSERT(aVd != bVd); + + ASMJIT_ASSERT(aVd->getClass() == kRegClassGp); + ASMJIT_ASSERT(aVd->getState() == kVarStateReg); + ASMJIT_ASSERT(aVd->getRegIndex() != kInvalidReg); + + ASMJIT_ASSERT(bVd->getClass() == kRegClassGp); + ASMJIT_ASSERT(bVd->getState() == kVarStateReg); + ASMJIT_ASSERT(bVd->getRegIndex() != kInvalidReg); + + uint32_t aIndex = aVd->getRegIndex(); + uint32_t bIndex = bVd->getRegIndex(); + + emitSwapGp(aVd, bVd, aIndex, bIndex, "Swap"); + + aVd->setRegIndex(bIndex); + bVd->setRegIndex(aIndex); + + _x86State.getListByClass(kRegClassGp)[aIndex] = bVd; + _x86State.getListByClass(kRegClassGp)[bIndex] = aVd; + + uint32_t m = aVd->isModified() ^ bVd->isModified(); + _x86State._modified.xor_(kRegClassGp, (m << aIndex) | (m << bIndex)); + + ASMJIT_CONTEXT_CHECK_STATE + } + + // -------------------------------------------------------------------------- + // [Alloc / Spill] + // -------------------------------------------------------------------------- + + //! @brief Alloc + template + ASMJIT_INLINE void alloc(VarData* vd, uint32_t regIndex) { + ASMJIT_ASSERT(vd->getClass() == C); + ASMJIT_ASSERT(regIndex != kInvalidReg); + + uint32_t oldRegIndex = vd->getRegIndex(); + uint32_t oldState = vd->getState(); + uint32_t regMask = IntUtil::mask(regIndex); + + ASMJIT_ASSERT(_x86State.getListByClass(C)[regIndex] == NULL || regIndex == oldRegIndex); + + if (oldState != kVarStateReg) { + if (oldState == kVarStateMem) + emitLoad(vd, regIndex, "Alloc"); + vd->setModified(false); + } + else if (oldRegIndex != regIndex) { + emitMove(vd, regIndex, oldRegIndex, "Alloc"); + + _x86State.getListByClass(C)[oldRegIndex] = NULL; + regMask ^= IntUtil::mask(oldRegIndex); + } + else { + return; + } + + vd->setState(kVarStateReg); + vd->setRegIndex(regIndex); + + _x86State.getListByClass(C)[regIndex] = vd; + _x86State._occupied.xor_(C, regMask); + _x86State._modified.xor_(C, regMask & -static_cast(vd->isModified())); + + ASMJIT_CONTEXT_CHECK_STATE + } + + //! @brief Spill. + //! + //! Spill variable/register, saves the content to the memory-home if modified. + template + ASMJIT_INLINE void spill(VarData* vd) { + ASMJIT_ASSERT(vd->getClass() == C); + if (vd->getState() != kVarStateReg) + return; + + uint32_t regIndex = vd->getRegIndex(); + + ASMJIT_ASSERT(regIndex != kInvalidReg); + ASMJIT_ASSERT(_x86State.getListByClass(C)[regIndex] == vd); + + if (vd->isModified()) + emitSave(vd, regIndex, "Spill"); + detach(vd, regIndex, kVarStateMem); + + ASMJIT_CONTEXT_CHECK_STATE + } + + // -------------------------------------------------------------------------- + // [Modify] + // -------------------------------------------------------------------------- + + template + ASMJIT_INLINE void modify(VarData* vd) { + ASMJIT_ASSERT(vd->getClass() == C); + + uint32_t regIndex = vd->getRegIndex(); + uint32_t regMask = IntUtil::mask(regIndex); + + vd->setModified(true); + _x86State._modified.add(C, regMask); + + ASMJIT_CONTEXT_CHECK_STATE + } + + // -------------------------------------------------------------------------- + // [Unuse] + // -------------------------------------------------------------------------- + + //! @brief Unuse. + //! + //! Unuse variable, it will be detached it if it's allocated then its state + //! will be changed to kVarStateUnused. 
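+  //!
+  //! E.g. (illustrative) 'unuse<kRegClassGp>(vd, kVarStateMem)' releases the
+  //! register but records that the value still lives in its memory home, as
+  //! clobber() does for registers whose contents are still in sync with memory.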
+
+  // --------------------------------------------------------------------------
+  // [Unuse]
+  // --------------------------------------------------------------------------
+
+  //! @brief Unuse.
+  //!
+  //! Unuse a variable; it's detached if it's allocated and its state is
+  //! changed to kVarStateUnused.
+  template<int C>
+  ASMJIT_INLINE void unuse(VarData* vd, uint32_t vState = kVarStateUnused) {
+    ASMJIT_ASSERT(vd->getClass() == C);
+    ASMJIT_ASSERT(vState != kVarStateReg);
+
+    uint32_t regIndex = vd->getRegIndex();
+    if (regIndex != kInvalidReg)
+      detach<C>(vd, regIndex, vState);
+    else
+      vd->setState(vState);
+
+    ASMJIT_CONTEXT_CHECK_STATE
+  }
+
+  // --------------------------------------------------------------------------
+  // [State]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get state as @ref VarState.
+  ASMJIT_INLINE VarState* getState() const { return const_cast<VarState*>(&_x86State); }
+
+  virtual void loadState(BaseVarState* src);
+  virtual BaseVarState* saveState();
+
+  virtual void switchState(BaseVarState* src);
+  virtual void intersectStates(BaseVarState* a, BaseVarState* b);
+
+  // --------------------------------------------------------------------------
+  // [Memory]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE Mem getVarMem(VarData* vd) {
+    (void)getVarCell(vd);
+
+    Mem mem(_memSlot);
+    mem.setBase(vd->getId());
+    return mem;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Prepare]
+  // --------------------------------------------------------------------------
+
+  virtual Error fetch();
+  virtual Error analyze();
+
+  // --------------------------------------------------------------------------
+  // [Translate]
+  // --------------------------------------------------------------------------
+
+  virtual Error translate();
+
+  // --------------------------------------------------------------------------
+  // [Serialize]
+  // --------------------------------------------------------------------------
+
+  virtual Error serialize(BaseAssembler* assembler, BaseNode* start, BaseNode* stop);
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! @brief X86/X64 stack-pointer (esp or rsp).
+  GpReg _zsp;
+  //! @brief X86/X64 frame-pointer (ebp or rbp).
+  GpReg _zbp;
+  //! @brief Temporary memory operand.
+  Mem _memSlot;
+
+  //! @brief X86/X64 specific compiler state (linked with @ref _state).
+  VarState _x86State;
+  //! @brief Clobbered registers (for the whole function).
+  RegMask _clobberedRegs;
+
+  //! @brief Memory cell that stores the address used to restore a manually
+  //! aligned stack.
+  MemCell* _stackFrameCell;
+
+  //! @brief Global allocable registers mask.
+  uint32_t _gaRegs[kRegClassCount];
+
+  //! @brief X86/X64 number of Gp/Xmm registers.
+  uint8_t _baseRegsCount;
+  //! @brief Function arguments base pointer (register).
+  uint8_t _argBaseReg;
+  //! @brief Function variables base pointer (register).
+  uint8_t _varBaseReg;
+  //! @brief Whether to emit comments.
+  uint8_t _emitComments;
+
+  //! @brief Function arguments base offset.
+  int32_t _argBaseOffset;
+  //! @brief Function variables base offset.
+  int32_t _varBaseOffset;
+
+  //! @brief Function arguments displacement.
+  int32_t _argActualDisp;
+  //! @brief Function variables displacement.
+  int32_t _varActualDisp;
+
+  //! @brief Temporary string builder used for logging.
+  StringBuilderT<256> _stringBuilder;
+};
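// A rough sketch of the variable lifecycle the methods above implement,
// assuming a Gp variable `vd` whose target register index 0 is free
// (illustrative pseudo-usage of this private interface, not shipped code):
//
// @code
// alloc<kRegClassGp>(vd, 0);  // Unused/Mem -> Reg; emits a load when spilled.
// modify<kRegClassGp>(vd);    // Mark the register content as dirty.
// save<kRegClassGp>(vd);      // Write back to the home slot, keep allocated.
// spill<kRegClassGp>(vd);     // Reg -> Mem; nothing emitted, already saved.
// unuse<kRegClassGp>(vd);     // Mem -> Unused.
// @endcode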
+
+//! @}
+
+} // x86x64 namespace
+} // asmjit namespace
+
+// [Api-End]
+#include "../base/apiend.h"
+
+// [Guard]
+#endif // _ASMJIT_X86_X86CONTEXT_P_H
diff --git a/src/asmjit/x86/x86cpu.cpp b/src/asmjit/x86/x86cpu.cpp
new file mode 100644
index 0000000..8722304
--- /dev/null
+++ b/src/asmjit/x86/x86cpu.cpp
@@ -0,0 +1,306 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Export]
+#define ASMJIT_EXPORTS
+
+// [Guard]
+#include "../build.h"
+#if defined(ASMJIT_BUILD_X86) || defined(ASMJIT_BUILD_X64)
+
+// [Dependencies - AsmJit]
+#include "../base/intutil.h"
+#include "../x86/x86cpu.h"
+
+// 2009-02-05: Thanks to Mike Tajmajer for VC7.1 compiler support. It shouldn't
+// affect x64 compilation, because the x64 compiler starts with VS2005 (VC8.0).
+#if defined(_MSC_VER) && (_MSC_VER >= 1400)
+#include <intrin.h>
+#endif // _MSC_VER >= 1400
+
+// [Api-Begin]
+#include "../base/apibegin.h"
+
+namespace asmjit {
+namespace x86x64 {
+
+// ============================================================================
+// [asmjit::x86x64::hostCpuId]
+// ============================================================================
+
+// This is messy, I know. Cpuid is implemented as an intrinsic since VS2005,
+// but we should support other compilers as well. The main problem is that MS
+// compilers don't allow inline assembly in 64-bit mode, so we need both the
+// intrinsic and the asm version.
+
+// hostCpuId() and hostCpuDetect() for x86 and x64 platforms begin here.
+#if defined(ASMJIT_HOST_X86) || defined(ASMJIT_HOST_X64)
+void hostCpuId(uint32_t inEax, uint32_t inEcx, CpuId* result) {
+
+#if defined(_MSC_VER)
+// 2009-02-05: Thanks to Mike Tajmajer for supporting the VC7.1 compiler.
+// ASMJIT_HOST_X64 is here only for readability; only VS2005 and later can
+// compile 64-bit code.
+# if _MSC_VER >= 1400 || defined(ASMJIT_HOST_X64)
+  // Done by intrinsics.
+  __cpuidex(reinterpret_cast<int*>(result->i), inEax, inEcx);
+# else // _MSC_VER < 1400
+  uint32_t cpuid_eax = inEax;
+  uint32_t cpuid_ecx = inEcx;
+  uint32_t* cpuid_out = result->i;
+
+  __asm {
+    mov eax, cpuid_eax
+    mov ecx, cpuid_ecx
+    mov edi, cpuid_out
+    cpuid
+    mov dword ptr[edi +  0], eax
+    mov dword ptr[edi +  4], ebx
+    mov dword ptr[edi +  8], ecx
+    mov dword ptr[edi + 12], edx
+  }
+# endif // _MSC_VER < 1400
+
+#elif defined(__GNUC__)
+// Note, patched to preserve the ebx/rbx register, which is used by GCC.
+# if defined(ASMJIT_HOST_X86)
+# define __myCpuId(inEax, inEcx, outEax, outEbx, outEcx, outEdx) \
+  asm ("mov %%ebx, %%edi\n" \
+       "cpuid\n" \
+       "xchg %%edi, %%ebx\n" \
+       : "=a" (outEax), "=D" (outEbx), "=c" (outEcx), "=d" (outEdx) : "a" (inEax), "c" (inEcx))
+# else
+# define __myCpuId(inEax, inEcx, outEax, outEbx, outEcx, outEdx) \
+  asm ("mov %%rbx, %%rdi\n" \
+       "cpuid\n" \
+       "xchg %%rdi, %%rbx\n" \
+       : "=a" (outEax), "=D" (outEbx), "=c" (outEcx), "=d" (outEdx) : "a" (inEax), "c" (inEcx))
+# endif
+  __myCpuId(inEax, inEcx, result->eax, result->ebx, result->ecx, result->edx);
+#endif // Compiler #ifdef.
+}
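// A minimal usage sketch of hostCpuId() (assuming an x86/x64 host): leaf 0
// returns the highest standard leaf in eax and the 12-byte vendor string in
// ebx/edx/ecx, which is exactly how hostCpuDetect() below consumes it.
//
// @code
// CpuId regs;
// hostCpuId(0, 0, &regs);
//
// char vendor[13];
// ::memcpy(vendor + 0, &regs.ebx, 4);
// ::memcpy(vendor + 4, &regs.edx, 4);
// ::memcpy(vendor + 8, &regs.ecx, 4);
// vendor[12] = '\0'; // e.g. "GenuineIntel" or "AuthenticAMD".
// @endcode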
+
+// ============================================================================
+// [asmjit::x86x64::cpuSimplifyBrandString]
+// ============================================================================
+
+static ASMJIT_INLINE void cpuSimplifyBrandString(char* s) {
+  // Always clear the current character in the buffer. It ensures that there
+  // is no garbage after the string NULL terminator.
+  char* d = s;
+
+  char prev = 0;
+  char curr = s[0];
+  s[0] = '\0';
+
+  for (;;) {
+    if (curr == 0)
+      break;
+
+    if (curr == ' ') {
+      if (prev == '@' || s[1] == ' ' || s[1] == '@')
+        goto _Skip;
+    }
+
+    d[0] = curr;
+    d++;
+    prev = curr;
+
+_Skip:
+    curr = *++s;
+    s[0] = '\0';
+  }
+
+  d[0] = '\0';
+}
+
+// ============================================================================
+// [asmjit::x86x64::CpuVendor]
+// ============================================================================
+
+struct CpuVendor {
+  uint32_t id;
+  char text[12];
+};
+
+static const CpuVendor cpuVendorTable[] = {
+  { kCpuVendorAmd      , { 'A', 'M', 'D', 'i', 's', 'b', 'e', 't', 't', 'e', 'r', '!' } },
+  { kCpuVendorAmd      , { 'A', 'u', 't', 'h', 'e', 'n', 't', 'i', 'c', 'A', 'M', 'D' } },
+  { kCpuVendorVia      , { 'C', 'e', 'n', 't', 'a', 'u', 'r', 'H', 'a', 'u', 'l', 's' } },
+  { kCpuVendorNSM      , { 'C', 'y', 'r', 'i', 'x', 'I', 'n', 's', 't', 'e', 'a', 'd' } },
+  { kCpuVendorIntel    , { 'G', 'e', 'n', 'u', 'i', 'n', 'e', 'I', 'n', 't', 'e', 'l' } },
+  { kCpuVendorTransmeta, { 'G', 'e', 'n', 'u', 'i', 'n', 'e', 'T', 'M', 'x', '8', '6' } },
+  { kCpuVendorNSM      , { 'G', 'e', 'o', 'd', 'e', ' ', 'b', 'y', ' ', 'N', 'S', 'C' } },
+  { kCpuVendorTransmeta, { 'T', 'r', 'a', 'n', 's', 'm', 'e', 't', 'a', 'C', 'P', 'U' } },
+  { kCpuVendorVia      , { 'V', 'I', 'A',  0 , 'V', 'I', 'A',  0 , 'V', 'I', 'A',  0  } }
+};
+
+static ASMJIT_INLINE bool cpuVendorEq(const CpuVendor& info, const char* vendorString) {
+  const uint32_t* a = reinterpret_cast<const uint32_t*>(info.text);
+  const uint32_t* b = reinterpret_cast<const uint32_t*>(vendorString);
+
+  return (a[0] == b[0]) & (a[1] == b[1]) & (a[2] == b[2]);
+}
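// For illustration, cpuSimplifyBrandString() collapses the space padding that
// CPUID leaves in the 48-byte brand string (hypothetical input shown):
//
// @code
// char brand[] = "Intel(R) Xeon(R)  CPU           X5660";
// cpuSimplifyBrandString(brand); // -> "Intel(R) Xeon(R) CPU X5660"
// @endcode
//
// cpuVendorEq() compares the 12 vendor characters as three dwords, which is
// why CpuVendor::text is not NUL-terminated.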
+
+// ============================================================================
+// [asmjit::x86x64::hostCpuDetect]
+// ============================================================================
+
+void hostCpuDetect(Cpu* out) {
+  CpuId regs;
+
+  uint32_t i;
+  uint32_t maxId;
+
+  // Clear everything except the '_size' member.
+  ::memset(reinterpret_cast<uint8_t*>(out) + sizeof(uint32_t),
+    0, sizeof(BaseCpu) - sizeof(uint32_t));
+
+  // Fill safe defaults.
+  ::memcpy(out->_vendorString, "Unknown", 8);
+  out->_coresCount = BaseCpu::detectNumberOfCores();
+
+  // Get vendor string/id.
+  hostCpuId(0, 0, &regs);
+
+  maxId = regs.eax;
+  ::memcpy(out->_vendorString, &regs.ebx, 4);
+  ::memcpy(out->_vendorString + 4, &regs.edx, 4);
+  ::memcpy(out->_vendorString + 8, &regs.ecx, 4);
+
+  for (i = 0; i < sizeof(cpuVendorTable) / sizeof(cpuVendorTable[0]); i++) {
+    if (cpuVendorEq(cpuVendorTable[i], out->_vendorString)) {
+      out->_vendorId = cpuVendorTable[i].id;
+      break;
+    }
+  }
+
+  // Get feature flags in ecx/edx and family/model in eax.
+  hostCpuId(1, 0, &regs);
+
+  // Fill family and model fields.
+  out->_family   = (regs.eax >> 8) & 0x0F;
+  out->_model    = (regs.eax >> 4) & 0x0F;
+  out->_stepping = (regs.eax     ) & 0x0F;
+
+  // Use extended family and model fields.
+  if (out->_family == 0x0F) {
+    out->_family += ((regs.eax >> 20) & 0xFF);
+    out->_model += ((regs.eax >> 16) & 0x0F) << 4;
+  }
+
+  out->_processorType        = ((regs.eax >> 12) & 0x03);
+  out->_brandIndex           = ((regs.ebx      ) & 0xFF);
+  out->_flushCacheLineSize   = ((regs.ebx >>  8) & 0xFF) * 8;
+  out->_maxLogicalProcessors = ((regs.ebx >> 16) & 0xFF);
+
+  if (regs.ecx & 0x00000001U) out->addFeature(kCpuFeatureSse3);
+  if (regs.ecx & 0x00000002U) out->addFeature(kCpuFeaturePclmulqdq);
+  if (regs.ecx & 0x00000008U) out->addFeature(kCpuFeatureMonitorMWait);
+  if (regs.ecx & 0x00000200U) out->addFeature(kCpuFeatureSsse3);
+  if (regs.ecx & 0x00002000U) out->addFeature(kCpuFeatureCmpXchg16B);
+  if (regs.ecx & 0x00080000U) out->addFeature(kCpuFeatureSse41);
+  if (regs.ecx & 0x00100000U) out->addFeature(kCpuFeatureSse42);
+  if (regs.ecx & 0x00400000U) out->addFeature(kCpuFeatureMovbe);
+  if (regs.ecx & 0x00800000U) out->addFeature(kCpuFeaturePopcnt);
+  if (regs.ecx & 0x02000000U) out->addFeature(kCpuFeatureAesni);
+  if (regs.ecx & 0x40000000U) out->addFeature(kCpuFeatureRdrand);
+
+  if (regs.edx & 0x00000010U) out->addFeature(kCpuFeatureRdtsc);
+  if (regs.edx & 0x00000100U) out->addFeature(kCpuFeatureCmpXchg8B);
+  if (regs.edx & 0x00008000U) out->addFeature(kCpuFeatureCmov);
+  if (regs.edx & 0x00800000U) out->addFeature(kCpuFeatureMmx);
+  if (regs.edx & 0x01000000U) out->addFeature(kCpuFeatureFxsr);
+  if (regs.edx & 0x02000000U) out->addFeature(kCpuFeatureSse).addFeature(kCpuFeatureMmxExt);
+  if (regs.edx & 0x04000000U) out->addFeature(kCpuFeatureSse).addFeature(kCpuFeatureSse2);
+  if (regs.edx & 0x10000000U) out->addFeature(kCpuFeatureMultithreading);
+
+  if (out->_vendorId == kCpuVendorAmd && (regs.edx & 0x10000000U)) {
+    // AMD sets Multithreading to ON if it has more cores.
+    if (out->_coresCount == 1)
+      out->_coresCount = 2;
+  }
+
+  // Detect AVX.
+  if (regs.ecx & 0x10000000U) {
+    out->addFeature(kCpuFeatureAvx);
+
+    if (regs.ecx & 0x00000800U) out->addFeature(kCpuFeatureXop);
+    if (regs.ecx & 0x00004000U) out->addFeature(kCpuFeatureFma3);
+    if (regs.ecx & 0x00010000U) out->addFeature(kCpuFeatureFma4);
+    if (regs.ecx & 0x20000000U) out->addFeature(kCpuFeatureF16C);
+  }
+
+  // Detect new features if the processor supports CPUID-07.
+  if (maxId >= 7) {
+    hostCpuId(7, 0, &regs);
+
+    if (regs.ebx & 0x00000001) out->addFeature(kCpuFeatureFsGsBase);
+    if (regs.ebx & 0x00000008) out->addFeature(kCpuFeatureBmi);
+    if (regs.ebx & 0x00000010) out->addFeature(kCpuFeatureHle);
+    if (regs.ebx & 0x00000100) out->addFeature(kCpuFeatureBmi2);
+    if (regs.ebx & 0x00000200) out->addFeature(kCpuFeatureRepMovsbStosbExt);
+    if (regs.ebx & 0x00000800) out->addFeature(kCpuFeatureRtm);
+
+    // AVX2 depends on AVX.
+    if (out->hasFeature(kCpuFeatureAvx)) {
+      if (regs.ebx & 0x00000020) out->addFeature(kCpuFeatureAvx2);
+    }
+  }
+
+  // Calling cpuid with 0x80000000 as the in argument gets the number of valid
+  // extended IDs.
+  hostCpuId(0x80000000, 0, &regs);
+
+  uint32_t maxExtId = IntUtil::iMin<uint32_t>(regs.eax, 0x80000004);
+  uint32_t* brand = reinterpret_cast<uint32_t*>(out->_brandString);
+
+  for (i = 0x80000001; i <= maxExtId; i++) {
+    hostCpuId(i, 0, &regs);
+
+    switch (i) {
+      case 0x80000001:
+        if (regs.ecx & 0x00000001U) out->addFeature(kCpuFeatureLahfSahf);
+        if (regs.ecx & 0x00000020U) out->addFeature(kCpuFeatureLzcnt);
+        if (regs.ecx & 0x00000040U) out->addFeature(kCpuFeatureSse4A);
+        if (regs.ecx & 0x00000080U) out->addFeature(kCpuFeatureMsse);
+        if (regs.ecx & 0x00000100U) out->addFeature(kCpuFeaturePrefetch);
+
+        if (regs.edx & 0x00100000U) out->addFeature(kCpuFeatureExecuteDisableBit);
+        if (regs.edx & 0x00200000U) out->addFeature(kCpuFeatureFfxsr);
+        if (regs.edx & 0x00400000U) out->addFeature(kCpuFeatureMmxExt);
+        if (regs.edx & 0x08000000U) out->addFeature(kCpuFeatureRdtscp);
+        if (regs.edx & 0x40000000U) out->addFeature(kCpuFeature3dNowExt).addFeature(kCpuFeatureMmxExt);
+        if (regs.edx & 0x80000000U) out->addFeature(kCpuFeature3dNow);
+        break;
+
+      case 0x80000002:
+      case 0x80000003:
+      case 0x80000004:
+        *brand++ = regs.eax;
+        *brand++ = regs.ebx;
+        *brand++ = regs.ecx;
+        *brand++ = regs.edx;
+        break;
+
+      default:
+        // Additional features can be detected in the future.
+        break;
+    }
+  }
+
+  // Simplify the brand string (remove unnecessary spaces to make printing nicer).
+  cpuSimplifyBrandString(out->_brandString);
+}
+#endif
+
+} // x86x64 namespace
+} // asmjit namespace
+
+// [Api-End]
+#include "../base/apiend.h"
+
+// [Guard]
+#endif // ASMJIT_BUILD_X86 || ASMJIT_BUILD_X64
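// A short sketch of the detection path this file implements, seen from the
// caller's side (illustrative only; on non-x86/x64 hosts hostCpuDetect() is
// compiled out):
//
// @code
// x86x64::Cpu cpu;
// x86x64::hostCpuDetect(&cpu);
//
// if (cpu.hasFeature(x86x64::kCpuFeatureAvx)) {
//   // Safe to generate VEX-encoded (AVX) code on this host.
// }
// @endcode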
diff --git a/src/asmjit/x86/x86cpu.h b/src/asmjit/x86/x86cpu.h
new file mode 100644
index 0000000..c8d31c1
--- /dev/null
+++ b/src/asmjit/x86/x86cpu.h
@@ -0,0 +1,218 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Guard]
+#ifndef _ASMJIT_X86_X86CPU_H
+#define _ASMJIT_X86_X86CPU_H
+
+// [Dependencies - AsmJit]
+#include "../base/cpu.h"
+#include "../base/defs.h"
+
+// [Api-Begin]
+#include "../base/apibegin.h"
+
+namespace asmjit {
+namespace x86x64 {
+
+//! @addtogroup asmjit_x86x64
+//! @{
+
+// ============================================================================
+// [asmjit::x86x64::kCpuFeature]
+// ============================================================================
+
+//! @brief X86 CPU features.
+ASMJIT_ENUM(kCpuFeature) {
+  //! @brief Cpu has multithreading.
+  kCpuFeatureMultithreading = 1,
+  //! @brief Cpu has execute disable bit.
+  kCpuFeatureExecuteDisableBit,
+  //! @brief Cpu has RDTSC.
+  kCpuFeatureRdtsc,
+  //! @brief Cpu has RDTSCP.
+  kCpuFeatureRdtscp,
+  //! @brief Cpu has CMOV.
+  kCpuFeatureCmov,
+  //! @brief Cpu has CMPXCHG8B.
+  kCpuFeatureCmpXchg8B,
+  //! @brief Cpu has CMPXCHG16B (x64).
+  kCpuFeatureCmpXchg16B,
+  //! @brief Cpu has CLFLUSH.
+  kCpuFeatureClflush,
+  //! @brief Cpu has PREFETCH.
+  kCpuFeaturePrefetch,
+  //! @brief Cpu has LAHF/SAHF.
+  kCpuFeatureLahfSahf,
+  //! @brief Cpu has FXSAVE/FXRSTOR.
+  kCpuFeatureFxsr,
+  //! @brief Cpu has FXSAVE/FXRSTOR optimizations.
+  kCpuFeatureFfxsr,
+  //! @brief Cpu has MMX.
+  kCpuFeatureMmx,
+  //! @brief Cpu has extended MMX.
+  kCpuFeatureMmxExt,
+  //! @brief Cpu has 3dNow!
+  kCpuFeature3dNow,
+  //! @brief Cpu has enhanced 3dNow!
+  kCpuFeature3dNowExt,
+  //! @brief Cpu has SSE.
+  kCpuFeatureSse,
+  //! @brief Cpu has SSE2.
+  kCpuFeatureSse2,
+  //! @brief Cpu has SSE3.
+  kCpuFeatureSse3,
+  //! @brief Cpu has Supplemental SSE3 (SSSE3).
+  kCpuFeatureSsse3,
+  //! @brief Cpu has SSE4.A.
+  kCpuFeatureSse4A,
+  //! @brief Cpu has SSE4.1.
+  kCpuFeatureSse41,
+  //! @brief Cpu has SSE4.2.
+  kCpuFeatureSse42,
+  //! @brief Cpu has Misaligned SSE (MSSE).
+  kCpuFeatureMsse,
+  //! @brief Cpu has MONITOR and MWAIT.
+  kCpuFeatureMonitorMWait,
+  //! @brief Cpu has MOVBE.
+  kCpuFeatureMovbe,
+  //! @brief Cpu has POPCNT.
+  kCpuFeaturePopcnt,
+  //! @brief Cpu has LZCNT.
+  kCpuFeatureLzcnt,
+  //! @brief Cpu has AESNI.
+  kCpuFeatureAesni,
+  //! @brief Cpu has PCLMULQDQ.
+  kCpuFeaturePclmulqdq,
+  //! @brief Cpu has RDRAND.
+  kCpuFeatureRdrand,
+  //! @brief Cpu has AVX.
+  kCpuFeatureAvx,
+  //! @brief Cpu has AVX2.
+  kCpuFeatureAvx2,
+  //! @brief Cpu has F16C.
+  kCpuFeatureF16C,
+  //! @brief Cpu has FMA3.
+  kCpuFeatureFma3,
+  //! @brief Cpu has FMA4.
+  kCpuFeatureFma4,
+  //! @brief Cpu has XOP.
+  kCpuFeatureXop,
+  //! @brief Cpu has BMI.
+  kCpuFeatureBmi,
+  //! @brief Cpu has BMI2.
+  kCpuFeatureBmi2,
+  //! @brief Cpu has HLE.
+  kCpuFeatureHle,
+  //! @brief Cpu has RTM.
+  kCpuFeatureRtm,
+  //! @brief Cpu has FSGSBASE.
+  kCpuFeatureFsGsBase,
+  //! @brief Cpu has enhanced REP MOVSB/STOSB.
+  kCpuFeatureRepMovsbStosbExt,
+
+  //! @brief Count of X86/X64 Cpu features.
+  kCpuFeatureCount
+};
+
+// ============================================================================
+// [asmjit::x86x64::CpuId]
+// ============================================================================
+
+//! @brief X86/X64 cpuid output.
+union CpuId {
+  //! @brief EAX/EBX/ECX/EDX output.
+  uint32_t i[4];
+
+  struct {
+    //! @brief EAX output.
+    uint32_t eax;
+    //! @brief EBX output.
+    uint32_t ebx;
+    //! @brief ECX output.
+    uint32_t ecx;
+    //! @brief EDX output.
+    uint32_t edx;
+  };
+};
+
+// ============================================================================
+// [asmjit::x86x64::Cpu]
+// ============================================================================
+
+struct Cpu : public BaseCpu {
+  ASMJIT_NO_COPY(Cpu)
+
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE Cpu(uint32_t size = sizeof(Cpu)) : BaseCpu(size) {}
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get processor type.
+  ASMJIT_INLINE uint32_t getProcessorType() const { return _processorType; }
+  //! @brief Get brand index.
+  ASMJIT_INLINE uint32_t getBrandIndex() const { return _brandIndex; }
+  //! @brief Get flush cache line size.
+  ASMJIT_INLINE uint32_t getFlushCacheLineSize() const { return _flushCacheLineSize; }
+  //! @brief Get maximum logical processors count.
+  ASMJIT_INLINE uint32_t getMaxLogicalProcessors() const { return _maxLogicalProcessors; }
+
+  // --------------------------------------------------------------------------
+  // [Statics]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get the global instance of @ref x86x64::Cpu.
+  static ASMJIT_INLINE const Cpu* getHost()
+  { return static_cast<const Cpu*>(BaseCpu::getHost()); }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! @brief Processor type.
+  uint32_t _processorType;
+  //! @brief Brand index.
+  uint32_t _brandIndex;
+  //! @brief Flush cache line size in bytes.
+  uint32_t _flushCacheLineSize;
+  //! @brief Maximum number of addressable IDs for logical processors.
+  uint32_t _maxLogicalProcessors;
+};
+
+// ============================================================================
+// [asmjit::x86x64::hostCpuId / hostCpuDetect]
+// ============================================================================
+
+#if defined(ASMJIT_HOST_X86) || defined(ASMJIT_HOST_X64)
+//! @brief Calls the CPUID instruction with eax == @a inEax and ecx == @a inEcx
+//! and stores the result to @a result.
+//!
+//! @c hostCpuId() takes two input parameters, passed to cpuid through the eax
+//! and ecx registers, and stores the four output values of the cpuid
+//! instruction (the eax, ebx, ecx and edx registers) to @a result.
+ASMJIT_API void hostCpuId(uint32_t inEax, uint32_t inEcx, CpuId* result);
+
+//! @brief Detect CPU features into the x86x64::Cpu structure @a out.
+//!
+//! @sa @c BaseCpu.
+ASMJIT_API void hostCpuDetect(Cpu* out);
+#endif // ASMJIT_HOST_X86 || ASMJIT_HOST_X64
+
+//! @}
+
+} // x86x64 namespace
+} // asmjit namespace
+
+// [Api-End]
+#include "../base/apiend.h"
+
+// [Guard]
+#endif // _ASMJIT_X86_X86CPU_H
diff --git a/src/asmjit/x86/x86defs.cpp b/src/asmjit/x86/x86defs.cpp
new file mode 100644
index 0000000..6152430
--- /dev/null
+++ b/src/asmjit/x86/x86defs.cpp
@@ -0,0 +1,3342 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Export]
+#define ASMJIT_EXPORTS
+
+// [Guard]
+#include "../build.h"
+#if defined(ASMJIT_BUILD_X86) || defined(ASMJIT_BUILD_X64)
+
+// [Dependencies - AsmJit]
+#include "../x86/x86defs.h"
+
+// [Api-Begin]
+#include "../base/apibegin.h"
+
+namespace asmjit {
+namespace x86x64 {
+
+// ============================================================================
+// [asmjit::x86x64::Instructions and Condition Codes]
+// ============================================================================
+
+// Autogenerated by src-gendefs.js:
+//
+// ${kInstData:Begin}
+const char _instName[] =
+  "\0"
+  "adc\0"
+  "add\0"
+  "addpd\0"
+  "addps\0"
+  "addsd\0"
+  "addss\0"
+  "addsubpd\0"
+  "addsubps\0"
+  "aesdec\0"
+  "aesdeclast\0"
+  "aesenc\0"
+  "aesenclast\0"
+  "aesimc\0"
+  "aeskeygenassist\0"
+  "and\0"
+  "andn\0"
+  "andnpd\0"
+  "andnps\0"
+  "andpd\0"
+  "andps\0"
+  "bextr\0"
+  "blendpd\0"
+  "blendps\0"
+  "blendvpd\0"
+  "blendvps\0"
+  "blsi\0"
+  "blsmsk\0"
+  "blsr\0"
+  "bsf\0"
+  "bsr\0"
+  "bswap\0"
+  "bt\0"
+  "btc\0"
+  "btr\0"
+  "bts\0"
+  "bzhi\0"
+  "call\0"
+  "cbw\0"
+  "cdq\0"
+  "cdqe\0"
+  "clc\0"
+  "cld\0"
+  "clflush\0"
+  "cmc\0"
+  "cmova\0"
+  "cmovae\0"
+  "cmovb\0"
+  "cmovbe\0"
+  "cmovc\0"
+  "cmove\0"
+  "cmovg\0"
+  "cmovge\0"
+  "cmovl\0"
+  "cmovle\0"
+  "cmovna\0"
+  "cmovnae\0"
+  "cmovnb\0"
+  "cmovnbe\0"
+  "cmovnc\0"
+  "cmovne\0"
+  "cmovng\0"
+  "cmovnge\0"
+  "cmovnl\0"
+  "cmovnle\0"
+  "cmovno\0"
+  "cmovnp\0"
+  "cmovns\0"
+  "cmovnz\0"
+  "cmovo\0"
+  "cmovp\0"
+  "cmovpe\0"
+  "cmovpo\0"
+  "cmovs\0"
+  "cmovz\0"
+  "cmp\0"
+  "cmppd\0"
+  "cmpps\0"
+  "cmpsd\0"
+  "cmpss\0"
+  "cmpxchg\0"
+  "cmpxchg16b\0"
+  "cmpxchg8b\0"
+  "comisd\0"
+  "comiss\0"
+  "cpuid\0"
+  "cqo\0"
+  "crc32\0"
+  "cvtdq2pd\0"
+  "cvtdq2ps\0"
+  "cvtpd2dq\0"
+  "cvtpd2pi\0"
+  "cvtpd2ps\0"
+  "cvtpi2pd\0"
+  "cvtpi2ps\0"
+  "cvtps2dq\0"
+  "cvtps2pd\0"
+  "cvtps2pi\0"
+  "cvtsd2si\0"
+  "cvtsd2ss\0"
+  "cvtsi2sd\0"
+  "cvtsi2ss\0"
+  "cvtss2sd\0"
+  "cvtss2si\0"
+  "cvttpd2dq\0"
+  "cvttpd2pi\0"
+  "cvttps2dq\0"
+  "cvttps2pi\0"
+  "cvttsd2si\0"
+  "cvttss2si\0"
+  "cwd\0"
+  "cwde\0"
+  "daa\0"
+  "das\0"
+  "dec\0"
+  "div\0"
+ "divpd\0" + "divps\0" + "divsd\0" + "divss\0" + "dppd\0" + "dpps\0" + "emms\0" + "enter\0" + "extractps\0" + "f2xm1\0" + "fabs\0" + "fadd\0" + "faddp\0" + "fbld\0" + "fbstp\0" + "fchs\0" + "fclex\0" + "fcmovb\0" + "fcmovbe\0" + "fcmove\0" + "fcmovnb\0" + "fcmovnbe\0" + "fcmovne\0" + "fcmovnu\0" + "fcmovu\0" + "fcom\0" + "fcomi\0" + "fcomip\0" + "fcomp\0" + "fcompp\0" + "fcos\0" + "fdecstp\0" + "fdiv\0" + "fdivp\0" + "fdivr\0" + "fdivrp\0" + "femms\0" + "ffree\0" + "fiadd\0" + "ficom\0" + "ficomp\0" + "fidiv\0" + "fidivr\0" + "fild\0" + "fimul\0" + "fincstp\0" + "finit\0" + "fist\0" + "fistp\0" + "fisttp\0" + "fisub\0" + "fisubr\0" + "fld\0" + "fld1\0" + "fldcw\0" + "fldenv\0" + "fldl2e\0" + "fldl2t\0" + "fldlg2\0" + "fldln2\0" + "fldpi\0" + "fldz\0" + "fmul\0" + "fmulp\0" + "fnclex\0" + "fninit\0" + "fnop\0" + "fnsave\0" + "fnstcw\0" + "fnstenv\0" + "fnstsw\0" + "fpatan\0" + "fprem\0" + "fprem1\0" + "fptan\0" + "frndint\0" + "frstor\0" + "fsave\0" + "fscale\0" + "fsin\0" + "fsincos\0" + "fsqrt\0" + "fst\0" + "fstcw\0" + "fstenv\0" + "fstp\0" + "fstsw\0" + "fsub\0" + "fsubp\0" + "fsubr\0" + "fsubrp\0" + "ftst\0" + "fucom\0" + "fucomi\0" + "fucomip\0" + "fucomp\0" + "fucompp\0" + "fwait\0" + "fxam\0" + "fxch\0" + "fxrstor\0" + "fxsave\0" + "fxtract\0" + "fyl2x\0" + "fyl2xp1\0" + "haddpd\0" + "haddps\0" + "hsubpd\0" + "hsubps\0" + "idiv\0" + "imul\0" + "inc\0" + "int\0" + "ja\0" + "jae\0" + "jb\0" + "jbe\0" + "jc\0" + "je\0" + "jg\0" + "jge\0" + "jl\0" + "jle\0" + "jna\0" + "jnae\0" + "jnb\0" + "jnbe\0" + "jnc\0" + "jne\0" + "jng\0" + "jnge\0" + "jnl\0" + "jnle\0" + "jno\0" + "jnp\0" + "jns\0" + "jnz\0" + "jo\0" + "jp\0" + "jpe\0" + "jpo\0" + "js\0" + "jz\0" + "jmp\0" + "lahf\0" + "lddqu\0" + "ldmxcsr\0" + "lea\0" + "leave\0" + "lfence\0" + "lzcnt\0" + "maskmovdqu\0" + "maskmovq\0" + "maxpd\0" + "maxps\0" + "maxsd\0" + "maxss\0" + "mfence\0" + "minpd\0" + "minps\0" + "minsd\0" + "minss\0" + "monitor\0" + "mov\0" + "movapd\0" + "movaps\0" + "movbe\0" + "movd\0" + "movddup\0" + "movdq2q\0" + "movdqa\0" + "movdqu\0" + "movhlps\0" + "movhpd\0" + "movhps\0" + "movlhps\0" + "movlpd\0" + "movlps\0" + "movmskpd\0" + "movmskps\0" + "movntdq\0" + "movntdqa\0" + "movnti\0" + "movntpd\0" + "movntps\0" + "movntq\0" + "mov_ptr\0" + "movq\0" + "movq2dq\0" + "movsd\0" + "movshdup\0" + "movsldup\0" + "movss\0" + "movsx\0" + "movsxd\0" + "movupd\0" + "movups\0" + "movzx\0" + "mpsadbw\0" + "mul\0" + "mulpd\0" + "mulps\0" + "mulsd\0" + "mulss\0" + "mulx\0" + "mwait\0" + "neg\0" + "nop\0" + "not\0" + "or\0" + "orpd\0" + "orps\0" + "pabsb\0" + "pabsd\0" + "pabsw\0" + "packssdw\0" + "packsswb\0" + "packusdw\0" + "packuswb\0" + "paddb\0" + "paddd\0" + "paddq\0" + "paddsb\0" + "paddsw\0" + "paddusb\0" + "paddusw\0" + "paddw\0" + "palignr\0" + "pand\0" + "pandn\0" + "pause\0" + "pavgb\0" + "pavgw\0" + "pblendvb\0" + "pblendw\0" + "pclmulqdq\0" + "pcmpeqb\0" + "pcmpeqd\0" + "pcmpeqq\0" + "pcmpeqw\0" + "pcmpestri\0" + "pcmpestrm\0" + "pcmpgtb\0" + "pcmpgtd\0" + "pcmpgtq\0" + "pcmpgtw\0" + "pcmpistri\0" + "pcmpistrm\0" + "pdep\0" + "pext\0" + "pextrb\0" + "pextrd\0" + "pextrq\0" + "pextrw\0" + "pf2id\0" + "pf2iw\0" + "pfacc\0" + "pfadd\0" + "pfcmpeq\0" + "pfcmpge\0" + "pfcmpgt\0" + "pfmax\0" + "pfmin\0" + "pfmul\0" + "pfnacc\0" + "pfpnacc\0" + "pfrcp\0" + "pfrcpit1\0" + "pfrcpit2\0" + "pfrsqit1\0" + "pfrsqrt\0" + "pfsub\0" + "pfsubr\0" + "phaddd\0" + "phaddsw\0" + "phaddw\0" + "phminposuw\0" + "phsubd\0" + "phsubsw\0" + "phsubw\0" + "pi2fd\0" + "pi2fw\0" + "pinsrb\0" + "pinsrd\0" + "pinsrq\0" + "pinsrw\0" + 
"pmaddubsw\0" + "pmaddwd\0" + "pmaxsb\0" + "pmaxsd\0" + "pmaxsw\0" + "pmaxub\0" + "pmaxud\0" + "pmaxuw\0" + "pminsb\0" + "pminsd\0" + "pminsw\0" + "pminub\0" + "pminud\0" + "pminuw\0" + "pmovmskb\0" + "pmovsxbd\0" + "pmovsxbq\0" + "pmovsxbw\0" + "pmovsxdq\0" + "pmovsxwd\0" + "pmovsxwq\0" + "pmovzxbd\0" + "pmovzxbq\0" + "pmovzxbw\0" + "pmovzxdq\0" + "pmovzxwd\0" + "pmovzxwq\0" + "pmuldq\0" + "pmulhrsw\0" + "pmulhuw\0" + "pmulhw\0" + "pmulld\0" + "pmullw\0" + "pmuludq\0" + "pop\0" + "popa\0" + "popcnt\0" + "popf\0" + "por\0" + "prefetch\0" + "prefetch_3dnow\0" + "prefetchw_3dnow\0" + "psadbw\0" + "pshufb\0" + "pshufd\0" + "pshufhw\0" + "pshuflw\0" + "pshufw\0" + "psignb\0" + "psignd\0" + "psignw\0" + "pslld\0" + "pslldq\0" + "psllq\0" + "psllw\0" + "psrad\0" + "psraw\0" + "psrld\0" + "psrldq\0" + "psrlq\0" + "psrlw\0" + "psubb\0" + "psubd\0" + "psubq\0" + "psubsb\0" + "psubsw\0" + "psubusb\0" + "psubusw\0" + "psubw\0" + "pswapd\0" + "ptest\0" + "punpckhbw\0" + "punpckhdq\0" + "punpckhqdq\0" + "punpckhwd\0" + "punpcklbw\0" + "punpckldq\0" + "punpcklqdq\0" + "punpcklwd\0" + "push\0" + "pusha\0" + "pushf\0" + "pxor\0" + "rcl\0" + "rcpps\0" + "rcpss\0" + "rcr\0" + "rdfsbase\0" + "rdgsbase\0" + "rdrand\0" + "rdtsc\0" + "rdtscp\0" + "rep lodsb\0" + "rep lodsd\0" + "rep lodsq\0" + "rep lodsw\0" + "rep movsb\0" + "rep movsd\0" + "rep movsq\0" + "rep movsw\0" + "rep stosb\0" + "rep stosd\0" + "rep stosq\0" + "rep stosw\0" + "repe cmpsb\0" + "repe cmpsd\0" + "repe cmpsq\0" + "repe cmpsw\0" + "repe scasb\0" + "repe scasd\0" + "repe scasq\0" + "repe scasw\0" + "repne cmpsb\0" + "repne cmpsd\0" + "repne cmpsq\0" + "repne cmpsw\0" + "repne scasb\0" + "repne scasd\0" + "repne scasq\0" + "repne scasw\0" + "ret\0" + "rol\0" + "ror\0" + "rorx\0" + "roundpd\0" + "roundps\0" + "roundsd\0" + "roundss\0" + "rsqrtps\0" + "rsqrtss\0" + "sahf\0" + "sal\0" + "sar\0" + "sarx\0" + "sbb\0" + "seta\0" + "setae\0" + "setb\0" + "setbe\0" + "setc\0" + "sete\0" + "setg\0" + "setge\0" + "setl\0" + "setle\0" + "setna\0" + "setnae\0" + "setnb\0" + "setnbe\0" + "setnc\0" + "setne\0" + "setng\0" + "setnge\0" + "setnl\0" + "setnle\0" + "setno\0" + "setnp\0" + "setns\0" + "setnz\0" + "seto\0" + "setp\0" + "setpe\0" + "setpo\0" + "sets\0" + "setz\0" + "sfence\0" + "shl\0" + "shld\0" + "shlx\0" + "shr\0" + "shrd\0" + "shrx\0" + "shufpd\0" + "shufps\0" + "sqrtpd\0" + "sqrtps\0" + "sqrtsd\0" + "sqrtss\0" + "stc\0" + "std\0" + "stmxcsr\0" + "sub\0" + "subpd\0" + "subps\0" + "subsd\0" + "subss\0" + "test\0" + "tzcnt\0" + "ucomisd\0" + "ucomiss\0" + "ud2\0" + "unpckhpd\0" + "unpckhps\0" + "unpcklpd\0" + "unpcklps\0" + "vaddpd\0" + "vaddps\0" + "vaddsd\0" + "vaddss\0" + "vaddsubpd\0" + "vaddsubps\0" + "vaesdec\0" + "vaesdeclast\0" + "vaesenc\0" + "vaesenclast\0" + "vaesimc\0" + "vaeskeygenassist\0" + "vandnpd\0" + "vandnps\0" + "vandpd\0" + "vandps\0" + "vblendpd\0" + "vblendps\0" + "vblendvpd\0" + "vblendvps\0" + "vbroadcastf128\0" + "vbroadcasti128\0" + "vbroadcastsd\0" + "vbroadcastss\0" + "vcmppd\0" + "vcmpps\0" + "vcmpsd\0" + "vcmpss\0" + "vcomisd\0" + "vcomiss\0" + "vcvtdq2pd\0" + "vcvtdq2ps\0" + "vcvtpd2dq\0" + "vcvtpd2ps\0" + "vcvtph2ps\0" + "vcvtps2dq\0" + "vcvtps2pd\0" + "vcvtps2ph\0" + "vcvtsd2si\0" + "vcvtsd2ss\0" + "vcvtsi2sd\0" + "vcvtsi2ss\0" + "vcvtss2sd\0" + "vcvtss2si\0" + "vcvttpd2dq\0" + "vcvttps2dq\0" + "vcvttsd2si\0" + "vcvttss2si\0" + "vdivpd\0" + "vdivps\0" + "vdivsd\0" + "vdivss\0" + "vdppd\0" + "vdpps\0" + "vextractf128\0" + "vextracti128\0" + "vextractps\0" + "vfmadd132pd\0" + "vfmadd132ps\0" + "vfmadd132sd\0" + 
"vfmadd132ss\0" + "vfmadd213pd\0" + "vfmadd213ps\0" + "vfmadd213sd\0" + "vfmadd213ss\0" + "vfmadd231pd\0" + "vfmadd231ps\0" + "vfmadd231sd\0" + "vfmadd231ss\0" + "vfmaddsub132pd\0" + "vfmaddsub132ps\0" + "vfmaddsub213pd\0" + "vfmaddsub213ps\0" + "vfmaddsub231pd\0" + "vfmaddsub231ps\0" + "vfmsub132pd\0" + "vfmsub132ps\0" + "vfmsub132sd\0" + "vfmsub132ss\0" + "vfmsub213pd\0" + "vfmsub213ps\0" + "vfmsub213sd\0" + "vfmsub213ss\0" + "vfmsub231pd\0" + "vfmsub231ps\0" + "vfmsub231sd\0" + "vfmsub231ss\0" + "vfmsubadd132pd\0" + "vfmsubadd132ps\0" + "vfmsubadd213pd\0" + "vfmsubadd213ps\0" + "vfmsubadd231pd\0" + "vfmsubadd231ps\0" + "vfnmadd132pd\0" + "vfnmadd132ps\0" + "vfnmadd132sd\0" + "vfnmadd132ss\0" + "vfnmadd213pd\0" + "vfnmadd213ps\0" + "vfnmadd213sd\0" + "vfnmadd213ss\0" + "vfnmadd231pd\0" + "vfnmadd231ps\0" + "vfnmadd231sd\0" + "vfnmadd231ss\0" + "vfnmsub132pd\0" + "vfnmsub132ps\0" + "vfnmsub132sd\0" + "vfnmsub132ss\0" + "vfnmsub213pd\0" + "vfnmsub213ps\0" + "vfnmsub213sd\0" + "vfnmsub213ss\0" + "vfnmsub231pd\0" + "vfnmsub231ps\0" + "vfnmsub231sd\0" + "vfnmsub231ss\0" + "vgatherdpd\0" + "vgatherdps\0" + "vgatherqpd\0" + "vgatherqps\0" + "vhaddpd\0" + "vhaddps\0" + "vhsubpd\0" + "vhsubps\0" + "vinsertf128\0" + "vinserti128\0" + "vinsertps\0" + "vlddqu\0" + "vldmxcsr\0" + "vmaskmovdqu\0" + "vmaskmovpd\0" + "vmaskmovps\0" + "vmaxpd\0" + "vmaxps\0" + "vmaxsd\0" + "vmaxss\0" + "vminpd\0" + "vminps\0" + "vminsd\0" + "vminss\0" + "vmovapd\0" + "vmovaps\0" + "vmovd\0" + "vmovddup\0" + "vmovdqa\0" + "vmovdqu\0" + "vmovhlps\0" + "vmovhpd\0" + "vmovhps\0" + "vmovlhps\0" + "vmovlpd\0" + "vmovlps\0" + "vmovmskpd\0" + "vmovmskps\0" + "vmovntdq\0" + "vmovntdqa\0" + "vmovntpd\0" + "vmovntps\0" + "vmovq\0" + "vmovsd\0" + "vmovshdup\0" + "vmovsldup\0" + "vmovss\0" + "vmovupd\0" + "vmovups\0" + "vmpsadbw\0" + "vmulpd\0" + "vmulps\0" + "vmulsd\0" + "vmulss\0" + "vorpd\0" + "vorps\0" + "vpabsb\0" + "vpabsd\0" + "vpabsw\0" + "vpackssdw\0" + "vpacksswb\0" + "vpackusdw\0" + "vpackuswb\0" + "vpaddb\0" + "vpaddd\0" + "vpaddq\0" + "vpaddsb\0" + "vpaddsw\0" + "vpaddusb\0" + "vpaddusw\0" + "vpaddw\0" + "vpalignr\0" + "vpand\0" + "vpandn\0" + "vpavgb\0" + "vpavgw\0" + "vpblendd\0" + "vpblendvb\0" + "vpblendw\0" + "vpbroadcastb\0" + "vpbroadcastd\0" + "vpbroadcastq\0" + "vpbroadcastw\0" + "vpclmulqdq\0" + "vpcmpeqb\0" + "vpcmpeqd\0" + "vpcmpeqq\0" + "vpcmpeqw\0" + "vpcmpestri\0" + "vpcmpestrm\0" + "vpcmpgtb\0" + "vpcmpgtd\0" + "vpcmpgtq\0" + "vpcmpgtw\0" + "vpcmpistri\0" + "vpcmpistrm\0" + "vperm2f128\0" + "vperm2i128\0" + "vpermd\0" + "vpermilpd\0" + "vpermilps\0" + "vpermpd\0" + "vpermps\0" + "vpermq\0" + "vpextrb\0" + "vpextrd\0" + "vpextrq\0" + "vpextrw\0" + "vpgatherdd\0" + "vpgatherdq\0" + "vpgatherqd\0" + "vpgatherqq\0" + "vphaddd\0" + "vphaddsw\0" + "vphaddw\0" + "vphminposuw\0" + "vphsubd\0" + "vphsubsw\0" + "vphsubw\0" + "vpinsrb\0" + "vpinsrd\0" + "vpinsrq\0" + "vpinsrw\0" + "vpmaddubsw\0" + "vpmaddwd\0" + "vpmaskmovd\0" + "vpmaskmovq\0" + "vpmaxsb\0" + "vpmaxsd\0" + "vpmaxsw\0" + "vpmaxub\0" + "vpmaxud\0" + "vpmaxuw\0" + "vpminsb\0" + "vpminsd\0" + "vpminsw\0" + "vpminub\0" + "vpminud\0" + "vpminuw\0" + "vpmovmskb\0" + "vpmovsxbd\0" + "vpmovsxbq\0" + "vpmovsxbw\0" + "vpmovsxdq\0" + "vpmovsxwd\0" + "vpmovsxwq\0" + "vpmovzxbd\0" + "vpmovzxbq\0" + "vpmovzxbw\0" + "vpmovzxdq\0" + "vpmovzxwd\0" + "vpmovzxwq\0" + "vpmuldq\0" + "vpmulhrsw\0" + "vpmulhuw\0" + "vpmulhw\0" + "vpmulld\0" + "vpmullw\0" + "vpmuludq\0" + "vpor\0" + "vpsadbw\0" + "vpshufb\0" + "vpshufd\0" + "vpshufhw\0" + "vpshuflw\0" + "vpsignb\0" + 
"vpsignd\0" + "vpsignw\0" + "vpslld\0" + "vpslldq\0" + "vpsllq\0" + "vpsllvd\0" + "vpsllvq\0" + "vpsllw\0" + "vpsrad\0" + "vpsravd\0" + "vpsraw\0" + "vpsrld\0" + "vpsrldq\0" + "vpsrlq\0" + "vpsrlvd\0" + "vpsrlvq\0" + "vpsrlw\0" + "vpsubb\0" + "vpsubd\0" + "vpsubq\0" + "vpsubsb\0" + "vpsubsw\0" + "vpsubusb\0" + "vpsubusw\0" + "vpsubw\0" + "vptest\0" + "vpunpckhbw\0" + "vpunpckhdq\0" + "vpunpckhqdq\0" + "vpunpckhwd\0" + "vpunpcklbw\0" + "vpunpckldq\0" + "vpunpcklqdq\0" + "vpunpcklwd\0" + "vpxor\0" + "vrcpps\0" + "vrcpss\0" + "vroundpd\0" + "vroundps\0" + "vroundsd\0" + "vroundss\0" + "vrsqrtps\0" + "vrsqrtss\0" + "vshufpd\0" + "vshufps\0" + "vsqrtpd\0" + "vsqrtps\0" + "vsqrtsd\0" + "vsqrtss\0" + "vstmxcsr\0" + "vsubpd\0" + "vsubps\0" + "vsubsd\0" + "vsubss\0" + "vtestpd\0" + "vtestps\0" + "vucomisd\0" + "vucomiss\0" + "vunpckhpd\0" + "vunpckhps\0" + "vunpcklpd\0" + "vunpcklps\0" + "vxorpd\0" + "vxorps\0" + "vzeroall\0" + "vzeroupper\0" + "wrfsbase\0" + "wrgsbase\0" + "xadd\0" + "xchg\0" + "xor\0" + "xorpd\0" + "xorps\0"; + +enum kInstData_NameIndex { + kInstNone_NameIndex = 0, + kInstAdc_NameIndex = 1, + kInstAdd_NameIndex = 5, + kInstAddpd_NameIndex = 9, + kInstAddps_NameIndex = 15, + kInstAddsd_NameIndex = 21, + kInstAddss_NameIndex = 27, + kInstAddsubpd_NameIndex = 33, + kInstAddsubps_NameIndex = 42, + kInstAesdec_NameIndex = 51, + kInstAesdeclast_NameIndex = 58, + kInstAesenc_NameIndex = 69, + kInstAesenclast_NameIndex = 76, + kInstAesimc_NameIndex = 87, + kInstAeskeygenassist_NameIndex = 94, + kInstAnd_NameIndex = 110, + kInstAndn_NameIndex = 114, + kInstAndnpd_NameIndex = 119, + kInstAndnps_NameIndex = 126, + kInstAndpd_NameIndex = 133, + kInstAndps_NameIndex = 139, + kInstBextr_NameIndex = 145, + kInstBlendpd_NameIndex = 151, + kInstBlendps_NameIndex = 159, + kInstBlendvpd_NameIndex = 167, + kInstBlendvps_NameIndex = 176, + kInstBlsi_NameIndex = 185, + kInstBlsmsk_NameIndex = 190, + kInstBlsr_NameIndex = 197, + kInstBsf_NameIndex = 202, + kInstBsr_NameIndex = 206, + kInstBswap_NameIndex = 210, + kInstBt_NameIndex = 216, + kInstBtc_NameIndex = 219, + kInstBtr_NameIndex = 223, + kInstBts_NameIndex = 227, + kInstBzhi_NameIndex = 231, + kInstCall_NameIndex = 236, + kInstCbw_NameIndex = 241, + kInstCdq_NameIndex = 245, + kInstCdqe_NameIndex = 249, + kInstClc_NameIndex = 254, + kInstCld_NameIndex = 258, + kInstClflush_NameIndex = 262, + kInstCmc_NameIndex = 270, + kInstCmova_NameIndex = 274, + kInstCmovae_NameIndex = 280, + kInstCmovb_NameIndex = 287, + kInstCmovbe_NameIndex = 293, + kInstCmovc_NameIndex = 300, + kInstCmove_NameIndex = 306, + kInstCmovg_NameIndex = 312, + kInstCmovge_NameIndex = 318, + kInstCmovl_NameIndex = 325, + kInstCmovle_NameIndex = 331, + kInstCmovna_NameIndex = 338, + kInstCmovnae_NameIndex = 345, + kInstCmovnb_NameIndex = 353, + kInstCmovnbe_NameIndex = 360, + kInstCmovnc_NameIndex = 368, + kInstCmovne_NameIndex = 375, + kInstCmovng_NameIndex = 382, + kInstCmovnge_NameIndex = 389, + kInstCmovnl_NameIndex = 397, + kInstCmovnle_NameIndex = 404, + kInstCmovno_NameIndex = 412, + kInstCmovnp_NameIndex = 419, + kInstCmovns_NameIndex = 426, + kInstCmovnz_NameIndex = 433, + kInstCmovo_NameIndex = 440, + kInstCmovp_NameIndex = 446, + kInstCmovpe_NameIndex = 452, + kInstCmovpo_NameIndex = 459, + kInstCmovs_NameIndex = 466, + kInstCmovz_NameIndex = 472, + kInstCmp_NameIndex = 478, + kInstCmppd_NameIndex = 482, + kInstCmpps_NameIndex = 488, + kInstCmpsd_NameIndex = 494, + kInstCmpss_NameIndex = 500, + kInstCmpxchg_NameIndex = 506, + kInstCmpxchg16b_NameIndex = 514, + 
kInstCmpxchg8b_NameIndex = 525, + kInstComisd_NameIndex = 535, + kInstComiss_NameIndex = 542, + kInstCpuid_NameIndex = 549, + kInstCqo_NameIndex = 555, + kInstCrc32_NameIndex = 559, + kInstCvtdq2pd_NameIndex = 565, + kInstCvtdq2ps_NameIndex = 574, + kInstCvtpd2dq_NameIndex = 583, + kInstCvtpd2pi_NameIndex = 592, + kInstCvtpd2ps_NameIndex = 601, + kInstCvtpi2pd_NameIndex = 610, + kInstCvtpi2ps_NameIndex = 619, + kInstCvtps2dq_NameIndex = 628, + kInstCvtps2pd_NameIndex = 637, + kInstCvtps2pi_NameIndex = 646, + kInstCvtsd2si_NameIndex = 655, + kInstCvtsd2ss_NameIndex = 664, + kInstCvtsi2sd_NameIndex = 673, + kInstCvtsi2ss_NameIndex = 682, + kInstCvtss2sd_NameIndex = 691, + kInstCvtss2si_NameIndex = 700, + kInstCvttpd2dq_NameIndex = 709, + kInstCvttpd2pi_NameIndex = 719, + kInstCvttps2dq_NameIndex = 729, + kInstCvttps2pi_NameIndex = 739, + kInstCvttsd2si_NameIndex = 749, + kInstCvttss2si_NameIndex = 759, + kInstCwd_NameIndex = 769, + kInstCwde_NameIndex = 773, + kInstDaa_NameIndex = 778, + kInstDas_NameIndex = 782, + kInstDec_NameIndex = 786, + kInstDiv_NameIndex = 790, + kInstDivpd_NameIndex = 794, + kInstDivps_NameIndex = 800, + kInstDivsd_NameIndex = 806, + kInstDivss_NameIndex = 812, + kInstDppd_NameIndex = 818, + kInstDpps_NameIndex = 823, + kInstEmms_NameIndex = 828, + kInstEnter_NameIndex = 833, + kInstExtractps_NameIndex = 839, + kInstF2xm1_NameIndex = 849, + kInstFabs_NameIndex = 855, + kInstFadd_NameIndex = 860, + kInstFaddp_NameIndex = 865, + kInstFbld_NameIndex = 871, + kInstFbstp_NameIndex = 876, + kInstFchs_NameIndex = 882, + kInstFclex_NameIndex = 887, + kInstFcmovb_NameIndex = 893, + kInstFcmovbe_NameIndex = 900, + kInstFcmove_NameIndex = 908, + kInstFcmovnb_NameIndex = 915, + kInstFcmovnbe_NameIndex = 923, + kInstFcmovne_NameIndex = 932, + kInstFcmovnu_NameIndex = 940, + kInstFcmovu_NameIndex = 948, + kInstFcom_NameIndex = 955, + kInstFcomi_NameIndex = 960, + kInstFcomip_NameIndex = 966, + kInstFcomp_NameIndex = 973, + kInstFcompp_NameIndex = 979, + kInstFcos_NameIndex = 986, + kInstFdecstp_NameIndex = 991, + kInstFdiv_NameIndex = 999, + kInstFdivp_NameIndex = 1004, + kInstFdivr_NameIndex = 1010, + kInstFdivrp_NameIndex = 1016, + kInstFemms_NameIndex = 1023, + kInstFfree_NameIndex = 1029, + kInstFiadd_NameIndex = 1035, + kInstFicom_NameIndex = 1041, + kInstFicomp_NameIndex = 1047, + kInstFidiv_NameIndex = 1054, + kInstFidivr_NameIndex = 1060, + kInstFild_NameIndex = 1067, + kInstFimul_NameIndex = 1072, + kInstFincstp_NameIndex = 1078, + kInstFinit_NameIndex = 1086, + kInstFist_NameIndex = 1092, + kInstFistp_NameIndex = 1097, + kInstFisttp_NameIndex = 1103, + kInstFisub_NameIndex = 1110, + kInstFisubr_NameIndex = 1116, + kInstFld_NameIndex = 1123, + kInstFld1_NameIndex = 1127, + kInstFldcw_NameIndex = 1132, + kInstFldenv_NameIndex = 1138, + kInstFldl2e_NameIndex = 1145, + kInstFldl2t_NameIndex = 1152, + kInstFldlg2_NameIndex = 1159, + kInstFldln2_NameIndex = 1166, + kInstFldpi_NameIndex = 1173, + kInstFldz_NameIndex = 1179, + kInstFmul_NameIndex = 1184, + kInstFmulp_NameIndex = 1189, + kInstFnclex_NameIndex = 1195, + kInstFninit_NameIndex = 1202, + kInstFnop_NameIndex = 1209, + kInstFnsave_NameIndex = 1214, + kInstFnstcw_NameIndex = 1221, + kInstFnstenv_NameIndex = 1228, + kInstFnstsw_NameIndex = 1236, + kInstFpatan_NameIndex = 1243, + kInstFprem_NameIndex = 1250, + kInstFprem1_NameIndex = 1256, + kInstFptan_NameIndex = 1263, + kInstFrndint_NameIndex = 1269, + kInstFrstor_NameIndex = 1277, + kInstFsave_NameIndex = 1284, + kInstFscale_NameIndex = 1290, + kInstFsin_NameIndex = 
1297, + kInstFsincos_NameIndex = 1302, + kInstFsqrt_NameIndex = 1310, + kInstFst_NameIndex = 1316, + kInstFstcw_NameIndex = 1320, + kInstFstenv_NameIndex = 1326, + kInstFstp_NameIndex = 1333, + kInstFstsw_NameIndex = 1338, + kInstFsub_NameIndex = 1344, + kInstFsubp_NameIndex = 1349, + kInstFsubr_NameIndex = 1355, + kInstFsubrp_NameIndex = 1361, + kInstFtst_NameIndex = 1368, + kInstFucom_NameIndex = 1373, + kInstFucomi_NameIndex = 1379, + kInstFucomip_NameIndex = 1386, + kInstFucomp_NameIndex = 1394, + kInstFucompp_NameIndex = 1401, + kInstFwait_NameIndex = 1409, + kInstFxam_NameIndex = 1415, + kInstFxch_NameIndex = 1420, + kInstFxrstor_NameIndex = 1425, + kInstFxsave_NameIndex = 1433, + kInstFxtract_NameIndex = 1440, + kInstFyl2x_NameIndex = 1448, + kInstFyl2xp1_NameIndex = 1454, + kInstHaddpd_NameIndex = 1462, + kInstHaddps_NameIndex = 1469, + kInstHsubpd_NameIndex = 1476, + kInstHsubps_NameIndex = 1483, + kInstIdiv_NameIndex = 1490, + kInstImul_NameIndex = 1495, + kInstInc_NameIndex = 1500, + kInstInt_NameIndex = 1504, + kInstJa_NameIndex = 1508, + kInstJae_NameIndex = 1511, + kInstJb_NameIndex = 1515, + kInstJbe_NameIndex = 1518, + kInstJc_NameIndex = 1522, + kInstJe_NameIndex = 1525, + kInstJg_NameIndex = 1528, + kInstJge_NameIndex = 1531, + kInstJl_NameIndex = 1535, + kInstJle_NameIndex = 1538, + kInstJna_NameIndex = 1542, + kInstJnae_NameIndex = 1546, + kInstJnb_NameIndex = 1551, + kInstJnbe_NameIndex = 1555, + kInstJnc_NameIndex = 1560, + kInstJne_NameIndex = 1564, + kInstJng_NameIndex = 1568, + kInstJnge_NameIndex = 1572, + kInstJnl_NameIndex = 1577, + kInstJnle_NameIndex = 1581, + kInstJno_NameIndex = 1586, + kInstJnp_NameIndex = 1590, + kInstJns_NameIndex = 1594, + kInstJnz_NameIndex = 1598, + kInstJo_NameIndex = 1602, + kInstJp_NameIndex = 1605, + kInstJpe_NameIndex = 1608, + kInstJpo_NameIndex = 1612, + kInstJs_NameIndex = 1616, + kInstJz_NameIndex = 1619, + kInstJmp_NameIndex = 1622, + kInstLahf_NameIndex = 1626, + kInstLddqu_NameIndex = 1631, + kInstLdmxcsr_NameIndex = 1637, + kInstLea_NameIndex = 1645, + kInstLeave_NameIndex = 1649, + kInstLfence_NameIndex = 1655, + kInstLzcnt_NameIndex = 1662, + kInstMaskmovdqu_NameIndex = 1668, + kInstMaskmovq_NameIndex = 1679, + kInstMaxpd_NameIndex = 1688, + kInstMaxps_NameIndex = 1694, + kInstMaxsd_NameIndex = 1700, + kInstMaxss_NameIndex = 1706, + kInstMfence_NameIndex = 1712, + kInstMinpd_NameIndex = 1719, + kInstMinps_NameIndex = 1725, + kInstMinsd_NameIndex = 1731, + kInstMinss_NameIndex = 1737, + kInstMonitor_NameIndex = 1743, + kInstMov_NameIndex = 1751, + kInstMovapd_NameIndex = 1755, + kInstMovaps_NameIndex = 1762, + kInstMovbe_NameIndex = 1769, + kInstMovd_NameIndex = 1775, + kInstMovddup_NameIndex = 1780, + kInstMovdq2q_NameIndex = 1788, + kInstMovdqa_NameIndex = 1796, + kInstMovdqu_NameIndex = 1803, + kInstMovhlps_NameIndex = 1810, + kInstMovhpd_NameIndex = 1818, + kInstMovhps_NameIndex = 1825, + kInstMovlhps_NameIndex = 1832, + kInstMovlpd_NameIndex = 1840, + kInstMovlps_NameIndex = 1847, + kInstMovmskpd_NameIndex = 1854, + kInstMovmskps_NameIndex = 1863, + kInstMovntdq_NameIndex = 1872, + kInstMovntdqa_NameIndex = 1880, + kInstMovnti_NameIndex = 1889, + kInstMovntpd_NameIndex = 1896, + kInstMovntps_NameIndex = 1904, + kInstMovntq_NameIndex = 1912, + kInstMovptr_NameIndex = 1919, + kInstMovq_NameIndex = 1927, + kInstMovq2dq_NameIndex = 1932, + kInstMovsd_NameIndex = 1940, + kInstMovshdup_NameIndex = 1946, + kInstMovsldup_NameIndex = 1955, + kInstMovss_NameIndex = 1964, + kInstMovsx_NameIndex = 1970, + kInstMovsxd_NameIndex = 
1976, + kInstMovupd_NameIndex = 1983, + kInstMovups_NameIndex = 1990, + kInstMovzx_NameIndex = 1997, + kInstMpsadbw_NameIndex = 2003, + kInstMul_NameIndex = 2011, + kInstMulpd_NameIndex = 2015, + kInstMulps_NameIndex = 2021, + kInstMulsd_NameIndex = 2027, + kInstMulss_NameIndex = 2033, + kInstMulx_NameIndex = 2039, + kInstMwait_NameIndex = 2044, + kInstNeg_NameIndex = 2050, + kInstNop_NameIndex = 2054, + kInstNot_NameIndex = 2058, + kInstOr_NameIndex = 2062, + kInstOrpd_NameIndex = 2065, + kInstOrps_NameIndex = 2070, + kInstPabsb_NameIndex = 2075, + kInstPabsd_NameIndex = 2081, + kInstPabsw_NameIndex = 2087, + kInstPackssdw_NameIndex = 2093, + kInstPacksswb_NameIndex = 2102, + kInstPackusdw_NameIndex = 2111, + kInstPackuswb_NameIndex = 2120, + kInstPaddb_NameIndex = 2129, + kInstPaddd_NameIndex = 2135, + kInstPaddq_NameIndex = 2141, + kInstPaddsb_NameIndex = 2147, + kInstPaddsw_NameIndex = 2154, + kInstPaddusb_NameIndex = 2161, + kInstPaddusw_NameIndex = 2169, + kInstPaddw_NameIndex = 2177, + kInstPalignr_NameIndex = 2183, + kInstPand_NameIndex = 2191, + kInstPandn_NameIndex = 2196, + kInstPause_NameIndex = 2202, + kInstPavgb_NameIndex = 2208, + kInstPavgw_NameIndex = 2214, + kInstPblendvb_NameIndex = 2220, + kInstPblendw_NameIndex = 2229, + kInstPclmulqdq_NameIndex = 2237, + kInstPcmpeqb_NameIndex = 2247, + kInstPcmpeqd_NameIndex = 2255, + kInstPcmpeqq_NameIndex = 2263, + kInstPcmpeqw_NameIndex = 2271, + kInstPcmpestri_NameIndex = 2279, + kInstPcmpestrm_NameIndex = 2289, + kInstPcmpgtb_NameIndex = 2299, + kInstPcmpgtd_NameIndex = 2307, + kInstPcmpgtq_NameIndex = 2315, + kInstPcmpgtw_NameIndex = 2323, + kInstPcmpistri_NameIndex = 2331, + kInstPcmpistrm_NameIndex = 2341, + kInstPdep_NameIndex = 2351, + kInstPext_NameIndex = 2356, + kInstPextrb_NameIndex = 2361, + kInstPextrd_NameIndex = 2368, + kInstPextrq_NameIndex = 2375, + kInstPextrw_NameIndex = 2382, + kInstPf2id_NameIndex = 2389, + kInstPf2iw_NameIndex = 2395, + kInstPfacc_NameIndex = 2401, + kInstPfadd_NameIndex = 2407, + kInstPfcmpeq_NameIndex = 2413, + kInstPfcmpge_NameIndex = 2421, + kInstPfcmpgt_NameIndex = 2429, + kInstPfmax_NameIndex = 2437, + kInstPfmin_NameIndex = 2443, + kInstPfmul_NameIndex = 2449, + kInstPfnacc_NameIndex = 2455, + kInstPfpnacc_NameIndex = 2462, + kInstPfrcp_NameIndex = 2470, + kInstPfrcpit1_NameIndex = 2476, + kInstPfrcpit2_NameIndex = 2485, + kInstPfrsqit1_NameIndex = 2494, + kInstPfrsqrt_NameIndex = 2503, + kInstPfsub_NameIndex = 2511, + kInstPfsubr_NameIndex = 2517, + kInstPhaddd_NameIndex = 2524, + kInstPhaddsw_NameIndex = 2531, + kInstPhaddw_NameIndex = 2539, + kInstPhminposuw_NameIndex = 2546, + kInstPhsubd_NameIndex = 2557, + kInstPhsubsw_NameIndex = 2564, + kInstPhsubw_NameIndex = 2572, + kInstPi2fd_NameIndex = 2579, + kInstPi2fw_NameIndex = 2585, + kInstPinsrb_NameIndex = 2591, + kInstPinsrd_NameIndex = 2598, + kInstPinsrq_NameIndex = 2605, + kInstPinsrw_NameIndex = 2612, + kInstPmaddubsw_NameIndex = 2619, + kInstPmaddwd_NameIndex = 2629, + kInstPmaxsb_NameIndex = 2637, + kInstPmaxsd_NameIndex = 2644, + kInstPmaxsw_NameIndex = 2651, + kInstPmaxub_NameIndex = 2658, + kInstPmaxud_NameIndex = 2665, + kInstPmaxuw_NameIndex = 2672, + kInstPminsb_NameIndex = 2679, + kInstPminsd_NameIndex = 2686, + kInstPminsw_NameIndex = 2693, + kInstPminub_NameIndex = 2700, + kInstPminud_NameIndex = 2707, + kInstPminuw_NameIndex = 2714, + kInstPmovmskb_NameIndex = 2721, + kInstPmovsxbd_NameIndex = 2730, + kInstPmovsxbq_NameIndex = 2739, + kInstPmovsxbw_NameIndex = 2748, + kInstPmovsxdq_NameIndex = 2757, + 
kInstPmovsxwd_NameIndex = 2766, + kInstPmovsxwq_NameIndex = 2775, + kInstPmovzxbd_NameIndex = 2784, + kInstPmovzxbq_NameIndex = 2793, + kInstPmovzxbw_NameIndex = 2802, + kInstPmovzxdq_NameIndex = 2811, + kInstPmovzxwd_NameIndex = 2820, + kInstPmovzxwq_NameIndex = 2829, + kInstPmuldq_NameIndex = 2838, + kInstPmulhrsw_NameIndex = 2845, + kInstPmulhuw_NameIndex = 2854, + kInstPmulhw_NameIndex = 2862, + kInstPmulld_NameIndex = 2869, + kInstPmullw_NameIndex = 2876, + kInstPmuludq_NameIndex = 2883, + kInstPop_NameIndex = 2891, + kInstPopa_NameIndex = 2895, + kInstPopcnt_NameIndex = 2900, + kInstPopf_NameIndex = 2907, + kInstPor_NameIndex = 2912, + kInstPrefetch_NameIndex = 2916, + kInstPrefetch3dNow_NameIndex = 2925, + kInstPrefetchw3dNow_NameIndex = 2940, + kInstPsadbw_NameIndex = 2956, + kInstPshufb_NameIndex = 2963, + kInstPshufd_NameIndex = 2970, + kInstPshufhw_NameIndex = 2977, + kInstPshuflw_NameIndex = 2985, + kInstPshufw_NameIndex = 2993, + kInstPsignb_NameIndex = 3000, + kInstPsignd_NameIndex = 3007, + kInstPsignw_NameIndex = 3014, + kInstPslld_NameIndex = 3021, + kInstPslldq_NameIndex = 3027, + kInstPsllq_NameIndex = 3034, + kInstPsllw_NameIndex = 3040, + kInstPsrad_NameIndex = 3046, + kInstPsraw_NameIndex = 3052, + kInstPsrld_NameIndex = 3058, + kInstPsrldq_NameIndex = 3064, + kInstPsrlq_NameIndex = 3071, + kInstPsrlw_NameIndex = 3077, + kInstPsubb_NameIndex = 3083, + kInstPsubd_NameIndex = 3089, + kInstPsubq_NameIndex = 3095, + kInstPsubsb_NameIndex = 3101, + kInstPsubsw_NameIndex = 3108, + kInstPsubusb_NameIndex = 3115, + kInstPsubusw_NameIndex = 3123, + kInstPsubw_NameIndex = 3131, + kInstPswapd_NameIndex = 3137, + kInstPtest_NameIndex = 3144, + kInstPunpckhbw_NameIndex = 3150, + kInstPunpckhdq_NameIndex = 3160, + kInstPunpckhqdq_NameIndex = 3170, + kInstPunpckhwd_NameIndex = 3181, + kInstPunpcklbw_NameIndex = 3191, + kInstPunpckldq_NameIndex = 3201, + kInstPunpcklqdq_NameIndex = 3211, + kInstPunpcklwd_NameIndex = 3222, + kInstPush_NameIndex = 3232, + kInstPusha_NameIndex = 3237, + kInstPushf_NameIndex = 3243, + kInstPxor_NameIndex = 3249, + kInstRcl_NameIndex = 3254, + kInstRcpps_NameIndex = 3258, + kInstRcpss_NameIndex = 3264, + kInstRcr_NameIndex = 3270, + kInstRdfsbase_NameIndex = 3274, + kInstRdgsbase_NameIndex = 3283, + kInstRdrand_NameIndex = 3292, + kInstRdtsc_NameIndex = 3299, + kInstRdtscp_NameIndex = 3305, + kInstRepLodsb_NameIndex = 3312, + kInstRepLodsd_NameIndex = 3322, + kInstRepLodsq_NameIndex = 3332, + kInstRepLodsw_NameIndex = 3342, + kInstRepMovsb_NameIndex = 3352, + kInstRepMovsd_NameIndex = 3362, + kInstRepMovsq_NameIndex = 3372, + kInstRepMovsw_NameIndex = 3382, + kInstRepStosb_NameIndex = 3392, + kInstRepStosd_NameIndex = 3402, + kInstRepStosq_NameIndex = 3412, + kInstRepStosw_NameIndex = 3422, + kInstRepeCmpsb_NameIndex = 3432, + kInstRepeCmpsd_NameIndex = 3443, + kInstRepeCmpsq_NameIndex = 3454, + kInstRepeCmpsw_NameIndex = 3465, + kInstRepeScasb_NameIndex = 3476, + kInstRepeScasd_NameIndex = 3487, + kInstRepeScasq_NameIndex = 3498, + kInstRepeScasw_NameIndex = 3509, + kInstRepneCmpsb_NameIndex = 3520, + kInstRepneCmpsd_NameIndex = 3532, + kInstRepneCmpsq_NameIndex = 3544, + kInstRepneCmpsw_NameIndex = 3556, + kInstRepneScasb_NameIndex = 3568, + kInstRepneScasd_NameIndex = 3580, + kInstRepneScasq_NameIndex = 3592, + kInstRepneScasw_NameIndex = 3604, + kInstRet_NameIndex = 3616, + kInstRol_NameIndex = 3620, + kInstRor_NameIndex = 3624, + kInstRorx_NameIndex = 3628, + kInstRoundpd_NameIndex = 3633, + kInstRoundps_NameIndex = 3641, + kInstRoundsd_NameIndex = 
3649, + kInstRoundss_NameIndex = 3657, + kInstRsqrtps_NameIndex = 3665, + kInstRsqrtss_NameIndex = 3673, + kInstSahf_NameIndex = 3681, + kInstSal_NameIndex = 3686, + kInstSar_NameIndex = 3690, + kInstSarx_NameIndex = 3694, + kInstSbb_NameIndex = 3699, + kInstSeta_NameIndex = 3703, + kInstSetae_NameIndex = 3708, + kInstSetb_NameIndex = 3714, + kInstSetbe_NameIndex = 3719, + kInstSetc_NameIndex = 3725, + kInstSete_NameIndex = 3730, + kInstSetg_NameIndex = 3735, + kInstSetge_NameIndex = 3740, + kInstSetl_NameIndex = 3746, + kInstSetle_NameIndex = 3751, + kInstSetna_NameIndex = 3757, + kInstSetnae_NameIndex = 3763, + kInstSetnb_NameIndex = 3770, + kInstSetnbe_NameIndex = 3776, + kInstSetnc_NameIndex = 3783, + kInstSetne_NameIndex = 3789, + kInstSetng_NameIndex = 3795, + kInstSetnge_NameIndex = 3801, + kInstSetnl_NameIndex = 3808, + kInstSetnle_NameIndex = 3814, + kInstSetno_NameIndex = 3821, + kInstSetnp_NameIndex = 3827, + kInstSetns_NameIndex = 3833, + kInstSetnz_NameIndex = 3839, + kInstSeto_NameIndex = 3845, + kInstSetp_NameIndex = 3850, + kInstSetpe_NameIndex = 3855, + kInstSetpo_NameIndex = 3861, + kInstSets_NameIndex = 3867, + kInstSetz_NameIndex = 3872, + kInstSfence_NameIndex = 3877, + kInstShl_NameIndex = 3884, + kInstShld_NameIndex = 3888, + kInstShlx_NameIndex = 3893, + kInstShr_NameIndex = 3898, + kInstShrd_NameIndex = 3902, + kInstShrx_NameIndex = 3907, + kInstShufpd_NameIndex = 3912, + kInstShufps_NameIndex = 3919, + kInstSqrtpd_NameIndex = 3926, + kInstSqrtps_NameIndex = 3933, + kInstSqrtsd_NameIndex = 3940, + kInstSqrtss_NameIndex = 3947, + kInstStc_NameIndex = 3954, + kInstStd_NameIndex = 3958, + kInstStmxcsr_NameIndex = 3962, + kInstSub_NameIndex = 3970, + kInstSubpd_NameIndex = 3974, + kInstSubps_NameIndex = 3980, + kInstSubsd_NameIndex = 3986, + kInstSubss_NameIndex = 3992, + kInstTest_NameIndex = 3998, + kInstTzcnt_NameIndex = 4003, + kInstUcomisd_NameIndex = 4009, + kInstUcomiss_NameIndex = 4017, + kInstUd2_NameIndex = 4025, + kInstUnpckhpd_NameIndex = 4029, + kInstUnpckhps_NameIndex = 4038, + kInstUnpcklpd_NameIndex = 4047, + kInstUnpcklps_NameIndex = 4056, + kInstVaddpd_NameIndex = 4065, + kInstVaddps_NameIndex = 4072, + kInstVaddsd_NameIndex = 4079, + kInstVaddss_NameIndex = 4086, + kInstVaddsubpd_NameIndex = 4093, + kInstVaddsubps_NameIndex = 4103, + kInstVaesdec_NameIndex = 4113, + kInstVaesdeclast_NameIndex = 4121, + kInstVaesenc_NameIndex = 4133, + kInstVaesenclast_NameIndex = 4141, + kInstVaesimc_NameIndex = 4153, + kInstVaeskeygenassist_NameIndex = 4161, + kInstVandnpd_NameIndex = 4178, + kInstVandnps_NameIndex = 4186, + kInstVandpd_NameIndex = 4194, + kInstVandps_NameIndex = 4201, + kInstVblendpd_NameIndex = 4208, + kInstVblendps_NameIndex = 4217, + kInstVblendvpd_NameIndex = 4226, + kInstVblendvps_NameIndex = 4236, + kInstVbroadcastf128_NameIndex = 4246, + kInstVbroadcasti128_NameIndex = 4261, + kInstVbroadcastsd_NameIndex = 4276, + kInstVbroadcastss_NameIndex = 4289, + kInstVcmppd_NameIndex = 4302, + kInstVcmpps_NameIndex = 4309, + kInstVcmpsd_NameIndex = 4316, + kInstVcmpss_NameIndex = 4323, + kInstVcomisd_NameIndex = 4330, + kInstVcomiss_NameIndex = 4338, + kInstVcvtdq2pd_NameIndex = 4346, + kInstVcvtdq2ps_NameIndex = 4356, + kInstVcvtpd2dq_NameIndex = 4366, + kInstVcvtpd2ps_NameIndex = 4376, + kInstVcvtph2ps_NameIndex = 4386, + kInstVcvtps2dq_NameIndex = 4396, + kInstVcvtps2pd_NameIndex = 4406, + kInstVcvtps2ph_NameIndex = 4416, + kInstVcvtsd2si_NameIndex = 4426, + kInstVcvtsd2ss_NameIndex = 4436, + kInstVcvtsi2sd_NameIndex = 4446, + 
kInstVcvtsi2ss_NameIndex = 4456, + kInstVcvtss2sd_NameIndex = 4466, + kInstVcvtss2si_NameIndex = 4476, + kInstVcvttpd2dq_NameIndex = 4486, + kInstVcvttps2dq_NameIndex = 4497, + kInstVcvttsd2si_NameIndex = 4508, + kInstVcvttss2si_NameIndex = 4519, + kInstVdivpd_NameIndex = 4530, + kInstVdivps_NameIndex = 4537, + kInstVdivsd_NameIndex = 4544, + kInstVdivss_NameIndex = 4551, + kInstVdppd_NameIndex = 4558, + kInstVdpps_NameIndex = 4564, + kInstVextractf128_NameIndex = 4570, + kInstVextracti128_NameIndex = 4583, + kInstVextractps_NameIndex = 4596, + kInstVfmadd132pd_NameIndex = 4607, + kInstVfmadd132ps_NameIndex = 4619, + kInstVfmadd132sd_NameIndex = 4631, + kInstVfmadd132ss_NameIndex = 4643, + kInstVfmadd213pd_NameIndex = 4655, + kInstVfmadd213ps_NameIndex = 4667, + kInstVfmadd213sd_NameIndex = 4679, + kInstVfmadd213ss_NameIndex = 4691, + kInstVfmadd231pd_NameIndex = 4703, + kInstVfmadd231ps_NameIndex = 4715, + kInstVfmadd231sd_NameIndex = 4727, + kInstVfmadd231ss_NameIndex = 4739, + kInstVfmaddsub132pd_NameIndex = 4751, + kInstVfmaddsub132ps_NameIndex = 4766, + kInstVfmaddsub213pd_NameIndex = 4781, + kInstVfmaddsub213ps_NameIndex = 4796, + kInstVfmaddsub231pd_NameIndex = 4811, + kInstVfmaddsub231ps_NameIndex = 4826, + kInstVfmsub132pd_NameIndex = 4841, + kInstVfmsub132ps_NameIndex = 4853, + kInstVfmsub132sd_NameIndex = 4865, + kInstVfmsub132ss_NameIndex = 4877, + kInstVfmsub213pd_NameIndex = 4889, + kInstVfmsub213ps_NameIndex = 4901, + kInstVfmsub213sd_NameIndex = 4913, + kInstVfmsub213ss_NameIndex = 4925, + kInstVfmsub231pd_NameIndex = 4937, + kInstVfmsub231ps_NameIndex = 4949, + kInstVfmsub231sd_NameIndex = 4961, + kInstVfmsub231ss_NameIndex = 4973, + kInstVfmsubadd132pd_NameIndex = 4985, + kInstVfmsubadd132ps_NameIndex = 5000, + kInstVfmsubadd213pd_NameIndex = 5015, + kInstVfmsubadd213ps_NameIndex = 5030, + kInstVfmsubadd231pd_NameIndex = 5045, + kInstVfmsubadd231ps_NameIndex = 5060, + kInstVfnmadd132pd_NameIndex = 5075, + kInstVfnmadd132ps_NameIndex = 5088, + kInstVfnmadd132sd_NameIndex = 5101, + kInstVfnmadd132ss_NameIndex = 5114, + kInstVfnmadd213pd_NameIndex = 5127, + kInstVfnmadd213ps_NameIndex = 5140, + kInstVfnmadd213sd_NameIndex = 5153, + kInstVfnmadd213ss_NameIndex = 5166, + kInstVfnmadd231pd_NameIndex = 5179, + kInstVfnmadd231ps_NameIndex = 5192, + kInstVfnmadd231sd_NameIndex = 5205, + kInstVfnmadd231ss_NameIndex = 5218, + kInstVfnmsub132pd_NameIndex = 5231, + kInstVfnmsub132ps_NameIndex = 5244, + kInstVfnmsub132sd_NameIndex = 5257, + kInstVfnmsub132ss_NameIndex = 5270, + kInstVfnmsub213pd_NameIndex = 5283, + kInstVfnmsub213ps_NameIndex = 5296, + kInstVfnmsub213sd_NameIndex = 5309, + kInstVfnmsub213ss_NameIndex = 5322, + kInstVfnmsub231pd_NameIndex = 5335, + kInstVfnmsub231ps_NameIndex = 5348, + kInstVfnmsub231sd_NameIndex = 5361, + kInstVfnmsub231ss_NameIndex = 5374, + kInstVgatherdpd_NameIndex = 5387, + kInstVgatherdps_NameIndex = 5398, + kInstVgatherqpd_NameIndex = 5409, + kInstVgatherqps_NameIndex = 5420, + kInstVhaddpd_NameIndex = 5431, + kInstVhaddps_NameIndex = 5439, + kInstVhsubpd_NameIndex = 5447, + kInstVhsubps_NameIndex = 5455, + kInstVinsertf128_NameIndex = 5463, + kInstVinserti128_NameIndex = 5475, + kInstVinsertps_NameIndex = 5487, + kInstVlddqu_NameIndex = 5497, + kInstVldmxcsr_NameIndex = 5504, + kInstVmaskmovdqu_NameIndex = 5513, + kInstVmaskmovpd_NameIndex = 5525, + kInstVmaskmovps_NameIndex = 5536, + kInstVmaxpd_NameIndex = 5547, + kInstVmaxps_NameIndex = 5554, + kInstVmaxsd_NameIndex = 5561, + kInstVmaxss_NameIndex = 5568, + kInstVminpd_NameIndex = 5575, + 
kInstVminps_NameIndex = 5582, + kInstVminsd_NameIndex = 5589, + kInstVminss_NameIndex = 5596, + kInstVmovapd_NameIndex = 5603, + kInstVmovaps_NameIndex = 5611, + kInstVmovd_NameIndex = 5619, + kInstVmovddup_NameIndex = 5625, + kInstVmovdqa_NameIndex = 5634, + kInstVmovdqu_NameIndex = 5642, + kInstVmovhlps_NameIndex = 5650, + kInstVmovhpd_NameIndex = 5659, + kInstVmovhps_NameIndex = 5667, + kInstVmovlhps_NameIndex = 5675, + kInstVmovlpd_NameIndex = 5684, + kInstVmovlps_NameIndex = 5692, + kInstVmovmskpd_NameIndex = 5700, + kInstVmovmskps_NameIndex = 5710, + kInstVmovntdq_NameIndex = 5720, + kInstVmovntdqa_NameIndex = 5729, + kInstVmovntpd_NameIndex = 5739, + kInstVmovntps_NameIndex = 5748, + kInstVmovq_NameIndex = 5757, + kInstVmovsd_NameIndex = 5763, + kInstVmovshdup_NameIndex = 5770, + kInstVmovsldup_NameIndex = 5780, + kInstVmovss_NameIndex = 5790, + kInstVmovupd_NameIndex = 5797, + kInstVmovups_NameIndex = 5805, + kInstVmpsadbw_NameIndex = 5813, + kInstVmulpd_NameIndex = 5822, + kInstVmulps_NameIndex = 5829, + kInstVmulsd_NameIndex = 5836, + kInstVmulss_NameIndex = 5843, + kInstVorpd_NameIndex = 5850, + kInstVorps_NameIndex = 5856, + kInstVpabsb_NameIndex = 5862, + kInstVpabsd_NameIndex = 5869, + kInstVpabsw_NameIndex = 5876, + kInstVpackssdw_NameIndex = 5883, + kInstVpacksswb_NameIndex = 5893, + kInstVpackusdw_NameIndex = 5903, + kInstVpackuswb_NameIndex = 5913, + kInstVpaddb_NameIndex = 5923, + kInstVpaddd_NameIndex = 5930, + kInstVpaddq_NameIndex = 5937, + kInstVpaddsb_NameIndex = 5944, + kInstVpaddsw_NameIndex = 5952, + kInstVpaddusb_NameIndex = 5960, + kInstVpaddusw_NameIndex = 5969, + kInstVpaddw_NameIndex = 5978, + kInstVpalignr_NameIndex = 5985, + kInstVpand_NameIndex = 5994, + kInstVpandn_NameIndex = 6000, + kInstVpavgb_NameIndex = 6007, + kInstVpavgw_NameIndex = 6014, + kInstVpblendd_NameIndex = 6021, + kInstVpblendvb_NameIndex = 6030, + kInstVpblendw_NameIndex = 6040, + kInstVpbroadcastb_NameIndex = 6049, + kInstVpbroadcastd_NameIndex = 6062, + kInstVpbroadcastq_NameIndex = 6075, + kInstVpbroadcastw_NameIndex = 6088, + kInstVpclmulqdq_NameIndex = 6101, + kInstVpcmpeqb_NameIndex = 6112, + kInstVpcmpeqd_NameIndex = 6121, + kInstVpcmpeqq_NameIndex = 6130, + kInstVpcmpeqw_NameIndex = 6139, + kInstVpcmpestri_NameIndex = 6148, + kInstVpcmpestrm_NameIndex = 6159, + kInstVpcmpgtb_NameIndex = 6170, + kInstVpcmpgtd_NameIndex = 6179, + kInstVpcmpgtq_NameIndex = 6188, + kInstVpcmpgtw_NameIndex = 6197, + kInstVpcmpistri_NameIndex = 6206, + kInstVpcmpistrm_NameIndex = 6217, + kInstVperm2f128_NameIndex = 6228, + kInstVperm2i128_NameIndex = 6239, + kInstVpermd_NameIndex = 6250, + kInstVpermilpd_NameIndex = 6257, + kInstVpermilps_NameIndex = 6267, + kInstVpermpd_NameIndex = 6277, + kInstVpermps_NameIndex = 6285, + kInstVpermq_NameIndex = 6293, + kInstVpextrb_NameIndex = 6300, + kInstVpextrd_NameIndex = 6308, + kInstVpextrq_NameIndex = 6316, + kInstVpextrw_NameIndex = 6324, + kInstVpgatherdd_NameIndex = 6332, + kInstVpgatherdq_NameIndex = 6343, + kInstVpgatherqd_NameIndex = 6354, + kInstVpgatherqq_NameIndex = 6365, + kInstVphaddd_NameIndex = 6376, + kInstVphaddsw_NameIndex = 6384, + kInstVphaddw_NameIndex = 6393, + kInstVphminposuw_NameIndex = 6401, + kInstVphsubd_NameIndex = 6413, + kInstVphsubsw_NameIndex = 6421, + kInstVphsubw_NameIndex = 6430, + kInstVpinsrb_NameIndex = 6438, + kInstVpinsrd_NameIndex = 6446, + kInstVpinsrq_NameIndex = 6454, + kInstVpinsrw_NameIndex = 6462, + kInstVpmaddubsw_NameIndex = 6470, + kInstVpmaddwd_NameIndex = 6481, + kInstVpmaskmovd_NameIndex = 6490, + 
kInstVpmaskmovq_NameIndex = 6501, + kInstVpmaxsb_NameIndex = 6512, + kInstVpmaxsd_NameIndex = 6520, + kInstVpmaxsw_NameIndex = 6528, + kInstVpmaxub_NameIndex = 6536, + kInstVpmaxud_NameIndex = 6544, + kInstVpmaxuw_NameIndex = 6552, + kInstVpminsb_NameIndex = 6560, + kInstVpminsd_NameIndex = 6568, + kInstVpminsw_NameIndex = 6576, + kInstVpminub_NameIndex = 6584, + kInstVpminud_NameIndex = 6592, + kInstVpminuw_NameIndex = 6600, + kInstVpmovmskb_NameIndex = 6608, + kInstVpmovsxbd_NameIndex = 6618, + kInstVpmovsxbq_NameIndex = 6628, + kInstVpmovsxbw_NameIndex = 6638, + kInstVpmovsxdq_NameIndex = 6648, + kInstVpmovsxwd_NameIndex = 6658, + kInstVpmovsxwq_NameIndex = 6668, + kInstVpmovzxbd_NameIndex = 6678, + kInstVpmovzxbq_NameIndex = 6688, + kInstVpmovzxbw_NameIndex = 6698, + kInstVpmovzxdq_NameIndex = 6708, + kInstVpmovzxwd_NameIndex = 6718, + kInstVpmovzxwq_NameIndex = 6728, + kInstVpmuldq_NameIndex = 6738, + kInstVpmulhrsw_NameIndex = 6746, + kInstVpmulhuw_NameIndex = 6756, + kInstVpmulhw_NameIndex = 6765, + kInstVpmulld_NameIndex = 6773, + kInstVpmullw_NameIndex = 6781, + kInstVpmuludq_NameIndex = 6789, + kInstVpor_NameIndex = 6798, + kInstVpsadbw_NameIndex = 6803, + kInstVpshufb_NameIndex = 6811, + kInstVpshufd_NameIndex = 6819, + kInstVpshufhw_NameIndex = 6827, + kInstVpshuflw_NameIndex = 6836, + kInstVpsignb_NameIndex = 6845, + kInstVpsignd_NameIndex = 6853, + kInstVpsignw_NameIndex = 6861, + kInstVpslld_NameIndex = 6869, + kInstVpslldq_NameIndex = 6876, + kInstVpsllq_NameIndex = 6884, + kInstVpsllvd_NameIndex = 6891, + kInstVpsllvq_NameIndex = 6899, + kInstVpsllw_NameIndex = 6907, + kInstVpsrad_NameIndex = 6914, + kInstVpsravd_NameIndex = 6921, + kInstVpsraw_NameIndex = 6929, + kInstVpsrld_NameIndex = 6936, + kInstVpsrldq_NameIndex = 6943, + kInstVpsrlq_NameIndex = 6951, + kInstVpsrlvd_NameIndex = 6958, + kInstVpsrlvq_NameIndex = 6966, + kInstVpsrlw_NameIndex = 6974, + kInstVpsubb_NameIndex = 6981, + kInstVpsubd_NameIndex = 6988, + kInstVpsubq_NameIndex = 6995, + kInstVpsubsb_NameIndex = 7002, + kInstVpsubsw_NameIndex = 7010, + kInstVpsubusb_NameIndex = 7018, + kInstVpsubusw_NameIndex = 7027, + kInstVpsubw_NameIndex = 7036, + kInstVptest_NameIndex = 7043, + kInstVpunpckhbw_NameIndex = 7050, + kInstVpunpckhdq_NameIndex = 7061, + kInstVpunpckhqdq_NameIndex = 7072, + kInstVpunpckhwd_NameIndex = 7084, + kInstVpunpcklbw_NameIndex = 7095, + kInstVpunpckldq_NameIndex = 7106, + kInstVpunpcklqdq_NameIndex = 7117, + kInstVpunpcklwd_NameIndex = 7129, + kInstVpxor_NameIndex = 7140, + kInstVrcpps_NameIndex = 7146, + kInstVrcpss_NameIndex = 7153, + kInstVroundpd_NameIndex = 7160, + kInstVroundps_NameIndex = 7169, + kInstVroundsd_NameIndex = 7178, + kInstVroundss_NameIndex = 7187, + kInstVrsqrtps_NameIndex = 7196, + kInstVrsqrtss_NameIndex = 7205, + kInstVshufpd_NameIndex = 7214, + kInstVshufps_NameIndex = 7222, + kInstVsqrtpd_NameIndex = 7230, + kInstVsqrtps_NameIndex = 7238, + kInstVsqrtsd_NameIndex = 7246, + kInstVsqrtss_NameIndex = 7254, + kInstVstmxcsr_NameIndex = 7262, + kInstVsubpd_NameIndex = 7271, + kInstVsubps_NameIndex = 7278, + kInstVsubsd_NameIndex = 7285, + kInstVsubss_NameIndex = 7292, + kInstVtestpd_NameIndex = 7299, + kInstVtestps_NameIndex = 7307, + kInstVucomisd_NameIndex = 7315, + kInstVucomiss_NameIndex = 7324, + kInstVunpckhpd_NameIndex = 7333, + kInstVunpckhps_NameIndex = 7343, + kInstVunpcklpd_NameIndex = 7353, + kInstVunpcklps_NameIndex = 7363, + kInstVxorpd_NameIndex = 7373, + kInstVxorps_NameIndex = 7380, + kInstVzeroall_NameIndex = 7387, + kInstVzeroupper_NameIndex = 7396, 
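+ // Each *_NameIndex value above and below is a byte offset into a packed,
+ // NUL-separated table of instruction names (consecutive indices differ by
+ // strlen(name) + 1, which is easy to verify on any neighboring pair). A
+ // minimal lookup sketch, assuming the string table is a char array named
+ // _instName and that the name index is the first InstInfo member, exposed
+ // as _nameIndex (both names are illustrative, not confirmed by this hunk):
+ //
+ //   const char* instName = _instName + _instInfo[kInstXchg]._nameIndex;
+ //   // -> "xchg" (offset 7430)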
+ kInstWrfsbase_NameIndex = 7407, + kInstWrgsbase_NameIndex = 7416, + kInstXadd_NameIndex = 7425, + kInstXchg_NameIndex = 7430, + kInstXor_NameIndex = 7435, + kInstXorpd_NameIndex = 7439, + kInstXorps_NameIndex = 7445 +}; +// ${kInstData:End} + +#define INST(_Code_, _Name_, _Group_, _Flags_, _OpFlags0_, _OpFlags1_, _OpFlags2_, _OpFlags3_, _OpCode0_, _OpCode1_) \ + { _Code_##_NameIndex, _Flags_, _Group_, { 0, 0, 0 }, { _OpFlags0_, _OpFlags1_, _OpFlags2_, _OpFlags3_ }, { _OpCode0_, _OpCode1_ } } + +#define G(_Group_) kInstGroup##_Group_ +#define F(_Flags_) kInstFlag##_Flags_ +#define O(_Op_) kInstOp##_Op_ + +#define U 0 +#define L kInstOpCode_L_True + +#define O_000000(_OpCode_, _R_) (kInstOpCode_PP_00 | kInstOpCode_MM_00 | (0x##_OpCode_) | ((_R_) << kInstOpCode_O_Shift)) +#define O_000F00(_OpCode_, _R_) (kInstOpCode_PP_00 | kInstOpCode_MM_0F | (0x##_OpCode_) | ((_R_) << kInstOpCode_O_Shift)) +#define O_000F01(_OpCode_, _R_) (kInstOpCode_PP_00 | kInstOpCode_MM_0F01 | (0x##_OpCode_) | ((_R_) << kInstOpCode_O_Shift)) +#define O_000F0F(_OpCode_, _R_) (kInstOpCode_PP_00 | kInstOpCode_MM_0F | (0x##_OpCode_) | ((_R_) << kInstOpCode_O_Shift)) +#define O_000F38(_OpCode_, _R_) (kInstOpCode_PP_00 | kInstOpCode_MM_0F38 | (0x##_OpCode_) | ((_R_) << kInstOpCode_O_Shift)) +#define O_000F3A(_OpCode_, _R_) (kInstOpCode_PP_00 | kInstOpCode_MM_0F3A | (0x##_OpCode_) | ((_R_) << kInstOpCode_O_Shift)) +#define O_660000(_OpCode_, _R_) (kInstOpCode_PP_66 | kInstOpCode_MM_00 | (0x##_OpCode_) | ((_R_) << kInstOpCode_O_Shift)) +#define O_660F00(_OpCode_, _R_) (kInstOpCode_PP_66 | kInstOpCode_MM_0F | (0x##_OpCode_) | ((_R_) << kInstOpCode_O_Shift)) +#define O_660F38(_OpCode_, _R_) (kInstOpCode_PP_66 | kInstOpCode_MM_0F38 | (0x##_OpCode_) | ((_R_) << kInstOpCode_O_Shift)) +#define O_660F3A(_OpCode_, _R_) (kInstOpCode_PP_66 | kInstOpCode_MM_0F3A | (0x##_OpCode_) | ((_R_) << kInstOpCode_O_Shift)) +#define O_9B0000(_OpCode_, _R_) (kInstOpCode_PP_9B | kInstOpCode_MM_00 | (0x##_OpCode_) | ((_R_) << kInstOpCode_O_Shift)) +#define O_F20000(_OpCode_, _R_) (kInstOpCode_PP_F2 | kInstOpCode_MM_00 | (0x##_OpCode_) | ((_R_) << kInstOpCode_O_Shift)) +#define O_F20F00(_OpCode_, _R_) (kInstOpCode_PP_F2 | kInstOpCode_MM_0F | (0x##_OpCode_) | ((_R_) << kInstOpCode_O_Shift)) +#define O_F20F38(_OpCode_, _R_) (kInstOpCode_PP_F2 | kInstOpCode_MM_0F38 | (0x##_OpCode_) | ((_R_) << kInstOpCode_O_Shift)) +#define O_F20F3A(_OpCode_, _R_) (kInstOpCode_PP_F2 | kInstOpCode_MM_0F3A | (0x##_OpCode_) | ((_R_) << kInstOpCode_O_Shift)) +#define O_F30000(_OpCode_, _R_) (kInstOpCode_PP_F3 | kInstOpCode_MM_00 | (0x##_OpCode_) | ((_R_) << kInstOpCode_O_Shift)) +#define O_F30F00(_OpCode_, _R_) (kInstOpCode_PP_F3 | kInstOpCode_MM_0F | (0x##_OpCode_) | ((_R_) << kInstOpCode_O_Shift)) +#define O_F30F38(_OpCode_, _R_) (kInstOpCode_PP_F3 | kInstOpCode_MM_0F38 | (0x##_OpCode_) | ((_R_) << kInstOpCode_O_Shift)) +#define O_F30F3A(_OpCode_, _R_) (kInstOpCode_PP_F3 | kInstOpCode_MM_0F3A | (0x##_OpCode_) | ((_R_) << kInstOpCode_O_Shift)) + +#define O_00_X(_OpCode_, _R_) (kInstOpCode_PP_00 | (0x##_OpCode_) | ((_R_) << kInstOpCode_O_Shift)) +#define O_9B_X(_OpCode_, _R_) (kInstOpCode_PP_9B | (0x##_OpCode_) | ((_R_) << kInstOpCode_O_Shift)) + +const InstInfo _instInfo[] = { + // Inst-Code | Inst-Name | Inst-Group | Inst-Flags | Op-Flags[0] | Op-Flags[1] | Op-Flags[2] | Op-Flags[3] | Op[0] | Op[1] | + INST(kInstNone , "" , G(None) , F(None) , 0 , 0 , 0 , 0 , 0 , 0 ), + INST(kInstAdc , "adc" , G(X86Arith) , F(Lock) , O(GqdwbMem) , O(GqdwbMem)|O(Imm) , U , U , O_000000(10,2)
, U ), + INST(kInstAdd , "add" , G(X86Arith) , F(Lock) , O(GqdwbMem) , O(GqdwbMem)|O(Imm) , U , U , O_000000(00,0) , U ), + INST(kInstAddpd , "addpd" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_660F00(58,U) , U ), + INST(kInstAddps , "addps" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_000F00(58,U) , U ), + INST(kInstAddsd , "addsd" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_F20F00(58,U) , U ), + INST(kInstAddss , "addss" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_F30F00(58,U) , U ), + INST(kInstAddsubpd , "addsubpd" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_660F00(D0,U) , U ), + INST(kInstAddsubps , "addsubps" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_F20F00(D0,U) , U ), + INST(kInstAesdec , "aesdec" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_660F38(DE,U) , U ), + INST(kInstAesdeclast , "aesdeclast" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_660F38(DF,U) , U ), + INST(kInstAesenc , "aesenc" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_660F38(DC,U) , U ), + INST(kInstAesenclast , "aesenclast" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_660F38(DD,U) , U ), + INST(kInstAesimc , "aesimc" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_660F38(DB,U) , U ), + INST(kInstAeskeygenassist , "aeskeygenassist" , G(ExtRmi) , F(None) , O(Xmm) , O(XmmMem) , O(Imm) , U , O_660F3A(DF,U) , U ), + INST(kInstAnd , "and" , G(X86Arith) , F(Lock) , O(GqdwbMem) , O(GqdwbMem)|O(Imm) , U , U , O_000000(20,4) , U ), + INST(kInstAndn , "andn" , G(AvxRvm) , F(None) , O(Gqd) , O(Gqd) , O(GqdMem) , U , O_000F38(F2,U) , U ), + INST(kInstAndnpd , "andnpd" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_660F00(55,U) , U ), + INST(kInstAndnps , "andnps" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_000F00(55,U) , U ), + INST(kInstAndpd , "andpd" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_660F00(54,U) , U ), + INST(kInstAndps , "andps" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_000F00(54,U) , U ), + INST(kInstBextr , "bextr" , G(AvxRmv) , F(None) , O(Gqd) , O(GqdMem) , O(Gqd) , U , O_000F38(F7,U) , U ), + INST(kInstBlendpd , "blendpd" , G(ExtRmi) , F(None) , O(Xmm) , O(XmmMem) , O(Imm) , U , O_660F3A(0D,U) , U ), + INST(kInstBlendps , "blendps" , G(ExtRmi) , F(None) , O(Xmm) , O(XmmMem) , O(Imm) , U , O_660F3A(0C,U) , U ), + INST(kInstBlendvpd , "blendvpd" , G(ExtRm) , F(Special) , O(Xmm) , O(XmmMem) , U , U , O_660F38(15,U) , U ), + INST(kInstBlendvps , "blendvps" , G(ExtRm) , F(Special) , O(Xmm) , O(XmmMem) , U , U , O_660F38(14,U) , U ), + INST(kInstBlsi , "blsi" , G(AvxVm) , F(None) , O(Gqd) , O(GqdMem) , U , U , O_000F38(F3,3) , U ), + INST(kInstBlsmsk , "blsmsk" , G(AvxVm) , F(None) , O(Gqd) , O(GqdMem) , U , U , O_000F38(F3,2) , U ), + INST(kInstBlsr , "blsr" , G(AvxVm) , F(None) , O(Gqd) , O(GqdMem) , U , U , O_000F38(F3,1) , U ), + INST(kInstBsf , "bsf" , G(X86RegRm) , F(None) , O(Gqdw) , O(GqdwMem) , U , U , O_000F00(BC,U) , U ), + INST(kInstBsr , "bsr" , G(X86RegRm) , F(None) , O(Gqdw) , O(GqdwMem) , U , U , O_000F00(BD,U) , U ), + INST(kInstBswap , "bswap" , G(X86BSwap) , F(None) , O(Gqd) , U , U , U , O_000F00(C8,U) , U ), + INST(kInstBt , "bt" , G(X86BTest) , F(Test) , O(Gqdw)|O(Mem) , O(Gqdw)|O(Imm) , U , U , O_000F00(A3,U) , O_000F00(BA,4) ), + INST(kInstBtc , "btc" , G(X86BTest) , F(Lock) , O(Gqdw)|O(Mem) , O(Gqdw)|O(Imm) , U , U , O_000F00(BB,U) , O_000F00(BA,7) ), + INST(kInstBtr , "btr" , G(X86BTest) , F(Lock) , O(Gqdw)|O(Mem) , O(Gqdw)|O(Imm) , U , U , 
O_000F00(B3,U) , O_000F00(BA,6) ), + INST(kInstBts , "bts" , G(X86BTest) , F(Lock) , O(Gqdw)|O(Mem) , O(Gqdw)|O(Imm) , U , U , O_000F00(AB,U) , O_000F00(BA,5) ), + INST(kInstBzhi , "bzhi" , G(AvxRmv) , F(None) , O(Gqd) , O(GqdMem) , O(Gqd) , U , O_000F38(F5,U) , U ), + INST(kInstCall , "call" , G(X86Call) , F(Flow) , O(Gqd) |O(Mem) , U , U , U , O_000000(FF,2) , O_000000(E8,U) ), + INST(kInstCbw , "cbw" , G(X86Op) , F(Special) , U , U , U , U , O_660000(98,U) , U ), + INST(kInstCdq , "cdq" , G(X86Op) , F(Special) , U , U , U , U , O_000000(99,U) , U ), + INST(kInstCdqe , "cdqe" , G(X86Op) , F(Special)|F(W), U , U , U , U , O_000000(98,U) , U ), + INST(kInstClc , "clc" , G(X86Op) , F(None) , U , U , U , U , O_000000(F8,U) , U ), + INST(kInstCld , "cld" , G(X86Op) , F(None) , U , U , U , U , O_000000(FC,U) , U ), + INST(kInstClflush , "clflush" , G(X86M) , F(None) , O(Mem) , U , U , U , O_000F00(AE,7) , U ), + INST(kInstCmc , "cmc" , G(X86Op) , F(None) , U , U , U , U , O_000000(F5,U) , U ), + INST(kInstCmova , "cmova" , G(X86RegRm) , F(None) , O(Gqdw) , O(GqdwMem) , U , U , O_000F00(47,U) , U ), + INST(kInstCmovae , "cmovae" , G(X86RegRm) , F(None) , O(Gqdw) , O(GqdwMem) , U , U , O_000F00(43,U) , U ), + INST(kInstCmovb , "cmovb" , G(X86RegRm) , F(None) , O(Gqdw) , O(GqdwMem) , U , U , O_000F00(42,U) , U ), + INST(kInstCmovbe , "cmovbe" , G(X86RegRm) , F(None) , O(Gqdw) , O(GqdwMem) , U , U , O_000F00(46,U) , U ), + INST(kInstCmovc , "cmovc" , G(X86RegRm) , F(None) , O(Gqdw) , O(GqdwMem) , U , U , O_000F00(42,U) , U ), + INST(kInstCmove , "cmove" , G(X86RegRm) , F(None) , O(Gqdw) , O(GqdwMem) , U , U , O_000F00(44,U) , U ), + INST(kInstCmovg , "cmovg" , G(X86RegRm) , F(None) , O(Gqdw) , O(GqdwMem) , U , U , O_000F00(4F,U) , U ), + INST(kInstCmovge , "cmovge" , G(X86RegRm) , F(None) , O(Gqdw) , O(GqdwMem) , U , U , O_000F00(4D,U) , U ), + INST(kInstCmovl , "cmovl" , G(X86RegRm) , F(None) , O(Gqdw) , O(GqdwMem) , U , U , O_000F00(4C,U) , U ), + INST(kInstCmovle , "cmovle" , G(X86RegRm) , F(None) , O(Gqdw) , O(GqdwMem) , U , U , O_000F00(4E,U) , U ), + INST(kInstCmovna , "cmovna" , G(X86RegRm) , F(None) , O(Gqdw) , O(GqdwMem) , U , U , O_000F00(46,U) , U ), + INST(kInstCmovnae , "cmovnae" , G(X86RegRm) , F(None) , O(Gqdw) , O(GqdwMem) , U , U , O_000F00(42,U) , U ), + INST(kInstCmovnb , "cmovnb" , G(X86RegRm) , F(None) , O(Gqdw) , O(GqdwMem) , U , U , O_000F00(43,U) , U ), + INST(kInstCmovnbe , "cmovnbe" , G(X86RegRm) , F(None) , O(Gqdw) , O(GqdwMem) , U , U , O_000F00(47,U) , U ), + INST(kInstCmovnc , "cmovnc" , G(X86RegRm) , F(None) , O(Gqdw) , O(GqdwMem) , U , U , O_000F00(43,U) , U ), + INST(kInstCmovne , "cmovne" , G(X86RegRm) , F(None) , O(Gqdw) , O(GqdwMem) , U , U , O_000F00(45,U) , U ), + INST(kInstCmovng , "cmovng" , G(X86RegRm) , F(None) , O(Gqdw) , O(GqdwMem) , U , U , O_000F00(4E,U) , U ), + INST(kInstCmovnge , "cmovnge" , G(X86RegRm) , F(None) , O(Gqdw) , O(GqdwMem) , U , U , O_000F00(4C,U) , U ), + INST(kInstCmovnl , "cmovnl" , G(X86RegRm) , F(None) , O(Gqdw) , O(GqdwMem) , U , U , O_000F00(4D,U) , U ), + INST(kInstCmovnle , "cmovnle" , G(X86RegRm) , F(None) , O(Gqdw) , O(GqdwMem) , U , U , O_000F00(4F,U) , U ), + INST(kInstCmovno , "cmovno" , G(X86RegRm) , F(None) , O(Gqdw) , O(GqdwMem) , U , U , O_000F00(41,U) , U ), + INST(kInstCmovnp , "cmovnp" , G(X86RegRm) , F(None) , O(Gqdw) , O(GqdwMem) , U , U , O_000F00(4B,U) , U ), + INST(kInstCmovns , "cmovns" , G(X86RegRm) , F(None) , O(Gqdw) , O(GqdwMem) , U , U , O_000F00(49,U) , U ), + INST(kInstCmovnz , "cmovnz" , G(X86RegRm) , 
F(None) , O(Gqdw) , O(GqdwMem) , U , U , O_000F00(45,U) , U ), + INST(kInstCmovo , "cmovo" , G(X86RegRm) , F(None) , O(Gqdw) , O(GqdwMem) , U , U , O_000F00(40,U) , U ), + INST(kInstCmovp , "cmovp" , G(X86RegRm) , F(None) , O(Gqdw) , O(GqdwMem) , U , U , O_000F00(4A,U) , U ), + INST(kInstCmovpe , "cmovpe" , G(X86RegRm) , F(None) , O(Gqdw) , O(GqdwMem) , U , U , O_000F00(4A,U) , U ), + INST(kInstCmovpo , "cmovpo" , G(X86RegRm) , F(None) , O(Gqdw) , O(GqdwMem) , U , U , O_000F00(4B,U) , U ), + INST(kInstCmovs , "cmovs" , G(X86RegRm) , F(None) , O(Gqdw) , O(GqdwMem) , U , U , O_000F00(48,U) , U ), + INST(kInstCmovz , "cmovz" , G(X86RegRm) , F(None) , O(Gqdw) , O(GqdwMem) , U , U , O_000F00(44,U) , U ), + INST(kInstCmp , "cmp" , G(X86Arith) , F(Test) , O(GqdwbMem) , O(GqdwbMem)|O(Imm) , U , U , O_000000(38,7) , U ), + INST(kInstCmppd , "cmppd" , G(ExtRmi) , F(None) , O(Xmm) , O(XmmMem) , O(Imm) , U , O_660F00(C2,U) , U ), + INST(kInstCmpps , "cmpps" , G(ExtRmi) , F(None) , O(Xmm) , O(XmmMem) , O(Imm) , U , O_000F00(C2,U) , U ), + INST(kInstCmpsd , "cmpsd" , G(ExtRmi) , F(None) , O(Xmm) , O(XmmMem) , O(Imm) , U , O_F20F00(C2,U) , U ), + INST(kInstCmpss , "cmpss" , G(ExtRmi) , F(None) , O(Xmm) , O(XmmMem) , O(Imm) , U , O_F30F00(C2,U) , U ), + INST(kInstCmpxchg , "cmpxchg" , G(X86RmReg) , F(Lock)|F(Special) , U , U , U , U , O_000F00(B0,U) , U ), + INST(kInstCmpxchg16b , "cmpxchg16b" , G(X86M) , F(Special)|F(W), O(Mem) , U , U , U , O_000F00(C7,1) , U ), + INST(kInstCmpxchg8b , "cmpxchg8b" , G(X86M) , F(Special) , O(Mem) , U , U , U , O_000F00(C7,1) , U ), + INST(kInstComisd , "comisd" , G(ExtRm) , F(Test) , O(Xmm) , O(XmmMem) , U , U , O_660F00(2F,U) , U ), + INST(kInstComiss , "comiss" , G(ExtRm) , F(Test) , O(Xmm) , O(XmmMem) , U , U , O_000F00(2F,U) , U ), + INST(kInstCpuid , "cpuid" , G(X86Op) , F(Special) , U , U , U , U , O_000F00(A2,U) , U ), + INST(kInstCqo , "cqo" , G(X86Op) , F(Special)|F(W), U , U , U , U , O_000000(99,U) , U ), + INST(kInstCrc32 , "crc32" , G(ExtCrc) , F(None) , O(Gqd) , O(GqdwbMem) , U , U , O_F20F38(F0,U) , U ), + INST(kInstCvtdq2pd , "cvtdq2pd" , G(ExtRm) , F(Move) , O(Xmm) , O(XmmMem) , U , U , O_F30F00(E6,U) , U ), + INST(kInstCvtdq2ps , "cvtdq2ps" , G(ExtRm) , F(Move) , O(Xmm) , O(XmmMem) , U , U , O_000F00(5B,U) , U ), + INST(kInstCvtpd2dq , "cvtpd2dq" , G(ExtRm) , F(Move) , O(Xmm) , O(XmmMem) , U , U , O_F20F00(E6,U) , U ), + INST(kInstCvtpd2pi , "cvtpd2pi" , G(ExtRm) , F(Move) , O(Mm) , O(XmmMem) , U , U , O_660F00(2D,U) , U ), + INST(kInstCvtpd2ps , "cvtpd2ps" , G(ExtRm) , F(Move) , O(Xmm) , O(XmmMem) , U , U , O_660F00(5A,U) , U ), + INST(kInstCvtpi2pd , "cvtpi2pd" , G(ExtRm) , F(Move) , O(Xmm) , O(MmMem) , U , U , O_660F00(2A,U) , U ), + INST(kInstCvtpi2ps , "cvtpi2ps" , G(ExtRm) , F(None) , O(Xmm) , O(MmMem) , U , U , O_000F00(2A,U) , U ), + INST(kInstCvtps2dq , "cvtps2dq" , G(ExtRm) , F(Move) , O(Xmm) , O(XmmMem) , U , U , O_660F00(5B,U) , U ), + INST(kInstCvtps2pd , "cvtps2pd" , G(ExtRm) , F(Move) , O(Xmm) , O(XmmMem) , U , U , O_000F00(5A,U) , U ), + INST(kInstCvtps2pi , "cvtps2pi" , G(ExtRm) , F(Move) , O(Mm) , O(XmmMem) , U , U , O_000F00(2D,U) , U ), + INST(kInstCvtsd2si , "cvtsd2si" , G(ExtRm_Q) , F(Move) , O(Gqd) , O(XmmMem) , U , U , O_F20F00(2D,U) , U ), + INST(kInstCvtsd2ss , "cvtsd2ss" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_F20F00(5A,U) , U ), + INST(kInstCvtsi2sd , "cvtsi2sd" , G(ExtRm_Q) , F(None) , O(Xmm) , O(GqdMem) , U , U , O_F20F00(2A,U) , U ), + INST(kInstCvtsi2ss , "cvtsi2ss" , G(ExtRm_Q) , F(None) , O(Xmm) , 
O(GqdMem) , U , U , O_F30F00(2A,U) , U ), + INST(kInstCvtss2sd , "cvtss2sd" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_F30F00(5A,U) , U ), + INST(kInstCvtss2si , "cvtss2si" , G(ExtRm_Q) , F(None) , O(Gqd) , O(XmmMem) , U , U , O_F30F00(2D,U) , U ), + INST(kInstCvttpd2dq , "cvttpd2dq" , G(ExtRm) , F(Move) , O(Xmm) , O(XmmMem) , U , U , O_660F00(E6,U) , U ), + INST(kInstCvttpd2pi , "cvttpd2pi" , G(ExtRm) , F(Move) , O(Mm) , O(XmmMem) , U , U , O_660F00(2C,U) , U ), + INST(kInstCvttps2dq , "cvttps2dq" , G(ExtRm) , F(Move) , O(Xmm) , O(XmmMem) , U , U , O_F30F00(5B,U) , U ), + INST(kInstCvttps2pi , "cvttps2pi" , G(ExtRm) , F(Move) , O(Mm) , O(XmmMem) , U , U , O_000F00(2C,U) , U ), + INST(kInstCvttsd2si , "cvttsd2si" , G(ExtRm_Q) , F(Move) , O(Gqd) , O(XmmMem) , U , U , O_F20F00(2C,U) , U ), + INST(kInstCvttss2si , "cvttss2si" , G(ExtRm_Q) , F(Move) , O(Gqd) , O(XmmMem) , U , U , O_F30F00(2C,U) , U ), + INST(kInstCwd , "cwd" , G(X86Op) , F(Special) , U , U , U , U , O_660000(99,U) , U ), + INST(kInstCwde , "cwde" , G(X86Op) , F(Special) , U , U , U , U , O_000000(98,U) , U ), + INST(kInstDaa , "daa" , G(X86Op) , F(Special) , U , U , U , U , O_000000(27,U) , U ), + INST(kInstDas , "das" , G(X86Op) , F(Special) , U , U , U , U , O_000000(2F,U) , U ), + INST(kInstDec , "dec" , G(X86IncDec) , F(Lock) , O(GqdwbMem) , U , U , U , O_000000(FE,1) , O_000000(48,U) ), + INST(kInstDiv , "div" , G(X86Rm_B) , F(Special) , U , U , U , U , O_000000(F6,6) , U ), + INST(kInstDivpd , "divpd" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_660F00(5E,U) , U ), + INST(kInstDivps , "divps" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_000F00(5E,U) , U ), + INST(kInstDivsd , "divsd" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_F20F00(5E,U) , U ), + INST(kInstDivss , "divss" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_F30F00(5E,U) , U ), + INST(kInstDppd , "dppd" , G(ExtRmi) , F(None) , O(Xmm) , O(XmmMem) , O(Imm) , U , O_660F3A(41,U) , U ), + INST(kInstDpps , "dpps" , G(ExtRmi) , F(None) , O(Xmm) , O(XmmMem) , O(Imm) , U , O_660F3A(40,U) , U ), + INST(kInstEmms , "emms" , G(X86Op) , F(None) , U , U , U , U , O_000F00(77,U) , U ), + INST(kInstEnter , "enter" , G(X86Enter) , F(Special) , U , U , U , U , O_000000(C8,U) , U ), + INST(kInstExtractps , "extractps" , G(ExtExtract) , F(Move) , O(Gqd)|O(Mem) , O(Xmm) , U , U , O_660F3A(17,U) , O_660F3A(17,U) ), + INST(kInstF2xm1 , "f2xm1" , G(FpuOp) , F(Fp) , U , U , U , U , O_00_X(D9F0,U) , U ), + INST(kInstFabs , "fabs" , G(FpuOp) , F(Fp) , U , U , U , U , O_00_X(D9E1,U) , U ), + INST(kInstFadd , "fadd" , G(FpuArith) , F(Fp)|F(Mem4_8) , O(FpMem) , O(Fp) , U , U , O_00_X(C0C0,0) , U ), + INST(kInstFaddp , "faddp" , G(FpuR) , F(Fp) , O(Fp) , U , U , U , O_00_X(DEC0,U) , U ), + INST(kInstFbld , "fbld" , G(X86M) , F(Fp) , O(Mem) , U , U , U , O_000000(DF,4) , U ), + INST(kInstFbstp , "fbstp" , G(X86M) , F(Fp) , O(Mem) , U , U , U , O_000000(DF,6) , U ), + INST(kInstFchs , "fchs" , G(FpuOp) , F(Fp) , U , U , U , U , O_00_X(D9E0,U) , U ), + INST(kInstFclex , "fclex" , G(FpuOp) , F(Fp) , U , U , U , U , O_9B_X(DBE2,U) , U ), + INST(kInstFcmovb , "fcmovb" , G(FpuR) , F(Fp) , O(Fp) , U , U , U , O_00_X(DAC0,U) , U ), + INST(kInstFcmovbe , "fcmovbe" , G(FpuR) , F(Fp) , O(Fp) , U , U , U , O_00_X(DAD0,U) , U ), + INST(kInstFcmove , "fcmove" , G(FpuR) , F(Fp) , O(Fp) , U , U , U , O_00_X(DAC8,U) , U ), + INST(kInstFcmovnb , "fcmovnb" , G(FpuR) , F(Fp) , O(Fp) , U , U , U , O_00_X(DBC0,U) , U ), + INST(kInstFcmovnbe , "fcmovnbe" , G(FpuR) , 
F(Fp) , O(Fp) , U , U , U , O_00_X(DBD0,U) , U ), + INST(kInstFcmovne , "fcmovne" , G(FpuR) , F(Fp) , O(Fp) , U , U , U , O_00_X(DBC8,U) , U ), + INST(kInstFcmovnu , "fcmovnu" , G(FpuR) , F(Fp) , O(Fp) , U , U , U , O_00_X(DBD8,U) , U ), + INST(kInstFcmovu , "fcmovu" , G(FpuR) , F(Fp) , O(Fp) , U , U , U , O_00_X(DAD8,U) , U ), + INST(kInstFcom , "fcom" , G(FpuCom) , F(Fp) , O(Fp)|O(Mem) , O(Fp) , U , U , O_00_X(D0D0,2) , U ), + INST(kInstFcomi , "fcomi" , G(FpuR) , F(Fp) , O(Fp) , U , U , U , O_00_X(DBF0,U) , U ), + INST(kInstFcomip , "fcomip" , G(FpuR) , F(Fp) , O(Fp) , U , U , U , O_00_X(DFF0,U) , U ), + INST(kInstFcomp , "fcomp" , G(FpuCom) , F(Fp) , O(Fp)|O(Mem) , O(Fp) , U , U , O_00_X(D8D8,3) , U ), + INST(kInstFcompp , "fcompp" , G(FpuOp) , F(Fp) , U , U , U , U , O_00_X(DED9,U) , U ), + INST(kInstFcos , "fcos" , G(FpuOp) , F(Fp) , U , U , U , U , O_00_X(D9FF,U) , U ), + INST(kInstFdecstp , "fdecstp" , G(FpuOp) , F(Fp) , U , U , U , U , O_00_X(D9F6,U) , U ), + INST(kInstFdiv , "fdiv" , G(FpuArith) , F(Fp)|F(Mem4_8) , O(FpMem) , O(Fp) , U , U , O_00_X(F0F8,6) , U ), + INST(kInstFdivp , "fdivp" , G(FpuR) , F(Fp) , O(Fp) , U , U , U , O_00_X(DEF8,U) , U ), + INST(kInstFdivr , "fdivr" , G(FpuArith) , F(Fp)|F(Mem4_8) , O(FpMem) , O(Fp) , U , U , O_00_X(F8F0,7) , U ), + INST(kInstFdivrp , "fdivrp" , G(FpuR) , F(Fp) , O(Fp) , U , U , U , O_00_X(DEF0,U) , U ), + INST(kInstFemms , "femms" , G(X86Op) , F(Fp) , U , U , U , U , O_000F00(0E,U) , U ), + INST(kInstFfree , "ffree" , G(FpuR) , F(Fp) , O(Fp) , U , U , U , O_00_X(DDC0,U) , U ), + INST(kInstFiadd , "fiadd" , G(FpuM) , F(Fp)|F(Mem2_4) , O(Mem) , U , U , U , O_000000(DA,0) , U ), + INST(kInstFicom , "ficom" , G(FpuM) , F(Fp)|F(Mem2_4) , O(Mem) , U , U , U , O_000000(DA,2) , U ), + INST(kInstFicomp , "ficomp" , G(FpuM) , F(Fp)|F(Mem2_4) , O(Mem) , U , U , U , O_000000(DA,3) , U ), + INST(kInstFidiv , "fidiv" , G(FpuM) , F(Fp)|F(Mem2_4) , O(Mem) , U , U , U , O_000000(DA,6) , U ), + INST(kInstFidivr , "fidivr" , G(FpuM) , F(Fp)|F(Mem2_4) , O(Mem) , U , U , U , O_000000(DA,7) , U ), + INST(kInstFild , "fild" , G(FpuM) , F(Fp)|F(Mem2_4_8) , O(Mem) , U , U , U , O_000000(DB,0) , O_000000(DF,5) ), + INST(kInstFimul , "fimul" , G(FpuM) , F(Fp)|F(Mem2_4) , O(Mem) , U , U , U , O_000000(DA,1) , U ), + INST(kInstFincstp , "fincstp" , G(FpuOp) , F(Fp) , U , U , U , U , O_00_X(D9F7,U) , U ), + INST(kInstFinit , "finit" , G(FpuOp) , F(Fp) , U , U , U , U , O_9B_X(DBE3,U) , U ), + INST(kInstFist , "fist" , G(FpuM) , F(Fp)|F(Mem2_4) , O(Mem) , U , U , U , O_000000(DB,2) , U ), + INST(kInstFistp , "fistp" , G(FpuM) , F(Fp)|F(Mem2_4_8) , O(Mem) , U , U , U , O_000000(DB,3) , O_000000(DF,7) ), + INST(kInstFisttp , "fisttp" , G(FpuM) , F(Fp)|F(Mem2_4_8) , O(Mem) , U , U , U , O_000000(DB,1) , O_000000(DD,1) ), + INST(kInstFisub , "fisub" , G(FpuM) , F(Fp)|F(Mem2_4) , O(Mem) , U , U , U , O_000000(DA,4) , U ), + INST(kInstFisubr , "fisubr" , G(FpuM) , F(Fp)|F(Mem2_4) , O(Mem) , U , U , U , O_000000(DA,5) , U ), + INST(kInstFld , "fld" , G(FpuFldFst) , F(Fp)|F(Mem4_8_10) , O(Mem) , U , U , U , O_000000(D9,0) , O_000000(DB,5) ), + INST(kInstFld1 , "fld1" , G(FpuOp) , F(Fp) , U , U , U , U , O_00_X(D9E8,U) , U ), + INST(kInstFldcw , "fldcw" , G(X86M) , F(Fp) , O(Mem) , U , U , U , O_000000(D9,5) , U ), + INST(kInstFldenv , "fldenv" , G(X86M) , F(Fp) , O(Mem) , U , U , U , O_000000(D9,4) , U ), + INST(kInstFldl2e , "fldl2e" , G(FpuOp) , F(Fp) , U , U , U , U , O_00_X(D9EA,U) , U ), + INST(kInstFldl2t , "fldl2t" , G(FpuOp) , F(Fp) , U , U , U , U , 
O_00_X(D9E9,U) , U ), + INST(kInstFldlg2 , "fldlg2" , G(FpuOp) , F(Fp) , U , U , U , U , O_00_X(D9EC,U) , U ), + INST(kInstFldln2 , "fldln2" , G(FpuOp) , F(Fp) , U , U , U , U , O_00_X(D9ED,U) , U ), + INST(kInstFldpi , "fldpi" , G(FpuOp) , F(Fp) , U , U , U , U , O_00_X(D9EB,U) , U ), + INST(kInstFldz , "fldz" , G(FpuOp) , F(Fp) , U , U , U , U , O_00_X(D9EE,U) , U ), + INST(kInstFmul , "fmul" , G(FpuArith) , F(Fp)|F(Mem4_8) , O(FpMem) , O(Fp) , U , U , O_00_X(C8C8,1) , U ), + INST(kInstFmulp , "fmulp" , G(FpuR) , F(Fp) , O(Fp) , U , U , U , O_00_X(DEC8,U) , U ), + INST(kInstFnclex , "fnclex" , G(FpuOp) , F(Fp) , U , U , U , U , O_00_X(DBE2,U) , U ), + INST(kInstFninit , "fninit" , G(FpuOp) , F(Fp) , U , U , U , U , O_00_X(DBE3,U) , U ), + INST(kInstFnop , "fnop" , G(FpuOp) , F(Fp) , U , U , U , U , O_00_X(D9D0,U) , U ), + INST(kInstFnsave , "fnsave" , G(X86M) , F(Fp) , O(Mem) , U , U , U , O_000000(DD,6) , U ), + INST(kInstFnstcw , "fnstcw" , G(X86M) , F(Fp) , O(Mem) , U , U , U , O_000000(D9,7) , U ), + INST(kInstFnstenv , "fnstenv" , G(X86M) , F(Fp) , O(Mem) , U , U , U , O_000000(D9,6) , U ), + INST(kInstFnstsw , "fnstsw" , G(FpuStsw) , F(Fp) , O(Mem) , U , U , U , O_000000(DD,7) , O_00_X(DFE0,U) ), + INST(kInstFpatan , "fpatan" , G(FpuOp) , F(Fp) , U , U , U , U , O_00_X(D9F3,U) , U ), + INST(kInstFprem , "fprem" , G(FpuOp) , F(Fp) , U , U , U , U , O_00_X(D9F8,U) , U ), + INST(kInstFprem1 , "fprem1" , G(FpuOp) , F(Fp) , U , U , U , U , O_00_X(D9F5,U) , U ), + INST(kInstFptan , "fptan" , G(FpuOp) , F(Fp) , U , U , U , U , O_00_X(D9F2,U) , U ), + INST(kInstFrndint , "frndint" , G(FpuOp) , F(Fp) , U , U , U , U , O_00_X(D9FC,U) , U ), + INST(kInstFrstor , "frstor" , G(X86M) , F(Fp) , O(Mem) , U , U , U , O_000000(DD,4) , U ), + INST(kInstFsave , "fsave" , G(X86M) , F(Fp) , O(Mem) , U , U , U , O_9B0000(DD,6) , U ), + INST(kInstFscale , "fscale" , G(FpuOp) , F(Fp) , U , U , U , U , O_00_X(D9FD,U) , U ), + INST(kInstFsin , "fsin" , G(FpuOp) , F(Fp) , U , U , U , U , O_00_X(D9FE,U) , U ), + INST(kInstFsincos , "fsincos" , G(FpuOp) , F(Fp) , U , U , U , U , O_00_X(D9FB,U) , U ), + INST(kInstFsqrt , "fsqrt" , G(FpuOp) , F(Fp) , U , U , U , U , O_00_X(D9FA,U) , U ), + INST(kInstFst , "fst" , G(FpuFldFst) , F(Fp)|F(Mem4_8) , O(Mem) , U , U , U , O_000000(D9,2) , U ), + INST(kInstFstcw , "fstcw" , G(X86M) , F(Fp) , O(Mem) , U , U , U , O_9B0000(D9,7) , U ), + INST(kInstFstenv , "fstenv" , G(X86M) , F(Fp) , O(Mem) , U , U , U , O_9B0000(D9,6) , U ), + INST(kInstFstp , "fstp" , G(FpuFldFst) , F(Fp)|F(Mem4_8_10) , O(Mem) , U , U , U , O_000000(D9,3) , O_000000(DB,7) ), + INST(kInstFstsw , "fstsw" , G(FpuStsw) , F(Fp) , O(Mem) , U , U , U , O_9B0000(DD,7) , O_9B_X(DFE0,U) ), + INST(kInstFsub , "fsub" , G(FpuArith) , F(Fp)|F(Mem4_8) , O(FpMem) , O(Fp) , U , U , O_00_X(E0E8,4) , U ), + INST(kInstFsubp , "fsubp" , G(FpuR) , F(Fp) , O(Fp) , U , U , U , O_00_X(DEE8,U) , U ), + INST(kInstFsubr , "fsubr" , G(FpuArith) , F(Fp)|F(Mem4_8) , O(FpMem) , O(Fp) , U , U , O_00_X(E8E0,5) , U ), + INST(kInstFsubrp , "fsubrp" , G(FpuR) , F(Fp) , O(Fp) , U , U , U , O_00_X(DEE0,U) , U ), + INST(kInstFtst , "ftst" , G(FpuOp) , F(Fp) , U , U , U , U , O_00_X(D9E4,U) , U ), + INST(kInstFucom , "fucom" , G(FpuR) , F(Fp) , O(Fp) , U , U , U , O_00_X(DDE0,U) , U ), + INST(kInstFucomi , "fucomi" , G(FpuR) , F(Fp) , O(Fp) , U , U , U , O_00_X(DBE8,U) , U ), + INST(kInstFucomip , "fucomip" , G(FpuR) , F(Fp) , O(Fp) , U , U , U , O_00_X(DFE8,U) , U ), + INST(kInstFucomp , "fucomp" , G(FpuR) , F(Fp) , O(Fp) , U , U , U , 
O_00_X(DDE8,U) , U ), + INST(kInstFucompp , "fucompp" , G(FpuOp) , F(Fp) , U , U , U , U , O_00_X(DAE9,U) , U ), + INST(kInstFwait , "fwait" , G(X86Op) , F(Fp) , U , U , U , U , O_000000(9B,U) , U ), + INST(kInstFxam , "fxam" , G(FpuOp) , F(Fp) , U , U , U , U , O_00_X(D9E5,U) , U ), + INST(kInstFxch , "fxch" , G(FpuR) , F(Fp) , O(Fp) , U , U , U , O_00_X(D9C8,U) , U ), + INST(kInstFxrstor , "fxrstor" , G(X86M) , F(Fp) , O(Mem) , U , U , U , O_000F00(AE,1) , U ), + INST(kInstFxsave , "fxsave" , G(X86M) , F(Fp) , O(Mem) , U , U , U , O_000F00(AE,0) , U ), + INST(kInstFxtract , "fxtract" , G(FpuOp) , F(Fp) , U , U , U , U , O_00_X(D9F4,U) , U ), + INST(kInstFyl2x , "fyl2x" , G(FpuOp) , F(Fp) , U , U , U , U , O_00_X(D9F1,U) , U ), + INST(kInstFyl2xp1 , "fyl2xp1" , G(FpuOp) , F(Fp) , U , U , U , U , O_00_X(D9F9,U) , U ), + INST(kInstHaddpd , "haddpd" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_660F00(7C,U) , U ), + INST(kInstHaddps , "haddps" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_F20F00(7C,U) , U ), + INST(kInstHsubpd , "hsubpd" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_660F00(7D,U) , U ), + INST(kInstHsubps , "hsubps" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_F20F00(7D,U) , U ), + INST(kInstIdiv , "idiv" , G(X86Rm_B) , F(Special) , 0 , 0 , U , U , O_000000(F6,7) , U ), + INST(kInstImul , "imul" , G(X86Imul) , F(Special) , 0 , 0 , U , U , U , U ), + INST(kInstInc , "inc" , G(X86IncDec) , F(Lock) , O(GqdwbMem) , U , U , U , O_000000(FE,0) , O_000000(40,U) ), + INST(kInstInt , "int" , G(X86Int) , F(None) , U , U , U , U , O_000000(CC,U) , U ), + INST(kInstJa , "ja" , G(X86Jcc) , F(Flow) , U , U , U , U , O_000000(77,U) , U ), + INST(kInstJae , "jae" , G(X86Jcc) , F(Flow) , U , U , U , U , O_000000(73,U) , U ), + INST(kInstJb , "jb" , G(X86Jcc) , F(Flow) , U , U , U , U , O_000000(72,U) , U ), + INST(kInstJbe , "jbe" , G(X86Jcc) , F(Flow) , U , U , U , U , O_000000(76,U) , U ), + INST(kInstJc , "jc" , G(X86Jcc) , F(Flow) , U , U , U , U , O_000000(72,U) , U ), + INST(kInstJe , "je" , G(X86Jcc) , F(Flow) , U , U , U , U , O_000000(74,U) , U ), + INST(kInstJg , "jg" , G(X86Jcc) , F(Flow) , U , U , U , U , O_000000(7F,U) , U ), + INST(kInstJge , "jge" , G(X86Jcc) , F(Flow) , U , U , U , U , O_000000(7D,U) , U ), + INST(kInstJl , "jl" , G(X86Jcc) , F(Flow) , U , U , U , U , O_000000(7C,U) , U ), + INST(kInstJle , "jle" , G(X86Jcc) , F(Flow) , U , U , U , U , O_000000(7E,U) , U ), + INST(kInstJna , "jna" , G(X86Jcc) , F(Flow) , U , U , U , U , O_000000(76,U) , U ), + INST(kInstJnae , "jnae" , G(X86Jcc) , F(Flow) , U , U , U , U , O_000000(72,U) , U ), + INST(kInstJnb , "jnb" , G(X86Jcc) , F(Flow) , U , U , U , U , O_000000(73,U) , U ), + INST(kInstJnbe , "jnbe" , G(X86Jcc) , F(Flow) , U , U , U , U , O_000000(77,U) , U ), + INST(kInstJnc , "jnc" , G(X86Jcc) , F(Flow) , U , U , U , U , O_000000(73,U) , U ), + INST(kInstJne , "jne" , G(X86Jcc) , F(Flow) , U , U , U , U , O_000000(75,U) , U ), + INST(kInstJng , "jng" , G(X86Jcc) , F(Flow) , U , U , U , U , O_000000(7E,U) , U ), + INST(kInstJnge , "jnge" , G(X86Jcc) , F(Flow) , U , U , U , U , O_000000(7C,U) , U ), + INST(kInstJnl , "jnl" , G(X86Jcc) , F(Flow) , U , U , U , U , O_000000(7D,U) , U ), + INST(kInstJnle , "jnle" , G(X86Jcc) , F(Flow) , U , U , U , U , O_000000(7F,U) , U ), + INST(kInstJno , "jno" , G(X86Jcc) , F(Flow) , U , U , U , U , O_000000(71,U) , U ), + INST(kInstJnp , "jnp" , G(X86Jcc) , F(Flow) , U , U , U , U , O_000000(7B,U) , U ), + INST(kInstJns , "jns" , G(X86Jcc) , F(Flow) , U ,
U , U , U , O_000000(79,U) , U ), + INST(kInstJnz , "jnz" , G(X86Jcc) , F(Flow) , U , U , U , U , O_000000(75,U) , U ), + INST(kInstJo , "jo" , G(X86Jcc) , F(Flow) , U , U , U , U , O_000000(70,U) , U ), + INST(kInstJp , "jp" , G(X86Jcc) , F(Flow) , U , U , U , U , O_000000(7A,U) , U ), + INST(kInstJpe , "jpe" , G(X86Jcc) , F(Flow) , U , U , U , U , O_000000(7A,U) , U ), + INST(kInstJpo , "jpo" , G(X86Jcc) , F(Flow) , U , U , U , U , O_000000(7B,U) , U ), + INST(kInstJs , "js" , G(X86Jcc) , F(Flow) , U , U , U , U , O_000000(78,U) , U ), + INST(kInstJz , "jz" , G(X86Jcc) , F(Flow) , U , U , U , U , O_000000(74,U) , U ), + INST(kInstJmp , "jmp" , G(X86Jmp) , F(Flow) , U , U , U , U , O_000000(FF,4) , U ), + INST(kInstLahf , "lahf" , G(X86Op) , F(Special) , U , U , U , U , O_000000(9F,U) , U ), + INST(kInstLddqu , "lddqu" , G(ExtRm) , F(Move) , O(Xmm) , O(Mem) , U , U , O_F20F00(F0,U) , U ), + INST(kInstLdmxcsr , "ldmxcsr" , G(X86M) , F(None) , O(Mem) , U , U , U , O_000F00(AE,2) , U ), + INST(kInstLea , "lea" , G(X86Lea) , F(Move) , O(Gqd) , O(Mem) , U , U , O_000000(8D,U) , U ), + INST(kInstLeave , "leave" , G(X86Op) , F(Special) , U , U , U , U , O_000000(C9,U) , U ), + INST(kInstLfence , "lfence" , G(ExtFence) , F(None) , U , U , U , U , O_000F00(AE,5) , U ), + INST(kInstLzcnt , "lzcnt" , G(X86RegRm) , F(None) , O(Gqdw) , O(GqdwMem) , U , U , O_F30F00(BD,U) , U ), + INST(kInstMaskmovdqu , "maskmovdqu" , G(ExtRm) , F(Special) , O(Xmm) , O(Xmm) , U , U , O_660F00(F7,U) , U ), + INST(kInstMaskmovq , "maskmovq" , G(ExtRm) , F(Special) , O(Mm) , O(Mm) , U , U , O_000F00(F7,U) , U ), + INST(kInstMaxpd , "maxpd" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_660F00(5F,U) , U ), + INST(kInstMaxps , "maxps" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_000F00(5F,U) , U ), + INST(kInstMaxsd , "maxsd" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_F20F00(5F,U) , U ), + INST(kInstMaxss , "maxss" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_F30F00(5F,U) , U ), + INST(kInstMfence , "mfence" , G(ExtFence) , F(None) , U , U , U , U , O_000F00(AE,6) , U ), + INST(kInstMinpd , "minpd" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_660F00(5D,U) , U ), + INST(kInstMinps , "minps" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_000F00(5D,U) , U ), + INST(kInstMinsd , "minsd" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_F20F00(5D,U) , U ), + INST(kInstMinss , "minss" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_F30F00(5D,U) , U ), + INST(kInstMonitor , "monitor" , G(X86Op) , F(Special) , U , U , U , U , O_000F01(C8,U) , U ), + INST(kInstMov , "mov" , G(X86Mov) , F(Move) , O(GqdwbMem) , O(GqdwbMem)|O(Imm) , U , U , U , U ), + INST(kInstMovapd , "movapd" , G(ExtMov) , F(Move) , O(XmmMem) , O(XmmMem) , U , U , O_660F00(28,U) , O_660F00(29,U) ), + INST(kInstMovaps , "movaps" , G(ExtMov) , F(Move) , O(XmmMem) , O(XmmMem) , U , U , O_000F00(28,U) , O_000F00(29,U) ), + INST(kInstMovbe , "movbe" , G(ExtMovBe) , F(Move) , O(GqdwMem) , O(GqdwMem) , U , U , O_000F38(F0,U) , O_000F38(F1,U) ), + INST(kInstMovd , "movd" , G(ExtMovD) , F(Move) , O(Gd)|O(MmXmmMem) , O(Gd)|O(MmXmmMem) , U , U , O_000F00(6E,U) , O_000F00(7E,U) ), + INST(kInstMovddup , "movddup" , G(ExtMov) , F(Move) , O(Xmm) , O(XmmMem) , U , U , O_F20F00(12,U) , U ), + INST(kInstMovdq2q , "movdq2q" , G(ExtMov) , F(Move) , O(Mm) , O(Xmm) , U , U , O_F20F00(D6,U) , U ), + INST(kInstMovdqa , "movdqa" , G(ExtMov) , F(Move) , O(XmmMem) , O(XmmMem) , U , U , O_660F00(6F,U) , O_660F00(7F,U) ), +
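+ // How to read the two opcode columns: each O_PPMMxx() macro defined above
+ // packs the mandatory prefix (PP_00/66/9B/F2/F3), the opcode map (MM_00,
+ // MM_0F, MM_0F01, MM_0F38, MM_0F3A), the opcode byte, and an optional
+ // ModRM reg-field extension (the second argument; U means unused) into a
+ // single dword. Expanded, the movdqa row directly above is simply:
+ //
+ //   O_660F00(6F,U) == (kInstOpCode_PP_66 | kInstOpCode_MM_0F | 0x6F
+ //                      | (0 << kInstOpCode_O_Shift))  // 66 0F 6F /r (load)
+ //
+ // with O_660F00(7F,U) as the 66 0F 7F /r store form; groups such as
+ // ExtMov presumably choose between the two based on operand direction.
+ // FPU rows use O_00_X() to carry both bytes of a two-byte escape in one
+ // value, e.g. O_00_X(D9F0,U) for f2xm1 (D9 F0).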
INST(kInstMovdqu , "movdqu" , G(ExtMov) , F(Move) , O(XmmMem) , O(XmmMem) , U , U , O_F30F00(6F,U) , O_F30F00(7F,U) ), + INST(kInstMovhlps , "movhlps" , G(ExtMov) , F(None) , O(Xmm) , O(Xmm) , U , U , O_000F00(12,U) , U ), + INST(kInstMovhpd , "movhpd" , G(ExtMov) , F(None) , O(XmmMem) , O(XmmMem) , U , U , O_660F00(16,U) , O_660F00(17,U) ), + INST(kInstMovhps , "movhps" , G(ExtMov) , F(None) , O(XmmMem) , O(XmmMem) , U , U , O_000F00(16,U) , O_000F00(17,U) ), + INST(kInstMovlhps , "movlhps" , G(ExtMov) , F(None) , O(Xmm) , O(Xmm) , U , U , O_000F00(16,U) , U ), + INST(kInstMovlpd , "movlpd" , G(ExtMov) , F(None) , O(XmmMem) , O(XmmMem) , U , U , O_660F00(12,U) , O_660F00(13,U) ), + INST(kInstMovlps , "movlps" , G(ExtMov) , F(None) , O(XmmMem) , O(XmmMem) , U , U , O_000F00(12,U) , O_000F00(13,U) ), + INST(kInstMovmskpd , "movmskpd" , G(ExtMovNoRexW) , F(Move) , O(Gqd) , O(Xmm) , U , U , O_660F00(50,U) , U ), + INST(kInstMovmskps , "movmskps" , G(ExtMovNoRexW) , F(Move) , O(Gqd) , O(Xmm) , U , U , O_000F00(50,U) , U ), + INST(kInstMovntdq , "movntdq" , G(ExtMov) , F(Move) , O(Mem) , O(Xmm) , U , U , U , O_660F00(E7,U) ), + INST(kInstMovntdqa , "movntdqa" , G(ExtMov) , F(Move) , O(Xmm) , O(Mem) , U , U , O_660F38(2A,U) , U ), + INST(kInstMovnti , "movnti" , G(ExtMov) , F(Move) , O(Mem) , O(Gqd) , U , U , U , O_000F00(C3,U) ), + INST(kInstMovntpd , "movntpd" , G(ExtMov) , F(Move) , O(Mem) , O(Xmm) , U , U , U , O_660F00(2B,U) ), + INST(kInstMovntps , "movntps" , G(ExtMov) , F(Move) , O(Mem) , O(Xmm) , U , U , U , O_000F00(2B,U) ), + INST(kInstMovntq , "movntq" , G(ExtMov) , F(Move) , O(Mem) , O(Mm) , U , U , U , O_000F00(E7,U) ), + INST(kInstMovptr , "mov_ptr" , G(X86MovPtr) , F(Move)|F(Special) , O(Gqdwb) , O(Imm) , U , U , O_000000(A0,U) , O_000000(A2,U) ), + INST(kInstMovq , "movq" , G(ExtMovQ) , F(Move) , O(Gq)|O(MmXmmMem) , O(Gq)|O(MmXmmMem) , U , U , U , U ), + INST(kInstMovq2dq , "movq2dq" , G(ExtRm) , F(Move) , O(Xmm) , O(Mm) , U , U , O_F30F00(D6,U) , U ), + INST(kInstMovsd , "movsd" , G(ExtMov) , F(Move) , O(XmmMem) , O(XmmMem) , U , U , O_F20F00(10,U) , O_F20F00(11,U) ), + INST(kInstMovshdup , "movshdup" , G(ExtRm) , F(Move) , O(Xmm) , O(XmmMem) , U , U , O_F30F00(16,U) , U ), + INST(kInstMovsldup , "movsldup" , G(ExtRm) , F(Move) , O(Xmm) , O(XmmMem) , U , U , O_F30F00(12,U) , U ), + INST(kInstMovss , "movss" , G(ExtMov) , F(Move) , O(XmmMem) , O(XmmMem) , U , U , O_F30F00(10,U) , O_F30F00(11,U) ), + INST(kInstMovsx , "movsx" , G(X86MovSxZx) , F(None) , O(Gqdw) , O(GwbMem) , U , U , O_000F00(BE,U) , U ), + INST(kInstMovsxd , "movsxd" , G(X86MovSxZx) , F(Move) , O(Gq) , O(GdMem) , U , U , O_000000(63,U) , U ), + INST(kInstMovupd , "movupd" , G(ExtMov) , F(Move) , O(XmmMem) , O(XmmMem) , U , U , O_660F00(10,U) , O_660F00(11,U) ), + INST(kInstMovups , "movups" , G(ExtMov) , F(Move) , O(XmmMem) , O(XmmMem) , U , U , O_000F00(10,U) , O_000F00(11,U) ), + INST(kInstMovzx , "movzx" , G(X86MovSxZx) , F(Move) , O(Gqdw) , O(GwbMem) , U , U , O_000F00(B6,U) , U ), + INST(kInstMpsadbw , "mpsadbw" , G(ExtRmi) , F(None) , O(Xmm) , O(XmmMem) , O(Imm) , U , O_660F3A(42,U) , U ), + INST(kInstMul , "mul" , G(X86Rm_B) , F(Special) , 0 , 0 , U , U , O_000000(F6,4) , U ), + INST(kInstMulpd , "mulpd" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_660F00(59,U) , U ), + INST(kInstMulps , "mulps" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_000F00(59,U) , U ), + INST(kInstMulsd , "mulsd" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_F20F00(59,U) , U ), + INST(kInstMulss , "mulss" , 
G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_F30F00(59,U) , U ), + INST(kInstMulx , "mulx" , G(AvxRvm) , F(None) , O(Gqd) , O(Gqd) , O(GqdMem) , U , O_F20F38(F6,U) , U ), + INST(kInstMwait , "mwait" , G(X86Op) , F(Special) , U , U , U , U , O_000F01(C9,U) , U ), + INST(kInstNeg , "neg" , G(X86Rm_B) , F(Lock) , O(GqdwbMem) , U , U , U , O_000000(F6,3) , U ), + INST(kInstNop , "nop" , G(X86Op) , F(None) , U , U , U , U , O_000000(90,U) , U ), + INST(kInstNot , "not" , G(X86Rm_B) , F(Lock) , O(GqdwbMem) , U , U , U , O_000000(F6,2) , U ), + INST(kInstOr , "or" , G(X86Arith) , F(Lock) , O(GqdwbMem) , O(GqdwbMem)|O(Imm) , U , U , O_000000(08,1) , U ), + INST(kInstOrpd , "orpd" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_660F00(56,U) , U ), + INST(kInstOrps , "orps" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_000F00(56,U) , U ), + INST(kInstPabsb , "pabsb" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F38(1C,U) , U ), + INST(kInstPabsd , "pabsd" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F38(1E,U) , U ), + INST(kInstPabsw , "pabsw" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F38(1D,U) , U ), + INST(kInstPackssdw , "packssdw" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(6B,U) , U ), + INST(kInstPacksswb , "packsswb" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(63,U) , U ), + INST(kInstPackusdw , "packusdw" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_660F38(2B,U) , U ), + INST(kInstPackuswb , "packuswb" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(67,U) , U ), + INST(kInstPaddb , "paddb" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(FC,U) , U ), + INST(kInstPaddd , "paddd" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(FE,U) , U ), + INST(kInstPaddq , "paddq" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(D4,U) , U ), + INST(kInstPaddsb , "paddsb" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(EC,U) , U ), + INST(kInstPaddsw , "paddsw" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(ED,U) , U ), + INST(kInstPaddusb , "paddusb" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(DC,U) , U ), + INST(kInstPaddusw , "paddusw" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(DD,U) , U ), + INST(kInstPaddw , "paddw" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(FD,U) , U ), + INST(kInstPalignr , "palignr" , G(ExtRmi_P) , F(None) , O(MmXmm) , O(MmXmmMem) , O(Imm) , U , O_000F3A(0F,U) , U ), + INST(kInstPand , "pand" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(DB,U) , U ), + INST(kInstPandn , "pandn" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(DF,U) , U ), + INST(kInstPause , "pause" , G(X86Op) , F(None) , U , U , U , U , O_F30000(90,U) , U ), + INST(kInstPavgb , "pavgb" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(E0,U) , U ), + INST(kInstPavgw , "pavgw" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(E3,U) , U ), + INST(kInstPblendvb , "pblendvb" , G(ExtRm) , F(Special) , O(Xmm) , O(XmmMem) , U , U , O_660F38(10,U) , U ), + INST(kInstPblendw , "pblendw" , G(ExtRmi) , F(None) , O(Xmm) , O(XmmMem) , O(Imm) , U , O_660F3A(0E,U) , U ), + INST(kInstPclmulqdq , "pclmulqdq" , G(ExtRmi) , F(None) , O(Xmm) , O(XmmMem) , O(Imm) , U , O_660F3A(44,U) , U ), + INST(kInstPcmpeqb , "pcmpeqb" , G(ExtRm_P) , F(None) , O(MmXmm) 
, O(MmXmmMem) , U , U , O_000F00(74,U) , U ), + INST(kInstPcmpeqd , "pcmpeqd" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(76,U) , U ), + INST(kInstPcmpeqq , "pcmpeqq" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_660F38(29,U) , U ), + INST(kInstPcmpeqw , "pcmpeqw" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(75,U) , U ), + INST(kInstPcmpestri , "pcmpestri" , G(ExtRmi) , F(None) , O(Xmm) , O(XmmMem) , O(Imm) , U , O_660F3A(61,U) , U ), + INST(kInstPcmpestrm , "pcmpestrm" , G(ExtRmi) , F(None) , O(Xmm) , O(XmmMem) , O(Imm) , U , O_660F3A(60,U) , U ), + INST(kInstPcmpgtb , "pcmpgtb" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(64,U) , U ), + INST(kInstPcmpgtd , "pcmpgtd" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(66,U) , U ), + INST(kInstPcmpgtq , "pcmpgtq" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_660F38(37,U) , U ), + INST(kInstPcmpgtw , "pcmpgtw" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(65,U) , U ), + INST(kInstPcmpistri , "pcmpistri" , G(ExtRmi) , F(None) , O(Xmm) , O(XmmMem) , O(Imm) , U , O_660F3A(63,U) , U ), + INST(kInstPcmpistrm , "pcmpistrm" , G(ExtRmi) , F(None) , O(Xmm) , O(XmmMem) , O(Imm) , U , O_660F3A(62,U) , U ), + INST(kInstPdep , "pdep" , G(AvxRvm) , F(None) , O(Gqd) , O(Gqd) , O(GqdMem) , U , O_F20F38(F5,U) , U ), + INST(kInstPext , "pext" , G(AvxRvm) , F(None) , O(Gqd) , O(Gqd) , O(GqdMem) , U , O_F30F38(F5,U) , U ), + INST(kInstPextrb , "pextrb" , G(ExtExtract) , F(Move) , O(Gd)|O(Gb)|O(Mem) , O(Xmm) , U , U , O_000F3A(14,U) , O_000F3A(14,U) ), + INST(kInstPextrd , "pextrd" , G(ExtExtract) , F(Move) , O(GdMem) , O(Xmm) , U , U , O_000F3A(16,U) , O_000F3A(16,U) ), + INST(kInstPextrq , "pextrq" , G(ExtExtract) , F(Move) |F(W), O(GqdMem) , O(Xmm) , U , U , O_000F3A(16,U) , O_000F3A(16,U) ), + INST(kInstPextrw , "pextrw" , G(ExtExtract) , F(Move) , O(GdMem) , O(MmXmm) , U , U , O_000F00(C5,U) , O_000F3A(15,U) ), + INST(kInstPf2id , "pf2id" , G(3dNow) , F(None) , O(Mm) , O(MmMem) , U , U , O_000F0F(1D,U) , U ), + INST(kInstPf2iw , "pf2iw" , G(3dNow) , F(None) , O(Mm) , O(MmMem) , U , U , O_000F0F(1C,U) , U ), + INST(kInstPfacc , "pfacc" , G(3dNow) , F(None) , O(Mm) , O(MmMem) , U , U , O_000F0F(AE,U) , U ), + INST(kInstPfadd , "pfadd" , G(3dNow) , F(None) , O(Mm) , O(MmMem) , U , U , O_000F0F(9E,U) , U ), + INST(kInstPfcmpeq , "pfcmpeq" , G(3dNow) , F(None) , O(Mm) , O(MmMem) , U , U , O_000F0F(B0,U) , U ), + INST(kInstPfcmpge , "pfcmpge" , G(3dNow) , F(None) , O(Mm) , O(MmMem) , U , U , O_000F0F(90,U) , U ), + INST(kInstPfcmpgt , "pfcmpgt" , G(3dNow) , F(None) , O(Mm) , O(MmMem) , U , U , O_000F0F(A0,U) , U ), + INST(kInstPfmax , "pfmax" , G(3dNow) , F(None) , O(Mm) , O(MmMem) , U , U , O_000F0F(A4,U) , U ), + INST(kInstPfmin , "pfmin" , G(3dNow) , F(None) , O(Mm) , O(MmMem) , U , U , O_000F0F(94,U) , U ), + INST(kInstPfmul , "pfmul" , G(3dNow) , F(None) , O(Mm) , O(MmMem) , U , U , O_000F0F(B4,U) , U ), + INST(kInstPfnacc , "pfnacc" , G(3dNow) , F(None) , O(Mm) , O(MmMem) , U , U , O_000F0F(8A,U) , U ), + INST(kInstPfpnacc , "pfpnacc" , G(3dNow) , F(None) , O(Mm) , O(MmMem) , U , U , O_000F0F(8E,U) , U ), + INST(kInstPfrcp , "pfrcp" , G(3dNow) , F(None) , O(Mm) , O(MmMem) , U , U , O_000F0F(96,U) , U ), + INST(kInstPfrcpit1 , "pfrcpit1" , G(3dNow) , F(None) , O(Mm) , O(MmMem) , U , U , O_000F0F(A6,U) , U ), + INST(kInstPfrcpit2 , "pfrcpit2" , G(3dNow) , F(None) , O(Mm) , O(MmMem) , U , U , O_000F0F(B6,U) , U ), + INST(kInstPfrsqit1 , 
"pfrsqit1" , G(3dNow) , F(None) , O(Mm) , O(MmMem) , U , U , O_000F0F(A7,U) , U ), + INST(kInstPfrsqrt , "pfrsqrt" , G(3dNow) , F(None) , O(Mm) , O(MmMem) , U , U , O_000F0F(97,U) , U ), + INST(kInstPfsub , "pfsub" , G(3dNow) , F(None) , O(Mm) , O(MmMem) , U , U , O_000F0F(9A,U) , U ), + INST(kInstPfsubr , "pfsubr" , G(3dNow) , F(None) , O(Mm) , O(MmMem) , U , U , O_000F0F(AA,U) , U ), + INST(kInstPhaddd , "phaddd" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F38(02,U) , U ), + INST(kInstPhaddsw , "phaddsw" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F38(03,U) , U ), + INST(kInstPhaddw , "phaddw" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F38(01,U) , U ), + INST(kInstPhminposuw , "phminposuw" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_660F38(41,U) , U ), + INST(kInstPhsubd , "phsubd" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F38(06,U) , U ), + INST(kInstPhsubsw , "phsubsw" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F38(07,U) , U ), + INST(kInstPhsubw , "phsubw" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F38(05,U) , U ), + INST(kInstPi2fd , "pi2fd" , G(3dNow) , F(None) , O(Mm) , O(MmMem) , U , U , O_000F0F(0D,U) , U ), + INST(kInstPi2fw , "pi2fw" , G(3dNow) , F(None) , O(Mm) , O(MmMem) , U , U , O_000F0F(0C,U) , U ), + INST(kInstPinsrb , "pinsrb" , G(ExtRmi) , F(None) , O(Xmm) , O(GdMem) , O(Imm) , U , O_660F3A(20,U) , U ), + INST(kInstPinsrd , "pinsrd" , G(ExtRmi) , F(None) , O(Xmm) , O(GdMem) , O(Imm) , U , O_660F3A(22,U) , U ), + INST(kInstPinsrq , "pinsrq" , G(ExtRmi) , F(W), O(Xmm) , O(GqMem) , O(Imm) , U , O_660F3A(22,U) , U ), + INST(kInstPinsrw , "pinsrw" , G(ExtRmi_P) , F(None) , O(MmXmm) , O(GdMem) , O(Imm) , U , O_000F00(C4,U) , U ), + INST(kInstPmaddubsw , "pmaddubsw" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F38(04,U) , U ), + INST(kInstPmaddwd , "pmaddwd" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(F5,U) , U ), + INST(kInstPmaxsb , "pmaxsb" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_660F38(3C,U) , U ), + INST(kInstPmaxsd , "pmaxsd" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_660F38(3D,U) , U ), + INST(kInstPmaxsw , "pmaxsw" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(EE,U) , U ), + INST(kInstPmaxub , "pmaxub" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(DE,U) , U ), + INST(kInstPmaxud , "pmaxud" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_660F38(3F,U) , U ), + INST(kInstPmaxuw , "pmaxuw" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_660F38(3E,U) , U ), + INST(kInstPminsb , "pminsb" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_660F38(38,U) , U ), + INST(kInstPminsd , "pminsd" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_660F38(39,U) , U ), + INST(kInstPminsw , "pminsw" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(EA,U) , U ), + INST(kInstPminub , "pminub" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(DA,U) , U ), + INST(kInstPminud , "pminud" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_660F38(3B,U) , U ), + INST(kInstPminuw , "pminuw" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_660F38(3A,U) , U ), + INST(kInstPmovmskb , "pmovmskb" , G(ExtRm_Q) , F(Move) , O(Gqd) , O(MmXmm) , U , U , O_000F00(D7,U) , U ), + INST(kInstPmovsxbd , "pmovsxbd" , G(ExtRm) , F(Move) , O(Xmm) , O(XmmMem) , U , U , O_660F38(21,U) , U ), + INST(kInstPmovsxbq , "pmovsxbq" , 
G(ExtRm) , F(Move) , O(Xmm) , O(XmmMem) , U , U , O_660F38(22,U) , U ), + INST(kInstPmovsxbw , "pmovsxbw" , G(ExtRm) , F(Move) , O(Xmm) , O(XmmMem) , U , U , O_660F38(20,U) , U ), + INST(kInstPmovsxdq , "pmovsxdq" , G(ExtRm) , F(Move) , O(Xmm) , O(XmmMem) , U , U , O_660F38(25,U) , U ), + INST(kInstPmovsxwd , "pmovsxwd" , G(ExtRm) , F(Move) , O(Xmm) , O(XmmMem) , U , U , O_660F38(23,U) , U ), + INST(kInstPmovsxwq , "pmovsxwq" , G(ExtRm) , F(Move) , O(Xmm) , O(XmmMem) , U , U , O_660F38(24,U) , U ), + INST(kInstPmovzxbd , "pmovzxbd" , G(ExtRm) , F(Move) , O(Xmm) , O(XmmMem) , U , U , O_660F38(31,U) , U ), + INST(kInstPmovzxbq , "pmovzxbq" , G(ExtRm) , F(Move) , O(Xmm) , O(XmmMem) , U , U , O_660F38(32,U) , U ), + INST(kInstPmovzxbw , "pmovzxbw" , G(ExtRm) , F(Move) , O(Xmm) , O(XmmMem) , U , U , O_660F38(30,U) , U ), + INST(kInstPmovzxdq , "pmovzxdq" , G(ExtRm) , F(Move) , O(Xmm) , O(XmmMem) , U , U , O_660F38(35,U) , U ), + INST(kInstPmovzxwd , "pmovzxwd" , G(ExtRm) , F(Move) , O(Xmm) , O(XmmMem) , U , U , O_660F38(33,U) , U ), + INST(kInstPmovzxwq , "pmovzxwq" , G(ExtRm) , F(Move) , O(Xmm) , O(XmmMem) , U , U , O_660F38(34,U) , U ), + INST(kInstPmuldq , "pmuldq" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_660F38(28,U) , U ), + INST(kInstPmulhrsw , "pmulhrsw" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F38(0B,U) , U ), + INST(kInstPmulhuw , "pmulhuw" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(E4,U) , U ), + INST(kInstPmulhw , "pmulhw" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(E5,U) , U ), + INST(kInstPmulld , "pmulld" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_660F38(40,U) , U ), + INST(kInstPmullw , "pmullw" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(D5,U) , U ), + INST(kInstPmuludq , "pmuludq" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(F4,U) , U ), + INST(kInstPop , "pop" , G(X86Pop) , F(Special) , 0 , U , U , U , O_000000(8F,0) , O_000000(58,U) ), + INST(kInstPopa , "popa" , G(X86Op) , F(Special) , U , U , U , U , O_000000(61,U) , U ), + INST(kInstPopcnt , "popcnt" , G(X86RegRm) , F(None) , O(Gqdw) , O(GqdwMem) , U , U , O_F30F00(B8,U) , U ), + INST(kInstPopf , "popf" , G(X86Op) , F(Special) , U , U , U , U , O_000000(9D,U) , U ), + INST(kInstPor , "por" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(EB,U) , U ), + INST(kInstPrefetch , "prefetch" , G(ExtPrefetch) , F(None) , O(Mem) , O(Imm) , U , U , O_000F00(18,U) , U ), + INST(kInstPrefetch3dNow , "prefetch_3dnow" , G(X86M) , F(None) , O(Mem) , U , U , U , O_000F00(0D,0) , U ), + INST(kInstPrefetchw3dNow , "prefetchw_3dnow" , G(X86M) , F(None) , O(Mem) , U , U , U , O_000F00(0D,1) , U ), + INST(kInstPsadbw , "psadbw" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(F6,U) , U ), + INST(kInstPshufb , "pshufb" , G(ExtRm_P) , F(Move) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F38(00,U) , U ), + INST(kInstPshufd , "pshufd" , G(ExtRmi) , F(Move) , O(Xmm) , O(XmmMem) , O(Imm) , U , O_660F00(70,U) , U ), + INST(kInstPshufhw , "pshufhw" , G(ExtRmi) , F(Move) , O(Xmm) , O(XmmMem) , O(Imm) , U , O_F30F00(70,U) , U ), + INST(kInstPshuflw , "pshuflw" , G(ExtRmi) , F(Move) , O(Xmm) , O(XmmMem) , O(Imm) , U , O_F20F00(70,U) , U ), + INST(kInstPshufw , "pshufw" , G(ExtRmi_P) , F(Move) , O(MmXmm) , O(MmXmmMem) , O(Imm) , U , O_000F00(70,U) , U ), + INST(kInstPsignb , "psignb" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F38(08,U) , U ), + INST(kInstPsignd , 
"psignd" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F38(0A,U) , U ), + INST(kInstPsignw , "psignw" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F38(09,U) , U ), + INST(kInstPslld , "pslld" , G(ExtRmRi_P) , F(None) , O(MmXmm) , O(MmXmmMem)|O(Imm) , U , U , O_000F00(F2,U) , O_000F00(72,6) ), + INST(kInstPslldq , "pslldq" , G(ExtRmRi) , F(None) , O(Xmm) , O(Imm) , U , U , U , O_660F00(73,7) ), + INST(kInstPsllq , "psllq" , G(ExtRmRi_P) , F(None) , O(MmXmm) , O(MmXmmMem)|O(Imm) , U , U , O_000F00(F3,U) , O_000F00(73,6) ), + INST(kInstPsllw , "psllw" , G(ExtRmRi_P) , F(None) , O(MmXmm) , O(MmXmmMem)|O(Imm) , U , U , O_000F00(F1,U) , O_000F00(71,6) ), + INST(kInstPsrad , "psrad" , G(ExtRmRi_P) , F(None) , O(MmXmm) , O(MmXmmMem)|O(Imm) , U , U , O_000F00(E2,U) , O_000F00(72,4) ), + INST(kInstPsraw , "psraw" , G(ExtRmRi_P) , F(None) , O(MmXmm) , O(MmXmmMem)|O(Imm) , U , U , O_000F00(E1,U) , O_000F00(71,4) ), + INST(kInstPsrld , "psrld" , G(ExtRmRi_P) , F(None) , O(MmXmm) , O(MmXmmMem)|O(Imm) , U , U , O_000F00(D2,U) , O_000F00(72,2) ), + INST(kInstPsrldq , "psrldq" , G(ExtRmRi) , F(None) , O(Xmm) , O(Imm) , U , U , U , O_660F00(73,3) ), + INST(kInstPsrlq , "psrlq" , G(ExtRmRi_P) , F(None) , O(MmXmm) , O(MmXmmMem)|O(Imm) , U , U , O_000F00(D3,U) , O_000F00(73,2) ), + INST(kInstPsrlw , "psrlw" , G(ExtRmRi_P) , F(None) , O(MmXmm) , O(MmXmmMem)|O(Imm) , U , U , O_000F00(D1,U) , O_000F00(71,2) ), + INST(kInstPsubb , "psubb" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(F8,U) , U ), + INST(kInstPsubd , "psubd" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(FA,U) , U ), + INST(kInstPsubq , "psubq" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(FB,U) , U ), + INST(kInstPsubsb , "psubsb" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(E8,U) , U ), + INST(kInstPsubsw , "psubsw" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(E9,U) , U ), + INST(kInstPsubusb , "psubusb" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(D8,U) , U ), + INST(kInstPsubusw , "psubusw" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(D9,U) , U ), + INST(kInstPsubw , "psubw" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(F9,U) , U ), + INST(kInstPswapd , "pswapd" , G(3dNow) , F(None) , O(Mm) , O(MmMem) , U , U , O_000F0F(BB,U) , U ), + INST(kInstPtest , "ptest" , G(ExtRm) , F(Test) , O(Xmm) , O(XmmMem) , U , U , O_660F38(17,U) , U ), + INST(kInstPunpckhbw , "punpckhbw" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(68,U) , U ), + INST(kInstPunpckhdq , "punpckhdq" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(6A,U) , U ), + INST(kInstPunpckhqdq , "punpckhqdq" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_660F00(6D,U) , U ), + INST(kInstPunpckhwd , "punpckhwd" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(69,U) , U ), + INST(kInstPunpcklbw , "punpcklbw" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(60,U) , U ), + INST(kInstPunpckldq , "punpckldq" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(62,U) , U ), + INST(kInstPunpcklqdq , "punpcklqdq" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_660F00(6C,U) , U ), + INST(kInstPunpcklwd , "punpcklwd" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(61,U) , U ), + INST(kInstPush , "push" , G(X86Push) , F(Special) , 0 , U , U , U , O_000000(FF,6) , O_000000(50,U) ), + 
INST(kInstPusha , "pusha" , G(X86Op) , F(Special) , U , U , U , U , O_000000(60,U) , U ), + INST(kInstPushf , "pushf" , G(X86Op) , F(Special) , U , U , U , U , O_000000(9C,U) , U ), + INST(kInstPxor , "pxor" , G(ExtRm_P) , F(None) , O(MmXmm) , O(MmXmmMem) , U , U , O_000F00(EF,U) , U ), + INST(kInstRcl , "rcl" , G(X86Rot) , F(Special) , O(GqdwbMem) , O(Gb)|O(Imm) , U , U , O_000000(D0,2) , U ), + INST(kInstRcpps , "rcpps" , G(ExtRm) , F(Move) , O(Xmm) , O(XmmMem) , U , U , O_000F00(53,U) , U ), + INST(kInstRcpss , "rcpss" , G(ExtRm) , F(Move) , O(Xmm) , O(XmmMem) , U , U , O_F30F00(53,U) , U ), + INST(kInstRcr , "rcr" , G(X86Rot) , F(Special) , O(GqdwbMem) , O(Gb)|O(Imm) , U , U , O_000000(D0,3) , U ), + INST(kInstRdfsbase , "rdfsbase" , G(X86Rm) , F(None) , O(Gqd) , U , U , U , O_F30F00(AE,0) , U ), + INST(kInstRdgsbase , "rdgsbase" , G(X86Rm) , F(None) , O(Gqd) , U , U , U , O_F30F00(AE,1) , U ), + INST(kInstRdrand , "rdrand" , G(X86Rm) , F(None) , O(Gqdw) , U , U , U , O_000F00(C7,6) , U ), + INST(kInstRdtsc , "rdtsc" , G(X86Op) , F(Special) , U , U , U , U , O_000F00(31,U) , U ), + INST(kInstRdtscp , "rdtscp" , G(X86Op) , F(Special) , U , U , U , U , O_000F01(F9,U) , U ), + INST(kInstRepLodsb , "rep lodsb" , G(X86Rep) , F(Special) , O(Mem) , U , U , U , O_000000(AC,1) , U ), + INST(kInstRepLodsd , "rep lodsd" , G(X86Rep) , F(Special) , O(Mem) , U , U , U , O_000000(AD,1) , U ), + INST(kInstRepLodsq , "rep lodsq" , G(X86Rep) , F(Special)|F(W), O(Mem) , U , U , U , O_000000(AD,1) , U ), + INST(kInstRepLodsw , "rep lodsw" , G(X86Rep) , F(Special) , O(Mem) , U , U , U , O_660000(AD,1) , U ), + INST(kInstRepMovsb , "rep movsb" , G(X86Rep) , F(Special) , O(Mem) , O(Mem) , U , U , O_000000(A4,1) , U ), + INST(kInstRepMovsd , "rep movsd" , G(X86Rep) , F(Special) , O(Mem) , O(Mem) , U , U , O_000000(A5,1) , U ), + INST(kInstRepMovsq , "rep movsq" , G(X86Rep) , F(Special)|F(W), O(Mem) , O(Mem) , U , U , O_000000(A5,1) , U ), + INST(kInstRepMovsw , "rep movsw" , G(X86Rep) , F(Special) , O(Mem) , O(Mem) , U , U , O_660000(A5,1) , U ), + INST(kInstRepStosb , "rep stosb" , G(X86Rep) , F(Special) , O(Mem) , U , U , U , O_000000(AA,1) , U ), + INST(kInstRepStosd , "rep stosd" , G(X86Rep) , F(Special) , O(Mem) , U , U , U , O_000000(AB,1) , U ), + INST(kInstRepStosq , "rep stosq" , G(X86Rep) , F(Special)|F(W), O(Mem) , U , U , U , O_000000(AB,1) , U ), + INST(kInstRepStosw , "rep stosw" , G(X86Rep) , F(Special) , O(Mem) , U , U , U , O_660000(AB,1) , U ), + INST(kInstRepeCmpsb , "repe cmpsb" , G(X86Rep) , F(Special) , O(Mem) , O(Mem) , U , U , O_000000(A6,1) , U ), + INST(kInstRepeCmpsd , "repe cmpsd" , G(X86Rep) , F(Special) , O(Mem) , O(Mem) , U , U , O_000000(A7,1) , U ), + INST(kInstRepeCmpsq , "repe cmpsq" , G(X86Rep) , F(Special)|F(W), O(Mem) , O(Mem) , U , U , O_000000(A7,1) , U ), + INST(kInstRepeCmpsw , "repe cmpsw" , G(X86Rep) , F(Special) , O(Mem) , O(Mem) , U , U , O_660000(A7,1) , U ), + INST(kInstRepeScasb , "repe scasb" , G(X86Rep) , F(Special) , O(Mem) , O(Mem) , U , U , O_000000(AE,1) , U ), + INST(kInstRepeScasd , "repe scasd" , G(X86Rep) , F(Special) , O(Mem) , O(Mem) , U , U , O_000000(AF,1) , U ), + INST(kInstRepeScasq , "repe scasq" , G(X86Rep) , F(Special)|F(W), O(Mem) , O(Mem) , U , U , O_000000(AF,1) , U ), + INST(kInstRepeScasw , "repe scasw" , G(X86Rep) , F(Special) , O(Mem) , O(Mem) , U , U , O_660000(AF,1) , U ), + INST(kInstRepneCmpsb , "repne cmpsb" , G(X86Rep) , F(Special) , O(Mem) , O(Mem) , U , U , O_000000(A6,0) , U ), + INST(kInstRepneCmpsd , "repne cmpsd" , 
G(X86Rep) , F(Special) , O(Mem) , O(Mem) , U , U , O_000000(A7,0) , U ), + INST(kInstRepneCmpsq , "repne cmpsq" , G(X86Rep) , F(Special)|F(W), O(Mem) , O(Mem) , U , U , O_000000(A7,0) , U ), + INST(kInstRepneCmpsw , "repne cmpsw" , G(X86Rep) , F(Special) , O(Mem) , O(Mem) , U , U , O_660000(A7,0) , U ), + INST(kInstRepneScasb , "repne scasb" , G(X86Rep) , F(Special) , O(Mem) , O(Mem) , U , U , O_000000(AE,0) , U ), + INST(kInstRepneScasd , "repne scasd" , G(X86Rep) , F(Special) , O(Mem) , O(Mem) , U , U , O_000000(AF,0) , U ), + INST(kInstRepneScasq , "repne scasq" , G(X86Rep) , F(Special)|F(W), O(Mem) , O(Mem) , U , U , O_000000(AF,0) , U ), + INST(kInstRepneScasw , "repne scasw" , G(X86Rep) , F(Special) , O(Mem) , O(Mem) , U , U , O_660000(AF,0) , U ), + INST(kInstRet , "ret" , G(X86Ret) , F(Special) , U , U , U , U , O_000000(C2,U) , U ), + INST(kInstRol , "rol" , G(X86Rot) , F(Special) , O(GqdwbMem) , O(Gb)|O(Imm) , U , U , O_000000(D0,0) , U ), + INST(kInstRor , "ror" , G(X86Rot) , F(Special) , O(GqdwbMem) , O(Gb)|O(Imm) , U , U , O_000000(D0,1) , U ), + INST(kInstRorx , "rorx" , G(AvxRmi) , F(None) , O(Gqd) , O(GqdMem) , O(Imm) , U , O_F20F3A(F0,U) , U ), + INST(kInstRoundpd , "roundpd" , G(ExtRmi) , F(Move) , O(Xmm) , O(XmmMem) , O(Imm) , U , O_660F3A(09,U) , U ), + INST(kInstRoundps , "roundps" , G(ExtRmi) , F(Move) , O(Xmm) , O(XmmMem) , O(Imm) , U , O_660F3A(08,U) , U ), + INST(kInstRoundsd , "roundsd" , G(ExtRmi) , F(None) , O(Xmm) , O(XmmMem) , O(Imm) , U , O_660F3A(0B,U) , U ), + INST(kInstRoundss , "roundss" , G(ExtRmi) , F(None) , O(Xmm) , O(XmmMem) , O(Imm) , U , O_660F3A(0A,U) , U ), + INST(kInstRsqrtps , "rsqrtps" , G(ExtRm) , F(Move) , O(Xmm) , O(XmmMem) , U , U , O_000F00(52,U) , U ), + INST(kInstRsqrtss , "rsqrtss" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_F30F00(52,U) , U ), + INST(kInstSahf , "sahf" , G(X86Op) , F(Special) , U , U , U , U , O_000000(9E,U) , U ), + INST(kInstSal , "sal" , G(X86Rot) , F(Special) , O(GqdwbMem) , O(Gb)|O(Imm) , U , U , O_000000(D0,4) , U ), + INST(kInstSar , "sar" , G(X86Rot) , F(Special) , O(GqdwbMem) , O(Gb)|O(Imm) , U , U , O_000000(D0,7) , U ), + INST(kInstSarx , "sarx" , G(AvxRmv) , F(None) , O(Gqd) , O(GqdMem) , O(Gqd) , U , O_F30F38(F7,U) , U ), + INST(kInstSbb , "sbb" , G(X86Arith) , F(Lock) , O(GqdwbMem) , O(GqdwbMem)|O(Imm) , U , U , O_000000(18,3) , U ), + INST(kInstSeta , "seta" , G(X86Set) , F(None) , O(GbMem) , U , U , U , O_000F00(97,U) , U ), + INST(kInstSetae , "setae" , G(X86Set) , F(None) , O(GbMem) , U , U , U , O_000F00(93,U) , U ), + INST(kInstSetb , "setb" , G(X86Set) , F(None) , O(GbMem) , U , U , U , O_000F00(92,U) , U ), + INST(kInstSetbe , "setbe" , G(X86Set) , F(None) , O(GbMem) , U , U , U , O_000F00(96,U) , U ), + INST(kInstSetc , "setc" , G(X86Set) , F(None) , O(GbMem) , U , U , U , O_000F00(92,U) , U ), + INST(kInstSete , "sete" , G(X86Set) , F(None) , O(GbMem) , U , U , U , O_000F00(94,U) , U ), + INST(kInstSetg , "setg" , G(X86Set) , F(None) , O(GbMem) , U , U , U , O_000F00(9F,U) , U ), + INST(kInstSetge , "setge" , G(X86Set) , F(None) , O(GbMem) , U , U , U , O_000F00(9D,U) , U ), + INST(kInstSetl , "setl" , G(X86Set) , F(None) , O(GbMem) , U , U , U , O_000F00(9C,U) , U ), + INST(kInstSetle , "setle" , G(X86Set) , F(None) , O(GbMem) , U , U , U , O_000F00(9E,U) , U ), + INST(kInstSetna , "setna" , G(X86Set) , F(None) , O(GbMem) , U , U , U , O_000F00(96,U) , U ), + INST(kInstSetnae , "setnae" , G(X86Set) , F(None) , O(GbMem) , U , U , U , O_000F00(92,U) , U ), + INST(kInstSetnb , "setnb" , 
G(X86Set) , F(None) , O(GbMem) , U , U , U , O_000F00(93,U) , U ), + INST(kInstSetnbe , "setnbe" , G(X86Set) , F(None) , O(GbMem) , U , U , U , O_000F00(97,U) , U ), + INST(kInstSetnc , "setnc" , G(X86Set) , F(None) , O(GbMem) , U , U , U , O_000F00(93,U) , U ), + INST(kInstSetne , "setne" , G(X86Set) , F(None) , O(GbMem) , U , U , U , O_000F00(95,U) , U ), + INST(kInstSetng , "setng" , G(X86Set) , F(None) , O(GbMem) , U , U , U , O_000F00(9E,U) , U ), + INST(kInstSetnge , "setnge" , G(X86Set) , F(None) , O(GbMem) , U , U , U , O_000F00(9C,U) , U ), + INST(kInstSetnl , "setnl" , G(X86Set) , F(None) , O(GbMem) , U , U , U , O_000F00(9D,U) , U ), + INST(kInstSetnle , "setnle" , G(X86Set) , F(None) , O(GbMem) , U , U , U , O_000F00(9F,U) , U ), + INST(kInstSetno , "setno" , G(X86Set) , F(None) , O(GbMem) , U , U , U , O_000F00(91,U) , U ), + INST(kInstSetnp , "setnp" , G(X86Set) , F(None) , O(GbMem) , U , U , U , O_000F00(9B,U) , U ), + INST(kInstSetns , "setns" , G(X86Set) , F(None) , O(GbMem) , U , U , U , O_000F00(99,U) , U ), + INST(kInstSetnz , "setnz" , G(X86Set) , F(None) , O(GbMem) , U , U , U , O_000F00(95,U) , U ), + INST(kInstSeto , "seto" , G(X86Set) , F(None) , O(GbMem) , U , U , U , O_000F00(90,U) , U ), + INST(kInstSetp , "setp" , G(X86Set) , F(None) , O(GbMem) , U , U , U , O_000F00(9A,U) , U ), + INST(kInstSetpe , "setpe" , G(X86Set) , F(None) , O(GbMem) , U , U , U , O_000F00(9A,U) , U ), + INST(kInstSetpo , "setpo" , G(X86Set) , F(None) , O(GbMem) , U , U , U , O_000F00(9B,U) , U ), + INST(kInstSets , "sets" , G(X86Set) , F(None) , O(GbMem) , U , U , U , O_000F00(98,U) , U ), + INST(kInstSetz , "setz" , G(X86Set) , F(None) , O(GbMem) , U , U , U , O_000F00(94,U) , U ), + INST(kInstSfence , "sfence" , G(ExtFence) , F(None) , U , U , U , U , O_000F00(AE,7) , U ), + INST(kInstShl , "shl" , G(X86Rot) , F(Special) , O(GqdwbMem) , O(Gb)|O(Imm) , U , U , O_000000(D0,4) , U ), + INST(kInstShld , "shld" , G(X86Shlrd) , F(Special) , O(GqdwbMem) , O(Gb) , U , U , O_000F00(A4,U) , U ), + INST(kInstShlx , "shlx" , G(AvxRmv) , F(None) , O(Gqd) , O(GqdMem) , O(Gqd) , U , O_660F38(F7,U) , U ), + INST(kInstShr , "shr" , G(X86Rot) , F(Special) , O(GqdwbMem) , O(Gb)|O(Imm) , U , U , O_000000(D0,5) , U ), + INST(kInstShrd , "shrd" , G(X86Shlrd) , F(Special) , O(GqdwbMem) , O(Gqdwb) , U , U , O_000F00(AC,U) , U ), + INST(kInstShrx , "shrx" , G(AvxRmv) , F(None) , O(Gqd) , O(GqdMem) , O(Gqd) , U , O_F20F38(F7,U) , U ), + INST(kInstShufpd , "shufpd" , G(ExtRmi) , F(None) , O(Xmm) , O(XmmMem) , O(Imm) , U , O_660F00(C6,U) , U ), + INST(kInstShufps , "shufps" , G(ExtRmi) , F(None) , O(Xmm) , O(XmmMem) , O(Imm) , U , O_000F00(C6,U) , U ), + INST(kInstSqrtpd , "sqrtpd" , G(ExtRm) , F(Move) , O(Xmm) , O(XmmMem) , U , U , O_660F00(51,U) , U ), + INST(kInstSqrtps , "sqrtps" , G(ExtRm) , F(Move) , O(Xmm) , O(XmmMem) , U , U , O_000F00(51,U) , U ), + INST(kInstSqrtsd , "sqrtsd" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_F20F00(51,U) , U ), + INST(kInstSqrtss , "sqrtss" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_F30F00(51,U) , U ), + INST(kInstStc , "stc" , G(X86Op) , F(None) , U , U , U , U , O_000000(F9,U) , U ), + INST(kInstStd , "std" , G(X86Op) , F(None) , U , U , U , U , O_000000(FD,U) , U ), + INST(kInstStmxcsr , "stmxcsr" , G(X86M) , F(None) , O(Mem) , U , U , U , O_000F00(AE,3) , U ), + INST(kInstSub , "sub" , G(X86Arith) , F(Lock) , O(GqdwbMem) , O(GqdwbMem)|O(Imm) , U , U , O_000000(28,5) , U ), + INST(kInstSubpd , "subpd" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , 
O_660F00(5C,U) , U ), + INST(kInstSubps , "subps" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_000F00(5C,U) , U ), + INST(kInstSubsd , "subsd" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_F20F00(5C,U) , U ), + INST(kInstSubss , "subss" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_F30F00(5C,U) , U ), + INST(kInstTest , "test" , G(X86Test) , F(Test) , O(GqdwbMem) , O(Gqdwb)|O(Imm) , U , U , O_000000(84,U) , O_000000(F6,U) ), + INST(kInstTzcnt , "tzcnt" , G(X86RegRm) , F(None) , O(Gqdw) , O(GqdwMem) , U , U , O_F30F00(BC,U) , U ), + INST(kInstUcomisd , "ucomisd" , G(ExtRm) , F(Test) , O(Xmm) , O(XmmMem) , U , U , O_660F00(2E,U) , U ), + INST(kInstUcomiss , "ucomiss" , G(ExtRm) , F(Test) , O(Xmm) , O(XmmMem) , U , U , O_000F00(2E,U) , U ), + INST(kInstUd2 , "ud2" , G(X86Op) , F(None) , U , U , U , U , O_000F00(0B,U) , U ), + INST(kInstUnpckhpd , "unpckhpd" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_660F00(15,U) , U ), + INST(kInstUnpckhps , "unpckhps" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_000F00(15,U) , U ), + INST(kInstUnpcklpd , "unpcklpd" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_660F00(14,U) , U ), + INST(kInstUnpcklps , "unpcklps" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_000F00(14,U) , U ), + INST(kInstVaddpd , "vaddpd" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(58,U) , U ), + INST(kInstVaddps , "vaddps" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_000F00(58,U) , U ), + INST(kInstVaddsd , "vaddsd" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_F20F00(58,U) , U ), + INST(kInstVaddss , "vaddss" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_F30F00(58,U) , U ), + INST(kInstVaddsubpd , "vaddsubpd" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(D0,U) , U ), + INST(kInstVaddsubps , "vaddsubps" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_F20F00(D0,U) , U ), + INST(kInstVaesdec , "vaesdec" , G(AvxRvm) , F(None) , O(Xmm) , O(Xmm) , O(XmmMem) , U , O_660F38(DE,U) , U ), + INST(kInstVaesdeclast , "vaesdeclast" , G(AvxRvm) , F(None) , O(Xmm) , O(Xmm) , O(XmmMem) , U , O_660F38(DF,U) , U ), + INST(kInstVaesenc , "vaesenc" , G(AvxRvm) , F(None) , O(Xmm) , O(Xmm) , O(XmmMem) , U , O_660F38(DC,U) , U ), + INST(kInstVaesenclast , "vaesenclast" , G(AvxRvm) , F(None) , O(Xmm) , O(Xmm) , O(XmmMem) , U , O_660F38(DD,U) , U ), + INST(kInstVaesimc , "vaesimc" , G(AvxRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_660F38(DB,U) , U ), + INST(kInstVaeskeygenassist , "vaeskeygenassist" , G(AvxRmi) , F(None) , O(Xmm) , O(XmmMem) , O(Imm) , U , O_660F3A(DF,U) , U ), + INST(kInstVandnpd , "vandnpd" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(55,U) , U ), + INST(kInstVandnps , "vandnps" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_000F00(55,U) , U ), + INST(kInstVandpd , "vandpd" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(54,U) , U ), + INST(kInstVandps , "vandps" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_000F00(54,U) , U ), + INST(kInstVblendpd , "vblendpd" , G(AvxRvmi_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , O(Imm) , O_660F3A(0D,U) , U ), + INST(kInstVblendps , "vblendps" , G(AvxRvmi_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , O(Imm) , O_660F3A(0C,U) , U ), + INST(kInstVblendvpd , "vblendvpd" , G(AvxRvmr_P) , F(None) , O(XmmYmm) , 
O(XmmYmm) , O(XmmYmmMem) , O(XmmYmm) , O_660F3A(4B,U) , U ), + INST(kInstVblendvps , "vblendvps" , G(AvxRvmr_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , O(XmmYmm) , O_660F3A(4A,U) , U ), + INST(kInstVbroadcastf128 , "vbroadcastf128" , G(AvxRm) , F(None) , O(Ymm) , O(Mem) , U , U , O_660F38(1A,U)|L, U ), + INST(kInstVbroadcasti128 , "vbroadcasti128" , G(AvxRm) , F(None) , O(Ymm) , O(Mem) , U , U , O_660F38(5A,U)|L, U ), + INST(kInstVbroadcastsd , "vbroadcastsd" , G(AvxRm) , F(None) , O(Ymm) , O(XmmMem) , U , U , O_660F38(19,U)|L, U ), + INST(kInstVbroadcastss , "vbroadcastss" , G(AvxRm) , F(None) , O(Ymm) , O(XmmMem) , U , U , O_660F38(18,U) , U ), + INST(kInstVcmppd , "vcmppd" , G(AvxRvmi_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , O(Imm) , O_660F00(C2,U) , U ), + INST(kInstVcmpps , "vcmpps" , G(AvxRvmi_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , O(Imm) , O_000F00(C2,U) , U ), + INST(kInstVcmpsd , "vcmpsd" , G(AvxRvmi) , F(None) , O(Xmm) , O(Xmm) , O(XmmMem) , O(Imm) , O_F20F00(C2,U) , U ), + INST(kInstVcmpss , "vcmpss" , G(AvxRvmi) , F(None) , O(Xmm) , O(Xmm) , O(XmmMem) , O(Imm) , O_F30F00(C2,U) , U ), + INST(kInstVcomisd , "vcomisd" , G(AvxRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_660F00(2F,U) , U ), + INST(kInstVcomiss , "vcomiss" , G(AvxRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_000F00(2F,U) , U ), + INST(kInstVcvtdq2pd , "vcvtdq2pd" , G(AvxRm_P) , F(None) , O(XmmYmm) , O(XmmMem) , U , U , O_F30F00(E6,U) , U ), + INST(kInstVcvtdq2ps , "vcvtdq2ps" , G(AvxRm_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , U , U , O_000F00(5B,U) , U ), + INST(kInstVcvtpd2dq , "vcvtpd2dq" , G(AvxRm) , F(None) , O(Xmm) , O(XmmYmmMem) , U , U , O_F20F00(E6,U) , U ), + INST(kInstVcvtpd2ps , "vcvtpd2ps" , G(AvxRm) , F(None) , O(Xmm) , O(XmmYmmMem) , U , U , O_660F00(5A,U) , U ), + INST(kInstVcvtph2ps , "vcvtph2ps" , G(AvxRm_P) , F(None) , O(XmmYmm) , O(XmmMem) , U , U , O_660F38(13,U) , U ), + INST(kInstVcvtps2dq , "vcvtps2dq" , G(AvxRm_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , U , U , O_660F00(5B,U) , U ), + INST(kInstVcvtps2pd , "vcvtps2pd" , G(AvxRm_P) , F(None) , O(XmmYmm) , O(XmmMem) , U , U , O_000F00(5A,U) , U ), + INST(kInstVcvtps2ph , "vcvtps2ph" , G(AvxMri_P) , F(None) , O(XmmMem) , O(XmmYmm) , O(Imm) , U , O_660F3A(1D,U) , U ), + INST(kInstVcvtsd2si , "vcvtsd2si" , G(AvxRm) , F(None) , O(Gqd) , O(XmmMem) , U , U , O_F20F00(2D,U) , U ), + INST(kInstVcvtsd2ss , "vcvtsd2ss" , G(AvxRvm) , F(None) , O(Xmm) , O(Xmm) , O(XmmMem) , U , O_F20F00(5A,U) , U ), + INST(kInstVcvtsi2sd , "vcvtsi2sd" , G(AvxRvm) , F(None) , O(Xmm) , O(Xmm) , O(GqdMem) , U , O_F20F00(2A,U) , U ), + INST(kInstVcvtsi2ss , "vcvtsi2ss" , G(AvxRvm) , F(None) , O(Xmm) , O(Xmm) , O(GqdMem) , U , O_F30F00(2A,U) , U ), + INST(kInstVcvtss2sd , "vcvtss2sd" , G(AvxRvm) , F(None) , O(Xmm) , O(Xmm) , O(XmmMem) , U , O_F30F00(5A,U) , U ), + INST(kInstVcvtss2si , "vcvtss2si" , G(AvxRm) , F(None) , O(Gqd) , O(XmmMem) , U , U , O_F20F00(2D,U) , U ), + INST(kInstVcvttpd2dq , "vcvttpd2dq" , G(AvxRm_P) , F(None) , O(Xmm) , O(XmmYmmMem) , U , U , O_660F00(E6,U) , U ), + INST(kInstVcvttps2dq , "vcvttps2dq" , G(AvxRm_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , U , U , O_F30F00(5B,U) , U ), + INST(kInstVcvttsd2si , "vcvttsd2si" , G(AvxRm) , F(None) , O(Gqd) , O(XmmMem) , U , U , O_F20F00(2C,U) , U ), + INST(kInstVcvttss2si , "vcvttss2si" , G(AvxRm) , F(None) , O(Gqd) , O(XmmMem) , U , U , O_F30F00(2C,U) , U ), + INST(kInstVdivpd , "vdivpd" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(5E,U) , U 
), + INST(kInstVdivps , "vdivps" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_000F00(5E,U) , U ), + INST(kInstVdivsd , "vdivsd" , G(AvxRvm) , F(None) , O(Xmm) , O(Xmm) , O(XmmMem) , U , O_F20F00(5E,U) , U ), + INST(kInstVdivss , "vdivss" , G(AvxRvm) , F(None) , O(Xmm) , O(Xmm) , O(XmmMem) , U , O_F30F00(5E,U) , U ), + INST(kInstVdppd , "vdppd" , G(AvxRvmi) , F(None) , O(Xmm) , O(Xmm) , O(XmmMem) , O(Imm) , O_660F3A(41,U) , U ), + INST(kInstVdpps , "vdpps" , G(AvxRvmi_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , O(Imm) , O_660F3A(40,U) , U ), + INST(kInstVextractf128 , "vextractf128" , G(AvxMri) , F(None) , O(XmmMem) , O(Ymm) , O(Imm) , U , O_660F3A(19,U)|L, U ), + INST(kInstVextracti128 , "vextracti128" , G(AvxMri) , F(None) , O(XmmMem) , O(Ymm) , O(Imm) , U , O_660F3A(39,U)|L, U ), + INST(kInstVextractps , "vextractps" , G(AvxMri) , F(None) , O(GqdMem) , O(Xmm) , O(Imm) , U , O_660F3A(17,U) , U ), + INST(kInstVfmadd132pd , "vfmadd132pd" , G(AvxRvm_P) , F(W), O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(98,U) , U ), + INST(kInstVfmadd132ps , "vfmadd132ps" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(98,U) , U ), + INST(kInstVfmadd132sd , "vfmadd132sd" , G(AvxRvm) , F(W), O(Xmm) , O(Xmm) , O(XmmMem) , U , O_660F38(99,U) , U ), + INST(kInstVfmadd132ss , "vfmadd132ss" , G(AvxRvm) , F(None) , O(Xmm) , O(Xmm) , O(XmmMem) , U , O_660F38(99,U) , U ), + INST(kInstVfmadd213pd , "vfmadd213pd" , G(AvxRvm_P) , F(W), O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(A8,U) , U ), + INST(kInstVfmadd213ps , "vfmadd213ps" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(A8,U) , U ), + INST(kInstVfmadd213sd , "vfmadd213sd" , G(AvxRvm) , F(W), O(Xmm) , O(Xmm) , O(XmmMem) , U , O_660F38(A9,U) , U ), + INST(kInstVfmadd213ss , "vfmadd213ss" , G(AvxRvm) , F(None) , O(Xmm) , O(Xmm) , O(XmmMem) , U , O_660F38(A9,U) , U ), + INST(kInstVfmadd231pd , "vfmadd231pd" , G(AvxRvm_P) , F(W), O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(B8,U) , U ), + INST(kInstVfmadd231ps , "vfmadd231ps" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(B8,U) , U ), + INST(kInstVfmadd231sd , "vfmadd231sd" , G(AvxRvm) , F(W), O(Xmm) , O(Xmm) , O(XmmMem) , U , O_660F38(B9,U) , U ), + INST(kInstVfmadd231ss , "vfmadd231ss" , G(AvxRvm) , F(None) , O(Xmm) , O(Xmm) , O(XmmMem) , U , O_660F38(B9,U) , U ), + INST(kInstVfmaddsub132pd , "vfmaddsub132pd" , G(AvxRvm_P) , F(W), O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(96,U) , U ), + INST(kInstVfmaddsub132ps , "vfmaddsub132ps" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(96,U) , U ), + INST(kInstVfmaddsub213pd , "vfmaddsub213pd" , G(AvxRvm_P) , F(W), O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(A6,U) , U ), + INST(kInstVfmaddsub213ps , "vfmaddsub213ps" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(A6,U) , U ), + INST(kInstVfmaddsub231pd , "vfmaddsub231pd" , G(AvxRvm_P) , F(W), O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(B6,U) , U ), + INST(kInstVfmaddsub231ps , "vfmaddsub231ps" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(B6,U) , U ), + INST(kInstVfmsub132pd , "vfmsub132pd" , G(AvxRvm_P) , F(W), O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(9A,U) , U ), + INST(kInstVfmsub132ps , "vfmsub132ps" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(9A,U) , U ), + INST(kInstVfmsub132sd , "vfmsub132sd" , G(AvxRvm) , 
F(W), O(Xmm) , O(Xmm) , O(XmmMem) , U , O_660F38(9B,U) , U ), + INST(kInstVfmsub132ss , "vfmsub132ss" , G(AvxRvm) , F(None) , O(Xmm) , O(Xmm) , O(XmmMem) , U , O_660F38(9B,U) , U ), + INST(kInstVfmsub213pd , "vfmsub213pd" , G(AvxRvm_P) , F(W), O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(AA,U) , U ), + INST(kInstVfmsub213ps , "vfmsub213ps" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(AA,U) , U ), + INST(kInstVfmsub213sd , "vfmsub213sd" , G(AvxRvm) , F(W), O(Xmm) , O(Xmm) , O(XmmMem) , U , O_660F38(AB,U) , U ), + INST(kInstVfmsub213ss , "vfmsub213ss" , G(AvxRvm) , F(None) , O(Xmm) , O(Xmm) , O(XmmMem) , U , O_660F38(AB,U) , U ), + INST(kInstVfmsub231pd , "vfmsub231pd" , G(AvxRvm_P) , F(W), O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(BA,U) , U ), + INST(kInstVfmsub231ps , "vfmsub231ps" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(BA,U) , U ), + INST(kInstVfmsub231sd , "vfmsub231sd" , G(AvxRvm) , F(W), O(Xmm) , O(Xmm) , O(XmmMem) , U , O_660F38(BB,U) , U ), + INST(kInstVfmsub231ss , "vfmsub231ss" , G(AvxRvm) , F(None) , O(Xmm) , O(Xmm) , O(XmmMem) , U , O_660F38(BB,U) , U ), + INST(kInstVfmsubadd132pd , "vfmsubadd132pd" , G(AvxRvm_P) , F(W), O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(97,U) , U ), + INST(kInstVfmsubadd132ps , "vfmsubadd132ps" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(97,U) , U ), + INST(kInstVfmsubadd213pd , "vfmsubadd213pd" , G(AvxRvm_P) , F(W), O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(A7,U) , U ), + INST(kInstVfmsubadd213ps , "vfmsubadd213ps" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(A7,U) , U ), + INST(kInstVfmsubadd231pd , "vfmsubadd231pd" , G(AvxRvm_P) , F(W), O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(B7,U) , U ), + INST(kInstVfmsubadd231ps , "vfmsubadd231ps" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(B7,U) , U ), + INST(kInstVfnmadd132pd , "vfnmadd132pd" , G(AvxRvm_P) , F(W), O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(9C,U) , U ), + INST(kInstVfnmadd132ps , "vfnmadd132ps" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(9C,U) , U ), + INST(kInstVfnmadd132sd , "vfnmadd132sd" , G(AvxRvm) , F(W), O(Xmm) , O(Xmm) , O(XmmMem) , U , O_660F38(9D,U) , U ), + INST(kInstVfnmadd132ss , "vfnmadd132ss" , G(AvxRvm) , F(None) , O(Xmm) , O(Xmm) , O(XmmMem) , U , O_660F38(9D,U) , U ), + INST(kInstVfnmadd213pd , "vfnmadd213pd" , G(AvxRvm_P) , F(W), O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(AC,U) , U ), + INST(kInstVfnmadd213ps , "vfnmadd213ps" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(AC,U) , U ), + INST(kInstVfnmadd213sd , "vfnmadd213sd" , G(AvxRvm) , F(W), O(Xmm) , O(Xmm) , O(XmmMem) , U , O_660F38(AD,U) , U ), + INST(kInstVfnmadd213ss , "vfnmadd213ss" , G(AvxRvm) , F(None) , O(Xmm) , O(Xmm) , O(XmmMem) , U , O_660F38(AD,U) , U ), + INST(kInstVfnmadd231pd , "vfnmadd231pd" , G(AvxRvm_P) , F(W), O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(BC,U) , U ), + INST(kInstVfnmadd231ps , "vfnmadd231ps" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(BC,U) , U ), + INST(kInstVfnmadd231sd , "vfnmadd231sd" , G(AvxRvm) , F(W), O(Xmm) , O(Xmm) , O(XmmMem) , U , O_660F38(BC,U) , U ), + INST(kInstVfnmadd231ss , "vfnmadd231ss" , G(AvxRvm) , F(None) , O(Xmm) , O(Xmm) , O(XmmMem) , U , O_660F38(BC,U) , U ), + INST(kInstVfnmsub132pd , "vfnmsub132pd" , G(AvxRvm_P) , F(W), 
O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(9E,U) , U ), + INST(kInstVfnmsub132ps , "vfnmsub132ps" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(9E,U) , U ), + INST(kInstVfnmsub132sd , "vfnmsub132sd" , G(AvxRvm) , F(W), O(Xmm) , O(Xmm) , O(XmmMem) , U , O_660F38(9F,U) , U ), + INST(kInstVfnmsub132ss , "vfnmsub132ss" , G(AvxRvm) , F(None) , O(Xmm) , O(Xmm) , O(XmmMem) , U , O_660F38(9F,U) , U ), + INST(kInstVfnmsub213pd , "vfnmsub213pd" , G(AvxRvm_P) , F(W), O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(AE,U) , U ), + INST(kInstVfnmsub213ps , "vfnmsub213ps" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(AE,U) , U ), + INST(kInstVfnmsub213sd , "vfnmsub213sd" , G(AvxRvm) , F(W), O(Xmm) , O(Xmm) , O(XmmMem) , U , O_660F38(AF,U) , U ), + INST(kInstVfnmsub213ss , "vfnmsub213ss" , G(AvxRvm) , F(None) , O(Xmm) , O(Xmm) , O(XmmMem) , U , O_660F38(AF,U) , U ), + INST(kInstVfnmsub231pd , "vfnmsub231pd" , G(AvxRvm_P) , F(W), O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(BE,U) , U ), + INST(kInstVfnmsub231ps , "vfnmsub231ps" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(BE,U) , U ), + INST(kInstVfnmsub231sd , "vfnmsub231sd" , G(AvxRvm) , F(W), O(Xmm) , O(Xmm) , O(XmmMem) , U , O_660F38(BF,U) , U ), + INST(kInstVfnmsub231ss , "vfnmsub231ss" , G(AvxRvm) , F(None) , O(Xmm) , O(Xmm) , O(XmmMem) , U , O_660F38(BF,U) , U ), + INST(kInstVgatherdpd , "vgatherdpd" , G(AvxGather) , F(W), O(XmmYmm) , O(Mem) , O(XmmYmm) , U , O_660F38(92,U) , U ), + INST(kInstVgatherdps , "vgatherdps" , G(AvxGather) , F(None) , O(XmmYmm) , O(Mem) , O(XmmYmm) , U , O_660F38(92,U) , U ), + INST(kInstVgatherqpd , "vgatherqpd" , G(AvxGather) , F(W), O(XmmYmm) , O(Mem) , O(XmmYmm) , U , O_660F38(93,U) , U ), + INST(kInstVgatherqps , "vgatherqps" , G(AvxGatherEx) , F(None) , O(Xmm) , O(Mem) , O(Xmm) , U , O_660F38(93,U) , U ), + INST(kInstVhaddpd , "vhaddpd" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(7C,U) , U ), + INST(kInstVhaddps , "vhaddps" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_F20F00(7C,U) , U ), + INST(kInstVhsubpd , "vhsubpd" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(7D,U) , U ), + INST(kInstVhsubps , "vhsubps" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_F20F00(7D,U) , U ), + INST(kInstVinsertf128 , "vinsertf128" , G(AvxRvmi) , F(None) , O(Ymm) , O(Ymm) , O(XmmMem) , O(Imm) , O_660F3A(18,U)|L, U ), + INST(kInstVinserti128 , "vinserti128" , G(AvxRvmi) , F(None) , O(Ymm) , O(Ymm) , O(XmmMem) , O(Imm) , O_660F3A(38,U)|L, U ), + INST(kInstVinsertps , "vinsertps" , G(AvxRvmi) , F(None) , O(Xmm) , O(Xmm) , O(XmmMem) , O(Imm) , O_660F3A(21,U) , U ), + INST(kInstVlddqu , "vlddqu" , G(AvxRm_P) , F(None) , O(XmmYmm) , O(Mem) , U , U , O_F20F00(F0,U) , U ), + INST(kInstVldmxcsr , "vldmxcsr" , G(AvxM) , F(None) , O(Mem) , U , U , U , O_000F00(AE,2) , U ), + INST(kInstVmaskmovdqu , "vmaskmovdqu" , G(AvxRm) , F(None) , O(Xmm) , O(Xmm) , U , U , O_660F00(F7,U) , U ), + INST(kInstVmaskmovpd , "vmaskmovpd" , G(AvxRvmMvr_P) , F(None) , O(XmmYmmMem) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(2D,U) , O_660F38(2F,U) ), + INST(kInstVmaskmovps , "vmaskmovps" , G(AvxRvmMvr_P) , F(None) , O(XmmYmmMem) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(2C,U) , O_660F38(2E,U) ), + INST(kInstVmaxpd , "vmaxpd" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(5F,U) , U ), + INST(kInstVmaxps , 
"vmaxps" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_000F00(5F,U) , U ), + INST(kInstVmaxsd , "vmaxsd" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_F20F00(5F,U) , U ), + INST(kInstVmaxss , "vmaxss" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_F30F00(5F,U) , U ), + INST(kInstVminpd , "vminpd" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(5D,U) , U ), + INST(kInstVminps , "vminps" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_000F00(5D,U) , U ), + INST(kInstVminsd , "vminsd" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_F20F00(5D,U) , U ), + INST(kInstVminss , "vminss" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_F30F00(5D,U) , U ), + INST(kInstVmovapd , "vmovapd" , G(AvxRmMr_P) , F(None) , O(XmmYmmMem) , O(XmmYmmMem) , U , U , O_660F00(28,U) , O_660F00(29,U) ), + INST(kInstVmovaps , "vmovaps" , G(AvxRmMr_P) , F(None) , O(XmmYmmMem) , O(XmmYmmMem) , U , U , O_000F00(28,U) , O_000F00(29,U) ), + INST(kInstVmovd , "vmovd" , G(AvxRmMr) , F(None) , O(XmmMem) , O(XmmMem) , U , U , O_660F00(6E,U) , O_660F00(7E,U) ), + INST(kInstVmovddup , "vmovddup" , G(AvxRm_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , U , U , O_F20F00(12,U) , U ), + INST(kInstVmovdqa , "vmovdqa" , G(AvxRmMr_P) , F(None) , O(XmmYmmMem) , O(XmmYmmMem) , U , U , O_660F00(6F,U) , O_660F00(7F,U) ), + INST(kInstVmovdqu , "vmovdqu" , G(AvxRmMr_P) , F(None) , O(XmmYmmMem) , O(XmmYmmMem) , U , U , O_F30F00(6F,U) , O_F30F00(7F,U) ), + INST(kInstVmovhlps , "vmovhlps" , G(AvxRvm) , F(None) , O(Xmm) , O(Xmm) , O(Xmm) , U , O_000F00(12,U) , U ), + INST(kInstVmovhpd , "vmovhpd" , G(AvxRvmMr) , F(None) , O(XmmMem) , O(Xmm) , O(Mem) , U , O_660F00(16,U) , O_660F00(17,U) ), + INST(kInstVmovhps , "vmovhps" , G(AvxRvmMr) , F(None) , O(XmmMem) , O(Xmm) , O(Mem) , U , O_000F00(16,U) , O_000F00(17,U) ), + INST(kInstVmovlhps , "vmovlhps" , G(AvxRvm) , F(None) , O(Xmm) , O(Xmm) , O(Xmm) , U , O_000F00(16,U) , U ), + INST(kInstVmovlpd , "vmovlpd" , G(AvxRvmMr) , F(None) , O(XmmMem) , O(Xmm) , O(Mem) , U , O_660F00(12,U) , O_660F00(13,U) ), + INST(kInstVmovlps , "vmovlps" , G(AvxRvmMr) , F(None) , O(XmmMem) , O(Xmm) , O(Mem) , U , O_000F00(12,U) , O_000F00(13,U) ), + INST(kInstVmovmskpd , "vmovmskpd" , G(AvxRm_P) , F(None) , O(Gqd) , O(XmmYmm) , U , U , O_660F00(50,U) , U ), + INST(kInstVmovmskps , "vmovmskps" , G(AvxRm_P) , F(None) , O(Gqd) , O(XmmYmm) , U , U , O_000F00(50,U) , U ), + INST(kInstVmovntdq , "vmovntdq" , G(AvxMr) , F(None) , O(Mem) , O(XmmYmm) , U , U , O_660F00(E7,U) , U ), + INST(kInstVmovntdqa , "vmovntdqa" , G(AvxRm_P) , F(None) , O(XmmYmm) , O(Mem) , U , U , O_660F38(2A,U) , U ), + INST(kInstVmovntpd , "vmovntpd" , G(AvxMr_P) , F(None) , O(Mem) , O(XmmYmm) , U , U , O_660F00(2B,U) , U ), + INST(kInstVmovntps , "vmovntps" , G(AvxMr_P) , F(None) , O(Mem) , O(XmmYmm) , U , U , O_000F00(2B,U) , U ), + INST(kInstVmovq , "vmovq" , G(AvxRmMr) , F(W), O(XmmMem) , O(XmmMem) , U , U , O_660F00(6E,U) , O_660F00(7E,U) ), + INST(kInstVmovsd , "vmovsd" , G(AvxMovSsSd) , F(None) , O(XmmMem) , O(XmmMem) , O(Xmm) , U , O_F20F00(10,U) , O_F20F00(11,U) ), + INST(kInstVmovshdup , "vmovshdup" , G(AvxRm_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , U , U , O_F30F00(16,U) , U ), + INST(kInstVmovsldup , "vmovsldup" , G(AvxRm_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , U , U , O_F30F00(12,U) , U ), + INST(kInstVmovss , "vmovss" , G(AvxMovSsSd) , F(None) , O(XmmMem) , O(Xmm) , O(Xmm) , 
U , O_F30F00(10,U) , O_F30F00(11,U) ), + INST(kInstVmovupd , "vmovupd" , G(AvxRmMr_P) , F(None) , O(XmmYmmMem) , O(XmmYmmMem) , U , U , O_660F00(10,U) , O_660F00(11,U) ), + INST(kInstVmovups , "vmovups" , G(AvxRmMr_P) , F(None) , O(XmmYmmMem) , O(XmmYmmMem) , U , U , O_000F00(10,U) , O_000F00(11,U) ), + INST(kInstVmpsadbw , "vmpsadbw" , G(AvxRvmi_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , O(Imm) , O_660F3A(42,U) , U ), + INST(kInstVmulpd , "vmulpd" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(59,U) , U ), + INST(kInstVmulps , "vmulps" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_000F00(59,U) , U ), + INST(kInstVmulsd , "vmulsd" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_F20F00(59,U) , U ), + INST(kInstVmulss , "vmulss" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_F30F00(59,U) , U ), + INST(kInstVorpd , "vorpd" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(56,U) , U ), + INST(kInstVorps , "vorps" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_000F00(56,U) , U ), + INST(kInstVpabsb , "vpabsb" , G(AvxRm_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , U , U , O_660F38(1C,U) , U ), + INST(kInstVpabsd , "vpabsd" , G(AvxRm_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , U , U , O_660F38(1E,U) , U ), + INST(kInstVpabsw , "vpabsw" , G(AvxRm_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , U , U , O_660F38(1D,U) , U ), + INST(kInstVpackssdw , "vpackssdw" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(6B,U) , U ), + INST(kInstVpacksswb , "vpacksswb" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(63,U) , U ), + INST(kInstVpackusdw , "vpackusdw" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(2B,U) , U ), + INST(kInstVpackuswb , "vpackuswb" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(67,U) , U ), + INST(kInstVpaddb , "vpaddb" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(FC,U) , U ), + INST(kInstVpaddd , "vpaddd" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(FE,U) , U ), + INST(kInstVpaddq , "vpaddq" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(D4,U) , U ), + INST(kInstVpaddsb , "vpaddsb" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(EC,U) , U ), + INST(kInstVpaddsw , "vpaddsw" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(ED,U) , U ), + INST(kInstVpaddusb , "vpaddusb" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(DC,U) , U ), + INST(kInstVpaddusw , "vpaddusw" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(DD,U) , U ), + INST(kInstVpaddw , "vpaddw" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(FD,U) , U ), + INST(kInstVpalignr , "vpalignr" , G(AvxRvmi_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , O(Imm) , O_660F3A(0F,U) , U ), + INST(kInstVpand , "vpand" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(DB,U) , U ), + INST(kInstVpandn , "vpandn" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(DF,U) , U ), + INST(kInstVpavgb , "vpavgb" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(E0,U) , U ), + INST(kInstVpavgw , "vpavgw" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , 
O(XmmYmmMem) , U , O_660F00(E3,U) , U ), + INST(kInstVpblendd , "vpblendd" , G(AvxRvmi_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , O(Imm) , O_660F3A(02,U) , U ), + INST(kInstVpblendvb , "vpblendvb" , G(AvxRvmr) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , O(XmmYmm) , O_660F3A(4C,U) , U ), + INST(kInstVpblendw , "vpblendw" , G(AvxRvmi_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , O(Imm) , O_660F3A(0E,U) , U ), + INST(kInstVpbroadcastb , "vpbroadcastb" , G(AvxRm_P) , F(None) , O(XmmYmm) , O(XmmMem) , U , U , O_660F38(78,U) , U ), + INST(kInstVpbroadcastd , "vpbroadcastd" , G(AvxRm_P) , F(None) , O(XmmYmm) , O(XmmMem) , U , U , O_660F38(58,U) , U ), + INST(kInstVpbroadcastq , "vpbroadcastq" , G(AvxRm_P) , F(None) , O(XmmYmm) , O(XmmMem) , U , U , O_660F38(59,U) , U ), + INST(kInstVpbroadcastw , "vpbroadcastw" , G(AvxRm_P) , F(None) , O(XmmYmm) , O(XmmMem) , U , U , O_660F38(79,U) , U ), + INST(kInstVpclmulqdq , "vpclmulqdq" , G(AvxRvmi) , F(None) , O(Xmm) , O(Xmm) , O(XmmMem) , O(Imm) , O_660F3A(44,U) , U ), + INST(kInstVpcmpeqb , "vpcmpeqb" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(74,U) , U ), + INST(kInstVpcmpeqd , "vpcmpeqd" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(76,U) , U ), + INST(kInstVpcmpeqq , "vpcmpeqq" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(29,U) , U ), + INST(kInstVpcmpeqw , "vpcmpeqw" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(75,U) , U ), + INST(kInstVpcmpestri , "vpcmpestri" , G(AvxRmi) , F(None) , O(Xmm) , O(XmmMem) , O(Imm) , U , O_660F3A(61,U) , U ), + INST(kInstVpcmpestrm , "vpcmpestrm" , G(AvxRmi) , F(None) , O(Xmm) , O(XmmMem) , O(Imm) , U , O_660F3A(60,U) , U ), + INST(kInstVpcmpgtb , "vpcmpgtb" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(64,U) , U ), + INST(kInstVpcmpgtd , "vpcmpgtd" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(66,U) , U ), + INST(kInstVpcmpgtq , "vpcmpgtq" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(37,U) , U ), + INST(kInstVpcmpgtw , "vpcmpgtw" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(65,U) , U ), + INST(kInstVpcmpistri , "vpcmpistri" , G(AvxRmi) , F(None) , O(Xmm) , O(XmmMem) , O(Imm) , U , O_660F3A(63,U) , U ), + INST(kInstVpcmpistrm , "vpcmpistrm" , G(AvxRmi) , F(None) , O(Xmm) , O(XmmMem) , O(Imm) , U , O_660F3A(62,U) , U ), + INST(kInstVperm2f128 , "vperm2f128" , G(AvxRvmi) , F(None) , O(Ymm) , O(Ymm) , O(YmmMem) , O(Imm) , O_660F3A(06,U)|L, U ), + INST(kInstVperm2i128 , "vperm2i128" , G(AvxRvmi) , F(None) , O(Ymm) , O(Ymm) , O(YmmMem) , O(Imm) , O_660F3A(46,U)|L, U ), + INST(kInstVpermd , "vpermd" , G(AvxRvm) , F(None) , O(Ymm) , O(Ymm) , O(YmmMem) , U , O_660F38(36,U)|L, U ), + INST(kInstVpermilpd , "vpermilpd" , G(AvxRvmRmi_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , O(XmmYmmMem)|O(Imm) , U , O_660F38(0D,U) , O_660F3A(05,U) ), + INST(kInstVpermilps , "vpermilps" , G(AvxRvmRmi_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , O(XmmYmmMem)|O(Imm) , U , O_660F38(0C,U) , O_660F3A(04,U) ), + INST(kInstVpermpd , "vpermpd" , G(AvxRmi) , F(W), O(Ymm) , O(YmmMem) , O(Imm) , U , O_660F3A(01,U)|L, U ), + INST(kInstVpermps , "vpermps" , G(AvxRvm) , F(None) , O(Ymm) , O(Ymm) , O(YmmMem) , U , O_660F38(16,U)|L, U ), + INST(kInstVpermq , "vpermq" , G(AvxRmi) , F(W), O(Ymm) , O(YmmMem) , O(Imm) , U , O_660F3A(00,U)|L, U ), + INST(kInstVpextrb , "vpextrb" , 
G(AvxMri) , F(None) , O(GqdwbMem) , O(Xmm) , O(Imm) , U , O_660F3A(14,U) , U ), + INST(kInstVpextrd , "vpextrd" , G(AvxMri) , F(None) , O(GqdMem) , O(Xmm) , O(Imm) , U , O_660F3A(16,U) , U ), + INST(kInstVpextrq , "vpextrq" , G(AvxMri) , F(W), O(GqMem) , O(Xmm) , O(Imm) , U , O_660F3A(16,U) , U ), + INST(kInstVpextrw , "vpextrw" , G(AvxMri) , F(None) , O(GqdwMem) , O(Xmm) , O(Imm) , U , O_660F3A(15,U) , U ), + INST(kInstVpgatherdd , "vpgatherdd" , G(AvxGather) , F(None) , O(XmmYmm) , O(Mem) , O(XmmYmm) , U , O_660F38(90,U) , U ), + INST(kInstVpgatherdq , "vpgatherdq" , G(AvxGather) , F(W), O(XmmYmm) , O(Mem) , O(XmmYmm) , U , O_660F38(90,U) , U ), + INST(kInstVpgatherqd , "vpgatherqd" , G(AvxGatherEx) , F(None) , O(Xmm) , O(Mem) , O(Xmm) , U , O_660F38(91,U) , U ), + INST(kInstVpgatherqq , "vpgatherqq" , G(AvxGather) , F(W), O(XmmYmm) , O(Mem) , O(XmmYmm) , U , O_660F38(91,U) , U ), + INST(kInstVphaddd , "vphaddd" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(02,U) , U ), + INST(kInstVphaddsw , "vphaddsw" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(03,U) , U ), + INST(kInstVphaddw , "vphaddw" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(01,U) , U ), + INST(kInstVphminposuw , "vphminposuw" , G(AvxRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_660F38(41,U) , U ), + INST(kInstVphsubd , "vphsubd" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(06,U) , U ), + INST(kInstVphsubsw , "vphsubsw" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(07,U) , U ), + INST(kInstVphsubw , "vphsubw" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(05,U) , U ), + INST(kInstVpinsrb , "vpinsrb" , G(AvxRvmi) , F(None) , O(Xmm) , O(Xmm) , O(GqdwbMem) , O(Imm) , O_660F3A(20,U) , U ), + INST(kInstVpinsrd , "vpinsrd" , G(AvxRvmi) , F(None) , O(Xmm) , O(Xmm) , O(GqdMem) , O(Imm) , O_660F3A(22,U) , U ), + INST(kInstVpinsrq , "vpinsrq" , G(AvxRvmi) , F(W), O(Xmm) , O(Xmm) , O(GqMem) , O(Imm) , O_660F3A(22,U) , U ), + INST(kInstVpinsrw , "vpinsrw" , G(AvxRvmi) , F(None) , O(Xmm) , O(Xmm) , O(GqdwMem) , O(Imm) , O_660F00(C4,U) , U ), + INST(kInstVpmaddubsw , "vpmaddubsw" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(04,U) , U ), + INST(kInstVpmaddwd , "vpmaddwd" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(F5,U) , U ), + INST(kInstVpmaskmovd , "vpmaskmovd" , G(AvxRvmMvr_P) , F(None) , O(XmmYmmMem) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(8C,U) , O_660F38(8E,U) ), + INST(kInstVpmaskmovq , "vpmaskmovq" , G(AvxRvmMvr_P) , F(W), O(XmmYmmMem) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(8C,U) , O_660F38(8E,U) ), + INST(kInstVpmaxsb , "vpmaxsb" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(3C,U) , U ), + INST(kInstVpmaxsd , "vpmaxsd" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(3D,U) , U ), + INST(kInstVpmaxsw , "vpmaxsw" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(EE,U) , U ), + INST(kInstVpmaxub , "vpmaxub" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(DE,U) , U ), + INST(kInstVpmaxud , "vpmaxud" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(3F,U) , U ), + INST(kInstVpmaxuw , "vpmaxuw" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(3E,U) , U ), + INST(kInstVpminsb , "vpminsb" , G(AvxRvm_P) , 
F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(38,U) , U ), + INST(kInstVpminsd , "vpminsd" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(39,U) , U ), + INST(kInstVpminsw , "vpminsw" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(EA,U) , U ), + INST(kInstVpminub , "vpminub" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(DA,U) , U ), + INST(kInstVpminud , "vpminud" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(3B,U) , U ), + INST(kInstVpminuw , "vpminuw" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(3A,U) , U ), + INST(kInstVpmovmskb , "vpmovmskb" , G(AvxRm_P) , F(None) , O(Gqd) , O(XmmYmm) , U , U , O_660F00(D7,U) , U ), + INST(kInstVpmovsxbd , "vpmovsxbd" , G(AvxRm_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , U , U , O_660F38(21,U) , U ), + INST(kInstVpmovsxbq , "vpmovsxbq" , G(AvxRm_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , U , U , O_660F38(22,U) , U ), + INST(kInstVpmovsxbw , "vpmovsxbw" , G(AvxRm_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , U , U , O_660F38(20,U) , U ), + INST(kInstVpmovsxdq , "vpmovsxdq" , G(AvxRm_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , U , U , O_660F38(25,U) , U ), + INST(kInstVpmovsxwd , "vpmovsxwd" , G(AvxRm_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , U , U , O_660F38(23,U) , U ), + INST(kInstVpmovsxwq , "vpmovsxwq" , G(AvxRm_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , U , U , O_660F38(24,U) , U ), + INST(kInstVpmovzxbd , "vpmovzxbd" , G(AvxRm_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , U , U , O_660F38(31,U) , U ), + INST(kInstVpmovzxbq , "vpmovzxbq" , G(AvxRm_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , U , U , O_660F38(32,U) , U ), + INST(kInstVpmovzxbw , "vpmovzxbw" , G(AvxRm_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , U , U , O_660F38(30,U) , U ), + INST(kInstVpmovzxdq , "vpmovzxdq" , G(AvxRm_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , U , U , O_660F38(35,U) , U ), + INST(kInstVpmovzxwd , "vpmovzxwd" , G(AvxRm_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , U , U , O_660F38(33,U) , U ), + INST(kInstVpmovzxwq , "vpmovzxwq" , G(AvxRm_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , U , U , O_660F38(34,U) , U ), + INST(kInstVpmuldq , "vpmuldq" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(28,U) , U ), + INST(kInstVpmulhrsw , "vpmulhrsw" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(0B,U) , U ), + INST(kInstVpmulhuw , "vpmulhuw" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(E4,U) , U ), + INST(kInstVpmulhw , "vpmulhw" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(E5,U) , U ), + INST(kInstVpmulld , "vpmulld" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(40,U) , U ), + INST(kInstVpmullw , "vpmullw" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(D5,U) , U ), + INST(kInstVpmuludq , "vpmuludq" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(F4,U) , U ), + INST(kInstVpor , "vpor" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(EB,U) , U ), + INST(kInstVpsadbw , "vpsadbw" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(F6,U) , U ), + INST(kInstVpshufb , "vpshufb" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(00,U) , U ), + INST(kInstVpshufd , "vpshufd" , G(AvxRmi_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , O(Imm) , U , 
O_660F00(70,U) , U ), + INST(kInstVpshufhw , "vpshufhw" , G(AvxRmi_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , O(Imm) , U , O_F30F00(70,U) , U ), + INST(kInstVpshuflw , "vpshuflw" , G(AvxRmi_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , O(Imm) , U , O_F20F00(70,U) , U ), + INST(kInstVpsignb , "vpsignb" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(08,U) , U ), + INST(kInstVpsignd , "vpsignd" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(0A,U) , U ), + INST(kInstVpsignw , "vpsignw" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(09,U) , U ), + INST(kInstVpslld , "vpslld" , G(AvxRvmVmi_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , O(XmmYmmMem)|O(Imm) , U , O_660F00(F2,U) , O_660F00(72,6) ), + INST(kInstVpslldq , "vpslldq" , G(AvxVmi_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , O(Imm) , U , O_660F00(73,7) , U ), + INST(kInstVpsllq , "vpsllq" , G(AvxRvmVmi_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , O(XmmYmmMem)|O(Imm) , U , O_660F00(F3,U) , O_660F00(73,6) ), + INST(kInstVpsllvd , "vpsllvd" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(47,U) , U ), + INST(kInstVpsllvq , "vpsllvq" , G(AvxRvm_P) , F(W), O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(47,U) , U ), + INST(kInstVpsllw , "vpsllw" , G(AvxRvmVmi_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , O(XmmYmmMem)|O(Imm) , U , O_660F00(F1,U) , O_660F00(71,6) ), + INST(kInstVpsrad , "vpsrad" , G(AvxRvmVmi_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , O(XmmYmmMem)|O(Imm) , U , O_660F00(E2,U) , O_660F00(72,4) ), + INST(kInstVpsravd , "vpsravd" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(46,U) , U ), + INST(kInstVpsraw , "vpsraw" , G(AvxRvmVmi_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , O(XmmYmmMem)|O(Imm) , U , O_660F00(E1,U) , O_660F00(71,4) ), + INST(kInstVpsrld , "vpsrld" , G(AvxRvmVmi_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , O(XmmYmmMem)|O(Imm) , U , O_660F00(D2,U) , O_660F00(72,2) ), + INST(kInstVpsrldq , "vpsrldq" , G(AvxVmi_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , O(Imm) , U , O_660F00(73,3) , U ), + INST(kInstVpsrlq , "vpsrlq" , G(AvxRvmVmi_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , O(XmmYmmMem)|O(Imm) , U , O_660F00(D3,U) , O_660F00(73,2) ), + INST(kInstVpsrlvd , "vpsrlvd" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(45,U) , U ), + INST(kInstVpsrlvq , "vpsrlvq" , G(AvxRvm_P) , F(W), O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F38(45,U) , U ), + INST(kInstVpsrlw , "vpsrlw" , G(AvxRvmVmi_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , O(XmmYmmMem)|O(Imm) , U , O_660F00(D1,U) , O_660F00(71,2) ), + INST(kInstVpsubb , "vpsubb" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(F8,U) , U ), + INST(kInstVpsubd , "vpsubd" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(FA,U) , U ), + INST(kInstVpsubq , "vpsubq" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(FB,U) , U ), + INST(kInstVpsubsb , "vpsubsb" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(E8,U) , U ), + INST(kInstVpsubsw , "vpsubsw" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(E9,U) , U ), + INST(kInstVpsubusb , "vpsubusb" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(D8,U) , U ), + INST(kInstVpsubusw , "vpsubusw" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(D9,U) , U ), + INST(kInstVpsubw , "vpsubw" , 
G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(F9,U) , U ), + INST(kInstVptest , "vptest" , G(AvxRm_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , U , U , O_660F38(17,U) , U ), + INST(kInstVpunpckhbw , "vpunpckhbw" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(68,U) , U ), + INST(kInstVpunpckhdq , "vpunpckhdq" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(6A,U) , U ), + INST(kInstVpunpckhqdq , "vpunpckhqdq" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(6D,U) , U ), + INST(kInstVpunpckhwd , "vpunpckhwd" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(69,U) , U ), + INST(kInstVpunpcklbw , "vpunpcklbw" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(60,U) , U ), + INST(kInstVpunpckldq , "vpunpckldq" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(62,U) , U ), + INST(kInstVpunpcklqdq , "vpunpcklqdq" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(6C,U) , U ), + INST(kInstVpunpcklwd , "vpunpcklwd" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(61,U) , U ), + INST(kInstVpxor , "vpxor" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(EF,U) , U ), + INST(kInstVrcpps , "vrcpps" , G(AvxRm_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , U , U , O_000F00(53,U) , U ), + INST(kInstVrcpss , "vrcpss" , G(AvxRvm) , F(None) , O(Xmm) , O(Xmm) , O(XmmMem) , U , O_F30F00(53,U) , U ), + INST(kInstVroundpd , "vroundpd" , G(AvxRmi_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , O(Imm) , U , O_660F3A(09,U) , U ), + INST(kInstVroundps , "vroundps" , G(AvxRmi_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , O(Imm) , U , O_660F3A(08,U) , U ), + INST(kInstVroundsd , "vroundsd" , G(AvxRvmi) , F(None) , O(Xmm) , O(Xmm) , O(XmmMem) , O(Imm) , O_660F3A(0B,U) , U ), + INST(kInstVroundss , "vroundss" , G(AvxRvmi) , F(None) , O(Xmm) , O(Xmm) , O(XmmMem) , O(Imm) , O_660F3A(0A,U) , U ), + INST(kInstVrsqrtps , "vrsqrtps" , G(AvxRm_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , U , U , O_000F00(52,U) , U ), + INST(kInstVrsqrtss , "vrsqrtss" , G(AvxRvm) , F(None) , O(Xmm) , O(Xmm) , O(XmmMem) , U , O_F30F00(52,U) , U ), + INST(kInstVshufpd , "vshufpd" , G(AvxRvmi_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , O(Imm) , O_660F00(C6,U) , U ), + INST(kInstVshufps , "vshufps" , G(AvxRvmi_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , O(Imm) , O_000F00(C6,U) , U ), + INST(kInstVsqrtpd , "vsqrtpd" , G(AvxRm_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , U , U , O_660F00(51,U) , U ), + INST(kInstVsqrtps , "vsqrtps" , G(AvxRm_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , U , U , O_000F00(51,U) , U ), + INST(kInstVsqrtsd , "vsqrtsd" , G(AvxRvm) , F(None) , O(Xmm) , O(Xmm) , O(XmmMem) , U , O_F20F00(51,U) , U ), + INST(kInstVsqrtss , "vsqrtss" , G(AvxRvm) , F(None) , O(Xmm) , O(Xmm) , O(XmmMem) , U , O_F30F00(51,U) , U ), + INST(kInstVstmxcsr , "vstmxcsr" , G(AvxM) , F(None) , O(Mem) , U , U , U , O_000F00(AE,3) , U ), + INST(kInstVsubpd , "vsubpd" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(5C,U) , U ), + INST(kInstVsubps , "vsubps" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_000F00(5C,U) , U ), + INST(kInstVsubsd , "vsubsd" , G(AvxRvm) , F(None) , O(Xmm) , O(Xmm) , O(XmmMem) , U , O_F20F00(5C,U) , U ), + INST(kInstVsubss , "vsubss" , G(AvxRvm) , F(None) , O(Xmm) , O(Xmm) , O(XmmMem) , U , 
O_F30F00(5C,U) , U ), + INST(kInstVtestpd , "vtestpd" , G(AvxRm_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , U , U , O_660F38(0F,U) , U ), + INST(kInstVtestps , "vtestps" , G(AvxRm_P) , F(None) , O(XmmYmm) , O(XmmYmmMem) , U , U , O_660F38(0E,U) , U ), + INST(kInstVucomisd , "vucomisd" , G(AvxRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_660F00(2E,U) , U ), + INST(kInstVucomiss , "vucomiss" , G(AvxRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_000F00(2E,U) , U ), + INST(kInstVunpckhpd , "vunpckhpd" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(15,U) , U ), + INST(kInstVunpckhps , "vunpckhps" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_000F00(15,U) , U ), + INST(kInstVunpcklpd , "vunpcklpd" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(14,U) , U ), + INST(kInstVunpcklps , "vunpcklps" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_000F00(14,U) , U ), + INST(kInstVxorpd , "vxorpd" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_660F00(57,U) , U ), + INST(kInstVxorps , "vxorps" , G(AvxRvm_P) , F(None) , O(XmmYmm) , O(XmmYmm) , O(XmmYmmMem) , U , O_000F00(57,U) , U ), + INST(kInstVzeroall , "vzeroall" , G(AvxOp) , F(None) , U , U , U , U , O_000F00(77,U)|L, U ), + INST(kInstVzeroupper , "vzeroupper" , G(AvxOp) , F(None) , U , U , U , U , O_000F00(77,U) , U ), + INST(kInstWrfsbase , "wrfsbase" , G(X86Rm) , F(None) , O(Gqd) , U , U , U , O_F30F00(AE,2) , U ), + INST(kInstWrgsbase , "wrgsbase" , G(X86Rm) , F(None) , O(Gqd) , U , U , U , O_F30F00(AE,3) , U ), + INST(kInstXadd , "xadd" , G(X86Xadd) , F(Xchg)|F(Lock) , O(GqdwbMem) , O(Gqdwb) , U , U , O_000F00(C0,U) , U ), + INST(kInstXchg , "xchg" , G(X86Xchg) , F(Xchg)|F(Lock) , O(GqdwbMem) , O(Gqdwb) , U , U , O_000000(86,U) , U ), + INST(kInstXor , "xor" , G(X86Arith) , F(Lock) , O(GqdwbMem) , O(GqdwbMem)|O(Imm) , U , U , O_000000(30,6) , U ), + INST(kInstXorpd , "xorpd" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_660F00(57,U) , U ), + INST(kInstXorps , "xorps" , G(ExtRm) , F(None) , O(Xmm) , O(XmmMem) , U , U , O_000F00(57,U) , U ) +}; + +#undef O_00_X +#undef O_9B_X + +#undef O_F30F3A +#undef O_F30F38 +#undef O_F30F00 +#undef O_F30000 +#undef O_F20F3A +#undef O_F20F38 +#undef O_F20F00 +#undef O_F20000 +#undef O_9B0000 +#undef O_660F3A +#undef O_660F38 +#undef O_660F00 +#undef O_660000 +#undef O_000F3A +#undef O_000F38 +#undef O_000F0F +#undef O_000F01 +#undef O_000F00 +#undef O_000000 + +#undef L +#undef U + +#undef O +#undef F +#undef G + +#undef INST + +const uint32_t _reverseCond[20] = { + /* kCondO -> */ kCondO, + /* kCondNO -> */ kCondNO, + /* kCondB -> */ kCondA, + /* kCondAE -> */ kCondBE, + /* kCondE -> */ kCondE, + /* kCondNE -> */ kCondNE, + /* kCondBE -> */ kCondAE, + /* kCondA -> */ kCondB, + /* kCondS -> */ kCondS, + /* kCondNS -> */ kCondNS, + /* kCondPE -> */ kCondPE, + /* kCondPO -> */ kCondPO, + + /* kCondL -> */ kCondG, + /* kCondGE -> */ kCondLE, + + /* kCondLE -> */ kCondGE, + /* kCondG -> */ kCondL, + + /* kCondFpuUnordered -> */ kCondFpuUnordered, + /* kCondFpuNotUnordered -> */ kCondFpuNotUnordered, + + 0x12, + 0x13 +}; + +#define COND_TO_INST(_Inst_) { \ + _Inst_##o, \ + _Inst_##no, \ + _Inst_##b, \ + _Inst_##ae, \ + _Inst_##e, \ + _Inst_##ne, \ + _Inst_##be, \ + _Inst_##a, \ + _Inst_##s, \ + _Inst_##ns, \ + _Inst_##pe, \ + _Inst_##po, \ + _Inst_##l, \ + _Inst_##ge, \ + _Inst_##le, \ + _Inst_##g, \ + \ + kInstNone, \ + kInstNone, \ + kInstNone, \ + kInstNone \ +} + +const 
uint32_t _condToCmovcc[20] = COND_TO_INST(kInstCmov); +const uint32_t _condToJcc [20] = COND_TO_INST(kInstJ ); +const uint32_t _condToSetcc [20] = COND_TO_INST(kInstSet ); + +#undef COND_TO_INST + +// ============================================================================ +// [asmjit::x86x64::Variables] +// ============================================================================ + +#define C(_Class_) kRegClass##_Class_ +#define D(_Desc_) kVarDesc##_Desc_ + +const VarInfo _varInfo[] = { + /* 00: kVarTypeInt8 */ { kRegTypeGpbLo, 1 , C(Gp), 0 , "gpb" }, + /* 01: kVarTypeUInt8 */ { kRegTypeGpbLo, 1 , C(Gp), 0 , "gpb" }, + /* 02: kVarTypeInt16 */ { kRegTypeGpw , 2 , C(Gp), 0 , "gpw" }, + /* 03: kVarTypeUInt16 */ { kRegTypeGpw , 2 , C(Gp), 0 , "gpw" }, + /* 04: kVarTypeInt32 */ { kRegTypeGpd , 4 , C(Gp), 0 , "gpd" }, + /* 05: kVarTypeUInt32 */ { kRegTypeGpd , 4 , C(Gp), 0 , "gpd" }, + /* 06: kVarTypeInt64 */ { kRegTypeGpq , 8 , C(Gp), 0 , "gpq" }, + /* 07: kVarTypeUInt64 */ { kRegTypeGpq , 8 , C(Gp), 0 , "gpq" }, + /* 08: kVarTypeIntPtr */ { 0 , 0 , C(Gp), 0 , "" }, // Remapped. + /* 09: kVarTypeUIntPtr */ { 0 , 0 , C(Gp), 0 , "" }, // Remapped. + /* 10: kVarTypeFp32 */ { kRegTypeFp , 4 , C(Fp), D(Sp) , "fp" }, + /* 11: kVarTypeFp64 */ { kRegTypeFp , 8 , C(Fp), D(Dp) , "fp" }, + /* 12: kVarTypeFpEx */ { kRegTypeFp , 16, C(Fp), D(Sp) , "fp" }, + /* 13: kVarTypeMm */ { kRegTypeMm , 8 , C(Mm), 0 , "mm" }, + /* 14: kVarTypeXmm */ { kRegTypeXmm , 16, C(Xy), 0 , "xmm" }, + /* 15: kVarTypeXmmSs */ { kRegTypeXmm , 4 , C(Xy), D(Sp) , "xmm" }, + /* 16: kVarTypeXmmSd */ { kRegTypeXmm , 8 , C(Xy), D(Dp) , "xmm" }, + /* 17: kVarTypeXmmPs */ { kRegTypeXmm , 16, C(Xy), D(Sp) | D(Packed), "xmm" }, + /* 18: kVarTypeXmmPd */ { kRegTypeXmm , 16, C(Xy), D(Dp) | D(Packed), "xmm" }, + /* 19: kVarTypeYmm */ { kRegTypeYmm , 32, C(Xy), 0 , "ymm" }, + /* 20: kVarTypeYmmPs */ { kRegTypeYmm , 32, C(Xy), D(Sp) | D(Packed), "ymm" }, + /* 21: kVarTypeYmmPd */ { kRegTypeYmm , 32, C(Xy), D(Dp) | D(Packed), "ymm" } +}; + +#undef D +#undef C + +// ============================================================================ +// [asmjit::x86x64::Registers] +// ============================================================================ + +const GpReg noGpReg(kInvalidReg, kInvalidReg, 0); + +const GpReg al(kRegTypeGpbLo, kRegIndexAx, 1); +const GpReg cl(kRegTypeGpbLo, kRegIndexCx, 1); +const GpReg dl(kRegTypeGpbLo, kRegIndexDx, 1); +const GpReg bl(kRegTypeGpbLo, kRegIndexBx, 1); +const GpReg ah(kRegTypeGpbHi, kRegIndexAx, 1); +const GpReg ch(kRegTypeGpbHi, kRegIndexCx, 1); +const GpReg dh(kRegTypeGpbHi, kRegIndexDx, 1); +const GpReg bh(kRegTypeGpbHi, kRegIndexBx, 1); + +const GpReg ax(kRegTypeGpw, kRegIndexAx, 2); +const GpReg cx(kRegTypeGpw, kRegIndexCx, 2); +const GpReg dx(kRegTypeGpw, kRegIndexDx, 2); +const GpReg bx(kRegTypeGpw, kRegIndexBx, 2); +const GpReg sp(kRegTypeGpw, kRegIndexSp, 2); +const GpReg bp(kRegTypeGpw, kRegIndexBp, 2); +const GpReg si(kRegTypeGpw, kRegIndexSi, 2); +const GpReg di(kRegTypeGpw, kRegIndexDi, 2); + +const GpReg eax(kRegTypeGpd, kRegIndexAx, 4); +const GpReg ecx(kRegTypeGpd, kRegIndexCx, 4); +const GpReg edx(kRegTypeGpd, kRegIndexDx, 4); +const GpReg ebx(kRegTypeGpd, kRegIndexBx, 4); +const GpReg esp(kRegTypeGpd, kRegIndexSp, 4); +const GpReg ebp(kRegTypeGpd, kRegIndexBp, 4); +const GpReg esi(kRegTypeGpd, kRegIndexSi, 4); +const GpReg edi(kRegTypeGpd, kRegIndexDi, 4); + +const FpReg fp0(kRegTypeFp, kRegIndexFp0, 10); +const FpReg fp1(kRegTypeFp, kRegIndexFp1, 10); +const FpReg 
fp2(kRegTypeFp, kRegIndexFp2, 10);
+const FpReg fp3(kRegTypeFp, kRegIndexFp3, 10);
+const FpReg fp4(kRegTypeFp, kRegIndexFp4, 10);
+const FpReg fp5(kRegTypeFp, kRegIndexFp5, 10);
+const FpReg fp6(kRegTypeFp, kRegIndexFp6, 10);
+const FpReg fp7(kRegTypeFp, kRegIndexFp7, 10);
+
+const MmReg mm0(kRegTypeMm, kRegIndexMm0, 8);
+const MmReg mm1(kRegTypeMm, kRegIndexMm1, 8);
+const MmReg mm2(kRegTypeMm, kRegIndexMm2, 8);
+const MmReg mm3(kRegTypeMm, kRegIndexMm3, 8);
+const MmReg mm4(kRegTypeMm, kRegIndexMm4, 8);
+const MmReg mm5(kRegTypeMm, kRegIndexMm5, 8);
+const MmReg mm6(kRegTypeMm, kRegIndexMm6, 8);
+const MmReg mm7(kRegTypeMm, kRegIndexMm7, 8);
+
+const XmmReg xmm0(kRegTypeXmm, kRegIndexXmm0, 16);
+const XmmReg xmm1(kRegTypeXmm, kRegIndexXmm1, 16);
+const XmmReg xmm2(kRegTypeXmm, kRegIndexXmm2, 16);
+const XmmReg xmm3(kRegTypeXmm, kRegIndexXmm3, 16);
+const XmmReg xmm4(kRegTypeXmm, kRegIndexXmm4, 16);
+const XmmReg xmm5(kRegTypeXmm, kRegIndexXmm5, 16);
+const XmmReg xmm6(kRegTypeXmm, kRegIndexXmm6, 16);
+const XmmReg xmm7(kRegTypeXmm, kRegIndexXmm7, 16);
+
+const YmmReg ymm0(kRegTypeYmm, kRegIndexYmm0, 32);
+const YmmReg ymm1(kRegTypeYmm, kRegIndexYmm1, 32);
+const YmmReg ymm2(kRegTypeYmm, kRegIndexYmm2, 32);
+const YmmReg ymm3(kRegTypeYmm, kRegIndexYmm3, 32);
+const YmmReg ymm4(kRegTypeYmm, kRegIndexYmm4, 32);
+const YmmReg ymm5(kRegTypeYmm, kRegIndexYmm5, 32);
+const YmmReg ymm6(kRegTypeYmm, kRegIndexYmm6, 32);
+const YmmReg ymm7(kRegTypeYmm, kRegIndexYmm7, 32);
+
+const SegReg cs(kRegTypeSeg, kSegCs, 2);
+const SegReg ss(kRegTypeSeg, kSegSs, 2);
+const SegReg ds(kRegTypeSeg, kSegDs, 2);
+const SegReg es(kRegTypeSeg, kSegEs, 2);
+const SegReg fs(kRegTypeSeg, kSegFs, 2);
+const SegReg gs(kRegTypeSeg, kSegGs, 2);
+
+// ============================================================================
+// [asmjit::Mem - abs[]]
+// ============================================================================
+
+Mem ptr_abs(Ptr pAbs, int32_t disp, uint32_t size) {
+  Mem m(DontInitialize);
+
+  m._init_packed_op_sz_b0_b1_id(kOperandTypeMem, size, kMemTypeAbsolute, 0, kInvalidValue);
+  m._vmem.index = kInvalidValue;
+  m._vmem.displacement = static_cast<int32_t>((intptr_t)(pAbs + disp));
+
+  return m;
+}
+
+Mem ptr_abs(Ptr pAbs, const X86Reg& index, uint32_t shift, int32_t disp, uint32_t size) {
+  Mem m(DontInitialize);
+  uint32_t flags = shift << kMemShiftIndex;
+
+  if (index.isXmm()) flags |= kMemVSibXmm << kMemVSibIndex;
+  if (index.isYmm()) flags |= kMemVSibYmm << kMemVSibIndex;
+
+  m._init_packed_op_sz_b0_b1_id(kOperandTypeMem, size, kMemTypeAbsolute, flags, kInvalidValue);
+  m._vmem.index = index.getRegIndex();
+  m._vmem.displacement = static_cast<int32_t>((intptr_t)(pAbs + disp));
+
+  return m;
+}
+
+Mem ptr_abs(Ptr pAbs, const X86Var& index, uint32_t shift, int32_t disp, uint32_t size) {
+  Mem m(DontInitialize);
+  uint32_t flags = shift << kMemShiftIndex;
+
+  if (index.isXmm()) flags |= kMemVSibXmm << kMemVSibIndex;
+  if (index.isYmm()) flags |= kMemVSibYmm << kMemVSibIndex;
+
+  m._init_packed_op_sz_b0_b1_id(kOperandTypeMem, size, kMemTypeAbsolute, flags, kInvalidValue);
+  m._vmem.index = index.getId();
+  m._vmem.displacement = static_cast<int32_t>((intptr_t)(pAbs + disp));
+
+  return m;
+}
+
+} // x86x64 namespace
+} // asmjit namespace
+
+// ============================================================================
+// [asmjit::x86]
+// ============================================================================
+
+#if defined(ASMJIT_BUILD_X86)
+
+namespace asmjit {
+namespace x86 {
+
+const uint8_t
_varMapping[kVarTypeCount] = { + /* 00: kVarTypeInt8 */ kVarTypeInt8, + /* 01: kVarTypeUInt8 */ kVarTypeUInt8, + /* 02: kVarTypeInt16 */ kVarTypeInt16, + /* 03: kVarTypeUInt16 */ kVarTypeUInt16, + /* 04: kVarTypeInt32 */ kVarTypeInt32, + /* 05: kVarTypeUInt32 */ kVarTypeUInt32, + /* 06: kVarTypeInt64 */ kVarTypeInt32, // Remapped (Invalid in 32-bit mode). + /* 07: kVarTypeUInt64 */ kVarTypeUInt32, // Remapped (Invalid in 32-bit mode). + /* 08: kVarTypeIntPtr */ kVarTypeInt32, // Remapped. + /* 09: kVarTypeUIntPtr */ kVarTypeUInt32, // Remapped. + /* 10: kVarTypeFp32 */ kVarTypeFp32, + /* 11: kVarTypeFp64 */ kVarTypeFp64, + /* 12: kVarTypeFpEx */ kVarTypeFpEx, + /* 13: kVarTypeMm */ kVarTypeMm, + /* 14: kVarTypeXmm */ kVarTypeXmm, + /* 15: kVarTypeXmmSs */ kVarTypeXmmSs, + /* 16: kVarTypeXmmSd */ kVarTypeXmmSd, + /* 17: kVarTypeXmmPs */ kVarTypeXmmPs, + /* 18: kVarTypeXmmPd */ kVarTypeXmmPd, + /* 19: kVarTypeYmm */ kVarTypeYmm, + /* 20: kVarTypeYmmPs */ kVarTypeYmmPs, + /* 21: kVarTypeYmmPd */ kVarTypeYmmPd +}; + +const GpReg zax(kRegTypeGpd, kRegIndexAx, 4); +const GpReg zcx(kRegTypeGpd, kRegIndexCx, 4); +const GpReg zdx(kRegTypeGpd, kRegIndexDx, 4); +const GpReg zbx(kRegTypeGpd, kRegIndexBx, 4); +const GpReg zsp(kRegTypeGpd, kRegIndexSp, 4); +const GpReg zbp(kRegTypeGpd, kRegIndexBp, 4); +const GpReg zsi(kRegTypeGpd, kRegIndexSi, 4); +const GpReg zdi(kRegTypeGpd, kRegIndexDi, 4); + +} // x86 namespace +} // asmjit namespace + +#endif // ASMJIT_BUILD_X86 + +// ============================================================================ +// [asmjit::x64] +// ============================================================================ + +#if defined(ASMJIT_BUILD_X64) +namespace asmjit { +namespace x64 { + +const uint8_t _varMapping[kVarTypeCount] = { + /* 00: kVarTypeInt8 */ kVarTypeInt8, + /* 01: kVarTypeUInt8 */ kVarTypeUInt8, + /* 02: kVarTypeInt16 */ kVarTypeInt16, + /* 03: kVarTypeUInt16 */ kVarTypeUInt16, + /* 04: kVarTypeInt32 */ kVarTypeInt32, + /* 05: kVarTypeUInt32 */ kVarTypeUInt32, + /* 06: kVarTypeInt64 */ kVarTypeInt64, + /* 07: kVarTypeUInt64 */ kVarTypeUInt64, + /* 08: kVarTypeIntPtr */ kVarTypeInt64, // Remapped. + /* 09: kVarTypeUIntPtr */ kVarTypeUInt64, // Remapped. 
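+  // Entries 06-09 are the mode-dependent ones: unlike the 32-bit table
+  // above, Int64/UInt64 map to themselves here and only the pointer-sized
+  // types are remapped, to their native 64-bit equivalents.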
+ /* 10: kVarTypeFp32 */ kVarTypeFp32, + /* 11: kVarTypeFp64 */ kVarTypeFp64, + /* 12: kVarTypeFpEx */ kVarTypeFpEx, + /* 13: kVarTypeMm */ kVarTypeMm, + /* 14: kVarTypeXmm */ kVarTypeXmm, + /* 15: kVarTypeXmmSs */ kVarTypeXmmSs, + /* 16: kVarTypeXmmSd */ kVarTypeXmmSd, + /* 17: kVarTypeXmmPs */ kVarTypeXmmPs, + /* 18: kVarTypeXmmPd */ kVarTypeXmmPd, + /* 19: kVarTypeYmm */ kVarTypeYmm, + /* 20: kVarTypeYmmPs */ kVarTypeYmmPs, + /* 21: kVarTypeYmmPd */ kVarTypeYmmPd +}; + +const GpReg spl(kRegTypeGpbLo, kRegIndexSp, 1); +const GpReg bpl(kRegTypeGpbLo, kRegIndexBp, 1); +const GpReg sil(kRegTypeGpbLo, kRegIndexSi, 1); +const GpReg dil(kRegTypeGpbLo, kRegIndexDi, 1); +const GpReg r8b(kRegTypeGpbLo, kRegIndexR8, 1); +const GpReg r9b(kRegTypeGpbLo, kRegIndexR9, 1); +const GpReg r10b(kRegTypeGpbLo, kRegIndexR10, 1); +const GpReg r11b(kRegTypeGpbLo, kRegIndexR11, 1); +const GpReg r12b(kRegTypeGpbLo, kRegIndexR12, 1); +const GpReg r13b(kRegTypeGpbLo, kRegIndexR13, 1); +const GpReg r14b(kRegTypeGpbLo, kRegIndexR14, 1); +const GpReg r15b(kRegTypeGpbLo, kRegIndexR15, 1); + +const GpReg r8w(kRegTypeGpw, kRegIndexR8, 2); +const GpReg r9w(kRegTypeGpw, kRegIndexR9, 2); +const GpReg r10w(kRegTypeGpw, kRegIndexR10, 2); +const GpReg r11w(kRegTypeGpw, kRegIndexR11, 2); +const GpReg r12w(kRegTypeGpw, kRegIndexR12, 2); +const GpReg r13w(kRegTypeGpw, kRegIndexR13, 2); +const GpReg r14w(kRegTypeGpw, kRegIndexR14, 2); +const GpReg r15w(kRegTypeGpw, kRegIndexR15, 2); + +const GpReg r8d(kRegTypeGpd, kRegIndexR8, 4); +const GpReg r9d(kRegTypeGpd, kRegIndexR9, 4); +const GpReg r10d(kRegTypeGpd, kRegIndexR10, 4); +const GpReg r11d(kRegTypeGpd, kRegIndexR11, 4); +const GpReg r12d(kRegTypeGpd, kRegIndexR12, 4); +const GpReg r13d(kRegTypeGpd, kRegIndexR13, 4); +const GpReg r14d(kRegTypeGpd, kRegIndexR14, 4); +const GpReg r15d(kRegTypeGpd, kRegIndexR15, 4); + +const GpReg rax(kRegTypeGpq, kRegIndexAx, 8); +const GpReg rcx(kRegTypeGpq, kRegIndexCx, 8); +const GpReg rdx(kRegTypeGpq, kRegIndexDx, 8); +const GpReg rbx(kRegTypeGpq, kRegIndexBx, 8); +const GpReg rsp(kRegTypeGpq, kRegIndexSp, 8); +const GpReg rbp(kRegTypeGpq, kRegIndexBp, 8); +const GpReg rsi(kRegTypeGpq, kRegIndexSi, 8); +const GpReg rdi(kRegTypeGpq, kRegIndexDi, 8); + +const GpReg r8(kRegTypeGpq, kRegIndexR8, 8); +const GpReg r9(kRegTypeGpq, kRegIndexR9, 8); +const GpReg r10(kRegTypeGpq, kRegIndexR10, 8); +const GpReg r11(kRegTypeGpq, kRegIndexR11, 8); +const GpReg r12(kRegTypeGpq, kRegIndexR12, 8); +const GpReg r13(kRegTypeGpq, kRegIndexR13, 8); +const GpReg r14(kRegTypeGpq, kRegIndexR14, 8); +const GpReg r15(kRegTypeGpq, kRegIndexR15, 8); + +const GpReg zax(kRegTypeGpq, kRegIndexAx, 8); +const GpReg zcx(kRegTypeGpq, kRegIndexCx, 8); +const GpReg zdx(kRegTypeGpq, kRegIndexDx, 8); +const GpReg zbx(kRegTypeGpq, kRegIndexBx, 8); +const GpReg zsp(kRegTypeGpq, kRegIndexSp, 8); +const GpReg zbp(kRegTypeGpq, kRegIndexBp, 8); +const GpReg zsi(kRegTypeGpq, kRegIndexSi, 8); +const GpReg zdi(kRegTypeGpq, kRegIndexDi, 8); + +const XmmReg xmm8(kRegTypeXmm, kRegIndexXmm8, 16); +const XmmReg xmm9(kRegTypeXmm, kRegIndexXmm9, 16); +const XmmReg xmm10(kRegTypeXmm, kRegIndexXmm10, 16); +const XmmReg xmm11(kRegTypeXmm, kRegIndexXmm11, 16); +const XmmReg xmm12(kRegTypeXmm, kRegIndexXmm12, 16); +const XmmReg xmm13(kRegTypeXmm, kRegIndexXmm13, 16); +const XmmReg xmm14(kRegTypeXmm, kRegIndexXmm14, 16); +const XmmReg xmm15(kRegTypeXmm, kRegIndexXmm15, 16); + +const YmmReg ymm8(kRegTypeYmm, kRegIndexYmm8, 32); +const YmmReg ymm9(kRegTypeYmm, kRegIndexYmm9, 32); +const YmmReg 
ymm10(kRegTypeYmm, kRegIndexYmm10, 32); +const YmmReg ymm11(kRegTypeYmm, kRegIndexYmm11, 32); +const YmmReg ymm12(kRegTypeYmm, kRegIndexYmm12, 32); +const YmmReg ymm13(kRegTypeYmm, kRegIndexYmm13, 32); +const YmmReg ymm14(kRegTypeYmm, kRegIndexYmm14, 32); +const YmmReg ymm15(kRegTypeYmm, kRegIndexYmm15, 32); + +} // x64 namespace +} // asmjit namespace + +#endif // ASMJIT_BUILD_X64 + +#include "../base/apiend.h" + +// [Guard] +#endif // ASMJIT_BUILD_X86 || ASMJIT_BUILD_X64 diff --git a/src/asmjit/x86/x86defs.h b/src/asmjit/x86/x86defs.h new file mode 100644 index 0000000..2bc1130 --- /dev/null +++ b/src/asmjit/x86/x86defs.h @@ -0,0 +1,4209 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. +// +// [License] +// Zlib - See LICENSE.md file in the package. + +// [Guard] +#ifndef _ASMJIT_X86_X86DEFS_H +#define _ASMJIT_X86_X86DEFS_H + +// [Dependencies - AsmJit] +#include "../base/assembler.h" +#include "../base/assert.h" +#include "../base/defs.h" +#include "../base/vectypes.h" +#include "../base/compiler.h" +#include "../base/intutil.h" + +// [Api-Begin] +#include "../base/apibegin.h" + +namespace asmjit { +namespace x86x64 { + +//! @addtogroup asmjit_x86x64 +//! @{ + +// ============================================================================ +// [Forward Declarations] +// ============================================================================ + +struct InstInfo; +struct VarInfo; + +struct X86Reg; +struct X86Var; + +struct GpReg; +struct GpVar; + +struct MmReg; +struct MmVar; + +struct XmmReg; +struct XmmVar; + +struct YmmReg; +struct YmmVar; + +// ============================================================================ +// [asmjit::x86x64::Typedefs] +// ============================================================================ + +typedef Vec64Data MmData; +typedef Vec128Data XmmData; +typedef Vec256Data YmmData; + +// ============================================================================ +// [asmjit::x86x64::Instruction and Condition Codes] +// ============================================================================ + +//! @internal +//! +//! @brief X86/X64 instructions' names. +ASMJIT_VAR const char _instName[]; + +//! @internal +//! +//! @brief X86/X64 instructions' information. +ASMJIT_VAR const InstInfo _instInfo[]; + +//! @internal +//! +//! @brief X86/X64 condition codes to reversed condition codes map. +ASMJIT_VAR const uint32_t _reverseCond[20]; + +//! @internal +//! +//! @brief X86X64 condition codes to "cmovcc" group map. +ASMJIT_VAR const uint32_t _condToCmovcc[20]; + +//! @internal +//! +//! @brief X86X64 condition codes to "jcc" group map. +ASMJIT_VAR const uint32_t _condToJcc[20]; + +//! @internal +//! +//! @brief X86X64 condition codes to "setcc" group map. +ASMJIT_VAR const uint32_t _condToSetcc[20]; + +// ============================================================================ +// [asmjit::x86x64::Variables] +// ============================================================================ + +ASMJIT_VAR const VarInfo _varInfo[]; + +// ============================================================================ +// [asmjit::x86x64::kRegClass] +// ============================================================================ + +//! @brief X86 variable class. +ASMJIT_ENUM(kRegClass) { + // kRegClassGp defined in base/defs.h; it's used by all implementations. + + //! @brief X86/X64 Fp register class. + kRegClassFp = 1, + //! @brief X86/X64 Mm register class. + kRegClassMm = 2, + //! @brief X86/X64 Xmm/Ymm register class. 
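+  //!
+  //! Xmm and Ymm registers are kept in one class, because each Ymm register
+  //! aliases the corresponding Xmm register in its low 128 bits, so both
+  //! presumably compete for the same physical registers.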
+  kRegClassXy = 3,
+
+  //! @brief Count of X86/X64 register classes.
+  kRegClassCount = 4
+};
+
+// ============================================================================
+// [asmjit::x86x64::kRegCount]
+// ============================================================================
+
+ASMJIT_ENUM(kRegCount) {
+  //! @brief Count of Fp registers (8).
+  kRegCountFp = 8,
+  //! @brief Count of Mm registers (8).
+  kRegCountMm = 8,
+  //! @brief Count of segment registers (6).
+  kRegCountSeg = 6
+};
+
+// ============================================================================
+// [asmjit::x86x64::kRegType]
+// ============================================================================
+
+//! @brief X86 register types.
+ASMJIT_ENUM(kRegType) {
+  //! @brief Gpb-lo register (AL, BL, CL, DL, ...).
+  kRegTypeGpbLo = 0x01,
+  //! @brief Gpb-hi register (AH, BH, CH, DH only).
+  kRegTypeGpbHi = 0x02,
+
+  //! @internal
+  //!
+  //! @brief Gpb-hi register patched to native index (4-7).
+  kRegTypePatchedGpbHi = kRegTypeGpbLo | kRegTypeGpbHi,
+
+  //! @brief Gpw register.
+  kRegTypeGpw = 0x10,
+  //! @brief Gpd register.
+  kRegTypeGpd = 0x20,
+  //! @brief Gpq register.
+  kRegTypeGpq = 0x30,
+
+  //! @brief Fp register.
+  kRegTypeFp = 0x50,
+  //! @brief Mm register.
+  kRegTypeMm = 0x60,
+
+  //! @brief Xmm register.
+  kRegTypeXmm = 0x70,
+  //! @brief Ymm register.
+  kRegTypeYmm = 0x80,
+  //! @brief Zmm register.
+  kRegTypeZmm = 0x90,
+
+  //! @brief Segment register.
+  kRegTypeSeg = 0xF0
+};
+
+// ============================================================================
+// [asmjit::x86x64::kRegIndex]
+// ============================================================================
+
+//! @brief X86 register indices.
+//!
+//! These codes are real; don't mix them with the @c REG enum and don't use
+//! these values if you are not writing AsmJit code.
+ASMJIT_ENUM(kRegIndex) {
+  //! @brief Index of AL/AH/AX/EAX/RAX registers.
+  kRegIndexAx = 0,
+  //! @brief Index of CL/CH/CX/ECX/RCX registers.
+  kRegIndexCx = 1,
+  //! @brief Index of DL/DH/DX/EDX/RDX registers.
+  kRegIndexDx = 2,
+  //! @brief Index of BL/BH/BX/EBX/RBX registers.
+  kRegIndexBx = 3,
+  //! @brief Index of SPL/SP/ESP/RSP registers.
+  kRegIndexSp = 4,
+  //! @brief Index of BPL/BP/EBP/RBP registers.
+  kRegIndexBp = 5,
+  //! @brief Index of SIL/SI/ESI/RSI registers.
+  kRegIndexSi = 6,
+  //! @brief Index of DIL/DI/EDI/RDI registers.
+  kRegIndexDi = 7,
+  //! @brief Index of R8B/R8W/R8D/R8 registers (64-bit only).
+  kRegIndexR8 = 8,
+  //! @brief Index of R9B/R9W/R9D/R9 registers (64-bit only).
+  kRegIndexR9 = 9,
+  //! @brief Index of R10B/R10W/R10D/R10 registers (64-bit only).
+  kRegIndexR10 = 10,
+  //! @brief Index of R11B/R11W/R11D/R11 registers (64-bit only).
+  kRegIndexR11 = 11,
+  //! @brief Index of R12B/R12W/R12D/R12 registers (64-bit only).
+  kRegIndexR12 = 12,
+  //! @brief Index of R13B/R13W/R13D/R13 registers (64-bit only).
+  kRegIndexR13 = 13,
+  //! @brief Index of R14B/R14W/R14D/R14 registers (64-bit only).
+  kRegIndexR14 = 14,
+  //! @brief Index of R15B/R15W/R15D/R15 registers (64-bit only).
+  kRegIndexR15 = 15,
+
+  //! @brief Index of FP0 register.
+  kRegIndexFp0 = 0,
+  //! @brief Index of FP1 register.
+  kRegIndexFp1 = 1,
+  //! @brief Index of FP2 register.
+  kRegIndexFp2 = 2,
+  //! @brief Index of FP3 register.
+  kRegIndexFp3 = 3,
+  //! @brief Index of FP4 register.
+  kRegIndexFp4 = 4,
+  //! @brief Index of FP5 register.
+  kRegIndexFp5 = 5,
+  //! @brief Index of FP6 register.
+  kRegIndexFp6 = 6,
+  //! @brief Index of FP7 register.
+ kRegIndexFp7 = 7, + + //! @brief Index of MM0 register. + kRegIndexMm0 = 0, + //! @brief Index of MM1 register. + kRegIndexMm1 = 1, + //! @brief Index of MM2 register. + kRegIndexMm2 = 2, + //! @brief Index of MM3 register. + kRegIndexMm3 = 3, + //! @brief Index of MM4 register. + kRegIndexMm4 = 4, + //! @brief Index of MM5 register. + kRegIndexMm5 = 5, + //! @brief Index of MM6 register. + kRegIndexMm6 = 6, + //! @brief Index of MM7 register. + kRegIndexMm7 = 7, + + //! @brief Index of XMM0 register. + kRegIndexXmm0 = 0, + //! @brief Index of XMM1 register. + kRegIndexXmm1 = 1, + //! @brief Index of XMM2 register. + kRegIndexXmm2 = 2, + //! @brief Index of XMM3 register. + kRegIndexXmm3 = 3, + //! @brief Index of XMM4 register. + kRegIndexXmm4 = 4, + //! @brief Index of XMM5 register. + kRegIndexXmm5 = 5, + //! @brief Index of XMM6 register. + kRegIndexXmm6 = 6, + //! @brief Index of XMM7 register. + kRegIndexXmm7 = 7, + //! @brief Index of XMM8 register (64-bit only). + kRegIndexXmm8 = 8, + //! @brief Index of XMM9 register (64-bit only). + kRegIndexXmm9 = 9, + //! @brief Index of XMM10 register (64-bit only). + kRegIndexXmm10 = 10, + //! @brief Index of XMM11 register (64-bit only). + kRegIndexXmm11 = 11, + //! @brief Index of XMM12 register (64-bit only). + kRegIndexXmm12 = 12, + //! @brief Index of XMM13 register (64-bit only). + kRegIndexXmm13 = 13, + //! @brief Index of XMM14 register (64-bit only). + kRegIndexXmm14 = 14, + //! @brief Index of XMM15 register (64-bit only). + kRegIndexXmm15 = 15, + + //! @brief Index of YMM0 register. + kRegIndexYmm0 = 0, + //! @brief Index of YMM1 register. + kRegIndexYmm1 = 1, + //! @brief Index of YMM2 register. + kRegIndexYmm2 = 2, + //! @brief Index of YMM3 register. + kRegIndexYmm3 = 3, + //! @brief Index of YMM4 register. + kRegIndexYmm4 = 4, + //! @brief Index of YMM5 register. + kRegIndexYmm5 = 5, + //! @brief Index of YMM6 register. + kRegIndexYmm6 = 6, + //! @brief Index of YMM7 register. + kRegIndexYmm7 = 7, + //! @brief Index of YMM8 register (64-bit only). + kRegIndexYmm8 = 8, + //! @brief Index of YMM9 register (64-bit only). + kRegIndexYmm9 = 9, + //! @brief Index of YMM10 register (64-bit only). + kRegIndexYmm10 = 10, + //! @brief Index of YMM11 register (64-bit only). + kRegIndexYmm11 = 11, + //! @brief Index of YMM12 register (64-bit only). + kRegIndexYmm12 = 12, + //! @brief Index of YMM13 register (64-bit only). + kRegIndexYmm13 = 13, + //! @brief Index of YMM14 register (64-bit only). + kRegIndexYmm14 = 14, + //! @brief Index of YMM15 register (64-bit only). + kRegIndexYmm15 = 15 +}; + +// ============================================================================ +// [asmjit::x86x64::kSeg] +// ============================================================================ + +//! @brief X86 segment codes. +ASMJIT_ENUM(kSeg) { + //! @brief No segment. + kSegDefault = 0, + //! @brief Es segment. + kSegEs = 1, + //! @brief Cs segment. + kSegCs = 2, + //! @brief Ss segment. + kSegSs = 3, + //! @brief Ds segment. + kSegDs = 4, + //! @brief Fs segment. + kSegFs = 5, + //! @brief Gs segment. + kSegGs = 6 +}; + +// ============================================================================ +// [asmjit::x86x64::kMemVSib] +// ============================================================================ + +//! @brief X86 index register legacy and AVX2 (VSIB) support. +ASMJIT_ENUM(kMemVSib) { + //! @brief Memory operand uses Gp or no index register. + kMemVSibGpz = 0, + //! @brief Memory operand uses Xmm or no index register. 
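+  //!
+  //! A VSIB operand can be built with the @c ptr_abs() overloads that take
+  //! an Xmm/Ymm index register; the index type selects the VSIB mode
+  //! automatically. A minimal sketch, assuming @c table is a hypothetical
+  //! address of type @c Ptr:
+  //!
+  //! @code
+  //! // Builds [table + xmm1 * 8] (shift 3 selects a scale of 8), a form
+  //! // usable by the AVX2 gather instructions.
+  //! Mem m = ptr_abs(table, xmm1, 3, 0, 8);
+  //! @endcode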
+ kMemVSibXmm = 1, + //! @brief Memory operand uses Ymm or no index register. + kMemVSibYmm = 2 +}; + +// ============================================================================ +// [asmjit::x86x64::kMemFlags] +// ============================================================================ + +//! @internal +//! +//! @brief X86 specific memory flags. +ASMJIT_ENUM(kMemFlags) { + kMemSegBits = 0x7, + kMemSegIndex = 0, + kMemSegMask = kMemSegBits << kMemSegIndex, + + kMemGpdBits = 0x1, + kMemGpdIndex = 3, + kMemGpdMask = kMemGpdBits << kMemGpdIndex, + + kMemVSibBits = 0x3, + kMemVSibIndex = 4, + kMemVSibMask = kMemVSibBits << kMemVSibIndex, + + kMemShiftBits = 0x3, + kMemShiftIndex = 6, + kMemShiftMask = kMemShiftBits << kMemShiftIndex +}; + +// ============================================================================ +// [asmjit::x86x64::kPrefetchHint] +// ============================================================================ + +//! @brief X86 Prefetch hints. +ASMJIT_ENUM(kPrefetchHint) { + //! @brief Prefetch using NT hint. + kPrefetchNta = 0, + //! @brief Prefetch to L0 cache. + kPrefetchT0 = 1, + //! @brief Prefetch to L1 cache. + kPrefetchT1 = 2, + //! @brief Prefetch to L2 cache. + kPrefetchT2 = 3 +}; + +// ============================================================================ +// [asmjit::x86x64::kFPSW] +// ============================================================================ + +//! @brief X86 FPU status Word. +ASMJIT_ENUM(kFPSW) { + kFPSW_Invalid = 0x0001, + kFPSW_Denormalized = 0x0002, + kFPSW_DivByZero = 0x0004, + kFPSW_Overflow = 0x0008, + kFPSW_Underflow = 0x0010, + kFPSW_Precision = 0x0020, + kFPSW_StackFault = 0x0040, + kFPSW_Interrupt = 0x0080, + kFPSW_C0 = 0x0100, + kFPSW_C1 = 0x0200, + kFPSW_C2 = 0x0400, + kFPSW_Top = 0x3800, + kFPSW_C3 = 0x4000, + kFPSW_Busy = 0x8000 +}; + +// ============================================================================ +// [asmjit::x86x64::kFPCW] +// ============================================================================ + +//! @brief X86 FPU control Word. +ASMJIT_ENUM(kFPCW) { + kFPCW_EM_Mask = 0x003F, // Bits 0-5. + kFPCW_EM_Invalid = 0x0001, + kFPCW_EM_Denormal = 0x0002, + kFPCW_EM_DivByZero = 0x0004, + kFPCW_EM_Overflow = 0x0008, + kFPCW_EM_Underflow = 0x0010, + kFPCW_EM_Inexact = 0x0020, + + kFPCW_PC_Mask = 0x0300, // Bits 8-9. + kFPCW_PC_Float = 0x0000, + kFPCW_PC_Reserved = 0x0100, + kFPCW_PC_Double = 0x0200, + kFPCW_PC_Extended = 0x0300, + + kFPCW_RC_Mask = 0x0C00, // Bits 10-11. + kFPCW_RC_Nearest = 0x0000, + kFPCW_RC_Down = 0x0400, + kFPCW_RC_Up = 0x0800, + kFPCW_RC_Truncate = 0x0C00, + + kFPCW_IC_Mask = 0x1000, // Bit 12. + kFPCW_IC_Projective = 0x0000, + kFPCW_IC_Affine = 0x1000 +}; + +// ============================================================================ +// [asmjit::x86x64::kInstCode] +// ============================================================================ + +//! @brief X86 instruction codes. +//! +//! Note that these instruction codes are AsmJit specific. Each instruction has +//! a unique ID that is used as an index to AsmJit instruction table. 
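+//!
+//! A minimal lookup sketch (using the @ref _instInfo table declared above;
+//! the accessors are defined by @ref InstInfo later in this header):
+//!
+//! @code
+//! // Every instruction code indexes the instruction table directly.
+//! const InstInfo& info = _instInfo[kInstAdd];
+//! const char* name = info.getName();   // "add".
+//! uint32_t group = info.getGroup();    // kInstGroupX86Arith.
+//! @endcode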
+ASMJIT_ENUM(kInstCode) { + kInstAdc = 1, // X86/X64 + kInstAdd, // X86/X64 + kInstAddpd, // SSE2 + kInstAddps, // SSE + kInstAddsd, // SSE2 + kInstAddss, // SSE + kInstAddsubpd, // SSE3 + kInstAddsubps, // SSE3 + kInstAesdec, // AESNI + kInstAesdeclast, // AESNI + kInstAesenc, // AESNI + kInstAesenclast, // AESNI + kInstAesimc, // AESNI + kInstAeskeygenassist, // AESNI + kInstAnd, // X86/X64 + kInstAndn, // BMI + kInstAndnpd, // SSE2 + kInstAndnps, // SSE + kInstAndpd, // SSE2 + kInstAndps, // SSE + kInstBextr, // BMI + kInstBlendpd, // SSE4.1 + kInstBlendps, // SSE4.1 + kInstBlendvpd, // SSE4.1 + kInstBlendvps, // SSE4.1 + kInstBlsi, // BMI + kInstBlsmsk, // BMI + kInstBlsr, // BMI + kInstBsf, // X86/X64 + kInstBsr, // X86/X64 + kInstBswap, // X86/X64 (i486) + kInstBt, // X86/X64 + kInstBtc, // X86/X64 + kInstBtr, // X86/X64 + kInstBts, // X86/X64 + kInstBzhi, // BMI2 + kInstCall, // X86/X64 + kInstCbw, // X86/X64 + kInstCdq, // X86/X64 + kInstCdqe, // X64 only + kInstClc, // X86/X64 + kInstCld, // X86/X64 + kInstClflush, // SSE2 + kInstCmc, // X86/X64 + kInstCmova, // X86/X64 (cmovcc) (i586) + kInstCmovae, // X86/X64 (cmovcc) (i586) + kInstCmovb, // X86/X64 (cmovcc) (i586) + kInstCmovbe, // X86/X64 (cmovcc) (i586) + kInstCmovc, // X86/X64 (cmovcc) (i586) + kInstCmove, // X86/X64 (cmovcc) (i586) + kInstCmovg, // X86/X64 (cmovcc) (i586) + kInstCmovge, // X86/X64 (cmovcc) (i586) + kInstCmovl, // X86/X64 (cmovcc) (i586) + kInstCmovle, // X86/X64 (cmovcc) (i586) + kInstCmovna, // X86/X64 (cmovcc) (i586) + kInstCmovnae, // X86/X64 (cmovcc) (i586) + kInstCmovnb, // X86/X64 (cmovcc) (i586) + kInstCmovnbe, // X86/X64 (cmovcc) (i586) + kInstCmovnc, // X86/X64 (cmovcc) (i586) + kInstCmovne, // X86/X64 (cmovcc) (i586) + kInstCmovng, // X86/X64 (cmovcc) (i586) + kInstCmovnge, // X86/X64 (cmovcc) (i586) + kInstCmovnl, // X86/X64 (cmovcc) (i586) + kInstCmovnle, // X86/X64 (cmovcc) (i586) + kInstCmovno, // X86/X64 (cmovcc) (i586) + kInstCmovnp, // X86/X64 (cmovcc) (i586) + kInstCmovns, // X86/X64 (cmovcc) (i586) + kInstCmovnz, // X86/X64 (cmovcc) (i586) + kInstCmovo, // X86/X64 (cmovcc) (i586) + kInstCmovp, // X86/X64 (cmovcc) (i586) + kInstCmovpe, // X86/X64 (cmovcc) (i586) + kInstCmovpo, // X86/X64 (cmovcc) (i586) + kInstCmovs, // X86/X64 (cmovcc) (i586) + kInstCmovz, // X86/X64 (cmovcc) (i586) + kInstCmp, // X86/X64 + kInstCmppd, // SSE2 + kInstCmpps, // SSE + kInstCmpsd, // SSE2 + kInstCmpss, // SSE + kInstCmpxchg, // X86/X64 (i486) + kInstCmpxchg16b, // X64 only + kInstCmpxchg8b, // X86/X64 (i586) + kInstComisd, // SSE2 + kInstComiss, // SSE + kInstCpuid, // X86/X64 (i486) + kInstCqo, // X64 only + kInstCrc32, // SSE4.2 + kInstCvtdq2pd, // SSE2 + kInstCvtdq2ps, // SSE2 + kInstCvtpd2dq, // SSE2 + kInstCvtpd2pi, // SSE2 + kInstCvtpd2ps, // SSE2 + kInstCvtpi2pd, // SSE2 + kInstCvtpi2ps, // SSE + kInstCvtps2dq, // SSE2 + kInstCvtps2pd, // SSE2 + kInstCvtps2pi, // SSE + kInstCvtsd2si, // SSE2 + kInstCvtsd2ss, // SSE2 + kInstCvtsi2sd, // SSE2 + kInstCvtsi2ss, // SSE + kInstCvtss2sd, // SSE2 + kInstCvtss2si, // SSE + kInstCvttpd2dq, // SSE2 + kInstCvttpd2pi, // SSE2 + kInstCvttps2dq, // SSE2 + kInstCvttps2pi, // SSE + kInstCvttsd2si, // SSE2 + kInstCvttss2si, // SSE + kInstCwd, // X86/X64 + kInstCwde, // X86/X64 + kInstDaa, // X86 only + kInstDas, // X86 only + kInstDec, // X86/X64 + kInstDiv, // X86/X64 + kInstDivpd, // SSE2 + kInstDivps, // SSE + kInstDivsd, // SSE2 + kInstDivss, // SSE + kInstDppd, // SSE4.1 + kInstDpps, // SSE4.1 + kInstEmms, // MMX + kInstEnter, // X86/X64 + kInstExtractps, // 
SSE4.1 + kInstF2xm1, // FPU + kInstFabs, // FPU + kInstFadd, // FPU + kInstFaddp, // FPU + kInstFbld, // FPU + kInstFbstp, // FPU + kInstFchs, // FPU + kInstFclex, // FPU + kInstFcmovb, // FPU + kInstFcmovbe, // FPU + kInstFcmove, // FPU + kInstFcmovnb, // FPU + kInstFcmovnbe, // FPU + kInstFcmovne, // FPU + kInstFcmovnu, // FPU + kInstFcmovu, // FPU + kInstFcom, // FPU + kInstFcomi, // FPU + kInstFcomip, // FPU + kInstFcomp, // FPU + kInstFcompp, // FPU + kInstFcos, // FPU + kInstFdecstp, // FPU + kInstFdiv, // FPU + kInstFdivp, // FPU + kInstFdivr, // FPU + kInstFdivrp, // FPU + kInstFemms, // 3dNow! + kInstFfree, // FPU + kInstFiadd, // FPU + kInstFicom, // FPU + kInstFicomp, // FPU + kInstFidiv, // FPU + kInstFidivr, // FPU + kInstFild, // FPU + kInstFimul, // FPU + kInstFincstp, // FPU + kInstFinit, // FPU + kInstFist, // FPU + kInstFistp, // FPU + kInstFisttp, // SSE3 + kInstFisub, // FPU + kInstFisubr, // FPU + kInstFld, // FPU + kInstFld1, // FPU + kInstFldcw, // FPU + kInstFldenv, // FPU + kInstFldl2e, // FPU + kInstFldl2t, // FPU + kInstFldlg2, // FPU + kInstFldln2, // FPU + kInstFldpi, // FPU + kInstFldz, // FPU + kInstFmul, // FPU + kInstFmulp, // FPU + kInstFnclex, // FPU + kInstFninit, // FPU + kInstFnop, // FPU + kInstFnsave, // FPU + kInstFnstcw, // FPU + kInstFnstenv, // FPU + kInstFnstsw, // FPU + kInstFpatan, // FPU + kInstFprem, // FPU + kInstFprem1, // FPU + kInstFptan, // FPU + kInstFrndint, // FPU + kInstFrstor, // FPU + kInstFsave, // FPU + kInstFscale, // FPU + kInstFsin, // FPU + kInstFsincos, // FPU + kInstFsqrt, // FPU + kInstFst, // FPU + kInstFstcw, // FPU + kInstFstenv, // FPU + kInstFstp, // FPU + kInstFstsw, // FPU + kInstFsub, // FPU + kInstFsubp, // FPU + kInstFsubr, // FPU + kInstFsubrp, // FPU + kInstFtst, // FPU + kInstFucom, // FPU + kInstFucomi, // FPU + kInstFucomip, // FPU + kInstFucomp, // FPU + kInstFucompp, // FPU + kInstFwait, // FPU + kInstFxam, // FPU + kInstFxch, // FPU + kInstFxrstor, // FPU + kInstFxsave, // FPU + kInstFxtract, // FPU + kInstFyl2x, // FPU + kInstFyl2xp1, // FPU + kInstHaddpd, // SSE3 + kInstHaddps, // SSE3 + kInstHsubpd, // SSE3 + kInstHsubps, // SSE3 + kInstIdiv, // X86/X64 + kInstImul, // X86/X64 + kInstInc, // X86/X64 + kInstInt, // X86/X64 + kInstJa, // X86/X64 (jcc) + kInstJae, // X86/X64 (jcc) + kInstJb, // X86/X64 (jcc) + kInstJbe, // X86/X64 (jcc) + kInstJc, // X86/X64 (jcc) + kInstJe, // X86/X64 (jcc) + kInstJg, // X86/X64 (jcc) + kInstJge, // X86/X64 (jcc) + kInstJl, // X86/X64 (jcc) + kInstJle, // X86/X64 (jcc) + kInstJna, // X86/X64 (jcc) + kInstJnae, // X86/X64 (jcc) + kInstJnb, // X86/X64 (jcc) + kInstJnbe, // X86/X64 (jcc) + kInstJnc, // X86/X64 (jcc) + kInstJne, // X86/X64 (jcc) + kInstJng, // X86/X64 (jcc) + kInstJnge, // X86/X64 (jcc) + kInstJnl, // X86/X64 (jcc) + kInstJnle, // X86/X64 (jcc) + kInstJno, // X86/X64 (jcc) + kInstJnp, // X86/X64 (jcc) + kInstJns, // X86/X64 (jcc) + kInstJnz, // X86/X64 (jcc) + kInstJo, // X86/X64 (jcc) + kInstJp, // X86/X64 (jcc) + kInstJpe, // X86/X64 (jcc) + kInstJpo, // X86/X64 (jcc) + kInstJs, // X86/X64 (jcc) + kInstJz, // X86/X64 (jcc) + kInstJmp, // X86/X64 (jmp) + kInstLahf, // X86/X64 (CPUID NEEDED) + kInstLddqu, // SSE3 + kInstLdmxcsr, // SSE + kInstLea, // X86/X64 + kInstLeave, // X86/X64 + kInstLfence, // SSE2 + kInstLzcnt, // LZCNT + kInstMaskmovdqu, // SSE2 + kInstMaskmovq, // MMX-Ext + kInstMaxpd, // SSE2 + kInstMaxps, // SSE + kInstMaxsd, // SSE2 + kInstMaxss, // SSE + kInstMfence, // SSE2 + kInstMinpd, // SSE2 + kInstMinps, // SSE + kInstMinsd, // SSE2 + 
kInstMinss, // SSE + kInstMonitor, // SSE3 + kInstMov, // X86/X64 + kInstMovapd, // SSE2 + kInstMovaps, // SSE + kInstMovbe, // SSE3 - Intel-Atom + kInstMovd, // MMX/SSE2 + kInstMovddup, // SSE3 + kInstMovdq2q, // SSE2 + kInstMovdqa, // SSE2 + kInstMovdqu, // SSE2 + kInstMovhlps, // SSE + kInstMovhpd, // SSE2 + kInstMovhps, // SSE + kInstMovlhps, // SSE + kInstMovlpd, // SSE2 + kInstMovlps, // SSE + kInstMovmskpd, // SSE2 + kInstMovmskps, // SSE2 + kInstMovntdq, // SSE2 + kInstMovntdqa, // SSE4.1 + kInstMovnti, // SSE2 + kInstMovntpd, // SSE2 + kInstMovntps, // SSE + kInstMovntq, // MMX-Ext + kInstMovptr, // X86/X64 + kInstMovq, // MMX/SSE/SSE2 + kInstMovq2dq, // SSE2 + kInstMovsd, // SSE2 + kInstMovshdup, // SSE3 + kInstMovsldup, // SSE3 + kInstMovss, // SSE + kInstMovsx, // X86/X64 + kInstMovsxd, // X86/X64 + kInstMovupd, // SSE2 + kInstMovups, // SSE + kInstMovzx, // X86/X64 + kInstMpsadbw, // SSE4.1 + kInstMul, // X86/X64 + kInstMulpd, // SSE2 + kInstMulps, // SSE + kInstMulsd, // SSE2 + kInstMulss, // SSE + kInstMulx, // BMI2 + kInstMwait, // SSE3 + kInstNeg, // X86/X64 + kInstNop, // X86/X64 + kInstNot, // X86/X64 + kInstOr, // X86/X64 + kInstOrpd, // SSE2 + kInstOrps, // SSE + kInstPabsb, // SSSE3 + kInstPabsd, // SSSE3 + kInstPabsw, // SSSE3 + kInstPackssdw, // MMX/SSE2 + kInstPacksswb, // MMX/SSE2 + kInstPackusdw, // SSE4.1 + kInstPackuswb, // MMX/SSE2 + kInstPaddb, // MMX/SSE2 + kInstPaddd, // MMX/SSE2 + kInstPaddq, // SSE2 + kInstPaddsb, // MMX/SSE2 + kInstPaddsw, // MMX/SSE2 + kInstPaddusb, // MMX/SSE2 + kInstPaddusw, // MMX/SSE2 + kInstPaddw, // MMX/SSE2 + kInstPalignr, // SSSE3 + kInstPand, // MMX/SSE2 + kInstPandn, // MMX/SSE2 + kInstPause, // SSE2. + kInstPavgb, // MMX-Ext + kInstPavgw, // MMX-Ext + kInstPblendvb, // SSE4.1 + kInstPblendw, // SSE4.1 + kInstPclmulqdq, // PCLMULQDQ + kInstPcmpeqb, // MMX/SSE2 + kInstPcmpeqd, // MMX/SSE2 + kInstPcmpeqq, // SSE4.1 + kInstPcmpeqw, // MMX/SSE2 + kInstPcmpestri, // SSE4.2 + kInstPcmpestrm, // SSE4.2 + kInstPcmpgtb, // MMX/SSE2 + kInstPcmpgtd, // MMX/SSE2 + kInstPcmpgtq, // SSE4.2 + kInstPcmpgtw, // MMX/SSE2 + kInstPcmpistri, // SSE4.2 + kInstPcmpistrm, // SSE4.2 + kInstPdep, // BMI2 + kInstPext, // BMI2 + kInstPextrb, // SSE4.1 + kInstPextrd, // SSE4.1 + kInstPextrq, // SSE4.1 + kInstPextrw, // MMX-Ext/SSE2 + kInstPf2id, // 3dNow! + kInstPf2iw, // Enhanced 3dNow! + kInstPfacc, // 3dNow! + kInstPfadd, // 3dNow! + kInstPfcmpeq, // 3dNow! + kInstPfcmpge, // 3dNow! + kInstPfcmpgt, // 3dNow! + kInstPfmax, // 3dNow! + kInstPfmin, // 3dNow! + kInstPfmul, // 3dNow! + kInstPfnacc, // Enhanced 3dNow! + kInstPfpnacc, // Enhanced 3dNow! + kInstPfrcp, // 3dNow! + kInstPfrcpit1, // 3dNow! + kInstPfrcpit2, // 3dNow! + kInstPfrsqit1, // 3dNow! + kInstPfrsqrt, // 3dNow! + kInstPfsub, // 3dNow! + kInstPfsubr, // 3dNow! + kInstPhaddd, // SSSE3 + kInstPhaddsw, // SSSE3 + kInstPhaddw, // SSSE3 + kInstPhminposuw, // SSE4.1 + kInstPhsubd, // SSSE3 + kInstPhsubsw, // SSSE3 + kInstPhsubw, // SSSE3 + kInstPi2fd, // 3dNow! + kInstPi2fw, // Enhanced 3dNow! 
+ kInstPinsrb, // SSE4.1 + kInstPinsrd, // SSE4.1 + kInstPinsrq, // SSE4.1 + kInstPinsrw, // MMX-Ext + kInstPmaddubsw, // SSSE3 + kInstPmaddwd, // MMX/SSE2 + kInstPmaxsb, // SSE4.1 + kInstPmaxsd, // SSE4.1 + kInstPmaxsw, // MMX-Ext + kInstPmaxub, // MMX-Ext + kInstPmaxud, // SSE4.1 + kInstPmaxuw, // SSE4.1 + kInstPminsb, // SSE4.1 + kInstPminsd, // SSE4.1 + kInstPminsw, // MMX-Ext + kInstPminub, // MMX-Ext + kInstPminud, // SSE4.1 + kInstPminuw, // SSE4.1 + kInstPmovmskb, // MMX-Ext + kInstPmovsxbd, // SSE4.1 + kInstPmovsxbq, // SSE4.1 + kInstPmovsxbw, // SSE4.1 + kInstPmovsxdq, // SSE4.1 + kInstPmovsxwd, // SSE4.1 + kInstPmovsxwq, // SSE4.1 + kInstPmovzxbd, // SSE4.1 + kInstPmovzxbq, // SSE4.1 + kInstPmovzxbw, // SSE4.1 + kInstPmovzxdq, // SSE4.1 + kInstPmovzxwd, // SSE4.1 + kInstPmovzxwq, // SSE4.1 + kInstPmuldq, // SSE4.1 + kInstPmulhrsw, // SSSE3 + kInstPmulhuw, // MMX-Ext + kInstPmulhw, // MMX/SSE2 + kInstPmulld, // SSE4.1 + kInstPmullw, // MMX/SSE2 + kInstPmuludq, // SSE2 + kInstPop, // X86/X64 + kInstPopa, // X86 only + kInstPopcnt, // SSE4.2 + kInstPopf, // X86/X64 + kInstPor, // MMX/SSE2 + kInstPrefetch, // MMX-Ext/SSE + kInstPrefetch3dNow, // 3dNow! + kInstPrefetchw3dNow, // 3dNow! + kInstPsadbw, // MMX-Ext + kInstPshufb, // SSSE3 + kInstPshufd, // SSE2 + kInstPshufhw, // SSE2 + kInstPshuflw, // SSE2 + kInstPshufw, // MMX-Ext + kInstPsignb, // SSSE3 + kInstPsignd, // SSSE3 + kInstPsignw, // SSSE3 + kInstPslld, // MMX/SSE2 + kInstPslldq, // SSE2 + kInstPsllq, // MMX/SSE2 + kInstPsllw, // MMX/SSE2 + kInstPsrad, // MMX/SSE2 + kInstPsraw, // MMX/SSE2 + kInstPsrld, // MMX/SSE2 + kInstPsrldq, // SSE2 + kInstPsrlq, // MMX/SSE2 + kInstPsrlw, // MMX/SSE2 + kInstPsubb, // MMX/SSE2 + kInstPsubd, // MMX/SSE2 + kInstPsubq, // SSE2 + kInstPsubsb, // MMX/SSE2 + kInstPsubsw, // MMX/SSE2 + kInstPsubusb, // MMX/SSE2 + kInstPsubusw, // MMX/SSE2 + kInstPsubw, // MMX/SSE2 + kInstPswapd, // Enhanced 3dNow! 
+ kInstPtest, // SSE4.1 + kInstPunpckhbw, // MMX/SSE2 + kInstPunpckhdq, // MMX/SSE2 + kInstPunpckhqdq, // SSE2 + kInstPunpckhwd, // MMX/SSE2 + kInstPunpcklbw, // MMX/SSE2 + kInstPunpckldq, // MMX/SSE2 + kInstPunpcklqdq, // SSE2 + kInstPunpcklwd, // MMX/SSE2 + kInstPush, // X86/X64 + kInstPusha, // X86 only + kInstPushf, // X86/X64 + kInstPxor, // MMX/SSE2 + kInstRcl, // X86/X64 + kInstRcpps, // SSE + kInstRcpss, // SSE + kInstRcr, // X86/X64 + kInstRdfsbase, // FSGSBASE (x64) + kInstRdgsbase, // FSGSBASE (x64) + kInstRdrand, // RDRAND + kInstRdtsc, // X86/X64 + kInstRdtscp, // X86/X64 + kInstRepLodsb, // X86/X64 (REP) + kInstRepLodsd, // X86/X64 (REP) + kInstRepLodsq, // X64 only (REP) + kInstRepLodsw, // X86/X64 (REP) + kInstRepMovsb, // X86/X64 (REP) + kInstRepMovsd, // X86/X64 (REP) + kInstRepMovsq, // X64 only (REP) + kInstRepMovsw, // X86/X64 (REP) + kInstRepStosb, // X86/X64 (REP) + kInstRepStosd, // X86/X64 (REP) + kInstRepStosq, // X64 only (REP) + kInstRepStosw, // X86/X64 (REP) + kInstRepeCmpsb, // X86/X64 (REP) + kInstRepeCmpsd, // X86/X64 (REP) + kInstRepeCmpsq, // X64 only (REP) + kInstRepeCmpsw, // X86/X64 (REP) + kInstRepeScasb, // X86/X64 (REP) + kInstRepeScasd, // X86/X64 (REP) + kInstRepeScasq, // X64 only (REP) + kInstRepeScasw, // X86/X64 (REP) + kInstRepneCmpsb, // X86/X64 (REP) + kInstRepneCmpsd, // X86/X64 (REP) + kInstRepneCmpsq, // X64 only (REP) + kInstRepneCmpsw, // X86/X64 (REP) + kInstRepneScasb, // X86/X64 (REP) + kInstRepneScasd, // X86/X64 (REP) + kInstRepneScasq, // X64 only (REP) + kInstRepneScasw, // X86/X64 (REP) + kInstRet, // X86/X64 + kInstRol, // X86/X64 + kInstRor, // X86/X64 + kInstRorx, // BMI2 + kInstRoundpd, // SSE4.1 + kInstRoundps, // SSE4.1 + kInstRoundsd, // SSE4.1 + kInstRoundss, // SSE4.1 + kInstRsqrtps, // SSE + kInstRsqrtss, // SSE + kInstSahf, // X86/X64 (CPUID NEEDED) + kInstSal, // X86/X64 + kInstSar, // X86/X64 + kInstSarx, // BMI2 + kInstSbb, // X86/X64 + kInstSeta, // X86/X64 (setcc) + kInstSetae, // X86/X64 (setcc) + kInstSetb, // X86/X64 (setcc) + kInstSetbe, // X86/X64 (setcc) + kInstSetc, // X86/X64 (setcc) + kInstSete, // X86/X64 (setcc) + kInstSetg, // X86/X64 (setcc) + kInstSetge, // X86/X64 (setcc) + kInstSetl, // X86/X64 (setcc) + kInstSetle, // X86/X64 (setcc) + kInstSetna, // X86/X64 (setcc) + kInstSetnae, // X86/X64 (setcc) + kInstSetnb, // X86/X64 (setcc) + kInstSetnbe, // X86/X64 (setcc) + kInstSetnc, // X86/X64 (setcc) + kInstSetne, // X86/X64 (setcc) + kInstSetng, // X86/X64 (setcc) + kInstSetnge, // X86/X64 (setcc) + kInstSetnl, // X86/X64 (setcc) + kInstSetnle, // X86/X64 (setcc) + kInstSetno, // X86/X64 (setcc) + kInstSetnp, // X86/X64 (setcc) + kInstSetns, // X86/X64 (setcc) + kInstSetnz, // X86/X64 (setcc) + kInstSeto, // X86/X64 (setcc) + kInstSetp, // X86/X64 (setcc) + kInstSetpe, // X86/X64 (setcc) + kInstSetpo, // X86/X64 (setcc) + kInstSets, // X86/X64 (setcc) + kInstSetz, // X86/X64 (setcc) + kInstSfence, // MMX-Ext/SSE + kInstShl, // X86/X64 + kInstShld, // X86/X64 + kInstShlx, // BMI2 + kInstShr, // X86/X64 + kInstShrd, // X86/X64 + kInstShrx, // BMI2 + kInstShufpd, // SSE2 + kInstShufps, // SSE + kInstSqrtpd, // SSE2 + kInstSqrtps, // SSE + kInstSqrtsd, // SSE2 + kInstSqrtss, // SSE + kInstStc, // X86/X64 + kInstStd, // X86/X64 + kInstStmxcsr, // SSE + kInstSub, // X86/X64 + kInstSubpd, // SSE2 + kInstSubps, // SSE + kInstSubsd, // SSE2 + kInstSubss, // SSE + kInstTest, // X86/X64 + kInstTzcnt, // TZCNT + kInstUcomisd, // SSE2 + kInstUcomiss, // SSE + kInstUd2, // X86/X64 + kInstUnpckhpd, // SSE2 + 
kInstUnpckhps, // SSE + kInstUnpcklpd, // SSE2 + kInstUnpcklps, // SSE + kInstVaddpd, // AVX + kInstVaddps, // AVX + kInstVaddsd, // AVX + kInstVaddss, // AVX + kInstVaddsubpd, // AVX + kInstVaddsubps, // AVX + kInstVaesdec, // AVX+AESNI + kInstVaesdeclast, // AVX+AESNI + kInstVaesenc, // AVX+AESNI + kInstVaesenclast, // AVX+AESNI + kInstVaesimc, // AVX+AESNI + kInstVaeskeygenassist,// AVX+AESNI + kInstVandnpd, // AVX + kInstVandnps, // AVX + kInstVandpd, // AVX + kInstVandps, // AVX + kInstVblendpd, // AVX + kInstVblendps, // AVX + kInstVblendvpd, // AVX + kInstVblendvps, // AVX + kInstVbroadcastf128, // AVX + kInstVbroadcasti128, // AVX2 + kInstVbroadcastsd, // AVX/AVX2 + kInstVbroadcastss, // AVX/AVX2 + kInstVcmppd, // AVX + kInstVcmpps, // AVX + kInstVcmpsd, // AVX + kInstVcmpss, // AVX + kInstVcomisd, // AVX + kInstVcomiss, // AVX + kInstVcvtdq2pd, // AVX + kInstVcvtdq2ps, // AVX + kInstVcvtpd2dq, // AVX + kInstVcvtpd2ps, // AVX + kInstVcvtph2ps, // F16C + kInstVcvtps2dq, // AVX + kInstVcvtps2pd, // AVX + kInstVcvtps2ph, // F16C + kInstVcvtsd2si, // AVX + kInstVcvtsd2ss, // AVX + kInstVcvtsi2sd, // AVX + kInstVcvtsi2ss, // AVX + kInstVcvtss2sd, // AVX + kInstVcvtss2si, // AVX + kInstVcvttpd2dq, // AVX + kInstVcvttps2dq, // AVX + kInstVcvttsd2si, // AVX + kInstVcvttss2si, // AVX + kInstVdivpd, // AVX + kInstVdivps, // AVX + kInstVdivsd, // AVX + kInstVdivss, // AVX + kInstVdppd, // AVX + kInstVdpps, // AVX + kInstVextractf128, // AVX + kInstVextracti128, // AVX2 + kInstVextractps, // AVX + kInstVfmadd132pd, // FMA3 + kInstVfmadd132ps, // FMA3 + kInstVfmadd132sd, // FMA3 + kInstVfmadd132ss, // FMA3 + kInstVfmadd213pd, // FMA3 + kInstVfmadd213ps, // FMA3 + kInstVfmadd213sd, // FMA3 + kInstVfmadd213ss, // FMA3 + kInstVfmadd231pd, // FMA3 + kInstVfmadd231ps, // FMA3 + kInstVfmadd231sd, // FMA3 + kInstVfmadd231ss, // FMA3 + kInstVfmaddsub132pd, // FMA3 + kInstVfmaddsub132ps, // FMA3 + kInstVfmaddsub213pd, // FMA3 + kInstVfmaddsub213ps, // FMA3 + kInstVfmaddsub231pd, // FMA3 + kInstVfmaddsub231ps, // FMA3 + kInstVfmsub132pd, // FMA3 + kInstVfmsub132ps, // FMA3 + kInstVfmsub132sd, // FMA3 + kInstVfmsub132ss, // FMA3 + kInstVfmsub213pd, // FMA3 + kInstVfmsub213ps, // FMA3 + kInstVfmsub213sd, // FMA3 + kInstVfmsub213ss, // FMA3 + kInstVfmsub231pd, // FMA3 + kInstVfmsub231ps, // FMA3 + kInstVfmsub231sd, // FMA3 + kInstVfmsub231ss, // FMA3 + kInstVfmsubadd132pd, // FMA3 + kInstVfmsubadd132ps, // FMA3 + kInstVfmsubadd213pd, // FMA3 + kInstVfmsubadd213ps, // FMA3 + kInstVfmsubadd231pd, // FMA3 + kInstVfmsubadd231ps, // FMA3 + kInstVfnmadd132pd, // FMA3 + kInstVfnmadd132ps, // FMA3 + kInstVfnmadd132sd, // FMA3 + kInstVfnmadd132ss, // FMA3 + kInstVfnmadd213pd, // FMA3 + kInstVfnmadd213ps, // FMA3 + kInstVfnmadd213sd, // FMA3 + kInstVfnmadd213ss, // FMA3 + kInstVfnmadd231pd, // FMA3 + kInstVfnmadd231ps, // FMA3 + kInstVfnmadd231sd, // FMA3 + kInstVfnmadd231ss, // FMA3 + kInstVfnmsub132pd, // FMA3 + kInstVfnmsub132ps, // FMA3 + kInstVfnmsub132sd, // FMA3 + kInstVfnmsub132ss, // FMA3 + kInstVfnmsub213pd, // FMA3 + kInstVfnmsub213ps, // FMA3 + kInstVfnmsub213sd, // FMA3 + kInstVfnmsub213ss, // FMA3 + kInstVfnmsub231pd, // FMA3 + kInstVfnmsub231ps, // FMA3 + kInstVfnmsub231sd, // FMA3 + kInstVfnmsub231ss, // FMA3 + kInstVgatherdpd, // AVX2 + kInstVgatherdps, // AVX2 + kInstVgatherqpd, // AVX2 + kInstVgatherqps, // AVX2 + kInstVhaddpd, // AVX + kInstVhaddps, // AVX + kInstVhsubpd, // AVX + kInstVhsubps, // AVX + kInstVinsertf128, // AVX + kInstVinserti128, // AVX2 + kInstVinsertps, // AVX + kInstVlddqu, // 
AVX + kInstVldmxcsr, // AVX + kInstVmaskmovdqu, // AVX + kInstVmaskmovpd, // AVX + kInstVmaskmovps, // AVX + kInstVmaxpd, // AVX + kInstVmaxps, // AVX + kInstVmaxsd, // AVX + kInstVmaxss, // AVX + kInstVminpd, // AVX + kInstVminps, // AVX + kInstVminsd, // AVX + kInstVminss, // AVX + kInstVmovapd, // AVX + kInstVmovaps, // AVX + kInstVmovd, // AVX + kInstVmovddup, // AVX + kInstVmovdqa, // AVX + kInstVmovdqu, // AVX + kInstVmovhlps, // AVX + kInstVmovhpd, // AVX + kInstVmovhps, // AVX + kInstVmovlhps, // AVX + kInstVmovlpd, // AVX + kInstVmovlps, // AVX + kInstVmovmskpd, // AVX + kInstVmovmskps, // AVX + kInstVmovntdq, // AVX + kInstVmovntdqa, // AVX/AVX2 + kInstVmovntpd, // AVX + kInstVmovntps, // AVX + kInstVmovq, // AVX + kInstVmovsd, // AVX + kInstVmovshdup, // AVX + kInstVmovsldup, // AVX + kInstVmovss, // AVX + kInstVmovupd, // AVX + kInstVmovups, // AVX + kInstVmpsadbw, // AVX/AVX2 + kInstVmulpd, // AVX + kInstVmulps, // AVX + kInstVmulsd, // AVX + kInstVmulss, // AVX + kInstVorpd, // AVX + kInstVorps, // AVX + kInstVpabsb, // AVX2 + kInstVpabsd, // AVX2 + kInstVpabsw, // AVX2 + kInstVpackssdw, // AVX2 + kInstVpacksswb, // AVX2 + kInstVpackusdw, // AVX2 + kInstVpackuswb, // AVX2 + kInstVpaddb, // AVX2 + kInstVpaddd, // AVX2 + kInstVpaddq, // AVX2 + kInstVpaddsb, // AVX2 + kInstVpaddsw, // AVX2 + kInstVpaddusb, // AVX2 + kInstVpaddusw, // AVX2 + kInstVpaddw, // AVX2 + kInstVpalignr, // AVX2 + kInstVpand, // AVX2 + kInstVpandn, // AVX2 + kInstVpavgb, // AVX2 + kInstVpavgw, // AVX2 + kInstVpblendd, // AVX2 + kInstVpblendvb, // AVX2 + kInstVpblendw, // AVX2 + kInstVpbroadcastb, // AVX2 + kInstVpbroadcastd, // AVX2 + kInstVpbroadcastq, // AVX2 + kInstVpbroadcastw, // AVX2 + kInstVpclmulqdq, // AVX+PCLMULQDQ + kInstVpcmpeqb, // AVX2 + kInstVpcmpeqd, // AVX2 + kInstVpcmpeqq, // AVX2 + kInstVpcmpeqw, // AVX2 + kInstVpcmpestri, // AVX + kInstVpcmpestrm, // AVX + kInstVpcmpgtb, // AVX2 + kInstVpcmpgtd, // AVX2 + kInstVpcmpgtq, // AVX2 + kInstVpcmpgtw, // AVX2 + kInstVpcmpistri, // AVX + kInstVpcmpistrm, // AVX + kInstVperm2f128, // AVX + kInstVperm2i128, // AVX2 + kInstVpermd, // AVX2 + kInstVpermilpd, // AVX + kInstVpermilps, // AVX + kInstVpermpd, // AVX2 + kInstVpermps, // AVX2 + kInstVpermq, // AVX2 + kInstVpextrb, // AVX + kInstVpextrd, // AVX + kInstVpextrq, // AVX (x64 only) + kInstVpextrw, // AVX + kInstVpgatherdd, // AVX2 + kInstVpgatherdq, // AVX2 + kInstVpgatherqd, // AVX2 + kInstVpgatherqq, // AVX2 + kInstVphaddd, // AVX2 + kInstVphaddsw, // AVX2 + kInstVphaddw, // AVX2 + kInstVphminposuw, // AVX + kInstVphsubd, // AVX2 + kInstVphsubsw, // AVX2 + kInstVphsubw, // AVX2 + kInstVpinsrb, // AVX + kInstVpinsrd, // AVX + kInstVpinsrq, // AVX (x64 only) + kInstVpinsrw, // AVX + kInstVpmaddubsw, // AVX/AVX2 + kInstVpmaddwd, // AVX/AVX2 + kInstVpmaskmovd, // AVX2 + kInstVpmaskmovq, // AVX2 + kInstVpmaxsb, // AVX/AVX2 + kInstVpmaxsd, // AVX/AVX2 + kInstVpmaxsw, // AVX/AVX2 + kInstVpmaxub, // AVX/AVX2 + kInstVpmaxud, // AVX/AVX2 + kInstVpmaxuw, // AVX/AVX2 + kInstVpminsb, // AVX/AVX2 + kInstVpminsd, // AVX/AVX2 + kInstVpminsw, // AVX/AVX2 + kInstVpminub, // AVX/AVX2 + kInstVpminud, // AVX/AVX2 + kInstVpminuw, // AVX/AVX2 + kInstVpmovmskb, // AVX/AVX2 + kInstVpmovsxbd, // AVX/AVX2 + kInstVpmovsxbq, // AVX/AVX2 + kInstVpmovsxbw, // AVX/AVX2 + kInstVpmovsxdq, // AVX/AVX2 + kInstVpmovsxwd, // AVX/AVX2 + kInstVpmovsxwq, // AVX/AVX2 + kInstVpmovzxbd, // AVX/AVX2 + kInstVpmovzxbq, // AVX/AVX2 + kInstVpmovzxbw, // AVX/AVX2 + kInstVpmovzxdq, // AVX/AVX2 + kInstVpmovzxwd, // AVX/AVX2 + kInstVpmovzxwq, 
// AVX/AVX2 + kInstVpmuldq, // AVX/AVX2 + kInstVpmulhrsw, // AVX/AVX2 + kInstVpmulhuw, // AVX/AVX2 + kInstVpmulhw, // AVX/AVX2 + kInstVpmulld, // AVX/AVX2 + kInstVpmullw, // AVX/AVX2 + kInstVpmuludq, // AVX/AVX2 + kInstVpor, // AVX/AVX2 + kInstVpsadbw, // AVX/AVX2 + kInstVpshufb, // AVX/AVX2 + kInstVpshufd, // AVX/AVX2 + kInstVpshufhw, // AVX/AVX2 + kInstVpshuflw, // AVX/AVX2 + kInstVpsignb, // AVX/AVX2 + kInstVpsignd, // AVX/AVX2 + kInstVpsignw, // AVX/AVX2 + kInstVpslld, // AVX/AVX2 + kInstVpslldq, // AVX/AVX2 + kInstVpsllq, // AVX/AVX2 + kInstVpsllvd, // AVX2 + kInstVpsllvq, // AVX2 + kInstVpsllw, // AVX/AVX2 + kInstVpsrad, // AVX/AVX2 + kInstVpsravd, // AVX2 + kInstVpsraw, // AVX/AVX2 + kInstVpsrld, // AVX/AVX2 + kInstVpsrldq, // AVX/AVX2 + kInstVpsrlq, // AVX/AVX2 + kInstVpsrlvd, // AVX2 + kInstVpsrlvq, // AVX2 + kInstVpsrlw, // AVX/AVX2 + kInstVpsubb, // AVX/AVX2 + kInstVpsubd, // AVX/AVX2 + kInstVpsubq, // AVX/AVX2 + kInstVpsubsb, // AVX/AVX2 + kInstVpsubsw, // AVX/AVX2 + kInstVpsubusb, // AVX/AVX2 + kInstVpsubusw, // AVX/AVX2 + kInstVpsubw, // AVX/AVX2 + kInstVptest, // AVX + kInstVpunpckhbw, // AVX/AVX2 + kInstVpunpckhdq, // AVX/AVX2 + kInstVpunpckhqdq, // AVX/AVX2 + kInstVpunpckhwd, // AVX/AVX2 + kInstVpunpcklbw, // AVX/AVX2 + kInstVpunpckldq, // AVX/AVX2 + kInstVpunpcklqdq, // AVX/AVX2 + kInstVpunpcklwd, // AVX/AVX2 + kInstVpxor, // AVX/AVX2 + kInstVrcpps, // AVX + kInstVrcpss, // AVX + kInstVroundpd, // AVX + kInstVroundps, // AVX + kInstVroundsd, // AVX + kInstVroundss, // AVX + kInstVrsqrtps, // AVX + kInstVrsqrtss, // AVX + kInstVshufpd, // AVX + kInstVshufps, // AVX + kInstVsqrtpd, // AVX + kInstVsqrtps, // AVX + kInstVsqrtsd, // AVX + kInstVsqrtss, // AVX + kInstVstmxcsr, // AVX + kInstVsubpd, // AVX + kInstVsubps, // AVX + kInstVsubsd, // AVX + kInstVsubss, // AVX + kInstVtestpd, // AVX + kInstVtestps, // AVX + kInstVucomisd, // AVX + kInstVucomiss, // AVX + kInstVunpckhpd, // AVX + kInstVunpckhps, // AVX + kInstVunpcklpd, // AVX + kInstVunpcklps, // AVX + kInstVxorpd, // AVX + kInstVxorps, // AVX + kInstVzeroall, // AVX + kInstVzeroupper, // AVX + kInstWrfsbase, // FSGSBASE (x64) + kInstWrgsbase, // FSGSBASE (x64) + kInstXadd, // X86/X64 (i486) + kInstXchg, // X86/X64 (i386) + kInstXor, // X86/X64 + kInstXorpd, // SSE2 + kInstXorps, // SSE + + _kInstCount, + + _kInstCmovcc = kInstCmova, + _kInstJcc = kInstJa, + _kInstSetcc = kInstSeta, + + _kInstJbegin = kInstJa, + _kInstJend = kInstJmp +}; + +// ============================================================================ +// [asmjit::x86x64::kInstOptions] +// ============================================================================ + +//! @brief Instruction emit options, mainly for internal purposes. +ASMJIT_ENUM(kInstOptions) { + //! @brief Emit instruction with LOCK prefix. + //! + //! If this option is used and instruction doesn't support LOCK prefix an + //! invalid instruction error is generated. + kInstOptionLock = 0x10, + + //! @brief Force REX prefix to be emitted. + //! + //! This option should be used carefully, because there are unencodable + //! combinations. If you want to access ah, bh, ch or dh registers the REX + //! prefix can't be emitted, otherwise illegal instruction error will be + //! returned. + kInstOptionRex = 0x40, + + //! @brief Force three-byte VEX prefix to be emitted (instead of more compact + //! two-byte VEX prefix). + //! + //! Ignored if the instruction is not AVX instruction or extension to the + //! instruction set that is encoded by using VEX prefix. 
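+  //!
+  //! Note that all emit options are plain bit flags, so they can be combined
+  //! and tested with bitwise operators (a minimal sketch):
+  //!
+  //! @code
+  //! uint32_t options = kInstOptionVex3;
+  //! bool forceVex3 = (options & kInstOptionVex3) != 0;
+  //! @endcode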
+  kInstOptionVex3 = 0x80
+};
+
+// ============================================================================
+// [asmjit::x86x64::kInstGroup]
+// ============================================================================
+
+//! @brief X86 instruction groups.
+//!
+//! This should only be used by the assembler, because it is an
+//! @c asmjit::Assembler specific grouping. Each group represents one 'case'
+//! in the Assembler's main emit method.
+ASMJIT_ENUM(kInstGroup) {
+  //! @brief Never used.
+  kInstGroupNone,
+
+  kInstGroupX86Op,
+  kInstGroupX86Rm,
+  kInstGroupX86Rm_B,
+  kInstGroupX86RmReg,
+  kInstGroupX86RegRm,
+  kInstGroupX86M,
+  //! @brief Adc/Add/And/Cmp/Or/Sbb/Sub/Xor.
+  kInstGroupX86Arith,
+  //! @brief Bswap.
+  kInstGroupX86BSwap,
+  //! @brief Bt/Btc/Btr/Bts.
+  kInstGroupX86BTest,
+  //! @brief Call.
+  kInstGroupX86Call,
+  //! @brief Enter.
+  kInstGroupX86Enter,
+  //! @brief Imul.
+  kInstGroupX86Imul,
+  //! @brief Inc/Dec.
+  kInstGroupX86IncDec,
+  //! @brief Int.
+  kInstGroupX86Int,
+  //! @brief Jcc.
+  kInstGroupX86Jcc,
+  //! @brief Jmp.
+  kInstGroupX86Jmp,
+  //! @brief Lea.
+  kInstGroupX86Lea,
+  //! @brief Mov.
+  kInstGroupX86Mov,
+  //! @brief Movsx/Movsxd/Movzx.
+  kInstGroupX86MovSxZx,
+  //! @brief Mov having absolute memory operand (x86/x64).
+  kInstGroupX86MovPtr,
+  //! @brief Push.
+  kInstGroupX86Push,
+  //! @brief Pop.
+  kInstGroupX86Pop,
+  //! @brief Rep/Repe/Repne LodsX/MovsX/StosX/CmpsX/ScasX.
+  kInstGroupX86Rep,
+  //! @brief Ret.
+  kInstGroupX86Ret,
+  //! @brief Rcl/Rcr/Rol/Ror/Sal/Sar/Shl/Shr.
+  kInstGroupX86Rot,
+  //! @brief Setcc.
+  kInstGroupX86Set,
+  //! @brief Shld/Shrd.
+  kInstGroupX86Shlrd,
+  //! @brief Test.
+  kInstGroupX86Test,
+  //! @brief Xadd.
+  kInstGroupX86Xadd,
+  //! @brief Xchg.
+  kInstGroupX86Xchg,
+
+  //! @brief Fincstp/Finit/FldX/Fnclex/Fninit/Fnop/Fpatan/Fprem/Fprem1/Fptan/Frndint/Fscale/Fsin/Fsincos/Fsqrt/Ftst/Fucompp/Fxam/Fxtract/Fyl2x/Fyl2xp1.
+  kInstGroupFpuOp,
+  //! @brief Fadd/Fdiv/Fdivr/Fmul/Fsub/Fsubr.
+  kInstGroupFpuArith,
+  //! @brief Fcom/Fcomp.
+  kInstGroupFpuCom,
+  //! @brief Fld/Fst/Fstp.
+  kInstGroupFpuFldFst,
+  //! @brief Fiadd/Ficom/Ficomp/Fidiv/Fidivr/Fild/Fimul/Fist/Fistp/Fisttp/Fisub/Fisubr.
+  kInstGroupFpuM,
+  //! @brief Faddp/Fcmov/Fcomi/Fcomip/Fdivp/Fdivrp/Ffree/Fmulp/Fsubp/Fsubrp/Fucom/Fucomi/Fucomip/Fucomp/Fxch.
+  kInstGroupFpuR,
+  //! @brief Fnstsw/Fstsw.
+  kInstGroupFpuStsw,
+
+  //! @brief Mm/Xmm instruction.
+  kInstGroupExtRm,
+  //! @brief Mm/Xmm instruction (propagates 66H if the instruction uses XMM register).
+  kInstGroupExtRm_P,
+  //! @brief Mm/Xmm instruction (propagates REX.W if GPQ is used).
+  kInstGroupExtRm_Q,
+  //! @brief Mm/Xmm instruction having Rm/Ri encodings.
+  kInstGroupExtRmRi,
+  //! @brief Mm/Xmm instruction having Rm/Ri encodings (propagates 66H if the instruction uses XMM register).
+  kInstGroupExtRmRi_P,
+  //! @brief Mm/Xmm instruction having Rmi encoding.
+  kInstGroupExtRmi,
+  //! @brief Mm/Xmm instruction having Rmi encoding (propagates 66H if the instruction uses XMM register).
+  kInstGroupExtRmi_P,
+  //! @brief Crc32.
+  kInstGroupExtCrc,
+  //! @brief Pextrb/Pextrw/Pextrd/Pextrq/Extractps.
+  kInstGroupExtExtract,
+  //! @brief Lfence/Mfence/Sfence.
+  kInstGroupExtFence,
+  //! @brief Mov Mm/Xmm.
+  //!
+  //! The 0x66 prefix must be set manually in opcodes.
+  //!
+  //! - Primary opcode is used for instructions in (X)Mm <- (X)Mm/Mem format,
+  //! - Secondary opcode is used for instructions in (X)Mm/Mem <- (X)Mm format.
+  kInstGroupExtMov,
+  //! @brief Mov Mm/Xmm.
+  kInstGroupExtMovNoRexW,
+  //! @brief Movbe.
+ kInstGroupExtMovBe, + //! @brief Movd. + kInstGroupExtMovD, + //! @brief Movq. + kInstGroupExtMovQ, + //! @brief Prefetch. + kInstGroupExtPrefetch, + + //! @brief 3dNow instruction. + kInstGroup3dNow, + + //! @brief AVX instruction without operands. + kInstGroupAvxOp, + //! @brief AVX instruction encoded as 'M'. + kInstGroupAvxM, + //! @brief AVX instruction encoded as 'MR'. + kInstGroupAvxMr, + //! @brief AVX instruction encoded as 'MR' (propagates AVX.L if the instruction uses YMM register). + kInstGroupAvxMr_P, + //! @brief AVX instruction encoded as 'MRI'. + kInstGroupAvxMri, + //! @brief AVX instruction encoded as 'MRI' (propagates AVX.L if the instruction uses YMM register). + kInstGroupAvxMri_P, + //! @brief AVX instruction encoded as 'RM'. + kInstGroupAvxRm, + //! @brief AVX instruction encoded as 'RM' (propagates AVX.L if the instruction uses YMM register). + kInstGroupAvxRm_P, + //! @brief AVX instruction encoded as 'RMI'. + kInstGroupAvxRmi, + //! @brief AVX instruction encoded as 'RMI' (propagates AVX.L if the instruction uses YMM register).. + kInstGroupAvxRmi_P, + //! @brief AVX instruction encoded as 'RVM'. + kInstGroupAvxRvm, + //! @brief AVX instruction encoded as 'RVM' (propagates AVX.L if the instruction uses YMM register). + kInstGroupAvxRvm_P, + //! @brief AVX instruction encoded as 'RVMR'. + kInstGroupAvxRvmr, + //! @brief AVX instruction encoded as 'RVMR' (propagates AVX.L if the instruction uses YMM register). + kInstGroupAvxRvmr_P, + //! @brief AVX instruction encoded as 'RVMI'. + kInstGroupAvxRvmi, + //! @brief AVX instruction encoded as 'RVMI' (propagates AVX.L if the instruction uses YMM register). + kInstGroupAvxRvmi_P, + //! @brief AVX instruction encoded as 'RMV'. + kInstGroupAvxRmv, + //! @brief AVX instruction encoded as 'RMVI'. + kInstGroupAvxRmvi, + //! @brief AVX instruction encoded as 'RM' or 'MR'. + kInstGroupAvxRmMr, + //! @brief AVX instruction encoded as 'RM' or 'MR' (propagates AVX.L if the instruction uses YMM register). + kInstGroupAvxRmMr_P, + //! @brief AVX instruction encoded as 'RVM' or 'RMI'. + kInstGroupAvxRvmRmi, + //! @brief AVX instruction encoded as 'RVM' or 'RMI' (propagates AVX.L if the instruction uses YMM register). + kInstGroupAvxRvmRmi_P, + //! @brief AVX instruction encoded as 'RVM' or 'MR'. + kInstGroupAvxRvmMr, + //! @brief AVX instruction encoded as 'RVM' or 'MVR'. + kInstGroupAvxRvmMvr, + //! @brief AVX instruction encoded as 'RVM' or 'MVR' (propagates AVX.L if the instruction uses YMM register). + kInstGroupAvxRvmMvr_P, + //! @brief AVX instruction encoded as 'RVM' or 'VMI'. + kInstGroupAvxRvmVmi, + //! @brief AVX instruction encoded as 'RVM' or 'VMI' (propagates AVX.L if the instruction uses YMM register). + kInstGroupAvxRvmVmi_P, + //! @brief AVX instruction encoded as 'VM'. + kInstGroupAvxVm, + //! @brief AVX instruction encoded as 'VMI'. + kInstGroupAvxVmi, + //! @brief AVX instruction encoded as 'VMI' (propagates AVX.L if the instruction uses YMM register). + kInstGroupAvxVmi_P, + //! @brief Vmovss/Vmovsd. + kInstGroupAvxMovSsSd, + //! @brief AVX2 gather family instructions (VSIB). + kInstGroupAvxGather, + //! @brief AVX2 gather family instructions (VSIB), differs only in mem operand. + kInstGroupAvxGatherEx +}; + +// ============================================================================ +// [asmjit::x86x64::kInstOpCode] +// ============================================================================ + +//! @brief Instruction OpCode encoding used by asmjit 'InstInfo' table. +//! +//! 
The schema was inspired by AVX/AVX2 features.
+ASMJIT_ENUM(kInstOpCode) {
+  // 'MMMMM' field in AVX instruction.
+  // 'OpCode' leading bytes in legacy encoding.
+  kInstOpCode_MM_Shift = 16,
+  kInstOpCode_MM_Mask  = 0x0FU << kInstOpCode_MM_Shift,
+  kInstOpCode_MM_00    = 0x00U << kInstOpCode_MM_Shift,
+  kInstOpCode_MM_0F    = 0x01U << kInstOpCode_MM_Shift,
+  kInstOpCode_MM_0F38  = 0x02U << kInstOpCode_MM_Shift,
+  kInstOpCode_MM_0F3A  = 0x03U << kInstOpCode_MM_Shift,
+  kInstOpCode_MM_0F01  = 0x0FU << kInstOpCode_MM_Shift, // Ext/Not part of AVX.
+
+  // 'PP' field in AVX instruction.
+  // 'Mandatory Prefix' in legacy encoding.
+  kInstOpCode_PP_Shift = 21,
+  kInstOpCode_PP_Mask  = 0x07U << kInstOpCode_PP_Shift,
+  kInstOpCode_PP_00    = 0x00U << kInstOpCode_PP_Shift,
+  kInstOpCode_PP_66    = 0x01U << kInstOpCode_PP_Shift,
+  kInstOpCode_PP_F3    = 0x02U << kInstOpCode_PP_Shift,
+  kInstOpCode_PP_F2    = 0x03U << kInstOpCode_PP_Shift,
+  kInstOpCode_PP_9B    = 0x07U << kInstOpCode_PP_Shift, // Ext/Not part of AVX.
+
+  // 'L' field in AVX instruction.
+  kInstOpCode_L_Shift  = 24,
+  kInstOpCode_L_Mask   = 0x01U << kInstOpCode_L_Shift,
+  kInstOpCode_L_False  = 0x00U << kInstOpCode_L_Shift,
+  kInstOpCode_L_True   = 0x01U << kInstOpCode_L_Shift,
+
+  // 'O' field.
+  kInstOpCode_O_Shift  = 29,
+  kInstOpCode_O_Mask   = 0x07U << kInstOpCode_O_Shift
+};
+
+// ============================================================================
+// [asmjit::x86x64::kInstFlags]
+// ============================================================================
+
+//! @brief X86 instruction type flags.
+ASMJIT_ENUM(kInstFlags) {
+  //! @brief No flags.
+  kInstFlagNone = 0x0000,
+
+  //! @brief Instruction is a control-flow instruction.
+  //!
+  //! Control flow instructions are jmp, jcc, call and ret.
+  kInstFlagFlow = 0x0001,
+
+  //! @brief Instruction is a compare/test like instruction.
+  kInstFlagTest = 0x0002,
+
+  //! @brief Instruction is a move like instruction.
+  //!
+  //! Move instructions typically overwrite the first operand with the second
+  //! operand. The first operand can be the exact copy of the second operand
+  //! or it can be any kind of conversion. Mov instructions are typically
+  //! 'mov', 'movd', 'movq', 'movdq?', 'cmov??' like instructions, but we also
+  //! consider 'lea' (Load Effective Address), multimedia instructions like
+  //! 'cvtdq2pd', shuffle instructions like 'pshufb' and SSE/SSE2 math
+  //! instructions like 'rcp??', 'round??' and 'rsqrt??'.
+  kInstFlagMove = 0x0004,
+
+  //! @brief Instruction is an exchange like instruction.
+  //!
+  //! Exchange instructions typically overwrite both the first and the second
+  //! operand; right now only 'xchg' and 'xadd' are counted.
+  kInstFlagXchg = 0x0008,
+
+  //! @brief Instruction accesses Fp register(s).
+  kInstFlagFp = 0x0010,
+
+  //! @brief Instruction can be prefixed by using the LOCK prefix.
+  kInstFlagLock = 0x0020,
+
+  //! @brief Instruction is special, used by the @c Compiler.
+  kInstFlagSpecial = 0x0040,
+
+  //! @brief Instruction always performs memory access.
+  //!
+  //! This flag is always combined with @c kInstFlagSpecial and signals that
+  //! there is an implicit address which is accessed (usually EDI/RDI or
+  //! ESI/RSI).
+  kInstFlagSpecialMem = 0x0080,
+
+  //! @brief Instruction memory operand can refer to 16-bit address (used by FPU).
+  kInstFlagMem2 = 0x0100,
+  //! @brief Instruction memory operand can refer to 32-bit address (used by FPU).
+  kInstFlagMem4 = 0x0200,
+  //! @brief Instruction memory operand can refer to 64-bit address (used by FPU).
+
+// ============================================================================
+// [asmjit::x86x64::kInstFlags]
+// ============================================================================
+
+//! @brief X86 instruction type flags.
+ASMJIT_ENUM(kInstFlags) {
+ //! @brief No flags.
+ kInstFlagNone = 0x0000,
+
+ //! @brief Instruction is a control-flow instruction.
+ //!
+ //! Control flow instructions are jmp, jcc, call and ret.
+ kInstFlagFlow = 0x0001,
+
+ //! @brief Instruction is a compare/test like instruction.
+ kInstFlagTest = 0x0002,
+
+ //! @brief Instruction is a move like instruction.
+ //!
+ //! Move instructions typically overwrite the first operand with the second
+ //! operand. The first operand can become an exact copy of the second operand
+ //! or the result of any kind of conversion. Move instructions are typically
+ //! 'mov', 'movd', 'movq', 'movdq?' and 'cmov??' like instructions, but we
+ //! also consider 'lea' (Load Effective Address), multimedia instructions
+ //! like 'cvtdq2pd', shuffle instructions like 'pshufb' and SSE/SSE2
+ //! mathematical instructions like 'rcp??', 'round??' and 'rsqrt??'.
+ kInstFlagMove = 0x0004,
+
+ //! @brief Instruction is an exchange like instruction.
+ //!
+ //! Exchange instructions typically overwrite both the first and the second
+ //! operand; currently only 'xchg' and 'xadd' are counted.
+ kInstFlagXchg = 0x0008,
+
+ //! @brief Instruction accesses Fp register(s).
+ kInstFlagFp = 0x0010,
+
+ //! @brief Instruction can be prefixed by using the LOCK prefix.
+ kInstFlagLock = 0x0020,
+
+ //! @brief Instruction is special, this is for @c Compiler.
+ kInstFlagSpecial = 0x0040,
+
+ //! @brief Instruction always performs memory access.
+ //!
+ //! This flag is always combined with @c kInstFlagSpecial and signals that
+ //! there is an implicit address which is accessed (usually EDI/RDI or
+ //! ESI/RSI).
+ kInstFlagSpecialMem = 0x0080,
+
+ //! @brief Instruction memory operand can refer to 16-bit address (used by FPU).
+ kInstFlagMem2 = 0x0100,
+ //! @brief Instruction memory operand can refer to 32-bit address (used by FPU).
+ kInstFlagMem4 = 0x0200,
+ //! @brief Instruction memory operand can refer to 64-bit address (used by FPU).
+ kInstFlagMem8 = 0x0400,
+ //! @brief Instruction memory operand can refer to 80-bit address (used by FPU).
+ kInstFlagMem10 = 0x0800,
+
+ //! @brief Combination of @c kInstFlagMem2 and @c kInstFlagMem4.
+ kInstFlagMem2_4 = kInstFlagMem2 | kInstFlagMem4,
+ //! @brief Combination of @c kInstFlagMem2, @c kInstFlagMem4 and @c kInstFlagMem8.
+ kInstFlagMem2_4_8 = kInstFlagMem2_4 | kInstFlagMem8,
+ //! @brief Combination of @c kInstFlagMem4 and @c kInstFlagMem8.
+ kInstFlagMem4_8 = kInstFlagMem4 | kInstFlagMem8,
+ //! @brief Combination of @c kInstFlagMem4, @c kInstFlagMem8 and @c kInstFlagMem10.
+ kInstFlagMem4_8_10 = kInstFlagMem4_8 | kInstFlagMem10,
+
+ //! @brief REX.W/VEX.W by default.
+ kInstFlagW = 0x8000
+};
+
+// ============================================================================
+// [asmjit::x86x64::kInstOp]
+// ============================================================================
+
+//! @brief X86 instruction operand flags.
+ASMJIT_ENUM(kInstOp) {
+ // Gp, Fp, Mm, Xmm, Ymm, Zmm.
+ kInstOpGb = 0x0001,
+ kInstOpGw = 0x0002,
+ kInstOpGd = 0x0004,
+ kInstOpGq = 0x0008,
+ kInstOpFp = 0x0010,
+ kInstOpMm = 0x0020,
+ kInstOpXmm = 0x0100,
+ kInstOpYmm = 0x0200,
+ kInstOpZmm = 0x0400,
+
+ // Mem, Imm.
+ kInstOpMem = 0x4000,
+ kInstOpImm = 0x8000,
+
+ // Combined.
+ kInstOpGwb = kInstOpGw | kInstOpGb,
+ kInstOpGqd = kInstOpGq | kInstOpGd,
+ kInstOpGqdw = kInstOpGq | kInstOpGd | kInstOpGw,
+ kInstOpGqdwb = kInstOpGq | kInstOpGd | kInstOpGw | kInstOpGb,
+
+ kInstOpGbMem = kInstOpGb | kInstOpMem,
+ kInstOpGwMem = kInstOpGw | kInstOpMem,
+ kInstOpGdMem = kInstOpGd | kInstOpMem,
+ kInstOpGqMem = kInstOpGq | kInstOpMem,
+ kInstOpGwbMem = kInstOpGwb | kInstOpMem,
+ kInstOpGqdMem = kInstOpGqd | kInstOpMem,
+ kInstOpGqdwMem = kInstOpGqdw | kInstOpMem,
+ kInstOpGqdwbMem = kInstOpGqdwb | kInstOpMem,
+
+ kInstOpFpMem = kInstOpFp | kInstOpMem,
+ kInstOpMmMem = kInstOpMm | kInstOpMem,
+ kInstOpXmmMem = kInstOpXmm | kInstOpMem,
+ kInstOpYmmMem = kInstOpYmm | kInstOpMem,
+
+ kInstOpMmXmm = kInstOpMm | kInstOpXmm,
+ kInstOpMmXmmMem = kInstOpMmXmm | kInstOpMem,
+
+ kInstOpXmmYmm = kInstOpXmm | kInstOpYmm,
+ kInstOpXmmYmmMem = kInstOpXmmYmm | kInstOpMem
+};
+
+// ============================================================================
+// [asmjit::x86x64::kCond]
+// ============================================================================
+
+//! @brief X86 condition codes.
+ASMJIT_ENUM(kCond) {
+ // Condition codes from processor manuals.
+ kCondA = 0x07, // CF==0 & ZF==0
+ kCondAE = 0x03, // CF==0
+ kCondB = 0x02, // CF==1
+ kCondBE = 0x06, // CF==1 | ZF==1
+ kCondC = 0x02, // CF==1
+ kCondE = 0x04, // ZF==1
+ kCondG = 0x0F, // ZF==0 & SF==OF
+ kCondGE = 0x0D, // SF==OF
+ kCondL = 0x0C, // SF!=OF
+ kCondLE = 0x0E, // ZF==1 | SF!=OF
+ kCondNA = 0x06, // CF==1 | ZF==1
+ kCondNAE = 0x02, // CF==1
+ kCondNB = 0x03, // CF==0
+ kCondNBE = 0x07, // CF==0 & ZF==0
+ kCondNC = 0x03, // CF==0
+ kCondNE = 0x05, // ZF==0
+ kCondNG = 0x0E, // ZF==1 | SF!=OF
+ kCondNGE = 0x0C, // SF!=OF
+ kCondNL = 0x0D, // SF==OF
+ kCondNLE = 0x0F, // ZF==0 & SF==OF
+ kCondNO = 0x01, // OF==0
+ kCondNP = 0x0B, // PF==0
+ kCondNS = 0x09, // SF==0
+ kCondNZ = 0x05, // ZF==0
+ kCondO = 0x00, // OF==1
+ kCondP = 0x0A, // PF==1
+ kCondPE = 0x0A, // PF==1
+ kCondPO = 0x0B, // PF==0
+ kCondS = 0x08, // SF==1
+ kCondZ = 0x04, // ZF==1
+
+ // Simplified condition codes.
+ kCondOverflow = 0x00,
+ kCondNotOverflow = 0x01,
+ kCondBelow = 0x02, // Unsigned.
+ kCondAboveEqual = 0x03, // Unsigned.
+ kCondEqual = 0x04,
+ kCondNotEqual = 0x05,
+ kCondBelowEqual = 0x06, // Unsigned.
+ kCondAbove = 0x07, // Unsigned.
+ kCondSign = 0x08,
+ kCondNotSign = 0x09,
+ kCondParityEven = 0x0A,
+ kCondParityOdd = 0x0B,
+ kCondLess = 0x0C, // Signed.
+ kCondGreaterEqual = 0x0D, // Signed.
+ kCondLessEqual = 0x0E, // Signed.
+ kCondGreater = 0x0F, // Signed.
+
+ // Aliases.
+ kCondZero = 0x04,
+ kCondNotZero = 0x05,
+ kCondNegative = 0x08,
+ kCondPositive = 0x09,
+
+ // Fpu-only.
+ kCondFpuUnordered = 0x10,
+ kCondFpuNotUnordered = 0x11,
+
+ //! @brief No condition code.
+ kCondNone = 0x12
+};
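One property of the kCond encoding above is worth calling out: each condition code and its negation differ only in the least significant bit (kCondEqual is 0x04, kCondNotEqual is 0x05, and so on), and the negateCond() helper near the end of this header exploits exactly that. A minimal sanity check, assuming only the enums from this header are in scope:

    #include <cassert>

    static void condEncodingSanity() {
      using namespace asmjit::x86x64;

      // Negating a condition flips bit 0 of its encoding.
      assert((kCondEqual ^ 1) == kCondNotEqual);
      assert((kCondBelow ^ 1) == kCondAboveEqual);
      assert((kCondLess  ^ 1) == kCondGreaterEqual);

      // The same holds for the FPU-only codes.
      assert((kCondFpuUnordered ^ 1) == kCondFpuNotUnordered);
    }
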
+
+// ============================================================================
+// [asmjit::x86x64::kVarType]
+// ============================================================================
+
+//! @brief X86 variable type.
+ASMJIT_ENUM(kVarType) {
+ //! @brief Variable is Mm (MMX).
+ kVarTypeMm = 13,
+
+ //! @brief Variable is Xmm (SSE/SSE2).
+ kVarTypeXmm = 14,
+ //! @brief Variable is SSE scalar SP-FP number.
+ kVarTypeXmmSs = 15,
+ //! @brief Variable is SSE packed SP-FP number (4 floats).
+ kVarTypeXmmPs = 16,
+ //! @brief Variable is SSE2 scalar DP-FP number.
+ kVarTypeXmmSd = 17,
+ //! @brief Variable is SSE2 packed DP-FP number (2 doubles).
+ kVarTypeXmmPd = 18,
+
+ //! @brief Variable is Ymm (AVX).
+ kVarTypeYmm = 19,
+ //! @brief Variable is AVX packed SP-FP number (8 floats).
+ kVarTypeYmmPs = 20,
+ //! @brief Variable is AVX packed DP-FP number (4 doubles).
+ kVarTypeYmmPd = 21,
+
+ //! @brief Count of variable types.
+ kVarTypeCount = 22,
+
+ //! @internal
+ _kVarTypeMmStart = kVarTypeMm,
+ //! @internal
+ _kVarTypeMmEnd = kVarTypeMm,
+
+ //! @internal
+ _kVarTypeXmmStart = kVarTypeXmm,
+ //! @internal
+ _kVarTypeXmmEnd = kVarTypeXmmPd,
+
+ //! @internal
+ _kVarTypeYmmStart = kVarTypeYmm,
+ //! @internal
+ _kVarTypeYmmEnd = kVarTypeYmmPd
+};
+
+// ============================================================================
+// [asmjit::x86x64::kVarDesc]
+// ============================================================================
+
+//! @brief X86 variable description.
+ASMJIT_ENUM(kVarDesc) {
+ //! @brief Variable contains single-precision floating-point(s).
+ kVarDescSp = 0x10,
+ //! @brief Variable contains double-precision floating-point(s).
+ kVarDescDp = 0x20,
+ //! @brief Variable is packed (for example float4x, double2x, ...).
+ kVarDescPacked = 0x40
+};
+
+// ============================================================================
+// [asmjit::x86x64::InstInfo]
+// ============================================================================
+
+//! @brief X86 instruction information.
+struct InstInfo {
+ // --------------------------------------------------------------------------
+ // [Accessors]
+ // --------------------------------------------------------------------------
+
+ //! @brief Get instruction name string (null terminated string).
+ ASMJIT_INLINE const char* getName() const { return _instName + static_cast<uint32_t>(_nameIndex); }
+ //! @brief Get instruction name index (index to @ref _instName array).
+ ASMJIT_INLINE uint32_t _getNameIndex() const { return _nameIndex; }
+
+ //! @brief Get instruction group, see @ref kInstGroup.
+ ASMJIT_INLINE uint32_t getGroup() const { return _group; }
+
+ // --------------------------------------------------------------------------
+ // [Flags]
+ // --------------------------------------------------------------------------
+
+ //! @brief Get instruction flags, see @ref kInstFlags.
+ ASMJIT_INLINE uint32_t getFlags() const { return _flags; }
+
+ //! @brief Get whether the instruction is a control-flow instruction.
+ //!
+ //! A control-flow instruction is an instruction that modifies the
+ //! instruction pointer, typically jmp, jcc, call, or ret.
+ ASMJIT_INLINE bool isFlow() const { return (_flags & kInstFlagFlow) != 0; }
+
+ //! @brief Get whether the instruction is a compare/test like instruction.
+ ASMJIT_INLINE bool isTest() const { return (_flags & kInstFlagTest) != 0; }
+
+ //! @brief Get whether the instruction is a typical Move instruction.
+ //!
+ //! Move instructions typically overwrite the first operand, so it's a useful
+ //! hint for @ref Compiler. This also applies to multimedia instructions
+ //! (MMX, SSE, SSE2 and AVX moves).
+ ASMJIT_INLINE bool isMove() const { return (_flags & kInstFlagMove) != 0; }
+
+ //! @brief Get whether the instruction is a typical Exchange instruction.
+ //!
+ //! Exchange instructions are 'xchg' and 'xadd'.
+ ASMJIT_INLINE bool isXchg() const { return (_flags & kInstFlagXchg) != 0; }
+
+ //! @brief Get whether the instruction accesses Fp register(s).
+ ASMJIT_INLINE bool isFp() const { return (_flags & kInstFlagFp) != 0; }
+
+ //! @brief Get whether the instruction can be prefixed by LOCK prefix.
+ ASMJIT_INLINE bool isLockable() const { return (_flags & kInstFlagLock) != 0; }
+
+ //! @brief Get whether the instruction is special type (this is used by
+ //! @c Compiler to manage additional variables or functionality).
+ ASMJIT_INLINE bool isSpecial() const { return (_flags & kInstFlagSpecial) != 0; }
+
+ //! @brief Get whether the instruction is special type and it performs
+ //! memory access.
+ ASMJIT_INLINE bool isSpecialMem() const { return (_flags & kInstFlagSpecialMem) != 0; }
+
+ // --------------------------------------------------------------------------
+ // [Members]
+ // --------------------------------------------------------------------------
+
+ //! @brief Instruction name index in _instName[] array.
+ uint16_t _nameIndex;
+ //! @brief Instruction flags.
+ uint16_t _flags;
+ //! @brief Instruction group, used also by @c Compiler.
+ uint8_t _group;
+ //! @brief Reserved for future use.
+ uint8_t _reserved[3];
+ //! @brief Operands' flags.
+ uint16_t _opFlags[4];
+ //! @brief Primary and secondary opcodes.
+ uint32_t _opCode[2];
+};
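A sketch of how the InstInfo flag accessors above are typically consumed; how an InstInfo is looked up from an instruction id is outside this hunk, so the example takes the record directly:

    using namespace asmjit::x86x64;

    // Whether a 'lock' prefix may be emitted for the given instruction;
    // the LOCK prefix is only legal on instructions carrying kInstFlagLock.
    static bool canEmitLocked(const InstInfo& info) {
      return info.isLockable();
    }
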
+
+// ============================================================================
+// [asmjit::x86x64::VarInfo]
+// ============================================================================
+
+//! @brief X86 variable information.
+struct VarInfo {
+ // --------------------------------------------------------------------------
+ // [Accessors]
+ // --------------------------------------------------------------------------
+
+ //! @brief Get register type, see @ref kRegType.
+ ASMJIT_INLINE uint32_t getReg() const { return _reg; }
+ //! @brief Get register size in bytes.
+ ASMJIT_INLINE uint32_t getSize() const { return _size; }
+ //! @brief Get variable class, see @ref kRegClass.
+ ASMJIT_INLINE uint32_t getClass() const { return _class; }
+ //! @brief Get variable description, see @ref kVarDesc.
+ ASMJIT_INLINE uint32_t getDesc() const { return _desc; }
+ //! @brief Get variable type name.
+ ASMJIT_INLINE const char* getName() const { return _name; }
+
+ // --------------------------------------------------------------------------
+ // [Members]
+ // --------------------------------------------------------------------------
+
+ //! @brief Register type, see @ref kRegType.
+ uint8_t _reg;
+ //! @brief Register size in bytes.
+ uint8_t _size;
+ //! @brief Register class, see @ref kRegClass.
+ uint8_t _class;
+ //! @brief Variable flags, see @ref kVarDesc.
+ uint8_t _desc;
+ //! @brief Variable type name.
+ char _name[4];
+};
+
+// ============================================================================
+// [asmjit::x86x64::RegCount]
+// ============================================================================
+
+//! @brief X86/X64 registers count (Gp, Fp, Mm, Xmm).
+struct RegCount {
+ // --------------------------------------------------------------------------
+ // [Zero]
+ // --------------------------------------------------------------------------
+
+ ASMJIT_INLINE void reset() {
+ _packed = 0;
+ }
+
+ // --------------------------------------------------------------------------
+ // [Get]
+ // --------------------------------------------------------------------------
+
+ ASMJIT_INLINE uint32_t get(uint32_t c) const {
+ ASMJIT_ASSERT(c < kRegClassCount);
+
+ return _regs[c];
+ }
+
+ // --------------------------------------------------------------------------
+ // [Set]
+ // --------------------------------------------------------------------------
+
+ ASMJIT_INLINE void set(uint32_t c, uint32_t n) {
+ ASMJIT_ASSERT(c < kRegClassCount);
+ ASMJIT_ASSERT(n < 0x100);
+
+ _regs[c] = static_cast<uint8_t>(n);
+ }
+
+ // --------------------------------------------------------------------------
+ // [Add]
+ // --------------------------------------------------------------------------
+
+ ASMJIT_INLINE void add(uint32_t c, uint32_t n = 1) {
+ ASMJIT_ASSERT(c < kRegClassCount);
+ ASMJIT_ASSERT(n < 0x100);
+
+ _regs[c] += static_cast<uint8_t>(n);
+ }
+
+ // --------------------------------------------------------------------------
+ // [Misc]
+ // --------------------------------------------------------------------------
+
+ ASMJIT_INLINE void makeIndex(const RegCount& count) {
+ _regs[0] = 0;
+ _regs[1] = count._regs[0];
+ _regs[2] = count._regs[1];
+ _regs[3] = count._regs[2];
+
+ _regs[2] += _regs[1];
+ _regs[3] += _regs[2];
+ }
+
+ // --------------------------------------------------------------------------
+ // [Members]
+ // --------------------------------------------------------------------------
+
+ union {
+ struct {
+ uint8_t _gp;
+ uint8_t _fp;
+ uint8_t _mm;
+ uint8_t _xy;
+ };
+
+ uint8_t _regs[4];
+ uint32_t _packed;
+ };
+};
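makeIndex() is an exclusive prefix sum over the four register classes: it turns per-class counts into the starting offset of each class. A small sketch, assuming classes 0..3 map to Gp/Fp/Mm/Xmm as the union members suggest:

    static void regCountExample() {
      asmjit::x86x64::RegCount count;
      count.reset();
      count.set(0, 8);  // 8 Gp registers.
      count.set(1, 8);  // 8 Fp registers.
      count.set(2, 8);  // 8 Mm registers.
      count.set(3, 16); // 16 Xmm/Ymm registers.

      asmjit::x86x64::RegCount index;
      index.makeIndex(count);
      // index now holds {0, 8, 16, 24}: the offset where each class starts
      // when all classes are laid out back to back.
    }
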
+
+// ============================================================================
+// [asmjit::x86x64::RegMask]
+// ============================================================================
+
+//! @brief X86/X64 registers mask (Gp, Fp, Mm, Xmm/Ymm/Zmm).
+struct RegMask {
+ // --------------------------------------------------------------------------
+ // [Zero]
+ // --------------------------------------------------------------------------
+
+ ASMJIT_INLINE void zero(uint32_t c)
+ { _packed.u16[c] = 0; }
+
+ // --------------------------------------------------------------------------
+ // [Get]
+ // --------------------------------------------------------------------------
+
+ ASMJIT_INLINE uint32_t get(uint32_t c) const
+ { return _packed.u16[c]; }
+
+ // --------------------------------------------------------------------------
+ // [Set]
+ // --------------------------------------------------------------------------
+
+ ASMJIT_INLINE void set(uint32_t c, uint32_t mask)
+ { _packed.u16[c] = static_cast<uint16_t>(mask); }
+
+ ASMJIT_INLINE void set(const RegMask& other)
+ { _packed.setUInt64(other._packed); }
+
+ // --------------------------------------------------------------------------
+ // [Add]
+ // --------------------------------------------------------------------------
+
+ ASMJIT_INLINE void add(uint32_t c, uint32_t mask)
+ { _packed.u16[c] |= static_cast<uint16_t>(mask); }
+
+ ASMJIT_INLINE void add(const RegMask& other)
+ { _packed.or_(other._packed); }
+
+ // --------------------------------------------------------------------------
+ // [Del]
+ // --------------------------------------------------------------------------
+
+ ASMJIT_INLINE void del(uint32_t c, uint32_t mask)
+ { _packed.u16[c] &= ~static_cast<uint16_t>(mask); }
+
+ ASMJIT_INLINE void del(const RegMask& other)
+ { _packed.del(other._packed); }
+
+ // --------------------------------------------------------------------------
+ // [And]
+ // --------------------------------------------------------------------------
+
+ ASMJIT_INLINE void and_(uint32_t c, uint32_t mask)
+ { _packed.u16[c] &= static_cast<uint16_t>(mask); }
+
+ ASMJIT_INLINE void and_(const RegMask& other)
+ { _packed.and_(other._packed); }
+
+ // --------------------------------------------------------------------------
+ // [Xor]
+ // --------------------------------------------------------------------------
+
+ ASMJIT_INLINE void xor_(uint32_t c, uint32_t mask)
+ { _packed.u16[c] ^= static_cast<uint16_t>(mask); }
+
+ ASMJIT_INLINE void xor_(const RegMask& other)
+ { _packed.xor_(other._packed); }
+
+ // --------------------------------------------------------------------------
+ // [IsEmpty / Has]
+ // --------------------------------------------------------------------------
+
+ ASMJIT_INLINE bool isEmpty() const
+ { return _packed.isZero(); }
+
+ ASMJIT_INLINE bool has(uint32_t c, uint32_t mask = 0xFFFFFFFF) const
+ { return (static_cast<uint32_t>(_packed.u16[c]) & mask) != 0; }
+
+ // --------------------------------------------------------------------------
+ // [Reset]
+ // --------------------------------------------------------------------------
+
+ ASMJIT_INLINE void reset()
+ { _packed.reset(); }
+
+ // --------------------------------------------------------------------------
+ // [Members]
+ // --------------------------------------------------------------------------
+
+ union {
+ struct {
+ //! @brief Gp registers mask.
+ uint16_t _gp;
+ //! @brief Fp registers mask.
+ uint16_t _fp;
+ //! @brief Mm registers mask.
+ uint16_t _mm;
+ //! @brief Xmm/Ymm registers mask.
+ uint16_t _xy;
+ };
+
+ uint16_t _regs[4];
+
+ //! @brief All masks as 64-bit integer.
+ UInt64 _packed; + }; +}; + +// ============================================================================ +// [asmjit::x86x64::X86Reg] +// ============================================================================ + +//! @brief X86/X64 register. +struct X86Reg : public BaseReg { + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! @brief Create a dummy X86 register. + ASMJIT_INLINE X86Reg() : BaseReg() {} + //! @brief Create a custom X86 register. + ASMJIT_INLINE X86Reg(uint32_t type, uint32_t index, uint32_t size) : BaseReg(type, index, size) {} + //! @brief Create a reference to @a other X86 register. + ASMJIT_INLINE X86Reg(const X86Reg& other) : BaseReg(other) {} + //! @brief Create non-initialized X86 register. + explicit ASMJIT_INLINE X86Reg(const _DontInitialize&) : BaseReg(DontInitialize) {} + + // -------------------------------------------------------------------------- + // [X86Reg Specific] + // -------------------------------------------------------------------------- + + ASMJIT_REG_OP(X86Reg) + + //! @brief Get whether the register is Gp register. + ASMJIT_INLINE bool isGp() const { return _vreg.type <= kRegTypeGpq; } + //! @brief Get whether the register is Gp byte (8-bit) register. + ASMJIT_INLINE bool isGpb() const { return _vreg.type <= kRegTypeGpbHi; } + //! @brief Get whether the register is Gp lo-byte (8-bit) register. + ASMJIT_INLINE bool isGpbLo() const { return _vreg.type == kRegTypeGpbLo; } + //! @brief Get whether the register is Gp hi-byte (8-bit) register. + ASMJIT_INLINE bool isGpbHi() const { return _vreg.type == kRegTypeGpbHi; } + //! @brief Get whether the register is Gp word (16-bit) register. + ASMJIT_INLINE bool isGpw() const { return _vreg.type == kRegTypeGpw; } + //! @brief Get whether the register is Gp dword (32-bit) register. + ASMJIT_INLINE bool isGpd() const { return _vreg.type == kRegTypeGpd; } + //! @brief Get whether the register is Gp qword (64-bit) register. + ASMJIT_INLINE bool isGpq() const { return _vreg.type == kRegTypeGpq; } + + //! @brief Get whether the register is Fp register. + ASMJIT_INLINE bool isFp() const { return _vreg.type == kRegTypeFp; } + //! @brief Get whether the register is Mm (64-bit) register. + ASMJIT_INLINE bool isMm() const { return _vreg.type == kRegTypeMm; } + //! @brief Get whether the register is Xmm (128-bit) register. + ASMJIT_INLINE bool isXmm() const { return _vreg.type == kRegTypeXmm; } + //! @brief Get whether the register is Ymm (256-bit) register. + ASMJIT_INLINE bool isYmm() const { return _vreg.type == kRegTypeYmm; } + + //! @brief Get whether the register is a segment. + ASMJIT_INLINE bool isSeg() const { return _vreg.type == kRegTypeSeg; } +}; + +// ============================================================================ +// [asmjit::x86x64::GpReg] +// ============================================================================ + +//! @brief X86/X64 Gpb/Gpw/Gpd/Gpq register. +struct GpReg : public X86Reg { + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! @brief Create a dummy Gp register. + ASMJIT_INLINE GpReg() : X86Reg() {} + //! @brief Create a reference to @a other Gp register. + ASMJIT_INLINE GpReg(const GpReg& other) : X86Reg(other) {} + //! @brief Create a custom Gp register. 
+ ASMJIT_INLINE GpReg(uint32_t type, uint32_t index, uint32_t size) : X86Reg(type, index, size) {} + //! @brief Create non-initialized Gp register. + explicit ASMJIT_INLINE GpReg(const _DontInitialize&) : X86Reg(DontInitialize) {} + + // -------------------------------------------------------------------------- + // [GpReg Specific] + // -------------------------------------------------------------------------- + + ASMJIT_REG_OP(GpReg) +}; + +// ============================================================================ +// [asmjit::x86x64::FpReg] +// ============================================================================ + +//! @brief X86/X64 80-bit Fp register. +struct FpReg : public X86Reg { + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! @brief Create a dummy Fp register. + ASMJIT_INLINE FpReg() : X86Reg() {} + //! @brief Create a reference to @a other FPU register. + ASMJIT_INLINE FpReg(const FpReg& other) : X86Reg(other) {} + //! @brief Create a custom Fp register. + ASMJIT_INLINE FpReg(uint32_t type, uint32_t index, uint32_t size) : X86Reg(type, index, size) {} + //! @brief Create non-initialized Fp register. + explicit ASMJIT_INLINE FpReg(const _DontInitialize&) : X86Reg(DontInitialize) {} + + // -------------------------------------------------------------------------- + // [FpReg Specific] + // -------------------------------------------------------------------------- + + ASMJIT_REG_OP(FpReg) +}; + +// ============================================================================ +// [asmjit::x86x64::MmReg] +// ============================================================================ + +//! @brief X86/X64 64-bit Mm register. +struct MmReg : public X86Reg { + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! @brief Create a dummy Mm register. + ASMJIT_INLINE MmReg() : X86Reg() {} + //! @brief Create a reference to @a other Mm register. + ASMJIT_INLINE MmReg(const MmReg& other) : X86Reg(other) {} + //! @brief Create a custom Mm register. + ASMJIT_INLINE MmReg(uint32_t type, uint32_t index, uint32_t size) : X86Reg(type, index, size) {} + //! @brief Create non-initialized Mm register. + explicit ASMJIT_INLINE MmReg(const _DontInitialize&) : X86Reg(DontInitialize) {} + + // -------------------------------------------------------------------------- + // [MmReg Specific] + // -------------------------------------------------------------------------- + + ASMJIT_REG_OP(MmReg) +}; + +// ============================================================================ +// [asmjit::x86x64::XmmReg] +// ============================================================================ + +//! @brief X86/X64 128-bit Xmm register. +struct XmmReg : public X86Reg { + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! @brief Create a dummy Xmm register. + ASMJIT_INLINE XmmReg() : X86Reg() {} + //! @brief Create a reference to @a other Xmm register. + ASMJIT_INLINE XmmReg(const XmmReg& other) : X86Reg(other) {} + //! @brief Create a custom Xmm register. + ASMJIT_INLINE XmmReg(uint32_t type, uint32_t index, uint32_t size) : X86Reg(type, index, size) {} + //! 
@brief Create non-initialized Xmm register.
+ explicit ASMJIT_INLINE XmmReg(const _DontInitialize&) : X86Reg(DontInitialize) {}
+
+ // --------------------------------------------------------------------------
+ // [XmmReg Specific]
+ // --------------------------------------------------------------------------
+
+ ASMJIT_REG_OP(XmmReg)
+};
+
+// ============================================================================
+// [asmjit::x86x64::YmmReg]
+// ============================================================================
+
+//! @brief X86/X64 256-bit Ymm register.
+struct YmmReg : public X86Reg {
+ // --------------------------------------------------------------------------
+ // [Construction / Destruction]
+ // --------------------------------------------------------------------------
+
+ //! @brief Create a dummy Ymm register.
+ ASMJIT_INLINE YmmReg() : X86Reg() {}
+ //! @brief Create a reference to @a other Ymm register.
+ ASMJIT_INLINE YmmReg(const YmmReg& other) : X86Reg(other) {}
+ //! @brief Create a custom Ymm register.
+ ASMJIT_INLINE YmmReg(uint32_t type, uint32_t index, uint32_t size) : X86Reg(type, index, size) {}
+ //! @brief Create non-initialized Ymm register.
+ explicit ASMJIT_INLINE YmmReg(const _DontInitialize&) : X86Reg(DontInitialize) {}
+
+ // --------------------------------------------------------------------------
+ // [YmmReg Specific]
+ // --------------------------------------------------------------------------
+
+ ASMJIT_REG_OP(YmmReg)
+};
+
+// ============================================================================
+// [asmjit::x86x64::SegReg]
+// ============================================================================
+
+//! @brief X86/X64 segment register.
+struct SegReg : public X86Reg {
+ // --------------------------------------------------------------------------
+ // [Construction / Destruction]
+ // --------------------------------------------------------------------------
+
+ //! @brief Create a dummy segment register.
+ ASMJIT_INLINE SegReg() : X86Reg() {}
+ //! @brief Create a reference to @a other segment register.
+ ASMJIT_INLINE SegReg(const SegReg& other) : X86Reg(other) {}
+ //! @brief Create a custom segment register.
+ ASMJIT_INLINE SegReg(uint32_t type, uint32_t index, uint32_t size) : X86Reg(type, index, size) {}
+ //! @brief Create non-initialized segment register.
+ explicit ASMJIT_INLINE SegReg(const _DontInitialize&) : X86Reg(DontInitialize) {}
+
+ // --------------------------------------------------------------------------
+ // [SegReg Specific]
+ // --------------------------------------------------------------------------
+
+ ASMJIT_REG_OP(SegReg)
+};
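The register types above are thin value types; besides the predefined constants declared near the end of this header (eax, xmm0, ...), a register can be built from its type/index/size triple. A sketch, assuming the conventional x86 register numbering in which index 2 of the Gpd class is edx:

    using namespace asmjit::x86x64;

    // Gpd register #2 (edx under the usual ordering), 4 bytes wide.
    GpReg myReg(kRegTypeGpd, 2, 4);
    // X86Reg's predicates then classify it: myReg.isGp() and myReg.isGpd()
    // hold, while myReg.isGpq() and myReg.isXmm() do not.
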
+
+// ============================================================================
+// [asmjit::x86x64::Mem]
+// ============================================================================
+
+#define _OP_ID(_Op_) reinterpret_cast<const Operand&>(_Op_).getId()
+
+//! @brief X86 memory operand.
+struct Mem : public BaseMem {
+ // --------------------------------------------------------------------------
+ // [Construction / Destruction]
+ // --------------------------------------------------------------------------
+
+ ASMJIT_INLINE Mem() : BaseMem(DontInitialize) {
+ reset();
+ }
+
+ ASMJIT_INLINE Mem(const Label& label, int32_t disp, uint32_t size = 0) : BaseMem(DontInitialize) {
+ _init_packed_op_sz_b0_b1_id(kOperandTypeMem, size, kMemTypeLabel, 0, label._base.id);
+ _init_packed_d2_d3(kInvalidValue, disp);
+ }
+
+ ASMJIT_INLINE Mem(const Label& label, const GpReg& index, uint32_t shift, int32_t disp, uint32_t size = 0) : BaseMem(DontInitialize) {
+ ASMJIT_ASSERT(shift <= 3);
+
+ _init_packed_op_sz_b0_b1_id(kOperandTypeMem, size, kMemTypeLabel,
+ (kMemVSibGpz << kMemVSibIndex) +
+ (shift << kMemShiftIndex),
+ label.getId());
+ _vmem.index = index.getRegIndex();
+ _vmem.displacement = disp;
+ }
+
+ ASMJIT_INLINE Mem(const Label& label, const GpVar& index, uint32_t shift, int32_t disp, uint32_t size = 0) : BaseMem(DontInitialize) {
+ ASMJIT_ASSERT(shift <= 3);
+
+ _init_packed_op_sz_b0_b1_id(kOperandTypeMem, size, kMemTypeLabel,
+ (kMemVSibGpz << kMemVSibIndex) +
+ (shift << kMemShiftIndex),
+ label.getId());
+ _vmem.index = _OP_ID(index);
+ _vmem.displacement = disp;
+ }
+
+ ASMJIT_INLINE Mem(const GpReg& base, int32_t disp, uint32_t size = 0) : BaseMem(DontInitialize) {
+ _init_packed_op_sz_b0_b1_id(kOperandTypeMem, size, kMemTypeBaseIndex,
+ _getGpdFlags(base) +
+ (kMemVSibGpz << kMemVSibIndex),
+ base.getRegIndex());
+ _init_packed_d2_d3(kInvalidValue, disp);
+ }
+
+ ASMJIT_INLINE Mem(const GpReg& base, const GpReg& index, uint32_t shift, int32_t disp, uint32_t size = 0) : BaseMem(DontInitialize) {
+ ASMJIT_ASSERT(shift <= 3);
+
+ _init_packed_op_sz_b0_b1_id(kOperandTypeMem, size, kMemTypeBaseIndex,
+ _getGpdFlags(base) + (shift << kMemShiftIndex),
+ base.getRegIndex());
+ _vmem.index = index.getRegIndex();
+ _vmem.displacement = disp;
+ }
+
+ ASMJIT_INLINE Mem(const GpReg& base, const XmmReg& index, uint32_t shift, int32_t disp, uint32_t size = 0) : BaseMem(DontInitialize) {
+ ASMJIT_ASSERT(shift <= 3);
+
+ _init_packed_op_sz_b0_b1_id(kOperandTypeMem, size, kMemTypeBaseIndex,
+ _getGpdFlags(base) +
+ (kMemVSibXmm << kMemVSibIndex) +
+ (shift << kMemShiftIndex),
+ base.getRegIndex());
+ _vmem.index = index.getRegIndex();
+ _vmem.displacement = disp;
+ }
+
+ ASMJIT_INLINE Mem(const GpReg& base, const YmmReg& index, uint32_t shift, int32_t disp, uint32_t size = 0) : BaseMem(DontInitialize) {
+ ASMJIT_ASSERT(shift <= 3);
+
+ _init_packed_op_sz_b0_b1_id(kOperandTypeMem, size, kMemTypeBaseIndex,
+ _getGpdFlags(base) +
+ (kMemVSibYmm << kMemVSibIndex) +
+ (shift << kMemShiftIndex),
+ base.getRegIndex());
+ _vmem.index = index.getRegIndex();
+ _vmem.displacement = disp;
+ }
+
+ ASMJIT_INLINE Mem(const GpVar& base, int32_t disp, uint32_t size = 0) : BaseMem(DontInitialize) {
+ _init_packed_op_sz_b0_b1_id(kOperandTypeMem, size, kMemTypeBaseIndex,
+ _getGpdFlags(reinterpret_cast<const Operand&>(base)) +
+ (kMemVSibGpz << kMemVSibIndex),
+ _OP_ID(base));
+ _init_packed_d2_d3(kInvalidValue, disp);
+ }
+
+ ASMJIT_INLINE Mem(const GpVar& base, const GpVar& index, uint32_t shift, int32_t disp, uint32_t size = 0) : BaseMem(DontInitialize) {
+ ASMJIT_ASSERT(shift <= 3);
+
+ _init_packed_op_sz_b0_b1_id(kOperandTypeMem, size, kMemTypeBaseIndex,
+ _getGpdFlags(reinterpret_cast<const Operand&>(base)) +
+ (shift << kMemShiftIndex),
+ _OP_ID(base));
+ _vmem.index = _OP_ID(index);
+ _vmem.displacement = disp;
+ }
+
+ ASMJIT_INLINE Mem(const GpVar& base, const XmmVar& index, uint32_t shift, int32_t disp, uint32_t size = 0) : BaseMem(DontInitialize) {
+ ASMJIT_ASSERT(shift <= 3);
+
+ _init_packed_op_sz_b0_b1_id(kOperandTypeMem, size, kMemTypeBaseIndex,
+ _getGpdFlags(reinterpret_cast<const Operand&>(base)) +
+ (kMemVSibXmm << kMemVSibIndex) +
+ (shift << kMemShiftIndex),
+ _OP_ID(base));
+ _vmem.index = _OP_ID(index);
+ _vmem.displacement = disp;
+ }
+
+ ASMJIT_INLINE Mem(const GpVar& base, const YmmVar& index, uint32_t shift, int32_t disp, uint32_t size = 0) : BaseMem(DontInitialize) {
+ ASMJIT_ASSERT(shift <= 3);
+
+ _init_packed_op_sz_b0_b1_id(kOperandTypeMem, size, kMemTypeBaseIndex,
+ _getGpdFlags(reinterpret_cast<const Operand&>(base)) +
+ (kMemVSibYmm << kMemVSibIndex) +
+ (shift << kMemShiftIndex),
+ _OP_ID(base));
+ _vmem.index = _OP_ID(index);
+ _vmem.displacement = disp;
+ }
+
+ ASMJIT_INLINE Mem(const _Initialize&, uint32_t memType, const X86Var& base, int32_t disp, uint32_t size) : BaseMem(DontInitialize) {
+ _init_packed_op_sz_b0_b1_id(kOperandTypeMem, size, memType, 0, _OP_ID(base));
+ _vmem.index = kInvalidValue;
+ _vmem.displacement = disp;
+ }
+
+ ASMJIT_INLINE Mem(const _Initialize&, uint32_t memType, const X86Var& base, const GpVar& index, uint32_t shift, int32_t disp, uint32_t size) : BaseMem(DontInitialize) {
+ ASMJIT_ASSERT(shift <= 3);
+
+ _init_packed_op_sz_b0_b1_id(kOperandTypeMem, size, memType, shift << kMemShiftIndex, _OP_ID(base));
+ _vmem.index = _OP_ID(index);
+ _vmem.displacement = disp;
+ }
+
+ ASMJIT_INLINE Mem(const Mem& other) : BaseMem(other) {}
+ explicit ASMJIT_INLINE Mem(const _DontInitialize&) : BaseMem(DontInitialize) {}
+
+ // --------------------------------------------------------------------------
+ // [Mem Specific]
+ // --------------------------------------------------------------------------
+
+ //! @brief Clone Mem operand.
+ ASMJIT_INLINE Mem clone() const {
+ return Mem(*this);
+ }
+
+ //! @brief Reset Mem operand.
+ ASMJIT_INLINE void reset() {
+ _init_packed_op_sz_b0_b1_id(kOperandTypeMem, 0, kMemTypeBaseIndex, 0, kInvalidValue);
+ _init_packed_d2_d3(kInvalidValue, 0);
+ }
+
+ //! @internal
+ ASMJIT_INLINE void _init(uint32_t memType, uint32_t base, int32_t disp, uint32_t size) {
+ _init_packed_op_sz_b0_b1_id(kOperandTypeMem, size, memType, 0, base);
+ _vmem.index = kInvalidValue;
+ _vmem.displacement = disp;
+ }
+
+ // --------------------------------------------------------------------------
+ // [Segment]
+ // --------------------------------------------------------------------------
+
+ //! @brief Get whether the memory operand has segment override prefix.
+ ASMJIT_INLINE bool hasSegment() const {
+ return (_vmem.flags & kMemSegMask) != (kSegDefault << kMemSegIndex);
+ }
+
+ //! @brief Get memory operand segment, see @c kSeg.
+ ASMJIT_INLINE uint32_t getSegment() const {
+ return (static_cast<uint32_t>(_vmem.flags) >> kMemSegIndex) & kMemSegBits;
+ }
+
+ //! @brief Set memory operand segment, see @c kSeg.
+ ASMJIT_INLINE Mem& setSegment(uint32_t segIndex) {
+ _vmem.flags = static_cast<uint8_t>(
+ (static_cast<uint32_t>(_vmem.flags) & ~kMemSegMask) + (segIndex << kMemSegIndex));
+ return *this;
+ }
+
+ //! @brief Set memory operand segment, see @c kSeg.
+ ASMJIT_INLINE Mem& setSegment(const SegReg& seg) {
+ return setSegment(seg.getRegIndex());
+ }
+
+ // --------------------------------------------------------------------------
+ // [Gpd]
+ // --------------------------------------------------------------------------
+
+ //! @brief Get whether the memory operand has 32-bit GP base.
+ ASMJIT_INLINE bool hasGpdBase() const {
+ return (_packed[0].u32[0] & IntUtil::pack32_4x8(0x00, 0x00, 0x00, kMemGpdMask)) != 0;
+ }
+
+ //! @brief Set whether the memory operand has 32-bit GP base.
+ ASMJIT_INLINE Mem& setGpdBase() {
+ _packed[0].u32[0] |= IntUtil::pack32_4x8(0x00, 0x00, 0x00, kMemGpdMask);
+ return *this;
+ }
+
+ //! @brief Set whether the memory operand has 32-bit GP base to @a b.
+ ASMJIT_INLINE Mem& setGpdBase(uint32_t b) {
+ _packed[0].u32[0] &= ~IntUtil::pack32_4x8(0x00, 0x00, 0x00, kMemGpdMask);
+ _packed[0].u32[0] |= IntUtil::pack32_4x8(0x00, 0x00, 0x00, b << kMemGpdIndex);
+ return *this;
+ }
+
+ // --------------------------------------------------------------------------
+ // [VSib]
+ // --------------------------------------------------------------------------
+
+ //! @brief Get SIB type.
+ ASMJIT_INLINE uint32_t getVSib() const {
+ return (static_cast<uint32_t>(_vmem.flags) >> kMemVSibIndex) & kMemVSibBits;
+ }
+
+ //! @brief Set SIB type.
+ ASMJIT_INLINE Mem& _setVSib(uint32_t vsib) {
+ _packed[0].u32[0] &= ~IntUtil::pack32_4x8(0x00, 0x00, 0x00, kMemVSibMask);
+ _packed[0].u32[0] |= IntUtil::pack32_4x8(0x00, 0x00, 0x00, vsib << kMemVSibIndex);
+ return *this;
+ }
+
+ // --------------------------------------------------------------------------
+ // [Size]
+ // --------------------------------------------------------------------------
+
+ //! @brief Set memory operand size.
+ ASMJIT_INLINE Mem& setSize(uint32_t size) {
+ _vmem.size = static_cast<uint8_t>(size);
+ return *this;
+ }
+
+ // --------------------------------------------------------------------------
+ // [Base]
+ // --------------------------------------------------------------------------
+
+ //! @brief Get whether the memory operand has base register.
+ ASMJIT_INLINE bool hasBase() const
+ { return _vmem.base != kInvalidValue; }
+
+ //! @brief Get memory operand base register code, variable id, or @ref kInvalidValue.
+ ASMJIT_INLINE uint32_t getBase() const
+ { return _vmem.base; }
+
+ //! @brief Set memory operand base register code, variable id, or @ref kInvalidValue.
+ ASMJIT_INLINE Mem& setBase(uint32_t base) {
+ _vmem.base = base;
+ return *this;
+ }
+
+ // --------------------------------------------------------------------------
+ // [Index]
+ // --------------------------------------------------------------------------
+
+ //! @brief Get whether the memory operand has index.
+ ASMJIT_INLINE bool hasIndex() const {
+ return _vmem.index != kInvalidValue;
+ }
+
+ //! @brief Get memory operand index register code, variable id, or @ref kInvalidValue.
+ ASMJIT_INLINE uint32_t getIndex() const {
+ return _vmem.index;
+ }
+
+ //! @brief Set memory operand index register code, variable id, or @ref kInvalidValue.
+ ASMJIT_INLINE Mem& setIndex(uint32_t index) {
+ _vmem.index = index;
+ return *this;
+ }
+
+ //! @brief Set memory index.
+ ASMJIT_INLINE Mem& setIndex(const GpReg& index) {
+ _vmem.index = index.getRegIndex();
+ return _setVSib(kMemVSibGpz);
+ }
+
+ //! @brief Set memory index.
+ ASMJIT_INLINE Mem& setIndex(const GpReg& index, uint32_t shift) {
+ _vmem.index = index.getRegIndex();
+ return _setVSib(kMemVSibGpz).setShift(shift);
+ }
+
+ //! @brief Set memory index.
+ ASMJIT_INLINE Mem& setIndex(const GpVar& index) {
+ _vmem.index = reinterpret_cast<const Operand&>(index).getId();
+ return _setVSib(kMemVSibGpz);
+ }
+
+ //! @brief Set memory index.
+ ASMJIT_INLINE Mem& setIndex(const GpVar& index, uint32_t shift) {
+ _vmem.index = reinterpret_cast<const Operand&>(index).getId();
+ return _setVSib(kMemVSibGpz).setShift(shift);
+ }
+
+ //! @brief Set memory index.
+ ASMJIT_INLINE Mem& setIndex(const XmmReg& index) {
+ _vmem.index = index.getRegIndex();
+ return _setVSib(kMemVSibXmm);
+ }
+
+ //! @brief Set memory index.
+ ASMJIT_INLINE Mem& setIndex(const XmmReg& index, uint32_t shift) {
+ _vmem.index = index.getRegIndex();
+ return _setVSib(kMemVSibXmm).setShift(shift);
+ }
+
+ //! @brief Set memory index.
+ ASMJIT_INLINE Mem& setIndex(const XmmVar& index) {
+ _vmem.index = reinterpret_cast<const Operand&>(index).getId();
+ return _setVSib(kMemVSibXmm);
+ }
+
+ //! @brief Set memory index.
+ ASMJIT_INLINE Mem& setIndex(const XmmVar& index, uint32_t shift) {
+ _vmem.index = reinterpret_cast<const Operand&>(index).getId();
+ return _setVSib(kMemVSibXmm).setShift(shift);
+ }
+
+ //! @brief Set memory index.
+ ASMJIT_INLINE Mem& setIndex(const YmmReg& index) {
+ _vmem.index = index.getRegIndex();
+ return _setVSib(kMemVSibYmm);
+ }
+
+ //! @brief Set memory index.
+ ASMJIT_INLINE Mem& setIndex(const YmmReg& index, uint32_t shift) {
+ _vmem.index = index.getRegIndex();
+ return _setVSib(kMemVSibYmm).setShift(shift);
+ }
+
+ //! @brief Set memory index.
+ ASMJIT_INLINE Mem& setIndex(const YmmVar& index) {
+ _vmem.index = reinterpret_cast<const Operand&>(index).getId();
+ return _setVSib(kMemVSibYmm);
+ }
+
+ //! @brief Set memory index.
+ ASMJIT_INLINE Mem& setIndex(const YmmVar& index, uint32_t shift) {
+ _vmem.index = reinterpret_cast<const Operand&>(index).getId();
+ return _setVSib(kMemVSibYmm).setShift(shift);
+ }
+
+ //! @brief Reset memory index.
+ ASMJIT_INLINE Mem& resetIndex() {
+ _vmem.index = kInvalidValue;
+ return _setVSib(kMemVSibGpz);
+ }
+
+ // --------------------------------------------------------------------------
+ // [Shift]
+ // --------------------------------------------------------------------------
+
+ //! @brief Get whether the memory operand has shift used.
+ ASMJIT_INLINE bool hasShift() const {
+ return (_vmem.flags & kMemShiftMask) != 0;
+ }
+
+ //! @brief Get memory operand index scale (0, 1, 2 or 3).
+ ASMJIT_INLINE uint32_t getShift() const {
+ return _vmem.flags >> kMemShiftIndex;
+ }
+
+ //! @brief Set memory operand index scale (0, 1, 2 or 3).
+ ASMJIT_INLINE Mem& setShift(uint32_t shift) {
+ _packed[0].u32[0] &= ~IntUtil::pack32_4x8(0x00, 0x00, 0x00, kMemShiftMask);
+ _packed[0].u32[0] |= IntUtil::pack32_4x8(0x00, 0x00, 0x00, shift << kMemShiftIndex);
+ return *this;
+ }
+
+ // --------------------------------------------------------------------------
+ // [Displacement]
+ // --------------------------------------------------------------------------
+
+ //! @brief Get memory operand relative displacement.
+ ASMJIT_INLINE int32_t getDisplacement() const {
+ return _vmem.displacement;
+ }
+
+ //! @brief Set memory operand relative displacement.
+ ASMJIT_INLINE Mem& setDisplacement(int32_t disp) {
+ _vmem.displacement = disp;
+ return *this;
+ }
+
+ //! @brief Reset memory operand relative displacement.
+ ASMJIT_INLINE Mem& resetDisplacement() {
+ _vmem.displacement = 0;
+ return *this;
+ }
+
+ //! @brief Adjust memory operand relative displacement by @a disp.
+ ASMJIT_INLINE Mem& adjust(int32_t disp) {
+ _vmem.displacement += disp;
+ return *this;
+ }
+
+ //! @brief Get new memory operand adjusted by @a disp.
+ ASMJIT_INLINE Mem adjusted(int32_t disp) const {
+ Mem result(*this);
+ result.adjust(disp);
+ return result;
+ }
+
+ // --------------------------------------------------------------------------
+ // [Operator Overload]
+ // --------------------------------------------------------------------------
+
+ ASMJIT_INLINE Mem& operator=(const Mem& other) {
+ _copy(other);
+ return *this;
+ }
+
+ ASMJIT_INLINE bool operator==(const Mem& other) const {
+ return (_packed[0] == other._packed[0]) & (_packed[1] == other._packed[1]);
+ }
+
+ ASMJIT_INLINE bool operator!=(const Mem& other) const {
+ return !(*this == other);
+ }
+
+ // --------------------------------------------------------------------------
+ // [Static]
+ // --------------------------------------------------------------------------
+
+ static ASMJIT_INLINE uint32_t _getGpdFlags(const Operand& base) {
+ return (base._vreg.size & 0x4) << (kMemGpdIndex - 2);
+ }
+};
+
+#undef _OP_ID
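The Mem constructors above cover the usual [base + index*scale + disp] addressing forms plus the VSIB forms used by AVX2 gathers; note that 'shift' is log2 of the scale. A sketch, assuming the predefined register constants declared near the end of this header:

    using namespace asmjit::x86x64;

    static void memOperandExamples() {
      Mem m(eax, ecx, 3, 16, 8);  // qword ptr [eax + ecx*8 + 16]; shift 3 => scale 8.
      m.adjust(32);               // Displacement is now 48.

      Mem vm(eax, xmm1, 2, 0, 4); // VSIB form: [eax + xmm1*4], as used by gathers.
      (void)vm;
    }
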
+
+// ============================================================================
+// [asmjit::x86x64::X86Var]
+// ============================================================================
+
+//! @brief Base class for all variables.
+struct X86Var : public BaseVar {
+ // --------------------------------------------------------------------------
+ // [Construction / Destruction]
+ // --------------------------------------------------------------------------
+
+ ASMJIT_INLINE X86Var() : BaseVar(DontInitialize) {
+ reset();
+ }
+
+ ASMJIT_INLINE X86Var(const X86Var& other) : BaseVar(other) {}
+
+ explicit ASMJIT_INLINE X86Var(const _DontInitialize&) : BaseVar(DontInitialize) {}
+
+ // --------------------------------------------------------------------------
+ // [X86Var Specific]
+ // --------------------------------------------------------------------------
+
+ //! @brief Clone X86Var operand.
+ ASMJIT_INLINE X86Var clone() const {
+ return X86Var(*this);
+ }
+
+ //! @brief Reset X86Var operand.
+ ASMJIT_INLINE void reset() {
+ _init_packed_op_sz_b0_b1_id(kOperandTypeVar, 0, kInvalidReg, kInvalidReg, kInvalidValue);
+ _init_packed_d2_d3(kInvalidValue, kInvalidValue);
+ }
+
+ // --------------------------------------------------------------------------
+ // [Type]
+ // --------------------------------------------------------------------------
+
+ ASMJIT_INLINE uint32_t getRegType() const { return _vreg.type; }
+ ASMJIT_INLINE uint32_t getVarType() const { return _vreg.vType; }
+
+ //! @brief Get whether the variable is Gp register.
+ ASMJIT_INLINE bool isGp() const { return _vreg.type <= kRegTypeGpq; }
+ //! @brief Get whether the variable is Gpb register.
+ ASMJIT_INLINE bool isGpb() const { return _vreg.type <= kRegTypeGpbHi; }
+ //! @brief Get whether the variable is Gpb-lo register.
+ ASMJIT_INLINE bool isGpbLo() const { return _vreg.type == kRegTypeGpbLo; }
+ //! @brief Get whether the variable is Gpb-hi register.
+ ASMJIT_INLINE bool isGpbHi() const { return _vreg.type == kRegTypeGpbHi; }
+ //! @brief Get whether the variable is Gpw register.
+ ASMJIT_INLINE bool isGpw() const { return _vreg.type == kRegTypeGpw; }
+ //! @brief Get whether the variable is Gpd register.
+ ASMJIT_INLINE bool isGpd() const { return _vreg.type == kRegTypeGpd; }
+ //! @brief Get whether the variable is Gpq register.
+ ASMJIT_INLINE bool isGpq() const { return _vreg.type == kRegTypeGpq; }
+
+ //! @brief Get whether the variable is Fp register.
+ ASMJIT_INLINE bool isFp() const { return _vreg.type == kRegTypeFp; }
+ //! @brief Get whether the variable is Mm type.
+ ASMJIT_INLINE bool isMm() const { return _vreg.type == kRegTypeMm; }
+ //! @brief Get whether the variable is Xmm type.
+ ASMJIT_INLINE bool isXmm() const { return _vreg.type == kRegTypeXmm; }
+ //! @brief Get whether the variable is Ymm type.
+ ASMJIT_INLINE bool isYmm() const { return _vreg.type == kRegTypeYmm; }
+
+ // --------------------------------------------------------------------------
+ // [Memory Cast]
+ // --------------------------------------------------------------------------
+
+ //! @brief Cast this variable to memory operand.
+ //!
+ //! @note The size of the operand depends on the native variable type; use
+ //! the other variants if you need a specific size.
+ ASMJIT_INLINE Mem m(int32_t disp = 0) const
+ { return Mem(Initialize, kMemTypeStackIndex, *this, disp, getSize()); }
+
+ //! @overload
+ ASMJIT_INLINE Mem m(const GpVar& index, uint32_t shift = 0, int32_t disp = 0) const
+ { return Mem(Initialize, kMemTypeStackIndex, *this, index, shift, disp, getSize()); }
+
+ //! @brief Cast this variable to 8-bit memory operand.
+ ASMJIT_INLINE Mem m8(int32_t disp = 0) const
+ { return Mem(Initialize, kMemTypeStackIndex, *this, disp, 1); }
+
+ //! @overload
+ ASMJIT_INLINE Mem m8(const GpVar& index, uint32_t shift = 0, int32_t disp = 0) const
+ { return Mem(Initialize, kMemTypeStackIndex, *this, index, shift, disp, 1); }
+
+ //! @brief Cast this variable to 16-bit memory operand.
+ ASMJIT_INLINE Mem m16(int32_t disp = 0) const
+ { return Mem(Initialize, kMemTypeStackIndex, *this, disp, 2); }
+
+ //! @overload
+ ASMJIT_INLINE Mem m16(const GpVar& index, uint32_t shift = 0, int32_t disp = 0) const
+ { return Mem(Initialize, kMemTypeStackIndex, *this, index, shift, disp, 2); }
+
+ //! @brief Cast this variable to 32-bit memory operand.
+ ASMJIT_INLINE Mem m32(int32_t disp = 0) const
+ { return Mem(Initialize, kMemTypeStackIndex, *this, disp, 4); }
+
+ //! @overload
+ ASMJIT_INLINE Mem m32(const GpVar& index, uint32_t shift = 0, int32_t disp = 0) const
+ { return Mem(Initialize, kMemTypeStackIndex, *this, index, shift, disp, 4); }
+
+ //! @brief Cast this variable to 64-bit memory operand.
+ ASMJIT_INLINE Mem m64(int32_t disp = 0) const
+ { return Mem(Initialize, kMemTypeStackIndex, *this, disp, 8); }
+
+ //! @overload
+ ASMJIT_INLINE Mem m64(const GpVar& index, uint32_t shift = 0, int32_t disp = 0) const
+ { return Mem(Initialize, kMemTypeStackIndex, *this, index, shift, disp, 8); }
+
+ //! @brief Cast this variable to 80-bit memory operand (long double).
+ ASMJIT_INLINE Mem m80(int32_t disp = 0) const
+ { return Mem(Initialize, kMemTypeStackIndex, *this, disp, 10); }
+
+ //! @overload
+ ASMJIT_INLINE Mem m80(const GpVar& index, uint32_t shift = 0, int32_t disp = 0) const
+ { return Mem(Initialize, kMemTypeStackIndex, *this, index, shift, disp, 10); }
+
+ //! @brief Cast this variable to 128-bit memory operand.
+ ASMJIT_INLINE Mem m128(int32_t disp = 0) const
+ { return Mem(Initialize, kMemTypeStackIndex, *this, disp, 16); }
+
+ //! @overload
+ ASMJIT_INLINE Mem m128(const GpVar& index, uint32_t shift = 0, int32_t disp = 0) const
+ { return Mem(Initialize, kMemTypeStackIndex, *this, index, shift, disp, 16); }
+
+ //! @brief Cast this variable to 256-bit memory operand.
+ ASMJIT_INLINE Mem m256(int32_t disp = 0) const
+ { return Mem(Initialize, kMemTypeStackIndex, *this, disp, 32); }
+
+ //! @overload
+ ASMJIT_INLINE Mem m256(const GpVar& index, uint32_t shift = 0, int32_t disp = 0) const
+ { return Mem(Initialize, kMemTypeStackIndex, *this, index, shift, disp, 32); }
+
+ // --------------------------------------------------------------------------
+ // [Operator Overload]
+ // --------------------------------------------------------------------------
+
+ ASMJIT_INLINE X86Var& operator=(const X86Var& other) { _copy(other); return *this; }
+
+ ASMJIT_INLINE bool operator==(const X86Var& other) const { return _packed[0] == other._packed[0]; }
+ ASMJIT_INLINE bool operator!=(const X86Var& other) const { return !operator==(other); }
+
+ // --------------------------------------------------------------------------
+ // [Private]
+ // --------------------------------------------------------------------------
+
+protected:
+ ASMJIT_INLINE X86Var(const X86Var& other, uint32_t reg, uint32_t size) : BaseVar(DontInitialize)
+ {
+ _init_packed_op_sz_w0_id(kOperandTypeVar, size, (reg << 8) + other._vreg.index, other._base.id);
+ _vreg.vType = other._vreg.vType;
+ }
+};
+
+// ============================================================================
+// [asmjit::x86x64::GpVar]
+// ============================================================================
+
+//! @brief Gp variable.
+struct GpVar : public X86Var {
+ // --------------------------------------------------------------------------
+ // [Construction / Destruction]
+ // --------------------------------------------------------------------------
+
+ //! @brief Create new uninitialized @c GpVar instance.
+ ASMJIT_INLINE GpVar() : X86Var() {}
+
+ //! @brief Create new initialized @c GpVar instance.
+ ASMJIT_INLINE GpVar(BaseCompiler& c, uint32_t type = kVarTypeIntPtr, const char* name = NULL) : X86Var(DontInitialize) {
+ c._newVar(this, type, name);
+ }
+
+ //! @brief Create new @c GpVar instance using @a other.
+ //!
+ //! Note that this will not create a new variable; use @c Compiler::newGpVar()
+ //! if you want to do so. This is only a copy constructor that allows the
+ //! same variable to be stored in different places.
+ ASMJIT_INLINE GpVar(const GpVar& other) : X86Var(other) {}
+
+ //! @brief Create new uninitialized @c GpVar instance (internal).
+ explicit ASMJIT_INLINE GpVar(const _DontInitialize&) : X86Var(DontInitialize) {}
+
+ // --------------------------------------------------------------------------
+ // [GpVar Specific]
+ // --------------------------------------------------------------------------
+
+ //! @brief Clone GpVar operand.
+ ASMJIT_INLINE GpVar clone() const {
+ return GpVar(*this);
+ }
+
+ //! @brief Reset GpVar operand.
+ ASMJIT_INLINE void reset() {
+ X86Var::reset();
+ }
+
+ // --------------------------------------------------------------------------
+ // [GpVar Cast]
+ // --------------------------------------------------------------------------
+
+ //! @brief Cast this variable to the 8-bit (LO) part of the variable.
+ ASMJIT_INLINE GpVar r8() const { return GpVar(*this, kRegTypeGpbLo, 1); }
+ //! @brief Cast this variable to the 8-bit (LO) part of the variable.
+ ASMJIT_INLINE GpVar r8Lo() const { return GpVar(*this, kRegTypeGpbLo, 1); }
+ //! @brief Cast this variable to the 8-bit (HI) part of the variable.
+ ASMJIT_INLINE GpVar r8Hi() const { return GpVar(*this, kRegTypeGpbHi, 1); }
+
+ //! @brief Cast this variable to the 16-bit part of the variable.
+ ASMJIT_INLINE GpVar r16() const { return GpVar(*this, kRegTypeGpw, 2); }
+ //! @brief Cast this variable to the 32-bit part of the variable.
+ ASMJIT_INLINE GpVar r32() const { return GpVar(*this, kRegTypeGpd, 4); }
+ //! @brief Cast this variable to the 64-bit part of the variable.
+ ASMJIT_INLINE GpVar r64() const { return GpVar(*this, kRegTypeGpq, 8); }
+
+ // --------------------------------------------------------------------------
+ // [Operator Overload]
+ // --------------------------------------------------------------------------
+
+ ASMJIT_INLINE GpVar& operator=(const GpVar& other) { _copy(other); return *this; }
+
+ ASMJIT_INLINE bool operator==(const GpVar& other) const { return X86Var::operator==(other); }
+ ASMJIT_INLINE bool operator!=(const GpVar& other) const { return X86Var::operator!=(other); }
+
+ // --------------------------------------------------------------------------
+ // [Private]
+ // --------------------------------------------------------------------------
+
+protected:
+ ASMJIT_INLINE GpVar(const GpVar& other, uint32_t reg, uint32_t size) : X86Var(other, reg, size) {}
+};
+
+// ============================================================================
+// [asmjit::x86x64::FpVar]
+// ============================================================================
+
+//! @brief Fpu variable.
+struct FpVar : public X86Var {
+ // --------------------------------------------------------------------------
+ // [Construction / Destruction]
+ // --------------------------------------------------------------------------
+
+ //! @brief Create new uninitialized @c FpVar instance.
+ ASMJIT_INLINE FpVar() : X86Var() {}
+ //! @brief Create new @c FpVar instance using @a other.
+ ASMJIT_INLINE FpVar(const FpVar& other) : X86Var(other) {}
+
+ //! @brief Create new uninitialized @c FpVar instance (internal).
+ explicit ASMJIT_INLINE FpVar(const _DontInitialize&) : X86Var(DontInitialize) {}
+
+ // --------------------------------------------------------------------------
+ // [FpVar Specific]
+ // --------------------------------------------------------------------------
+
+ //! @brief Clone FpVar operand.
+ ASMJIT_INLINE FpVar clone() const {
+ return FpVar(*this);
+ }
+
+ //! @brief Reset FpVar operand.
+ ASMJIT_INLINE void reset() {
+ X86Var::reset();
+ }
+
+ // --------------------------------------------------------------------------
+ // [Operator Overload]
+ // --------------------------------------------------------------------------
+
+ ASMJIT_INLINE FpVar& operator=(const FpVar& other) { _copy(other); return *this; }
+
+ ASMJIT_INLINE bool operator==(const FpVar& other) const { return X86Var::operator==(other); }
+ ASMJIT_INLINE bool operator!=(const FpVar& other) const { return X86Var::operator!=(other); }
+};
+
+// ============================================================================
+// [asmjit::x86x64::MmVar]
+// ============================================================================
+
+//! @brief Mm variable.
+struct MmVar : public X86Var {
+ // --------------------------------------------------------------------------
+ // [Construction / Destruction]
+ // --------------------------------------------------------------------------
+
+ //! @brief Create new uninitialized @c MmVar instance.
+ ASMJIT_INLINE MmVar() : X86Var() {}
+ //! @brief Create new initialized @c MmVar instance.
+ ASMJIT_INLINE MmVar(BaseCompiler& c, uint32_t type = kVarTypeMm, const char* name = NULL) : X86Var(DontInitialize) {
+ c._newVar(this, type, name);
+ }
+
+ //! @brief Create new @c MmVar instance using @a other.
+ //!
+ //! Note that this will not create a new variable; use @c Compiler::newMmVar()
+ //! if you want to do so. This is only a copy constructor that allows the
+ //! same variable to be stored in different places.
+ ASMJIT_INLINE MmVar(const MmVar& other) : X86Var(other) {}
+
+ //! @brief Create new uninitialized @c MmVar instance (internal).
+ explicit ASMJIT_INLINE MmVar(const _DontInitialize&) : X86Var(DontInitialize) {}
+
+ // --------------------------------------------------------------------------
+ // [MmVar Specific]
+ // --------------------------------------------------------------------------
+
+ //! @brief Clone MmVar operand.
+ ASMJIT_INLINE MmVar clone() const {
+ return MmVar(*this);
+ }
+
+ //! @brief Reset MmVar operand.
+ ASMJIT_INLINE void reset() {
+ X86Var::reset();
+ }
+
+ // --------------------------------------------------------------------------
+ // [Operator Overload]
+ // --------------------------------------------------------------------------
+
+ ASMJIT_INLINE MmVar& operator=(const MmVar& other) { _copy(other); return *this; }
+
+ ASMJIT_INLINE bool operator==(const MmVar& other) const { return X86Var::operator==(other); }
+ ASMJIT_INLINE bool operator!=(const MmVar& other) const { return X86Var::operator!=(other); }
+};
+
+// ============================================================================
+// [asmjit::x86x64::XmmVar]
+// ============================================================================
+
+//! @brief Xmm variable.
+struct XmmVar : public X86Var {
+ // --------------------------------------------------------------------------
+ // [Construction / Destruction]
+ // --------------------------------------------------------------------------
+
+ //! @brief Create new uninitialized @c XmmVar instance.
+ ASMJIT_INLINE XmmVar() : X86Var() {}
+ //! @brief Create new initialized @c XmmVar instance.
+ ASMJIT_INLINE XmmVar(BaseCompiler& c, uint32_t type = kVarTypeXmm, const char* name = NULL) : X86Var(DontInitialize) {
+ c._newVar(this, type, name);
+ }
+
+ //! @brief Create new @c XmmVar instance using @a other.
+ ASMJIT_INLINE XmmVar(const XmmVar& other) : X86Var(other) {}
+
+ //! @brief Create new uninitialized @c XmmVar instance (internal).
+ explicit ASMJIT_INLINE XmmVar(const _DontInitialize&) : X86Var(DontInitialize) {}
+
+ // --------------------------------------------------------------------------
+ // [XmmVar Specific]
+ // --------------------------------------------------------------------------
+
+ //! @brief Clone XmmVar operand.
+ ASMJIT_INLINE XmmVar clone() const {
+ return XmmVar(*this);
+ }
+
+ //! @brief Reset XmmVar operand.
+ ASMJIT_INLINE void reset() {
+ X86Var::reset();
+ }
+
+ // --------------------------------------------------------------------------
+ // [Operator Overload]
+ // --------------------------------------------------------------------------
+
+ ASMJIT_INLINE XmmVar& operator=(const XmmVar& other) { _copy(other); return *this; }
+
+ ASMJIT_INLINE bool operator==(const XmmVar& other) const { return X86Var::operator==(other); }
+ ASMJIT_INLINE bool operator!=(const XmmVar& other) const { return X86Var::operator!=(other); }
+};
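The width casts on GpVar (r8() .. r64()) and the memory casts inherited from X86Var (m() .. m256()), both shown above, re-view the same variable rather than creating a new one. A sketch, assuming a BaseCompiler instance 'c' (compiler.h is part of this commit):

    using namespace asmjit;
    using namespace asmjit::x86x64;

    static void variableViews(BaseCompiler& c) {
      GpVar v(c, kVarTypeIntPtr, "v");

      GpVar low = v.r8();  // The same variable, viewed as its low 8 bits.
      Mem home = v.m32(8); // 32-bit memory view of the variable's slot, displaced by +8.
      (void)low;
      (void)home;
    }
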
+// ============================================================================
+// [asmjit::x86x64::YmmVar]
+// ============================================================================
+
+//! @brief Ymm variable.
+struct YmmVar : public X86Var {
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create new uninitialized @c YmmVar instance.
+  ASMJIT_INLINE YmmVar() : X86Var() {}
+  //! @brief Create new initialized @c YmmVar instance.
+  ASMJIT_INLINE YmmVar(BaseCompiler& c, uint32_t type = kVarTypeYmm, const char* name = NULL) : X86Var(DontInitialize) {
+    c._newVar(this, type, name);
+  }
+
+  ASMJIT_INLINE YmmVar(const YmmVar& other) : X86Var(other) {}
+
+  //! @brief Create new uninitialized @c YmmVar instance (internal).
+  explicit ASMJIT_INLINE YmmVar(const _DontInitialize&) : X86Var(DontInitialize) {}
+
+  // --------------------------------------------------------------------------
+  // [YmmVar Specific]
+  // --------------------------------------------------------------------------
+
+  //! @brief Clone YmmVar operand.
+  ASMJIT_INLINE YmmVar clone() const {
+    return YmmVar(*this);
+  }
+
+  //! @brief Reset YmmVar operand.
+  ASMJIT_INLINE void reset() {
+    X86Var::reset();
+  }
+
+  // --------------------------------------------------------------------------
+  // [Operator Overload]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_INLINE YmmVar& operator=(const YmmVar& other) { _copy(other); return *this; }
+
+  ASMJIT_INLINE bool operator==(const YmmVar& other) const { return X86Var::operator==(other); }
+  ASMJIT_INLINE bool operator!=(const YmmVar& other) const { return X86Var::operator!=(other); }
+};
+
+// ============================================================================
+// [asmjit::x86x64::Macros]
+// ============================================================================
+
+//! @brief Create a shuffle constant for MMX/SSE shuffle instructions.
+//!
+//! @param z First component position, integer in the interval [0, 3].
+//! @param y Second component position, integer in the interval [0, 3].
+//! @param x Third component position, integer in the interval [0, 3].
+//! @param w Fourth component position, integer in the interval [0, 3].
+//!
+//! Shuffle constants can be used to build the immediate value for these
+//! instructions:
+//! - @ref Assembler::pshufw()
+//! - @ref Assembler::pshufd()
+//! - @ref Assembler::pshufhw()
+//! - @ref Assembler::pshuflw()
+//! - @ref Assembler::shufps()
+static ASMJIT_INLINE uint8_t mm_shuffle(uint8_t z, uint8_t y, uint8_t x, uint8_t w)
+{ return (z << 6) | (y << 4) | (x << 2) | w; }
+
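+// Worked example: mm_shuffle(3, 2, 1, 0) encodes the identity shuffle,
+// (3 << 6) | (2 << 4) | (1 << 2) | 0 == 0xE4, while mm_shuffle(0, 0, 0, 0)
+// yields 0x00, which broadcasts the lowest component (a sketch, assuming
+// an Assembler `a`):
+//
+//   a.pshufd(xmm0, xmm1, imm(mm_shuffle(0, 0, 0, 0)));
+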
+// ============================================================================
+// [asmjit::x86x64::Cond - Reverse / Negate]
+// ============================================================================
+
+//! @brief Get the reversed condition code; corresponds to transposing the
+//! operands of a comparison.
+static ASMJIT_INLINE uint32_t reverseCond(uint32_t cond) {
+  ASMJIT_ASSERT(cond < ASMJIT_ARRAY_SIZE(_reverseCond));
+  return _reverseCond[cond];
+}
+
+//! @brief Get the negated condition code.
+static ASMJIT_INLINE uint32_t negateCond(uint32_t cond) {
+  ASMJIT_ASSERT(cond < ASMJIT_ARRAY_SIZE(_reverseCond));
+  return static_cast<uint32_t>(cond ^ static_cast<uint32_t>(cond < kCondNone));
+}
+
+// ============================================================================
+// [asmjit::x86x64::Cond - ToJcc / ToMovcc / ToSetcc]
+// ============================================================================
+
+//! @brief Translate condition code @a cond to cmovcc instruction code.
+//! @sa @c kInstCode, @c _kInstCmovcc.
+static ASMJIT_INLINE uint32_t condToCmovcc(uint32_t cond) {
+  ASMJIT_ASSERT(static_cast<uint32_t>(cond) < ASMJIT_ARRAY_SIZE(_condToCmovcc));
+  return _condToCmovcc[cond];
+}
+
+//! @brief Translate condition code @a cond to jcc instruction code.
+//! @sa @c kInstCode, @c _kInstJcc.
+static ASMJIT_INLINE uint32_t condToJcc(uint32_t cond) {
+  ASMJIT_ASSERT(static_cast<uint32_t>(cond) < ASMJIT_ARRAY_SIZE(_condToJcc));
+  return _condToJcc[cond];
+}
+
+//! @brief Translate condition code @a cond to setcc instruction code.
+//! @sa @c kInstCode, @c _kInstSetcc.
+static ASMJIT_INLINE uint32_t condToSetcc(uint32_t cond) {
+  ASMJIT_ASSERT(static_cast<uint32_t>(cond) < ASMJIT_ARRAY_SIZE(_condToSetcc));
+  return _condToSetcc[cond];
+}
+
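+// A sketch of how these helpers compose: to branch around a block when a
+// computed condition code `cc` does not hold (assuming an Assembler `a`, a
+// Label `skip`, and the generic emit() entry point):
+//
+//   a.emit(condToJcc(negateCond(cc)), skip);
+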
+// ============================================================================
+// [asmjit::x86x64::Registers]
+// ============================================================================
+
+//! @brief No register, can be used only within a @c Mem operand.
+ASMJIT_VAR const GpReg noGpReg;
+
+//! @brief Gpb-lo register.
+ASMJIT_VAR const GpReg al;
+//! @brief Gpb-lo register.
+ASMJIT_VAR const GpReg cl;
+//! @brief Gpb-lo register.
+ASMJIT_VAR const GpReg dl;
+//! @brief Gpb-lo register.
+ASMJIT_VAR const GpReg bl;
+//! @brief Gpb-hi register.
+ASMJIT_VAR const GpReg ah;
+//! @brief Gpb-hi register.
+ASMJIT_VAR const GpReg ch;
+//! @brief Gpb-hi register.
+ASMJIT_VAR const GpReg dh;
+//! @brief Gpb-hi register.
+ASMJIT_VAR const GpReg bh;
+
+//! @brief Gpw register.
+ASMJIT_VAR const GpReg ax;
+//! @brief Gpw register.
+ASMJIT_VAR const GpReg cx;
+//! @brief Gpw register.
+ASMJIT_VAR const GpReg dx;
+//! @brief Gpw register.
+ASMJIT_VAR const GpReg bx;
+//! @brief Gpw register.
+ASMJIT_VAR const GpReg sp;
+//! @brief Gpw register.
+ASMJIT_VAR const GpReg bp;
+//! @brief Gpw register.
+ASMJIT_VAR const GpReg si;
+//! @brief Gpw register.
+ASMJIT_VAR const GpReg di;
+
+//! @brief Gpd register.
+ASMJIT_VAR const GpReg eax;
+//! @brief Gpd register.
+ASMJIT_VAR const GpReg ecx;
+//! @brief Gpd register.
+ASMJIT_VAR const GpReg edx;
+//! @brief Gpd register.
+ASMJIT_VAR const GpReg ebx;
+//! @brief Gpd register.
+ASMJIT_VAR const GpReg esp;
+//! @brief Gpd register.
+ASMJIT_VAR const GpReg ebp;
+//! @brief Gpd register.
+ASMJIT_VAR const GpReg esi;
+//! @brief Gpd register.
+ASMJIT_VAR const GpReg edi;
+
+//! @brief Fp register.
+ASMJIT_VAR const FpReg fp0;
+//! @brief Fp register.
+ASMJIT_VAR const FpReg fp1;
+//! @brief Fp register.
+ASMJIT_VAR const FpReg fp2;
+//! @brief Fp register.
+ASMJIT_VAR const FpReg fp3;
+//! @brief Fp register.
+ASMJIT_VAR const FpReg fp4;
+//! @brief Fp register.
+ASMJIT_VAR const FpReg fp5;
+//! @brief Fp register.
+ASMJIT_VAR const FpReg fp6;
+//! @brief Fp register.
+ASMJIT_VAR const FpReg fp7;
+
+//! @brief Mm register.
+ASMJIT_VAR const MmReg mm0;
+//! @brief Mm register.
+ASMJIT_VAR const MmReg mm1;
+//! @brief Mm register.
+ASMJIT_VAR const MmReg mm2;
+//! @brief Mm register.
+ASMJIT_VAR const MmReg mm3;
+//! @brief Mm register.
+ASMJIT_VAR const MmReg mm4;
+//! @brief Mm register.
+ASMJIT_VAR const MmReg mm5;
+//! @brief Mm register.
+ASMJIT_VAR const MmReg mm6;
+//! @brief Mm register.
+ASMJIT_VAR const MmReg mm7;
+
+//! @brief Xmm register.
+ASMJIT_VAR const XmmReg xmm0;
+//! @brief Xmm register.
+ASMJIT_VAR const XmmReg xmm1;
+//! @brief Xmm register.
+ASMJIT_VAR const XmmReg xmm2;
+//! @brief Xmm register.
+ASMJIT_VAR const XmmReg xmm3;
+//! @brief Xmm register.
+ASMJIT_VAR const XmmReg xmm4;
+//! @brief Xmm register.
+ASMJIT_VAR const XmmReg xmm5;
+//! @brief Xmm register.
+ASMJIT_VAR const XmmReg xmm6;
+//! @brief Xmm register.
+ASMJIT_VAR const XmmReg xmm7;
+
+//! @brief Ymm register.
+ASMJIT_VAR const YmmReg ymm0;
+//! @brief Ymm register.
+ASMJIT_VAR const YmmReg ymm1;
+//! @brief Ymm register.
+ASMJIT_VAR const YmmReg ymm2;
+//! @brief Ymm register.
+ASMJIT_VAR const YmmReg ymm3;
+//! @brief Ymm register.
+ASMJIT_VAR const YmmReg ymm4;
+//! @brief Ymm register.
+ASMJIT_VAR const YmmReg ymm5;
+//! @brief Ymm register.
+ASMJIT_VAR const YmmReg ymm6;
+//! @brief Ymm register.
+ASMJIT_VAR const YmmReg ymm7;
+
+//! @brief Cs segment register.
+ASMJIT_VAR const SegReg cs;
+//! @brief Ss segment register.
+ASMJIT_VAR const SegReg ss;
+//! @brief Ds segment register.
+ASMJIT_VAR const SegReg ds;
+//! @brief Es segment register.
+ASMJIT_VAR const SegReg es;
+//! @brief Fs segment register.
+ASMJIT_VAR const SegReg fs;
+//! @brief Gs segment register.
+ASMJIT_VAR const SegReg gs;
+
+//! @brief Get Gpb-lo register.
+static ASMJIT_INLINE GpReg gpb_lo(uint32_t index) { return GpReg(kRegTypeGpbLo, index, 1); }
+//! @brief Get Gpb-hi register.
+static ASMJIT_INLINE GpReg gpb_hi(uint32_t index) { return GpReg(kRegTypeGpbHi, index, 1); }
+//! @brief Get Gpw register.
+static ASMJIT_INLINE GpReg gpw(uint32_t index) { return GpReg(kRegTypeGpw, index, 2); }
+//! @brief Get Gpd register.
+static ASMJIT_INLINE GpReg gpd(uint32_t index) { return GpReg(kRegTypeGpd, index, 4); }
+//! @brief Get Fp register.
+static ASMJIT_INLINE FpReg fp(uint32_t index) { return FpReg(kRegTypeFp, index, 10); }
+//! @brief Get Mm register.
+static ASMJIT_INLINE MmReg mm(uint32_t index) { return MmReg(kRegTypeMm, index, 8); }
+//! @brief Get Xmm register.
+static ASMJIT_INLINE XmmReg xmm(uint32_t index) { return XmmReg(kRegTypeXmm, index, 16); }
+//! @brief Get Ymm register.
+static ASMJIT_INLINE YmmReg ymm(uint32_t index) { return YmmReg(kRegTypeYmm, index, 32); }
+
+// ============================================================================
+// [asmjit::x86x64::Mem - ptr[base + disp]]
+// ============================================================================
+
+//! @brief Create a custom pointer operand.
+static ASMJIT_INLINE Mem ptr(const GpReg& base, int32_t disp = 0, uint32_t size = 0) { return Mem(base, disp, size); }
+//! @brief Create a byte pointer operand.
+static ASMJIT_INLINE Mem byte_ptr(const GpReg& base, int32_t disp = 0) { return ptr(base, disp, kSizeByte); }
+//! @brief Create a word pointer operand.
+static ASMJIT_INLINE Mem word_ptr(const GpReg& base, int32_t disp = 0) { return ptr(base, disp, kSizeWord); }
+//! @brief Create a dword pointer operand.
+static ASMJIT_INLINE Mem dword_ptr(const GpReg& base, int32_t disp = 0) { return ptr(base, disp, kSizeDWord); }
+//! @brief Create a qword pointer operand.
+static ASMJIT_INLINE Mem qword_ptr(const GpReg& base, int32_t disp = 0) { return ptr(base, disp, kSizeQWord); }
+//! @brief Create a tword pointer operand.
+static ASMJIT_INLINE Mem tword_ptr(const GpReg& base, int32_t disp = 0) { return ptr(base, disp, kSizeTWord); }
+//! @brief Create an oword pointer operand.
+static ASMJIT_INLINE Mem oword_ptr(const GpReg& base, int32_t disp = 0) { return ptr(base, disp, kSizeOWord); }
+//! @brief Create a yword pointer operand.
+static ASMJIT_INLINE Mem yword_ptr(const GpReg& base, int32_t disp = 0) { return ptr(base, disp, kSizeYWord); }
+
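+// A minimal sketch of [base + disp] addressing (assuming an Assembler `a`):
+//
+//   a.mov(eax, dword_ptr(esp, 4)); // eax = *(uint32_t*)(esp + 4)
+//   a.mov(byte_ptr(eax, 1), cl);   // *(uint8_t*)(eax + 1) = cl
+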
+//! @brief Create a custom pointer operand.
+static ASMJIT_INLINE Mem ptr(const GpVar& base, int32_t disp = 0, uint32_t size = 0) { return Mem(base, disp, size); }
+//! @brief Create a byte pointer operand.
+static ASMJIT_INLINE Mem byte_ptr(const GpVar& base, int32_t disp = 0) { return ptr(base, disp, kSizeByte); }
+//! @brief Create a word pointer operand.
+static ASMJIT_INLINE Mem word_ptr(const GpVar& base, int32_t disp = 0) { return ptr(base, disp, kSizeWord); }
+//! @brief Create a dword pointer operand.
+static ASMJIT_INLINE Mem dword_ptr(const GpVar& base, int32_t disp = 0) { return ptr(base, disp, kSizeDWord); }
+//! @brief Create a qword pointer operand.
+static ASMJIT_INLINE Mem qword_ptr(const GpVar& base, int32_t disp = 0) { return ptr(base, disp, kSizeQWord); }
+//! @brief Create a tword pointer operand.
+static ASMJIT_INLINE Mem tword_ptr(const GpVar& base, int32_t disp = 0) { return ptr(base, disp, kSizeTWord); }
+//! @brief Create an oword pointer operand.
+static ASMJIT_INLINE Mem oword_ptr(const GpVar& base, int32_t disp = 0) { return ptr(base, disp, kSizeOWord); }
+//! @brief Create a yword pointer operand.
+static ASMJIT_INLINE Mem yword_ptr(const GpVar& base, int32_t disp = 0) { return ptr(base, disp, kSizeYWord); }
+
+// ============================================================================
+// [asmjit::x86x64::Mem - ptr[base + (index << shift) + disp]]
+// ============================================================================
+
+//! @brief Create a custom pointer operand.
+static ASMJIT_INLINE Mem ptr(const GpReg& base, const GpReg& index, uint32_t shift = 0, int32_t disp = 0, uint32_t size = 0) { return Mem(base, index, shift, disp, size); }
+//! @brief Create a byte pointer operand.
+static ASMJIT_INLINE Mem byte_ptr(const GpReg& base, const GpReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, kSizeByte); }
+//! @brief Create a word pointer operand.
+static ASMJIT_INLINE Mem word_ptr(const GpReg& base, const GpReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, kSizeWord); }
+//! @brief Create a dword pointer operand.
+static ASMJIT_INLINE Mem dword_ptr(const GpReg& base, const GpReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, kSizeDWord); }
+//! @brief Create a qword pointer operand.
+static ASMJIT_INLINE Mem qword_ptr(const GpReg& base, const GpReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, kSizeQWord); }
+//! @brief Create a tword pointer operand.
+static ASMJIT_INLINE Mem tword_ptr(const GpReg& base, const GpReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, kSizeTWord); }
+//! @brief Create an oword pointer operand.
+static ASMJIT_INLINE Mem oword_ptr(const GpReg& base, const GpReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, kSizeOWord); }
+//! @brief Create a yword pointer operand.
+static ASMJIT_INLINE Mem yword_ptr(const GpReg& base, const GpReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, kSizeYWord); }
+
+//! @brief Create a custom pointer operand.
+static ASMJIT_INLINE Mem ptr(const GpVar& base, const GpVar& index, uint32_t shift = 0, int32_t disp = 0, uint32_t size = 0) { return Mem(base, index, shift, disp, size); }
+//! @brief Create a byte pointer operand.
+static ASMJIT_INLINE Mem byte_ptr(const GpVar& base, const GpVar& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, kSizeByte); }
+//! @brief Create a word pointer operand.
+static ASMJIT_INLINE Mem word_ptr(const GpVar& base, const GpVar& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, kSizeWord); }
+//! @brief Create a dword pointer operand.
+static ASMJIT_INLINE Mem dword_ptr(const GpVar& base, const GpVar& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, kSizeDWord); }
+//! @brief Create a qword pointer operand.
+static ASMJIT_INLINE Mem qword_ptr(const GpVar& base, const GpVar& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, kSizeQWord); }
+//! @brief Create a tword pointer operand.
+static ASMJIT_INLINE Mem tword_ptr(const GpVar& base, const GpVar& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, kSizeTWord); }
+//! @brief Create an oword pointer operand.
+static ASMJIT_INLINE Mem oword_ptr(const GpVar& base, const GpVar& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, kSizeOWord); }
+//! @brief Create a yword pointer operand.
+static ASMJIT_INLINE Mem yword_ptr(const GpVar& base, const GpVar& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, kSizeYWord); }
+
+// ============================================================================
+// [asmjit::x86x64::Mem - ptr[base + (vex << shift) + disp]]
+// ============================================================================
+
+//! @brief Create a custom pointer operand.
+static ASMJIT_INLINE Mem ptr(const GpReg& base, const XmmReg& index, uint32_t shift = 0, int32_t disp = 0, uint32_t size = 0) { return Mem(base, index, shift, disp, size); }
+//! @brief Create a byte pointer operand.
+static ASMJIT_INLINE Mem byte_ptr(const GpReg& base, const XmmReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, kSizeByte); }
+//! @brief Create a word pointer operand.
+static ASMJIT_INLINE Mem word_ptr(const GpReg& base, const XmmReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, kSizeWord); }
+//! @brief Create a dword pointer operand.
+static ASMJIT_INLINE Mem dword_ptr(const GpReg& base, const XmmReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, kSizeDWord); }
+//! @brief Create a qword pointer operand.
+static ASMJIT_INLINE Mem qword_ptr(const GpReg& base, const XmmReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, kSizeQWord); }
+//! @brief Create a tword pointer operand.
+static ASMJIT_INLINE Mem tword_ptr(const GpReg& base, const XmmReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, kSizeTWord); }
+//! @brief Create an oword pointer operand.
+static ASMJIT_INLINE Mem oword_ptr(const GpReg& base, const XmmReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, kSizeOWord); }
+//! @brief Create a yword pointer operand.
+static ASMJIT_INLINE Mem yword_ptr(const GpReg& base, const XmmReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, kSizeYWord); }
+
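+// With an Xmm/Ymm register as the index these form VSIB operands, as used
+// by AVX2 gather instructions; e.g. the operand
+//
+//   dword_ptr(eax, xmm1, 2)
+//
+// addresses [eax + xmm1[i] * 4] for each gathered element i.
+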
+//! @brief Create a custom pointer operand.
+static ASMJIT_INLINE Mem ptr(const GpReg& base, const YmmReg& index, uint32_t shift = 0, int32_t disp = 0, uint32_t size = 0) { return Mem(base, index, shift, disp, size); }
+//! @brief Create a byte pointer operand.
+static ASMJIT_INLINE Mem byte_ptr(const GpReg& base, const YmmReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, kSizeByte); }
+//! @brief Create a word pointer operand.
+static ASMJIT_INLINE Mem word_ptr(const GpReg& base, const YmmReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, kSizeWord); }
+//! @brief Create a dword pointer operand.
+static ASMJIT_INLINE Mem dword_ptr(const GpReg& base, const YmmReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, kSizeDWord); }
+//! @brief Create a qword pointer operand.
+static ASMJIT_INLINE Mem qword_ptr(const GpReg& base, const YmmReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, kSizeQWord); }
+//! @brief Create a tword pointer operand.
+static ASMJIT_INLINE Mem tword_ptr(const GpReg& base, const YmmReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, kSizeTWord); }
+//! @brief Create an oword pointer operand.
+static ASMJIT_INLINE Mem oword_ptr(const GpReg& base, const YmmReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, kSizeOWord); }
+//! @brief Create a yword pointer operand.
+static ASMJIT_INLINE Mem yword_ptr(const GpReg& base, const YmmReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, kSizeYWord); }
+
+//! @brief Create a custom pointer operand.
+static ASMJIT_INLINE Mem ptr(const GpVar& base, const XmmVar& index, uint32_t shift = 0, int32_t disp = 0, uint32_t size = 0) { return Mem(base, index, shift, disp, size); }
+//! @brief Create a byte pointer operand.
+static ASMJIT_INLINE Mem byte_ptr(const GpVar& base, const XmmVar& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, kSizeByte); }
+//! @brief Create a word pointer operand.
+static ASMJIT_INLINE Mem word_ptr(const GpVar& base, const XmmVar& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, kSizeWord); }
+//! @brief Create a dword pointer operand.
+static ASMJIT_INLINE Mem dword_ptr(const GpVar& base, const XmmVar& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, kSizeDWord); }
+//! @brief Create a qword pointer operand.
+static ASMJIT_INLINE Mem qword_ptr(const GpVar& base, const XmmVar& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, kSizeQWord); }
+//! @brief Create a tword pointer operand.
+static ASMJIT_INLINE Mem tword_ptr(const GpVar& base, const XmmVar& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, kSizeTWord); }
+//! @brief Create an oword pointer operand.
+static ASMJIT_INLINE Mem oword_ptr(const GpVar& base, const XmmVar& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, kSizeOWord); }
+//! @brief Create a yword pointer operand.
+static ASMJIT_INLINE Mem yword_ptr(const GpVar& base, const XmmVar& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, kSizeYWord); }
+
+//! @brief Create a custom pointer operand.
+static ASMJIT_INLINE Mem ptr(const GpVar& base, const YmmVar& index, uint32_t shift = 0, int32_t disp = 0, uint32_t size = 0) { return Mem(base, index, shift, disp, size); }
+//! @brief Create a byte pointer operand.
+static ASMJIT_INLINE Mem byte_ptr(const GpVar& base, const YmmVar& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, kSizeByte); }
+//! @brief Create a word pointer operand.
+static ASMJIT_INLINE Mem word_ptr(const GpVar& base, const YmmVar& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, kSizeWord); }
+//! @brief Create a dword pointer operand.
+static ASMJIT_INLINE Mem dword_ptr(const GpVar& base, const YmmVar& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, kSizeDWord); }
+//! @brief Create a qword pointer operand.
+static ASMJIT_INLINE Mem qword_ptr(const GpVar& base, const YmmVar& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, kSizeQWord); }
+//! @brief Create a tword pointer operand.
+static ASMJIT_INLINE Mem tword_ptr(const GpVar& base, const YmmVar& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, kSizeTWord); }
+//! @brief Create an oword pointer operand.
+static ASMJIT_INLINE Mem oword_ptr(const GpVar& base, const YmmVar& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, kSizeOWord); }
+//! @brief Create a yword pointer operand.
+static ASMJIT_INLINE Mem yword_ptr(const GpVar& base, const YmmVar& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, kSizeYWord); }
+
+// ============================================================================
+// [asmjit::x86x64::Mem - [label + disp]]
+// ============================================================================
+
+//! @brief Create a custom pointer operand.
+static ASMJIT_INLINE Mem ptr(const Label& label, int32_t disp = 0, uint32_t size = 0) { return Mem(label, disp, size); }
+//! @brief Create a byte pointer operand.
+static ASMJIT_INLINE Mem byte_ptr(const Label& label, int32_t disp = 0) { return ptr(label, disp, kSizeByte); }
+//! @brief Create a word pointer operand.
+static ASMJIT_INLINE Mem word_ptr(const Label& label, int32_t disp = 0) { return ptr(label, disp, kSizeWord); }
+//! @brief Create a dword pointer operand.
+static ASMJIT_INLINE Mem dword_ptr(const Label& label, int32_t disp = 0) { return ptr(label, disp, kSizeDWord); }
+//! @brief Create a qword pointer operand.
+static ASMJIT_INLINE Mem qword_ptr(const Label& label, int32_t disp = 0) { return ptr(label, disp, kSizeQWord); }
+//! @brief Create a tword pointer operand.
+static ASMJIT_INLINE Mem tword_ptr(const Label& label, int32_t disp = 0) { return ptr(label, disp, kSizeTWord); }
+//! @brief Create an oword pointer operand.
+static ASMJIT_INLINE Mem oword_ptr(const Label& label, int32_t disp = 0) { return ptr(label, disp, kSizeOWord); }
+//! @brief Create a yword pointer operand.
+static ASMJIT_INLINE Mem yword_ptr(const Label& label, int32_t disp = 0) { return ptr(label, disp, kSizeYWord); }
+
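+// A sketch of label-relative addressing, useful for constant pools and
+// jump tables (assuming an Assembler `a` and a bound Label L):
+//
+//   a.movss(xmm0, dword_ptr(L));    // Load the float stored at L.
+//   a.movss(xmm1, dword_ptr(L, 4)); // And the one right after it.
+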
+// ============================================================================
+// [asmjit::x86x64::Mem - [label + index << shift + disp]]
+// ============================================================================
+
+//! @brief Create a custom pointer operand.
+static ASMJIT_INLINE Mem ptr(const Label& label, const GpReg& index, uint32_t shift, int32_t disp = 0, uint32_t size = 0) { return Mem(label, index, shift, disp, size); }
+//! @brief Create a byte pointer operand.
+static ASMJIT_INLINE Mem byte_ptr(const Label& label, const GpReg& index, uint32_t shift, int32_t disp = 0) { return ptr(label, index, shift, disp, kSizeByte); }
+//! @brief Create a word pointer operand.
+static ASMJIT_INLINE Mem word_ptr(const Label& label, const GpReg& index, uint32_t shift, int32_t disp = 0) { return ptr(label, index, shift, disp, kSizeWord); }
+//! @brief Create a dword pointer operand.
+static ASMJIT_INLINE Mem dword_ptr(const Label& label, const GpReg& index, uint32_t shift, int32_t disp = 0) { return ptr(label, index, shift, disp, kSizeDWord); }
+//! @brief Create a qword pointer operand.
+static ASMJIT_INLINE Mem qword_ptr(const Label& label, const GpReg& index, uint32_t shift, int32_t disp = 0) { return ptr(label, index, shift, disp, kSizeQWord); }
+//! @brief Create a tword pointer operand.
+static ASMJIT_INLINE Mem tword_ptr(const Label& label, const GpReg& index, uint32_t shift, int32_t disp = 0) { return ptr(label, index, shift, disp, kSizeTWord); }
+//! @brief Create an oword pointer operand.
+static ASMJIT_INLINE Mem oword_ptr(const Label& label, const GpReg& index, uint32_t shift, int32_t disp = 0) { return ptr(label, index, shift, disp, kSizeOWord); }
+//! @brief Create a yword pointer operand.
+static ASMJIT_INLINE Mem yword_ptr(const Label& label, const GpReg& index, uint32_t shift, int32_t disp = 0) { return ptr(label, index, shift, disp, kSizeYWord); }
+
+//! @brief Create a custom pointer operand.
+static ASMJIT_INLINE Mem ptr(const Label& label, const GpVar& index, uint32_t shift, int32_t disp = 0, uint32_t size = 0) { return Mem(label, index, shift, disp, size); }
+//! @brief Create a byte pointer operand.
+static ASMJIT_INLINE Mem byte_ptr(const Label& label, const GpVar& index, uint32_t shift, int32_t disp = 0) { return ptr(label, index, shift, disp, kSizeByte); }
+//! @brief Create a word pointer operand.
+static ASMJIT_INLINE Mem word_ptr(const Label& label, const GpVar& index, uint32_t shift, int32_t disp = 0) { return ptr(label, index, shift, disp, kSizeWord); }
+//! @brief Create a dword pointer operand.
+static ASMJIT_INLINE Mem dword_ptr(const Label& label, const GpVar& index, uint32_t shift, int32_t disp = 0) { return ptr(label, index, shift, disp, kSizeDWord); }
+//! @brief Create a qword pointer operand.
+static ASMJIT_INLINE Mem qword_ptr(const Label& label, const GpVar& index, uint32_t shift, int32_t disp = 0) { return ptr(label, index, shift, disp, kSizeQWord); }
+//! @brief Create a tword pointer operand.
+static ASMJIT_INLINE Mem tword_ptr(const Label& label, const GpVar& index, uint32_t shift, int32_t disp = 0) { return ptr(label, index, shift, disp, kSizeTWord); }
+//! @brief Create an oword pointer operand.
+static ASMJIT_INLINE Mem oword_ptr(const Label& label, const GpVar& index, uint32_t shift, int32_t disp = 0) { return ptr(label, index, shift, disp, kSizeOWord); }
+//! @brief Create a yword pointer operand.
+static ASMJIT_INLINE Mem yword_ptr(const Label& label, const GpVar& index, uint32_t shift, int32_t disp = 0) { return ptr(label, index, shift, disp, kSizeYWord); }
+
+// ============================================================================
+// [asmjit::x86x64::Mem - [pAbs + disp]]
+// ============================================================================
+
+//! @brief Create a custom pointer operand.
+ASMJIT_API Mem ptr_abs(Ptr pAbs, int32_t disp = 0, uint32_t size = 0);
+
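+// A sketch of absolute addressing (`table` is an illustrative global; Ptr
+// is asmjit's integer address type):
+//
+//   static uint32_t table[4];
+//   a.mov(eax, dword_ptr_abs((Ptr)table));    // table[0]
+//   a.mov(ecx, dword_ptr_abs((Ptr)table, 8)); // table[2]
+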
+//! @brief Create a byte pointer operand.
+static ASMJIT_INLINE Mem byte_ptr_abs(Ptr pAbs, int32_t disp = 0) { return ptr_abs(pAbs, disp, kSizeByte); }
+//! @brief Create a word pointer operand.
+static ASMJIT_INLINE Mem word_ptr_abs(Ptr pAbs, int32_t disp = 0) { return ptr_abs(pAbs, disp, kSizeWord); }
+//! @brief Create a dword pointer operand.
+static ASMJIT_INLINE Mem dword_ptr_abs(Ptr pAbs, int32_t disp = 0) { return ptr_abs(pAbs, disp, kSizeDWord); }
+//! @brief Create a qword pointer operand.
+static ASMJIT_INLINE Mem qword_ptr_abs(Ptr pAbs, int32_t disp = 0) { return ptr_abs(pAbs, disp, kSizeQWord); }
+//! @brief Create a tword pointer operand (used for 80-bit floating-point values).
+static ASMJIT_INLINE Mem tword_ptr_abs(Ptr pAbs, int32_t disp = 0) { return ptr_abs(pAbs, disp, kSizeTWord); }
+//! @brief Create an oword pointer operand.
+static ASMJIT_INLINE Mem oword_ptr_abs(Ptr pAbs, int32_t disp = 0) { return ptr_abs(pAbs, disp, kSizeOWord); }
+//! @brief Create a yword pointer operand.
+static ASMJIT_INLINE Mem yword_ptr_abs(Ptr pAbs, int32_t disp = 0) { return ptr_abs(pAbs, disp, kSizeYWord); }
+
+// ============================================================================
+// [asmjit::x86x64::Mem - [pAbs + index << shift + disp]]
+// ============================================================================
+
+//! @brief Create a custom pointer operand.
+ASMJIT_API Mem ptr_abs(Ptr pAbs, const X86Reg& index, uint32_t shift = 0, int32_t disp = 0, uint32_t size = 0);
+//! @brief Create a custom pointer operand.
+ASMJIT_API Mem ptr_abs(Ptr pAbs, const X86Var& index, uint32_t shift = 0, int32_t disp = 0, uint32_t size = 0);
+
+//! @brief Create a byte pointer operand.
+static ASMJIT_INLINE Mem byte_ptr_abs(Ptr pAbs, const GpReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr_abs(pAbs, index, shift, disp, kSizeByte); }
+//! @brief Create a word pointer operand.
+static ASMJIT_INLINE Mem word_ptr_abs(Ptr pAbs, const GpReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr_abs(pAbs, index, shift, disp, kSizeWord); }
+//! @brief Create a dword pointer operand.
+static ASMJIT_INLINE Mem dword_ptr_abs(Ptr pAbs, const GpReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr_abs(pAbs, index, shift, disp, kSizeDWord); }
+//! @brief Create a qword pointer operand.
+static ASMJIT_INLINE Mem qword_ptr_abs(Ptr pAbs, const GpReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr_abs(pAbs, index, shift, disp, kSizeQWord); }
+//! @brief Create a tword pointer operand.
+static ASMJIT_INLINE Mem tword_ptr_abs(Ptr pAbs, const GpReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr_abs(pAbs, index, shift, disp, kSizeTWord); }
+//! @brief Create an oword pointer operand.
+static ASMJIT_INLINE Mem oword_ptr_abs(Ptr pAbs, const GpReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr_abs(pAbs, index, shift, disp, kSizeOWord); }
+//! @brief Create a yword pointer operand.
+static ASMJIT_INLINE Mem yword_ptr_abs(Ptr pAbs, const GpReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr_abs(pAbs, index, shift, disp, kSizeYWord); }
+
+//! @brief Create a byte pointer operand.
+static ASMJIT_INLINE Mem byte_ptr_abs(Ptr pAbs, const XmmReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr_abs(pAbs, index, shift, disp, kSizeByte); }
+//! @brief Create a word pointer operand.
+static ASMJIT_INLINE Mem word_ptr_abs(Ptr pAbs, const XmmReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr_abs(pAbs, index, shift, disp, kSizeWord); }
+//! @brief Create a dword pointer operand.
+static ASMJIT_INLINE Mem dword_ptr_abs(Ptr pAbs, const XmmReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr_abs(pAbs, index, shift, disp, kSizeDWord); }
+//! @brief Create a qword pointer operand.
+static ASMJIT_INLINE Mem qword_ptr_abs(Ptr pAbs, const XmmReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr_abs(pAbs, index, shift, disp, kSizeQWord); }
+//! @brief Create a tword pointer operand.
+static ASMJIT_INLINE Mem tword_ptr_abs(Ptr pAbs, const XmmReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr_abs(pAbs, index, shift, disp, kSizeTWord); }
+//! @brief Create an oword pointer operand.
+static ASMJIT_INLINE Mem oword_ptr_abs(Ptr pAbs, const XmmReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr_abs(pAbs, index, shift, disp, kSizeOWord); }
+//! @brief Create a yword pointer operand.
+static ASMJIT_INLINE Mem yword_ptr_abs(Ptr pAbs, const XmmReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr_abs(pAbs, index, shift, disp, kSizeYWord); }
+
+//! @brief Create a byte pointer operand.
+static ASMJIT_INLINE Mem byte_ptr_abs(Ptr pAbs, const YmmReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr_abs(pAbs, index, shift, disp, kSizeByte); }
+//! @brief Create a word pointer operand.
+static ASMJIT_INLINE Mem word_ptr_abs(Ptr pAbs, const YmmReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr_abs(pAbs, index, shift, disp, kSizeWord); }
+//! @brief Create a dword pointer operand.
+static ASMJIT_INLINE Mem dword_ptr_abs(Ptr pAbs, const YmmReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr_abs(pAbs, index, shift, disp, kSizeDWord); }
+//! @brief Create a qword pointer operand.
+static ASMJIT_INLINE Mem qword_ptr_abs(Ptr pAbs, const YmmReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr_abs(pAbs, index, shift, disp, kSizeQWord); }
+//! @brief Create a tword pointer operand.
+static ASMJIT_INLINE Mem tword_ptr_abs(Ptr pAbs, const YmmReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr_abs(pAbs, index, shift, disp, kSizeTWord); }
+//! @brief Create an oword pointer operand.
+static ASMJIT_INLINE Mem oword_ptr_abs(Ptr pAbs, const YmmReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr_abs(pAbs, index, shift, disp, kSizeOWord); }
+//! @brief Create a yword pointer operand.
+static ASMJIT_INLINE Mem yword_ptr_abs(Ptr pAbs, const YmmReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr_abs(pAbs, index, shift, disp, kSizeYWord); }
+
+//! @brief Create a byte pointer operand.
+static ASMJIT_INLINE Mem byte_ptr_abs(Ptr pAbs, const XmmVar& index, uint32_t shift = 0, int32_t disp = 0) { return ptr_abs(pAbs, index, shift, disp, kSizeByte); }
+//! @brief Create a word pointer operand.
+static ASMJIT_INLINE Mem word_ptr_abs(Ptr pAbs, const XmmVar& index, uint32_t shift = 0, int32_t disp = 0) { return ptr_abs(pAbs, index, shift, disp, kSizeWord); }
+//! @brief Create a dword pointer operand.
+static ASMJIT_INLINE Mem dword_ptr_abs(Ptr pAbs, const XmmVar& index, uint32_t shift = 0, int32_t disp = 0) { return ptr_abs(pAbs, index, shift, disp, kSizeDWord); }
+//! @brief Create a qword pointer operand.
+static ASMJIT_INLINE Mem qword_ptr_abs(Ptr pAbs, const XmmVar& index, uint32_t shift = 0, int32_t disp = 0) { return ptr_abs(pAbs, index, shift, disp, kSizeQWord); }
+//! @brief Create a tword pointer operand.
+static ASMJIT_INLINE Mem tword_ptr_abs(Ptr pAbs, const XmmVar& index, uint32_t shift = 0, int32_t disp = 0) { return ptr_abs(pAbs, index, shift, disp, kSizeTWord); }
+//! @brief Create an oword pointer operand.
+static ASMJIT_INLINE Mem oword_ptr_abs(Ptr pAbs, const XmmVar& index, uint32_t shift = 0, int32_t disp = 0) { return ptr_abs(pAbs, index, shift, disp, kSizeOWord); }
+//! @brief Create a yword pointer operand.
+static ASMJIT_INLINE Mem yword_ptr_abs(Ptr pAbs, const XmmVar& index, uint32_t shift = 0, int32_t disp = 0) { return ptr_abs(pAbs, index, shift, disp, kSizeYWord); }
+
+//! @brief Create a byte pointer operand.
+static ASMJIT_INLINE Mem byte_ptr_abs(Ptr pAbs, const YmmVar& index, uint32_t shift = 0, int32_t disp = 0) { return ptr_abs(pAbs, index, shift, disp, kSizeByte); }
+//! @brief Create a word pointer operand.
+static ASMJIT_INLINE Mem word_ptr_abs(Ptr pAbs, const YmmVar& index, uint32_t shift = 0, int32_t disp = 0) { return ptr_abs(pAbs, index, shift, disp, kSizeWord); }
+//! @brief Create a dword pointer operand.
+static ASMJIT_INLINE Mem dword_ptr_abs(Ptr pAbs, const YmmVar& index, uint32_t shift = 0, int32_t disp = 0) { return ptr_abs(pAbs, index, shift, disp, kSizeDWord); }
+//! @brief Create a qword pointer operand.
+static ASMJIT_INLINE Mem qword_ptr_abs(Ptr pAbs, const YmmVar& index, uint32_t shift = 0, int32_t disp = 0) { return ptr_abs(pAbs, index, shift, disp, kSizeQWord); }
+//! @brief Create a tword pointer operand.
+static ASMJIT_INLINE Mem tword_ptr_abs(Ptr pAbs, const YmmVar& index, uint32_t shift = 0, int32_t disp = 0) { return ptr_abs(pAbs, index, shift, disp, kSizeTWord); }
+//! @brief Create an oword pointer operand.
+static ASMJIT_INLINE Mem oword_ptr_abs(Ptr pAbs, const YmmVar& index, uint32_t shift = 0, int32_t disp = 0) { return ptr_abs(pAbs, index, shift, disp, kSizeOWord); }
+//! @brief Create a yword pointer operand.
+static ASMJIT_INLINE Mem yword_ptr_abs(Ptr pAbs, const YmmVar& index, uint32_t shift = 0, int32_t disp = 0) { return ptr_abs(pAbs, index, shift, disp, kSizeYWord); }
+
+// ============================================================================
+// [asmjit::x86x64::Util]
+// ============================================================================
+
+static ASMJIT_INLINE bool x86IsGpbRegOp(const Operand* op) {
+  const uint32_t mask = IntUtil::pack32_2x8_1x16(0xFF, 0xFF, ~(kRegTypePatchedGpbHi << 8) & 0xFF00);
+  return (op->_packed[0].u32[0] & mask) == IntUtil::pack32_2x8_1x16(kOperandTypeReg, 1, 0x0000);
+}
+
+static ASMJIT_INLINE uint32_t x86VarTypeToClass(uint32_t vType) {
+  // Getting varClass is the only safe operation when dealing with a
+  // denormalized varType; any other property would require mapping vType
+  // to the architecture-specific type first.
+  ASMJIT_ASSERT(vType < kVarTypeCount);
+  return _varInfo[vType].getClass();
+}
+
+static ASMJIT_INLINE bool x86VarIsInt(uint32_t vType) {
+  ASMJIT_ASSERT(vType < kVarTypeCount);
+  return IntUtil::inInterval(vType, _kVarTypeIntStart, _kVarTypeIntEnd);
+}
+
+static ASMJIT_INLINE bool x86VarIsFloat(uint32_t vType) {
+  ASMJIT_ASSERT(vType < kVarTypeCount);
+  return (_varInfo[vType].getDesc() & (kVarDescSp | kVarDescDp)) != 0;
+}
+
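+// Example: kVarTypeIntPtr is denormalized (its width depends on the target
+// architecture), yet its register class can still be queried directly:
+//
+//   uint32_t rc = x86VarTypeToClass(kVarTypeIntPtr); // kRegClassGp
+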
+//! @}
+
+} // x86x64 namespace
+} // asmjit namespace
+
+// ============================================================================
+// [asmjit::x86]
+// ============================================================================
+
+#if defined(ASMJIT_BUILD_X86)
+
+namespace asmjit {
+namespace x86 {
+
+// This is the only place where the x86x64 namespace is included into x86.
+using namespace ::asmjit::x86x64;
+
+//! @addtogroup asmjit_x86x64
+//! @{
+
+// ============================================================================
+// [asmjit::x86::kRegType]
+// ============================================================================
+
+ASMJIT_ENUM(kRegType) {
+  //! @brief Gpd register.
+  kRegTypeGpz = kRegTypeGpd
+};
+
+// ============================================================================
+// [asmjit::x86::kRegCount]
+// ============================================================================
+
+//! @brief X86 registers count per class.
+ASMJIT_ENUM(kRegCount) {
+  //! @brief Base count of registers (8).
+  kRegCountBase = 8,
+  //! @brief Count of Gp registers (8).
+  kRegCountGp = kRegCountBase,
+  //! @brief Count of Xmm registers (8).
+  kRegCountXmm = kRegCountBase,
+  //! @brief Count of Ymm registers (8).
+  kRegCountYmm = kRegCountBase
+};
+
+// ============================================================================
+// [asmjit::x86::Variables]
+// ============================================================================
+
+//! @internal
+//!
+//! @brief Mapping of x86 variables into their real IDs.
+//!
+//! This mapping translates the following:
+//! - @c kVarTypeIntPtr to @c kVarTypeInt32.
+//! - @c kVarTypeUIntPtr to @c kVarTypeUInt32.
+ASMJIT_VAR const uint8_t _varMapping[kVarTypeCount];
+
+// ============================================================================
+// [asmjit::x86::Registers]
+// ============================================================================
+
+//! @brief Gpd register.
+ASMJIT_VAR const GpReg zax;
+//! @brief Gpd register.
+ASMJIT_VAR const GpReg zcx;
+//! @brief Gpd register.
+ASMJIT_VAR const GpReg zdx;
+//! @brief Gpd register.
+ASMJIT_VAR const GpReg zbx;
+//! @brief Gpd register.
+ASMJIT_VAR const GpReg zsp;
+//! @brief Gpd register.
+ASMJIT_VAR const GpReg zbp;
+//! @brief Gpd register.
+ASMJIT_VAR const GpReg zsi;
+//! @brief Gpd register.
+ASMJIT_VAR const GpReg zdi;
+
+//! @brief Get Gpz register (Gpd on x86).
+static ASMJIT_INLINE GpReg gpz(uint32_t index) { return GpReg(kRegTypeGpd, index, 4); }
+
+// ============================================================================
+// [asmjit::x86::Mem]
+// ============================================================================
+
+//! @brief Create an intptr_t 32-bit pointer operand.
+static ASMJIT_INLINE Mem intptr_ptr(const GpReg& base, int32_t disp = 0) { return ptr(base, disp, 4); }
+//! @brief Create an intptr_t 32-bit pointer operand.
+static ASMJIT_INLINE Mem intptr_ptr(const GpVar& base, int32_t disp = 0) { return ptr(base, disp, 4); }
+//! @brief Create an intptr_t 32-bit pointer operand.
+static ASMJIT_INLINE Mem intptr_ptr(const GpReg& base, const GpReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, 4); }
+//! @brief Create an intptr_t 32-bit pointer operand.
+static ASMJIT_INLINE Mem intptr_ptr(const GpVar& base, const GpVar& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, 4); }
+
+//! @brief Create an intptr_t 32-bit pointer operand.
+static ASMJIT_INLINE Mem intptr_ptr(const Label& label, int32_t disp = 0) { return ptr(label, disp, 4); }
+//! @brief Create an intptr_t 32-bit pointer operand.
+static ASMJIT_INLINE Mem intptr_ptr(const Label& label, const GpReg& index, uint32_t shift, int32_t disp = 0) { return ptr(label, index, shift, disp, 4); }
+//! @brief Create an intptr_t 32-bit pointer operand.
+static ASMJIT_INLINE Mem intptr_ptr(const Label& label, const GpVar& index, uint32_t shift, int32_t disp = 0) { return ptr(label, index, shift, disp, 4); }
+
+//! @brief Create an intptr_t 32-bit pointer operand.
+static ASMJIT_INLINE Mem intptr_ptr_abs(Ptr pAbs, int32_t disp = 0) { return ptr_abs(pAbs, disp, 4); }
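+// A short sketch: intptr_ptr always matches the native word size, so the
+// same Compiler code builds for both targets (`c` and `p` illustrative):
+//
+//   GpVar p(c, kVarTypeIntPtr);
+//   c.mov(p, intptr_ptr(p)); // Follow a pointer-sized link.
+
+//! @brief Create an intptr_t 32-bit pointer operand.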
+static ASMJIT_INLINE Mem intptr_ptr_abs(Ptr pAbs, const GpReg& index, uint32_t shift, int32_t disp = 0) { return ptr_abs(pAbs, index, shift, disp, 4); } +//! @brief Create an intptr_t 32-bit pointer operand. +static ASMJIT_INLINE Mem intptr_ptr_abs(Ptr pAbs, const GpVar& index, uint32_t shift, int32_t disp = 0) { return ptr_abs(pAbs, index, shift, disp, 4); } + +//! @} + +} // x86 namespace +} // asmjit namespace + +#endif // ASMJIT_BUILD_X86 + +// ============================================================================ +// [asmjit::x64] +// ============================================================================ + +#if defined(ASMJIT_BUILD_X64) + +namespace asmjit { +namespace x64 { + +// This is the only place where the x86x64 namespace is included into x64. +using namespace ::asmjit::x86x64; + +//! @addtogroup asmjit_x86x64 +//! @{ + +// ============================================================================ +// [asmjit::x64::kRegType] +// ============================================================================ + +ASMJIT_ENUM(kRegType) { + //! @brief Gpq register. + kRegTypeGpz = kRegTypeGpq +}; + +// ============================================================================ +// [asmjit::x64::kRegCount] +// ============================================================================ + +//! @brief X86 registers count per class. +ASMJIT_ENUM(kRegCount) { + //! @brief Base count of registers (16). + kRegCountBase = 16, + //! @brief Count of Gp registers (16). + kRegCountGp = kRegCountBase, + //! @brief Count of Xmm registers (16). + kRegCountXmm = kRegCountBase, + //! @brief Count of Ymm registers (16). + kRegCountYmm = kRegCountBase +}; + +// ============================================================================ +// [asmjit::x64::Variables] +// ============================================================================ + +//! @internal +//! +//! @brief Mapping of x64 variables into their real IDs. +//! +//! This mapping translates the following: +//! - @c kVarTypeIntPtr to @c kVarTypeInt64. +//! - @c kVarTypeUIntPtr to @c kVarTypeUInt64. +ASMJIT_VAR const uint8_t _varMapping[kVarTypeCount]; + +// ============================================================================ +// [asmjit::x64::Registers] +// ============================================================================ + +//! @brief Gpb register. +ASMJIT_VAR const GpReg spl; +//! @brief Gpb register. +ASMJIT_VAR const GpReg bpl; +//! @brief Gpb register. +ASMJIT_VAR const GpReg sil; +//! @brief Gpb register. +ASMJIT_VAR const GpReg dil; +//! @brief Gpb register. +ASMJIT_VAR const GpReg r8b; +//! @brief Gpb register. +ASMJIT_VAR const GpReg r9b; +//! @brief Gpb register. +ASMJIT_VAR const GpReg r10b; +//! @brief Gpb register. +ASMJIT_VAR const GpReg r11b; +//! @brief Gpb register. +ASMJIT_VAR const GpReg r12b; +//! @brief Gpb register. +ASMJIT_VAR const GpReg r13b; +//! @brief Gpb register. +ASMJIT_VAR const GpReg r14b; +//! @brief Gpb register. +ASMJIT_VAR const GpReg r15b; + +//! @brief Gpw register. +ASMJIT_VAR const GpReg r8w; +//! @brief Gpw register. +ASMJIT_VAR const GpReg r9w; +//! @brief Gpw register. +ASMJIT_VAR const GpReg r10w; +//! @brief Gpw register. +ASMJIT_VAR const GpReg r11w; +//! @brief Gpw register. +ASMJIT_VAR const GpReg r12w; +//! @brief Gpw register. +ASMJIT_VAR const GpReg r13w; +//! @brief Gpw register. +ASMJIT_VAR const GpReg r14w; +//! @brief Gpw register. +ASMJIT_VAR const GpReg r15w; + +//! @brief Gpd register. +ASMJIT_VAR const GpReg r8d; +//! @brief Gpd register. 
+ASMJIT_VAR const GpReg r9d; +//! @brief Gpd register. +ASMJIT_VAR const GpReg r10d; +//! @brief Gpd register. +ASMJIT_VAR const GpReg r11d; +//! @brief Gpd register. +ASMJIT_VAR const GpReg r12d; +//! @brief Gpd register. +ASMJIT_VAR const GpReg r13d; +//! @brief Gpd register. +ASMJIT_VAR const GpReg r14d; +//! @brief Gpd register. +ASMJIT_VAR const GpReg r15d; + +//! @brief Gpq register. +ASMJIT_VAR const GpReg rax; +//! @brief Gpq register. +ASMJIT_VAR const GpReg rcx; +//! @brief Gpq register. +ASMJIT_VAR const GpReg rdx; +//! @brief Gpq register. +ASMJIT_VAR const GpReg rbx; +//! @brief Gpq register. +ASMJIT_VAR const GpReg rsp; +//! @brief Gpq register. +ASMJIT_VAR const GpReg rbp; +//! @brief Gpq register. +ASMJIT_VAR const GpReg rsi; +//! @brief Gpq register. +ASMJIT_VAR const GpReg rdi; + +//! @brief Gpq register. +ASMJIT_VAR const GpReg r8; +//! @brief Gpq register. +ASMJIT_VAR const GpReg r9; +//! @brief Gpq register. +ASMJIT_VAR const GpReg r10; +//! @brief Gpq register. +ASMJIT_VAR const GpReg r11; +//! @brief Gpq register. +ASMJIT_VAR const GpReg r12; +//! @brief Gpq register. +ASMJIT_VAR const GpReg r13; +//! @brief Gpq register. +ASMJIT_VAR const GpReg r14; +//! @brief Gpq register. +ASMJIT_VAR const GpReg r15; + +//! @brief Gpq register. +ASMJIT_VAR const GpReg zax; +//! @brief Gpq register. +ASMJIT_VAR const GpReg zcx; +//! @brief Gpq register. +ASMJIT_VAR const GpReg zdx; +//! @brief Gpq register. +ASMJIT_VAR const GpReg zbx; +//! @brief Gpq register. +ASMJIT_VAR const GpReg zsp; +//! @brief Gpq register. +ASMJIT_VAR const GpReg zbp; +//! @brief Gpq register. +ASMJIT_VAR const GpReg zsi; +//! @brief Gpq register. +ASMJIT_VAR const GpReg zdi; + +//! @brief Xmm register. +ASMJIT_VAR const XmmReg xmm8; +//! @brief Xmm register. +ASMJIT_VAR const XmmReg xmm9; +//! @brief Xmm register. +ASMJIT_VAR const XmmReg xmm10; +//! @brief Xmm register. +ASMJIT_VAR const XmmReg xmm11; +//! @brief Xmm register. +ASMJIT_VAR const XmmReg xmm12; +//! @brief Xmm register. +ASMJIT_VAR const XmmReg xmm13; +//! @brief Xmm register. +ASMJIT_VAR const XmmReg xmm14; +//! @brief Xmm register. +ASMJIT_VAR const XmmReg xmm15; + +//! @brief Ymm register. +ASMJIT_VAR const YmmReg ymm8; +//! @brief Ymm register. +ASMJIT_VAR const YmmReg ymm9; +//! @brief Ymm register. +ASMJIT_VAR const YmmReg ymm10; +//! @brief Ymm register. +ASMJIT_VAR const YmmReg ymm11; +//! @brief Ymm register. +ASMJIT_VAR const YmmReg ymm12; +//! @brief Ymm register. +ASMJIT_VAR const YmmReg ymm13; +//! @brief Ymm register. +ASMJIT_VAR const YmmReg ymm14; +//! @brief Ymm register. +ASMJIT_VAR const YmmReg ymm15; + +//! @brief Get Gpq register. +static ASMJIT_INLINE GpReg gpq(uint32_t index) { return GpReg(kRegTypeGpq, index, 8); } +//! @brief Get Gpq register. +static ASMJIT_INLINE GpReg gpz(uint32_t index) { return GpReg(kRegTypeGpq, index, 8); } + +// ============================================================================ +// [asmjit::x64::Mem] +// ============================================================================ + +//! @brief Create an intptr_t 64-bit pointer operand. +static ASMJIT_INLINE Mem intptr_ptr(const Label& label, int32_t disp = 0) { return ptr(label, disp, 8); } +//! @brief Create an intptr_t 64-bit pointer operand. +static ASMJIT_INLINE Mem intptr_ptr(const Label& label, const GpReg& index, uint32_t shift, int32_t disp = 0) { return ptr(label, index, shift, disp, 8); } +//! @brief Create an intptr_t 64-bit pointer operand. 
+static ASMJIT_INLINE Mem intptr_ptr(const Label& label, const GpVar& index, uint32_t shift, int32_t disp = 0) { return ptr(label, index, shift, disp, 8); } + +//! @brief Create an intptr_t 64-bit pointer operand. +static ASMJIT_INLINE Mem intptr_ptr_abs(Ptr pAbs, int32_t disp = 0) { return ptr_abs(pAbs, disp, 8); } +//! @brief Create an intptr_t 64-bit pointer operand. +static ASMJIT_INLINE Mem intptr_ptr_abs(Ptr pAbs, const GpReg& index, uint32_t shift, int32_t disp = 0) { return ptr_abs(pAbs, index, shift, disp, 8); } +//! @brief Create an intptr_t 64-bit pointer operand. +static ASMJIT_INLINE Mem intptr_ptr_abs(Ptr pAbs, const GpVar& index, uint32_t shift, int32_t disp = 0) { return ptr_abs(pAbs, index, shift, disp, 8); } + +//! @brief Create an intptr_t 64-bit pointer operand. +static ASMJIT_INLINE Mem intptr_ptr(const GpReg& base, int32_t disp = 0) { return ptr(base, disp, 8); } +//! @brief Create an intptr_t 64-bit pointer operand. +static ASMJIT_INLINE Mem intptr_ptr(const GpVar& base, int32_t disp = 0) { return ptr(base, disp, 8); } +//! @brief Create an intptr_t 64-bit pointer operand. +static ASMJIT_INLINE Mem intptr_ptr(const GpReg& base, const GpReg& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, 8); } +//! @brief Create an intptr_t 64-bit pointer operand. +static ASMJIT_INLINE Mem intptr_ptr(const GpVar& base, const GpVar& index, uint32_t shift = 0, int32_t disp = 0) { return ptr(base, index, shift, disp, 8); } + +//! @} + +} // x64 namespace +} // asmjit namespace + +#endif // ASMJIT_BUILD_X64 + +// [Api-End] +#include "../base/apiend.h" + +// [Guard] +#endif // _ASMJIT_X86_X86DEFS_H diff --git a/src/asmjit/x86/x86func.cpp b/src/asmjit/x86/x86func.cpp new file mode 100644 index 0000000..79a90e5 --- /dev/null +++ b/src/asmjit/x86/x86func.cpp @@ -0,0 +1,539 @@ +// [AsmJit] +// Complete x86/x64 JIT and Remote Assembler for C++. +// +// [License] +// Zlib - See LICENSE.md file in the package. + +// [Export] +#define ASMJIT_EXPORTS + +// [Guard] +#include "../build.h" +#if defined(ASMJIT_BUILD_X86) || defined(ASMJIT_BUILD_X64) + +// [Dependencies - AsmJit] +#include "../base/assert.h" +#include "../base/intutil.h" +#include "../base/string.h" +#include "../x86/x86defs.h" +#include "../x86/x86func.h" + +// [Api-Begin] +#include "../base/apibegin.h" + +namespace asmjit { +namespace x86x64 { + +// ============================================================================ +// [asmjit::X86X64FuncDecl - FindArgByReg] +// ============================================================================ + +uint32_t X86X64FuncDecl::findArgByReg(uint32_t rClass, uint32_t rIndex) const { + for (uint32_t i = 0; i < _argCount; i++) { + const FuncInOut& arg = getArg(i); + if (arg.getRegIndex() == rIndex && x86VarTypeToClass(arg.getVarType()) == rClass) + return i; + } + + return kInvalidValue; +} + +// ============================================================================ +// [asmjit::X86X64FuncDecl - SetPrototype] +// ============================================================================ + +#define R(_Index_) kRegIndex##_Index_ +static uint32_t X86X64FuncDecl_initConv(X86X64FuncDecl* self, uint32_t arch, uint32_t conv) { + uint32_t i; + + // Setup defaults. 
+  self->_argStackSize = 0;
+  self->_redZoneSize = 0;
+  self->_spillZoneSize = 0;
+
+  self->_convention = static_cast<uint8_t>(conv);
+  self->_calleePopsStack = false;
+  self->_direction = kFuncDirRtl;
+
+  self->_passed.reset();
+  self->_preserved.reset();
+
+  for (i = 0; i < ASMJIT_ARRAY_SIZE(self->_passedOrderGp); i++) {
+    self->_passedOrderGp[i] = kInvalidReg;
+  }
+
+  for (i = 0; i < ASMJIT_ARRAY_SIZE(self->_passedOrderXmm); i++) {
+    self->_passedOrderXmm[i] = kInvalidReg;
+  }
+
+  // --------------------------------------------------------------------------
+  // [X86 Support]
+  // --------------------------------------------------------------------------
+
+#if defined(ASMJIT_BUILD_X86)
+  if (arch == kArchX86) {
+    self->_preserved.set(kRegClassGp, IntUtil::mask(R(Bx), R(Sp), R(Bp), R(Si), R(Di)));
+
+    switch (conv) {
+      case kFuncConvCDecl:
+        break;
+
+      case kFuncConvStdCall:
+        self->_calleePopsStack = true;
+        break;
+
+      case kFuncConvMsThisCall:
+        self->_calleePopsStack = true;
+        self->_passed.set(kRegClassGp, IntUtil::mask(R(Cx)));
+        self->_passedOrderGp[0] = R(Cx);
+        break;
+
+      case kFuncConvMsFastCall:
+        self->_calleePopsStack = true;
+        self->_passed.set(kRegClassGp, IntUtil::mask(R(Cx), R(Dx)));
+        self->_passedOrderGp[0] = R(Cx);
+        self->_passedOrderGp[1] = R(Dx);
+        break;
+
+      case kFuncConvBorlandFastCall:
+        self->_calleePopsStack = true;
+        self->_direction = kFuncDirLtr;
+        self->_passed.set(kRegClassGp, IntUtil::mask(R(Ax), R(Dx), R(Cx)));
+        self->_passedOrderGp[0] = R(Ax);
+        self->_passedOrderGp[1] = R(Dx);
+        self->_passedOrderGp[2] = R(Cx);
+        break;
+
+      case kFuncConvGccFastCall:
+        self->_calleePopsStack = true;
+        self->_passed.set(kRegClassGp, IntUtil::mask(R(Cx), R(Dx)));
+        self->_passedOrderGp[0] = R(Cx);
+        self->_passedOrderGp[1] = R(Dx);
+        break;
+
+      case kFuncConvGccRegParm1:
+        self->_passed.set(kRegClassGp, IntUtil::mask(R(Ax)));
+        self->_passedOrderGp[0] = R(Ax);
+        break;
+
+      case kFuncConvGccRegParm2:
+        self->_passed.set(kRegClassGp, IntUtil::mask(R(Ax), R(Dx)));
+        self->_passedOrderGp[0] = R(Ax);
+        self->_passedOrderGp[1] = R(Dx);
+        break;
+
+      case kFuncConvGccRegParm3:
+        self->_passed.set(kRegClassGp, IntUtil::mask(R(Ax), R(Dx), R(Cx)));
+        self->_passedOrderGp[0] = R(Ax);
+        self->_passedOrderGp[1] = R(Dx);
+        self->_passedOrderGp[2] = R(Cx);
+        break;
+
+      default:
+        ASMJIT_ASSERT(!"Reached");
+    }
+
+    return kErrorOk;
+  }
+#endif // ASMJIT_BUILD_X86
+
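+// A worked example of the conventions above: under kFuncConvMsFastCall a
+// prototype such as
+//
+//   int func(int a, int b, int c);
+//
+// receives `a` in ecx, `b` in edx, and `c` on the stack, and the callee
+// pops the stack arguments on return (_calleePopsStack == true).
+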
+  // --------------------------------------------------------------------------
+  // [X64 Support]
+  // --------------------------------------------------------------------------
+
+#if defined(ASMJIT_BUILD_X64)
+  switch (conv) {
+    case kFuncConvX64W:
+      self->_spillZoneSize = 32;
+
+      self->_passed.set(kRegClassGp, IntUtil::mask(R(Cx), R(Dx), R(R8), R(R9)));
+      self->_passedOrderGp[0] = R(Cx);
+      self->_passedOrderGp[1] = R(Dx);
+      self->_passedOrderGp[2] = R(R8);
+      self->_passedOrderGp[3] = R(R9);
+
+      self->_passed.set(kRegClassXy, IntUtil::mask(R(Xmm0), R(Xmm1), R(Xmm2), R(Xmm3)));
+      self->_passedOrderXmm[0] = R(Xmm0);
+      self->_passedOrderXmm[1] = R(Xmm1);
+      self->_passedOrderXmm[2] = R(Xmm2);
+      self->_passedOrderXmm[3] = R(Xmm3);
+
+      self->_preserved.set(kRegClassGp, IntUtil::mask(R(Bx), R(Sp), R(Bp), R(Si), R(Di), R(R12), R(R13), R(R14), R(R15)));
+      self->_preserved.set(kRegClassXy, IntUtil::mask(R(Xmm6), R(Xmm7), R(Xmm8), R(Xmm9), R(Xmm10), R(Xmm11), R(Xmm12), R(Xmm13), R(Xmm14), R(Xmm15)));
+      break;
+
+    case kFuncConvX64U:
+      self->_redZoneSize = 128;
+
+      self->_passed.set(kRegClassGp, IntUtil::mask(R(Di), R(Si), R(Dx), R(Cx), R(R8), R(R9)));
+      self->_passedOrderGp[0] = R(Di);
+      self->_passedOrderGp[1] = R(Si);
+      self->_passedOrderGp[2] = R(Dx);
+      self->_passedOrderGp[3] = R(Cx);
+      self->_passedOrderGp[4] = R(R8);
+      self->_passedOrderGp[5] = R(R9);
+
+      self->_passed.set(kRegClassXy, IntUtil::mask(R(Xmm0), R(Xmm1), R(Xmm2), R(Xmm3), R(Xmm4), R(Xmm5), R(Xmm6), R(Xmm7)));
+      self->_passedOrderXmm[0] = R(Xmm0);
+      self->_passedOrderXmm[1] = R(Xmm1);
+      self->_passedOrderXmm[2] = R(Xmm2);
+      self->_passedOrderXmm[3] = R(Xmm3);
+      self->_passedOrderXmm[4] = R(Xmm4);
+      self->_passedOrderXmm[5] = R(Xmm5);
+      self->_passedOrderXmm[6] = R(Xmm6);
+      self->_passedOrderXmm[7] = R(Xmm7);
+
+      self->_preserved.set(kRegClassGp, IntUtil::mask(R(Bx), R(Sp), R(Bp), R(R12), R(R13), R(R14), R(R15)));
+      break;
+
+    default:
+      ASMJIT_ASSERT(!"Reached");
+  }
+#endif // ASMJIT_BUILD_X64
+
+  return kErrorOk;
+}
+#undef R
+
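+// A worked example of X64W argument assignment (the slots are positional;
+// see X86X64FuncDecl_initFunc below): for
+//
+//   double f(int a, double b, int c);
+//
+// `a` takes gp slot 0 (rcx), `b` takes xmm slot 1 (xmm1), and `c` takes gp
+// slot 2 (r8); gp slot 1 and xmm slots 0 and 2 are left unused.
+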
+
+      case kVarTypeInt8:
+      case kVarTypeUInt8:
+      case kVarTypeInt16:
+      case kVarTypeUInt16:
+      case kVarTypeInt32:
+      case kVarTypeUInt32:
+        self->_retCount = 1;
+        self->_retList[0]._varType = static_cast<uint8_t>(ret);
+        self->_retList[0]._regIndex = kRegIndexAx;
+        break;
+
+      case kVarTypeMm:
+        self->_retCount = 1;
+        self->_retList[0]._varType = static_cast<uint8_t>(ret);
+        self->_retList[0]._regIndex = kRegIndexMm0;
+        break;
+
+      case kVarTypeFp32:
+        self->_retCount = 1;
+        if (arch == kArchX86) {
+          self->_retList[0]._varType = kVarTypeFp32;
+          self->_retList[0]._regIndex = kRegIndexFp0;
+        }
+        else {
+          self->_retList[0]._varType = kVarTypeXmmSs;
+          self->_retList[0]._regIndex = kRegIndexXmm0;
+        }
+        break;
+
+      case kVarTypeFp64:
+        self->_retCount = 1;
+        if (arch == kArchX86) {
+          self->_retList[0]._varType = kVarTypeFp64;
+          self->_retList[0]._regIndex = kRegIndexFp0;
+        }
+        else {
+          self->_retList[0]._varType = kVarTypeXmmSd;
+          self->_retList[0]._regIndex = kRegIndexXmm0;
+        }
+        break;
+
+      case kVarTypeFpEx:
+        self->_retCount = 1;
+        self->_retList[0]._varType = static_cast<uint8_t>(ret);
+        self->_retList[0]._regIndex = kRegIndexFp0;
+        break;
+
+      case kVarTypeXmm:
+      case kVarTypeXmmSs:
+      case kVarTypeXmmSd:
+      case kVarTypeXmmPs:
+      case kVarTypeXmmPd:
+        self->_retCount = 1;
+        self->_retList[0]._varType = static_cast<uint8_t>(ret);
+        self->_retList[0]._regIndex = kRegIndexXmm0;
+        break;
+    }
+  }
+
+  if (self->_argCount == 0)
+    return kErrorOk;
+
+#if defined(ASMJIT_BUILD_X86)
+  if (arch == kArchX86) {
+    // Register arguments (Integer), always left-to-right.
+    for (i = 0; i != static_cast<int32_t>(argCount); i++) {
+      FuncInOut& arg = self->getArg(i);
+      uint32_t varType = varMapping[arg.getVarType()];
+
+      if (x86VarIsInt(varType) && gpPos < 16 && self->_passedOrderGp[gpPos] != kInvalidReg) {
+        arg._regIndex = self->_passedOrderGp[gpPos++];
+        self->_used.add(kRegClassGp, IntUtil::mask(arg.getRegIndex()));
+      }
+    }
+
+    // Stack arguments.
+    int32_t iStart = static_cast<int32_t>(argCount - 1);
+    int32_t iEnd = -1;
+    int32_t iStep = -1;
+
+    if (self->_direction == kFuncDirLtr) {
+      iStart = 0;
+      iEnd = static_cast<int32_t>(argCount);
+      iStep = 1;
+    }
+
+    for (i = iStart; i != iEnd; i += iStep) {
+      FuncInOut& arg = self->getArg(i);
+      uint32_t varType = varMapping[arg.getVarType()];
+
+      if (arg.hasRegIndex())
+        continue;
+
+      if (x86VarIsInt(varType)) {
+        stackOffset -= 4;
+        arg._stackOffset = static_cast<int16_t>(stackOffset);
+      }
+      else if (x86VarIsFloat(varType)) {
+        int32_t size = static_cast<int32_t>(_varInfo[varType].getSize());
+        stackOffset -= size;
+        arg._stackOffset = static_cast<int16_t>(stackOffset);
+      }
+    }
+  }
+#endif // ASMJIT_BUILD_X86
+
+#if defined(ASMJIT_BUILD_X64)
+  if (arch == kArchX64) {
+    if (conv == kFuncConvX64W) {
+      int32_t argMax = argCount < 4 ? argCount : 4;
+
+      // Register arguments (Gp/Xmm), always left-to-right.
+      for (i = 0; i != argMax; i++) {
+        FuncInOut& arg = self->getArg(i);
+        uint32_t varType = varMapping[arg.getVarType()];
+
+        if (x86VarIsInt(varType)) {
+          arg._regIndex = self->_passedOrderGp[i];
+          self->_used.add(kRegClassGp, IntUtil::mask(arg.getRegIndex()));
+        }
+        else if (x86VarIsFloat(varType)) {
+          arg._regIndex = self->_passedOrderXmm[i];
+          self->_used.add(kRegClassXy, IntUtil::mask(arg.getRegIndex()));
+        }
+      }
+
+      // Stack arguments (always right-to-left).
+      for (i = argCount - 1; i != -1; i--) {
+        FuncInOut& arg = self->getArg(i);
+        uint32_t varType = varMapping[arg.getVarType()];
+
+        if (arg.hasRegIndex())
+          continue;
+
+        if (x86VarIsInt(varType)) {
+          stackOffset -= 8; // Always 8 bytes.
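+          // Offsets accumulate as negative values here; they are rebased to
+          // positive, non-zero offsets once every argument has been placed.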
+          arg._stackOffset = stackOffset;
+        }
+        else if (x86VarIsFloat(varType)) {
+          int32_t size = static_cast<int32_t>(_varInfo[varType].getSize());
+          stackOffset -= size;
+          arg._stackOffset = stackOffset;
+        }
+      }
+
+      // 32 bytes of shadow space (specific to the X64W calling convention).
+      stackOffset -= 4 * 8;
+    }
+    else {
+      // Register arguments (Gp), always left-to-right.
+      for (i = 0; i != static_cast<int32_t>(argCount); i++) {
+        FuncInOut& arg = self->getArg(i);
+        uint32_t varType = varMapping[arg.getVarType()];
+
+        if (x86VarIsInt(varType) && gpPos < 32 && self->_passedOrderGp[gpPos] != kInvalidReg) {
+          arg._regIndex = self->_passedOrderGp[gpPos++];
+          self->_used.add(kRegClassGp, IntUtil::mask(arg.getRegIndex()));
+        }
+      }
+
+      // Register arguments (Xmm), always left-to-right.
+      for (i = 0; i != static_cast<int32_t>(argCount); i++) {
+        FuncInOut& arg = self->getArg(i);
+        uint32_t varType = varMapping[arg.getVarType()];
+
+        if (x86VarIsFloat(varType)) {
+          arg._regIndex = self->_passedOrderXmm[xmmPos++];
+          self->_used.add(kRegClassXy, IntUtil::mask(arg.getRegIndex()));
+        }
+      }
+
+      // Stack arguments.
+      for (i = argCount - 1; i != -1; i--) {
+        FuncInOut& arg = self->getArg(i);
+        uint32_t varType = varMapping[arg.getVarType()];
+
+        if (arg.hasRegIndex())
+          continue;
+
+        if (x86VarIsInt(varType)) {
+          stackOffset -= 8;
+          arg._stackOffset = static_cast<int16_t>(stackOffset);
+        }
+        else if (x86VarIsFloat(varType)) {
+          int32_t size = static_cast<int32_t>(_varInfo[varType].getSize());
+
+          stackOffset -= size;
+          arg._stackOffset = static_cast<int16_t>(stackOffset);
+        }
+      }
+    }
+  }
+#endif // ASMJIT_BUILD_X64
+
+  // Rebase the stack offsets so that all stack-based parameters end up with
+  // a positive, non-zero stack offset.
+  for (i = 0; i < static_cast<int32_t>(argCount); i++) {
+    FuncInOut& arg = self->getArg(i);
+    if (!arg.hasRegIndex()) {
+      arg._stackOffset += static_cast<int16_t>(static_cast<int32_t>(regSize) - stackOffset);
+    }
+  }
+
+  self->_argStackSize = static_cast<uint32_t>(-stackOffset);
+  return kErrorOk;
+}
+
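+// Worked example (illustrative, not part of the original source): for a
+// cdecl 'int f(int a, int b)' on x86, initFunc assigns no registers; the
+// right-to-left pass gives 'b' offset -4 and 'a' offset -8, and the rebase
+// above adds regSize - stackOffset = 4 + 8 = 12, so 'a' ends at +4, 'b' at
+// +8, and _argStackSize is 8.
+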
+Error X86X64FuncDecl::setPrototype(uint32_t conv, const FuncPrototype& p) {
+  if (conv == kFuncConvNone || conv >= _kFuncConvCount)
+    return kErrorInvalidArgument;
+
+  if (p.getArgCount() > kFuncArgCount)
+    return kErrorInvalidArgument;
+
+  // Validate that the requested convention is supported by the current
+  // asmjit configuration (relevant when only one of the two targets is
+  // compiled in).
+  uint32_t arch = x86GetArchFromCConv(conv);
+#if defined(ASMJIT_BUILD_X86) && !defined(ASMJIT_BUILD_X64)
+  if (arch == kArchX64)
+    return kErrorInvalidState;
+#endif // ASMJIT_BUILD_X86 && !ASMJIT_BUILD_X64
+
+#if !defined(ASMJIT_BUILD_X86) && defined(ASMJIT_BUILD_X64)
+  if (arch == kArchX86)
+    return kErrorInvalidState;
+#endif // !ASMJIT_BUILD_X86 && ASMJIT_BUILD_X64
+
+  ASMJIT_PROPAGATE_ERROR(X86X64FuncDecl_initConv(this, arch, conv));
+  ASMJIT_PROPAGATE_ERROR(X86X64FuncDecl_initFunc(this, arch, p.getRet(), p.getArgList(), p.getArgCount()));
+
+  return kErrorOk;
+}
+
+// ============================================================================
+// [asmjit::X86X64FuncDecl - Reset]
+// ============================================================================
+
+void X86X64FuncDecl::reset() {
+  uint32_t i;
+
+  _convention = kFuncConvNone;
+  _calleePopsStack = false;
+  _direction = kFuncDirRtl;
+  _reserved0 = 0;
+
+  _argCount = 0;
+  _retCount = 0;
+
+  _argStackSize = 0;
+  _redZoneSize = 0;
+  _spillZoneSize = 0;
+
+  for (i = 0; i < ASMJIT_ARRAY_SIZE(_argList); i++) {
+    _argList[i].reset();
+  }
+
+  _retList[0].reset();
+  _retList[1].reset();
+
+  _used.reset();
+  _passed.reset();
+  _preserved.reset();
+
+  for (i = 0; i < ASMJIT_ARRAY_SIZE(_passedOrderGp); i++) {
+    _passedOrderGp[i] = kInvalidReg;
+  }
+
+  for (i = 0; i < ASMJIT_ARRAY_SIZE(_passedOrderXmm); i++) {
+    _passedOrderXmm[i] = kInvalidReg;
+  }
+}
+
+} // x86x64 namespace
+} // asmjit namespace
+
+// [Api-End]
+#include "../base/apiend.h"
+
+// [Guard]
+#endif // ASMJIT_BUILD_X86 || ASMJIT_BUILD_X64
diff --git a/src/asmjit/x86/x86func.h b/src/asmjit/x86/x86func.h
new file mode 100644
index 0000000..a6c9420
--- /dev/null
+++ b/src/asmjit/x86/x86func.h
@@ -0,0 +1,500 @@
+// [AsmJit]
+// Complete x86/x64 JIT and Remote Assembler for C++.
+//
+// [License]
+// Zlib - See LICENSE.md file in the package.
+
+// [Guard]
+#ifndef _ASMJIT_X86_X86FUNC_H
+#define _ASMJIT_X86_X86FUNC_H
+
+// [Dependencies - AsmJit]
+#include "../base/defs.h"
+#include "../base/func.h"
+#include "../x86/x86defs.h"
+
+// [Api-Begin]
+#include "../base/apibegin.h"
+
+namespace asmjit {
+namespace x86x64 {
+
+//! @addtogroup asmjit_x86x64
+//! @{
+
+// ============================================================================
+// [asmjit::x86x64::kFuncConv]
+// ============================================================================
+
+//! @brief X86 function calling conventions.
+//!
+//! A calling convention is a scheme that defines how function arguments are
+//! passed to a function and how the function returns its value. When
+//! programming in assembly it is necessary to always comply with the calling
+//! convention used, because even a small inconsistency can cause undefined
+//! behavior or a crash.
+//!
+//! List of calling conventions for 32-bit x86 mode:
+//! - @c kFuncConvCDecl - Calling convention for C runtime.
+//! - @c kFuncConvStdCall - Calling convention for WinAPI functions.
+//! - @c kFuncConvMsThisCall - Calling convention for C++ members under
+//!      Windows (produced by MSVC and all MSVC compatible compilers).
+//! - @c kFuncConvMsFastCall - Fastest calling convention that can be used
+//!      by MSVC compiler.
+//! - @c kFuncConvBorlandFastCall - Borland fastcall convention.
+//! - @c kFuncConvGccFastCall - GCC fastcall convention (2 register arguments).
+//! - @c kFuncConvGccRegParm1 - GCC regparm(1) convention.
+//! - @c kFuncConvGccRegParm2 - GCC regparm(2) convention.
+//! - @c kFuncConvGccRegParm3 - GCC regparm(3) convention.
+//!
+//! List of calling conventions for 64-bit x86 mode (x64):
+//! - @c kFuncConvX64W - Windows 64-bit calling convention (WIN64 ABI).
+//! - @c kFuncConvX64U - Unix 64-bit calling convention (AMD64 ABI).
+//!
+//! There is also @c kFuncConvHost that is defined to fit best to your
+//! compiler.
+//!
+//! These types are used together with the @c asmjit::Compiler::addFunc()
+//! method.
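+//!
+//! A minimal usage sketch (illustrative only; construction of the 'proto'
+//! prototype object is elided here):
+//!
+//! @code
+//! X86X64FuncDecl decl;
+//! Error err = decl.setPrototype(kFuncConvHost, proto);
+//! @endcode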
+ASMJIT_ENUM(kFuncConv) {
+  // --------------------------------------------------------------------------
+  // [X64]
+  // --------------------------------------------------------------------------
+
+  //! @brief X64 calling convention for Windows platform (WIN64 ABI).
+  //!
+  //! The first four arguments are passed in these registers:
+  //! - 1. 32/64-bit integer or floating point argument - rcx/xmm0
+  //! - 2. 32/64-bit integer or floating point argument - rdx/xmm1
+  //! - 3. 32/64-bit integer or floating point argument - r8/xmm2
+  //! - 4. 32/64-bit integer or floating point argument - r9/xmm3
+  //!
+  //! Note that "first four arguments" means the arguments at positions 1 to 4
+  //! (inclusive). For example, if the second argument is not passed in a
+  //! register, the rdx/xmm1 registers are left unused.
+  //!
+  //! All other arguments are pushed on the stack in right-to-left direction.
+  //! The stack is aligned to 16 bytes. There is a 32-byte shadow space on the
+  //! stack that can be used to save up to four 64-bit registers (most likely
+  //! intended for spilling the first four register-passed arguments).
+  //!
+  //! Arguments direction:
+  //! - Right to Left (except for the first 4 parameters, which are passed in
+  //!   registers).
+  //!
+  //! Stack is cleaned by:
+  //! - Caller.
+  //!
+  //! Return value:
+  //! - Integer types - RAX register.
+  //! - Floating points - XMM0 register.
+  //!
+  //! Stack is always aligned to 16 bytes.
+  //!
+  //! More information about this calling convention can be found on MSDN:
+  //! http://msdn.microsoft.com/en-us/library/9b372w95.aspx
+  kFuncConvX64W = 1,
+
+  //! @brief X64 calling convention for Unix platforms (AMD64 ABI).
+  //!
+  //! The first six 32/64-bit integer arguments are passed in the rdi, rsi,
+  //! rdx, rcx, r8 and r9 registers. The first eight floating point or Xmm
+  //! arguments are passed in the xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6 and
+  //! xmm7 registers. This means that up to 14 arguments in total can be
+  //! passed in registers.
+  //!
+  //! There is also a RED ZONE below the stack pointer that can be used for
+  //! temporary storage. The red zone is the space from [rsp-128] to [rsp-8].
+  //!
+  //! Arguments direction:
+  //! - Right to Left (except for arguments passed in registers).
+  //!
+  //! Stack is cleaned by:
+  //! - Caller.
+  //!
+  //! Return value:
+  //! - Integer types - RAX register.
+  //! - Floating points - XMM0 register.
+  //!
+  //! Stack is always aligned to 16 bytes.
+  kFuncConvX64U = 2,
+
+  // --------------------------------------------------------------------------
+  // [X86]
+  // --------------------------------------------------------------------------
+
+  //! @brief Cdecl calling convention (used by C runtime).
+  //!
+  //! Compatible across MSVC and GCC.
+  //!
+  //! Arguments direction:
+  //! - Right to Left
+  //!
+  //! Stack is cleaned by:
+  //! - Caller.
+  kFuncConvCDecl = 3,
+
+  //! @brief Stdcall calling convention (used by WinAPI).
+  //!
+  //! Compatible across MSVC and GCC.
+  //!
+  //! Arguments direction:
+  //! - Right to Left
+  //!
+  //! Stack is cleaned by:
+  //! - Callee.
+  //!
+  //! Return value:
+  //! - Integer types - EAX:EDX registers.
+  //! - Floating points - fp0 register.
+  kFuncConvStdCall = 4,
+
+  //! @brief MSVC specific calling convention used by MSVC/Intel compilers
+  //! for struct/class methods.
+  //!
+  //! This is an MSVC (and Intel) specific calling convention used in the
+  //! Windows world for C++ class methods. The implicit 'this' pointer is
+  //! passed in the ECX register instead of being stored on the stack.
+  //!
+  //! Arguments direction:
+  //! - Right to Left (except the this pointer in ECX).
+  //!
+  //! Stack is cleaned by:
+  //! - Callee.
+  //!
+  //! Return value:
+  //! - Integer types - EAX:EDX registers.
+  //! - Floating points - fp0 register.
+  //!
+  //! C++ class methods that take a variable count of arguments use the cdecl
+  //! calling convention instead.
+  //!
+  //! @note This calling convention is always used by MSVC for class methods;
+  //! it is implicit and there is no way to override it.
+  kFuncConvMsThisCall = 5,
+
+  //! @brief MSVC specific fastcall.
+  //!
+  //! The first two parameters (evaluated left-to-right) are passed in the
+  //! ECX:EDX registers, all others on the stack in right-to-left order.
+  //!
+  //! Arguments direction:
+  //! - Right to Left (except the first two integer arguments in ECX:EDX).
+  //!
+  //! Stack is cleaned by:
+  //! - Callee.
+  //!
+  //! Return value:
+  //! - Integer types - EAX:EDX registers.
+  //! - Floating points - fp0 register.
+  //!
+  //! @note This calling convention differs from the GCC one in its stack
+  //! cleaning mechanism.
+  kFuncConvMsFastCall = 6,
+
+  //! @brief Borland specific fastcall with 2 parameters in registers.
+  //!
+  //! The first two parameters (evaluated left-to-right) are passed in the
+  //! ECX:EDX registers, all others on the stack in left-to-right order.
+  //!
+  //! Arguments direction:
+  //! - Left to Right (except the first two integer arguments in ECX:EDX).
+  //!
+  //! Stack is cleaned by:
+  //! - Callee.
+  //!
+  //! Return value:
+  //! - Integer types - EAX:EDX registers.
+  //! - Floating points - fp0 register.
+  //!
+  //! @note Arguments on the stack are in left-to-right order, which differs
+  //! from the other fastcall conventions used by other compilers.
+  kFuncConvBorlandFastCall = 7,
+
+  //! @brief GCC specific fastcall convention.
+  //!
+  //! The first two parameters (evaluated left-to-right) are passed in the
+  //! ECX:EDX registers, all others on the stack in right-to-left order.
+  //!
+  //! Arguments direction:
+  //! - Right to Left (except the first two integer arguments in ECX:EDX).
+  //!
+  //! Stack is cleaned by:
+  //! - Callee.
+  //!
+  //! Return value:
+  //! - Integer types - EAX:EDX registers.
+  //! - Floating points - fp0 register.
+  //!
+  //! @note This calling convention should be compatible with
+  //! @c kFuncConvMsFastCall.
+  kFuncConvGccFastCall = 8,
+
+  //! @brief GCC specific regparm(1) convention.
+  //!
+  //! The first parameter (evaluated left-to-right) is passed in the EAX
+  //! register, all others on the stack in right-to-left order.
+  //!
+  //! Arguments direction:
+  //! - Right to Left (except the first integer argument in EAX).
+  //!
+  //! Stack is cleaned by:
+  //! - Caller.
+  //!
+  //! Return value:
+  //! - Integer types - EAX:EDX registers.
+  //! - Floating points - fp0 register.
+  kFuncConvGccRegParm1 = 9,
+
+  //! @brief GCC specific regparm(2) convention.
+  //!
+  //! The first two parameters (evaluated left-to-right) are passed in the
+  //! EAX:EDX registers, all others on the stack in right-to-left order.
+  //!
+  //! Arguments direction:
+  //! - Right to Left (except the first two integer arguments in EAX:EDX).
+  //!
+  //! Stack is cleaned by:
+  //! - Caller.
+  //!
+  //! Return value:
+  //! - Integer types - EAX:EDX registers.
+  //! - Floating points - fp0 register.
+  kFuncConvGccRegParm2 = 10,
+
+  //! @brief GCC specific regparm(3) convention.
+  //!
+  //! The first three parameters (evaluated left-to-right) are passed in the
+  //! EAX:EDX:ECX registers, all others on the stack in right-to-left order.
+  //!
+  //! Arguments direction:
+  //! - Right to Left (except the first three integer arguments in
+  //!   EAX:EDX:ECX).
+  //!
+  //! Stack is cleaned by:
+  //! - Caller.
+  //!
+  //! Return value:
+  //! - Integer types - EAX:EDX registers.
+  //! - Floating points - fp0 register.
+  kFuncConvGccRegParm3 = 11,
+
+  //! @internal
+  //!
+  //! @brief Count of function calling conventions.
+  _kFuncConvCount = 12,
+
+  // --------------------------------------------------------------------------
+  // [Host]
+  // --------------------------------------------------------------------------
+
+  //! @def kFuncConvHost
+  //! @brief Default calling convention for current platform / operating system.
+
+  //! @def kFuncConvHostCDecl
+  //! @brief Default C calling convention based on the current compiler's settings.
+
+  //! @def kFuncConvHostStdCall
+  //! @brief Compatibility for __stdcall calling convention.
+  //!
+  //! @note This enumeration is always set to a value which is compatible with
+  //! the current compiler's __stdcall calling convention. In 64-bit mode the
+  //! value is compatible with @ref kFuncConvX64W or @ref kFuncConvX64U.
+
+  //! @def kFuncConvHostFastCall
+  //! @brief Compatibility for __fastcall calling convention.
+  //!
+  //! @note This enumeration is always set to a value which is compatible with
+  //! the current compiler's __fastcall calling convention. In 64-bit mode the
+  //! value is compatible with @ref kFuncConvX64W or @ref kFuncConvX64U.
+
+#if defined(ASMJIT_HOST_X86)
+
+  kFuncConvHost = kFuncConvCDecl,
+  kFuncConvHostCDecl = kFuncConvCDecl,
+  kFuncConvHostStdCall = kFuncConvStdCall,
+
+# if defined(_MSC_VER)
+  kFuncConvHostFastCall = kFuncConvMsFastCall
+# elif defined(__GNUC__)
+  kFuncConvHostFastCall = kFuncConvGccFastCall
+# elif defined(__BORLANDC__)
+  kFuncConvHostFastCall = kFuncConvBorlandFastCall
+# else
+# error "asmjit/x86/x86func.h - asmjit::kFuncConvHostFastCall not supported."
+# endif
+
+#else
+
+# if defined(ASMJIT_OS_WINDOWS)
+  kFuncConvHost = kFuncConvX64W,
+# else
+  kFuncConvHost = kFuncConvX64U,
+# endif
+
+  kFuncConvHostCDecl = kFuncConvHost,
+  kFuncConvHostStdCall = kFuncConvHost,
+  kFuncConvHostFastCall = kFuncConvHost
+
+#endif // ASMJIT_HOST
+};
+
+// ============================================================================
+// [asmjit::x86x64::kFuncHint]
+// ============================================================================
+
+//! @brief X86 function hints.
+ASMJIT_ENUM(kFuncHint) {
+  //! @brief Use push/pop sequences instead of mov sequences in function
+  //! prolog and epilog.
+  kFuncHintPushPop = 16,
+  //! @brief Add emms instruction to the function epilog.
+  kFuncHintEmms = 17,
+  //! @brief Add sfence instruction to the function epilog.
+  kFuncHintSFence = 18,
+  //! @brief Add lfence instruction to the function epilog.
+  kFuncHintLFence = 19
+};
+
+// ============================================================================
+// [asmjit::x86x64::kFuncFlags]
+// ============================================================================
+
+//! @brief X86 function flags.
+ASMJIT_ENUM(kFuncFlags) {
+  //! @brief Whether to emit register load/save sequence using push/pop pairs.
+  kFuncFlagPushPop = 0x00010000,
+
+  //! @brief Whether to emit "enter" instead of three instructions in case
+  //! the function is not naked or misaligned.
+  kFuncFlagEnter = 0x00020000,
+
+  //! @brief Whether to emit "leave" instead of two instructions in case
+  //! the function is not naked or misaligned.
+  kFuncFlagLeave = 0x00040000,
+
+  //! @brief Whether it's required to move arguments to a new stack location,
+  //! because of manual aligning.
+  kFuncFlagMoveArgs = 0x00080000,
+
+  //! @brief Whether to emit EMMS instruction in epilog (auto-detected).
+  kFuncFlagEmms = 0x01000000,
+
+  //! @brief Whether to emit SFence instruction in epilog (auto-detected).
+  //!
+  //! @note Combining @ref kFuncFlagSFence with @ref kFuncFlagLFence results
+  //! in emitting mfence.
+  kFuncFlagSFence = 0x02000000,
+
+  //! @brief Whether to emit LFence instruction in epilog (auto-detected).
+  //!
+  //! @note Combining @ref kFuncFlagSFence with @ref kFuncFlagLFence results
+  //! in emitting mfence.
+  kFuncFlagLFence = 0x04000000
+};
+
+// ============================================================================
+// [asmjit::x86x64::x86GetArchFromCConv]
+// ============================================================================
+
+static ASMJIT_INLINE uint32_t x86GetArchFromCConv(uint32_t conv) {
+  return IntUtil::inInterval<uint32_t>(conv, kFuncConvX64W, kFuncConvX64U) ? kArchX64 : kArchX86;
+}
+
+// ============================================================================
+// [asmjit::x86x64::X86X64FuncDecl]
+// ============================================================================
+
+//! @brief X86 function declaration, including calling convention, arguments
+//! and their register indices or stack positions.
+struct X86X64FuncDecl : public FuncDecl {
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a new @ref X86X64FuncDecl instance.
+  ASMJIT_INLINE X86X64FuncDecl() { reset(); }
+
+  // --------------------------------------------------------------------------
+  // [Accessors - X86]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get used registers (mask).
+  //!
+  //! @note The result depends on the function calling convention AND the
+  //! function prototype. The returned mask contains only registers actually
+  //! used to pass function arguments.
+  ASMJIT_INLINE uint32_t getUsed(uint32_t c) const { return _used.get(c); }
+
+  //! @brief Get passed registers (mask).
+  //!
+  //! @note The result depends on the function calling convention used; the
+  //! prototype of the function doesn't affect the mask returned.
+  ASMJIT_INLINE uint32_t getPassed(uint32_t c) const { return _passed.get(c); }
+
+  //! @brief Get preserved registers (mask).
+  //!
+  //! @note The result depends on the function calling convention used; the
+  //! prototype of the function doesn't affect the mask returned.
+  ASMJIT_INLINE uint32_t getPreserved(uint32_t c) const { return _preserved.get(c); }
+
+  //! @brief Get the order of passed registers (Gp).
+  //!
+  //! @note The result depends on the function calling convention used; the
+  //! prototype of the function doesn't affect the returned order.
+  ASMJIT_INLINE const uint8_t* getPassedOrderGp() const { return _passedOrderGp; }
+
+  //! @brief Get the order of passed registers (Xmm).
+  //!
+  //! @note The result depends on the function calling convention used; the
+  //! prototype of the function doesn't affect the returned order.
+  ASMJIT_INLINE const uint8_t* getPassedOrderXmm() const { return _passedOrderXmm; }
+
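+  // Illustrative sketch (not part of the original API docs): walking the Gp
+  // passing order of a declaration, where 'decl' is assumed to be an
+  // initialized X86X64FuncDecl.
+  //
+  //   const uint8_t* order = decl.getPassedOrderGp();
+  //   for (uint32_t i = 0; i < kFuncArgCount && order[i] != kInvalidReg; i++)
+  //     /* order[i] is the Gp register index of the i-th register argument */;
+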
+  // --------------------------------------------------------------------------
+  // [FindArgByReg]
+  // --------------------------------------------------------------------------
+
+  //! @brief Find argument ID by register class and index.
+  ASMJIT_API uint32_t findArgByReg(uint32_t rClass, uint32_t rIndex) const;
+
+  // --------------------------------------------------------------------------
+  // [SetPrototype]
+  // --------------------------------------------------------------------------
+
+  //! @brief Set function prototype.
+  //!
+  //! This sets the function calling convention and sets up the argument
+  //! variables.
+  //!
+  //! @note This function allocates variables, so it can be called only once.
+  ASMJIT_API Error setPrototype(uint32_t conv, const FuncPrototype& p);
+
+  // --------------------------------------------------------------------------
+  // [Reset]
+  // --------------------------------------------------------------------------
+
+  ASMJIT_API void reset();
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! @brief Used registers.
+  RegMask _used;
+
+  //! @brief Passed registers (defined by the calling convention).
+  RegMask _passed;
+  //! @brief Preserved registers (defined by the calling convention).
+  RegMask _preserved;
+
+  //! @brief Order of registers defined to pass function arguments (Gp).
+  uint8_t _passedOrderGp[kFuncArgCount];
+  //! @brief Order of registers defined to pass function arguments (Xmm).
+  uint8_t _passedOrderXmm[kFuncArgCount];
+};
+
+//! @}
+
+} // x86x64 namespace
+} // asmjit namespace
+
+// [Api-End]
+#include "../base/apiend.h"
+
+// [Guard]
+#endif // _ASMJIT_X86_X86FUNC_H
diff --git a/tools/autoexp-patch.py b/tools/autoexp-patch.py
new file mode 100644
index 0000000..3df3e3e
--- /dev/null
+++ b/tools/autoexp-patch.py
@@ -0,0 +1,424 @@
+#!/usr/bin/env python
+
+import os
+
+AUTOEXP_FILES = [
+  # Visual Studio 8.0 (2005).
+  "C:\\Program Files\\Microsoft Visual Studio 8\\Common7\\Packages\\Debugger\\autoexp.dat",
+  "C:\\Program Files (x86)\\Microsoft Visual Studio 8\\Common7\\Packages\\Debugger\\autoexp.dat",
+
+  # Visual Studio 9.0 (2008).
+  "C:\\Program Files\\Microsoft Visual Studio 9.0\\Common7\\Packages\\Debugger\\autoexp.dat",
+  "C:\\Program Files (x86)\\Microsoft Visual Studio 9.0\\Common7\\Packages\\Debugger\\autoexp.dat",
+
+  # Visual Studio 10.0 (2010).
+  "C:\\Program Files\\Microsoft Visual Studio 10.0\\Common7\\Packages\\Debugger\\autoexp.dat",
+  "C:\\Program Files (x86)\\Microsoft Visual Studio 10.0\\Common7\\Packages\\Debugger\\autoexp.dat"
+]
+
+DIRECTIVE_SYMBOL = '@'
+
+# =============================================================================
+# [Log]
+# =============================================================================
+
+def log(str):
+  print(str)
+
+# =============================================================================
+# [Is...]
+# ============================================================================= + +def isDirective(c): + return c == DIRECTIVE_SYMBOL + +def isAlpha(c): + c = ord(c) + return (c >= ord('a') and c <= ord('z')) or (c >= ord('A') and c <= ord('Z')) + +def isAlpha_(c): + return isAlpha(c) or (c == '_') + +def isNumber(c): + c = ord(c) + return (c >= ord('0')) and (c <= ord('9')) + +def isAlnum(c): + return isAlpha(c) or isNumber(c) + +def isAlnum_(c): + return isAlnum(c) or (c == '_') + +def isSpace(c): + return (c == ' ') or (c == '\t') + +def isNewLine(c): + return c == '\n' + +# ============================================================================= +# [SyntaxError] +# ============================================================================= + +class SyntaxError(Exception): + def __init__(self, msg): + self.msg = msg + + def __str__(self): + return repr(self.msg) + +# ============================================================================= +# [Context] +# ============================================================================= + +class Context(object): + def __init__(self, data): + self.data = data + self.index = 0 + self.size = len(data) + + def isNewLine(self): + if self.index == 0: + return True + else: + return self.data[self.index - 1] == '\n' + + def isEnd(self): + return self.index >= self.size + + def getChar(self): + if self.index >= self.size: + return '\0' + return self.data[self.index] + + def advance(self): + if self.index < self.size: + self.index += 1 + + def parseUntilTrue(self, func, advance): + while not self.isEnd(): + c = self.data[self.index] + if func(c): + self.index += 1 + continue + if advance: + self.index += 1 + return True + return False + + def parseUntilFalse(self, func, advance): + while not self.isEnd(): + c = self.data[self.index] + if not func(c): + self.index += 1 + continue + if advance: + self.index += 1 + return True + return False + + def skipString(self): + def func(c): + return c == '"' + return self.parseUntilFalse(func, True) + + def skipSpaces(self): + return self.parseUntilTrue(isSpace, False) + + def skipLine(self): + return self.parseUntilFalse(isNewLine, True) + + def parseDirective(self, index): + start = index + + data = self.data + size = self.size + + c = data[index] + assert isAlpha_(c) + + while True: + index += 1 + if index >= size: + break + c = data[index] + if isAlnum_(c): + continue + break + + self.index = index + return data[start:index] + + def parseSymbol(self, index): + start = index + + data = self.data + size = self.size + + c = data[index] + assert isAlpha_(c) + + while True: + index += 1 + if index >= size: + return data[start:index] + c = data[index] + if isAlnum_(c): + continue + if c == ':' and index + 2 < size and data[index + 1] == ':' and isAlpha_(data[index + 2]): + index += 2 + continue + + self.index = index + return data[start:index] + + def parseMacro(self, index): + start = index + end = None + + data = self.data + size = self.size + + if index >= size: + return "" + + while True: + c = data[index] + index += 1 + + if c == '\n' or index >= size: + if end == None: + end = index - 1 + break + + if c == ';': + if end == None: + end = index + + while start < end and isSpace(data[end - 1]): + end -= 1 + + self.index = index + return data[start:end] + + def replaceRange(self, start, end, content): + old = self.data + + self.data = old[0:start] + content + old[end:] + self.size = len(self.data) + + assert(self.index >= end) + + self.index -= end - start + self.index += len(content) + +# 
============================================================================= +# [AutoExpDat] +# ============================================================================= + +class AutoExpDat(object): + def __init__(self, data): + self.library = None + self.symbols = {} + self.data = self.process(data.replace('\r', '')) + + def process(self, data): + ctx = Context(data) + + while not ctx.isEnd(): + c = ctx.getChar() + + # Skip comments. + if c == ';': + ctx.skipLine() + continue + + # Skip strings. + if c == '"': + ctx.advance() + ctx.skipString() + continue + + # Skip numbers. + if isNumber(c): + ctx.parseUntilTrue(isAlnum_, True) + continue + + # Parse directives. + if isDirective(c) and ctx.isNewLine(): + start = ctx.index + + ctx.advance() + c = ctx.getChar() + + # Remove lines that have '@' followed by space or newline. + if isNewLine(c) or c == '\0': + ctx.advance() + ctx.replaceRange(start, ctx.index, "") + continue + if isSpace(c): + ctx.skipLine() + ctx.replaceRange(start, ctx.index, "") + continue + + directive = ctx.parseDirective(ctx.index) + + c = ctx.getChar() + if not isSpace(c): + self.error("Directive Error: @" + directive + ".") + ctx.skipSpaces() + + # Directive '@library'. + if directive == "library": + self.library = ctx.parseMacro(ctx.index) + + # Directive '@define'. + elif directive == "define": + c = ctx.getChar() + if not isAlpha_(c): + self.error("Define Directive has to start with alpha character or underscore") + symbol = ctx.parseSymbol(ctx.index) + + c = ctx.getChar() + + # No Macro. + if isNewLine(c): + ctx.advance() + self.addSymbol(symbol, "") + # Has Macro. + else: + ctx.skipSpaces() + + macro = ctx.parseMacro(ctx.index) + self.addSymbol(symbol, macro) + + # Unknown Directive. + else: + self.error("Unknown Directive: @" + directive + ".") + + ctx.replaceRange(start, ctx.index, "") + continue + + # Parse/Replace symbol. + if isAlpha_(c) and ctx.index > 0 and ctx.data[ctx.index - 1] != '#': + start = ctx.index + symbol = ctx.parseSymbol(start) + + if symbol in self.symbols: + ctx.replaceRange(start, start + len(symbol), self.symbols[symbol]) + continue + + ctx.advance() + + return ctx.data + + def addSymbol(self, symbol, macro): + if symbol in self.symbols: + self.error("Symbol '" + symbol + "' redefinition.") + else: + # Recurse. + macro = self.process(macro) + + log("-- @define " + symbol + " " + macro) + self.symbols[symbol] = macro + + def error(self, msg): + raise SyntaxError(msg) + +# ============================================================================= +# [LoadFile / SaveFile] +# ============================================================================= + +def loadFile(file): + h = None + data = None + + try: + h = open(file, "rb") + data = h.read() + except: + pass + finally: + if h: + h.close() + + return data + +def saveFile(file, data): + h = None + result = False + + try: + h = open(file, "wb") + h.truncate() + h.write(data) + result = True + except: + pass + finally: + if h: + h.close() + + return result + +# ============================================================================= +# [PatchFile] +# ============================================================================= + +def patchFile(file, mark, data): + input = loadFile(file) + if not input: + return + + beginMark = ";${" + mark + ":Begin}" + endMark = ";${" + mark + ":End}" + + if beginMark in input: + # Replace. 
+ if not endMark in input: + log("-- Corrupted File:\n" + " " + file) + return + + beginMarkIndex = input.find(beginMark) + endMarkIndex = input.find(endMark) + + beginMarkIndex = input.find('\n', beginMarkIndex) + 1 + endMarkIndex = input.rfind('\n', 0, endMarkIndex) + 1 + + if beginMarkIndex == -1 or \ + endMarkIndex == -1 or \ + beginMarkIndex > endMarkIndex: + log("-- Corrupted File:\n" + " " + file) + return + + output = input[:beginMarkIndex] + data + input[endMarkIndex:] + + else: + # Add. + output = input + output += "\n" + output += beginMark + "\n" + output += data + output += endMark + "\n" + + if input == output: + log("-- Unaffected:\n" + " " + file) + else: + log("-- Patching:\n" + " " + file) + if not saveFile(file, output): + log("!! Can't write:\n" + " " + file) + +def main(): + src = loadFile("autoexp.dat") + if src == None: + log("!! Can't read autoexp.dat") + return + + src = AutoExpDat(src) + if not src.library: + log("!! Library not defined, use @library directive.") + return + + for file in AUTOEXP_FILES: + patchFile(file, src.library, src.data) + +main() diff --git a/tools/autoexp.dat b/tools/autoexp.dat new file mode 100644 index 0000000..ac8a12e --- /dev/null +++ b/tools/autoexp.dat @@ -0,0 +1,922 @@ +@library asmjit +@ +@define NULL (0) +@ +@define asmjit::kInvalidValue (0xFFFFFFFF) +@ +@define asmjit::kOperandTypeNone (0x0) +@define asmjit::kOperandTypeReg (0x1) +@define asmjit::kOperandTypeVar (0x2) +@define asmjit::kOperandTypeMem (0x3) +@define asmjit::kOperandTypeImm (0x4) +@define asmjit::kOperandTypeLabel (0x5) +@ +@define asmjit::kMemTypeBaseIndex (0x0) +@define asmjit::kMemTypeStackIndex (0x1) +@define asmjit::kMemTypeLabel (0x2) +@define asmjit::kMemTypeAbsolute (0x3) +@ +@define asmjit::kVarAttrInReg (0x00000001) +@define asmjit::kVarAttrOutReg (0x00000002) +@define asmjit::kVarAttrInOutReg (0x00000003) +@define asmjit::kVarAttrInMem (0x00000004) +@define asmjit::kVarAttrOutMem (0x00000008) +@define asmjit::kVarAttrInOutMem (0x0000000C) +@define asmjit::kVarAttrInDecide (0x00000010) +@define asmjit::kVarAttrOutDecide (0x00000020) +@define asmjit::kVarAttrInOutDecide (0x00000030) +@define asmjit::kVarAttrInConv (0x00000040) +@define asmjit::kVarAttrOutConv (0x00000080) +@define asmjit::kVarAttrInOutConv (0x000000C0) +@define asmjit::kVarAttrInCall (0x00000100) +@define asmjit::kVarAttrInArg (0x00000200) +@define asmjit::kVarAttrInStack (0x00000400) +@define asmjit::kVarAttrOutRet (0x00000800) +@define asmjit::kVarAttrUnuse (0x00001000) +@ +@define asmjit::kVarTypeInt8 (0x0) +@define asmjit::kVarTypeUInt8 (0x1) +@define asmjit::kVarTypeInt16 (0x2) +@define asmjit::kVarTypeUInt16 (0x3) +@define asmjit::kVarTypeInt32 (0x4) +@define asmjit::kVarTypeUInt32 (0x5) +@define asmjit::kVarTypeInt64 (0x6) +@define asmjit::kVarTypeUInt64 (0x7) +@define asmjit::kVarTypeFp32 (0x8) +@define asmjit::kVarTypeFp64 (0x9) +@define asmjit::kVarTypeFpEx (0xA) +@define asmjit::kVarTypeInvalid (0xFF) +@ +@define asmjit::x86x64::kVarTypeMm (0xB) +@define asmjit::x86x64::kVarTypeXmm (0xC) +@define asmjit::x86x64::kVarTypeXmmSs (0xD) +@define asmjit::x86x64::kVarTypeXmmPs (0xE) +@define asmjit::x86x64::kVarTypeXmmSd (0xF) +@define asmjit::x86x64::kVarTypeXmmPd (0x10) +@ +@define asmjit::kVarStateUnused (0x0) +@define asmjit::kVarStateReg (0x1) +@define asmjit::kVarStateMem (0x2) +@ +@define asmjit::kNodeTypeNone (0x0) +@define asmjit::kNodeTypeAlign (0x1) +@define asmjit::kNodeTypeEmbed (0x2) +@define asmjit::kNodeTypeComment (0x3) +@define asmjit::kNodeTypeMark (0x4) +@define 
asmjit::kNodeTypeHint (0x5) +@define asmjit::kNodeTypeTarget (0x6) +@define asmjit::kNodeTypeInst (0x7) +@define asmjit::kNodeTypeFunc (0x8) +@define asmjit::kNodeTypeEnd (0x9) +@define asmjit::kNodeTypeRet (0xA) +@define asmjit::kNodeTypeCall (0xB) +@ +@define asmjit::kNodeFlagIsTranslated (0x0001) +@define asmjit::kNodeFlagIsJmp (0x0002) +@define asmjit::kNodeFlagIsJcc (0x0004) +@define asmjit::kNodeFlagIsTaken (0x0008) +@define asmjit::kNodeFlagIsRet (0x0010) +@define asmjit::kNodeFlagIsSpecial (0x0020) +@define asmjit::kNodeFlagIsFp (0x0040) + +[Visualizer] + +; ============================================================================= +; [asmjit::base] +; ============================================================================= + +asmjit::PodVector<*> { + preview( + #( + "[", $e._length, "]", + "(", + #array( + expr: ((($T1*)($e._d + 1))[$i]), + size: $e._d->length + ), + ")" + ) + ) + + children( + #( + #([...]: [$c,!]), + + #array( + expr: ((($T1*)($e._d + 1))[$i]), + size: $e._d->length + ) + ) + ) +} + +; ============================================================================= +; [asmjit::x86x64 - Operand] +; ============================================================================= + +asmjit::Operand { + preview( + #( + #if ($e._base.op == asmjit::kOperandTypeReg) ([(*(asmjit::BaseReg *) &$e)]) + #elif ($e._base.op == asmjit::kOperandTypeVar) ([(*(asmjit::BaseVar *) &$e)]) + #elif ($e._base.op == asmjit::kOperandTypeMem) ([(*(asmjit::BaseMem *) &$e)]) + #elif ($e._base.op == asmjit::kOperandTypeImm) ([(*(asmjit::Imm *) &$e)]) + #elif ($e._base.op == asmjit::kOperandTypeLabel) ([(*(asmjit::Label *) &$e)]) + #else ("noOperand") + ) + ) + + children( + #( + #([...]: [$c,!]), + #(base: [$e._base]), + #(reg: [(*(asmjit::BaseReg*) &$e)]), + #(var: [(*(asmjit::BaseVar*) &$e)]), + #(mem: [(*(asmjit::BaseMem*) &$e)]), + #(imm: [(*(asmjit::Imm*) &$e)]), + #(label: [(*(asmjit::Label*) &$e)]) + ) + ) +} + +asmjit::BaseReg|asmjit::x86x64::X86Reg|asmjit::x86x64::GpReg|asmjit::x86x64::FpReg|asmjit::x86x64::MmReg|asmjit::x86x64::XmmReg|asmjit::x86x64::SegReg { + preview( + #( + #if ($e._reg.code == 0x0100) ("al") + #elif ($e._reg.code == 0x0101) ("cl") + #elif ($e._reg.code == 0x0102) ("dl") + #elif ($e._reg.code == 0x0103) ("bl") + #elif ($e._reg.code == 0x0104) ("spl") + #elif ($e._reg.code == 0x0105) ("bpl") + #elif ($e._reg.code == 0x0106) ("sil") + #elif ($e._reg.code == 0x0107) ("dil") + #elif ($e._reg.code == 0x0108) ("r8b") + #elif ($e._reg.code == 0x0109) ("r9b") + #elif ($e._reg.code == 0x010A) ("r10b") + #elif ($e._reg.code == 0x010B) ("r11b") + #elif ($e._reg.code == 0x010C) ("r12b") + #elif ($e._reg.code == 0x010D) ("r13b") + #elif ($e._reg.code == 0x010E) ("r14b") + #elif ($e._reg.code == 0x010F) ("r15b") + + #elif ($e._reg.code == 0x0200) ("ah") + #elif ($e._reg.code == 0x0201) ("ch") + #elif ($e._reg.code == 0x0202) ("dh") + #elif ($e._reg.code == 0x0203) ("bh") + + #elif ($e._reg.code == 0x1000) ("ax") + #elif ($e._reg.code == 0x1001) ("cx") + #elif ($e._reg.code == 0x1002) ("dx") + #elif ($e._reg.code == 0x1003) ("bx") + #elif ($e._reg.code == 0x1004) ("sp") + #elif ($e._reg.code == 0x1005) ("bp") + #elif ($e._reg.code == 0x1006) ("si") + #elif ($e._reg.code == 0x1007) ("di") + #elif ($e._reg.code == 0x1008) ("r8w") + #elif ($e._reg.code == 0x1009) ("r9w") + #elif ($e._reg.code == 0x100A) ("r10w") + #elif ($e._reg.code == 0x100B) ("r11w") + #elif ($e._reg.code == 0x100C) ("r12w") + #elif ($e._reg.code == 0x100D) ("r13w") + #elif ($e._reg.code == 0x100E) ("r14w") 
+ #elif ($e._reg.code == 0x100F) ("r15w") + + #elif ($e._reg.code == 0x2000) ("eax") + #elif ($e._reg.code == 0x2001) ("ecx") + #elif ($e._reg.code == 0x2002) ("edx") + #elif ($e._reg.code == 0x2003) ("ebx") + #elif ($e._reg.code == 0x2004) ("esp") + #elif ($e._reg.code == 0x2005) ("ebp") + #elif ($e._reg.code == 0x2006) ("esi") + #elif ($e._reg.code == 0x2007) ("edi") + #elif ($e._reg.code == 0x2008) ("r8d") + #elif ($e._reg.code == 0x2009) ("r9d") + #elif ($e._reg.code == 0x200A) ("r10d") + #elif ($e._reg.code == 0x200B) ("r11d") + #elif ($e._reg.code == 0x200C) ("r12d") + #elif ($e._reg.code == 0x200D) ("r13d") + #elif ($e._reg.code == 0x200E) ("r14d") + #elif ($e._reg.code == 0x200F) ("r15d") + + #elif ($e._reg.code == 0x3000) ("rax") + #elif ($e._reg.code == 0x3001) ("rcx") + #elif ($e._reg.code == 0x3002) ("rdx") + #elif ($e._reg.code == 0x3003) ("rbx") + #elif ($e._reg.code == 0x3004) ("rsp") + #elif ($e._reg.code == 0x3005) ("rbp") + #elif ($e._reg.code == 0x3006) ("rsi") + #elif ($e._reg.code == 0x3007) ("rdi") + #elif ($e._reg.code == 0x3008) ("r8") + #elif ($e._reg.code == 0x3009) ("r9") + #elif ($e._reg.code == 0x300A) ("r10") + #elif ($e._reg.code == 0x300B) ("r11") + #elif ($e._reg.code == 0x300C) ("r12") + #elif ($e._reg.code == 0x300D) ("r13") + #elif ($e._reg.code == 0x300E) ("r14") + #elif ($e._reg.code == 0x300F) ("r15") + + #elif ($e._reg.code == 0x5000) ("fp0") + #elif ($e._reg.code == 0x5001) ("fp1") + #elif ($e._reg.code == 0x5002) ("fp2") + #elif ($e._reg.code == 0x5003) ("fp3") + #elif ($e._reg.code == 0x5004) ("fp4") + #elif ($e._reg.code == 0x5005) ("fp5") + #elif ($e._reg.code == 0x5006) ("fp6") + #elif ($e._reg.code == 0x5007) ("fp7") + + #elif ($e._reg.code == 0x6000) ("mm0") + #elif ($e._reg.code == 0x6001) ("mm1") + #elif ($e._reg.code == 0x6002) ("mm2") + #elif ($e._reg.code == 0x6003) ("mm3") + #elif ($e._reg.code == 0x6004) ("mm4") + #elif ($e._reg.code == 0x6005) ("mm5") + #elif ($e._reg.code == 0x6006) ("mm6") + #elif ($e._reg.code == 0x6007) ("mm7") + + #elif ($e._reg.code == 0x7000) ("xmm0") + #elif ($e._reg.code == 0x7001) ("xmm1") + #elif ($e._reg.code == 0x7002) ("xmm2") + #elif ($e._reg.code == 0x7003) ("xmm3") + #elif ($e._reg.code == 0x7004) ("xmm4") + #elif ($e._reg.code == 0x7005) ("xmm5") + #elif ($e._reg.code == 0x7006) ("xmm6") + #elif ($e._reg.code == 0x7007) ("xmm7") + #elif ($e._reg.code == 0x7008) ("xmm8") + #elif ($e._reg.code == 0x7009) ("xmm9") + #elif ($e._reg.code == 0x700A) ("xmm10") + #elif ($e._reg.code == 0x700B) ("xmm11") + #elif ($e._reg.code == 0x700C) ("xmm12") + #elif ($e._reg.code == 0x700D) ("xmm13") + #elif ($e._reg.code == 0x700E) ("xmm14") + #elif ($e._reg.code == 0x700F) ("xmm15") + + #elif ($e._reg.code == 0x8000) ("ymm0") + #elif ($e._reg.code == 0x8001) ("ymm1") + #elif ($e._reg.code == 0x8002) ("ymm2") + #elif ($e._reg.code == 0x8003) ("ymm3") + #elif ($e._reg.code == 0x8004) ("ymm4") + #elif ($e._reg.code == 0x8005) ("ymm5") + #elif ($e._reg.code == 0x8006) ("ymm6") + #elif ($e._reg.code == 0x8007) ("ymm7") + #elif ($e._reg.code == 0x8008) ("ymm8") + #elif ($e._reg.code == 0x8009) ("ymm9") + #elif ($e._reg.code == 0x800A) ("ymm10") + #elif ($e._reg.code == 0x800B) ("ymm11") + #elif ($e._reg.code == 0x800C) ("ymm12") + #elif ($e._reg.code == 0x800D) ("ymm13") + #elif ($e._reg.code == 0x800E) ("ymm14") + #elif ($e._reg.code == 0x800F) ("ymm15") + + #elif ($e._reg.code == 0xD000) ("es") + #elif ($e._reg.code == 0xD001) ("cs") + #elif ($e._reg.code == 0xD002) ("ss") + #elif ($e._reg.code == 0xD003) ("ds") + #elif 
($e._reg.code == 0xD004) ("fs") + #elif ($e._reg.code == 0xD005) ("gs") + + #else ("noReg") + ) + ) + + children( + #( + #([...]: [$c,!]), + #(op: [$e._reg.op, x]), + #(size: [$e._reg.size, u]), + #(code: [$e._reg.code, x]) + ) + ) +} + +asmjit::BaseVar|asmjit::x86x64::X86Var|asmjit::x86x64::GpVar|asmjit::x86x64::FpVar|asmjit::x86x64::MmVar|asmjit::x86x64::XmmVar { + preview( + #( + #if ($e._var.varType == asmjit::kVarTypeInt8) ("gpbLo") + #elif ($e._var.varType == asmjit::kVarTypeUInt8) ("gpbLo") + #elif ($e._var.varType == asmjit::kVarTypeInt16) ("gpw") + #elif ($e._var.varType == asmjit::kVarTypeUInt16) ("gpw") + #elif ($e._var.varType == asmjit::kVarTypeInt32) ("gpd") + #elif ($e._var.varType == asmjit::kVarTypeUInt32) ("gpd") + #elif ($e._var.varType == asmjit::kVarTypeInt64) ("gpq") + #elif ($e._var.varType == asmjit::kVarTypeUInt64) ("gpq") + #elif ($e._var.varType == asmjit::kVarTypeFp32) ("fp32") + #elif ($e._var.varType == asmjit::kVarTypeFp64) ("fp64") + #elif ($e._var.varType == asmjit::kVarTypeFpEx) ("fpEx") + #elif ($e._var.varType == asmjit::x86x64::kVarTypeMm) ("mm") + #elif ($e._var.varType == asmjit::x86x64::kVarTypeXmm) ("xmm") + #elif ($e._var.varType == asmjit::x86x64::kVarTypeXmmSs) ("xmmSs") + #elif ($e._var.varType == asmjit::x86x64::kVarTypeXmmSd) ("xmmSd") + #elif ($e._var.varType == asmjit::x86x64::kVarTypeXmmPs) ("xmmPs") + #elif ($e._var.varType == asmjit::x86x64::kVarTypeXmmPd) ("xmmPd") + #elif ($e._var.varType == asmjit::x86x64::kVarTypeYmm) ("ymm") + #elif ($e._var.varType == asmjit::x86x64::kVarTypeYmmPs) ("ymmPs") + #elif ($e._var.varType == asmjit::x86x64::kVarTypeYmmPd) ("ymmPd") + #else ("noVar"), + "(", + "id=", + #if ($e._var.id != asmjit::kInvalidValue) ( + [$e._var.id, x] + ) + #else ( + "INVALID" + ), + ")" + ) + ) + + children( + #( + #([...]: [$c,!]), + #(op: [$e._var.op, x]), + #(size: [$e._var.size, u]), + #(id: [$e._var.id, x]), + #(code: [$e._var.code, x]), + #(varType: [$e._var.varType, x]) + ) + ) +} + +asmjit::BaseMem|asmjit::x86x64::Mem { + preview( + #( + #if ($e._mem.size == 1) ("byte ptr") + #elif ($e._mem.size == 2) ("word ptr") + #elif ($e._mem.size == 4) ("dword ptr") + #elif ($e._mem.size == 8) ("qword ptr") + #elif ($e._mem.size == 10) ("tword ptr") + #elif ($e._mem.size == 16) ("dqword ptr") + #elif ($e._mem.size == 32) ("qqword ptr") + #else ("ptr"), + + #if ($e._mem.segment == 0) (" es:") + #elif ($e._mem.segment == 1) (" cs:") + #elif ($e._mem.segment == 2) (" ss:") + #elif ($e._mem.segment == 3) (" ds:") + #elif ($e._mem.segment == 4) (" fs:") + #elif ($e._mem.segment == 5) (" gs:") + #else (""), + + "[", + + #if ($e._mem.id == asmjit::kInvalidValue) ( + #( + #if ($e._mem.type == asmjit::kMemTypeBaseIndex) ( + #if ((sizeof(uintptr_t) == 4) && ($e._mem.sizePrefix == 1)) ( + #if (($e._mem.base & 0xFF) == 0x0) ("ax") + #elif (($e._mem.base & 0xFF) == 0x1) ("cx") + #elif (($e._mem.base & 0xFF) == 0x2) ("dx") + #elif (($e._mem.base & 0xFF) == 0x3) ("bx") + #elif (($e._mem.base & 0xFF) == 0x4) ("sp") + #elif (($e._mem.base & 0xFF) == 0x5) ("bp") + #elif (($e._mem.base & 0xFF) == 0x6) ("si") + #elif (($e._mem.base & 0xFF) == 0x7) ("di") + #elif (($e._mem.base & 0xFF) == 0x8) ("r8w") + #elif (($e._mem.base & 0xFF) == 0x9) ("r9w") + #elif (($e._mem.base & 0xFF) == 0xA) ("r10w") + #elif (($e._mem.base & 0xFF) == 0xB) ("r11w") + #elif (($e._mem.base & 0xFF) == 0xC) ("r12w") + #elif (($e._mem.base & 0xFF) == 0xD) ("r13w") + #elif (($e._mem.base & 0xFF) == 0xE) ("r14w") + #elif (($e._mem.base & 0xFF) == 0xF) ("r15w") + #else ("INVALID") 
+ ) + #elif ((sizeof(uintptr_t) == 4) || ($e._mem.sizePrefix == 1)) ( + #if (($e._mem.base & 0xFF) == 0x0) ("eax") + #elif (($e._mem.base & 0xFF) == 0x1) ("ecx") + #elif (($e._mem.base & 0xFF) == 0x2) ("edx") + #elif (($e._mem.base & 0xFF) == 0x3) ("ebx") + #elif (($e._mem.base & 0xFF) == 0x4) ("esp") + #elif (($e._mem.base & 0xFF) == 0x5) ("ebp") + #elif (($e._mem.base & 0xFF) == 0x6) ("esi") + #elif (($e._mem.base & 0xFF) == 0x7) ("edi") + #elif (($e._mem.base & 0xFF) == 0x8) ("r8d") + #elif (($e._mem.base & 0xFF) == 0x9) ("r9d") + #elif (($e._mem.base & 0xFF) == 0xA) ("r10d") + #elif (($e._mem.base & 0xFF) == 0xB) ("r11d") + #elif (($e._mem.base & 0xFF) == 0xC) ("r12d") + #elif (($e._mem.base & 0xFF) == 0xD) ("r13d") + #elif (($e._mem.base & 0xFF) == 0xE) ("r14d") + #elif (($e._mem.base & 0xFF) == 0xF) ("r15d") + #else ("INVALID") + ) + #else ( + #if (($e._mem.base & 0xFF) == 0x0) ("rax") + #elif (($e._mem.base & 0xFF) == 0x1) ("rcx") + #elif (($e._mem.base & 0xFF) == 0x2) ("rdx") + #elif (($e._mem.base & 0xFF) == 0x3) ("rbx") + #elif (($e._mem.base & 0xFF) == 0x4) ("rsp") + #elif (($e._mem.base & 0xFF) == 0x5) ("rbp") + #elif (($e._mem.base & 0xFF) == 0x6) ("rsi") + #elif (($e._mem.base & 0xFF) == 0x7) ("rdi") + #elif (($e._mem.base & 0xFF) == 0x8) ("r8") + #elif (($e._mem.base & 0xFF) == 0x9) ("r9") + #elif (($e._mem.base & 0xFF) == 0xA) ("r10") + #elif (($e._mem.base & 0xFF) == 0xB) ("r11") + #elif (($e._mem.base & 0xFF) == 0xC) ("r12") + #elif (($e._mem.base & 0xFF) == 0xD) ("r13") + #elif (($e._mem.base & 0xFF) == 0xE) ("r14") + #elif (($e._mem.base & 0xFF) == 0xF) ("r15") + #else ("INVALID") + ) + ) + #elif ($e._mem.type == asmjit::kMemTypeLabel) ( + #( + "L.", + #if ($e._mem.base != asmjit::kInvalidValue) ( + [$e._mem.base & 0x3FFFFFFF, x] + ) + #else ( + "INVALID" + ) + ) + ) + #else ( + [$e._mem.target] + ) + ) + ) + #else ( + #("{id=", [$e._mem.id, x], "}") + ), + + #if ($e._mem.index != asmjit::kInvalidValue) ( + #( + " + ", + + #if ((sizeof(uintptr_t) == 4) && ($e._mem.sizePrefix == 1)) ( + #if (($e._mem.index & 0xFF) == 0x0) ("ax") + #elif (($e._mem.index & 0xFF) == 0x1) ("cx") + #elif (($e._mem.index & 0xFF) == 0x2) ("dx") + #elif (($e._mem.index & 0xFF) == 0x3) ("bx") + #elif (($e._mem.index & 0xFF) == 0x4) ("sp") + #elif (($e._mem.index & 0xFF) == 0x5) ("bp") + #elif (($e._mem.index & 0xFF) == 0x6) ("si") + #elif (($e._mem.index & 0xFF) == 0x7) ("di") + #elif (($e._mem.index & 0xFF) == 0x8) ("r8w") + #elif (($e._mem.index & 0xFF) == 0x9) ("r9w") + #elif (($e._mem.index & 0xFF) == 0xA) ("r10w") + #elif (($e._mem.index & 0xFF) == 0xB) ("r11w") + #elif (($e._mem.index & 0xFF) == 0xC) ("r12w") + #elif (($e._mem.index & 0xFF) == 0xD) ("r13w") + #elif (($e._mem.index & 0xFF) == 0xE) ("r14w") + #elif (($e._mem.index & 0xFF) == 0xF) ("r15w") + #else ("INVALID") + ) + #elif ((sizeof(uintptr_t) == 4) || ($e._mem.sizePrefix == 1)) ( + #if (($e._mem.index & 0xFF) == 0x0) ("eax") + #elif (($e._mem.index & 0xFF) == 0x1) ("ecx") + #elif (($e._mem.index & 0xFF) == 0x2) ("edx") + #elif (($e._mem.index & 0xFF) == 0x3) ("ebx") + #elif (($e._mem.index & 0xFF) == 0x4) ("esp") + #elif (($e._mem.index & 0xFF) == 0x5) ("ebp") + #elif (($e._mem.index & 0xFF) == 0x6) ("esi") + #elif (($e._mem.index & 0xFF) == 0x7) ("edi") + #elif (($e._mem.index & 0xFF) == 0x8) ("r8d") + #elif (($e._mem.index & 0xFF) == 0x9) ("r9d") + #elif (($e._mem.index & 0xFF) == 0xA) ("r10d") + #elif (($e._mem.index & 0xFF) == 0xB) ("r11d") + #elif (($e._mem.index & 0xFF) == 0xC) ("r12d") + #elif (($e._mem.index & 0xFF) == 
0xD) ("r13d") + #elif (($e._mem.index & 0xFF) == 0xE) ("r14d") + #elif (($e._mem.index & 0xFF) == 0xF) ("r15d") + #else ("INVALID") + ) + #else ( + #if (($e._mem.index & 0xFF) == 0x0) ("rax") + #elif (($e._mem.index & 0xFF) == 0x1) ("rcx") + #elif (($e._mem.index & 0xFF) == 0x2) ("rdx") + #elif (($e._mem.index & 0xFF) == 0x3) ("rbx") + #elif (($e._mem.index & 0xFF) == 0x4) ("rsp") + #elif (($e._mem.index & 0xFF) == 0x5) ("rbp") + #elif (($e._mem.index & 0xFF) == 0x6) ("rsi") + #elif (($e._mem.index & 0xFF) == 0x7) ("rdi") + #elif (($e._mem.index & 0xFF) == 0x8) ("r8") + #elif (($e._mem.index & 0xFF) == 0x9) ("r9") + #elif (($e._mem.index & 0xFF) == 0xA) ("r10") + #elif (($e._mem.index & 0xFF) == 0xB) ("r11") + #elif (($e._mem.index & 0xFF) == 0xC) ("r12") + #elif (($e._mem.index & 0xFF) == 0xD) ("r13") + #elif (($e._mem.index & 0xFF) == 0xE) ("r14") + #elif (($e._mem.index & 0xFF) == 0xF) ("r15") + #else ("INVALID") + ), + + #if ($e._mem.shift == 1) (" * 2") + #elif ($e._mem.shift == 2) (" * 4") + #elif ($e._mem.shift == 3) (" * 8") + #else ("") + ) + ), + + #if (($e._mem.type != asmjit::kMemTypeAbsolute) && ($e._mem.displacement != 0)) ( + #if ($e._mem.displacement < 0) ( + #(" - ", [-$e._mem.displacement, i]) + ) + #else ( + #(" + ", [$e._mem.displacement, i]) + ) + ), + + "]" + ) + ) + + children( + #( + #([...]: [$c,!]), + + #(op: [$e._mem.op, x]), + #(size: [$e._mem.size, u]), + + #(type: [$e._mem.type, u]), + #(segment: [$e._mem.segment, u]), + + #(sizePrefix: [$e._mem.sizePrefix, u]), + #(shift: [$e._mem.shift, u]), + + #(id: [$e._mem.id, x]), + #(base: [$e._mem.base, u]), + #(index: [$e._mem.index, u]), + + #(target: [$e._mem.target]), + #(displacement: [$e._mem.displacement, i]) + ) + ) +} + +asmjit::Imm { + preview( + #( + "i=", [(int64_t)$e._imm.value, i], + " ", + "u=", [(uint64_t)$e._imm.value, u], + " ", + "x=", [(uint64_t)$e._imm.value, x] + ) + ) + + children( + #( + #([...]: [$c,!]), + + #(op: [$e._imm.op, x]), + #(size: [$e._imm.size, u]), + + #(value_s: [(int64_t)$e._imm.value, i]), + #(value_u: [(uint64_t)$e._imm.value, u]), + #(value_x: [(uint64_t)$e._imm.value, x]) + ) + ) +} + +asmjit::Label { + preview( + #( + "L_", + #if ($e._label.id != asmjit::kInvalidValue) ( + [$e._label.id, x] + ) + #else ( + "INVALID" + ), + "" + ) + ) + + children( + #( + #([...]: [$c,!]), + + #(op: [$e._label.op, x]), + #(size: [$e._label.size, u]), + + #(id: [$e._label.id, x]) + ) + ) +} + +; ============================================================================= +; [asmjit::x86x64 - RegMask] +; ============================================================================= + +asmjit::x86x64::RegMask { + preview( + #( + #if (($e._gp | $e._fp | $e._mm | $e._xy) != 0) ( + #( + #if ($e._gp != 0) ( + #( + "gp=", [$e._gp, x], + #if ($e._gp & 0x0001) ("|rax"), + #if ($e._gp & 0x0002) ("|rcx"), + #if ($e._gp & 0x0004) ("|rdx"), + #if ($e._gp & 0x0008) ("|rbx"), + #if ($e._gp & 0x0010) ("|rsp"), + #if ($e._gp & 0x0020) ("|rbp"), + #if ($e._gp & 0x0040) ("|rsi"), + #if ($e._gp & 0x0080) ("|rdi"), + #if ($e._gp & 0x0100) ("|r8"), + #if ($e._gp & 0x0200) ("|r9"), + #if ($e._gp & 0x0400) ("|r10"), + #if ($e._gp & 0x0800) ("|r11"), + #if ($e._gp & 0x1000) ("|r12"), + #if ($e._gp & 0x2000) ("|r13"), + #if ($e._gp & 0x4000) ("|r14"), + #if ($e._gp & 0x8000) ("|r15"), + #if (($e._fp | $e._mm | $e._xy) != 0) (" ") + ) + ), + + #if ($e._fp != 0) ( + #( + "fp=", [$e._fp, x], + #if ($e._fp & 0x0001) ("|fp0"), + #if ($e._fp & 0x0002) ("|fp1"), + #if ($e._fp & 0x0004) ("|fp2"), + #if ($e._fp & 0x0008) 
("|fp3"), + #if ($e._fp & 0x0010) ("|fp4"), + #if ($e._fp & 0x0020) ("|fp5"), + #if ($e._fp & 0x0040) ("|fp6"), + #if ($e._fp & 0x0080) ("|fp7"), + #if (($e._mm | $e._xy) != 0) (" ") + ) + ), + + #if ($e._mm != 0) ( + #( + "mm=", [$e._mm, x], + #if ($e._mm & 0x0001) ("|mm0"), + #if ($e._mm & 0x0002) ("|mm1"), + #if ($e._mm & 0x0004) ("|mm2"), + #if ($e._mm & 0x0008) ("|mm3"), + #if ($e._mm & 0x0010) ("|mm4"), + #if ($e._mm & 0x0020) ("|mm5"), + #if ($e._mm & 0x0040) ("|mm6"), + #if ($e._mm & 0x0080) ("|mm7"), + #if ($e._xy != 0) (" ") + ) + ), + + #if ($e._xy != 0) ( + #( + "xy=", [$e._xy, x], + #if ($e._xy & 0x0001) ("|xy0"), + #if ($e._xy & 0x0002) ("|xy1"), + #if ($e._xy & 0x0004) ("|xy2"), + #if ($e._xy & 0x0008) ("|xy3"), + #if ($e._xy & 0x0010) ("|xy4"), + #if ($e._xy & 0x0020) ("|xy5"), + #if ($e._xy & 0x0040) ("|xy6"), + #if ($e._xy & 0x0080) ("|xy7"), + #if ($e._xy & 0x0100) ("|xy8"), + #if ($e._xy & 0x0200) ("|xy9"), + #if ($e._xy & 0x0400) ("|xy10"), + #if ($e._xy & 0x0800) ("|xy11"), + #if ($e._xy & 0x1000) ("|xy12"), + #if ($e._xy & 0x2000) ("|xy13"), + #if ($e._xy & 0x4000) ("|xy14"), + #if ($e._xy & 0x8000) ("|xy15") + ) + ) + ) + ) + #else ( + "empty" + ) + ) + ) + + children( + #( + #([...]: [$c,!]), + + #(gp: [$e._gp, x]), + #(fp: [$e._fp, x]), + #(mm: [$e._mm, x]), + #(xy: [$e._xy, x]) + ) + ) +} + +; ============================================================================= +; [asmjit::x86x64 - Var] +; ============================================================================= + +asmjit::BaseVarAttr|asmjit::x86x64::VarAttr { + preview( + #( + "VarAttr(", + #if ($e._vd != NULL) ( + #( + "id=", + [$e._vd->_id, x], + " ", + + #if (($e._vd->_contextId) != asmjit::kInvalidValue) ( + #("cid=", [($e._vd->_contextId), u], " ") + ), + + #if (($e._vd->_name) != NULL) ( + #("name=", [($e._vd->_name), s], " ") + ), + + "state=", + #if ($e._vd->_state == asmjit::kVarStateUnused) ("unused") + #elif ($e._vd->_state == asmjit::kVarStateReg) (#("reg|", [$e._vd->_regIndex, u])) + #elif ($e._vd->_state == asmjit::kVarStateMem) ("mem") + #else ("INVALID"), + " ", + + #if (($e._flags & (asmjit::kVarAttrInReg | asmjit::kVarAttrInMem | asmjit::kVarAttrInDecide | asmjit::kVarAttrInConv | asmjit::kVarAttrInCall | asmjit::kVarAttrInArg | asmjit::kVarAttrInStack)) != 0) ( + #( + "in[", + #if (($e._flags & asmjit::kVarAttrInReg) != 0) ("reg "), + #if (($e._flags & asmjit::kVarAttrInMem) != 0) ("mem "), + #if (($e._flags & asmjit::kVarAttrInDecide) != 0) ("decide "), + #if (($e._flags & asmjit::kVarAttrInConv) != 0) ("conv "), + #if (($e._flags & asmjit::kVarAttrInCall) != 0) ("call "), + #if (($e._flags & asmjit::kVarAttrInArg) != 0) ("arg "), + #if (($e._flags & asmjit::kVarAttrInStack) != 0) ("stack "), + "] " + ) + ), + #if (($e._flags & (asmjit::kVarAttrOutReg | asmjit::kVarAttrOutMem | asmjit::kVarAttrOutDecide | asmjit::kVarAttrOutConv | asmjit::kVarAttrOutRet)) != 0) ( + #( + "out[", + #if (($e._flags & asmjit::kVarAttrOutReg) != 0) ("reg "), + #if (($e._flags & asmjit::kVarAttrOutMem) != 0) ("mem "), + #if (($e._flags & asmjit::kVarAttrOutDecide) != 0) ("decide "), + #if (($e._flags & asmjit::kVarAttrOutConv) != 0) ("conv "), + #if (($e._flags & asmjit::kVarAttrOutRet) != 0) ("ret "), + "] " + ) + ), + #if (($e._flags & asmjit::kVarAttrUnuse) == asmjit::kVarAttrUnuse) ("unuse ") + ) + ) + #else ( + "INVALID " + ), + ")" + ) + ) + + children( + #( + #([...]: [$c,!]), + + #(vd: [(asmjit::x86x64::VarData*)$e._vd]), + #(flags: [$e._flags, x]), + #(varCount: [$e._varCount, u]), + 
#(argCount: [$e._argCount, u]), + #(inRegIndex: [$e._inRegIndex, u]), + #(outRegIndex: [$e._outRegIndex, u]), + #(inRegs: [$e._inRegs, x]), + #(allocableRegs: [$e._allocableRegs, x]) + ) + ) +} + +asmjit::BaseVarInst|asmjit::x86x64::VarInst { + children( + #( + #([...]: [$c,!]), + + #(inRegs: [$e._inRegs]), + #(outRegs: [$e._outRegs]), + #(clobberedRegs: [$e._clobberedRegs]), + #(start: [$e._start]), + #(count: [$e._count]), + + #(vaCount: [$e._vaCount, u]), + #array( + expr: $e._list[$i], + size: $e._vaCount + ) + ) + ) +} + +; ============================================================================= +; [asmjit::X86 - Compiler - BaseNode] +; ============================================================================= + +asmjit::BaseNode|asmjit::AlignNode|asmjit::EmbedNode|asmjit::CommentNode|asmjit::MarkNode|asmjit::HintNode|asmjit::TargetNode|asmjit::InstNode|asmjit::JumpNode|asmjit::FuncNode|asmjit::EndNode|asmjit::RetNode|asmjit::x86x64::X86X64FuncNode|asmjit::x86x64::X86X64CallNode { + preview( + #( + #if ($e._type == asmjit::kNodeTypeAlign) ("AlignNode") + #elif ($e._type == asmjit::kNodeTypeEmbed) ("EmbedNode") + #elif ($e._type == asmjit::kNodeTypeComment) ("CommentNode") + #elif ($e._type == asmjit::kNodeTypeMark) ("MarkNode") + #elif ($e._type == asmjit::kNodeTypeHint) ("HintNode") + #elif ($e._type == asmjit::kNodeTypeTarget) ("TargetNode") + #elif ($e._type == asmjit::kNodeTypeInst) ("InstNode") + #elif ($e._type == asmjit::kNodeTypeFunc) ("FuncNode") + #elif ($e._type == asmjit::kNodeTypeEnd) ("EndNode") + #elif ($e._type == asmjit::kNodeTypeRet) ("RetNode") + #elif ($e._type == asmjit::kNodeTypeCall) ("CallNode") + #else ("BaseNode"), + + "(", + #if (($e._liveness) != NULL) ("analyzed "), + #if (($e._flags & asmjit::kNodeFlagIsTranslated) != 0) ("translated "), + #if (($e._flags & asmjit::kNodeFlagIsJmp) != 0) ("jmp "), + #if (($e._flags & asmjit::kNodeFlagIsJcc) != 0) ("jcc "), + #if (($e._flags & asmjit::kNodeFlagIsTaken) != 0) ("taken "), + #if (($e._flags & asmjit::kNodeFlagIsRet) != 0) ("ret "), + ")" + ) + ) + + children( + #( + #([...]: [$c,!]), + + #(prev: [$e._prev]), + #(next: [$e._next]), + + #(type: [$e._type]), + #(flags: [$e._flags]), + #(flowId: [$e._flowId]), + #(comment: [$e._comment]), + + #(varInst: [( (asmjit::x86x64::VarInst*)($e._varInst) )]), + #(liveness: [( (asmjit::VarBits*)($e._liveness) )]), + #(state: [( (asmjit::x86x64::VarState*)($e._state) )]), + + #if ($e._type == asmjit::kNodeTypeAlign) ( + #( + #(size : [( ((asmjit::AlignNode*)&$e)->_size )]) + ) + ) + #elif (($e._type == asmjit::kNodeTypeEmbed) && (($e._packedData.embed.size) <= (sizeof(uintptr_t)))) ( + #( + #(size : [( ((asmjit::EmbedNode*)&$e)->_size )]), + #(data : [( ((asmjit::EmbedNode*)&$e)->_data.buf )]) + ) + ) + #elif (($e._type == asmjit::kNodeTypeEmbed) && (($e._packedData.embed.size) > (sizeof(uintptr_t)))) ( + #( + #(size : [( ((asmjit::EmbedNode*)&$e)->_size )]), + #(data : [( ((asmjit::EmbedNode*)&$e)->_data.ptr )]) + ) + ) + #elif ($e._type == asmjit::kNodeTypeHint) ( + #( + #(var : [( (asmjit::x86x64::VarData*) (((asmjit::HintNode*)&$e)->_var) )]), + #(hint : [( ((asmjit::HintNode*)&$e)->_hint )]), + #(value : [( ((asmjit::HintNode*)&$e)->_value )]) + ) + ) + #elif ($e._type == asmjit::kNodeTypeTarget) ( + #( + #(label : [( ((asmjit::TargetNode*)&$e)->_label )]), + #(from : [( ((asmjit::TargetNode*)&$e)->_from )]), + #(numRefs: [( ((asmjit::TargetNode*)&$e)->_numRefs )]) + ) + ) + #elif ($e._type == asmjit::kNodeTypeInst) ( + #( + #(opCount: [(
((asmjit::x86x64::X86X64InstNode*)&$e)->_opCount )]), + #array( + expr: ( ((asmjit::x86x64::X86X64InstNode*)&$e)->_opList[$i] ), + size: ( ((asmjit::x86x64::X86X64InstNode*)&$e)->_opCount ) + ) + ) + ) + #elif ($e._type == asmjit::kNodeTypeFunc) ( + #( + #(entryTarget : [( ((asmjit::x86x64::X86X64FuncNode*)&$e)->_entryTarget )]), + #(exitTarget : [( ((asmjit::x86x64::X86X64FuncNode*)&$e)->_exitTarget )]), + #(decl : [( ((asmjit::x86x64::X86X64FuncNode*)&$e)->_x86Decl )]), + #(end : [( ((asmjit::x86x64::X86X64FuncNode*)&$e)->_end )]), + #(argList : [( ((asmjit::x86x64::X86X64FuncNode*)&$e)->_argList )]), + #(funcHints : [( ((asmjit::x86x64::X86X64FuncNode*)&$e)->_funcHints )]), + #(funcFlags : [( ((asmjit::x86x64::X86X64FuncNode*)&$e)->_funcFlags )]), + + #(expectedStackAlignment: [( ((asmjit::x86x64::X86X64FuncNode*)&$e)->_expectedStackAlignment )]), + #(requiredStackAlignment: [( ((asmjit::x86x64::X86X64FuncNode*)&$e)->_requiredStackAlignment )]), + + #(redZoneSize : [( ((asmjit::x86x64::X86X64FuncNode*)&$e)->_redZoneSize )]), + #(spillZoneSize : [( ((asmjit::x86x64::X86X64FuncNode*)&$e)->_spillZoneSize )]), + #(argStackSize : [( ((asmjit::x86x64::X86X64FuncNode*)&$e)->_argStackSize )]), + #(memStackSize : [( ((asmjit::x86x64::X86X64FuncNode*)&$e)->_memStackSize )]), + #(callStackSize : [( ((asmjit::x86x64::X86X64FuncNode*)&$e)->_callStackSize )]), + + ; X86. + #(saveRestoreRegs : [( ((asmjit::x86x64::X86X64FuncNode*)&$e)->_saveRestoreRegs )]), + + #(alignStackSize : [( ((asmjit::x86x64::X86X64FuncNode*)&$e)->_alignStackSize )]), + #(alignedMemStackSize : [( ((asmjit::x86x64::X86X64FuncNode*)&$e)->_alignedMemStackSize )]), + #(pushPopStackSize : [( ((asmjit::x86x64::X86X64FuncNode*)&$e)->_pushPopStackSize )]), + #(moveStackSize : [( ((asmjit::x86x64::X86X64FuncNode*)&$e)->_moveStackSize )]), + #(extraStackSize : [( ((asmjit::x86x64::X86X64FuncNode*)&$e)->_extraStackSize )]), + + #(stackFrameRegIndex : [( ((asmjit::x86x64::X86X64FuncNode*)&$e)->_stackFrameRegIndex )]), + #(stackFrameRegPreserved: [( ((asmjit::x86x64::X86X64FuncNode*)&$e)->_isStackFrameRegPreserved )]), + #(stackFrameCopyGpIndex : [( ((asmjit::x86x64::X86X64FuncNode*)&$e)->_stackFrameCopyGpIndex )]) + ) + ) + ) + ) +} diff --git a/tools/configure-mac-xcode.sh b/tools/configure-mac-xcode.sh new file mode 100644 index 0000000..2ed9b71 --- /dev/null +++ b/tools/configure-mac-xcode.sh @@ -0,0 +1,9 @@ +#!/bin/sh + +ASMJIT_CURRENT_DIR=`pwd` +ASMJIT_BUILD_DIR="build_xcode" + +mkdir ../${ASMJIT_BUILD_DIR} +cd ../${ASMJIT_BUILD_DIR} +cmake .. -G"Xcode" -DASMJIT_BUILD_SAMPLES=1 +cd ${ASMJIT_CURRENT_DIR} diff --git a/tools/configure-unix-makefiles-dbg.sh b/tools/configure-unix-makefiles-dbg.sh new file mode 100644 index 0000000..e83283c --- /dev/null +++ b/tools/configure-unix-makefiles-dbg.sh @@ -0,0 +1,9 @@ +#!/bin/sh + +ASMJIT_CURRENT_DIR=`pwd` +ASMJIT_BUILD_DIR="build_makefiles_dbg" + +mkdir ../${ASMJIT_BUILD_DIR} +cd ../${ASMJIT_BUILD_DIR} +cmake .. -G"Unix Makefiles" -DCMAKE_BUILD_TYPE=Debug -DASMJIT_BUILD_SAMPLES=1 +cd ${ASMJIT_CURRENT_DIR} diff --git a/tools/configure-unix-makefiles-rel.sh b/tools/configure-unix-makefiles-rel.sh new file mode 100644 index 0000000..b15b157 --- /dev/null +++ b/tools/configure-unix-makefiles-rel.sh @@ -0,0 +1,9 @@ +#!/bin/sh + +ASMJIT_CURRENT_DIR=`pwd` +ASMJIT_BUILD_DIR="build_makefiles_rel" + +mkdir ../${ASMJIT_BUILD_DIR} +cd ../${ASMJIT_BUILD_DIR} +cmake .. 
-G"Unix Makefiles" -DCMAKE_BUILD_TYPE=Release -DASMJIT_BUILD_SAMPLES=1 +cd ${ASMJIT_CURRENT_DIR} diff --git a/tools/configure-win-mingw-dbg.bat b/tools/configure-win-mingw-dbg.bat new file mode 100644 index 0000000..bf18899 --- /dev/null +++ b/tools/configure-win-mingw-dbg.bat @@ -0,0 +1,9 @@ +@echo off + +set ASMJIT_CURRENT_DIR=%CD% +set ASMJIT_BUILD_DIR="build_mingw_dbg" + +mkdir ..\%ASMJIT_BUILD_DIR% +cd ..\%ASMJIT_BUILD_DIR% +cmake .. -G"MinGW Makefiles" -DCMAKE_BUILD_TYPE=Debug -DASMJIT_BUILD_SAMPLES=1 +cd %ASMJIT_CURRENT_DIR% diff --git a/tools/configure-win-mingw-rel.bat b/tools/configure-win-mingw-rel.bat new file mode 100644 index 0000000..fc98213 --- /dev/null +++ b/tools/configure-win-mingw-rel.bat @@ -0,0 +1,9 @@ +@echo off + +set ASMJIT_CURRENT_DIR=%CD% +set ASMJIT_BUILD_DIR="build_mingw_rel" + +mkdir ..\%ASMJIT_BUILD_DIR% +cd ..\%ASMJIT_BUILD_DIR% +cmake .. -G"MinGW Makefiles" -DCMAKE_BUILD_TYPE=Release -DASMJIT_BUILD_SAMPLES=1 +cd %ASMJIT_CURRENT_DIR% diff --git a/tools/configure-win-vs2005-x64.bat b/tools/configure-win-vs2005-x64.bat new file mode 100644 index 0000000..65b2cf0 --- /dev/null +++ b/tools/configure-win-vs2005-x64.bat @@ -0,0 +1,9 @@ +@echo off + +set ASMJIT_CURRENT_DIR=%CD% +set ASMJIT_BUILD_DIR="build_vs2005_x64" + +mkdir ..\%ASMJIT_BUILD_DIR% +cd ..\%ASMJIT_BUILD_DIR% +cmake .. -G"Visual Studio 8 2005 Win64" -DASMJIT_BUILD_SAMPLES=1 +cd %ASMJIT_CURRENT_DIR% diff --git a/tools/configure-win-vs2005-x86.bat b/tools/configure-win-vs2005-x86.bat new file mode 100644 index 0000000..9482087 --- /dev/null +++ b/tools/configure-win-vs2005-x86.bat @@ -0,0 +1,9 @@ +@echo off + +set ASMJIT_CURRENT_DIR=%CD% +set ASMJIT_BUILD_DIR="build_vs2005_x86" + +mkdir ..\%ASMJIT_BUILD_DIR% +cd ..\%ASMJIT_BUILD_DIR% +cmake .. -G"Visual Studio 8 2005" -DASMJIT_BUILD_SAMPLES=1 +cd %ASMJIT_CURRENT_DIR% diff --git a/tools/configure-win-vs2008-x64.bat b/tools/configure-win-vs2008-x64.bat new file mode 100644 index 0000000..7b1f81d --- /dev/null +++ b/tools/configure-win-vs2008-x64.bat @@ -0,0 +1,9 @@ +@echo off + +set ASMJIT_CURRENT_DIR=%CD% +set ASMJIT_BUILD_DIR="build_vs2008_x64" + +mkdir ..\%ASMJIT_BUILD_DIR% +cd ..\%ASMJIT_BUILD_DIR% +cmake .. -G"Visual Studio 9 2008 Win64" -DASMJIT_BUILD_SAMPLES=1 +cd %ASMJIT_CURRENT_DIR% diff --git a/tools/configure-win-vs2008-x86.bat b/tools/configure-win-vs2008-x86.bat new file mode 100644 index 0000000..12d4bb3 --- /dev/null +++ b/tools/configure-win-vs2008-x86.bat @@ -0,0 +1,9 @@ +@echo off + +set ASMJIT_CURRENT_DIR=%CD% +set ASMJIT_BUILD_DIR="build_vs2008_x86" + +mkdir ..\%ASMJIT_BUILD_DIR% +cd ..\%ASMJIT_BUILD_DIR% +cmake .. -G"Visual Studio 9 2008" -DASMJIT_BUILD_SAMPLES=1 +cd %ASMJIT_CURRENT_DIR% diff --git a/tools/configure-win-vs2010-x64.bat b/tools/configure-win-vs2010-x64.bat new file mode 100644 index 0000000..b7c5eb4 --- /dev/null +++ b/tools/configure-win-vs2010-x64.bat @@ -0,0 +1,9 @@ +@echo off + +set ASMJIT_CURRENT_DIR=%CD% +set ASMJIT_BUILD_DIR="build_vs2010_x64" + +mkdir ..\%ASMJIT_BUILD_DIR% +cd ..\%ASMJIT_BUILD_DIR% +cmake .. -G"Visual Studio 10 Win64" -DASMJIT_BUILD_SAMPLES=1 +cd %ASMJIT_CURRENT_DIR% diff --git a/tools/configure-win-vs2010-x86.bat b/tools/configure-win-vs2010-x86.bat new file mode 100644 index 0000000..ec07acd --- /dev/null +++ b/tools/configure-win-vs2010-x86.bat @@ -0,0 +1,9 @@ +@echo off + +set ASMJIT_CURRENT_DIR=%CD% +set ASMJIT_BUILD_DIR="build_vs2010_x86" + +mkdir ..\%ASMJIT_BUILD_DIR% +cd ..\%ASMJIT_BUILD_DIR% +cmake .. 
-G"Visual Studio 10" -DASMJIT_BUILD_SAMPLES=1 +cd %ASMJIT_CURRENT_DIR% diff --git a/tools/configure-win-vs2013-x64.bat b/tools/configure-win-vs2013-x64.bat new file mode 100644 index 0000000..13d7318 --- /dev/null +++ b/tools/configure-win-vs2013-x64.bat @@ -0,0 +1,9 @@ +@echo off + +set ASMJIT_CURRENT_DIR=%CD% +set ASMJIT_BUILD_DIR="build_vs2013_x64" + +mkdir ..\%ASMJIT_BUILD_DIR% +cd ..\%ASMJIT_BUILD_DIR% +cmake .. -G"Visual Studio 12 Win64" -DASMJIT_BUILD_SAMPLES=1 +cd %ASMJIT_CURRENT_DIR% diff --git a/tools/configure-win-vs2013-x86.bat b/tools/configure-win-vs2013-x86.bat new file mode 100644 index 0000000..a068f8e --- /dev/null +++ b/tools/configure-win-vs2013-x86.bat @@ -0,0 +1,9 @@ +@echo off + +set ASMJIT_CURRENT_DIR=%CD% +set ASMJIT_BUILD_DIR="build_vs2013_x86" + +mkdir ..\%ASMJIT_BUILD_DIR% +cd ..\%ASMJIT_BUILD_DIR% +cmake .. -G"Visual Studio 12" -DASMJIT_BUILD_SAMPLES=1 +cd %ASMJIT_CURRENT_DIR% diff --git a/tools/doxygen.conf b/tools/doxygen.conf new file mode 100644 index 0000000..6ba327b --- /dev/null +++ b/tools/doxygen.conf @@ -0,0 +1,1634 @@ +# Doxyfile 1.7.1 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project +# +# All text after a hash (#) is considered a comment and will be ignored +# The format is: +# TAG = value [value, ...] +# For lists items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (" ") + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +# This tag specifies the encoding used for all characters in the config file +# that follow. The default is UTF-8 which is also the encoding used for all +# text before the first occurrence of this tag. Doxygen uses libiconv (or the +# iconv built into libc) for the transcoding. See +# http://www.gnu.org/software/libiconv for the list of possible encodings. + +DOXYFILE_ENCODING = UTF-8 + +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded +# by quotes) that should identify the project. + +PROJECT_NAME = AsmJit + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. +# This could be handy for archiving the generated documentation or +# if some version control system is used. + +PROJECT_NUMBER = 1.0 + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) +# base path where the generated documentation will be put. +# If a relative path is entered, it will be relative to the location +# where doxygen was started. If left blank the current directory will be used. + +OUTPUT_DIRECTORY = . + +# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create +# 4096 sub-directories (in 2 levels) under the output directory of each output +# format and will distribute the generated files over these directories. +# Enabling this option can be useful when feeding doxygen a huge amount of +# source files, where putting all generated files in the same directory would +# otherwise cause performance problems for the file system. + +CREATE_SUBDIRS = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. 
+# The default language is English; other supported languages are: +# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, +# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German, +# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English +# messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, +# Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrilic, Slovak, +# Slovene, Spanish, Swedish, Ukrainian, and Vietnamese. + +OUTPUT_LANGUAGE = English + +# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will +# include brief member descriptions after the members that are listed in +# the file and class documentation (similar to JavaDoc). +# Set to NO to disable this. + +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend +# the brief description of a member or function before the detailed description. +# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. + +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator +# that is used to form the text in various listings. Each string +# in this list, if found as the leading text of the brief description, will be +# stripped from the text and the result, after processing the whole list, is +# used as the annotated text. Otherwise, the brief description is used as-is. +# If left blank, the following values are used ("$name" is automatically +# replaced with the name of the entity): "The $name class" "The $name widget" +# "The $name file" "is" "provides" "specifies" "contains" +# "represents" "a" "an" "the" + +ABBREVIATE_BRIEF = "The $name class" \ + "The $name widget" \ + "The $name file" \ + is \ + provides \ + specifies \ + contains \ + represents \ + a \ + an \ + the + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# Doxygen will generate a detailed section even if there is only a brief +# description. + +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. + +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full +# path before a file's name in the file list and in the header files. If set +# to NO the shortest path that makes the file name unique will be used. + +FULL_PATH_NAMES = NO + +# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag +# can be used to strip a user-defined part of the path. Stripping is +# only done if one of the specified strings matches the left-hand part of +# the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the +# path to strip. + +STRIP_FROM_PATH = + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of +# the path mentioned in the documentation of a class, which tells +# the reader which header file to include in order to use a class. +# If left blank only the name of the header file containing the class +# definition is used. Otherwise one should specify the include paths that +# are normally passed to the compiler using the -I flag.
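+# Illustrative, hypothetical value for this source layout (not set by this +# configuration): STRIP_FROM_INC_PATH = ../../src would shorten the include +# hint shown for a class to e.g. "asmjit/base.h".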
+ +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter +# (but less readable) file names. This can be useful if your file system +# doesn't support long names like on DOS, Mac, or CD-ROM. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen +# will interpret the first line (until the first dot) of a JavaDoc-style +# comment as the brief description. If set to NO, the JavaDoc +# comments will behave just like regular Qt-style comments +# (thus requiring an explicit @brief command for a brief description.) + +JAVADOC_AUTOBRIEF = NO + +# If the QT_AUTOBRIEF tag is set to YES then Doxygen will +# interpret the first line (until the first dot) of a Qt-style +# comment as the brief description. If set to NO, the comments +# will behave just like regular Qt-style comments (thus requiring +# an explicit \brief command for a brief description.) + +QT_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen +# treat a multi-line C++ special comment block (i.e. a block of //! or /// +# comments) as a brief description. This used to be the default behaviour. +# The new default is to treat a multi-line C++ comment block as a detailed +# description. Set this tag to YES if you prefer the old behaviour instead. + +MULTILINE_CPP_IS_BRIEF = NO + +# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented +# member inherits the documentation from any documented member that it +# re-implements. + +INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce +# a new page for each member. If set to NO, the documentation of a member will +# be part of the file/class/namespace that contains it. + +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. +# Doxygen uses this value to replace tabs by spaces in code fragments. + +TAB_SIZE = 2 + +# This tag can be used to specify a number of aliases that act +# as commands in the documentation. An alias has the form "name=value". +# For example adding "sideeffect=\par Side Effects:\n" will allow you to +# put the command \sideeffect (or @sideeffect) in the documentation, which +# will result in a user-defined paragraph with heading "Side Effects:". +# You can put \n's in the value part of an alias to insert newlines. + +ALIASES = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C +# sources only. Doxygen will then generate output that is more tailored for C. +# For instance, some of the names that are used will be different. The list +# of all members will be omitted, etc. + +OPTIMIZE_OUTPUT_FOR_C = NO + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java +# sources only. Doxygen will then generate output that is more tailored for +# Java. For instance, namespaces will be presented as packages, qualified +# scopes will look different, etc. + +OPTIMIZE_OUTPUT_JAVA = NO + +# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran +# sources only. Doxygen will then generate output that is more tailored for +# Fortran. + +OPTIMIZE_FOR_FORTRAN = NO + +# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL +# sources. Doxygen will then generate output that is tailored for +# VHDL. + +OPTIMIZE_OUTPUT_VHDL = NO + +# Doxygen selects the parser to use depending on the extension of the files it +# parses. With this tag you can assign which parser to use for a given extension.
+# Doxygen has a built-in mapping, but you can override or extend it using this +# tag. The format is ext=language, where ext is a file extension, and language +# is one of the parsers supported by doxygen: IDL, Java, Javascript, CSharp, C, +# C++, D, PHP, Objective-C, Python, Fortran, and VHDL. For instance, to make +# doxygen treat .inc files as Fortran files (default is PHP), and .f files as C +# (default is Fortran), use: inc=Fortran f=C. Note that for custom extensions +# you also need to set FILE_PATTERNS otherwise the files are not read by doxygen. + +EXTENSION_MAPPING = + +# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want +# to include (a tag file for) the STL sources as input, then you should +# set this tag to YES in order to let doxygen match function declarations and +# definitions whose arguments contain STL classes (e.g. func(std::string); vs. +# func(std::string) {}). This also makes the inheritance and collaboration +# diagrams that involve STL classes more complete and accurate. + +BUILTIN_STL_SUPPORT = NO + +# If you use Microsoft's C++/CLI language, you should set this option to YES to +# enable parsing support. + +CPP_CLI_SUPPORT = NO + +# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. +# Doxygen will parse them like normal C++ but will assume all classes use public +# instead of private inheritance when no explicit protection keyword is present. + +SIP_SUPPORT = NO + +# For Microsoft's IDL there are propget and propput attributes to indicate getter +# and setter methods for a property. Setting this option to YES (the default) +# will make doxygen replace the get and set methods by a property in the +# documentation. This will only work if the methods are indeed getting or +# setting a simple type. If this is not the case, or you want to show the +# methods anyway, you should set this option to NO. + +IDL_PROPERTY_SUPPORT = NO + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES, then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. + +DISTRIBUTE_GROUP_DOC = NO + +# Set the SUBGROUPING tag to YES (the default) to allow class member groups of +# the same type (for instance a group of public functions) to be put as a +# subgroup of that type (e.g. under the Public Functions section). Set it to +# NO to prevent subgrouping. Alternatively, this can be done per class using +# the \nosubgrouping command. + +SUBGROUPING = YES + +# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum +# is documented as struct, union, or enum with the name of the typedef. So +# typedef struct TypeS {} TypeT, will appear in the documentation as a struct +# with name TypeT. When disabled the typedef will appear as a member of a file, +# namespace, or class. And the struct will be named TypeS. This can typically +# be useful for C code in case the coding convention dictates that all compound +# types are typedef'ed and only the typedef is referenced, never the tag name. + +TYPEDEF_HIDES_STRUCT = NO + +# The SYMBOL_CACHE_SIZE determines the size of the internal cache used to +# determine which symbols to keep in memory and which to flush to disk. +# When the cache is full, less often used symbols will be written to disk. +# For small to medium size projects (<1000 input files) the default value is +# probably good enough.
For larger projects too small a cache size can cause +# doxygen to be busy swapping symbols to and from disk most of the time +# causing a significant performance penalty. +# If the system has enough physical memory increasing the cache will improve the +# performance by keeping more symbols in memory. Note that the value works on +# a logarithmic scale so increasing the size by one will roughly double the +# memory usage. The cache size is given by this formula: +# 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0, +# corresponding to a cache size of 2^16 = 65536 symbols. + +SYMBOL_CACHE_SIZE = 0 + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in +# documentation are documented, even if no documentation was available. +# Private class members and static file members will be hidden unless +# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES. + +EXTRACT_ALL = NO + +# If the EXTRACT_PRIVATE tag is set to YES all private members of a class +# will be included in the documentation. + +EXTRACT_PRIVATE = YES + +# If the EXTRACT_STATIC tag is set to YES all static members of a file +# will be included in the documentation. + +EXTRACT_STATIC = YES + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) +# defined locally in source files will be included in the documentation. +# If set to NO only classes defined in header files are included. + +EXTRACT_LOCAL_CLASSES = NO + +# This flag is only useful for Objective-C code. When set to YES local +# methods, which are defined in the implementation section but not in +# the interface, are included in the documentation. +# If set to NO (the default) only methods in the interface are included. + +EXTRACT_LOCAL_METHODS = NO + +# If this flag is set to YES, the members of anonymous namespaces will be +# extracted and appear in the documentation as a namespace called +# 'anonymous_namespace{file}', where file will be replaced with the base +# name of the file that contains the anonymous namespace. By default +# anonymous namespaces are hidden. + +EXTRACT_ANON_NSPACES = NO + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all +# undocumented members of documented classes, files or namespaces. +# If set to NO (the default) these members will be included in the +# various overviews, but no documentation section is generated. +# This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. +# If set to NO (the default) these classes will be included in the various +# overviews. This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_CLASSES = YES + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all +# friend (class|struct|union) declarations. +# If set to NO (the default) these declarations will be included in the +# documentation. + +HIDE_FRIEND_COMPOUNDS = YES + +# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any +# documentation blocks found inside the body of a function. +# If set to NO (the default) these blocks will be appended to the +# function's detailed documentation block.
+ +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation +# that is typed after a \internal command is included. If the tag is set +# to NO (the default) then the documentation will be excluded. +# Set it to YES to include the internal documentation. + +INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate +# file names in lower-case letters. If set to YES upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. + +CASE_SENSE_NAMES = YES + +# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen +# will show members with their full class and namespace scopes in the +# documentation. If set to YES the scope will be hidden. + +HIDE_SCOPE_NAMES = NO + +# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen +# will put a list of the files that are included by a file in the documentation +# of that file. + +SHOW_INCLUDE_FILES = NO + +# If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen +# will list include files with double quotes in the documentation +# rather than with sharp brackets. + +FORCE_LOCAL_INCLUDES = NO + +# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] +# is inserted in the documentation for inline members. + +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen +# will sort the (detailed) documentation of file and class members +# alphabetically by member name. If set to NO the members will appear in +# declaration order. + +SORT_MEMBER_DOCS = NO + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the +# brief documentation of file, namespace and class members alphabetically +# by member name. If set to NO (the default) the members will appear in +# declaration order. + +SORT_BRIEF_DOCS = NO + +# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen +# will sort the (brief and detailed) documentation of class members so that +# constructors and destructors are listed first. If set to NO (the default) +# the constructors will appear in the respective orders defined by +# SORT_MEMBER_DOCS and SORT_BRIEF_DOCS. +# This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO +# and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO. + +SORT_MEMBERS_CTORS_1ST = NO + +# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the +# hierarchy of group names into alphabetical order. If set to NO (the default) +# the group names will appear in their defined order. + +SORT_GROUP_NAMES = YES + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be +# sorted by fully-qualified names, including namespaces. If set to +# NO (the default), the class list will be sorted only by class name, +# not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the +# alphabetical list. + +SORT_BY_SCOPE_NAME = YES + +# The GENERATE_TODOLIST tag can be used to enable (YES) or +# disable (NO) the todo list. This list is created by putting \todo +# commands in the documentation. + +GENERATE_TODOLIST = NO + +# The GENERATE_TESTLIST tag can be used to enable (YES) or +# disable (NO) the test list. This list is created by putting \test +# commands in the documentation. 
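+# Illustrative example only: a source comment such as +# //! \test Checks the encoding of SSE2 instructions. +# would add an entry to the test list if this option were enabled.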
+ +GENERATE_TESTLIST = NO + +# The GENERATE_BUGLIST tag can be used to enable (YES) or +# disable (NO) the bug list. This list is created by putting \bug +# commands in the documentation. + +GENERATE_BUGLIST = NO + +# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or +# disable (NO) the deprecated list. This list is created by putting +# \deprecated commands in the documentation. + +GENERATE_DEPRECATEDLIST= NO + +# The ENABLED_SECTIONS tag can be used to enable conditional +# documentation sections, marked by \if sectionname ... \endif. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines +# the initial value of a variable or define consists of for it to appear in +# the documentation. If the initializer consists of more lines than specified +# here it will be hidden. Use a value of 0 to hide initializers completely. +# The appearance of the initializer of individual variables and defines in the +# documentation can be controlled using \showinitializer or \hideinitializer +# commands in the documentation regardless of this setting. + +MAX_INITIALIZER_LINES = 0 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated +# at the bottom of the documentation of classes and structs. If set to YES the +# list will mention the files that were used to generate the documentation. + +SHOW_USED_FILES = NO + +# If the sources in your project are distributed over multiple directories +# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy +# in the documentation. The default is NO. + +SHOW_DIRECTORIES = NO + +# Set the SHOW_FILES tag to NO to disable the generation of the Files page. +# This will remove the Files entry from the Quick Index and from the +# Folder Tree View (if specified). The default is YES. + +SHOW_FILES = NO + +# Set the SHOW_NAMESPACES tag to NO to disable the generation of the +# Namespaces page. This will remove the Namespaces entry from the Quick Index +# and from the Folder Tree View (if specified). The default is YES. + +SHOW_NAMESPACES = NO + +# The FILE_VERSION_FILTER tag can be used to specify a program or script that +# doxygen should invoke to get the current version for each file (typically from +# the version control system). Doxygen will invoke the program by executing (via +# popen()) the command <command> <input-file>, where <command> is the value of +# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file +# provided by doxygen. Whatever the program writes to standard output +# is used as the file version. See the manual for examples. + +FILE_VERSION_FILTER = + +# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed +# by doxygen. The layout file controls the global structure of the generated +# output files in an output format independent way. To create the layout file +# that represents doxygen's defaults, run doxygen with the -l option. +# You can optionally specify a file name after the option, if omitted +# DoxygenLayout.xml will be used as the name of the layout file. + +LAYOUT_FILE = + +#--------------------------------------------------------------------------- +# configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated +# by doxygen. Possible values are YES and NO. If left blank NO is used.
+ +QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated by doxygen. Possible values are YES and NO. If left blank +# NO is used. + +WARNINGS = YES + +# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings +# for undocumented members. If EXTRACT_ALL is set to YES then this flag will +# automatically be disabled. + +WARN_IF_UNDOCUMENTED = YES + +# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some +# parameters in a documented function, or documenting parameters that +# don't exist or using markup commands wrongly. + +WARN_IF_DOC_ERROR = YES + +# This WARN_NO_PARAMDOC option can be enabled to get warnings for +# functions that are documented, but have no documentation for their parameters +# or return value. If set to NO (the default) doxygen will only warn about +# wrong or incomplete parameter documentation, but not about the absence of +# documentation. + +WARN_NO_PARAMDOC = NO + +# The WARN_FORMAT tag determines the format of the warning messages that +# doxygen can produce. The string should contain the $file, $line, and $text +# tags, which will be replaced by the file and line number from which the +# warning originated and the warning text. Optionally the format may contain +# $version, which will be replaced by the version of the file (if it could +# be obtained via FILE_VERSION_FILTER). + +WARN_FORMAT = "$file:$line: $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning +# and error messages should be written. If left blank the output is written +# to stderr. + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag can be used to specify the files and/or directories that contain +# documented source files. You may enter file names like "myfile.cpp" or +# directories like "/usr/src/myproject". Separate the files or directories +# with spaces. + +INPUT = ../../src/asmjit + +# This tag can be used to specify the character encoding of the source files +# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is +# also the default input encoding. Doxygen uses libiconv (or the iconv built +# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for +# the list of possible encodings. + +INPUT_ENCODING = UTF-8 + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank the following patterns are tested: +# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx +# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90 + +FILE_PATTERNS = *.c \ + *.cpp \ + *.h + +# The RECURSIVE tag can be used to specify whether or not subdirectories +# should be searched for input files as well. Possible values are YES and NO. +# If left blank NO is used. + +RECURSIVE = YES + +# The EXCLUDE tag can be used to specify files and/or directories that should +# be excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag.
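+# Illustrative, hypothetical value: EXCLUDE = ../../src/asmjit/contrib would +# omit the contrib sources from the generated documentation.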
+ +EXCLUDE = + +# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or +# directories that are symbolic links (a Unix filesystem feature) are excluded +# from the input. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. Note that the wildcards are matched +# against the file with absolute path, so to exclude all test directories +# for example use the pattern */test/* + +EXCLUDE_PATTERNS = + +# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names +# (namespaces, classes, functions, etc.) that should be excluded from the +# output. The symbol name can be a fully qualified name, a word, or if the +# wildcard * is used, a substring. Examples: ANamespace, AClass, +# AClass::ANamespace, ANamespace::*Test + +EXCLUDE_SYMBOLS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or +# directories that contain example code fragments that are included (see +# the \include command). + +EXAMPLE_PATH = + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank all files are included. + +EXAMPLE_PATTERNS = * + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude +# commands irrespective of the value of the RECURSIVE tag. +# Possible values are YES and NO. If left blank NO is used. + +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or +# directories that contain images that are included in the documentation (see +# the \image command). + +IMAGE_PATH = + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command <filter> <input-file>, where <filter> +# is the value of the INPUT_FILTER tag, and <input-file> is the name of an +# input file. Doxygen will then use the output that the filter program writes +# to standard output. If FILTER_PATTERNS is specified, this tag will be +# ignored. + +INPUT_FILTER = + +# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern +# basis. Doxygen will compare the file name with each pattern and apply the +# filter if there is a match. The filters are a list of the form: +# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further +# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER +# is applied to all files. + +FILTER_PATTERNS = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER) will be used to filter the input files when producing source +# files to browse (i.e. when SOURCE_BROWSER is set to YES). + +FILTER_SOURCE_FILES = NO + +#--------------------------------------------------------------------------- +# configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will +# be generated. Documented entities will be cross-referenced with these sources. +# Note: To get rid of all source code in the generated output, make sure also +# VERBATIM_HEADERS is set to NO.
+ +SOURCE_BROWSER = NO + +# Setting the INLINE_SOURCES tag to YES will include the body +# of functions and classes directly in the documentation. + +INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct +# doxygen to hide any special comment blocks from generated source code +# fragments. Normal C and C++ comments will always remain visible. + +STRIP_CODE_COMMENTS = NO + +# If the REFERENCED_BY_RELATION tag is set to YES +# then for each documented function all documented +# functions referencing it will be listed. + +REFERENCED_BY_RELATION = NO + +# If the REFERENCES_RELATION tag is set to YES +# then for each documented function all documented entities +# called/used by that function will be listed. + +REFERENCES_RELATION = NO + +# If the REFERENCES_LINK_SOURCE tag is set to YES (the default) +# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from +# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will +# link to the source code. Otherwise they will link to the documentation. + +REFERENCES_LINK_SOURCE = NO + +# If the USE_HTAGS tag is set to YES then the references to source code +# will point to the HTML generated by the htags(1) tool instead of doxygen +# built-in source browser. The htags tool is part of GNU's global source +# tagging system (see http://www.gnu.org/software/global/global.html). You +# will need version 4.8.6 or higher. + +USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen +# will generate a verbatim copy of the header file for each class for +# which an include is specified. Set to NO to disable this. + +VERBATIM_HEADERS = NO + +#--------------------------------------------------------------------------- +# configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index +# of all compounds will be generated. Enable this if the project +# contains a lot of classes, structs, unions or interfaces. + +ALPHABETICAL_INDEX = NO + +# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then +# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns +# in which this list will be split (can be a number in the range [1..20]) + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all +# classes will be put under the same header in the alphabetical index. +# The IGNORE_PREFIX tag can be used to specify one or more prefixes that +# should be ignored while generating the index headers. + +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES (the default) Doxygen will +# generate HTML output. + +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `html' will be used as the default path. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for +# each generated HTML page (for example: .htm,.php,.asp). If it is left blank +# doxygen will generate files with .html extension. 
+ +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a personal HTML header for +# each generated HTML page. If it is left blank doxygen will generate a +# standard header. + +HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a personal HTML footer for +# each generated HTML page. If it is left blank doxygen will generate a +# standard footer. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading +# style sheet that is used by each HTML page. It can be used to +# fine-tune the look of the HTML output. If the tag is left blank doxygen +# will generate a default style sheet. Note that doxygen will try to copy +# the style sheet file to the HTML output directory, so don't put your own +# stylesheet in the HTML output directory as well, or it will be erased! + +HTML_STYLESHEET = doxygen.css + +# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. +# Doxygen will adjust the colors in the stylesheet and background images +# according to this color. Hue is specified as an angle on a colorwheel, +# see http://en.wikipedia.org/wiki/Hue for more information. +# For instance the value 0 represents red, 60 is yellow, 120 is green, +# 180 is cyan, 240 is blue, 300 is purple, and 360 is red again. +# The allowed range is 0 to 359. + +HTML_COLORSTYLE_HUE = 220 + +# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of +# the colors in the HTML output. For a value of 0 the output will use +# grayscales only. A value of 255 will produce the most vivid colors. + +HTML_COLORSTYLE_SAT = 100 + +# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to +# the luminance component of the colors in the HTML output. Values below +# 100 gradually make the output lighter, whereas values above 100 make +# the output darker. The value divided by 100 is the actual gamma applied, +# so 80 represents a gamma of 0.8, the value 220 represents a gamma of 2.2, +# and 100 does not change the gamma. + +HTML_COLORSTYLE_GAMMA = 80 + +# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML +# page will contain the date and time when the page was generated. Setting +# this to NO can help when comparing the output of multiple runs. + +HTML_TIMESTAMP = YES + +# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, +# files or namespaces will be aligned in HTML using tables. If set to +# NO a bullet list will be used. + +HTML_ALIGN_MEMBERS = YES + +# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML +# documentation will contain sections that can be hidden and shown after the +# page has loaded. For this to work a browser that supports +# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox, +# Netscape 6.0+, Internet Explorer 5.0+, Konqueror, or Safari). + +HTML_DYNAMIC_SECTIONS = NO + +# If the GENERATE_DOCSET tag is set to YES, additional index files +# will be generated that can be used as input for Apple's Xcode 3 +# integrated development environment, introduced with OSX 10.5 (Leopard). +# To create a documentation set, doxygen will generate a Makefile in the +# HTML output directory. Running make will produce the docset in that +# directory and running "make install" will install the docset in +# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find +# it at startup. +# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html +# for more information.
+ +GENERATE_DOCSET = NO + +# When the GENERATE_DOCSET tag is set to YES, this tag determines the name of the +# feed. A documentation feed provides an umbrella under which multiple +# documentation sets from a single provider (such as a company or product suite) +# can be grouped. + +DOCSET_FEEDNAME = "Doxygen generated docs" + +# When the GENERATE_DOCSET tag is set to YES, this tag specifies a string that +# should uniquely identify the documentation set bundle. This should be a +# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen +# will append .docset to the name. + +DOCSET_BUNDLE_ID = org.doxygen.Project + +# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify +# the documentation publisher. This should be a reverse domain-name style +# string, e.g. com.mycompany.MyDocSet.documentation. + +DOCSET_PUBLISHER_ID = org.doxygen.Publisher + +# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. + +DOCSET_PUBLISHER_NAME = Publisher + +# If the GENERATE_HTMLHELP tag is set to YES, additional index files +# will be generated that can be used as input for tools like the +# Microsoft HTML help workshop to generate a compiled HTML help file (.chm) +# of the generated HTML documentation. + +GENERATE_HTMLHELP = YES + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can +# be used to specify the file name of the resulting .chm file. You +# can add a path in front of the file if the result should not be +# written to the html output directory. + +CHM_FILE = ../AsmJit.chm + +# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can +# be used to specify the location (absolute path including file name) of +# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run +# the HTML help compiler on the generated index.hhp. + +HHC_LOCATION = "C:/Program Files/HTML Help Workshop/hhc.exe" + +# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag +# controls whether a separate .chi index file is generated (YES) or +# included in the master .chm file (NO). + +GENERATE_CHI = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING +# is used to encode HtmlHelp index (hhk), content (hhc) and project file +# content. + +CHM_INDEX_ENCODING = + +# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag +# controls whether a binary table of contents is generated (YES) or a +# normal table of contents (NO) in the .chm file. + +BINARY_TOC = YES + +# The TOC_EXPAND flag can be set to YES to add extra items for group members +# to the contents of the HTML help documentation and to the tree view. + +TOC_EXPAND = NO + +# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and +# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated +# that can be used as input for Qt's qhelpgenerator to generate a +# Qt Compressed Help (.qch) of the generated HTML documentation. + +GENERATE_QHP = NO + +# If the QHG_LOCATION tag is specified, the QCH_FILE tag can +# be used to specify the file name of the resulting .qch file. +# The path specified is relative to the HTML output folder. + +QCH_FILE = + +# The QHP_NAMESPACE tag specifies the namespace to use when generating +# Qt Help Project output. For more information please see +# http://doc.trolltech.com/qthelpproject.html#namespace + +QHP_NAMESPACE = org.doxygen.Project + +# The QHP_VIRTUAL_FOLDER tag specifies the virtual folder to use when generating +# Qt Help Project output.
For more information please see +# http://doc.trolltech.com/qthelpproject.html#virtual-folders + +QHP_VIRTUAL_FOLDER = doc + +# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to +# add. For more information please see +# http://doc.trolltech.com/qthelpproject.html#custom-filters + +QHP_CUST_FILTER_NAME = + +# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the +# custom filter to add. For more information please see +# http://doc.trolltech.com/qthelpproject.html#custom-filters +# (Qt Help Project / Custom Filters). + +QHP_CUST_FILTER_ATTRS = + +# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this +# project's filter section matches. For more information please see +# http://doc.trolltech.com/qthelpproject.html#filter-attributes +# (Qt Help Project / Filter Attributes). + +QHP_SECT_FILTER_ATTRS = + +# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can +# be used to specify the location of Qt's qhelpgenerator. +# If non-empty doxygen will try to run qhelpgenerator on the generated +# .qhp file. + +QHG_LOCATION = + +# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files +# will be generated, which together with the HTML files, form an Eclipse help +# plugin. To install this plugin and make it available under the help contents +# menu in Eclipse, the contents of the directory containing the HTML and XML +# files need to be copied into the plugins directory of Eclipse. The name of +# the directory within the plugins directory should be the same as +# the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before +# the help appears. + +GENERATE_ECLIPSEHELP = NO + +# A unique identifier for the eclipse help plugin. When installing the plugin +# the directory name containing the HTML and XML files should also have +# this name. + +ECLIPSE_DOC_ID = org.doxygen.Project + +# The DISABLE_INDEX tag can be used to turn on/off the condensed index at +# top of each HTML page. The value NO (the default) enables the index and +# the value YES disables it. + +DISABLE_INDEX = NO + +# This tag can be used to set the number of enum values (range [1..20]) +# that doxygen will group on one line in the generated HTML documentation. + +ENUM_VALUES_PER_LINE = 1 + +# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index +# structure should be generated to display hierarchical information. +# If the tag value is set to YES, a side panel will be generated +# containing a tree-like index structure (just like the one that +# is generated for HTML Help). For this to work a browser that supports +# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser). +# Windows users are probably better off using the HTML help feature. + +GENERATE_TREEVIEW = NO + +# By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories, +# and Class Hierarchy pages using a tree view instead of an ordered list. + +USE_INLINE_TREES = NO + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be +# used to set the initial width (in pixels) of the frame in which the tree +# is shown. + +TREEVIEW_WIDTH = 250 + +# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open +# links to external symbols imported via tag files in a separate window. + +EXT_LINKS_IN_WINDOW = NO + +# Use this tag to change the font size of Latex formulas included +# as images in the HTML documentation. The default is 10. Note that +# when you change the font size after a successful doxygen run you need +# to manually remove any form_*.png images from the HTML output directory +# to force them to be regenerated.
+ +FORMULA_FONTSIZE = 10 + +# Use the FORMULA_TRANSPARENT tag to determine whether or not the images +# generated for formulas are transparent PNGs. Transparent PNGs are +# not supported properly for IE 6.0, but are supported on all modern browsers. +# Note that when changing this option you need to delete any form_*.png files +# in the HTML output before the changes have effect. + +FORMULA_TRANSPARENT = YES + +# When the SEARCHENGINE tag is enabled doxygen will generate a search box +# for the HTML output. The underlying search engine uses javascript +# and DHTML and should work on any modern browser. Note that when using +# HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets +# (GENERATE_DOCSET) there is already a search function so this one should +# typically be disabled. For large projects the javascript based search engine +# can be slow; in that case enabling SERVER_BASED_SEARCH may provide a better +# solution. + +SEARCHENGINE = YES + +# When the SERVER_BASED_SEARCH tag is enabled the search engine will be +# implemented using a PHP enabled web server instead of at the web client +# using Javascript. Doxygen will generate the search PHP script and index +# file to put on the web server. The advantage of the server +# based approach is that it scales better to large projects and allows +# full text search. The disadvantage is that it is more difficult to set up +# and does not have live searching capabilities. + +SERVER_BASED_SEARCH = NO + +#--------------------------------------------------------------------------- +# configuration options related to the LaTeX output +#--------------------------------------------------------------------------- + +# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will +# generate Latex output. + +GENERATE_LATEX = NO + +# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `latex' will be used as the default path. + +LATEX_OUTPUT = latex + +# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be +# invoked. If left blank `latex' will be used as the default command name. +# Note that when enabling USE_PDFLATEX this option is only used for +# generating bitmaps for formulas in the HTML output, but not in the +# Makefile that is written to the output directory. + +LATEX_CMD_NAME = latex + +# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to +# generate index for LaTeX. If left blank `makeindex' will be used as the +# default command name. + +MAKEINDEX_CMD_NAME = makeindex + +# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact +# LaTeX documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_LATEX = NO + +# The PAPER_TYPE tag can be used to set the paper type that is used +# by the printer. Possible values are: a4, a4wide, letter, legal and +# executive. If left blank a4wide will be used. + +PAPER_TYPE = a4wide + +# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX +# packages that should be included in the LaTeX output. + +EXTRA_PACKAGES = + +# The LATEX_HEADER tag can be used to specify a personal LaTeX header for +# the generated latex document. The header should contain everything until +# the first chapter. If it is left blank doxygen will generate a +# standard header. Notice: only use this tag if you know what you are doing!
+
+LATEX_HEADER =
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
+# is prepared for conversion to pdf (using ps2pdf). The pdf file will
+# contain links (just like the HTML output) instead of page references.
+# This makes the output suitable for online browsing using a pdf viewer.
+
+PDF_HYPERLINKS = YES
+
+# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
+# plain latex in the generated Makefile. Set this option to YES to get a
+# higher quality PDF documentation.
+
+USE_PDFLATEX = YES
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode
+# command to the generated LaTeX files. This will instruct LaTeX to keep
+# running if errors occur, instead of asking the user for help.
+# This option is also used when generating formulas in HTML.
+
+LATEX_BATCHMODE = NO
+
+# If LATEX_HIDE_INDICES is set to YES then doxygen will not
+# include the index chapters (such as File Index, Compound Index, etc.)
+# in the output.
+
+LATEX_HIDE_INDICES = NO
+
+# If LATEX_SOURCE_CODE is set to YES then doxygen will include
+# source code with syntax highlighting in the LaTeX output.
+# Note that which sources are shown also depends on other settings
+# such as SOURCE_BROWSER.
+
+LATEX_SOURCE_CODE = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the RTF output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output.
+# The RTF output is optimized for Word 97 and may not look very pretty with
+# other RTF readers or editors.
+
+GENERATE_RTF = NO
+
+# The RTF_OUTPUT tag is used to specify where the RTF docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `rtf' will be used as the default path.
+
+RTF_OUTPUT = rtf
+
+# If the COMPACT_RTF tag is set to YES Doxygen generates more compact
+# RTF documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_RTF = NO
+
+# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated
+# will contain hyperlink fields. The RTF file will
+# contain links (just like the HTML output) instead of page references.
+# This makes the output suitable for online browsing using WORD or other
+# programs which support those fields.
+# Note: wordpad (write) and others do not support links.
+
+RTF_HYPERLINKS = NO
+
+# Load stylesheet definitions from file. Syntax is similar to doxygen's
+# config file, i.e. a series of assignments. You only have to provide
+# replacements, missing definitions are set to their default value.
+
+RTF_STYLESHEET_FILE =
+
+# Set optional variables used in the generation of an rtf document.
+# Syntax is similar to doxygen's config file.
+
+RTF_EXTENSIONS_FILE =
+
+#---------------------------------------------------------------------------
+# configuration options related to the man page output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES (the default) Doxygen will
+# generate man pages.
+
+GENERATE_MAN = NO
+
+# The MAN_OUTPUT tag is used to specify where the man pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `man' will be used as the default path.
+ +MAN_OUTPUT = man + +# The MAN_EXTENSION tag determines the extension that is added to +# the generated man pages (default is the subroutine's section .3) + +MAN_EXTENSION = .3 + +# If the MAN_LINKS tag is set to YES and Doxygen generates man output, +# then it will generate one additional man file for each entity +# documented in the real man page(s). These additional files +# only source the real man page, but without them the man command +# would be unable to find the correct page. The default is NO. + +MAN_LINKS = NO + +#--------------------------------------------------------------------------- +# configuration options related to the XML output +#--------------------------------------------------------------------------- + +# If the GENERATE_XML tag is set to YES Doxygen will +# generate an XML file that captures the structure of +# the code including all documentation. + +GENERATE_XML = NO + +# The XML_OUTPUT tag is used to specify where the XML pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `xml' will be used as the default path. + +XML_OUTPUT = xml + +# The XML_SCHEMA tag can be used to specify an XML schema, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_SCHEMA = + +# The XML_DTD tag can be used to specify an XML DTD, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_DTD = + +# If the XML_PROGRAMLISTING tag is set to YES Doxygen will +# dump the program listings (including syntax highlighting +# and cross-referencing information) to the XML output. Note that +# enabling this will significantly increase the size of the XML output. + +XML_PROGRAMLISTING = YES + +#--------------------------------------------------------------------------- +# configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- + +# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will +# generate an AutoGen Definitions (see autogen.sf.net) file +# that captures the structure of the code including all +# documentation. Note that this feature is still experimental +# and incomplete at the moment. + +GENERATE_AUTOGEN_DEF = NO + +#--------------------------------------------------------------------------- +# configuration options related to the Perl module output +#--------------------------------------------------------------------------- + +# If the GENERATE_PERLMOD tag is set to YES Doxygen will +# generate a Perl module file that captures the structure of +# the code including all documentation. Note that this +# feature is still experimental and incomplete at the +# moment. + +GENERATE_PERLMOD = NO + +# If the PERLMOD_LATEX tag is set to YES Doxygen will generate +# the necessary Makefile rules, Perl scripts and LaTeX code to be able +# to generate PDF and DVI output from the Perl module output. + +PERLMOD_LATEX = NO + +# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be +# nicely formatted so it can be parsed by a human reader. This is useful +# if you want to understand what is going on. On the other hand, if this +# tag is set to NO the size of the Perl module output will be much smaller +# and Perl will parse it just the same. + +PERLMOD_PRETTY = YES + +# The names of the make variables in the generated doxyrules.make file +# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. 
+# This is useful so different doxyrules.make files included by the same
+# Makefile don't overwrite each other's variables.
+
+PERLMOD_MAKEVAR_PREFIX =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor
+#---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
+# evaluate all C-preprocessor directives found in the sources and include
+# files.
+
+ENABLE_PREPROCESSING = YES
+
+# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
+# names in the source code. If set to NO (the default) only conditional
+# compilation will be performed. Macro expansion can be done in a controlled
+# way by setting EXPAND_ONLY_PREDEF to YES.
+
+MACRO_EXPANSION = YES
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
+# then the macro expansion is limited to the macros specified with the
+# PREDEFINED and EXPAND_AS_DEFINED tags.
+
+EXPAND_ONLY_PREDEF = NO
+
+# If the SEARCH_INCLUDES tag is set to YES (the default) the include files
+# in the INCLUDE_PATH (see below) will be searched if a #include is found.
+
+SEARCH_INCLUDES = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH =
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
+# patterns (like *.h and *.hpp) to filter out the header files in the
+# directories. If left blank, the patterns specified with FILE_PATTERNS will
+# be used.
+
+INCLUDE_FILE_PATTERNS =
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED = ASMJIT_NODOC=1 \
+ ASMJIT_X86=1
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED =
+
+# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
+# doxygen's preprocessor will remove all function-like macros that are alone
+# on a line, have an all uppercase name, and do not end with a semicolon. Such
+# function macros are typically used for boiler-plate code, and will confuse
+# the parser if not removed.
+
+SKIP_FUNCTION_MACROS = YES
+
+#---------------------------------------------------------------------------
+# Configuration::additions related to external references
+#---------------------------------------------------------------------------
+
+# The TAGFILES option can be used to specify one or more tagfiles.
+# Optionally an initial location of the external documentation
+# can be added for each tagfile. The format of a tag file without
+# this location is as follows:
+# TAGFILES = file1 file2 ...
+# Adding location for the tag files is done as follows:
+# TAGFILES = file1=loc1 "file2 = loc2" ...
+# where "loc1" and "loc2" can be relative or absolute paths or
+# URLs. If a location is present for each tag, the installdox tool
+# does not have to be run to correct the links.
+# Note that each tag file must have a unique name
+# (where the name does NOT include the path).
+# If a tag file is not located in the directory in which doxygen
+# is run, you must also specify the path to the tagfile here.
+
+TAGFILES =
+
+# When a file name is specified after GENERATE_TAGFILE, doxygen will create
+# a tag file that is based on the input files it reads.
+
+GENERATE_TAGFILE =
+
+# If the ALLEXTERNALS tag is set to YES all external classes will be listed
+# in the class index. If set to NO only the inherited external classes
+# will be listed.
+
+ALLEXTERNALS = NO
+
+# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed
+# in the modules index. If set to NO, only the current project's groups will
+# be listed.
+
+EXTERNAL_GROUPS = NO
+
+# The PERL_PATH should be the absolute path and name of the perl script
+# interpreter (i.e. the result of `which perl').
+
+PERL_PATH = /usr/bin/perl
+
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool
+#---------------------------------------------------------------------------
+
+# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will
+# generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with
+# base or super classes. Setting the tag to NO turns the diagrams off. Note
+# that this option is superseded by the HAVE_DOT option below. This is only a
+# fallback. It is recommended to install and use dot, since it yields more
+# powerful graphs.
+
+CLASS_DIAGRAMS = YES
+
+# You can define message sequence charts within doxygen comments using the \msc
+# command. Doxygen will then run the mscgen tool (see
+# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the
+# documentation. The MSCGEN_PATH tag allows you to specify the directory where
+# the mscgen tool resides. If left empty the tool is assumed to be found in the
+# default search path.
+
+MSCGEN_PATH =
+
+# If set to YES, the inheritance and collaboration graphs will hide
+# inheritance and usage relations if the target is undocumented
+# or is not a class.
+
+HIDE_UNDOC_RELATIONS = NO
+
+# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
+# available from the path. This tool is part of Graphviz, a graph visualization
+# toolkit from AT&T and Lucent Bell Labs. The other options in this section
+# have no effect if this option is set to NO (the default).
+
+HAVE_DOT = YES
+
+# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is
+# allowed to run in parallel. When set to 0 (the default) doxygen will
+# base this on the number of processors available in the system. You can set it
+# explicitly to a value larger than 0 to get control over the balance
+# between CPU load and processing speed.
+
+DOT_NUM_THREADS = 0
+
+# By default doxygen will write a font called FreeSans.ttf to the output
+# directory and reference it in all dot files that doxygen generates. This
+# font does not include all possible unicode characters however, so when you
+# need these (or just want a different looking font) you can specify the font
+# name using DOT_FONTNAME. You need to make sure dot is able to find the font,
+# which can be done by putting it in a standard location or by setting the
+# DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory
+# containing the font.
+
+DOT_FONTNAME = FreeSans.ttf
+
+# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs.
+# The default size is 10pt.
+
+DOT_FONTSIZE = 10
+
+# By default doxygen will tell dot to use the output directory to look for the
+# FreeSans.ttf font (which doxygen will put there itself). If you specify a
+# different font using DOT_FONTNAME you can set the path where dot
+# can find it using this tag.
+
+DOT_FONTPATH =
+
+# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect inheritance relations. Setting this tag to YES will force the
+# CLASS_DIAGRAMS tag to NO.
+
+CLASS_GRAPH = YES
+
+# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect implementation dependencies (inheritance, containment, and
+# class reference variables) of the class with other documented classes.
+
+COLLABORATION_GRAPH = YES
+
+# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for groups, showing the direct group dependencies.
+
+GROUP_GRAPHS = NO
+
+# If the UML_LOOK tag is set to YES doxygen will generate inheritance and
+# collaboration diagrams in a style similar to the OMG's Unified Modeling
+# Language.
+
+UML_LOOK = NO
+
+# If set to YES, the inheritance and collaboration graphs will show the
+# relations between templates and their instances.
+
+TEMPLATE_RELATIONS = NO
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT
+# tags are set to YES then doxygen will generate a graph for each documented
+# file showing the direct and indirect include dependencies of the file with
+# other documented files.
+
+INCLUDE_GRAPH = NO
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and
+# HAVE_DOT tags are set to YES then doxygen will generate a graph for each
+# documented header file showing the documented files that directly or
+# indirectly include this file.
+
+INCLUDED_BY_GRAPH = NO
+
+# If the CALL_GRAPH and HAVE_DOT options are set to YES then
+# doxygen will generate a call dependency graph for every global function
+# or class method. Note that enabling this option will significantly increase
+# the time of a run. So in most cases it will be better to enable call graphs
+# for selected functions only using the \callgraph command.
+
+CALL_GRAPH = YES
+
+# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then
+# doxygen will generate a caller dependency graph for every global function
+# or class method. Note that enabling this option will significantly increase
+# the time of a run. So in most cases it will be better to enable caller
+# graphs for selected functions only using the \callergraph command.
+
+CALLER_GRAPH = NO
+
+# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen
+# will show a graphical hierarchy of all classes instead of a textual one.
+
+GRAPHICAL_HIERARCHY = YES
+
+# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES
+# then doxygen will show the dependencies a directory has on other directories
+# in a graphical way. The dependency relations are determined by the #include
+# relations between the files in the directories.
+
+DIRECTORY_GRAPH = NO
+
+# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
+# generated by dot. Possible values are png, jpg, or gif.
+# If left blank png will be used.
+
+DOT_IMAGE_FORMAT = png
+
+# The tag DOT_PATH can be used to specify the path where the dot tool can be
+# found. If left blank, it is assumed the dot tool can be found in the path.
+
+DOT_PATH =
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS =
+
+# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of
+# nodes that will be shown in the graph. If the number of nodes in a graph
+# becomes larger than this value, doxygen will truncate the graph, which is
+# visualized by representing a node as a red box. Note that if the
+# number of direct children of the root node in a graph is already larger than
+# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note
+# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
+
+DOT_GRAPH_MAX_NODES = 50
+
+# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the
+# graphs generated by dot. A depth value of 3 means that only nodes reachable
+# from the root by following a path via at most 3 edges will be shown. Nodes
+# that lie further from the root node will be omitted. Note that setting this
+# option to 1 or 2 may greatly reduce the computation time needed for large
+# code bases. Also note that the size of a graph can be further restricted by
+# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
+
+MAX_DOT_GRAPH_DEPTH = 0
+
+# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
+# background. This is disabled by default, because dot on Windows does not
+# seem to support this out of the box. Warning: Depending on the platform used,
+# enabling this option may lead to badly anti-aliased labels on the edges of
+# a graph (i.e. they become hard to read).
+
+DOT_TRANSPARENT = NO
+
+# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output
+# files in one run (i.e. multiple -o and -T options on the command line). This
+# makes dot run faster, but since only newer versions of dot (>1.8.10)
+# support this, this feature is disabled by default.
+
+DOT_MULTI_TARGETS = NO
+
+# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will
+# generate a legend page explaining the meaning of the various boxes and
+# arrows in the dot generated graphs.
+
+GENERATE_LEGEND = YES
+
+# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will
+# remove the intermediate dot files that are used to generate
+# the various graphs.
+ +DOT_CLEANUP = YES diff --git a/tools/doxygen.css b/tools/doxygen.css new file mode 100644 index 0000000..c534d2b --- /dev/null +++ b/tools/doxygen.css @@ -0,0 +1,405 @@ +body, table, div, p, dl { + font-family: Lucida Grande, Verdana, Geneva, Arial, sans-serif; + font-size: 12px; +} + +/* @group Heading Levels */ + +h1 { + text-align: center; + font-size: 150%; +} + +h2 { + font-size: 120%; +} + +h3 { + font-size: 100%; +} + +/* @end */ + +caption { + font-weight: bold; +} + +div.qindex, div.navtab{ + background-color: #e8eef2; + border: 1px solid #84b0c7; + text-align: center; + margin: 2px; + padding: 2px; +} + +div.qindex, div.navpath { + width: 100%; + line-height: 140%; +} + +div.navtab { + margin-right: 15px; +} + +/* @group Link Styling */ + +a { + color: #153788; + font-weight: normal; + text-decoration: none; +} + +.contents a:visited { + color: #1b77c5; +} + +a:hover { + text-decoration: underline; +} + +a.qindex { + font-weight: bold; +} + +a.qindexHL { + font-weight: bold; + background-color: #6666cc; + color: #ffffff; + border: 1px double #9295C2; +} + +.contents a.qindexHL:visited { + color: #ffffff; +} + +a.el { + font-weight: bold; +} + +a.elRef { +} + +a.code { +} + +a.codeRef { +} + +/* @end */ + +dl.el { + margin-left: -1cm; +} + +.fragment { + font-family: monospace, fixed; + font-size: 105%; +} + +pre.fragment { + border: 1px dotted #CCCCFF; + padding: 4px 6px; + margin: 4px 8px 4px 2px; +} + +div.ah { + background-color: black; + font-weight: bold; + color: #ffffff; + margin-bottom: 3px; + margin-top: 3px +} + +div.groupHeader { + margin-left: 16px; + margin-top: 12px; + margin-bottom: 6px; + font-weight: bold; +} + +div.groupText { + margin-left: 16px; + font-style: italic; +} + +body { + background: white; + color: black; + margin-right: 20px; + margin-left: 20px; +} + +td.indexkey { + background-color: #e8eef2; + font-weight: bold; + border: 1px solid #CCCCCC; + margin: 2px 0px 2px 0; + padding: 2px 10px; +} + +td.indexvalue { + background-color: #e8eef2; + border: 1px solid #CCCCCC; + padding: 2px 10px; + margin: 2px 0px; +} + +tr.memlist { + background-color: #f0f0f0; +} + +p.formulaDsp { + text-align: center; +} + +img.formulaDsp { + +} + +img.formulaInl { + vertical-align: middle; +} + +/* @group Code Colorization */ + +span.keyword { + color: #008000 +} + +span.keywordtype { + color: #604020 +} + +span.keywordflow { + color: #e08000 +} + +span.comment { + color: #800000 +} + +span.preprocessor { + color: #806020 +} + +span.stringliteral { + color: #002080 +} + +span.charliteral { + color: #008080 +} + +/* @end */ + +td.tiny { + font-size: 75%; +} + +.dirtab { + padding: 4px; + border-collapse: collapse; + border: 1px solid #84b0c7; +} + +th.dirtab { + background: #e8eef2; + font-weight: bold; +} + +hr { + height: 0; + border: none; + border-top: 1px solid #666; +} + +/* @group Member Descriptions */ + +.mdescLeft, .mdescRight, +.memItemLeft, .memItemRight, +.memTemplItemLeft, .memTemplItemRight, .memTemplParams { + background-color: #FFFFFF; + border: none; + margin: 4px; + padding: 1px 0 0 8px; +} + +.mdescLeft, .mdescRight { + padding: 0px 8px 4px 8px; + color: #555; +} + +.memItemLeft, .memItemRight { + padding-top: 4px; + padding-bottom: 4px; + border-top: 1px solid #CCCCFF; +} + +.mdescLeft, .mdescRight { +} + +.memTemplParams { + color: #606060; +} + +/* @end */ + +/* @group Member Details */ + +/* Styles for detailed member documentation */ + +.memtemplate { + margin-left: 3px; + font-weight: normal; + font-size: 80%; + color: #606060; +} + +.memnav 
{
+  background-color: #e8eef2;
+  border: 1px solid #84b0c7;
+  text-align: center;
+  margin: 2px;
+  margin-right: 15px;
+  padding: 2px;
+}
+
+.memitem {
+  padding: 0;
+}
+
+.memname {
+  white-space: nowrap;
+  font-weight: bold;
+}
+
+.memproto, .memdoc {
+}
+
+.memproto {
+  padding: 0;
+  background-color: #EEEEFF;
+  border: 1px solid #CCCCFF;
+}
+
+.memdoc {
+  padding: 2px 5px;
+  border-left: 1px solid #CCCCFF;
+  border-right: 1px solid #CCCCFF;
+  border-bottom: 1px solid #CCCCFF;
+}
+
+.paramkey {
+  text-align: right;
+}
+
+.paramtype {
+  white-space: nowrap;
+}
+
+.paramname {
+  color: #606060;
+  font-weight: normal;
+  white-space: nowrap;
+}
+.paramname em {
+  font-style: normal;
+}
+
+/* @end */
+
+/* @group Directory (tree) */
+
+/* for the tree view */
+
+.ftvtree {
+  font-family: sans-serif;
+  margin: 0.5em;
+}
+
+/* these are for tree view when used as main index */
+
+.directory {
+  font-size: 9pt;
+  font-weight: bold;
+}
+
+.directory h3 {
+  margin: 0px;
+  margin-top: 1em;
+  font-size: 11pt;
+}
+
+/*
+The following two styles can be used to replace the root node title
+with an image of your choice. Simply uncomment the next two styles,
+specify the name of your image and be sure to set 'height' to the
+proper pixel height of your image.
+*/
+
+/*
+.directory h3.swap {
+  height: 61px;
+  background-repeat: no-repeat;
+  background-image: url("yourimage.gif");
+}
+.directory h3.swap span {
+  display: none;
+}
+*/
+
+.directory > h3 {
+  margin-top: 0;
+}
+
+.directory p {
+  margin: 0px;
+  white-space: nowrap;
+}
+
+.directory div {
+  display: none;
+  margin: 0px;
+}
+
+.directory img {
+  vertical-align: -30%;
+}
+
+/* these are for tree view when not used as main index */
+
+.directory-alt {
+  font-size: 100%;
+  font-weight: bold;
+}
+
+.directory-alt h3 {
+  margin: 0px;
+  margin-top: 1em;
+  font-size: 11pt;
+}
+
+.directory-alt > h3 {
+  margin-top: 0;
+}
+
+.directory-alt p {
+  margin: 0px;
+  white-space: nowrap;
+}
+
+.directory-alt div {
+  display: none;
+  margin: 0px;
+}
+
+.directory-alt img {
+  vertical-align: -30%;
+}
+
+/* @end */
+
+address {
+  font-style: normal;
+  color: #333;
+}
diff --git a/tools/src-gendefs.js b/tools/src-gendefs.js
new file mode 100644
index 0000000..a4997ae
--- /dev/null
+++ b/tools/src-gendefs.js
@@ -0,0 +1,253 @@
+// [GenDefs]
+//
+// The purpose of this script is to gather all instructions' names into a
+// single string. It prevents relocations that would have to be done by the
+// linker to make all the pointers the binary application/library uses valid.
+// This approach decreases the final size of the AsmJit binary.
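+//
+// Illustrative sketch of the output shape (the instruction names and offsets
+// below are made up, the real ones are generated from the INST() macros in
+// x86defs.cpp): instead of a relocatable pointer table such as
+//
+//   static const char* const instNames[] = { "add", "sub" };
+//
+// the generator emits one '\0'-separated string plus integer offsets:
+//
+//   static const char _instName[] = "add\0" "sub\0";
+//   enum kInstData_NameIndex { kInstAdd_NameIndex = 0, kInstSub_NameIndex = 4 };
+//
+// so a name is read as &_instName[index] and there is nothing for the
+// dynamic linker to patch at load time.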
+
+var fs = require("fs");
+
+// ----------------------------------------------------------------------------
+// [Configuration]
+// ----------------------------------------------------------------------------
+
+var injectStartMarker = "// ${kInstData:Begin}\n";
+var injectEndMarker = "// ${kInstData:End}\n";
+
+// ----------------------------------------------------------------------------
+// [Utilities]
+// ----------------------------------------------------------------------------
+
+var uppercaseFirst = function(s) {
+  if (!s)
+    return s;
+  return s[0].toUpperCase() + s.substr(1);
+};
+
+var inject = function(s, start, end, code) {
+  var iStart = s.indexOf(start);
+  var iEnd = s.indexOf(end);
+
+  if (iStart === -1)
+    throw new Error("Couldn't locate start mark.");
+
+  if (iEnd === -1)
+    throw new Error("Couldn't locate end mark.");
+
+  return s.substr(0, iStart + start.length) + code + s.substr(iEnd);
+};
+
+// ----------------------------------------------------------------------------
+// [Database]
+// ----------------------------------------------------------------------------
+
+// FullIndex   - Index of the name of the instruction in one big string (no
+//               prefix/suffix concept).
+// PrefixIndex - Index to a prefix string.
+// SuffixIndex - Index to a suffix string.
+
+var Database = (function() {
+  // Find the longest suffix in `suffixes` that terminates `s`, returning its
+  // index, or -1 if none matches.
+  function bestSuffix(s, suffixes) {
+    var best = -1;
+
+    for (var i = 0; i < suffixes.length; i++) {
+      var suffix = suffixes[i];
+      var si = s.lastIndexOf(suffix);
+
+      if (si === -1 || si + suffix.length != s.length)
+        continue;
+
+      if (best !== -1 && suffix.length < suffixes[best].length)
+        continue;
+
+      best = i;
+    }
+
+    return best;
+  }
+
+  // A string table that deduplicates strings and remembers the byte offset of
+  // each one (offsets account for the implicit '\0' terminators).
+  var IndexedString = function() {
+    this.array = [];
+    this.index = 0;
+    this.map = {};
+  };
+
+  IndexedString.prototype.add = function(s) {
+    var index = this.map[s];
+    if (typeof index === "number")
+      return index;
+
+    index = this.index;
+    this.array.push(s);
+    this.index += s.length + 1;
+    this.map[s] = index;
+    return index;
+  };
+
+  IndexedString.prototype.get = function(s) {
+    return this.map[s];
+  };
+
+  IndexedString.prototype.format = function(indent) {
+    var s = "";
+    var array = this.array;
+
+    for (var i = 0; i < array.length; i++) {
+      s += indent + "\"" + array[i] + "\\0\"";
+      if (i === array.length - 1)
+        s += ";";
+      s += "\n";
+    }
+
+    return s;
+  };
+
+  IndexedString.prototype.getSize = function() {
+    return this.index;
+  };
+
+  var Database = function(suffixes) {
+    this.map = {};
+    this.suffixes = suffixes;
+
+    this.fullString = new IndexedString();
+    this.prefixString = new IndexedString();
+    this.suffixString = new IndexedString();
+  };
+
+  Database.prototype.add = function(name, id) {
+    this.map[name] = {
+      id: id,
+      fullIndex: 0,
+      prefixIndex: 0,
+      suffixIndex: 0,
+      hasV: 0
+    };
+  };
+
+  Database.prototype.index = function() {
+    var map = this.map;
+    var suffixes = this.suffixes;
+
+    for (var i = 0; i < suffixes.length; i++) {
+      this.suffixString.add(suffixes[i]);
+    }
+
+    for (var name in map) {
+      var inst = map[name];
+      var si = bestSuffix(name, suffixes);
+
+      inst.fullIndex = this.fullString.add(name);
+
+      // Names starting with 'v' are split without the leading 'v', the flag
+      // is kept in `hasV`.
+      if (name.indexOf("v") === 0) {
+        inst.hasV = 1;
+        name = name.substr(1);
+      }
+
+      if (si !== -1) {
+        var suffix = suffixes[si];
+        var prefix = name.substr(0, name.length - suffix.length);
+
+        inst.prefixIndex = this.prefixString.add(prefix);
+        inst.suffixIndex = this.suffixString.add(suffix);
+      }
+      else {
+        inst.prefixIndex = this.prefixString.add(name);
+        inst.suffixIndex = this.suffixString.add("");
+      }
+    }
+  };
+
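+  // Example of how this factory is driven by generate() below (the
+  // instruction name and id here are hypothetical, the real ones are parsed
+  // from the INST() macros):
+  //
+  //   var db = new Database(["sd", "ss"]);
+  //   db.add("addsd", "kInstAddsd");
+  //   db.index();
+  //
+  // After index(), db.map["addsd"].fullIndex points into the full string
+  // table and prefixIndex/suffixIndex into the prefix/suffix tables.
+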
+  return Database;
+})();
+
+// ----------------------------------------------------------------------------
+// [Generate]
+// ----------------------------------------------------------------------------
+
+var generate = function(fileName, arch, suffixes) {
+  var oldData = fs.readFileSync(fileName, "utf8").replace(/\r\n/g, "\n");
+
+  var data = oldData;
+  var code = "";
+
+  var Arch = uppercaseFirst(arch);
+
+  // Create database.
+  var db = new Database(suffixes);
+  var re = new RegExp("INST\\(([A-Za-z0-9_]+)\\s*,\\s*\\\"([A-Za-z0-9_ ]*)\\\"", "g");
+  var m;
+
+  while ((m = re.exec(data)) !== null) {
+    var id = m[1];
+    var name = m[2];
+
+    db.add(name, id);
+  }
+  db.index();
+
+  console.log("Full size: " + db.fullString.getSize());
+  console.log("Prefix size: " + db.prefixString.getSize());
+  console.log("Suffix size: " + db.suffixString.getSize());
+
+  // Generate the _instName[] string.
+  code += "const char _instName[] =\n";
+  for (var k in db.map) {
+    var inst = db.map[k];
+    code += " \"" + k + "\\0\"\n";
+  }
+  // Replace the trailing newline by the terminating semicolon.
+  code = code.substr(0, code.length - 1) + ";\n\n";
+
+  // Generate NameIndex.
+  code += "enum kInstData_NameIndex {\n";
+  for (var k in db.map) {
+    var inst = db.map[k];
+    code += " " + inst.id + "_NameIndex = " + inst.fullIndex + ",\n";
+  }
+  // Drop the trailing ",\n" and close the enum.
+  code = code.substr(0, code.length - 2) + "\n};\n";
+
+  // Inject.
+  data = inject(data, injectStartMarker, injectEndMarker, code);
+
+  // Save only if modified.
+  if (data !== oldData)
+    fs.writeFileSync(fileName, data, "utf8");
+};
+
+// ----------------------------------------------------------------------------
+// [Main]
+// ----------------------------------------------------------------------------
+
+var main = function(files) {
+  files.forEach(function(file) {
+    generate(file.name, file.arch, file.suffixes);
+  });
+};
+
+main([
+  {
+    name: "../src/asmjit/x86/x86defs.cpp",
+    arch: "x86",
+    suffixes: [
+      "a", "ae",
+      "b", "bd", "be", "bq", "bw",
+      "c",
+      "d", "dq", "dqa", "dqu", "dw",
+      "e",
+      "f128",
+      "g", "ge",
+      "hpd", "hps",
+      "i", "i128", "ip",
+      "l", "last", "ld", "le", "lpd", "lps", "lw",
+      "na", "nae", "nb", "nbe", "nc", "ne", "ng", "nge", "nl", "nle", "no", "np", "ns", "nz",
+      "o",
+      "p", "pd", "pe", "ph", "pi", "po", "pp", "ps",
+      "q",
+      "r",
+      "s", "sb", "sd", "si", "sq", "ss", "sw",
+      "usb", "usw",
+      "vpd", "vps",
+      "w", "wb", "wd", "wq",
+      "z"
+    ]
+  }
+]);
diff --git a/tools/src-sanity.js b/tools/src-sanity.js
new file mode 100644
index 0000000..e309a37
--- /dev/null
+++ b/tools/src-sanity.js
@@ -0,0 +1,177 @@
+var assert = require("assert");
+var fs = require("fs");
+var path = require("path");
+
+/**
+ * List all files that can be processed by the sanitizer in the given directory.
+ */
+var filesToSanitize = (function() {
+  var listPrivate = function(array, dir, displayDir, accept) {
+    var files = fs.readdirSync(dir);
+    var subarray = [];
+
+    for (var i = 0; i < files.length; i++) {
+      var baseName = files[i];
+      var fullName = path.normalize(path.join(dir, baseName));
+
+      var stat = fs.lstatSync(fullName);
+      if (stat.isSymbolicLink())
+        continue;
+
+      if (stat.isDirectory()) {
+        // Merge the recursion result back, otherwise files from nested
+        // subdirectories would be lost; keep display names "/"-separated
+        // regardless of the platform.
+        subarray = listPrivate(subarray, path.join(dir, baseName), displayDir + baseName + "/", accept);
+        continue;
+      }
+
+      if (stat.isFile()) {
+        if (accept(baseName))
+          array.push({ name: fullName, display: displayDir + baseName });
+        continue;
+      }
+    }
+
+    return array.concat(subarray);
+  };
+
+  return function(dir, accept) {
+    return listPrivate([], dir, "", accept);
+  };
+})();
+
+var isCppHeaderExt = function(ext) {
+  return ext === ".h"   ||
+         ext === ".hh"  ||
+         ext === ".hpp" ||
+         ext === ".hxx" ;
+};
+
+var isCppSourceExt = function(ext) {
+  return ext === ".c"   ||
+         ext === ".cc"  ||
+         ext === ".cpp" ||
+         ext === ".cxx" ;
+};
+
+/**
+ * Filter that returns true if the given file name should be processed.
+ */
+var filesToAccept = function(name) {
+  var ext = path.extname(name).toLowerCase();
+
+  return isCppHeaderExt(ext) ||
+         isCppSourceExt(ext) ||
+         ext === ".cmake"    ||
+         ext === ".m"        ||
+         ext === ".md"       ||
+         ext === ".mm"       ;
+};
+
+var sanitySpaces = function(data) {
+  // Remove carriage return.
+  data = data.replace(/\r\n/g, "\n");
+  // Remove spaces before the end of the line.
+  data = data.replace(/[ \t]+\n/g, "\n");
+  // Convert tabs to spaces.
+  data = data.replace(/\t/g, " ");
+
+  return data;
+};
+
+var sanityHeaderGuards = function(data) {
+  // TODO: Header guard sanitization is not implemented yet; the data is
+  // returned unchanged.
+  return data;
+};
+
+var sanityIncludeOrder = function(data, directive) {
+  var i = 0;
+  var nl = true;
+
+  var startPosition = -1;
+  var endPosition = -1;
+  var list = null;
+  var replacement;
+
+  while (i < data.length) {
+    if (nl && data.indexOf(directive, i) === i) {
+      var iLocal = i;
+
+      if (startPosition === -1) {
+        startPosition = i;
+        list = [];
+      }
+
+      for (;;) {
+        if (++i >= data.length) {
+          list.push(data.substring(iLocal, i));
+          break;
+        }
+        if (data[i] === '\n') {
+          list.push(data.substring(iLocal, i));
+          i++;
+          break;
+        }
+      }
+    }
+    else if (startPosition !== -1) {
+      assert(nl === true);
+      endPosition = i;
+
+      if (list.length > 1) {
+        list.sort();
+        replacement = list.join("\n");
+        assert(replacement.length == endPosition - startPosition - 1);
+
+        data = data.substring(0, startPosition) +
+               replacement +
+               "\n" +
+               data.substring(endPosition);
+      }
+
+      startPosition = -1;
+      endPosition = -1;
+      list = null;
+
+      // Track line starts the same way as below so a directive block that
+      // follows an empty line is still recognized.
+      nl = data[i] === '\n';
+      i++;
+    }
+    else {
+      nl = data[i] === '\n';
+      i++;
+    }
+  }
+
+  return data;
+};
+
+var sanity = function(data, name) {
+  var ext = path.extname(name).toLowerCase();
+
+  // Sanity spaces.
+  data = sanitySpaces(data);
+
+  // Fix C/C++ header guards.
+  if (isCppHeaderExt(ext)) {
+    data = sanityHeaderGuards(data);
+  }
+
+  // Sort #include files.
+  if (isCppHeaderExt(ext) || isCppSourceExt(ext)) {
+    data = sanityIncludeOrder(data, "#include");
+  }
+
+  return data;
+};
+
+var main = function(dir) {
+  filesToSanitize(dir, filesToAccept).forEach(function(file) {
+    var oldData = fs.readFileSync(file.name, "utf8");
+    var newData = sanity(oldData, file.display);
+
+    if (oldData !== newData) {
+      console.log("Sanitizing: " + file.display);
+      fs.writeFileSync(file.name, newData, "utf8");
+    }
+  });
+};
+
+main(path.join(__dirname, "../src"));
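+
+// Example invocation (the source path is resolved relative to this script,
+// so the current directory does not matter):
+//
+//   $ node tools/src-sanity.js
+//
+// This walks <asmjit>/src recursively, normalizes whitespace, sorts runs of
+// consecutive #include lines, and rewrites only the files whose content
+// changed.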