Refactored slightly some constants and operand handling in X86Assembler.

Refactored asmjit::x86 register definitions (now exported as a single symbol).
Refactored bit utilities, now using proper naming like `or_`, `and_`, `andNot`.
Refactored X86RegCount and X86RegMask to support K instead of Fp register.
Refactored X86 instruction table (won't stay for long, new tool to export it is in development).
Renamed instruction group to instruction encoding.
Added XSAVE/OSXSAVE and other CPU features to X86CpuInfo.
Added proper AVX and AVX-512 detection to X86CpuInfo.
Added support to get content of XCR0 in X86CpuInfo (callXGetBV).
Added XSAVE instruction set support (Assembler/Compiler).
Added SSE4a instruction set support (Assembler/Compiler).
Added X86KReg and X86KVar register/variable support (AVX-512).
Added X86ZmmReg and X86ZmmVar register/variable support (AVX-512).
This commit is contained in:
kobalicek
2014-11-01 13:07:56 +01:00
parent 1318c9aff7
commit 515d854d10
21 changed files with 6969 additions and 5448 deletions

View File

@@ -13,7 +13,7 @@
namespace asmgen {
enum { kGenOpCodeInstCount = 2640 };
enum { kGenOpCodeInstCount = 2656 };
// Generate all instructions asmjit can emit.
static void opcode(asmjit::X86Assembler& a) {
@@ -1275,9 +1275,25 @@ static void opcode(asmjit::X86Assembler& a) {
a.pcmpistrm(xmm0, ptr_gp0, 0);
a.pcmpgtq(xmm0, xmm7);
a.pcmpgtq(xmm0, ptr_gp0);
// SSE4a.
a.nop();
a.extrq(xmm0, xmm1);
a.extrq(xmm0, 0x1, 0x2);
a.insertq(xmm0, xmm1);
a.insertq(xmm0, xmm1, 0x1, 0x2);
a.movntsd(ptr_gp0, xmm0);
a.movntss(ptr_gp0, xmm0);
// POPCNT.
a.nop();
a.popcnt(gp0, ptr_gp0);
// AESNI.
a.nop();
a.aesdec(xmm0, xmm7);
a.aesdec(xmm0, ptr_gp0);
a.aesdeclast(xmm0, xmm7);
@@ -1292,10 +1308,24 @@ static void opcode(asmjit::X86Assembler& a) {
a.aeskeygenassist(xmm0, ptr_gp0, 0);
// PCLMULQDQ.
a.nop();
a.pclmulqdq(xmm0, xmm7, 0);
a.pclmulqdq(xmm0, ptr_gp0, 0);
// XSAVE.
a.nop();
a.xgetbv();
a.xsetbv();
a.xsave(ptr_gp0);
a.xsaveopt(ptr_gp0);
a.xrstor(ptr_gp0);
// AVX.
a.nop();
a.vaddpd(xmm0, xmm1, xmm2);
a.vaddpd(xmm0, xmm1, ptr_gp0);
a.vaddpd(ymm0, ymm1, ymm2);
@@ -1946,6 +1976,8 @@ static void opcode(asmjit::X86Assembler& a) {
a.vzeroupper();
// AVX+AESNI.
a.nop();
a.vaesdec(xmm0, xmm1, xmm2);
a.vaesdec(xmm0, xmm1, ptr_gp0);
a.vaesdeclast(xmm0, xmm1, xmm2);
@@ -1960,10 +1992,14 @@ static void opcode(asmjit::X86Assembler& a) {
a.vaeskeygenassist(xmm0, ptr_gp0, 0);
// AVX+PCLMULQDQ.
a.nop();
a.vpclmulqdq(xmm0, xmm1, xmm2, 0);
a.vpclmulqdq(xmm0, xmm1, ptr_gp0, 0);
// AVX2.
a.nop();
a.vbroadcasti128(ymm0, ptr_gp0);
a.vbroadcastsd(ymm0, xmm1);
a.vbroadcastss(xmm0, xmm1);
@@ -2268,6 +2304,8 @@ static void opcode(asmjit::X86Assembler& a) {
a.vpxor(ymm0, ymm1, ymm2);
// FMA3.
a.nop();
a.vfmadd132pd(xmm0, xmm1, ptr_gp0);
a.vfmadd132pd(xmm0, xmm1, xmm2);
a.vfmadd132pd(ymm0, ymm1, ptr_gp0);
@@ -2462,6 +2500,8 @@ static void opcode(asmjit::X86Assembler& a) {
a.vfnmsub231ss(xmm0, xmm1, xmm2);
// FMA4.
a.nop();
a.vfmaddpd(xmm0, xmm1, xmm2, xmm3);
a.vfmaddpd(xmm0, xmm1, ptr_gp0, xmm3);
a.vfmaddpd(xmm0, xmm1, xmm2, ptr_gp0);
@@ -2560,6 +2600,8 @@ static void opcode(asmjit::X86Assembler& a) {
a.vfnmsubss(xmm0, xmm1, xmm2, ptr_gp0);
// XOP.
a.nop();
a.vfrczpd(xmm0, xmm1);
a.vfrczpd(xmm0, ptr_gp0);
a.vfrczpd(ymm0, ymm1);
@@ -2709,6 +2751,8 @@ static void opcode(asmjit::X86Assembler& a) {
a.vpshlw(xmm0, xmm1, ptr_gp0);
// BMI.
a.nop();
a.andn(gp0, gp1, zcx);
a.andn(gp0, gp1, ptr_gp1);
a.bextr(gp0, gp1, zcx);
@@ -2721,14 +2765,20 @@ static void opcode(asmjit::X86Assembler& a) {
a.blsr(gp0, ptr_gp1);
// LZCNT.
a.nop();
a.lzcnt(gp0, gp1);
a.lzcnt(gp0, ptr_gp1);
// TZCNT.
a.nop();
a.tzcnt(gp0, gp1);
a.tzcnt(gp0, ptr_gp1);
// BMI2.
a.nop();
a.bzhi(gp0, gp1, zcx);
a.bzhi(gp0, ptr_gp1, zcx);
a.mulx(gp0, gp1, zcx);
@@ -2747,9 +2797,13 @@ static void opcode(asmjit::X86Assembler& a) {
a.shrx(gp0, ptr_gp1, zcx);
// RDRAND.
a.nop();
a.rdrand(gp0);
// F16C.
a.nop();
a.vcvtph2ps(xmm0, xmm1);
a.vcvtph2ps(xmm0, ptr_gp1);
a.vcvtph2ps(ymm0, xmm1);
@@ -2758,6 +2812,9 @@ static void opcode(asmjit::X86Assembler& a) {
a.vcvtps2ph(ptr_gp0, xmm1, 0);
a.vcvtps2ph(xmm0, ymm1, 0);
a.vcvtps2ph(ptr_gp0, ymm1, 0);
// Mark the end of the stream.
a.nop();
}
} // asmgen namespace

View File

@@ -235,15 +235,15 @@
//! `BaseMem` class. These functions are used to make operands that represents
//! memory addresses:
//!
//! - `asmjit::ptr()`
//! - `asmjit::byte_ptr()`
//! - `asmjit::word_ptr()`
//! - `asmjit::dword_ptr()`
//! - `asmjit::qword_ptr()`
//! - `asmjit::tword_ptr()`
//! - `asmjit::oword_ptr()`
//! - `asmjit::yword_ptr()`
//! - `asmjit::zword_ptr()`
//! - `asmjit::ptr()` - Address size not specified.
//! - `asmjit::byte_ptr()` - 1 byte.
//! - `asmjit::word_ptr()` - 2 bytes (Gpw size).
//! - `asmjit::dword_ptr()` - 4 bytes (Gpd size).
//! - `asmjit::qword_ptr()` - 8 bytes (Gpq/Mm size).
//! - `asmjit::tword_ptr()` - 10 bytes (FPU).
//! - `asmjit::oword_ptr()` - 16 bytes (Xmm size).
//! - `asmjit::yword_ptr()` - 32 bytes (Ymm size).
//! - `asmjit::zword_ptr()` - 64 bytes (Zmm size).
//!
//! Most useful function to make pointer should be `asmjit::ptr()`. It creates
//! pointer to the target with unspecified size. Unspecified size works in all
@@ -298,10 +298,10 @@
//! // Get `X86CpuInfo` global instance.
//! const X86CpuInfo* cpuInfo = X86CpuInfo::getHost();
//!
//! if (cpuInfo->hasFeature(kX86CpuFeatureSse2)) {
//! if (cpuInfo->hasFeature(kX86CpuFeatureSSE2)) {
//! // Processor has SSE2.
//! }
//! else if (cpuInfo->hasFeature(kX86CpuFeatureMmx)) {
//! else if (cpuInfo->hasFeature(kX86CpuFeatureMMX)) {
//! // Processor doesn't have SSE2, but has MMX.
//! }
//! else {

View File

@@ -42,7 +42,7 @@ ASMJIT_ENUM(kInstId) {
//! Instruction options (stub).
ASMJIT_ENUM(kInstOptions) {
//! No instruction options.
kInstOptionNone = 0x00,
kInstOptionNone = 0x00000000,
//! Emit short form of the instruction.
//!
@@ -53,7 +53,8 @@ ASMJIT_ENUM(kInstOptions) {
//! can be dangerous if the short jmp/jcc is required, but not encodable due
//! to large displacement, in such case an error happens and the whole
//! assembler/compiler stream is unusable.
kInstOptionShortForm = 0x01,
kInstOptionShortForm = 0x00000001,
//! Emit long form of the instruction.
//!
//! X86/X64:
@@ -61,12 +62,13 @@ ASMJIT_ENUM(kInstOptions) {
//! Long form is mostly related to jmp and jcc instructions, but like the
//! `kInstOptionShortForm` option it can be used by other instructions
//! supporting both 8-bit and 32-bit immediates.
kInstOptionLongForm = 0x02,
kInstOptionLongForm = 0x00000002,
//! Condition is likely to be taken.
kInstOptionTaken = 0x04,
kInstOptionTaken = 0x00000004,
//! Condition is unlikely to be taken.
kInstOptionNotTaken = 0x08
kInstOptionNotTaken = 0x00000008
};
// ============================================================================

View File

@@ -993,11 +993,11 @@ struct VarAttr {
//! Get whether `flag` is on.
ASMJIT_INLINE bool hasFlag(uint32_t flag) { return (_flags & flag) != 0; }
//! Add `flags`.
ASMJIT_INLINE void addFlags(uint32_t flags) { _flags |= flags; }
ASMJIT_INLINE void orFlags(uint32_t flags) { _flags |= flags; }
//! Mask `flags`.
ASMJIT_INLINE void andFlags(uint32_t flags) { _flags &= flags; }
//! Clear `flags`.
ASMJIT_INLINE void delFlags(uint32_t flags) { _flags &= ~flags; }
ASMJIT_INLINE void andNotFlags(uint32_t flags) { _flags &= ~flags; }
//! Get how many times the variable is used by the instruction/node.
ASMJIT_INLINE uint32_t getVarCount() const { return _varCount; }
@@ -1642,7 +1642,7 @@ struct Node {
// [Accessors - Type and Flags]
// --------------------------------------------------------------------------
//! Get type of node, see `kNodeType`.
//! Get node type, see `kNodeType`.
ASMJIT_INLINE uint32_t getType() const {
return _type;
}
@@ -1652,24 +1652,29 @@ struct Node {
return _flags;
}
//! Set node flags to `flags`.
ASMJIT_INLINE void setFlags(uint32_t flags) {
_flags = static_cast<uint16_t>(flags);
}
//! Get whether the instruction has flag `flag`.
ASMJIT_INLINE bool hasFlag(uint32_t flag) const {
return (static_cast<uint32_t>(_flags) & flag) != 0;
}
//! Set node flags to `flags`.
ASMJIT_INLINE void setFlags(uint32_t flags) {
_flags = static_cast<uint16_t>(flags);
}
//! Add instruction `flags`.
ASMJIT_INLINE void addFlags(uint32_t flags) {
ASMJIT_INLINE void orFlags(uint32_t flags) {
_flags |= static_cast<uint16_t>(flags);
}
//! And instruction `flags`.
ASMJIT_INLINE void andFlags(uint32_t flags) {
_flags &= static_cast<uint16_t>(flags);
}
//! Clear instruction `flags`.
ASMJIT_INLINE void delFlags(uint32_t flags) {
_flags &= static_cast<uint16_t>(~flags);
ASMJIT_INLINE void andNotFlags(uint32_t flags) {
_flags &= ~static_cast<uint16_t>(flags);
}
//! Get whether the node has beed fetched.
@@ -1695,18 +1700,18 @@ struct Node {
return hasFlag(kNodeFlagIsInformative);
}
//! Whether the instruction is an unconditional jump.
//! Whether the node is `InstNode` and unconditional jump.
ASMJIT_INLINE bool isJmp() const { return hasFlag(kNodeFlagIsJmp); }
//! Whether the instruction is a conditional jump.
//! Whether the node is `InstNode` and conditional jump.
ASMJIT_INLINE bool isJcc() const { return hasFlag(kNodeFlagIsJcc); }
//! Whether the instruction is an unconditional or conditional jump.
//! Whether the node is `InstNode` and conditional/unconditional jump.
ASMJIT_INLINE bool isJmpOrJcc() const { return hasFlag(kNodeFlagIsJmp | kNodeFlagIsJcc); }
//! Whether the instruction is a return.
//! Whether the node is `InstNode` and return.
ASMJIT_INLINE bool isRet() const { return hasFlag(kNodeFlagIsRet); }
//! Get whether the instruction is special.
//! Get whether the node is `InstNode` and the instruction is special.
ASMJIT_INLINE bool isSpecial() const { return hasFlag(kNodeFlagIsSpecial); }
//! Get whether the instruction accesses FPU.
//! Get whether the node is `InstNode` and the instruction uses x87-FPU.
ASMJIT_INLINE bool isFp() const { return hasFlag(kNodeFlagIsFp); }
// --------------------------------------------------------------------------
@@ -1897,7 +1902,9 @@ struct EmbedNode : public Node {
// --------------------------------------------------------------------------
//! Create a new `EmbedNode` instance.
ASMJIT_INLINE EmbedNode(Compiler* compiler, void* data, uint32_t size) : Node(compiler, kNodeTypeEmbed) {
ASMJIT_INLINE EmbedNode(Compiler* compiler, void* data, uint32_t size) :
Node(compiler, kNodeTypeEmbed) {
_size = size;
if (size <= kInlineBufferSize) {
if (data != NULL)
@@ -1953,7 +1960,7 @@ struct CommentNode : public Node {
//! Create a new `CommentNode` instance.
ASMJIT_INLINE CommentNode(Compiler* compiler, const char* comment) : Node(compiler, kNodeTypeComment) {
addFlags(kNodeFlagIsInformative);
orFlags(kNodeFlagIsInformative);
_comment = comment;
}
@@ -1974,8 +1981,10 @@ struct HintNode : public Node {
// --------------------------------------------------------------------------
//! Create a new `HintNode` instance.
ASMJIT_INLINE HintNode(Compiler* compiler, VarData* vd, uint32_t hint, uint32_t value) : Node(compiler, kNodeTypeHint) {
addFlags(kNodeFlagIsInformative);
ASMJIT_INLINE HintNode(Compiler* compiler, VarData* vd, uint32_t hint, uint32_t value) :
Node(compiler, kNodeTypeHint) {
orFlags(kNodeFlagIsInformative);
_vd = vd;
_hint = hint;
_value = value;
@@ -2101,9 +2110,12 @@ struct InstNode : public Node {
// --------------------------------------------------------------------------
//! Create a new `InstNode` instance.
ASMJIT_INLINE InstNode(Compiler* compiler, uint32_t code, uint32_t options, Operand* opList, uint32_t opCount) : Node(compiler, kNodeTypeInst) {
_code = static_cast<uint16_t>(code);
_options = static_cast<uint8_t>(options);
ASMJIT_INLINE InstNode(Compiler* compiler, uint32_t instId, uint32_t instOptions, Operand* opList, uint32_t opCount) :
Node(compiler, kNodeTypeInst) {
_instId = static_cast<uint16_t>(instId);
_reserved = 0;
_instOptions = instOptions;
_opCount = static_cast<uint8_t>(opCount);
_opList = opList;
@@ -2118,18 +2130,17 @@ struct InstNode : public Node {
// [Accessors]
// --------------------------------------------------------------------------
//! Get instruction code, see `kX86InstId`.
ASMJIT_INLINE uint32_t getCode() const {
return _code;
//! Get instruction ID, see `kX86InstId`.
ASMJIT_INLINE uint32_t getInstId() const {
return _instId;
}
//! Set instruction code to `code`.
//! Set instruction ID to `instId`.
//!
//! Please do not modify instruction code if you are not know what you are
//! doing. Incorrect instruction code or operands can raise assertion() at
//! runtime.
ASMJIT_INLINE void setCode(uint32_t code) {
_code = static_cast<uint16_t>(code);
//! Please do not modify instruction code if you don't know what you are
//! doing. Incorrect instruction code or operands can cause assertion failure.
ASMJIT_INLINE void setInstId(uint32_t instId) {
_instId = static_cast<uint16_t>(instId);
}
//! Whether the instruction is an unconditional jump or whether the
@@ -2140,23 +2151,23 @@ struct InstNode : public Node {
//! Get emit options.
ASMJIT_INLINE uint32_t getOptions() const {
return _options;
return _instOptions;
}
//! Set emit options.
ASMJIT_INLINE void setOptions(uint32_t options) {
_options = static_cast<uint8_t>(options);
_instOptions = options;
}
//! Add emit options.
ASMJIT_INLINE void addOptions(uint32_t options) {
_options |= static_cast<uint8_t>(options);
_instOptions |= options;
}
//! Mask emit options.
ASMJIT_INLINE void andOptions(uint32_t options) {
_options &= static_cast<uint8_t>(options);
_instOptions &= options;
}
//! Clear emit options.
ASMJIT_INLINE void delOptions(uint32_t options) {
_options &= static_cast<uint8_t>(~options);
_instOptions &= ~options;
}
//! Get operands list.
@@ -2225,12 +2236,14 @@ _Update:
// [Members]
// --------------------------------------------------------------------------
//! Instruction code, see `kInstId`.
uint16_t _code;
//! Instruction options, see `kInstOptions`.
uint8_t _options;
//! Instruction ID, see `kInstId`.
uint16_t _instId;
//! \internal
uint8_t _memOpIndex;
//! \internal
uint8_t _reserved;
//! Instruction options, see `kInstOptions`.
uint32_t _instOptions;
//! Operands list.
Operand* _opList;
@@ -2474,7 +2487,7 @@ struct FuncNode : public Node {
//! The "Red Zone" size - count of bytes which might be accessed without
//! adjusting the stack pointer.
uint16_t _redZoneSize;
//! Spill zone size (zone used by WIN64ABI).
//! Spill zone size (used by WIN64 ABI).
uint16_t _spillZoneSize;
//! Stack size needed for function arguments.

View File

@@ -396,7 +396,7 @@ UNIT(base_constpool) {
EXPECT(prevOffset + 8 == curOffset,
"pool.add() - Returned incorrect curOffset.");
EXPECT(pool.getSize() == (i + 1) * 8,
"pool.getSize() - Reports incorrect size.");
"pool.getSize() - Reported incorrect size.");
prevOffset = curOffset;
}

View File

@@ -566,6 +566,26 @@ union UInt64 {
return *this;
}
// --------------------------------------------------------------------------
// [AndNot]
// --------------------------------------------------------------------------
ASMJIT_INLINE UInt64& andNot(uint64_t val) {
u64 &= ~val;
return *this;
}
ASMJIT_INLINE UInt64& andNot(const UInt64& val) {
if (kArchHost64Bit) {
u64 &= ~val.u64;
}
else {
u32[0] &= ~val.u32[0];
u32[1] &= ~val.u32[1];
}
return *this;
}
// --------------------------------------------------------------------------
// [Or]
// --------------------------------------------------------------------------
@@ -606,26 +626,6 @@ union UInt64 {
return *this;
}
// --------------------------------------------------------------------------
// [Del]
// --------------------------------------------------------------------------
ASMJIT_INLINE UInt64& del(uint64_t val) {
u64 &= ~val;
return *this;
}
ASMJIT_INLINE UInt64& del(const UInt64& val) {
if (kArchHost64Bit) {
u64 &= ~val.u64;
}
else {
u32[0] &= ~val.u32[0];
u32[1] &= ~val.u32[1];
}
return *this;
}
// --------------------------------------------------------------------------
// [Eq]
// --------------------------------------------------------------------------

View File

@@ -44,49 +44,64 @@ static void dumpCpu(void) {
const X86CpuInfo* x86Cpu = static_cast<const X86CpuInfo*>(cpu);
static const DumpCpuFeature x86FeaturesList[] = {
{ kX86CpuFeatureMultithreading , "Multithreading" },
{ kX86CpuFeatureExecuteDisableBit , "Execute-Disable Bit" },
{ kX86CpuFeatureRdtsc , "Rdtsc" },
{ kX86CpuFeatureRdtscp , "Rdtscp" },
{ kX86CpuFeatureCmov , "Cmov" },
{ kX86CpuFeatureCmpXchg8B , "Cmpxchg8b" },
{ kX86CpuFeatureCmpXchg16B , "Cmpxchg16b" },
{ kX86CpuFeatureClflush , "Clflush" },
{ kX86CpuFeaturePrefetch , "Prefetch" },
{ kX86CpuFeatureLahfSahf , "Lahf/Sahf" },
{ kX86CpuFeatureFxsr , "Fxsave/Fxrstor" },
{ kX86CpuFeatureFfxsr , "Fxsave/Fxrstor Opt." },
{ kX86CpuFeatureMmx , "Mmx" },
{ kX86CpuFeatureMmxExt , "MmxExt" },
{ kX86CpuFeature3dNow , "3dnow" },
{ kX86CpuFeature3dNowExt , "3dnowExt" },
{ kX86CpuFeatureSse , "Sse" },
{ kX86CpuFeatureSse2 , "Sse2" },
{ kX86CpuFeatureSse3 , "Sse3" },
{ kX86CpuFeatureSsse3 , "Ssse3" },
{ kX86CpuFeatureSse4A , "Sse4a" },
{ kX86CpuFeatureSse41 , "Sse4.1" },
{ kX86CpuFeatureSse42 , "Sse4.2" },
{ kX86CpuFeatureMsse , "Misaligned SSE" },
{ kX86CpuFeatureMonitorMWait , "Monitor/MWait" },
{ kX86CpuFeatureMovbe , "Movbe" },
{ kX86CpuFeaturePopcnt , "Popcnt" },
{ kX86CpuFeatureLzcnt , "Lzcnt" },
{ kX86CpuFeatureAesni , "AesNI" },
{ kX86CpuFeaturePclmulqdq , "Pclmulqdq" },
{ kX86CpuFeatureRdrand , "Rdrand" },
{ kX86CpuFeatureAvx , "Avx" },
{ kX86CpuFeatureAvx2 , "Avx2" },
{ kX86CpuFeatureNX , "NX (Non-Execute Bit)" },
{ kX86CpuFeatureMT , "MT (Multi-Threading)" },
{ kX86CpuFeatureRDTSC , "RDTSC" },
{ kX86CpuFeatureRDTSCP , "RDTSCP" },
{ kX86CpuFeatureCMOV , "CMOV" },
{ kX86CpuFeatureCMPXCHG8B , "CMPXCHG8B" },
{ kX86CpuFeatureCMPXCHG16B , "CMPXCHG16B" },
{ kX86CpuFeatureCLFLUSH , "CLFLUSH" },
{ kX86CpuFeatureCLFLUSHOpt , "CLFLUSH (Opt)" },
{ kX86CpuFeaturePREFETCH , "PREFETCH" },
{ kX86CpuFeaturePREFETCHWT1 , "PREFETCHWT1" },
{ kX86CpuFeatureLahfSahf , "LAHF/SAHF" },
{ kX86CpuFeatureFXSR , "FXSR" },
{ kX86CpuFeatureFXSROpt , "FXSR (Opt)" },
{ kX86CpuFeatureMMX , "MMX" },
{ kX86CpuFeatureMMX2 , "MMX2" },
{ kX86CpuFeature3DNOW , "3DNOW" },
{ kX86CpuFeature3DNOW2 , "3DNOW2" },
{ kX86CpuFeatureSSE , "SSE" },
{ kX86CpuFeatureSSE2 , "SSE2" },
{ kX86CpuFeatureSSE3 , "SSE3" },
{ kX86CpuFeatureSSSE3 , "SSSE3" },
{ kX86CpuFeatureSSE4A , "SSE4A" },
{ kX86CpuFeatureSSE4_1 , "SSE4.1" },
{ kX86CpuFeatureSSE4_2 , "SSE4.2" },
{ kX86CpuFeatureMSSE , "Misaligned SSE" },
{ kX86CpuFeatureMONITOR , "MONITOR/MWAIT" },
{ kX86CpuFeatureMOVBE , "MOVBE" },
{ kX86CpuFeaturePOPCNT , "POPCNT" },
{ kX86CpuFeatureLZCNT , "LZCNT" },
{ kX86CpuFeatureAESNI , "AESNI" },
{ kX86CpuFeaturePCLMULQDQ , "PCLMULQDQ" },
{ kX86CpuFeatureRDRAND , "RDRAND" },
{ kX86CpuFeatureRDSEED , "RDSEED" },
{ kX86CpuFeatureSHA , "SHA" },
{ kX86CpuFeatureXSave , "XSAVE" },
{ kX86CpuFeatureXSaveOS , "XSAVE (OS)" },
{ kX86CpuFeatureAVX , "AVX" },
{ kX86CpuFeatureAVX2 , "AVX2" },
{ kX86CpuFeatureF16C , "F16C" },
{ kX86CpuFeatureFma3 , "Fma3" },
{ kX86CpuFeatureFma4 , "Fma4" },
{ kX86CpuFeatureXop , "Xop" },
{ kX86CpuFeatureBmi , "Bmi" },
{ kX86CpuFeatureBmi2 , "Bmi2" },
{ kX86CpuFeatureHle , "Hle" },
{ kX86CpuFeatureRtm , "Rtm" },
{ kX86CpuFeatureFsGsBase , "FsGsBase" },
{ kX86CpuFeatureRepMovsbStosbExt , "RepMovsbStosbExt" }
{ kX86CpuFeatureFMA3 , "FMA3" },
{ kX86CpuFeatureFMA4 , "FMA4" },
{ kX86CpuFeatureXOP , "XOP" },
{ kX86CpuFeatureBMI , "BMI" },
{ kX86CpuFeatureBMI2 , "BMI2" },
{ kX86CpuFeatureHLE , "HLE" },
{ kX86CpuFeatureRTM , "RTM" },
{ kX86CpuFeatureADX , "ADX" },
{ kX86CpuFeatureMPX , "MPX" },
{ kX86CpuFeatureFSGSBase , "FS/GS Base" },
{ kX86CpuFeatureMOVSBSTOSBOpt , "REP MOVSB/STOSB (Opt)" },
{ kX86CpuFeatureAVX512F , "AVX512F" },
{ kX86CpuFeatureAVX512CD , "AVX512CD" },
{ kX86CpuFeatureAVX512PF , "AVX512PF" },
{ kX86CpuFeatureAVX512ER , "AVX512ER" },
{ kX86CpuFeatureAVX512DQ , "AVX512DQ" },
{ kX86CpuFeatureAVX512BW , "AVX512BW" },
{ kX86CpuFeatureAVX512VL , "AVX512VL" }
};
INFO("Host CPU Info (X86/X64):");
@@ -178,16 +193,19 @@ static void dumpSizeOf(void) {
#if defined(ASMJIT_BUILD_X86) || defined(ASMJIT_BUILD_X64)
INFO("SizeOf X86/X64:");
DUMP_TYPE(asmjit::X86Assembler);
DUMP_TYPE(asmjit::X86InstInfo);
DUMP_TYPE(asmjit::X86InstExtendedInfo);
#if !defined(ASMJIT_DISABLE_COMPILER)
DUMP_TYPE(asmjit::X86Compiler);
DUMP_TYPE(asmjit::X86CallNode);
DUMP_TYPE(asmjit::X86FuncNode);
DUMP_TYPE(asmjit::X86FuncDecl);
DUMP_TYPE(asmjit::X86InstInfo);
DUMP_TYPE(asmjit::X86VarMap);
DUMP_TYPE(asmjit::X86VarInfo);
DUMP_TYPE(asmjit::X86VarState);
#endif // !ASMJIT_DISABLE_COMPILER
INFO("");
#endif // ASMJIT_BUILD_X86
}

File diff suppressed because it is too large Load Diff

View File

@@ -57,16 +57,64 @@ namespace asmjit {
return *this; \
} \
\
/*! Force REX prefix. */ \
/*! Force REX prefix (X64). */ \
ASMJIT_INLINE _Class_& rex() { \
_instOptions |= kX86InstOptionRex; \
return *this; \
} \
\
/*! Force 3-byte VEX prefix. */ \
/*! Force 3-byte VEX prefix (AVX+). */ \
ASMJIT_INLINE _Class_& vex3() { \
_instOptions |= kX86InstOptionVex3; \
return *this; \
} \
\
/*! Force 4-byte EVEX prefix (AVX512+). */ \
ASMJIT_INLINE _Class_& evex() { \
_instOptions |= kX86InstOptionEvex; \
return *this; \
} \
\
/*! Use zeroing instead of merging (AVX512+). */ \
ASMJIT_INLINE _Class_& z() { \
_instOptions |= kX86InstOptionEvexZero; \
return *this; \
} \
\
/*! Broadcast one element to all other elements (AVX512+). */ \
ASMJIT_INLINE _Class_& _1ToN() { \
_instOptions |= kX86InstOptionEvexOneN; \
return *this; \
} \
\
/*! Suppress all exceptions (AVX512+). */ \
ASMJIT_INLINE _Class_& sae() { \
_instOptions |= kX86InstOptionEvexSae; \
return *this; \
} \
\
/*! Static rounding mode `round-to-nearest` (even) and `SAE` (AVX512+). */ \
ASMJIT_INLINE _Class_& rn_sae() { \
_instOptions |= kX86InstOptionEvexRnSae; \
return *this; \
} \
\
/*! Static rounding mode `round-down` (toward -inf) and `SAE` (AVX512+). */ \
ASMJIT_INLINE _Class_& rd_sae() { \
_instOptions |= kX86InstOptionEvexRdSae; \
return *this; \
} \
\
/*! Static rounding mode `round-up` (toward +inf) and `SAE` (AVX512+). */ \
ASMJIT_INLINE _Class_& ru_sae() { \
_instOptions |= kX86InstOptionEvexRuSae; \
return *this; \
} \
\
/*! Static rounding mode `round-toward-zero` (truncate) and `SAE` (AVX512+). */ \
ASMJIT_INLINE _Class_& rz_sae() { \
_instOptions |= kX86InstOptionEvexRzSae; \
return *this; \
}
//! X86/X64 assembler.
@@ -94,10 +142,10 @@ namespace asmjit {
//! ~~~
//! // Use asmjit namespace.
//! using namespace asmjit;
//! using namespace asmjit::host;
//! using namespace asmjit::x86;
//!
//! // Create Assembler instance.
//! Assembler a;
//! // Create X86Assembler instance.
//! X86Assembler a;
//!
//! // Prolog.
//! a.push(ebp);
@@ -115,10 +163,10 @@ namespace asmjit {
//! ~~~
//!
//! You can see that syntax is very close to Intel one. Only difference is that
//! you are calling functions that emits the binary code for you. All registers
//! are in `asmjit` namespace, so it's very comfortable to use it (look at
//! first line). There is also used method `imm()` to create an immediate value.
//! Use `imm_u()` to create unsigned immediate value.
//! you are calling functions that emit binary code for you. All registers are
//! in `asmjit::x86` namespace, so it's very comfortable to use it (look at the
//! `use namespace` section). Without importing `asmjit::x86` registers would
//! have to be written as `x86::eax`, `x86::esp`, and so on.
//!
//! There is also possibility to use memory addresses and immediates. Use
//! `ptr()`, `byte_ptr()`, `word_ptr()`, `dword_ptr()` and similar functions to
@@ -126,14 +174,14 @@ namespace asmjit {
//! information related to the operand size is needed only in rare cases, that
//! is an instruction without having any register operands, such as `inc [mem]`.
//!
//! for example, `a` is `x86::Assembler` instance:
//! for example, `a` is an `X86Assembler` instance:
//!
//! ~~~
//! a.mov(ptr(eax), 0); // mov ptr [eax], 0
//! a.mov(ptr(eax), edx); // mov ptr [eax], edx
//! ~~~
//!
//! But it's also possible to create complex addresses:
//! But it's also possible to create complex addresses offered by the x86 architecture:
//!
//! ~~~
//! // eax + ecx*x addresses
@@ -148,10 +196,12 @@ namespace asmjit {
//! a.mov(ptr(eax, ecx, 3, 16), 0); // mov ptr [eax + ecx * 8 + 16], 0
//! ~~~
//!
//! All addresses shown are using `ptr()` to make memory operand. Some assembler
//! instructions (single operand ones) needs to have specified memory operand
//! size. For example `a.inc(ptr(eax))` can't be called, because the meaning is
//! ambiguous, see the code below.
//! All addresses shown are using `x86::ptr()` to make memory operand. Some
//! assembler instructions using a single operand need to know the size of
//! the operand to avoid ambiguity. For example `a.inc(ptr(eax))` is ambiguous
//! and would cause a runtime error. This problem can be fixed by using memory
//! operand with size specified - `byte_ptr`, `word_ptr`, `dword_ptr`, see the
//! code below:
//!
//! ~~~
//! // [byte] address.
@@ -163,30 +213,34 @@ namespace asmjit {
//! // [dword] address.
//! a.inc(dword_ptr(eax)); // Inc dword ptr [eax].
//! a.dec(dword_ptr(eax)); // Dec dword ptr [eax].
//! // [qword] address.
//! a.inc(qword_ptr(rax)); // Inc qword ptr [rax].
//! a.dec(qword_ptr(rax)); // Dec qword ptr [rax].
//! ~~~
//!
//! Calling JIT Code
//! ----------------
//!
//! While you are over from emitting instructions, you can make your function
//! by using `Assembler::make()` method. This method will use memory
//! manager to allocate virtual memory and relocates generated code to it. For
//! memory allocation is used global memory manager by default and memory is
//! freeable, but of course this default behavior can be overridden specifying
//! your memory manager and allocation type. If you want to do with code
//! something else you can always override make() method and do what you want.
//! After you are finished with emitting instructions, you can make your function
//! callable by using `Assembler::make()` method. This method will use memory
//! manager to allocate virtual memory and relocates generated code to it. The
//! memory is allocated through `Runtime` instance provided to `X86Assembler`
//! constructor.
//!
//! You can get size of generated code by `getCodeSize()` or `getOffset()`
//! methods. These methods returns you code size or more precisely the current
//! code offset in bytes. The `takeCode()` function can be used to take the
//! internal buffer and reset the code generator, but the buffer taken has to
//! be freed manually in such case.
//! The size of the code generated can be retrieved by `getCodeSize()` and
//! `getOffset()` methods. The `getOffset()` method returns the current offset
//! (that is mostly equal to the final code size, if called after the code
//! generation) and `getCodeSize()` returns the final code size with possible
//! trampolines. The `takeCode()` method can be used to take the internal buffer
//! and reset the code generator, but the buffer returned has to be freed manually
//! in such case.
//!
//! Machine code can be executed only in memory that is marked executable. This
//! mark is usually not set for memory returned by a C/C++ `malloc` function.
//! The `VMem::alloc()` function can be used allocate a memory where the code can
//! be executed or more preferably `VMemMgr` which has interface
//! similar to `malloc/free` and can allocate chunks of various sizes.
//! mark is usually not set for memory returned by a C/C++ `malloc()` function.
//! The `VMemUtil::alloc()` function can be used to allocate memory where the code
//! can be executed. Please note that `VMemUtil` is a low-level class that works
//! at memory page level. High level interface that is similar to malloc/free is
//! provided by `VMemMgr` class.
//!
//! The next example shows how to allocate memory where the code can be executed:
//!
@@ -194,27 +248,28 @@ namespace asmjit {
//! using namespace asmjit;
//!
//! JitRuntime runtime;
//! Assembler a(&runtime);
//! X86Assembler a(&runtime);
//!
//! // ... Your code generation ...
//! ... Code generation ...
//!
//! // The function prototype
//! // The function prototype.
//! typedef void (*MyFunc)();
//!
//! // make your function
//! // Make the function.
//! MyFunc func = asmjit_cast<MyFunc>(a.make());
//!
//! // call your function
//! // Call the function.
//! func();
//!
//! // If you don't need your function again, free it.
//! // Release the function if not needed anymore.
//! runtime.release(func);
//! ~~~
//!
//! This was a very primitive showing how the generated code can be executed.
//! In production noone will probably generate a function that is only called
//! once and nobody will probably free the function right after it was executed.
//! The code just shows the proper way of code generation and cleanup.
//! This was a very primitive example showing how the generated code can be
//! executed by using the foundation of classes AsmJit offers. In production
//! nobody is likely to generate a function that is only called once and freed
//! immediately after it's been called, however, the concept of releasing code
//! that is not needed anymore should be clear.
//!
//! Labels
//! ------
@@ -278,7 +333,7 @@ namespace asmjit {
//! a.mov(esp, ebp);
//! a.pop(ebp);
//!
//! // Return: STDCALL convention is to pop stack in called function.
//! // Return: Pop the stack by `arg_size` as defined by `STDCALL` convention.
//! a.ret(arg_size);
//! ~~~
//!
@@ -315,13 +370,13 @@ namespace asmjit {
//! Next, more advanced, but often needed technique is that you can build your
//! own registers allocator. X86 architecture contains 8 general purpose
//! registers, 8 Mm registers and 8 Xmm/Ymm/Zmm registers. X64 architecture
//! extends the count of Gp registers and Xmm/Ymm/Zmm registers to 16 or 32
//! when AVX512 is available.
//! extends the count of Gp registers and Xmm/Ymm/Zmm registers to 16. AVX-512
//! architecture extends Xmm/Ymm/Zmm SIMD registers to 32.
//!
//! To create a general purpose register operand from register index use
//! `gpb_lo()`, `gpb_hi()`, `gpw()`, `gpd()`, `gpq()`. To create registers of
//! other types there are functions `fp()`, `mm()`, `xmm()`, `ymm()` and `zmm()`
//! available.
//! other types there are `fp()`, `mm()`, `k()`, `xmm()`, `ymm()` and `zmm()`
//! functions available that return a new register operand.
//!
//! \sa X86Compiler.
struct ASMJIT_VCLASS X86Assembler : public Assembler {
@@ -340,7 +395,7 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
// [Arch]
// --------------------------------------------------------------------------
//! Get count of registers of the current architecture.
//! Get count of registers of the current architecture and mode.
ASMJIT_INLINE const X86RegCount& getRegCount() const {
return _regCount;
}
@@ -478,7 +533,7 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
X86GpReg zdi;
// --------------------------------------------------------------------------
// [Base Instructions]
// [Emit]
// --------------------------------------------------------------------------
#define INST_0x(_Inst_, _Code_) \
@@ -654,6 +709,30 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
return emit(_Code_, o0, o1, o2); \
}
#define INST_3ii(_Inst_, _Code_, _Op0_, _Op1_, _Op2_) \
ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2) { \
return emit(_Code_, o0, o1, o2); \
} \
/*! \overload */ \
ASMJIT_INLINE Error _Inst_(const _Op0_& o0, int o1, int o2) { \
Imm o1Imm(o1); \
return emit(_Code_, o0, o1Imm, o2); \
} \
ASMJIT_INLINE Error _Inst_(const _Op0_& o0, unsigned int o1, unsigned int o2) { \
Imm o1Imm(o1); \
return emit(_Code_, o0, o1Imm, static_cast<uint64_t>(o2)); \
} \
/*! \overload */ \
ASMJIT_INLINE Error _Inst_(const _Op0_& o0, int64_t o1, int64_t o2) { \
Imm o1Imm(o1); \
return emit(_Code_, o0, o1Imm, static_cast<uint64_t>(o2)); \
} \
/*! \overload */ \
ASMJIT_INLINE Error _Inst_(const _Op0_& o0, uint64_t o1, uint64_t o2) { \
Imm o1Imm(o1); \
return emit(_Code_, o0, o1Imm, o2); \
}
#define INST_4x(_Inst_, _Code_, _Op0_, _Op1_, _Op2_, _Op3_) \
ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2, const _Op3_& o3) { \
return emit(_Code_, o0, o1, o2, o3); \
@@ -686,6 +765,35 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
return emit(_Code_, o0, o1, o2, o3); \
}
//! \internal
//!
//! Emitter for a 4-operand instruction whose last two operands are
//! immediates, with convenience overloads accepting them as plain integers
//! of any common width (the third operand is converted to `Imm`, the last
//! one is forwarded to `emit()` as `int` or widened to `uint64_t`).
#define INST_4ii(_Inst_, _Code_, _Op0_, _Op1_, _Op2_, _Op3_) \
  ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2, const _Op3_& o3) { \
    return emit(_Code_, o0, o1, o2, o3); \
  } \
  /*! \overload */ \
  ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1, int o2, int o3) { \
    return emit(_Code_, o0, o1, Imm(o2), o3); \
  } \
  /*! \overload */ \
  ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1, unsigned int o2, unsigned int o3) { \
    return emit(_Code_, o0, o1, Imm(o2), static_cast<uint64_t>(o3)); \
  } \
  /*! \overload */ \
  ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1, int64_t o2, int64_t o3) { \
    return emit(_Code_, o0, o1, Imm(o2), static_cast<uint64_t>(o3)); \
  } \
  /*! \overload */ \
  ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1, uint64_t o2, uint64_t o3) { \
    return emit(_Code_, o0, o1, Imm(o2), o3); \
  }
// --------------------------------------------------------------------------
// [X86/X64]
// --------------------------------------------------------------------------
//! Add with Carry.
INST_2x(adc, kX86InstIdAdc, X86GpReg, X86GpReg)
//! \overload
@@ -837,11 +945,6 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
//! CPU identification (i486).
INST_0x(cpuid, kX86InstIdCpuid)
//! Accumulate crc32 value (polynomial 0x11EDC6F41) (SSE4.2).
INST_2x_(crc32, kX86InstIdCrc32, X86GpReg, X86GpReg, o0.isRegType(kX86RegTypeGpd) || o0.isRegType(kX86RegTypeGpq))
//! \overload
INST_2x_(crc32, kX86InstIdCrc32, X86GpReg, X86Mem, o0.isRegType(kX86RegTypeGpd) || o0.isRegType(kX86RegTypeGpq))
//! Decimal adjust AL after addition (X86 Only).
INST_0x(daa, kX86InstIdDaa)
//! Decimal adjust AL after subtraction (X86 Only).
@@ -1037,11 +1140,6 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
//! Pop stack into EFLAGS register (32-bit or 64-bit).
INST_0x(popf, kX86InstIdPopf)
//! Return the count of number of bits set to 1 (SSE4.2).
INST_2x_(popcnt, kX86InstIdPopcnt, X86GpReg, X86GpReg, !o0.isGpb() && o0.getRegType() == o1.getRegType())
//! \overload
INST_2x_(popcnt, kX86InstIdPopcnt, X86GpReg, X86Mem, !o0.isGpb())
//! Push WORD or DWORD/QWORD on the stack.
INST_1x_(push, kX86InstIdPush, X86GpReg, o0.getSize() == 2 || o0.getSize() == _regSize)
//! Push WORD or DWORD/QWORD on the stack.
@@ -1329,273 +1427,277 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
INST_2i(xor_, kX86InstIdXor, X86Mem, Imm)
// --------------------------------------------------------------------------
// [Fpu]
// [FPU]
// --------------------------------------------------------------------------
//! Compute 2^x - 1 (FPU).
//! Compute `2^x - 1` - `fp0 = POW(2, fp0) - 1` (FPU).
INST_0x(f2xm1, kX86InstIdF2xm1)
//! Absolute value of fp0 (FPU).
//! Abs `fp0 = ABS(fp0)` (FPU).
INST_0x(fabs, kX86InstIdFabs)
//! Add `o1` to `o0` (one has to be `fp0`) and store result in `o0` (FPU).
//! Add `o0 = o0 + o1` (one operand has to be `fp0`) (FPU).
INST_2x_(fadd, kX86InstIdFadd, X86FpReg, X86FpReg, o0.getRegIndex() == 0 || o1.getRegIndex() == 0)
//! Add 4-byte or 8-byte FP `o0` to fp0 and store result in fp0 (FPU).
//! Add `fp0 = fp0 + float_or_double[o0]` (FPU).
INST_1x(fadd, kX86InstIdFadd, X86Mem)
//! Add fp0 to `o0` and pop the FPU stack (FPU).
//! Add `o0 = o0 + fp0` and POP (FPU).
INST_1x(faddp, kX86InstIdFaddp, X86FpReg)
//! \overload
//! Add `fp1 = fp1 + fp0` and POP (FPU).
INST_0x(faddp, kX86InstIdFaddp)
//! Load binary coded decimal (FPU).
//! Load BCD from `[o0]` and PUSH (FPU).
INST_1x(fbld, kX86InstIdFbld, X86Mem)
//! Store BCD integer and Pop (FPU).
//! Store BCD-Integer to `[o0]` and POP (FPU).
INST_1x(fbstp, kX86InstIdFbstp, X86Mem)
//! Change fp0 sign (FPU).
//! Complement Sign `fp0 = -fp0` (FPU).
INST_0x(fchs, kX86InstIdFchs)
//! Clear exceptions (FPU).
INST_0x(fclex, kX86InstIdFclex)
//! Conditional move (FPU).
//! Conditional move `if (CF=1) fp0 = o0` (FPU).
INST_1x(fcmovb, kX86InstIdFcmovb, X86FpReg)
//! Conditional move (FPU).
//! Conditional move `if (CF|ZF=1) fp0 = o0` (FPU).
INST_1x(fcmovbe, kX86InstIdFcmovbe, X86FpReg)
//! Conditional move (FPU).
//! Conditional move `if (ZF=1) fp0 = o0` (FPU).
INST_1x(fcmove, kX86InstIdFcmove, X86FpReg)
//! Conditional move (FPU).
//! Conditional move `if (CF=0) fp0 = o0` (FPU).
INST_1x(fcmovnb, kX86InstIdFcmovnb, X86FpReg)
//! Conditional move (FPU).
//! Conditional move `if (CF|ZF=0) fp0 = o0` (FPU).
INST_1x(fcmovnbe, kX86InstIdFcmovnbe, X86FpReg)
//! Conditional move (FPU).
//! Conditional move `if (ZF=0) fp0 = o0` (FPU).
INST_1x(fcmovne, kX86InstIdFcmovne, X86FpReg)
//! Conditional move (FPU).
//! Conditional move `if (PF=0) fp0 = o0` (FPU).
INST_1x(fcmovnu, kX86InstIdFcmovnu, X86FpReg)
//! Conditional move (FPU).
//! Conditional move `if (PF=1) fp0 = o0` (FPU).
INST_1x(fcmovu, kX86InstIdFcmovu, X86FpReg)
//! Compare fp0 with `o0` (FPU).
//! Compare `fp0` with `o0` (FPU).
INST_1x(fcom, kX86InstIdFcom, X86FpReg)
//! Compare fp0 with fp1 (FPU).
//! Compare `fp0` with `fp1` (FPU).
INST_0x(fcom, kX86InstIdFcom)
//! Compare fp0 with 4-byte or 8-byte FP at `src` (FPU).
//! Compare `fp0` with `float_or_double[o0]` (FPU).
INST_1x(fcom, kX86InstIdFcom, X86Mem)
//! Compare fp0 with `o0` and pop the FPU stack (FPU).
//! Compare `fp0` with `o0` and POP (FPU).
INST_1x(fcomp, kX86InstIdFcomp, X86FpReg)
//! Compare fp0 with fp1 and pop the FPU stack (FPU).
//! Compare `fp0` with `fp1` and POP (FPU).
INST_0x(fcomp, kX86InstIdFcomp)
//! Compare fp0 with 4-byte or 8-byte FP at `adr` and pop the FPU stack (FPU).
//! Compare `fp0` with `float_or_double[o0]` and POP (FPU).
INST_1x(fcomp, kX86InstIdFcomp, X86Mem)
//! Compare fp0 with fp1 and pop the FPU stack twice (FPU).
//! Compare `fp0` with `fp1` and POP twice (FPU).
INST_0x(fcompp, kX86InstIdFcompp)
//! Compare fp0 and `o0` and Set EFLAGS (FPU).
//! Compare `fp0` with `o0` and set EFLAGS (FPU).
INST_1x(fcomi, kX86InstIdFcomi, X86FpReg)
//! Compare fp0 and `o0` and Set EFLAGS and pop the FPU stack (FPU).
//! Compare `fp0` with `o0` and set EFLAGS and POP (FPU).
INST_1x(fcomip, kX86InstIdFcomip, X86FpReg)
//! Calculate cosine of fp0 and store result in fp0 (FPU).
//! Cos `fp0 = cos(fp0)` (FPU).
INST_0x(fcos, kX86InstIdFcos)
//! Decrement FPU stack-top pointer (FPU).
//! Decrement FPU stack pointer (FPU).
INST_0x(fdecstp, kX86InstIdFdecstp)
//! Divide `o0` by `o1` (one has to be `fp0`) (FPU).
//! Divide `o0 = o0 / o1` (one has to be `fp0`) (FPU).
INST_2x_(fdiv, kX86InstIdFdiv, X86FpReg, X86FpReg, o0.getRegIndex() == 0 || o1.getRegIndex() == 0)
//! Divide fp0 by 32-bit or 64-bit FP value (FPU).
//! Divide `fp0 = fp0 / float_or_double[o0]` (FPU).
INST_1x(fdiv, kX86InstIdFdiv, X86Mem)
//! Divide `o0` by fp0 (FPU).
//! Divide `o0 = o0 / fp0` and POP (FPU).
INST_1x(fdivp, kX86InstIdFdivp, X86FpReg)
//! \overload
//! Divide `fp1 = fp1 / fp0` and POP (FPU).
INST_0x(fdivp, kX86InstIdFdivp)
//! Reverse divide `o0` by `o1` (one has to be `fp0`) (FPU).
//! Reverse divide `o0 = o1 / o0` (one has to be `fp0`) (FPU).
INST_2x_(fdivr, kX86InstIdFdivr, X86FpReg, X86FpReg, o0.getRegIndex() == 0 || o1.getRegIndex() == 0)
//! Reverse divide fp0 by 32-bit or 64-bit FP value (FPU).
//! Reverse divide `fp0 = float_or_double[o0] / fp0` (FPU).
INST_1x(fdivr, kX86InstIdFdivr, X86Mem)
//! Reverse divide `o0` by fp0 (FPU).
//! Reverse divide `o0 = fp0 / o0` and POP (FPU).
INST_1x(fdivrp, kX86InstIdFdivrp, X86FpReg)
//! \overload
//! Reverse divide `fp1 = fp0 / fp1` and POP (FPU).
INST_0x(fdivrp, kX86InstIdFdivrp)
//! Free FP register (FPU).
INST_1x(ffree, kX86InstIdFfree, X86FpReg)
//! Add 16-bit or 32-bit integer to fp0 (FPU).
//! Add `fp0 = fp0 + short_or_int[o0]` (FPU).
INST_1x_(fiadd, kX86InstIdFiadd, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
//! Compare fp0 with 16-bit or 32-bit Integer (FPU).
//! Compare `fp0` with `short_or_int[o0]` (FPU).
INST_1x_(ficom, kX86InstIdFicom, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
//! Compare fp0 with 16-bit or 32-bit Integer and pop the FPU stack (FPU).
//! Compare `fp0` with `short_or_int[o0]` and POP (FPU).
INST_1x_(ficomp, kX86InstIdFicomp, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
//! Divide fp0 by 32-bit or 16-bit integer (`src`) (FPU).
//! Divide `fp0 = fp0 / short_or_int[o0]` (FPU).
INST_1x_(fidiv, kX86InstIdFidiv, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
//! Reverse divide fp0 by 32-bit or 16-bit integer (`src`) (FPU).
//! Reverse divide `fp0 = short_or_int[o0] / fp0` (FPU).
INST_1x_(fidivr, kX86InstIdFidivr, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
//! Load 16-bit, 32-bit or 64-bit Integer and push it to the FPU stack (FPU).
//! Load `short_or_int_or_long[o0]` and PUSH (FPU).
INST_1x_(fild, kX86InstIdFild, X86Mem, o0.getSize() == 2 || o0.getSize() == 4 || o0.getSize() == 8)
//! Multiply fp0 by 16-bit or 32-bit integer and store it to fp0 (FPU).
//! Multiply `fp0 *= short_or_int[o0]` (FPU).
INST_1x_(fimul, kX86InstIdFimul, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
//! Increment FPU stack-top pointer (FPU).
//! Increment FPU stack pointer (FPU).
INST_0x(fincstp, kX86InstIdFincstp)
//! Initialize FPU (FPU).
INST_0x(finit, kX86InstIdFinit)
//! Subtract 16-bit or 32-bit integer from fp0 and store result to fp0 (FPU).
//! Subtract `fp0 = fp0 - short_or_int[o0]` (FPU).
INST_1x_(fisub, kX86InstIdFisub, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
//! Reverse subtract 16-bit or 32-bit integer from fp0 and store result to fp0 (FPU).
//! Reverse subtract `fp0 = short_or_int[o0] - fp0` (FPU).
INST_1x_(fisubr, kX86InstIdFisubr, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
//! Initialize FPU without checking for pending unmasked exceptions (FPU).
INST_0x(fninit, kX86InstIdFninit)
//! Store fp0 as 16-bit or 32-bit Integer to `o0` (FPU).
//! Store `fp0` as `short_or_int[o0]` (FPU).
INST_1x_(fist, kX86InstIdFist, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
//! Store fp0 as 16-bit, 32-bit or 64-bit Integer to `o0` and pop the FPU stack (FPU).
//! Store `fp0` as `short_or_int_or_long[o0]` and POP (FPU).
INST_1x_(fistp, kX86InstIdFistp, X86Mem, o0.getSize() == 2 || o0.getSize() == 4 || o0.getSize() == 8)
//! Push 32-bit, 64-bit or 80-bit floating point value on the FPU stack (FPU).
//! Load `float_or_double_or_extended[o0]` and PUSH (FPU).
INST_1x_(fld, kX86InstIdFld, X86Mem, o0.getSize() == 4 || o0.getSize() == 8 || o0.getSize() == 10)
//! Push `o0` on the FPU stack (FPU).
//! PUSH `o0` (FPU).
INST_1x(fld, kX86InstIdFld, X86FpReg)
//! Push +1.0 on the FPU stack (FPU).
//! PUSH `1.0` (FPU).
INST_0x(fld1, kX86InstIdFld1)
//! Push log2(10) on the FPU stack (FPU).
//! PUSH `log2(10)` (FPU).
INST_0x(fldl2t, kX86InstIdFldl2t)
//! Push log2(e) on the FPU stack (FPU).
//! PUSH `log2(e)` (FPU).
INST_0x(fldl2e, kX86InstIdFldl2e)
//! Push pi on the FPU stack (FPU).
//! PUSH `pi` (FPU).
INST_0x(fldpi, kX86InstIdFldpi)
//! Push log10(2) on the FPU stack (FPU).
//! PUSH `log10(2)` (FPU).
INST_0x(fldlg2, kX86InstIdFldlg2)
//! Push ln(2) on the FPU stack (FPU).
//! PUSH `ln(2)` (FPU).
INST_0x(fldln2, kX86InstIdFldln2)
//! Push +0.0 on the FPU stack (FPU).
//! PUSH `+0.0` (FPU).
INST_0x(fldz, kX86InstIdFldz)
//! Load x87 FPU control word (2 bytes) (FPU).
//! Load x87 FPU control word from `word_ptr[o0]` (FPU).
INST_1x(fldcw, kX86InstIdFldcw, X86Mem)
//! Load x87 FPU environment (14 or 28 bytes) (FPU).
//! Load x87 FPU environment (14 or 28 bytes) from `[o0]` (FPU).
INST_1x(fldenv, kX86InstIdFldenv, X86Mem)
//! Multiply `o0` by `o1` (one has to be `fp0`) and store result in `o0` (FPU).
//! Multiply `o0 = o0 * o1` (one has to be `fp0`) (FPU).
INST_2x_(fmul, kX86InstIdFmul, X86FpReg, X86FpReg, o0.getRegIndex() == 0 || o1.getRegIndex() == 0)
//! Multiply fp0 by 32-bit or 64-bit `o0` and store result in fp0 (FPU).
//! Multiply `fp0 = fp0 * float_or_double[o0]` (FPU).
INST_1x(fmul, kX86InstIdFmul, X86Mem)
//! Multiply fp0 by `o0` and pop the FPU stack (FPU).
//! Multiply `o0 = o0 * fp0` and POP (FPU).
INST_1x(fmulp, kX86InstIdFmulp, X86FpReg)
//! \overload
//! Multiply `fp1 = fp1 * fp0` and POP (FPU).
INST_0x(fmulp, kX86InstIdFmulp)
//! Clear exceptions (FPU).
INST_0x(fnclex, kX86InstIdFnclex)
//! No operation (FPU).
INST_0x(fnop, kX86InstIdFnop)
//! Save FPU state (FPU).
//! Save FPU state to `[o0]` (FPU).
INST_1x(fnsave, kX86InstIdFnsave, X86Mem)
//! Store x87 FPU environment (FPU).
//! Store x87 FPU environment to `[o0]` (FPU).
INST_1x(fnstenv, kX86InstIdFnstenv, X86Mem)
//! Store x87 FPU control word (FPU).
//! Store x87 FPU control word to `[o0]` (FPU).
INST_1x(fnstcw, kX86InstIdFnstcw, X86Mem)
//! Store x87 FPU status word to `o0` (AX) (FPU).
INST_1x_(fnstsw, kX86InstIdFnstsw, X86GpReg, o0.isRegCode(kX86RegTypeGpw, kX86RegIndexAx))
//! Store x87 FPU status word to `o0` (2 bytes) (FPU).
//! Store x87 FPU status word to `word_ptr[o0]` (FPU).
INST_1x(fnstsw, kX86InstIdFnstsw, X86Mem)
//! Arctan(`fp1` / `fp0`) and pop the FPU stack (FPU).
//! Partial Arctan `fp1 = atan2(fp1, fp0)` and POP (FPU).
INST_0x(fpatan, kX86InstIdFpatan)
//! Fprem(`fp0`, `fp1`) and pop the FPU stack (FPU).
//! Partial Remainder[Trunc] `fp0 = fp0 % fp1` (FPU) - doesn't POP the stack.
INST_0x(fprem, kX86InstIdFprem)
//! Fprem(`fp0`, `fp1`) and pop the FPU stack (FPU).
//! Partial Remainder[Round] `fp0 = fp0 % fp1` (FPU) - doesn't POP the stack.
INST_0x(fprem1, kX86InstIdFprem1)
//! Arctan(`fp0`) and pop the FPU stack (FPU).
//! Partial Tan `fp0 = tan(fp0)` and PUSH `1.0` (FPU).
INST_0x(fptan, kX86InstIdFptan)
//! Round `fp0` to Integer (FPU).
//! Round `fp0 = round(fp0)` (FPU).
INST_0x(frndint, kX86InstIdFrndint)
//! Restore FPU state from `o0` (94 or 108 bytes) (FPU).
//! Restore FPU state from `[o0]` (94 or 108 bytes) (FPU).
INST_1x(frstor, kX86InstIdFrstor, X86Mem)
//! Save FPU state to `o0` (94 or 108 bytes) (FPU).
//! Save FPU state to `[o0]` (94 or 108 bytes) (FPU).
INST_1x(fsave, kX86InstIdFsave, X86Mem)
//! Scale `fp0` by `fp1` (FPU).
//! Scale `fp0 = fp0 * pow(2, RoundTowardsZero(fp1))` (FPU).
INST_0x(fscale, kX86InstIdFscale)
//! Sine of `fp0` and store result in `fp0` (FPU).
//! Sin `fp0 = sin(fp0)` (FPU).
INST_0x(fsin, kX86InstIdFsin)
//! Sine and cosine of `fp0`, store sine in `fp0` and push cosine on the FPU stack (FPU).
//! Sincos `fp0 = sin(fp0)` and PUSH `cos(fp0)` (FPU).
INST_0x(fsincos, kX86InstIdFsincos)
//! Square root of `fp0` and store it in `fp0` (FPU).
//! Square root `fp0 = sqrt(fp0)` (FPU).
INST_0x(fsqrt, kX86InstIdFsqrt)
//! Store floating point value to 32-bit or 64-bit memory location (FPU).
//! Store floating point value to `float_or_double[o0]` (FPU).
INST_1x_(fst, kX86InstIdFst, X86Mem, o0.getSize() == 4 || o0.getSize() == 8)
//! Store floating point value to `o0` (FPU).
//! Copy `o0 = fp0` (FPU).
INST_1x(fst, kX86InstIdFst, X86FpReg)
//! Store floating point value to 32-bit or 64-bit memory location and pop the FPU stack (FPU).
//! Store floating point value to `float_or_double_or_extended[o0]` and POP (FPU).
INST_1x_(fstp, kX86InstIdFstp, X86Mem, o0.getSize() == 4 || o0.getSize() == 8 || o0.getSize() == 10)
//! Store floating point value to `o0` and pop the FPU stack (FPU).
//! Copy `o0 = fp0` and POP (FPU).
INST_1x(fstp, kX86InstIdFstp, X86FpReg)
//! Store x87 FPU control word to `o0` (2 bytes) (FPU).
//! Store x87 FPU control word to `word_ptr[o0]` (FPU).
INST_1x(fstcw, kX86InstIdFstcw, X86Mem)
//! Store x87 FPU environment to `o0` (14 or 28 bytes) (FPU).
//! Store x87 FPU environment to `[o0]` (14 or 28 bytes) (FPU).
INST_1x(fstenv, kX86InstIdFstenv, X86Mem)
//! Store x87 FPU status word to AX (FPU).
//! Store x87 FPU status word to `o0` (AX) (FPU).
INST_1x_(fstsw, kX86InstIdFstsw, X86GpReg, o0.getRegIndex() == kX86RegIndexAx)
//! Store x87 FPU status word (2 bytes) (FPU).
//! Store x87 FPU status word to `word_ptr[o0]` (FPU).
INST_1x(fstsw, kX86InstIdFstsw, X86Mem)
//! Subtract `o0` from `o0` (one has to be `fp0`) and store result in `o0` (FPU).
//! Subtract `o0 = o0 - o1` (one has to be `fp0`) (FPU).
INST_2x_(fsub, kX86InstIdFsub, X86FpReg, X86FpReg, o0.getRegIndex() == 0 || o1.getRegIndex() == 0)
//! Subtract 32-bit or 64-bit `o0` from fp0 and store result in fp0 (FPU).
//! Subtract `fp0 = fp0 - float_or_double[o0]` (FPU).
INST_1x_(fsub, kX86InstIdFsub, X86Mem, o0.getSize() == 4 || o0.getSize() == 8)
//! Subtract fp0 from `o0` and pop FPU stack (FPU).
//! Subtract `o0 = o0 - fp0` and POP (FPU).
INST_1x(fsubp, kX86InstIdFsubp, X86FpReg)
//! \overload
//! Subtract `fp1 = fp1 - fp0` and POP (FPU).
INST_0x(fsubp, kX86InstIdFsubp)
//! Reverse subtract `o1` from `o0` (one has to be `fp0`) and store result in `o0` (FPU).
//! Reverse subtract `o0 = o1 - o0` (one has to be `fp0`) (FPU).
INST_2x_(fsubr, kX86InstIdFsubr, X86FpReg, X86FpReg, o0.getRegIndex() == 0 || o1.getRegIndex() == 0)
//! Reverse subtract 32-bit or 64-bit `o0` from `fp0` and store result in `fp0` (FPU).
//! Reverse subtract `fp0 = fp0 - float_or_double[o0]` (FPU).
INST_1x_(fsubr, kX86InstIdFsubr, X86Mem, o0.getSize() == 4 || o0.getSize() == 8)
//! Reverse subtract `fp0` from `o0` and pop FPU stack (FPU).
//! Reverse subtract `o0 = o0 - fp0` and POP (FPU).
INST_1x(fsubrp, kX86InstIdFsubrp, X86FpReg)
//! \overload
//! Reverse subtract `fp1 = fp1 - fp0` and POP (FPU).
INST_0x(fsubrp, kX86InstIdFsubrp)
//! Floating point test - Compare `fp0` with 0.0. (FPU).
//! Compare `fp0` with `0.0` (FPU).
INST_0x(ftst, kX86InstIdFtst)
//! Unordered compare `fp0` with `o0` (FPU).
INST_1x(fucom, kX86InstIdFucom, X86FpReg)
//! Unordered compare `fp0` with `fp1` (FPU).
INST_0x(fucom, kX86InstIdFucom)
//! Unordered compare `fp0` and `o0`, check for ordered values and set EFLAGS (FPU).
//! Unordered compare `fp0` with `o0`, check for ordered values and set EFLAGS (FPU).
INST_1x(fucomi, kX86InstIdFucomi, X86FpReg)
//! Unordered compare `fp0` and `o0`, check for ordered values and set EFLAGS and pop the FPU stack (FPU).
//! Unordered compare `fp0` with `o0`, check for ordered values and set EFLAGS and POP (FPU).
INST_1x(fucomip, kX86InstIdFucomip, X86FpReg)
//! Unordered compare `fp0` with `o0` and pop the FPU stack (FPU).
//! Unordered compare `fp0` with `o0` and POP (FPU).
INST_1x(fucomp, kX86InstIdFucomp, X86FpReg)
//! Unordered compare `fp0` with `fp1` and pop the FPU stack (FPU).
//! Unordered compare `fp0` with `fp1` and POP (FPU).
INST_0x(fucomp, kX86InstIdFucomp)
//! Unordered compare `fp0` with `fp1` and pop the FPU stack twice (FPU).
//! Unordered compare `fp0` with `fp1` and POP twice (FPU).
INST_0x(fucompp, kX86InstIdFucompp)
INST_0x(fwait, kX86InstIdFwait)
//! Examine fp0 (FPU).
INST_0x(fxam, kX86InstIdFxam)
//! Exchange content of fp0 with `o0` (FPU).
//! Exchange `fp0` with `o0` (FPU).
INST_1x(fxch, kX86InstIdFxch, X86FpReg)
//! Restore FP/MMX/SIMD extension states to `o0` (512 bytes) (FPU, MMX, SSE).
INST_1x(fxrstor, kX86InstIdFxrstor, X86Mem)
//! Store FP/MMX/SIMD extension states to `o0` (512 bytes) (FPU, MMX, SSE).
INST_1x(fxsave, kX86InstIdFxsave, X86Mem)
//! Extract exponent and store to `fp0` and push significand on the FPU stack (FPU).
//! Extract `fp0 = exponent(fp0)` and PUSH `significand(fp0)` (FPU).
INST_0x(fxtract, kX86InstIdFxtract)
//! Compute `fp1 * log2(fp0)`, pop the FPU stack and store result in `fp0` (FPU).
//! Compute `fp1 = fp1 * log2(fp0)` and POP (FPU).
INST_0x(fyl2x, kX86InstIdFyl2x)
//! Compute `fp1 * log2(fp0 + 1)`, pop the FPU stack and store result in `fp0` (FPU).
//! Compute `fp1 = fp1 * log2(fp0 + 1)` and POP (FPU).
INST_0x(fyl2xp1, kX86InstIdFyl2xp1)
// --------------------------------------------------------------------------
@@ -2356,12 +2458,12 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
//! \overload
INST_2x(cvtdq2ps, kX86InstIdCvtdq2ps, X86XmmReg, X86Mem)
//! Convert packed DP-FP to packed QWORDs (SSE2).
//! Convert packed DP-FP to packed DWORDs (SSE2).
INST_2x(cvtpd2dq, kX86InstIdCvtpd2dq, X86XmmReg, X86XmmReg)
//! \overload
INST_2x(cvtpd2dq, kX86InstIdCvtpd2dq, X86XmmReg, X86Mem)
//! Convert packed DP-FP to packed QRODSs (SSE2).
//! Convert packed DP-FP to packed DWORDs (SSE2).
INST_2x(cvtpd2pi, kX86InstIdCvtpd2pi, X86MmReg, X86XmmReg)
//! \overload
INST_2x(cvtpd2pi, kX86InstIdCvtpd2pi, X86MmReg, X86Mem)
@@ -2376,7 +2478,7 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
//! \overload
INST_2x(cvtpi2pd, kX86InstIdCvtpi2pd, X86XmmReg, X86Mem)
//! Convert packed SP-FP to packed QWORDs (SSE2).
//! Convert packed SP-FP to packed DWORDs (SSE2).
INST_2x(cvtps2dq, kX86InstIdCvtps2dq, X86XmmReg, X86XmmReg)
//! \overload
INST_2x(cvtps2dq, kX86InstIdCvtps2dq, X86XmmReg, X86Mem)
@@ -2411,12 +2513,12 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
//! \overload
INST_2x(cvttpd2pi, kX86InstIdCvttpd2pi, X86MmReg, X86Mem)
//! Convert with truncation packed DP-FP to packed QWORDs (SSE2).
//! Convert with truncation packed DP-FP to packed DWORDs (SSE2).
INST_2x(cvttpd2dq, kX86InstIdCvttpd2dq, X86XmmReg, X86XmmReg)
//! \overload
INST_2x(cvttpd2dq, kX86InstIdCvttpd2dq, X86XmmReg, X86Mem)
//! Convert with truncation packed SP-FP to packed QWORDs (SSE2).
//! Convert with truncation packed SP-FP to packed DWORDs (SSE2).
INST_2x(cvttps2dq, kX86InstIdCvttps2dq, X86XmmReg, X86XmmReg)
//! \overload
INST_2x(cvttps2dq, kX86InstIdCvttps2dq, X86XmmReg, X86Mem)
@@ -2951,8 +3053,7 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
//! \overload
INST_2x(addsubps, kX86InstIdAddsubps, X86XmmReg, X86Mem)
//! Store truncated `fp0` as 16-bit, 32-bit or 64-bit integer to `o0` and pop
//! the FPU stack (FPU / SSE3).
//! Store truncated `fp0` to `short_or_int_or_long[o0]` and POP (FPU & SSE3).
INST_1x(fisttp, kX86InstIdFisttp, X86Mem)
//! Packed DP-FP horizontal add (SSE3).
@@ -3412,6 +3513,11 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
// [SSE4.2]
// --------------------------------------------------------------------------
//! Accumulate crc32 value (polynomial 0x11EDC6F41) (SSE4.2).
INST_2x_(crc32, kX86InstIdCrc32, X86GpReg, X86GpReg, o0.isRegType(kX86RegTypeGpd) || o0.isRegType(kX86RegTypeGpq))
//! \overload
INST_2x_(crc32, kX86InstIdCrc32, X86GpReg, X86Mem, o0.isRegType(kX86RegTypeGpd) || o0.isRegType(kX86RegTypeGpq))
//! Packed compare explicit length strings, return index (SSE4.2).
INST_3i(pcmpestri, kX86InstIdPcmpestri, X86XmmReg, X86XmmReg, Imm)
//! \overload
@@ -3437,6 +3543,43 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
//! \overload
INST_2x(pcmpgtq, kX86InstIdPcmpgtq, X86XmmReg, X86Mem)
// --------------------------------------------------------------------------
// [SSE4a]
// --------------------------------------------------------------------------
//! Extract a bit-field from the low QWORD of `o0`; the field length and
//! the bit index are taken from bits [5:0] and [13:8] of `o1` (SSE4a).
INST_2x(extrq, kX86InstIdExtrq, X86XmmReg, X86XmmReg)
//! Extract a bit-field from the low QWORD of `o0`; `o1` is the field
//! length and `o2` is the bit index, both given as immediates (SSE4a).
INST_3ii(extrq, kX86InstIdExtrq, X86XmmReg, Imm, Imm)
//! Insert the low bits of `o1` into the low QWORD of `o0`; the field
//! length and the bit index are taken from bits [69:64] and [77:72] of
//! `o1` (SSE4a).
INST_2x(insertq, kX86InstIdInsertq, X86XmmReg, X86XmmReg)
//! Insert a bit-field from the low QWORD of `o1` into the low QWORD of
//! `o0`; `o2` is the field length and `o3` is the bit index, both given
//! as immediates (SSE4a).
INST_4ii(insertq, kX86InstIdInsertq, X86XmmReg, X86XmmReg, Imm, Imm)
//! Store the scalar DP-FP value in `o1` to `[o0]` using a non-temporal
//! (cache-bypassing) hint (SSE4a).
INST_2x(movntsd, kX86InstIdMovntsd, X86Mem, X86XmmReg)
//! Store the scalar SP-FP value in `o1` to `[o0]` using a non-temporal
//! (cache-bypassing) hint (SSE4a).
INST_2x(movntss, kX86InstIdMovntss, X86Mem, X86XmmReg)
// --------------------------------------------------------------------------
// [POPCNT]
// --------------------------------------------------------------------------
//! Count the number of bits set to 1 in `o1` and store the result in `o0`
//! (POPCNT).
//!
//! 8-bit operands are not encodable and both registers have to be of the
//! same type (16-bit, 32-bit or 64-bit), as enforced by the condition.
INST_2x_(popcnt, kX86InstIdPopcnt, X86GpReg, X86GpReg, !o0.isGpb() && o0.getRegType() == o1.getRegType())
//! \overload
INST_2x_(popcnt, kX86InstIdPopcnt, X86GpReg, X86Mem, !o0.isGpb())
// --------------------------------------------------------------------------
// [LZCNT]
// --------------------------------------------------------------------------
//! Count the number of leading zero bits in `o1` and store the result in
//! `o0` (LZCNT).
//!
//! NOTE: On CPUs without LZCNT support the encoding decodes as BSR, which
//! has different semantics (bit index instead of zero count).
INST_2x(lzcnt, kX86InstIdLzcnt, X86GpReg, X86GpReg)
//! \overload
INST_2x(lzcnt, kX86InstIdLzcnt, X86GpReg, X86Mem)
// --------------------------------------------------------------------------
// [AESNI]
// --------------------------------------------------------------------------
@@ -3480,6 +3623,30 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
//! \overload
INST_3i(pclmulqdq, kX86InstIdPclmulqdq, X86XmmReg, X86Mem, Imm)
// --------------------------------------------------------------------------
// [XSAVE]
// --------------------------------------------------------------------------
//! Restore Processor Extended States specified by `EDX:EAX` from `[o0]` (XSAVE).
INST_1x(xrstor, kX86InstIdXrstor, X86Mem)
//! Restore Processor Extended States specified by `EDX:EAX` from `[o0]` (XSAVE&X64).
INST_1x(xrstor64, kX86InstIdXrstor64, X86Mem)
//! Save Processor Extended States specified by `EDX:EAX` to `[o0]` (XSAVE).
INST_1x(xsave, kX86InstIdXsave, X86Mem)
//! Save Processor Extended States specified by `EDX:EAX` to `[o0]` (XSAVE&X64).
INST_1x(xsave64, kX86InstIdXsave64, X86Mem)
//! Save Processor Extended States specified by `EDX:EAX` (Optimized) (XSAVEOPT).
INST_1x(xsaveopt, kX86InstIdXsaveopt, X86Mem)
//! Save Processor Extended States specified by `EDX:EAX` (Optimized) (XSAVEOPT&X64).
INST_1x(xsaveopt64, kX86InstIdXsaveopt64, X86Mem)
//! Get XCR - `EDX:EAX <- XCR[ECX]` (XSAVE).
INST_0x(xgetbv, kX86InstIdXgetbv)
//! Set XCR - `XCR[ECX] <- EDX:EAX` (XSAVE).
INST_0x(xsetbv, kX86InstIdXsetbv)
// --------------------------------------------------------------------------
// [AVX]
// --------------------------------------------------------------------------
@@ -3667,7 +3834,7 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
//! \overload
INST_2x(vcvtdq2ps, kX86InstIdVcvtdq2ps, X86YmmReg, X86Mem)
//! Convert packed DP-FP to packed QWORDs (AVX).
//! Convert packed DP-FP to packed DWORDs (AVX).
INST_2x(vcvtpd2dq, kX86InstIdVcvtpd2dq, X86XmmReg, X86XmmReg)
//! \overload
INST_2x(vcvtpd2dq, kX86InstIdVcvtpd2dq, X86XmmReg, X86YmmReg)
@@ -3681,7 +3848,7 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
//! \overload
INST_2x(vcvtpd2ps, kX86InstIdVcvtpd2ps, X86XmmReg, X86Mem)
//! Convert packed SP-FP to packed QWORDs (AVX).
//! Convert packed SP-FP to packed DWORDs (AVX).
INST_2x(vcvtps2dq, kX86InstIdVcvtps2dq, X86XmmReg, X86XmmReg)
//! \overload
INST_2x(vcvtps2dq, kX86InstIdVcvtps2dq, X86XmmReg, X86Mem)
@@ -3729,14 +3896,14 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
//! \overload
INST_2x(vcvtss2si, kX86InstIdVcvtss2si, X86GpReg, X86Mem)
//! Convert with truncation packed DP-FP to packed QWORDs (AVX).
//! Convert with truncation packed DP-FP to packed DWORDs (AVX).
INST_2x(vcvttpd2dq, kX86InstIdVcvttpd2dq, X86XmmReg, X86XmmReg)
//! \overload
INST_2x(vcvttpd2dq, kX86InstIdVcvttpd2dq, X86XmmReg, X86YmmReg)
//! \overload
INST_2x(vcvttpd2dq, kX86InstIdVcvttpd2dq, X86XmmReg, X86Mem)
//! Convert with truncation packed SP-FP to packed QWORDs (AVX).
//! Convert with truncation packed SP-FP to packed DWORDs (AVX).
INST_2x(vcvttps2dq, kX86InstIdVcvttps2dq, X86XmmReg, X86XmmReg)
//! \overload
INST_2x(vcvttps2dq, kX86InstIdVcvttps2dq, X86XmmReg, X86Mem)
@@ -3745,7 +3912,7 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
//! \overload
INST_2x(vcvttps2dq, kX86InstIdVcvttps2dq, X86YmmReg, X86Mem)
//! Convert with truncation scalar DP-FP to DWORD (AVX).
//! Convert with truncation scalar DP-FP to INT32 (AVX).
INST_2x(vcvttsd2si, kX86InstIdVcvttsd2si, X86GpReg, X86XmmReg)
//! \overload
INST_2x(vcvttsd2si, kX86InstIdVcvttsd2si, X86GpReg, X86Mem)
@@ -6393,15 +6560,6 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
//! \overload
INST_2x(tzcnt, kX86InstIdTzcnt, X86GpReg, X86Mem)
// --------------------------------------------------------------------------
// [LZCNT]
// --------------------------------------------------------------------------
//! Count the number of leading zero bits (LZCNT).
INST_2x(lzcnt, kX86InstIdLzcnt, X86GpReg, X86GpReg)
//! \overload
INST_2x(lzcnt, kX86InstIdLzcnt, X86GpReg, X86Mem)
// --------------------------------------------------------------------------
// [BMI2]
// --------------------------------------------------------------------------
@@ -6450,11 +6608,11 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
// [RDRAND]
// --------------------------------------------------------------------------
//! Store a random number in destination register.
//! Store a random number in destination register (RDRAND).
//!
//! Please do not use this instruction in cryptographic software. The result
//! doesn't necessarily have to be random which may cause a major security
//! issue in the software that relies on it.
//! doesn't necessarily have to be random, which may cause a major security
//! hole in the software.
INST_1x(rdrand, kX86InstIdRdrand, X86GpReg)
// --------------------------------------------------------------------------
@@ -6503,10 +6661,12 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
#undef INST_3x
#undef INST_3x_
#undef INST_3i
#undef INST_3ii
#undef INST_4x
#undef INST_4x_
#undef INST_4i
#undef INST_4ii
};
//! \}

View File

@@ -60,14 +60,18 @@ const X86VarInfo _x86VarInfo[] = {
/* 10: kVarTypeFp32 */ { kX86RegTypeFp , 4 , C(Fp) , D(Sp) , "fp" },
/* 11: kVarTypeFp64 */ { kX86RegTypeFp , 8 , C(Fp) , D(Dp) , "fp" },
/* 12: kX86VarTypeMm */ { kX86RegTypeMm , 8 , C(Mm) , 0 , "mm" },
/* 13: kX86VarTypeXmm */ { kX86RegTypeXmm , 16, C(Xyz), 0 , "xmm" },
/* 14: kX86VarTypeXmmSs */ { kX86RegTypeXmm , 4 , C(Xyz), D(Sp) , "xmm" },
/* 15: kX86VarTypeXmmPs */ { kX86RegTypeXmm , 16, C(Xyz), D(Sp) | D(Packed), "xmm" },
/* 16: kX86VarTypeXmmSd */ { kX86RegTypeXmm , 8 , C(Xyz), D(Dp) , "xmm" },
/* 17: kX86VarTypeXmmPd */ { kX86RegTypeXmm , 16, C(Xyz), D(Dp) | D(Packed), "xmm" },
/* 18: kX86VarTypeYmm */ { kX86RegTypeYmm , 32, C(Xyz), 0 , "ymm" },
/* 19: kX86VarTypeYmmPs */ { kX86RegTypeYmm , 32, C(Xyz), D(Sp) | D(Packed), "ymm" },
/* 20: kX86VarTypeYmmPd */ { kX86RegTypeYmm , 32, C(Xyz), D(Dp) | D(Packed), "ymm" }
/* 13: kX86VarTypeK */ { kX86RegTypeK , 8 , C(K) , 0 , "k" },
/* 14: kX86VarTypeXmm */ { kX86RegTypeXmm , 16, C(Xyz), 0 , "xmm" },
/* 15: kX86VarTypeXmmSs */ { kX86RegTypeXmm , 4 , C(Xyz), D(Sp) , "xmm" },
/* 16: kX86VarTypeXmmPs */ { kX86RegTypeXmm , 16, C(Xyz), D(Sp) | D(Packed), "xmm" },
/* 17: kX86VarTypeXmmSd */ { kX86RegTypeXmm , 8 , C(Xyz), D(Dp) , "xmm" },
/* 18: kX86VarTypeXmmPd */ { kX86RegTypeXmm , 16, C(Xyz), D(Dp) | D(Packed), "xmm" },
/* 19: kX86VarTypeYmm */ { kX86RegTypeYmm , 32, C(Xyz), 0 , "ymm" },
/* 20: kX86VarTypeYmmPs */ { kX86RegTypeYmm , 32, C(Xyz), D(Sp) | D(Packed), "ymm" },
/* 21: kX86VarTypeYmmPd */ { kX86RegTypeYmm , 32, C(Xyz), D(Dp) | D(Packed), "ymm" },
/* 22: kX86VarTypeZmm */ { kX86RegTypeZmm , 64, C(Xyz), 0 , "zmm" },
/* 23: kX86VarTypeZmmPs */ { kX86RegTypeZmm , 64, C(Xyz), D(Sp) | D(Packed), "zmm" },
/* 24: kX86VarTypeZmmPd */ { kX86RegTypeZmm , 64, C(Xyz), D(Dp) | D(Packed), "zmm" }
};
#undef D
@@ -88,14 +92,18 @@ const uint8_t _x86VarMapping[kX86VarTypeCount] = {
/* 10: kVarTypeFp32 */ kVarTypeFp32,
/* 11: kVarTypeFp64 */ kVarTypeFp64,
/* 12: kX86VarTypeMm */ kX86VarTypeMm,
/* 13: kX86VarTypeXmm */ kX86VarTypeXmm,
/* 14: kX86VarTypeXmmSs */ kX86VarTypeXmmSs,
/* 15: kX86VarTypeXmmPs */ kX86VarTypeXmmPs,
/* 16: kX86VarTypeXmmSd */ kX86VarTypeXmmSd,
/* 17: kX86VarTypeXmmPd */ kX86VarTypeXmmPd,
/* 18: kX86VarTypeYmm */ kX86VarTypeYmm,
/* 19: kX86VarTypeYmmPs */ kX86VarTypeYmmPs,
/* 20: kX86VarTypeYmmPd */ kX86VarTypeYmmPd
/* 13: kX86VarTypeK */ kX86VarTypeK,
/* 14: kX86VarTypeXmm */ kX86VarTypeXmm,
/* 15: kX86VarTypeXmmSs */ kX86VarTypeXmmSs,
/* 16: kX86VarTypeXmmPs */ kX86VarTypeXmmPs,
/* 17: kX86VarTypeXmmSd */ kX86VarTypeXmmSd,
/* 18: kX86VarTypeXmmPd */ kX86VarTypeXmmPd,
/* 19: kX86VarTypeYmm */ kX86VarTypeYmm,
/* 20: kX86VarTypeYmmPs */ kX86VarTypeYmmPs,
/* 21: kX86VarTypeYmmPd */ kX86VarTypeYmmPd,
/* 22: kX86VarTypeZmm */ kX86VarTypeZmm,
/* 23: kX86VarTypeZmmPs */ kX86VarTypeZmmPs,
/* 24: kX86VarTypeZmmPd */ kX86VarTypeZmmPd
};
#endif // ASMJIT_BUILD_X86
@@ -114,14 +122,18 @@ const uint8_t _x64VarMapping[kX86VarTypeCount] = {
/* 10: kVarTypeFp32 */ kVarTypeFp32,
/* 11: kVarTypeFp64 */ kVarTypeFp64,
/* 12: kX86VarTypeMm */ kX86VarTypeMm,
/* 13: kX86VarTypeXmm */ kX86VarTypeXmm,
/* 14: kX86VarTypeXmmSs */ kX86VarTypeXmmSs,
/* 15: kX86VarTypeXmmPs */ kX86VarTypeXmmPs,
/* 16: kX86VarTypeXmmSd */ kX86VarTypeXmmSd,
/* 17: kX86VarTypeXmmPd */ kX86VarTypeXmmPd,
/* 18: kX86VarTypeYmm */ kX86VarTypeYmm,
/* 19: kX86VarTypeYmmPs */ kX86VarTypeYmmPs,
/* 20: kX86VarTypeYmmPd */ kX86VarTypeYmmPd
/* 13: kX86VarTypeK */ kX86VarTypeK,
/* 14: kX86VarTypeXmm */ kX86VarTypeXmm,
/* 15: kX86VarTypeXmmSs */ kX86VarTypeXmmSs,
/* 16: kX86VarTypeXmmPs */ kX86VarTypeXmmPs,
/* 17: kX86VarTypeXmmSd */ kX86VarTypeXmmSd,
/* 18: kX86VarTypeXmmPd */ kX86VarTypeXmmPd,
/* 19: kX86VarTypeYmm */ kX86VarTypeYmm,
/* 20: kX86VarTypeYmmPs */ kX86VarTypeYmmPs,
/* 21: kX86VarTypeYmmPd */ kX86VarTypeYmmPd,
/* 22: kX86VarTypeZmm */ kX86VarTypeZmm,
/* 23: kX86VarTypeZmmPs */ kX86VarTypeZmmPs,
/* 24: kX86VarTypeZmmPd */ kX86VarTypeZmmPd
};
#endif // ASMJIT_BUILD_X64
@@ -482,14 +494,14 @@ static Error X86FuncDecl_initFunc(X86FuncDecl* self, uint32_t arch,
if (x86ArgIsInt(varType) && i < ASMJIT_ARRAY_SIZE(self->_passedOrderGp)) {
arg._regIndex = self->_passedOrderGp[i];
self->_used.add(kX86RegClassGp, IntUtil::mask(arg.getRegIndex()));
self->_used.or_(kX86RegClassGp, IntUtil::mask(arg.getRegIndex()));
continue;
}
if (x86ArgIsFp(varType) && i < ASMJIT_ARRAY_SIZE(self->_passedOrderXmm)) {
arg._varType = static_cast<uint8_t>(x86ArgTypeToXmmType(varType));
arg._regIndex = self->_passedOrderXmm[i];
self->_used.add(kX86RegClassXyz, IntUtil::mask(arg.getRegIndex()));
self->_used.or_(kX86RegClassXyz, IntUtil::mask(arg.getRegIndex()));
}
}
@@ -527,7 +539,7 @@ static Error X86FuncDecl_initFunc(X86FuncDecl* self, uint32_t arch,
continue;
arg._regIndex = self->_passedOrderGp[gpPos++];
self->_used.add(kX86RegClassGp, IntUtil::mask(arg.getRegIndex()));
self->_used.or_(kX86RegClassGp, IntUtil::mask(arg.getRegIndex()));
}
// Register arguments (Xmm), always left-to-right.
@@ -538,7 +550,7 @@ static Error X86FuncDecl_initFunc(X86FuncDecl* self, uint32_t arch,
if (x86ArgIsFp(varType)) {
arg._varType = static_cast<uint8_t>(x86ArgTypeToXmmType(varType));
arg._regIndex = self->_passedOrderXmm[xmmPos++];
self->_used.add(kX86RegClassXyz, IntUtil::mask(arg.getRegIndex()));
self->_used.or_(kX86RegClassXyz, IntUtil::mask(arg.getRegIndex()));
}
}
@@ -722,9 +734,9 @@ Error X86Compiler::setArch(uint32_t arch) {
_regCount.reset();
_regCount._gp = 8;
_regCount._fp = 8;
_regCount._mm = 8;
_regCount._xy = 8;
_regCount._k = 8;
_regCount._xyz = 8;
zax = x86::eax;
zcx = x86::ecx;
@@ -747,9 +759,9 @@ Error X86Compiler::setArch(uint32_t arch) {
_regCount.reset();
_regCount._gp = 16;
_regCount._fp = 8;
_regCount._mm = 8;
_regCount._xy = 16;
_regCount._k = 8;
_regCount._xyz = 16;
zax = x86::rax;
zcx = x86::rcx;
@@ -783,7 +795,7 @@ static InstNode* X86Compiler_newInst(X86Compiler* self, void* p, uint32_t code,
JumpNode* node = new(p) JumpNode(self, code, options, opList, opCount);
TargetNode* jTarget = self->getTargetById(opList[0].getId());
node->addFlags(code == kX86InstIdJmp ? kNodeFlagIsJmp | kNodeFlagIsTaken : kNodeFlagIsJcc);
node->orFlags(code == kX86InstIdJmp ? kNodeFlagIsJmp | kNodeFlagIsTaken : kNodeFlagIsJcc);
node->_target = jTarget;
node->_jumpNext = static_cast<JumpNode*>(jTarget->_from);
@@ -792,9 +804,9 @@ static InstNode* X86Compiler_newInst(X86Compiler* self, void* p, uint32_t code,
// The 'jmp' is always taken, conditional jump can contain hint, we detect it.
if (code == kX86InstIdJmp)
node->addFlags(kNodeFlagIsTaken);
node->orFlags(kNodeFlagIsTaken);
else if (options & kInstOptionTaken)
node->addFlags(kNodeFlagIsTaken);
node->orFlags(kNodeFlagIsTaken);
node->addOptions(options);
return node;
@@ -1025,6 +1037,22 @@ InstNode* X86Compiler::emit(uint32_t code, const Operand& o0, const Operand& o1,
return static_cast<InstNode*>(addNode(node));
}
//! Emit an instruction with three operands and a trailing `int` immediate.
InstNode* X86Compiler::emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, int o3_) {
  // Wrap the raw integer into an `Imm` operand and forward to the generic path.
  const Imm immOp3(o3_);
  InstNode* inst = newInst(code, o0, o1, o2, immOp3);
  return inst == NULL ? NULL : static_cast<InstNode*>(addNode(inst));
}
//! Emit an instruction with three operands and a trailing `uint64_t` immediate.
InstNode* X86Compiler::emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, uint64_t o3_) {
  // Wrap the raw integer into an `Imm` operand and forward to the generic path.
  const Imm immOp3(o3_);
  InstNode* inst = newInst(code, o0, o1, o2, immOp3);
  return inst == NULL ? NULL : static_cast<InstNode*>(addNode(inst));
}
// ============================================================================
// [asmjit::X86Compiler - Func]
// ============================================================================

View File

@@ -46,24 +46,34 @@ ASMJIT_ENUM(kX86VarType) {
//! Variable is Mm (MMX).
kX86VarTypeMm = 12,
//! Variable is K (AVX512+)
kX86VarTypeK,
//! Variable is Xmm (SSE+).
kX86VarTypeXmm,
//! Variable is scalar Xmm SP-FP number.
//! Variable is a scalar Xmm SP-FP number.
kX86VarTypeXmmSs,
//! Variable is packed Xmm SP-FP number (4 floats).
//! Variable is a packed Xmm SP-FP number (4 floats).
kX86VarTypeXmmPs,
//! Variable is scalar Xmm DP-FP number.
//! Variable is a scalar Xmm DP-FP number.
kX86VarTypeXmmSd,
//! Variable is packed Xmm DP-FP number (2 doubles).
//! Variable is a packed Xmm DP-FP number (2 doubles).
kX86VarTypeXmmPd,
//! Variable is Ymm (AVX+).
kX86VarTypeYmm,
//! Variable is packed Ymm SP-FP number (8 floats).
//! Variable is a packed Ymm SP-FP number (8 floats).
kX86VarTypeYmmPs,
//! Variable is packed Ymm DP-FP number (4 doubles).
//! Variable is a packed Ymm DP-FP number (4 doubles).
kX86VarTypeYmmPd,
//! Variable is Zmm (AVX512+).
kX86VarTypeZmm,
//! Variable is a packed Zmm SP-FP number (16 floats).
kX86VarTypeZmmPs,
//! Variable is a packed Zmm DP-FP number (8 doubles).
kX86VarTypeZmmPd,
//! Count of variable types.
kX86VarTypeCount,
@@ -76,7 +86,10 @@ ASMJIT_ENUM(kX86VarType) {
_kX86VarTypeXmmEnd = kX86VarTypeXmmPd,
_kX86VarTypeYmmStart = kX86VarTypeYmm,
_kX86VarTypeYmmEnd = kX86VarTypeYmmPd
_kX86VarTypeYmmEnd = kX86VarTypeYmmPd,
_kX86VarTypeZmmStart = kX86VarTypeZmm,
_kX86VarTypeZmmEnd = kX86VarTypeZmmPd
//! \}
};
@@ -552,6 +565,9 @@ struct X86Var : public Var {
//! Get whether the variable is Mm (64-bit) register.
ASMJIT_INLINE bool isMm() const { return _vreg.type == kX86RegTypeMm; }
//! Get whether the variable is K (64-bit) register.
ASMJIT_INLINE bool isK() const { return _vreg.type == kX86RegTypeK; }
//! Get whether the variable is Xmm (128-bit) register.
ASMJIT_INLINE bool isXmm() const { return _vreg.type == kX86RegTypeXmm; }
//! Get whether the variable is Ymm (256-bit) register.
@@ -2016,6 +2032,7 @@ struct ASMJIT_VCLASS X86Compiler : public Compiler {
= kArchHost
#endif // ASMJIT_HOST_X86 || ASMJIT_HOST_X64
);
//! Destroy the `X86Compiler` instance.
ASMJIT_API ~X86Compiler();
@@ -2023,7 +2040,12 @@ struct ASMJIT_VCLASS X86Compiler : public Compiler {
// [Arch]
// --------------------------------------------------------------------------
//! Get count of registers of the current architecture.
//! \internal
//!
//! Set the architecture to `arch`.
ASMJIT_API Error setArch(uint32_t arch);
//! Get count of registers of the current architecture and mode.
ASMJIT_INLINE const X86RegCount& getRegCount() const {
return _regCount;
}
@@ -2075,8 +2097,6 @@ struct ASMJIT_VCLASS X86Compiler : public Compiler {
return x86::ptr_abs(pAbs, index, shift, disp, _regSize);
}
ASMJIT_API Error setArch(uint32_t arch);
// --------------------------------------------------------------------------
// [Inst / Emit]
// --------------------------------------------------------------------------
@@ -2119,6 +2139,10 @@ struct ASMJIT_VCLASS X86Compiler : public Compiler {
ASMJIT_API InstNode* emit(uint32_t code, const Operand& o0, const Operand& o1, int o2);
//! \overload
ASMJIT_API InstNode* emit(uint32_t code, const Operand& o0, const Operand& o1, uint64_t o2);
//! \overload
ASMJIT_API InstNode* emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, int o3);
//! \overload
ASMJIT_API InstNode* emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, uint64_t o3);
// --------------------------------------------------------------------------
// [Func]
@@ -2438,7 +2462,7 @@ struct ASMJIT_VCLASS X86Compiler : public Compiler {
X86GpReg zdi;
// --------------------------------------------------------------------------
// [X86 Instructions]
// [Emit]
// --------------------------------------------------------------------------
#define INST_0x(_Inst_, _Code_) \
@@ -2614,6 +2638,31 @@ struct ASMJIT_VCLASS X86Compiler : public Compiler {
return emit(_Code_, o0, o1, o2); \
}
// INST_3ii - Declare a 3-operand intrinsic whose two trailing operands are
// immediates. Besides the generic `(_Op0_, _Op1_, _Op2_)` form it generates
// convenience overloads for the common integer types: `o1` is wrapped into an
// `Imm` operand and `o2` is forwarded through the `int` / `uint64_t` `emit()`
// overloads (unsigned and 64-bit values go through the `uint64_t` path so the
// immediate is not sign-extended).
#define INST_3ii(_Inst_, _Code_, _Op0_, _Op1_, _Op2_) \
ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2) { \
return emit(_Code_, o0, o1, o2); \
} \
/*! \overload */ \
ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, int o1, int o2) { \
Imm o1Imm(o1); \
return emit(_Code_, o0, o1Imm, o2); \
} \
/*! \overload */ \
ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, unsigned int o1, unsigned int o2) { \
Imm o1Imm(o1); \
return emit(_Code_, o0, o1Imm, static_cast<uint64_t>(o2)); \
} \
/*! \overload */ \
ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, int64_t o1, int64_t o2) { \
Imm o1Imm(o1); \
return emit(_Code_, o0, o1Imm, static_cast<uint64_t>(o2)); \
} \
/*! \overload */ \
ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, uint64_t o1, uint64_t o2) { \
Imm o1Imm(o1); \
return emit(_Code_, o0, o1Imm, o2); \
}
// INST_4x - Declare a generic 4-operand intrinsic forwarding to `emit()`.
// NOTE(review): the parameter list was missing `_Op3_` even though the
// generated signature uses `const _Op3_& o3`, which would expand to code
// referencing an undefined type token; added the missing macro parameter.
#define INST_4x(_Inst_, _Code_, _Op0_, _Op1_, _Op2_, _Op3_) \
  ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2, const _Op3_& o3) { \
    return emit(_Code_, o0, o1, o2, o3); \
  }
@@ -2646,6 +2695,35 @@ struct ASMJIT_VCLASS X86Compiler : public Compiler {
return emit(_Code_, o0, o1, o2, o3); \
}
// INST_4ii - Declare a 4-operand intrinsic whose two trailing operands are
// immediates; in addition to the generic form it provides integer overloads
// that wrap `o2` into an `Imm` operand and forward `o3` through the
// `int` / `uint64_t` `emit()` overloads.
#define INST_4ii(_Inst_, _Code_, _Op0_, _Op1_, _Op2_, _Op3_) \
  ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2, const _Op3_& o3) { \
    return emit(_Code_, o0, o1, o2, o3); \
  } \
  /*! \overload */ \
  ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, const _Op1_& o1, int o2, int o3) { \
    Imm imm2(o2); \
    return emit(_Code_, o0, o1, imm2, o3); \
  } \
  /*! \overload */ \
  ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, const _Op1_& o1, unsigned int o2, unsigned int o3) { \
    Imm imm2(o2); \
    return emit(_Code_, o0, o1, imm2, static_cast<uint64_t>(o3)); \
  } \
  /*! \overload */ \
  ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, const _Op1_& o1, int64_t o2, int64_t o3) { \
    Imm imm2(o2); \
    return emit(_Code_, o0, o1, imm2, static_cast<uint64_t>(o3)); \
  } \
  /*! \overload */ \
  ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, const _Op1_& o1, uint64_t o2, uint64_t o3) { \
    Imm imm2(o2); \
    return emit(_Code_, o0, o1, imm2, o3); \
  }
// --------------------------------------------------------------------------
// [X86/X64]
// --------------------------------------------------------------------------
//! Add with carry.
INST_2x(adc, kX86InstIdAdc, X86GpVar, X86GpVar)
//! \overload
@@ -2832,11 +2910,6 @@ struct ASMJIT_VCLASS X86Compiler : public Compiler {
return emit(kX86InstIdCpuid, x_eax, w_ebx, x_ecx, w_edx);
}
//! Accumulate crc32 value (polynomial 0x11EDC6F41) (SSE4.2).
INST_2x_(crc32, kX86InstIdCrc32, X86GpVar, X86GpVar, o0.isRegType(kX86RegTypeGpd) || o0.isRegType(kX86RegTypeGpq))
//! \overload
INST_2x_(crc32, kX86InstIdCrc32, X86GpVar, X86Mem, o0.isRegType(kX86RegTypeGpd) || o0.isRegType(kX86RegTypeGpq))
//! Decimal adjust AL after addition (X86 Only).
INST_1x(daa, kX86InstIdDaa, X86GpVar)
//! Decimal adjust AL after subtraction (X86 Only).
@@ -3024,11 +3097,6 @@ struct ASMJIT_VCLASS X86Compiler : public Compiler {
//! Pop stack into EFLAGS Register (32-bit or 64-bit).
INST_0x(popf, kX86InstIdPopf)
//! Return the count of number of bits set to 1 (SSE4.2).
INST_2x_(popcnt, kX86InstIdPopcnt, X86GpVar, X86GpVar, !o0.isGpb() && o0.getSize() == o1.getSize())
//! \overload
INST_2x_(popcnt, kX86InstIdPopcnt, X86GpVar, X86Mem, !o0.isGpb())
//! Push WORD or DWORD/QWORD on the stack.
INST_1x_(push, kX86InstIdPush, X86GpVar, o0.getSize() == 2 || o0.getSize() == _regSize)
//! Push WORD or DWORD/QWORD on the stack.
@@ -3299,273 +3367,277 @@ struct ASMJIT_VCLASS X86Compiler : public Compiler {
INST_2i(xor_, kX86InstIdXor, X86Mem, Imm)
// --------------------------------------------------------------------------
// [Fpu]
// [FPU]
// --------------------------------------------------------------------------
//! Compute 2^x - 1 (FPU).
//! Compute `2^x - 1` - `fp0 = POW(2, fp0) - 1` (FPU).
INST_0x(f2xm1, kX86InstIdF2xm1)
//! Absolute value of fp0 (FPU).
//! Abs `fp0 = ABS(fp0)` (FPU).
INST_0x(fabs, kX86InstIdFabs)
//! Add `o1` to `o0` (one has to be `fp0`) and store result in `o0` (FPU).
//! Add `o0 = o0 + o1` (one operand has to be `fp0`) (FPU).
INST_2x_(fadd, kX86InstIdFadd, X86FpReg, X86FpReg, o0.getRegIndex() == 0 || o1.getRegIndex() == 0)
//! Add 4-byte or 8-byte FP `o0` to fp0 and store result in fp0 (FPU).
//! Add `fp0 = fp0 + float_or_double[o0]` (FPU).
INST_1x(fadd, kX86InstIdFadd, X86Mem)
//! Add fp0 to `o0` and pop the FPU stack (FPU).
//! Add `o0 = o0 + fp0` and POP (FPU).
INST_1x(faddp, kX86InstIdFaddp, X86FpReg)
//! \overload
//! Add `fp1 = fp1 + fp0` and POP (FPU).
INST_0x(faddp, kX86InstIdFaddp)
//! Load binary coded decimal (FPU).
//! Load BCD from `[o0]` and PUSH (FPU).
INST_1x(fbld, kX86InstIdFbld, X86Mem)
//! Store BCD integer and Pop (FPU).
//! Store BCD-Integer to `[o0]` and POP (FPU).
INST_1x(fbstp, kX86InstIdFbstp, X86Mem)
//! Change fp0 sign (FPU).
//! Complement Sign `fp0 = -fp0` (FPU).
INST_0x(fchs, kX86InstIdFchs)
//! Clear exceptions (FPU).
INST_0x(fclex, kX86InstIdFclex)
//! Conditional move (FPU).
//! Conditional move `if (CF=1) fp0 = o0` (FPU).
INST_1x(fcmovb, kX86InstIdFcmovb, X86FpReg)
//! Conditional move (FPU).
//! Conditional move `if (CF|ZF=1) fp0 = o0` (FPU).
INST_1x(fcmovbe, kX86InstIdFcmovbe, X86FpReg)
//! Conditional move (FPU).
//! Conditional move `if (ZF=1) fp0 = o0` (FPU).
INST_1x(fcmove, kX86InstIdFcmove, X86FpReg)
//! Conditional move (FPU).
//! Conditional move `if (CF=0) fp0 = o0` (FPU).
INST_1x(fcmovnb, kX86InstIdFcmovnb, X86FpReg)
//! Conditional move (FPU).
//! Conditional move `if (CF|ZF=0) fp0 = o0` (FPU).
INST_1x(fcmovnbe, kX86InstIdFcmovnbe, X86FpReg)
//! Conditional move (FPU).
//! Conditional move `if (ZF=0) fp0 = o0` (FPU).
INST_1x(fcmovne, kX86InstIdFcmovne, X86FpReg)
//! Conditional move (FPU).
//! Conditional move `if (PF=0) fp0 = o0` (FPU).
INST_1x(fcmovnu, kX86InstIdFcmovnu, X86FpReg)
//! Conditional move (FPU).
//! Conditional move `if (PF=1) fp0 = o0` (FPU).
INST_1x(fcmovu, kX86InstIdFcmovu, X86FpReg)
//! Compare fp0 with `o0` (FPU).
//! Compare `fp0` with `o0` (FPU).
INST_1x(fcom, kX86InstIdFcom, X86FpReg)
//! Compare fp0 with fp1 (FPU).
//! Compare `fp0` with `fp1` (FPU).
INST_0x(fcom, kX86InstIdFcom)
//! Compare fp0 with 4-byte or 8-byte FP at `src` (FPU).
//! Compare `fp0` with `float_or_double[o0]` (FPU).
INST_1x(fcom, kX86InstIdFcom, X86Mem)
//! Compare fp0 with `o0` and pop the FPU stack (FPU).
//! Compare `fp0` with `o0` and POP (FPU).
INST_1x(fcomp, kX86InstIdFcomp, X86FpReg)
//! Compare fp0 with fp1 and pop the FPU stack (FPU).
//! Compare `fp0` with `fp1` and POP (FPU).
INST_0x(fcomp, kX86InstIdFcomp)
//! Compare fp0 with 4-byte or 8-byte FP at `adr` and pop the FPU stack (FPU).
//! Compare `fp0` with `float_or_double[o0]` and POP (FPU).
INST_1x(fcomp, kX86InstIdFcomp, X86Mem)
//! Compare fp0 with fp1 and pop the FPU stack twice (FPU).
//! Compare `fp0` with `fp1` and POP twice (FPU).
INST_0x(fcompp, kX86InstIdFcompp)
//! Compare fp0 and `o0` and Set EFLAGS (FPU).
//! Compare `fp0` with `o0` and set EFLAGS (FPU).
INST_1x(fcomi, kX86InstIdFcomi, X86FpReg)
//! Compare fp0 and `o0` and Set EFLAGS and pop the FPU stack (FPU).
//! Compare `fp0` with `o0` and set EFLAGS and POP (FPU).
INST_1x(fcomip, kX86InstIdFcomip, X86FpReg)
//! Calculate cosine of fp0 and store result in fp0 (FPU).
//! Cos `fp0 = cos(fp0)` (FPU).
INST_0x(fcos, kX86InstIdFcos)
//! Decrement FPU stack-top pointer (FPU).
//! Decrement FPU stack pointer (FPU).
INST_0x(fdecstp, kX86InstIdFdecstp)
//! Divide `o0` by `o1` (one has to be `fp0`) (FPU).
//! Divide `o0 = o0 / o1` (one has to be `fp0`) (FPU).
INST_2x_(fdiv, kX86InstIdFdiv, X86FpReg, X86FpReg, o0.getRegIndex() == 0 || o1.getRegIndex() == 0)
//! Divide fp0 by 32-bit or 64-bit FP value (FPU).
//! Divide `fp0 = fp0 / float_or_double[o0]` (FPU).
INST_1x(fdiv, kX86InstIdFdiv, X86Mem)
//! Divide `o0` by fp0 (FPU).
//! Divide `o0 = o0 / fp0` and POP (FPU).
INST_1x(fdivp, kX86InstIdFdivp, X86FpReg)
//! \overload
//! Divide `fp1 = fp1 / fp0` and POP (FPU).
INST_0x(fdivp, kX86InstIdFdivp)
//! Reverse divide `o0` by `o1` (one has to be `fp0`) (FPU).
//! Reverse divide `o0 = o1 / o0` (one has to be `fp0`) (FPU).
INST_2x_(fdivr, kX86InstIdFdivr, X86FpReg, X86FpReg, o0.getRegIndex() == 0 || o1.getRegIndex() == 0)
//! Reverse divide fp0 by 32-bit or 64-bit FP value (FPU).
//! Reverse divide `fp0 = float_or_double[o0] / fp0` (FPU).
INST_1x(fdivr, kX86InstIdFdivr, X86Mem)
//! Reverse divide `o0` by fp0 (FPU).
//! Reverse divide `o0 = fp0 / o0` and POP (FPU).
INST_1x(fdivrp, kX86InstIdFdivrp, X86FpReg)
//! \overload
//! Reverse divide `fp1 = fp0 / fp1` and POP (FPU).
INST_0x(fdivrp, kX86InstIdFdivrp)
//! Free FP register (FPU).
INST_1x(ffree, kX86InstIdFfree, X86FpReg)
//! Add 16-bit or 32-bit integer to fp0 (FPU).
//! Add `fp0 = fp0 + short_or_int[o0]` (FPU).
INST_1x_(fiadd, kX86InstIdFiadd, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
//! Compare fp0 with 16-bit or 32-bit Integer (FPU).
//! Compare `fp0` with `short_or_int[o0]` (FPU).
INST_1x_(ficom, kX86InstIdFicom, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
//! Compare fp0 with 16-bit or 32-bit Integer and pop the FPU stack (FPU).
//! Compare `fp0` with `short_or_int[o0]` and POP (FPU).
INST_1x_(ficomp, kX86InstIdFicomp, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
//! Divide fp0 by 32-bit or 16-bit integer (`src`) (FPU).
//! Divide `fp0 = fp0 / short_or_int[o0]` (FPU).
INST_1x_(fidiv, kX86InstIdFidiv, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
//! Reverse divide fp0 by 32-bit or 16-bit integer (`src`) (FPU).
//! Reverse divide `fp0 = short_or_int[o0] / fp0` (FPU).
INST_1x_(fidivr, kX86InstIdFidivr, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
//! Load 16-bit, 32-bit or 64-bit Integer and push it to the FPU stack (FPU).
//! Load `short_or_int_or_long[o0]` and PUSH (FPU).
INST_1x_(fild, kX86InstIdFild, X86Mem, o0.getSize() == 2 || o0.getSize() == 4 || o0.getSize() == 8)
//! Multiply fp0 by 16-bit or 32-bit integer and store it to fp0 (FPU).
//! Multiply `fp0 *= short_or_int[o0]` (FPU).
INST_1x_(fimul, kX86InstIdFimul, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
//! Increment FPU stack-top pointer (FPU).
//! Increment FPU stack pointer (FPU).
INST_0x(fincstp, kX86InstIdFincstp)
//! Initialize FPU (FPU).
INST_0x(finit, kX86InstIdFinit)
//! Subtract 16-bit or 32-bit integer from fp0 and store result to fp0 (FPU).
//! Subtract `fp0 = fp0 - short_or_int[o0]` (FPU).
INST_1x_(fisub, kX86InstIdFisub, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
//! Reverse subtract 16-bit or 32-bit integer from fp0 and store result to fp0 (FPU).
//! Reverse subtract `fp0 = short_or_int[o0] - fp0` (FPU).
INST_1x_(fisubr, kX86InstIdFisubr, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
//! Initialize FPU without checking for pending unmasked exceptions (FPU).
INST_0x(fninit, kX86InstIdFninit)
//! Store fp0 as 16-bit or 32-bit Integer to `o0` (FPU).
//! Store `fp0` as `short_or_int[o0]` (FPU).
INST_1x_(fist, kX86InstIdFist, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
//! Store fp0 as 16-bit, 32-bit or 64-bit Integer to `o0` and pop the FPU stack (FPU).
//! Store `fp0` as `short_or_int_or_long[o0]` and POP (FPU).
INST_1x_(fistp, kX86InstIdFistp, X86Mem, o0.getSize() == 2 || o0.getSize() == 4 || o0.getSize() == 8)
//! Push 32-bit, 64-bit or 80-bit floating point value on the FPU stack (FPU).
//! Load `float_or_double_or_extended[o0]` and PUSH (FPU).
INST_1x_(fld, kX86InstIdFld, X86Mem, o0.getSize() == 4 || o0.getSize() == 8 || o0.getSize() == 10)
//! Push `o0` on the FPU stack (FPU).
//! PUSH `o0` (FPU).
INST_1x(fld, kX86InstIdFld, X86FpReg)
//! Push +1.0 on the FPU stack (FPU).
//! PUSH `1.0` (FPU).
INST_0x(fld1, kX86InstIdFld1)
//! Push log2(10) on the FPU stack (FPU).
//! PUSH `log2(10)` (FPU).
INST_0x(fldl2t, kX86InstIdFldl2t)
//! Push log2(e) on the FPU stack (FPU).
//! PUSH `log2(e)` (FPU).
INST_0x(fldl2e, kX86InstIdFldl2e)
//! Push pi on the FPU stack (FPU).
//! PUSH `pi` (FPU).
INST_0x(fldpi, kX86InstIdFldpi)
//! Push log10(2) on the FPU stack (FPU).
//! PUSH `log10(2)` (FPU).
INST_0x(fldlg2, kX86InstIdFldlg2)
//! Push ln(2) on the FPU stack (FPU).
//! PUSH `ln(2)` (FPU).
INST_0x(fldln2, kX86InstIdFldln2)
//! Push +0.0 on the FPU stack (FPU).
//! PUSH `+0.0` (FPU).
INST_0x(fldz, kX86InstIdFldz)
//! Load x87 FPU control word (2 bytes) (FPU).
//! Load x87 FPU control word from `word_ptr[o0]` (FPU).
INST_1x(fldcw, kX86InstIdFldcw, X86Mem)
//! Load x87 FPU environment (14 or 28 bytes) (FPU).
//! Load x87 FPU environment (14 or 28 bytes) from `[o0]` (FPU).
INST_1x(fldenv, kX86InstIdFldenv, X86Mem)
//! Multiply `o0` by `o1` (one has to be `fp0`) and store result in `o0` (FPU).
//! Multiply `o0 = o0 * o1` (one has to be `fp0`) (FPU).
INST_2x_(fmul, kX86InstIdFmul, X86FpReg, X86FpReg, o0.getRegIndex() == 0 || o1.getRegIndex() == 0)
//! Multiply fp0 by 32-bit or 64-bit `o0` and store result in fp0 (FPU).
//! Multiply `fp0 = fp0 * float_or_double[o0]` (FPU).
INST_1x(fmul, kX86InstIdFmul, X86Mem)
//! Multiply fp0 by `o0` and pop the FPU stack (FPU).
//! Multiply `o0 = o0 * fp0` and POP (FPU).
INST_1x(fmulp, kX86InstIdFmulp, X86FpReg)
//! \overload
//! Multiply `fp1 = fp1 * fp0` and POP (FPU).
INST_0x(fmulp, kX86InstIdFmulp)
//! Clear exceptions (FPU).
INST_0x(fnclex, kX86InstIdFnclex)
//! No operation (FPU).
INST_0x(fnop, kX86InstIdFnop)
//! Save FPU state (FPU).
//! Save FPU state to `[o0]` (FPU).
INST_1x(fnsave, kX86InstIdFnsave, X86Mem)
//! Store x87 FPU environment (FPU).
//! Store x87 FPU environment to `[o0]` (FPU).
INST_1x(fnstenv, kX86InstIdFnstenv, X86Mem)
//! Store x87 FPU control word (FPU).
//! Store x87 FPU control word to `[o0]` (FPU).
INST_1x(fnstcw, kX86InstIdFnstcw, X86Mem)
//! Store x87 FPU status word to `o0` (AX) (FPU).
INST_1x_(fnstsw, kX86InstIdFnstsw, X86GpReg, o0.isRegCode(kX86RegTypeGpw, kX86RegIndexAx))
//! Store x87 FPU status word to `o0` (2 bytes) (FPU).
INST_1x(fnstsw, kX86InstIdFnstsw, X86GpVar)
//! Store x87 FPU status word to `word_ptr[o0]` (FPU).
INST_1x(fnstsw, kX86InstIdFnstsw, X86Mem)
//! Arctan(`fp1` / `fp0`) and pop the FPU stack (FPU).
//! Partial Arctan `fp1 = atan2(fp1, fp0)` and POP (FPU).
INST_0x(fpatan, kX86InstIdFpatan)
//! Fprem(`fp0`, `fp1`) and pop the FPU stack (FPU).
//! Partial Remainder[Trunc] `fp0 = fp0 % fp1` (FPU).
INST_0x(fprem, kX86InstIdFprem)
//! Fprem(`fp0`, `fp1`) and pop the FPU stack (FPU).
//! Partial Remainder[Round] `fp0 = fp0 % fp1` (IEEE 754) (FPU).
INST_0x(fprem1, kX86InstIdFprem1)
//! Arctan(`fp0`) and pop the FPU stack (FPU).
//! Partial Tan `fp0 = tan(fp0)` and PUSH `1.0` (FPU).
INST_0x(fptan, kX86InstIdFptan)
//! Round `fp0` to Integer (FPU).
//! Round `fp0 = round(fp0)` (FPU).
INST_0x(frndint, kX86InstIdFrndint)
//! Restore FPU state from `o0` (94 or 108 bytes) (FPU).
//! Restore FPU state from `[o0]` (94 or 108 bytes) (FPU).
INST_1x(frstor, kX86InstIdFrstor, X86Mem)
//! Save FPU state to `o0` (94 or 108 bytes) (FPU).
//! Save FPU state to `[o0]` (94 or 108 bytes) (FPU).
INST_1x(fsave, kX86InstIdFsave, X86Mem)
//! Scale `fp0` by `fp1` (FPU).
//! Scale `fp0 = fp0 * pow(2, RoundTowardsZero(fp1))` (FPU).
INST_0x(fscale, kX86InstIdFscale)
//! Sine of `fp0` and store result in `fp0` (FPU).
//! Sin `fp0 = sin(fp0)` (FPU).
INST_0x(fsin, kX86InstIdFsin)
//! Sine and cosine of `fp0`, store sine in `fp0` and push cosine on the FPU stack (FPU).
//! Sincos `fp0 = sin(fp0)` and PUSH `cos(fp0)` (FPU).
INST_0x(fsincos, kX86InstIdFsincos)
//! Square root of `fp0` and store it in `fp0` (FPU).
//! Square root `fp0 = sqrt(fp0)` (FPU).
INST_0x(fsqrt, kX86InstIdFsqrt)
//! Store floating point value to 32-bit or 64-bit memory location (FPU).
//! Store floating point value to `float_or_double[o0]` (FPU).
INST_1x_(fst, kX86InstIdFst, X86Mem, o0.getSize() == 4 || o0.getSize() == 8)
//! Store floating point value to `o0` (FPU).
//! Copy `o0 = fp0` (FPU).
INST_1x(fst, kX86InstIdFst, X86FpReg)
//! Store floating point value to 32-bit or 64-bit memory location and pop the FPU stack (FPU).
//! Store floating point value to `float_or_double_or_extended[o0]` and POP (FPU).
INST_1x_(fstp, kX86InstIdFstp, X86Mem, o0.getSize() == 4 || o0.getSize() == 8 || o0.getSize() == 10)
//! Store floating point value to `o0` and pop the FPU stack (FPU).
//! Copy `o0 = fp0` and POP (FPU).
INST_1x(fstp, kX86InstIdFstp, X86FpReg)
//! Store x87 FPU control word to `o0` (2 bytes) (FPU).
//! Store x87 FPU control word to `word_ptr[o0]` (FPU).
INST_1x(fstcw, kX86InstIdFstcw, X86Mem)
//! Store x87 FPU environment to `o0` (14 or 28 bytes) (FPU).
//! Store x87 FPU environment to `[o0]` (14 or 28 bytes) (FPU).
INST_1x(fstenv, kX86InstIdFstenv, X86Mem)
//! Store x87 FPU status word to `o0` (allocated in AX) (FPU).
//! Store x87 FPU status word to `o0` (AX) (FPU).
INST_1x(fstsw, kX86InstIdFstsw, X86GpVar)
//! Store x87 FPU status word (2 bytes) (FPU).
//! Store x87 FPU status word to `word_ptr[o0]` (FPU).
INST_1x(fstsw, kX86InstIdFstsw, X86Mem)
//! Subtract `o0` from `o0` (one has to be `fp0`) and store result in `o0` (FPU).
//! Subtract `o0 = o0 - o1` (one has to be `fp0`) (FPU).
INST_2x_(fsub, kX86InstIdFsub, X86FpReg, X86FpReg, o0.getRegIndex() == 0 || o1.getRegIndex() == 0)
//! Subtract 32-bit or 64-bit `o0` from fp0 and store result in fp0 (FPU).
//! Subtract `fp0 = fp0 - float_or_double[o0]` (FPU).
INST_1x_(fsub, kX86InstIdFsub, X86Mem, o0.getSize() == 4 || o0.getSize() == 8)
//! Subtract fp0 from `o0` and pop FPU stack (FPU).
//! Subtract `o0 = o0 - fp0` and POP (FPU).
INST_1x(fsubp, kX86InstIdFsubp, X86FpReg)
//! \overload
//! Subtract `fp1 = fp1 - fp0` and POP (FPU).
INST_0x(fsubp, kX86InstIdFsubp)
//! Reverse subtract `o1` from `o0` (one has to be `fp0`) and store result in `o0` (FPU).
//! Reverse subtract `o0 = o1 - o0` (one has to be `fp0`) (FPU).
INST_2x_(fsubr, kX86InstIdFsubr, X86FpReg, X86FpReg, o0.getRegIndex() == 0 || o1.getRegIndex() == 0)
//! Reverse subtract 32-bit or 64-bit `o0` from `fp0` and store result in `fp0` (FPU).
//! Reverse subtract `fp0 = fp0 - float_or_double[o0]` (FPU).
INST_1x_(fsubr, kX86InstIdFsubr, X86Mem, o0.getSize() == 4 || o0.getSize() == 8)
//! Reverse subtract `fp0` from `o0` and pop FPU stack (FPU).
//! Reverse subtract `o0 = o0 - fp0` and POP (FPU).
INST_1x(fsubrp, kX86InstIdFsubrp, X86FpReg)
//! \overload
//! Reverse subtract `fp1 = fp1 - fp0` and POP (FPU).
INST_0x(fsubrp, kX86InstIdFsubrp)
//! Floating point test - Compare `fp0` with 0.0. (FPU).
//! Compare `fp0` with `0.0` (FPU).
INST_0x(ftst, kX86InstIdFtst)
//! Unordered compare `fp0` with `o0` (FPU).
INST_1x(fucom, kX86InstIdFucom, X86FpReg)
//! Unordered compare `fp0` with `fp1` (FPU).
INST_0x(fucom, kX86InstIdFucom)
//! Unordered compare `fp0` and `o0`, check for ordered values and set EFLAGS (FPU).
//! Unordered compare `fp0` with `o0`, check for ordered values and set EFLAGS (FPU).
INST_1x(fucomi, kX86InstIdFucomi, X86FpReg)
//! Unordered compare `fp0` and `o0`, check for ordered values and set EFLAGS and pop the FPU stack (FPU).
//! Unordered compare `fp0` with `o0`, check for ordered values and set EFLAGS and POP (FPU).
INST_1x(fucomip, kX86InstIdFucomip, X86FpReg)
//! Unordered compare `fp0` with `o0` and pop the FPU stack (FPU).
//! Unordered compare `fp0` with `o0` and POP (FPU).
INST_1x(fucomp, kX86InstIdFucomp, X86FpReg)
//! Unordered compare `fp0` with `fp1` and pop the FPU stack (FPU).
//! Unordered compare `fp0` with `fp1` and POP (FPU).
INST_0x(fucomp, kX86InstIdFucomp)
//! Unordered compare `fp0` with `fp1` and pop the FPU stack twice (FPU).
//! Unordered compare `fp0` with `fp1` and POP twice (FPU).
INST_0x(fucompp, kX86InstIdFucompp)
INST_0x(fwait, kX86InstIdFwait)
//! Examine fp0 (FPU).
INST_0x(fxam, kX86InstIdFxam)
//! Exchange content of fp0 with `o0` (FPU).
//! Exchange `fp0` with `o0` (FPU).
INST_1x(fxch, kX86InstIdFxch, X86FpReg)
//! Restore FP/MMX/SIMD extension states to `o0` (512 bytes) (FPU, MMX, SSE).
INST_1x(fxrstor, kX86InstIdFxrstor, X86Mem)
//! Store FP/MMX/SIMD extension states to `o0` (512 bytes) (FPU, MMX, SSE).
INST_1x(fxsave, kX86InstIdFxsave, X86Mem)
//! Extract exponent and store to `fp0` and push significand on the FPU stack (FPU).
//! Extract `fp0 = exponent(fp0)` and PUSH `significand(fp0)` (FPU).
INST_0x(fxtract, kX86InstIdFxtract)
//! Compute `fp1 * log2(fp0)`, pop the FPU stack and store result in `fp0` (FPU).
//! Compute `fp1 = fp1 * log2(fp0)` and POP (FPU).
INST_0x(fyl2x, kX86InstIdFyl2x)
//! Compute `fp1 * log2(fp0 + 1)`, pop the FPU stack and store result in `fp0` (FPU).
//! Compute `fp1 = fp1 * log2(fp0 + 1)` and POP (FPU).
INST_0x(fyl2xp1, kX86InstIdFyl2xp1)
// --------------------------------------------------------------------------
@@ -3833,7 +3905,7 @@ struct ASMJIT_VCLASS X86Compiler : public Compiler {
INST_0x(emms, kX86InstIdEmms)
// --------------------------------------------------------------------------
// [3dNow]
// [3DNOW]
// --------------------------------------------------------------------------
//! Packed SP-FP to DWORD convert (3dNow!).
@@ -4921,8 +4993,7 @@ struct ASMJIT_VCLASS X86Compiler : public Compiler {
//! \overload
INST_2x(addsubps, kX86InstIdAddsubps, X86XmmVar, X86Mem)
//! Store truncated `fp0` as 16-bit, 32-bit or 64-bit integer to `o0` and pop
//! the FPU stack (FPU / SSE3).
//! Store truncated `fp0` to `short_or_int_or_long[o0]` and POP (FPU & SSE3).
INST_1x(fisttp, kX86InstIdFisttp, X86Mem)
//! Packed DP-FP horizontal add (SSE3).
@@ -5382,6 +5453,11 @@ struct ASMJIT_VCLASS X86Compiler : public Compiler {
// [SSE4.2]
// --------------------------------------------------------------------------
//! Accumulate crc32 value (polynomial 0x11EDC6F41) (SSE4.2).
INST_2x_(crc32, kX86InstIdCrc32, X86GpVar, X86GpVar, o0.isRegType(kX86RegTypeGpd) || o0.isRegType(kX86RegTypeGpq))
//! \overload
INST_2x_(crc32, kX86InstIdCrc32, X86GpVar, X86Mem, o0.isRegType(kX86RegTypeGpd) || o0.isRegType(kX86RegTypeGpq))
//! Packed compare explicit length strings, return index (SSE4.2).
INST_3i(pcmpestri, kX86InstIdPcmpestri, X86XmmVar, X86XmmVar, Imm)
//! \overload
@@ -5407,6 +5483,43 @@ struct ASMJIT_VCLASS X86Compiler : public Compiler {
//! \overload
INST_2x(pcmpgtq, kX86InstIdPcmpgtq, X86XmmVar, X86Mem)
// --------------------------------------------------------------------------
// [SSE4a]
// --------------------------------------------------------------------------
//! Extract a bit-field from `o0`; the field length and bit-index are taken
//! from the low quadword of `o1` (SSE4a).
INST_2x(extrq, kX86InstIdExtrq, X86XmmVar, X86XmmVar)
//! Extract a bit-field from `o0`; `o1` is the field length and `o2` the
//! bit-index (SSE4a).
INST_3ii(extrq, kX86InstIdExtrq, X86XmmVar, Imm, Imm)
//! Insert a bit-field from the low quadword of `o1` into `o0`; the field
//! length and bit-index are taken from the upper quadword of `o1` (SSE4a).
INST_2x(insertq, kX86InstIdInsertq, X86XmmVar, X86XmmVar)
//! Insert a bit-field from the low quadword of `o1` into `o0`; `o2` is the
//! field length and `o3` the bit-index (SSE4a).
INST_4ii(insertq, kX86InstIdInsertq, X86XmmVar, X86XmmVar, Imm, Imm)
//! Move non-temporal scalar DP-FP `double[o0] = o1` (SSE4a).
INST_2x(movntsd, kX86InstIdMovntsd, X86Mem, X86XmmVar)
//! Move non-temporal scalar SP-FP `float[o0] = o1` (SSE4a).
INST_2x(movntss, kX86InstIdMovntss, X86Mem, X86XmmVar)
// --------------------------------------------------------------------------
// [POPCNT]
// --------------------------------------------------------------------------
//! Count of bits set to 1 `o0 = POPCNT(o1)`; operands must have the same
//! size and 8-bit registers are not encodable (POPCNT).
INST_2x_(popcnt, kX86InstIdPopcnt, X86GpVar, X86GpVar, !o0.isGpb() && o0.getSize() == o1.getSize())
//! \overload
INST_2x_(popcnt, kX86InstIdPopcnt, X86GpVar, X86Mem, !o0.isGpb())
// --------------------------------------------------------------------------
// [LZCNT]
// --------------------------------------------------------------------------
//! Count the number of leading zero bits `o0 = LZCNT(o1)` (LZCNT).
INST_2x(lzcnt, kX86InstIdLzcnt, X86GpVar, X86GpVar)
//! \overload
INST_2x(lzcnt, kX86InstIdLzcnt, X86GpVar, X86Mem)
// --------------------------------------------------------------------------
// [AESNI]
// --------------------------------------------------------------------------
@@ -5450,6 +5563,34 @@ struct ASMJIT_VCLASS X86Compiler : public Compiler {
//! \overload
INST_3i(pclmulqdq, kX86InstIdPclmulqdq, X86XmmVar, X86Mem, Imm);
// --------------------------------------------------------------------------
// [XSAVE]
// --------------------------------------------------------------------------
//! Restore Processor Extended States specified by `o1:o2` (XSAVE).
INST_3x(xrstor, kX86InstIdXrstor, X86Mem, X86GpVar, X86GpVar)
//! Restore Processor Extended States specified by `o1:o2` (XSAVE&X64).
INST_3x(xrstor64, kX86InstIdXrstor64, X86Mem, X86GpVar, X86GpVar)
//! Save Processor Extended States specified by `o1:o2` (XSAVE).
INST_3x(xsave, kX86InstIdXsave, X86Mem, X86GpVar, X86GpVar)
//! Save Processor Extended States specified by `o1:o2` (XSAVE&X64).
INST_3x(xsave64, kX86InstIdXsave64, X86Mem, X86GpVar, X86GpVar)
//! Save Processor Extended States specified by `o1:o2` (Optimized) (XSAVEOPT).
INST_3x(xsaveopt, kX86InstIdXsave, X86Mem, X86GpVar, X86GpVar)
//! Save Processor Extended States specified by `o1:o2` (Optimized) (XSAVEOPT&X64).
INST_3x(xsaveopt64, kX86InstIdXsave64, X86Mem, X86GpVar, X86GpVar)
//! Get XCR - `o1:o2 <- XCR[o0]` (`EDX:EAX <- XCR[ECX]`) (XSAVE).
INST_3x(xgetbv, kX86InstIdXgetbv, X86GpVar, X86GpVar, X86GpVar)
//! Set XCR - `XCR[o0] <- o1:o2` (`XCR[ECX] <- EDX:EAX`) (XSAVE).
INST_3x(xsetbv, kX86InstIdXsetbv, X86GpVar, X86GpVar, X86GpVar)
// --------------------------------------------------------------------------
// [Cleanup]
// --------------------------------------------------------------------------
#undef INST_0x
#undef INST_1x
@@ -5465,10 +5606,12 @@ struct ASMJIT_VCLASS X86Compiler : public Compiler {
#undef INST_3x
#undef INST_3x_
#undef INST_3i
#undef INST_3ii
#undef INST_4x
#undef INST_4x_
#undef INST_4i
#undef INST_4ii
};
//! \}

View File

@@ -143,9 +143,9 @@ static void X86Context_annotateOperand(X86Context* self,
}
static bool X86Context_annotateInstruction(X86Context* self,
StringBuilder& sb, uint32_t code, const Operand* opList, uint32_t opCount) {
StringBuilder& sb, uint32_t instId, const Operand* opList, uint32_t opCount) {
sb.appendString(_x86InstInfo[code].getInstName());
sb.appendString(_x86InstInfo[instId].getInstName());
for (uint32_t i = 0; i < opCount; i++) {
if (i == 0)
sb.appendChar(' ');
@@ -207,7 +207,7 @@ static void X86Context_traceNode(X86Context* self, Node* node_) {
case kNodeTypeInst: {
InstNode* node = static_cast<InstNode*>(node_);
X86Context_annotateInstruction(self, sb,
node->getCode(), node->getOpList(), node->getOpCount());
node->getInstId(), node->getOpList(), node->getOpCount());
break;
}
@@ -287,8 +287,8 @@ void X86Context::reset() {
_stackFrameCell = NULL;
_gaRegs[kX86RegClassGp ] = IntUtil::bits(_regCount.getGp()) & ~IntUtil::mask(kX86RegIndexSp);
_gaRegs[kX86RegClassFp ] = IntUtil::bits(_regCount.getFp());
_gaRegs[kX86RegClassMm ] = IntUtil::bits(_regCount.getMm());
_gaRegs[kX86RegClassK ] = IntUtil::bits(_regCount.getK());
_gaRegs[kX86RegClassXyz] = IntUtil::bits(_regCount.getXyz());
_argBaseReg = kInvalidReg; // Used by patcher.
@@ -426,8 +426,26 @@ static const X86SpecialInst x86SpecialInstBlend[] = {
{ 0 , kInvalidReg , kVarAttrInReg }
};
static ASMJIT_INLINE const X86SpecialInst* X86SpecialInst_get(uint32_t code, const Operand* opList, uint32_t opCount) {
switch (code) {
static const X86SpecialInst x86SpecialInstXsaveXrstor[] = {
{ kInvalidReg , kInvalidReg , 0 },
{ kX86RegIndexDx, kInvalidReg , kVarAttrInReg },
{ kX86RegIndexAx, kInvalidReg , kVarAttrInReg }
};
static const X86SpecialInst x86SpecialInstXgetbv[] = {
{ kX86RegIndexCx, kInvalidReg , kVarAttrInReg },
{ kInvalidReg , kX86RegIndexDx, kVarAttrOutReg },
{ kInvalidReg , kX86RegIndexAx, kVarAttrOutReg }
};
static const X86SpecialInst x86SpecialInstXsetbv[] = {
{ kX86RegIndexCx, kInvalidReg , kVarAttrInReg },
{ kX86RegIndexDx, kInvalidReg , kVarAttrInReg },
{ kX86RegIndexAx, kInvalidReg , kVarAttrInReg }
};
static ASMJIT_INLINE const X86SpecialInst* X86SpecialInst_get(uint32_t instId, const Operand* opList, uint32_t opCount) {
switch (instId) {
case kX86InstIdCpuid:
return x86SpecialInstCpuid;
@@ -600,6 +618,20 @@ static ASMJIT_INLINE const X86SpecialInst* X86SpecialInst_get(uint32_t code, con
case kX86InstIdPblendvb:
return x86SpecialInstBlend;
case kX86InstIdXrstor:
case kX86InstIdXrstor64:
case kX86InstIdXsave:
case kX86InstIdXsave64:
case kX86InstIdXsaveopt:
case kX86InstIdXsaveopt64:
return x86SpecialInstXsaveXrstor;
case kX86InstIdXgetbv:
return x86SpecialInstXgetbv;
case kX86InstIdXsetbv:
return x86SpecialInstXsetbv;
default:
return NULL;
}
@@ -976,7 +1008,7 @@ void X86Context::emitMoveVarOnStack(
X86Reg r0, r1;
uint32_t regSize = compiler->getRegSize();
uint32_t instCode;
uint32_t instId;
switch (dstType) {
case kVarTypeInt8:
@@ -1002,7 +1034,7 @@ void X86Context::emitMoveVarOnStack(
r1.setSize(1);
r1.setCode(kX86RegTypeGpbLo, srcIndex);
instCode = (dstType == kVarTypeInt16 && srcType == kVarTypeInt8) ? kX86InstIdMovsx : kX86InstIdMovzx;
instId = (dstType == kVarTypeInt16 && srcType == kVarTypeInt8) ? kX86InstIdMovsx : kX86InstIdMovzx;
goto _ExtendMovGpD;
}
@@ -1027,7 +1059,7 @@ void X86Context::emitMoveVarOnStack(
r1.setSize(1);
r1.setCode(kX86RegTypeGpbLo, srcIndex);
instCode = (dstType == kVarTypeInt32 && srcType == kVarTypeInt8) ? kX86InstIdMovsx : kX86InstIdMovzx;
instId = (dstType == kVarTypeInt32 && srcType == kVarTypeInt8) ? kX86InstIdMovsx : kX86InstIdMovzx;
goto _ExtendMovGpD;
}
@@ -1036,7 +1068,7 @@ void X86Context::emitMoveVarOnStack(
r1.setSize(2);
r1.setCode(kX86RegTypeGpw, srcIndex);
instCode = (dstType == kVarTypeInt32 && srcType == kVarTypeInt16) ? kX86InstIdMovsx : kX86InstIdMovzx;
instId = (dstType == kVarTypeInt32 && srcType == kVarTypeInt16) ? kX86InstIdMovsx : kX86InstIdMovzx;
goto _ExtendMovGpD;
}
@@ -1060,7 +1092,7 @@ void X86Context::emitMoveVarOnStack(
r1.setSize(1);
r1.setCode(kX86RegTypeGpbLo, srcIndex);
instCode = (dstType == kVarTypeInt64 && srcType == kVarTypeInt8) ? kX86InstIdMovsx : kX86InstIdMovzx;
instId = (dstType == kVarTypeInt64 && srcType == kVarTypeInt8) ? kX86InstIdMovsx : kX86InstIdMovzx;
goto _ExtendMovGpXQ;
}
@@ -1069,7 +1101,7 @@ void X86Context::emitMoveVarOnStack(
r1.setSize(2);
r1.setCode(kX86RegTypeGpw, srcIndex);
instCode = (dstType == kVarTypeInt64 && srcType == kVarTypeInt16) ? kX86InstIdMovsx : kX86InstIdMovzx;
instId = (dstType == kVarTypeInt64 && srcType == kVarTypeInt16) ? kX86InstIdMovsx : kX86InstIdMovzx;
goto _ExtendMovGpXQ;
}
@@ -1078,7 +1110,7 @@ void X86Context::emitMoveVarOnStack(
r1.setSize(4);
r1.setCode(kX86RegTypeGpd, srcIndex);
instCode = kX86InstIdMovsxd;
instId = kX86InstIdMovsxd;
if (dstType == kVarTypeInt64 && srcType == kVarTypeInt32)
goto _ExtendMovGpXQ;
else
@@ -1104,7 +1136,7 @@ void X86Context::emitMoveVarOnStack(
r1.setSize(1);
r1.setCode(kX86RegTypeGpbLo, srcIndex);
instCode = kX86InstIdMovzx;
instId = kX86InstIdMovzx;
goto _ExtendMovGpXQ;
}
@@ -1113,7 +1145,7 @@ void X86Context::emitMoveVarOnStack(
r1.setSize(2);
r1.setCode(kX86RegTypeGpw, srcIndex);
instCode = kX86InstIdMovzx;
instId = kX86InstIdMovzx;
goto _ExtendMovGpXQ;
}
@@ -1175,7 +1207,7 @@ _ExtendMovGpD:
r0.setSize(4);
r0.setCode(kX86RegTypeGpd, srcIndex);
compiler->emit(instCode, r0, r1);
compiler->emit(instId, r0, r1);
compiler->emit(kX86InstIdMov, m0, r0);
return;
@@ -1185,7 +1217,7 @@ _ExtendMovGpXQ:
r0.setSize(8);
r0.setCode(kX86RegTypeGpq, srcIndex);
compiler->emit(instCode, r0, r1);
compiler->emit(instId, r0, r1);
compiler->emit(kX86InstIdMov, m0, r0);
}
else {
@@ -1193,7 +1225,7 @@ _ExtendMovGpXQ:
r0.setSize(4);
r0.setCode(kX86RegTypeGpd, srcIndex);
compiler->emit(instCode, r0, r1);
compiler->emit(instId, r0, r1);
_ExtendMovGpDQ:
compiler->emit(kX86InstIdMov, m0, r0);
@@ -1749,8 +1781,8 @@ static ASMJIT_INLINE Node* X86Context_getOppositeJccFlow(JumpNode* jNode) {
// ============================================================================
//! \internal
static void X86Context_prepareSingleVarInst(uint32_t code, VarAttr* va) {
switch (code) {
static void X86Context_prepareSingleVarInst(uint32_t instId, VarAttr* va) {
switch (instId) {
// - andn reg, reg ; Set all bits in reg to 0.
// - xor/pxor reg, reg ; Set all bits in reg to 0.
// - sub/psub reg, reg ; Set all bits in reg to 0.
@@ -1763,7 +1795,7 @@ static void X86Context_prepareSingleVarInst(uint32_t code, VarAttr* va) {
case kX86InstIdPsubsb : case kX86InstIdPsubsw : case kX86InstIdPsubusb : case kX86InstIdPsubusw :
case kX86InstIdPcmpeqb : case kX86InstIdPcmpeqw : case kX86InstIdPcmpeqd : case kX86InstIdPcmpeqq :
case kX86InstIdPcmpgtb : case kX86InstIdPcmpgtw : case kX86InstIdPcmpgtd : case kX86InstIdPcmpgtq :
va->delFlags(kVarAttrInReg);
va->andNotFlags(kVarAttrInReg);
break;
// - and reg, reg ; Nop.
@@ -1772,7 +1804,7 @@ static void X86Context_prepareSingleVarInst(uint32_t code, VarAttr* va) {
case kX86InstIdAnd : case kX86InstIdAndpd : case kX86InstIdAndps : case kX86InstIdPand :
case kX86InstIdOr : case kX86InstIdOrpd : case kX86InstIdOrps : case kX86InstIdPor :
case kX86InstIdXchg :
va->delFlags(kVarAttrOutReg);
va->andNotFlags(kVarAttrOutReg);
break;
}
}
@@ -1824,7 +1856,7 @@ static ASMJIT_INLINE X86RegMask X86Context_getUsedArgs(X86Context* self, X86Call
const FuncInOut& arg = decl->getArg(i);
if (!arg.hasRegIndex())
continue;
regs.add(x86VarTypeToClass(arg.getVarType()), IntUtil::mask(arg.getRegIndex()));
regs.or_(x86VarTypeToClass(arg.getVarType()), IntUtil::mask(arg.getRegIndex()));
}
return regs;
@@ -2117,7 +2149,7 @@ Error X86Context::fetch() {
goto _NoMemory; \
\
X86RegCount vaIndex; \
vaIndex.makeIndex(regCount); \
vaIndex.indexFromRegCount(regCount); \
\
map->_vaCount = vaCount; \
map->_count = regCount; \
@@ -2181,7 +2213,7 @@ Error X86Context::fetch() {
regCount.add(_Vd_->getClass()); \
} \
\
_Va_->addFlags(_Flags_); \
_Va_->orFlags(_Flags_); \
_Va_->addVarCount(1); \
} while (0)
@@ -2230,18 +2262,13 @@ _NextGroup:
VI_BEGIN();
if (node->getHint() == kVarHintAlloc) {
uint32_t remain[kX86RegClassCount];
uint32_t remain[_kX86RegClassManagedCount];
HintNode* cur = node;
remain[kX86RegClassGp ] = _regCount.getGp() - 1 - func->hasFuncFlag(kFuncFlagIsNaked);
remain[kX86RegClassFp ] = _regCount.getFp();
remain[kX86RegClassMm ] = _regCount.getMm();
// Correct. Instead of using `getXyz()` which may be 32 in 64-bit
// mode we use `getGp()`. The reason is that not all registers are
// accessible by all instructions when using AVX512, this makes the
// algorithm safe.
remain[kX86RegClassXyz] = _regCount.getGp();
remain[kX86RegClassK ] = _regCount.getK();
remain[kX86RegClassXyz] = _regCount.getXyz();
// Merge as many alloc-hints as possible.
for (;;) {
@@ -2333,14 +2360,14 @@ _NextGroup:
case kNodeTypeInst: {
InstNode* node = static_cast<InstNode*>(node_);
uint32_t code = node->getCode();
uint32_t instId = node->getInstId();
uint32_t flags = node->getFlags();
Operand* opList = node->getOpList();
uint32_t opCount = node->getOpCount();
if (opCount) {
const X86InstExtendedInfo& extendedInfo = _x86InstInfo[code].getExtendedInfo();
const X86InstExtendedInfo& extendedInfo = _x86InstInfo[instId].getExtendedInfo();
const X86SpecialInst* special = NULL;
VI_BEGIN();
@@ -2348,7 +2375,7 @@ _NextGroup:
if (extendedInfo.isFp())
flags |= kNodeFlagIsFp;
if (extendedInfo.isSpecial() && (special = X86SpecialInst_get(code, opList, opCount)) != NULL)
if (extendedInfo.isSpecial() && (special = X86SpecialInst_get(instId, opList, opCount)) != NULL)
flags |= kNodeFlagIsSpecial;
uint32_t gpAllowedMask = 0xFFFFFFFF;
@@ -2363,7 +2390,7 @@ _NextGroup:
VI_MERGE_VAR(vd, va, 0, gaRegs[vd->getClass()] & gpAllowedMask);
if (static_cast<X86Var*>(op)->isGpb()) {
va->addFlags(static_cast<X86GpVar*>(op)->isGpbLo() ? kX86VarAttrGpbLo : kX86VarAttrGpbHi);
va->orFlags(static_cast<X86GpVar*>(op)->isGpbLo() ? kX86VarAttrGpbLo : kX86VarAttrGpbHi);
if (arch == kArchX86) {
// If a byte register is accessed in 32-bit mode we have to limit
// all allocable registers for that variable to eax/ebx/ecx/edx.
@@ -2401,17 +2428,17 @@ _NextGroup:
if (inReg != kInvalidReg) {
uint32_t mask = IntUtil::mask(inReg);
inRegs.add(c, mask);
inRegs.or_(c, mask);
va->addInRegs(mask);
}
if (outReg != kInvalidReg) {
uint32_t mask = IntUtil::mask(outReg);
outRegs.add(c, mask);
outRegs.or_(c, mask);
va->setOutRegIndex(outReg);
}
va->addFlags(special[i].flags);
va->orFlags(special[i].flags);
}
else {
uint32_t inFlags = kVarAttrInReg;
@@ -2426,7 +2453,7 @@ _NextGroup:
// but there are some exceptions based on the operands' size
// and type.
if (extendedInfo.isMove()) {
uint32_t movSize = extendedInfo.getMoveSize();
uint32_t movSize = extendedInfo.getWriteSize();
uint32_t varSize = vd->getSize();
// Exception - If the source operand is a memory location
@@ -2461,7 +2488,7 @@ _NextGroup:
combinedFlags = inFlags;
}
// Imul.
else if (code == kX86InstIdImul && opCount == 3) {
else if (instId == kX86InstIdImul && opCount == 3) {
combinedFlags = outFlags;
}
}
@@ -2470,13 +2497,13 @@ _NextGroup:
combinedFlags = inFlags;
// Idiv is a special instruction, never handled here.
ASMJIT_ASSERT(code != kX86InstIdIdiv);
ASMJIT_ASSERT(instId != kX86InstIdIdiv);
// Xchg/Xadd/Imul.
if (extendedInfo.isXchg() || (code == kX86InstIdImul && opCount == 3 && i == 1))
if (extendedInfo.isXchg() || (instId == kX86InstIdImul && opCount == 3 && i == 1))
combinedFlags = inFlags | outFlags;
}
va->addFlags(combinedFlags);
va->orFlags(combinedFlags);
}
}
else if (op->isMem()) {
@@ -2488,7 +2515,7 @@ _NextGroup:
if (!vd->isStack()) {
VI_MERGE_VAR(vd, va, 0, gaRegs[vd->getClass()] & gpAllowedMask);
if (m->getMemType() == kMemTypeBaseIndex) {
va->addFlags(kVarAttrInReg);
va->orFlags(kVarAttrInReg);
}
else {
uint32_t inFlags = kVarAttrInMem;
@@ -2503,7 +2530,7 @@ _NextGroup:
// as if it's just move to the register. It's just a bit
// simpler as there are no special cases.
if (extendedInfo.isMove()) {
uint32_t movSize = IntUtil::iMax<uint32_t>(extendedInfo.getMoveSize(), m->getSize());
uint32_t movSize = IntUtil::iMax<uint32_t>(extendedInfo.getWriteSize(), m->getSize());
uint32_t varSize = vd->getSize();
if (movSize >= varSize)
@@ -2523,7 +2550,7 @@ _NextGroup:
combinedFlags = inFlags | outFlags;
}
va->addFlags(combinedFlags);
va->orFlags(combinedFlags);
}
}
}
@@ -2533,7 +2560,7 @@ _NextGroup:
vd = compiler->getVdById(m->getIndex());
VI_MERGE_VAR(vd, va, 0, gaRegs[kX86RegClassGp] & gpAllowedMask);
va->andAllocableRegs(indexMask);
va->addFlags(kVarAttrInReg);
va->orFlags(kVarAttrInReg);
}
}
}
@@ -2543,7 +2570,7 @@ _NextGroup:
// Handle instructions which result in zeros/ones or nop if used with the
// same destination and source operand.
if (vaCount == 1 && opCount >= 2 && opList[0].isVar() && opList[1].isVar() && !node->hasMemOp())
X86Context_prepareSingleVarInst(code, &vaTmpList[0]);
X86Context_prepareSingleVarInst(instId, &vaTmpList[0]);
}
VI_END(node_);
@@ -2577,7 +2604,7 @@ _NextGroup:
// backward jump. This behavior can be overridden by using
// `kInstOptionTaken` when the instruction is created.
if (!jNode->isTaken() && opCount == 1 && jTargetFlowId <= flowId) {
jNode->addFlags(kNodeFlagIsTaken);
jNode->orFlags(kNodeFlagIsTaken);
}
}
else if (jNext->isFetched()) {
@@ -2623,18 +2650,18 @@ _NextGroup:
if (arg.hasRegIndex()) {
if (x86VarTypeToClass(aType) == vd->getClass()) {
va->addFlags(kVarAttrOutReg);
va->orFlags(kVarAttrOutReg);
va->setOutRegIndex(arg.getRegIndex());
}
else {
va->addFlags(kVarAttrOutConv);
va->orFlags(kVarAttrOutConv);
}
}
else {
if ((x86VarTypeToClass(aType) == vd->getClass()) ||
(vType == kX86VarTypeXmmSs && aType == kVarTypeFp32) ||
(vType == kX86VarTypeXmmSd && aType == kVarTypeFp64)) {
va->addFlags(kVarAttrOutMem);
va->orFlags(kVarAttrOutMem);
}
else {
// TODO: [COMPILER] Not implemented.
@@ -2678,8 +2705,8 @@ _NextGroup:
// TODO: [COMPILER] Fix RetNode fetch.
VI_MERGE_VAR(vd, va, 0, 0);
va->setInRegs(i == 0 ? IntUtil::mask(kX86RegIndexAx) : IntUtil::mask(kX86RegIndexDx));
va->addFlags(kVarAttrInReg);
inRegs.add(retClass, va->getInRegs());
va->orFlags(kVarAttrInReg);
inRegs.or_(retClass, va->getInRegs());
}
}
}
@@ -2719,7 +2746,7 @@ _NextGroup:
vd = compiler->getVdById(target->getId());
VI_MERGE_VAR(vd, va, 0, 0);
va->addFlags(kVarAttrInReg | kVarAttrInCall);
va->orFlags(kVarAttrInReg | kVarAttrInCall);
if (va->getInRegs() == 0)
va->addAllocableRegs(gpAllocableMask);
}
@@ -2731,12 +2758,12 @@ _NextGroup:
if (!vd->isStack()) {
VI_MERGE_VAR(vd, va, 0, 0);
if (m->getMemType() == kMemTypeBaseIndex) {
va->addFlags(kVarAttrInReg | kVarAttrInCall);
va->orFlags(kVarAttrInReg | kVarAttrInCall);
if (va->getInRegs() == 0)
va->addAllocableRegs(gpAllocableMask);
}
else {
va->addFlags(kVarAttrInMem | kVarAttrInCall);
va->orFlags(kVarAttrInMem | kVarAttrInCall);
}
}
}
@@ -2746,7 +2773,7 @@ _NextGroup:
vd = compiler->getVdById(m->getIndex());
VI_MERGE_VAR(vd, va, 0, 0);
va->addFlags(kVarAttrInReg | kVarAttrInCall);
va->orFlags(kVarAttrInReg | kVarAttrInCall);
if ((va->getInRegs() & ~indexMask) == 0)
va->andAllocableRegs(gpAllocableMask & indexMask);
}
@@ -2769,10 +2796,10 @@ _NextGroup:
if (vd->getClass() == argClass) {
va->addInRegs(IntUtil::mask(arg.getRegIndex()));
va->addFlags(kVarAttrInReg | kVarAttrInArg);
va->orFlags(kVarAttrInReg | kVarAttrInArg);
}
else {
va->addFlags(kVarAttrInConv | kVarAttrInArg);
va->orFlags(kVarAttrInConv | kVarAttrInArg);
}
}
// If this is a stack-based argument we insert SArgNode instead of
@@ -2803,18 +2830,18 @@ _NextGroup:
if (vd->getClass() == retClass) {
va->setOutRegIndex(ret.getRegIndex());
va->addFlags(kVarAttrOutReg | kVarAttrOutRet);
va->orFlags(kVarAttrOutReg | kVarAttrOutRet);
}
else {
va->addFlags(kVarAttrOutConv | kVarAttrOutRet);
va->orFlags(kVarAttrOutConv | kVarAttrOutRet);
}
}
}
// Init clobbered.
clobberedRegs.set(kX86RegClassGp , IntUtil::bits(_regCount.getGp()) & (~decl->getPreserved(kX86RegClassGp )));
clobberedRegs.set(kX86RegClassFp , IntUtil::bits(_regCount.getFp()));
clobberedRegs.set(kX86RegClassMm , IntUtil::bits(_regCount.getMm()) & (~decl->getPreserved(kX86RegClassMm )));
clobberedRegs.set(kX86RegClassK , IntUtil::bits(_regCount.getK()) & (~decl->getPreserved(kX86RegClassK )));
clobberedRegs.set(kX86RegClassXyz, IntUtil::bits(_regCount.getXyz()) & (~decl->getPreserved(kX86RegClassXyz)));
VI_END(node_);
@@ -2860,7 +2887,7 @@ Error X86Context::annotate() {
if (node_->getComment() == NULL) {
if (node_->getType() == kNodeTypeInst) {
InstNode* node = static_cast<InstNode*>(node_);
X86Context_annotateInstruction(this, sb, node->getCode(), node->getOpList(), node->getOpCount());
X86Context_annotateInstruction(this, sb, node->getInstId(), node->getOpList(), node->getOpCount());
node_->setComment(static_cast<char*>(sa.dup(sb.getData(), sb.getLength() + 1)));
maxLen = IntUtil::iMax<uint32_t>(maxLen, static_cast<uint32_t>(sb.getLength()));
@@ -2962,7 +2989,7 @@ protected:
//! Variable map.
X86VarMap* _map;
//! VarAttr list (per register class).
VarAttr* _vaList[4];
VarAttr* _vaList[_kX86RegClassManagedCount];
//! Count of all VarAttr's.
uint32_t _vaCount;
@@ -2990,8 +3017,8 @@ ASMJIT_INLINE void X86BaseAlloc::init(Node* node, X86VarMap* map) {
{
VarAttr* va = map->getVaList();
_vaList[kX86RegClassGp ] = va;
_vaList[kX86RegClassFp ] = va + map->getVaStart(kX86RegClassFp );
_vaList[kX86RegClassMm ] = va + map->getVaStart(kX86RegClassMm );
_vaList[kX86RegClassK ] = va + map->getVaStart(kX86RegClassK );
_vaList[kX86RegClassXyz] = va + map->getVaStart(kX86RegClassXyz);
}
@@ -3223,8 +3250,8 @@ ASMJIT_INLINE Error X86VarAlloc::run(Node* node_) {
cleanup();
// Update clobbered mask.
_context->_clobberedRegs.add(_willAlloc);
_context->_clobberedRegs.add(map->_clobberedRegs);
_context->_clobberedRegs.or_(_willAlloc);
_context->_clobberedRegs.or_(map->_clobberedRegs);
// Unuse.
unuseAfter<kX86RegClassGp >();
@@ -3245,7 +3272,7 @@ ASMJIT_INLINE void X86VarAlloc::init(Node* node, X86VarMap* map) {
// add more registers when assigning registers to variables that don't need
// any specific register.
_willAlloc = map->_inRegs;
_willAlloc.add(map->_outRegs);
_willAlloc.or_(map->_outRegs);
_willSpill.reset();
}
@@ -3308,7 +3335,7 @@ ASMJIT_INLINE void X86VarAlloc::plan() {
if ((mandatoryRegs | allocableRegs) & regMask) {
va->setOutRegIndex(regIndex);
va->addFlags(kVarAttrAllocOutDone);
va->orFlags(kVarAttrAllocOutDone);
if (mandatoryRegs & regMask) {
// Case 'a' - 'willAlloc' contains initially all inRegs from all VarAttr's.
@@ -3329,7 +3356,7 @@ ASMJIT_INLINE void X86VarAlloc::plan() {
else {
if ((mandatoryRegs | allocableRegs) & regMask) {
va->setInRegIndex(regIndex);
va->addFlags(kVarAttrAllocInDone);
va->orFlags(kVarAttrAllocInDone);
if (mandatoryRegs & regMask) {
// Case 'a' - 'willAlloc' contains initially all inRegs from all VarAttr's.
@@ -3377,7 +3404,7 @@ ASMJIT_INLINE void X86VarAlloc::plan() {
}
else {
ASMJIT_TLOG("[RA-PLAN ] Done\n");
va->addFlags(kVarAttrAllocInDone);
va->orFlags(kVarAttrAllocInDone);
addVaDone(C);
continue;
}
@@ -3563,12 +3590,12 @@ ASMJIT_INLINE void X86VarAlloc::alloc() {
VarAttr* bVa = bVd->getVa();
_context->swapGp(aVd, bVd);
aVa->addFlags(kVarAttrAllocInDone);
aVa->orFlags(kVarAttrAllocInDone);
addVaDone(C);
// Doublehit, two registers allocated by a single swap.
if (bVa != NULL && bVa->getInRegIndex() == aIndex) {
bVa->addFlags(kVarAttrAllocInDone);
bVa->orFlags(kVarAttrAllocInDone);
addVaDone(C);
}
@@ -3579,7 +3606,7 @@ ASMJIT_INLINE void X86VarAlloc::alloc() {
else if (aIndex != kInvalidReg) {
_context->move<C>(aVd, bIndex);
aVa->addFlags(kVarAttrAllocInDone);
aVa->orFlags(kVarAttrAllocInDone);
addVaDone(C);
didWork = true;
@@ -3588,7 +3615,7 @@ ASMJIT_INLINE void X86VarAlloc::alloc() {
else {
_context->alloc<C>(aVd, bIndex);
aVa->addFlags(kVarAttrAllocInDone);
aVa->orFlags(kVarAttrAllocInDone);
addVaDone(C);
didWork = true;
@@ -3613,7 +3640,7 @@ ASMJIT_INLINE void X86VarAlloc::alloc() {
_context->attach<C>(vd, regIndex, false);
}
va->addFlags(kVarAttrAllocOutDone);
va->orFlags(kVarAttrAllocOutDone);
addVaDone(C);
}
}
@@ -3730,7 +3757,7 @@ ASMJIT_INLINE void X86VarAlloc::modified() {
uint32_t regMask = IntUtil::mask(regIndex);
vd->setModified(true);
_context->_x86State._modified.add(C, regMask);
_context->_x86State._modified.or_(C, regMask);
}
}
}
@@ -3972,7 +3999,7 @@ ASMJIT_INLINE void X86CallAlloc::plan() {
// is not clobbered (i.e. it will survive function call).
if ((regMask & inRegs) != 0 || ((regMask & ~clobbered) != 0 && (vaFlags & kVarAttrUnuse) == 0)) {
va->setInRegIndex(regIndex);
va->addFlags(kVarAttrAllocInDone);
va->orFlags(kVarAttrAllocInDone);
addVaDone(C);
}
else {
@@ -3985,7 +4012,7 @@ ASMJIT_INLINE void X86CallAlloc::plan() {
willFree |= regMask;
}
else {
va->addFlags(kVarAttrAllocInDone);
va->orFlags(kVarAttrAllocInDone);
addVaDone(C);
}
}
@@ -4131,12 +4158,12 @@ ASMJIT_INLINE void X86CallAlloc::alloc() {
if (C == kX86RegClassGp) {
_context->swapGp(aVd, bVd);
aVa->addFlags(kVarAttrAllocInDone);
aVa->orFlags(kVarAttrAllocInDone);
addVaDone(C);
// Doublehit, two registers allocated by a single swap.
if (bVa != NULL && bVa->getInRegIndex() == aIndex) {
bVa->addFlags(kVarAttrAllocInDone);
bVa->orFlags(kVarAttrAllocInDone);
addVaDone(C);
}
@@ -4147,7 +4174,7 @@ ASMJIT_INLINE void X86CallAlloc::alloc() {
else if (aIndex != kInvalidReg) {
_context->move<C>(aVd, bIndex);
aVa->addFlags(kVarAttrAllocInDone);
aVa->orFlags(kVarAttrAllocInDone);
addVaDone(C);
didWork = true;
@@ -4156,7 +4183,7 @@ ASMJIT_INLINE void X86CallAlloc::alloc() {
else {
_context->alloc<C>(aVd, bIndex);
aVa->addFlags(kVarAttrAllocInDone);
aVa->orFlags(kVarAttrAllocInDone);
addVaDone(C);
didWork = true;
@@ -4227,7 +4254,7 @@ ASMJIT_INLINE void X86CallAlloc::duplicate() {
for (uint32_t dupIndex = 0; inRegs != 0; dupIndex++, inRegs >>= 1) {
if (inRegs & 0x1) {
_context->emitMove(vd, dupIndex, regIndex, "Duplicate");
_context->_clobberedRegs.add(C, IntUtil::mask(dupIndex));
_context->_clobberedRegs.or_(C, IntUtil::mask(dupIndex));
}
}
}
@@ -4467,8 +4494,8 @@ static Error X86Context_initFunc(X86Context* self, X86FuncNode* func) {
// Setup "Save-Restore" registers.
func->_saveRestoreRegs.set(kX86RegClassGp , clobberedRegs.get(kX86RegClassGp ) & decl->getPreserved(kX86RegClassGp ));
func->_saveRestoreRegs.set(kX86RegClassFp , 0);
func->_saveRestoreRegs.set(kX86RegClassMm , clobberedRegs.get(kX86RegClassMm ) & decl->getPreserved(kX86RegClassMm ));
func->_saveRestoreRegs.set(kX86RegClassK , 0);
func->_saveRestoreRegs.set(kX86RegClassXyz, clobberedRegs.get(kX86RegClassXyz) & decl->getPreserved(kX86RegClassXyz));
ASMJIT_ASSERT(!func->_saveRestoreRegs.has(kX86RegClassGp, IntUtil::mask(kX86RegIndexSp)));
@@ -4540,7 +4567,7 @@ static Error X86Context_initFunc(X86Context* self, X86FuncNode* func) {
// from '_saveRestoreRegs' in case that it is preserved.
fRegMask = IntUtil::mask(fRegIndex);
if ((fRegMask & decl->getPreserved(kX86RegClassGp)) != 0) {
func->_saveRestoreRegs.del(kX86RegClassGp, fRegMask);
func->_saveRestoreRegs.andNot(kX86RegClassGp, fRegMask);
func->_isStackFrameRegPreserved = true;
}
@@ -4556,7 +4583,7 @@ static Error X86Context_initFunc(X86Context* self, X86FuncNode* func) {
else
stackFrameCopyRegs = IntUtil::keepNOnesFromRight(stackFrameCopyRegs, IntUtil::iMin<uint32_t>(maxRegs, 2));
func->_saveRestoreRegs.add(kX86RegClassGp, stackFrameCopyRegs & decl->getPreserved(kX86RegClassGp));
func->_saveRestoreRegs.or_(kX86RegClassGp, stackFrameCopyRegs & decl->getPreserved(kX86RegClassGp));
IntUtil::indexNOnesFromRight(func->_stackFrameCopyGpIndex, stackFrameCopyRegs, maxRegs);
}
}
@@ -5096,7 +5123,7 @@ _NextGroup:
}
next = node_->getNext();
node_->addFlags(kNodeFlagIsTranslated);
node_->orFlags(kNodeFlagIsTranslated);
ASMJIT_TSEC({
X86Context_traceNode(this, node_);
@@ -5143,7 +5170,7 @@ _NextGroup:
VarData* vd = va->getVd();
if (!liveness->getBit(vd->getContextId()))
va->addFlags(kVarAttrUnuse);
va->orFlags(kVarAttrUnuse);
}
}
}
@@ -5321,7 +5348,7 @@ _NextGroup:
for (;;) {
Node* next = node_->getNext();
node_->addFlags(kNodeFlagIsScheduled);
node_->orFlags(kNodeFlagIsScheduled);
// Shouldn't happen here, investigate if hit.
ASMJIT_ASSERT(node_ != stop);
@@ -5508,7 +5535,7 @@ static ASMJIT_INLINE Error X86Context_serialize(X86Context* self, X86Assembler*
case kNodeTypeInst: {
InstNode* node = static_cast<InstNode*>(node_);
uint32_t code = node->getCode();
uint32_t instId = node->getInstId();
uint32_t opCount = node->getOpCount();
const Operand* opList = node->getOpList();
@@ -5517,9 +5544,10 @@ static ASMJIT_INLINE Error X86Context_serialize(X86Context* self, X86Assembler*
const Operand* o0 = &noOperand;
const Operand* o1 = &noOperand;
const Operand* o2 = &noOperand;
const Operand* o3 = &noOperand;
if (node->isSpecial()) {
switch (code) {
switch (instId) {
case kX86InstIdCpuid:
break;
@@ -5632,6 +5660,19 @@ static ASMJIT_INLINE Error X86Context_serialize(X86Context* self, X86Assembler*
case kX86InstIdRepneScasB: case kX86InstIdRepneScasD: case kX86InstIdRepneScasQ: case kX86InstIdRepneScasW:
break;
case kX86InstIdXrstor:
case kX86InstIdXrstor64:
case kX86InstIdXsave:
case kX86InstIdXsave64:
case kX86InstIdXsaveopt:
case kX86InstIdXsaveopt64:
o0 = &opList[0];
break;
case kX86InstIdXgetbv:
case kX86InstIdXsetbv:
break;
default:
ASMJIT_ASSERT(!"Reached");
}
@@ -5640,10 +5681,11 @@ static ASMJIT_INLINE Error X86Context_serialize(X86Context* self, X86Assembler*
if (opCount > 0) o0 = &opList[0];
if (opCount > 1) o1 = &opList[1];
if (opCount > 2) o2 = &opList[2];
if (opCount > 3) o3 = &opList[3];
}
// We use this form, because it is the main one.
assembler->emit(code, *o0, *o1, *o2);
// Should call _emit() directly as 4 operand form is the main form.
assembler->emit(instId, *o0, *o1, *o2, *o3);
break;
}

View File

@@ -150,8 +150,8 @@ struct X86Context : public Context {
vd->setModified(modified);
_x86State.getListByClass(C)[regIndex] = vd;
_x86State._occupied.add(C, regMask);
_x86State._modified.add(C, static_cast<uint32_t>(modified) << regIndex);
_x86State._occupied.or_(C, regMask);
_x86State._modified.or_(C, static_cast<uint32_t>(modified) << regIndex);
ASMJIT_X86_CHECK_STATE
}
@@ -174,8 +174,8 @@ struct X86Context : public Context {
vd->setModified(false);
_x86State.getListByClass(C)[regIndex] = NULL;
_x86State._occupied.del(C, regMask);
_x86State._modified.del(C, regMask);
_x86State._occupied.andNot(C, regMask);
_x86State._modified.andNot(C, regMask);
ASMJIT_X86_CHECK_STATE
}
@@ -244,7 +244,7 @@ struct X86Context : public Context {
emitSave(vd, regIndex, "Save");
vd->setModified(false);
_x86State._modified.del(C, regMask);
_x86State._modified.andNot(C, regMask);
ASMJIT_X86_CHECK_STATE
}
@@ -381,7 +381,7 @@ struct X86Context : public Context {
uint32_t regMask = IntUtil::mask(regIndex);
vd->setModified(true);
_x86State._modified.add(C, regMask);
_x86State._modified.or_(C, regMask);
ASMJIT_X86_CHECK_STATE
}

View File

@@ -88,20 +88,29 @@ _Skip:
// in 64-bit mode not allows to use inline assembler, so we need intrinsic and
// we need also asm version.
union X86XCR {
uint64_t value;
struct {
uint32_t eax;
uint32_t edx;
};
};
// callCpuId() and detectCpuInfo() for x86 and x64 platforms begins here.
#if defined(ASMJIT_HOST_X86) || defined(ASMJIT_HOST_X64)
void X86CpuUtil::callCpuId(uint32_t inEax, uint32_t inEcx, X86CpuId* outResult) {
void X86CpuUtil::callCpuId(uint32_t inEax, uint32_t inEcx, X86CpuId* result) {
#if defined(_MSC_VER)
// 2009-02-05: Thanks to Mike Tajmajer for supporting VC7.1 compiler.
// ASMJIT_HOST_X64 is here only for readibility, only VS2005 can compile 64-bit code.
# if _MSC_VER >= 1400 || defined(ASMJIT_HOST_X64)
// Done by intrinsics.
__cpuidex(reinterpret_cast<int*>(outResult->i), inEax, inEcx);
__cpuidex(reinterpret_cast<int*>(result->i), inEax, inEcx);
# else // _MSC_VER < 1400
uint32_t cpuid_eax = inEax;
uint32_t cpuid_ecx = inCax;
uint32_t* cpuid_out = outResult->i;
uint32_t* cpuid_out = result->i;
__asm {
mov eax, cpuid_eax
@@ -119,18 +128,50 @@ void X86CpuUtil::callCpuId(uint32_t inEax, uint32_t inEcx, X86CpuId* outResult)
// Note, patched to preserve ebx/rbx register which is used by GCC.
# if defined(ASMJIT_HOST_X86)
# define __myCpuId(inEax, inEcx, outEax, outEbx, outEcx, outEdx) \
asm ("mov %%ebx, %%edi\n" \
__asm__ __volatile__( \
"mov %%ebx, %%edi\n" \
"cpuid\n" \
"xchg %%edi, %%ebx\n" \
: "=a" (outEax), "=D" (outEbx), "=c" (outEcx), "=d" (outEdx) : "a" (inEax), "c" (inEcx))
: "=a" (outEax), "=D" (outEbx), "=c" (outEcx), "=d" (outEdx) \
: "a" (inEax), "c" (inEcx))
# else
# define __myCpuId(inEax, inEcx, outEax, outEbx, outEcx, outEdx) \
asm ("mov %%rbx, %%rdi\n" \
__asm__ __volatile__( \
"mov %%rbx, %%rdi\n" \
"cpuid\n" \
"xchg %%rdi, %%rbx\n" \
: "=a" (outEax), "=D" (outEbx), "=c" (outEcx), "=d" (outEdx) : "a" (inEax), "c" (inEcx))
: "=a" (outEax), "=D" (outEbx), "=c" (outEcx), "=d" (outEdx) \
: "a" (inEax), "c" (inEcx))
# endif
__myCpuId(inEax, inEcx, outResult->eax, outResult->ebx, outResult->ecx, outResult->edx);
__myCpuId(inEax, inEcx, result->eax, result->ebx, result->ecx, result->edx);
#endif // COMPILER
}
static void callXGetBV(uint32_t inEcx, X86XCR* result) {
#if defined(_MSC_VER)
# if (_MSC_FULL_VER >= 160040219) // 2010SP1+
result->value = _xgetbv(inEcx);
# else
result->value = 0;
# endif
#elif defined(__GNUC__)
unsigned int eax, edx;
# if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)
__asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(inEcx));
# else
__asm__ __volatile__(".byte 0x0F, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(inEcx));
# endif
result->eax = eax;
result->edx = edx;
#else
result->value = 0;
#endif // COMPILER
}
@@ -138,7 +179,11 @@ void X86CpuUtil::detect(X86CpuInfo* cpuInfo) {
X86CpuId regs;
uint32_t i;
uint32_t maxId;
uint32_t maxBaseId;
bool maybeMPX = false;
X86XCR xcr0;
xcr0.value = 0;
// Clear everything except the '_size' member.
::memset(reinterpret_cast<uint8_t*>(cpuInfo) + sizeof(uint32_t),
@@ -148,14 +193,13 @@ void X86CpuUtil::detect(X86CpuInfo* cpuInfo) {
cpuInfo->_hwThreadsCount = CpuInfo::detectHwThreadsCount();
// --------------------------------------------------------------------------
// [CPUID EAX=0x00000000]
// [CPUID EAX=0x0]
// --------------------------------------------------------------------------
// Get vendor string/id.
callCpuId(0, 0, &regs);
maxId = regs.eax;
callCpuId(0x0, 0x0, &regs);
maxBaseId = regs.eax;
::memcpy(cpuInfo->_vendorString, &regs.ebx, 4);
::memcpy(cpuInfo->_vendorString + 4, &regs.edx, 4);
::memcpy(cpuInfo->_vendorString + 8, &regs.ecx, 4);
@@ -168,11 +212,12 @@ void X86CpuUtil::detect(X86CpuInfo* cpuInfo) {
}
// --------------------------------------------------------------------------
// [CPUID EAX=0x00000001]
// [CPUID EAX=0x1]
// --------------------------------------------------------------------------
// Get feature flags in ecx/edx and family/model in eax.
callCpuId(1, 0, &regs);
if (maxBaseId >= 0x1) {
// Get feature flags in ECX/EDX and family/model in EAX.
callCpuId(0x1, 0x0, &regs);
// Fill family and model fields.
cpuInfo->_family = (regs.eax >> 8) & 0x0F;
@@ -190,57 +235,111 @@ void X86CpuUtil::detect(X86CpuInfo* cpuInfo) {
cpuInfo->_flushCacheLineSize = ((regs.ebx >> 8) & 0xFF) * 8;
cpuInfo->_maxLogicalProcessors = ((regs.ebx >> 16) & 0xFF);
if (regs.ecx & 0x00000001U) cpuInfo->addFeature(kX86CpuFeatureSse3);
if (regs.ecx & 0x00000002U) cpuInfo->addFeature(kX86CpuFeaturePclmulqdq);
if (regs.ecx & 0x00000008U) cpuInfo->addFeature(kX86CpuFeatureMonitorMWait);
if (regs.ecx & 0x00000200U) cpuInfo->addFeature(kX86CpuFeatureSsse3);
if (regs.ecx & 0x00002000U) cpuInfo->addFeature(kX86CpuFeatureCmpXchg16B);
if (regs.ecx & 0x00080000U) cpuInfo->addFeature(kX86CpuFeatureSse41);
if (regs.ecx & 0x00100000U) cpuInfo->addFeature(kX86CpuFeatureSse42);
if (regs.ecx & 0x00400000U) cpuInfo->addFeature(kX86CpuFeatureMovbe);
if (regs.ecx & 0x00800000U) cpuInfo->addFeature(kX86CpuFeaturePopcnt);
if (regs.ecx & 0x02000000U) cpuInfo->addFeature(kX86CpuFeatureAesni);
if (regs.ecx & 0x40000000U) cpuInfo->addFeature(kX86CpuFeatureRdrand);
if (regs.ecx & 0x00000001U) cpuInfo->addFeature(kX86CpuFeatureSSE3);
if (regs.ecx & 0x00000002U) cpuInfo->addFeature(kX86CpuFeaturePCLMULQDQ);
if (regs.ecx & 0x00000008U) cpuInfo->addFeature(kX86CpuFeatureMONITOR);
if (regs.ecx & 0x00000200U) cpuInfo->addFeature(kX86CpuFeatureSSSE3);
if (regs.ecx & 0x00002000U) cpuInfo->addFeature(kX86CpuFeatureCMPXCHG16B);
if (regs.ecx & 0x00080000U) cpuInfo->addFeature(kX86CpuFeatureSSE4_1);
if (regs.ecx & 0x00100000U) cpuInfo->addFeature(kX86CpuFeatureSSE4_2);
if (regs.ecx & 0x00400000U) cpuInfo->addFeature(kX86CpuFeatureMOVBE);
if (regs.ecx & 0x00800000U) cpuInfo->addFeature(kX86CpuFeaturePOPCNT);
if (regs.ecx & 0x02000000U) cpuInfo->addFeature(kX86CpuFeatureAESNI);
if (regs.ecx & 0x04000000U) cpuInfo->addFeature(kX86CpuFeatureXSave);
if (regs.ecx & 0x08000000U) cpuInfo->addFeature(kX86CpuFeatureXSaveOS);
if (regs.ecx & 0x40000000U) cpuInfo->addFeature(kX86CpuFeatureRDRAND);
if (regs.edx & 0x00000010U) cpuInfo->addFeature(kX86CpuFeatureRdtsc);
if (regs.edx & 0x00000100U) cpuInfo->addFeature(kX86CpuFeatureCmpXchg8B);
if (regs.edx & 0x00008000U) cpuInfo->addFeature(kX86CpuFeatureCmov);
if (regs.edx & 0x00800000U) cpuInfo->addFeature(kX86CpuFeatureMmx);
if (regs.edx & 0x01000000U) cpuInfo->addFeature(kX86CpuFeatureFxsr);
if (regs.edx & 0x02000000U) cpuInfo->addFeature(kX86CpuFeatureSse).addFeature(kX86CpuFeatureMmxExt);
if (regs.edx & 0x04000000U) cpuInfo->addFeature(kX86CpuFeatureSse).addFeature(kX86CpuFeatureSse2);
if (regs.edx & 0x10000000U) cpuInfo->addFeature(kX86CpuFeatureMultithreading);
if (regs.edx & 0x00000010U) cpuInfo->addFeature(kX86CpuFeatureRDTSC);
if (regs.edx & 0x00000100U) cpuInfo->addFeature(kX86CpuFeatureCMPXCHG8B);
if (regs.edx & 0x00008000U) cpuInfo->addFeature(kX86CpuFeatureCMOV);
if (regs.edx & 0x00080000U) cpuInfo->addFeature(kX86CpuFeatureCLFLUSH);
if (regs.edx & 0x00800000U) cpuInfo->addFeature(kX86CpuFeatureMMX);
if (regs.edx & 0x01000000U) cpuInfo->addFeature(kX86CpuFeatureFXSR);
if (regs.edx & 0x02000000U) cpuInfo->addFeature(kX86CpuFeatureSSE).addFeature(kX86CpuFeatureMMX2);
if (regs.edx & 0x04000000U) cpuInfo->addFeature(kX86CpuFeatureSSE).addFeature(kX86CpuFeatureSSE2);
if (regs.edx & 0x10000000U) cpuInfo->addFeature(kX86CpuFeatureMT);
if (cpuInfo->_vendorId == kCpuVendorAmd && (regs.edx & 0x10000000U)) {
// AMD sets Multithreading to ON if it has more cores.
if (cpuInfo->_hwThreadsCount == 1)
// AMD sets Multithreading to ON if it has two or more cores.
if (cpuInfo->_hwThreadsCount == 1 && cpuInfo->_vendorId == kCpuVendorAmd && (regs.edx & 0x10000000U)) {
cpuInfo->_hwThreadsCount = 2;
}
// Detect AVX.
if (regs.ecx & 0x10000000U) {
cpuInfo->addFeature(kX86CpuFeatureAvx);
if (regs.ecx & 0x00000800U) cpuInfo->addFeature(kX86CpuFeatureXop);
if (regs.ecx & 0x00004000U) cpuInfo->addFeature(kX86CpuFeatureFma3);
if (regs.ecx & 0x00010000U) cpuInfo->addFeature(kX86CpuFeatureFma4);
if (regs.ecx & 0x20000000U) cpuInfo->addFeature(kX86CpuFeatureF16C);
// Get the content of XCR0 if supported by CPU and enabled by OS.
if ((regs.ecx & 0x0C000000U) == 0x0C000000U) {
callXGetBV(0, &xcr0);
}
// Detect AVX+.
if (regs.ecx & 0x10000000U) {
// - XCR0[2:1] == 11b
// XMM & YMM states are enabled by OS.
if ((xcr0.eax & 0x00000006U) == 0x00000006U) {
cpuInfo->addFeature(kX86CpuFeatureAVX);
if (regs.ecx & 0x00000800U) cpuInfo->addFeature(kX86CpuFeatureXOP);
if (regs.ecx & 0x00004000U) cpuInfo->addFeature(kX86CpuFeatureFMA3);
if (regs.ecx & 0x00010000U) cpuInfo->addFeature(kX86CpuFeatureFMA4);
if (regs.ecx & 0x20000000U) cpuInfo->addFeature(kX86CpuFeatureF16C);
}
}
}
// --------------------------------------------------------------------------
// [CPUID EAX=0x7 ECX=0x0]
// --------------------------------------------------------------------------
// Detect new features if the processor supports CPUID-07.
if (maxId >= 7) {
callCpuId(7, 0, &regs);
if (maxBaseId >= 0x7) {
callCpuId(0x7, 0x0, &regs);
if (regs.ebx & 0x00000001) cpuInfo->addFeature(kX86CpuFeatureFsGsBase);
if (regs.ebx & 0x00000008) cpuInfo->addFeature(kX86CpuFeatureBmi);
if (regs.ebx & 0x00000010) cpuInfo->addFeature(kX86CpuFeatureHle);
if (regs.ebx & 0x00000100) cpuInfo->addFeature(kX86CpuFeatureBmi2);
if (regs.ebx & 0x00000200) cpuInfo->addFeature(kX86CpuFeatureRepMovsbStosbExt);
if (regs.ebx & 0x00000800) cpuInfo->addFeature(kX86CpuFeatureRtm);
if (regs.ebx & 0x00000001U) cpuInfo->addFeature(kX86CpuFeatureFSGSBase);
if (regs.ebx & 0x00000008U) cpuInfo->addFeature(kX86CpuFeatureBMI);
if (regs.ebx & 0x00000010U) cpuInfo->addFeature(kX86CpuFeatureHLE);
if (regs.ebx & 0x00000100U) cpuInfo->addFeature(kX86CpuFeatureBMI2);
if (regs.ebx & 0x00000200U) cpuInfo->addFeature(kX86CpuFeatureMOVSBSTOSBOpt);
if (regs.ebx & 0x00000800U) cpuInfo->addFeature(kX86CpuFeatureRTM);
if (regs.ebx & 0x00004000U) maybeMPX = true;
if (regs.ebx & 0x00040000U) cpuInfo->addFeature(kX86CpuFeatureRDSEED);
if (regs.ebx & 0x00080000U) cpuInfo->addFeature(kX86CpuFeatureADX);
if (regs.ebx & 0x00800000U) cpuInfo->addFeature(kX86CpuFeatureCLFLUSHOpt);
if (regs.ebx & 0x20000000U) cpuInfo->addFeature(kX86CpuFeatureSHA);
// AVX2 depends on AVX.
if (cpuInfo->hasFeature(kX86CpuFeatureAvx)) {
if (regs.ebx & 0x00000020) cpuInfo->addFeature(kX86CpuFeatureAvx2);
if (regs.ecx & 0x00000001U) cpuInfo->addFeature(kX86CpuFeaturePREFETCHWT1);
// Detect AVX2.
if (cpuInfo->hasFeature(kX86CpuFeatureAVX)) {
if (regs.ebx & 0x00000020U) cpuInfo->addFeature(kX86CpuFeatureAVX2);
}
// Detect AVX-512+.
if (regs.ebx & 0x00010000U) {
// - XCR0[2:1] == 11b
// XMM & YMM states are enabled by OS.
// - XCR0[7:5] == 111b
// Upper 256-bit of ZMM0-XMM15 and ZMM16-ZMM31 state are enabled by OS.
if ((xcr0.eax & 0x00000076U) == 0x00000076U) {
cpuInfo->addFeature(kX86CpuFeatureAVX512F);
if (regs.ebx & 0x00020000U) cpuInfo->addFeature(kX86CpuFeatureAVX512DQ);
if (regs.ebx & 0x04000000U) cpuInfo->addFeature(kX86CpuFeatureAVX512PF);
if (regs.ebx & 0x08000000U) cpuInfo->addFeature(kX86CpuFeatureAVX512ER);
if (regs.ebx & 0x10000000U) cpuInfo->addFeature(kX86CpuFeatureAVX512CD);
if (regs.ebx & 0x40000000U) cpuInfo->addFeature(kX86CpuFeatureAVX512BW);
if (regs.ebx & 0x80000000U) cpuInfo->addFeature(kX86CpuFeatureAVX512VL);
}
}
}
// --------------------------------------------------------------------------
// [CPUID EAX=0xD, ECX=0x0]
// --------------------------------------------------------------------------
if (maxBaseId >= 0xD && maybeMPX) {
callCpuId(0xD, 0x0, &regs);
// Both the CPUID result and XCR0 bits have to be set to support MPX.
if (((regs.eax & xcr0.eax) & 0x00000018U) == 0x00000018U) {
cpuInfo->addFeature(kX86CpuFeatureMPX);
}
}
@@ -250,28 +349,28 @@ void X86CpuUtil::detect(X86CpuInfo* cpuInfo) {
// Calling cpuid with 0x80000000 as the in argument gets the number of valid
// extended IDs.
callCpuId(0x80000000, 0, &regs);
callCpuId(0x80000000, 0x0, &regs);
uint32_t maxExtId = IntUtil::iMin<uint32_t>(regs.eax, 0x80000004);
uint32_t* brand = reinterpret_cast<uint32_t*>(cpuInfo->_brandString);
for (i = 0x80000001; i <= maxExtId; i++) {
callCpuId(i, 0, &regs);
callCpuId(i, 0x0, &regs);
switch (i) {
case 0x80000001:
if (regs.ecx & 0x00000001U) cpuInfo->addFeature(kX86CpuFeatureLahfSahf);
if (regs.ecx & 0x00000020U) cpuInfo->addFeature(kX86CpuFeatureLzcnt);
if (regs.ecx & 0x00000040U) cpuInfo->addFeature(kX86CpuFeatureSse4A);
if (regs.ecx & 0x00000080U) cpuInfo->addFeature(kX86CpuFeatureMsse);
if (regs.ecx & 0x00000100U) cpuInfo->addFeature(kX86CpuFeaturePrefetch);
if (regs.ecx & 0x00000020U) cpuInfo->addFeature(kX86CpuFeatureLZCNT);
if (regs.ecx & 0x00000040U) cpuInfo->addFeature(kX86CpuFeatureSSE4A);
if (regs.ecx & 0x00000080U) cpuInfo->addFeature(kX86CpuFeatureMSSE);
if (regs.ecx & 0x00000100U) cpuInfo->addFeature(kX86CpuFeaturePREFETCH);
if (regs.edx & 0x00100000U) cpuInfo->addFeature(kX86CpuFeatureExecuteDisableBit);
if (regs.edx & 0x00200000U) cpuInfo->addFeature(kX86CpuFeatureFfxsr);
if (regs.edx & 0x00400000U) cpuInfo->addFeature(kX86CpuFeatureMmxExt);
if (regs.edx & 0x08000000U) cpuInfo->addFeature(kX86CpuFeatureRdtscp);
if (regs.edx & 0x40000000U) cpuInfo->addFeature(kX86CpuFeature3dNowExt).addFeature(kX86CpuFeatureMmxExt);
if (regs.edx & 0x80000000U) cpuInfo->addFeature(kX86CpuFeature3dNow);
if (regs.edx & 0x00100000U) cpuInfo->addFeature(kX86CpuFeatureNX);
if (regs.edx & 0x00200000U) cpuInfo->addFeature(kX86CpuFeatureFXSROpt);
if (regs.edx & 0x00400000U) cpuInfo->addFeature(kX86CpuFeatureMMX2);
if (regs.edx & 0x08000000U) cpuInfo->addFeature(kX86CpuFeatureRDTSCP);
if (regs.edx & 0x40000000U) cpuInfo->addFeature(kX86CpuFeature3DNOW2).addFeature(kX86CpuFeatureMMX2);
if (regs.edx & 0x80000000U) cpuInfo->addFeature(kX86CpuFeature3DNOW);
break;
case 0x80000002:

View File

@@ -31,92 +31,123 @@ struct X86CpuInfo;
//! X86 CPU features.
ASMJIT_ENUM(kX86CpuFeature) {
//! Cpu has Not-Execute-Bit.
kX86CpuFeatureNX = 0,
//! Cpu has multithreading.
kX86CpuFeatureMultithreading = 1,
//! Cpu has execute disable bit.
kX86CpuFeatureExecuteDisableBit,
kX86CpuFeatureMT,
//! Cpu has RDTSC.
kX86CpuFeatureRdtsc,
kX86CpuFeatureRDTSC,
//! Cpu has RDTSCP.
kX86CpuFeatureRdtscp,
kX86CpuFeatureRDTSCP,
//! Cpu has CMOV.
kX86CpuFeatureCmov,
kX86CpuFeatureCMOV,
//! Cpu has CMPXCHG8B.
kX86CpuFeatureCmpXchg8B,
//! Cpu has CMPXCHG16B (x64).
kX86CpuFeatureCmpXchg16B,
kX86CpuFeatureCMPXCHG8B,
//! Cpu has CMPXCHG16B (X64).
kX86CpuFeatureCMPXCHG16B,
//! Cpu has CLFLUSH.
kX86CpuFeatureClflush,
kX86CpuFeatureCLFLUSH,
//! Cpu has CLFLUSH (Optimized).
kX86CpuFeatureCLFLUSHOpt,
//! Cpu has PREFETCH.
kX86CpuFeaturePrefetch,
kX86CpuFeaturePREFETCH,
//! Cpu has PREFETCHWT1.
kX86CpuFeaturePREFETCHWT1,
//! Cpu has LAHF/SAHF.
kX86CpuFeatureLahfSahf,
//! Cpu has FXSAVE/FXRSTOR.
kX86CpuFeatureFxsr,
//! Cpu has FXSAVE/FXRSTOR optimizations.
kX86CpuFeatureFfxsr,
kX86CpuFeatureFXSR,
//! Cpu has FXSAVE/FXRSTOR (Optimized).
kX86CpuFeatureFXSROpt,
//! Cpu has MMX.
kX86CpuFeatureMmx,
kX86CpuFeatureMMX,
//! Cpu has extended MMX.
kX86CpuFeatureMmxExt,
kX86CpuFeatureMMX2,
//! Cpu has 3dNow!
kX86CpuFeature3dNow,
kX86CpuFeature3DNOW,
//! Cpu has enhanced 3dNow!
kX86CpuFeature3dNowExt,
kX86CpuFeature3DNOW2,
//! Cpu has SSE.
kX86CpuFeatureSse,
kX86CpuFeatureSSE,
//! Cpu has SSE2.
kX86CpuFeatureSse2,
kX86CpuFeatureSSE2,
//! Cpu has SSE3.
kX86CpuFeatureSse3,
//! Cpu has Supplemental SSE3 (SSSE3).
kX86CpuFeatureSsse3,
kX86CpuFeatureSSE3,
//! Cpu has SSSE3.
kX86CpuFeatureSSSE3,
//! Cpu has SSE4.A.
kX86CpuFeatureSse4A,
kX86CpuFeatureSSE4A,
//! Cpu has SSE4.1.
kX86CpuFeatureSse41,
kX86CpuFeatureSSE4_1,
//! Cpu has SSE4.2.
kX86CpuFeatureSse42,
kX86CpuFeatureSSE4_2,
//! Cpu has Misaligned SSE (MSSE).
kX86CpuFeatureMsse,
kX86CpuFeatureMSSE,
//! Cpu has MONITOR and MWAIT.
kX86CpuFeatureMonitorMWait,
kX86CpuFeatureMONITOR,
//! Cpu has MOVBE.
kX86CpuFeatureMovbe,
kX86CpuFeatureMOVBE,
//! Cpu has POPCNT.
kX86CpuFeaturePopcnt,
kX86CpuFeaturePOPCNT,
//! Cpu has LZCNT.
kX86CpuFeatureLzcnt,
kX86CpuFeatureLZCNT,
//! Cpu has AESNI.
kX86CpuFeatureAesni,
kX86CpuFeatureAESNI,
//! Cpu has PCLMULQDQ.
kX86CpuFeaturePclmulqdq,
kX86CpuFeaturePCLMULQDQ,
//! Cpu has RDRAND.
kX86CpuFeatureRdrand,
kX86CpuFeatureRDRAND,
//! Cpu has RDSEED.
kX86CpuFeatureRDSEED,
//! Cpu has SHA-1 and SHA-256.
kX86CpuFeatureSHA,
//! Cpu has XSAVE support - XSAVE/XRSTOR, XSETBV/XGETBV, and XCR0.
kX86CpuFeatureXSave,
//! OS has enabled XSAVE, you can call XGETBV to get value of XCR0.
kX86CpuFeatureXSaveOS,
//! Cpu has AVX.
kX86CpuFeatureAvx,
kX86CpuFeatureAVX,
//! Cpu has AVX2.
kX86CpuFeatureAvx2,
kX86CpuFeatureAVX2,
//! Cpu has F16C.
kX86CpuFeatureF16C,
//! Cpu has FMA3.
kX86CpuFeatureFma3,
kX86CpuFeatureFMA3,
//! Cpu has FMA4.
kX86CpuFeatureFma4,
kX86CpuFeatureFMA4,
//! Cpu has XOP.
kX86CpuFeatureXop,
kX86CpuFeatureXOP,
//! Cpu has BMI.
kX86CpuFeatureBmi,
kX86CpuFeatureBMI,
//! Cpu has BMI2.
kX86CpuFeatureBmi2,
kX86CpuFeatureBMI2,
//! Cpu has HLE.
kX86CpuFeatureHle,
kX86CpuFeatureHLE,
//! Cpu has RTM.
kX86CpuFeatureRtm,
kX86CpuFeatureRTM,
//! Cpu has ADX.
kX86CpuFeatureADX,
//! Cpu has MPX (Memory Protection Extensions).
kX86CpuFeatureMPX,
//! Cpu has FSGSBASE.
kX86CpuFeatureFsGsBase,
//! Cpu has enhanced REP MOVSB/STOSB.
kX86CpuFeatureRepMovsbStosbExt,
kX86CpuFeatureFSGSBase,
//! Cpu has optimized REP MOVSB/STOSB.
kX86CpuFeatureMOVSBSTOSBOpt,
//! Cpu has AVX-512F (Foundation).
kX86CpuFeatureAVX512F,
//! Cpu has AVX-512CD (Conflict Detection).
kX86CpuFeatureAVX512CD,
//! Cpu has AVX-512PF (Prefetch Instructions).
kX86CpuFeatureAVX512PF,
//! Cpu has AVX-512ER (Exponential and Reciprocal Instructions).
kX86CpuFeatureAVX512ER,
//! Cpu has AVX-512DQ (DWord/QWord).
kX86CpuFeatureAVX512DQ,
//! Cpu has AVX-512BW (Byte/Word).
kX86CpuFeatureAVX512BW,
//! Cpu has AVX-512VL (Vector Length Extensions).
kX86CpuFeatureAVX512VL,
//! Count of X86/X64 Cpu features.
kX86CpuFeatureCount

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -20,165 +20,262 @@
namespace asmjit {
// Prevent static initialization.
//
// Remap all classes to POD structs so they can be statically initialized
// without calling a constructor. Compiler will store these in data section.
struct X86GpReg { Operand::VRegOp data; };
struct X86FpReg { Operand::VRegOp data; };
struct X86MmReg { Operand::VRegOp data; };
struct X86XmmReg { Operand::VRegOp data; };
struct X86YmmReg { Operand::VRegOp data; };
struct X86SegReg { Operand::VRegOp data; };
namespace x86 {
// ============================================================================
// [asmjit::x86::Registers]
// ============================================================================
#define REG(_Class_, _Name_, _Type_, _Index_, _Size_) \
const _Class_ _Name_ = {{ \
#define REG(_Type_, _Index_, _Size_) {{ \
kOperandTypeReg, _Size_, { ((_Type_) << 8) + _Index_ }, kInvalidValue, {{ kInvalidVar, 0 }} \
}}
REG(X86GpReg, noGpReg, kInvalidReg, kInvalidReg, 0);
const X86RegData x86RegData = {
// RIP.
REG(kX86RegTypeRip, 0, 0),
// NpGp.
REG(kInvalidReg, kInvalidReg, 0),
REG(X86GpReg, al, kX86RegTypeGpbLo, kX86RegIndexAx, 1);
REG(X86GpReg, cl, kX86RegTypeGpbLo, kX86RegIndexCx, 1);
REG(X86GpReg, dl, kX86RegTypeGpbLo, kX86RegIndexDx, 1);
REG(X86GpReg, bl, kX86RegTypeGpbLo, kX86RegIndexBx, 1);
REG(X86GpReg, spl, kX86RegTypeGpbLo, kX86RegIndexSp, 1);
REG(X86GpReg, bpl, kX86RegTypeGpbLo, kX86RegIndexBp, 1);
REG(X86GpReg, sil, kX86RegTypeGpbLo, kX86RegIndexSi, 1);
REG(X86GpReg, dil, kX86RegTypeGpbLo, kX86RegIndexDi, 1);
REG(X86GpReg, r8b, kX86RegTypeGpbLo, 8, 1);
REG(X86GpReg, r9b, kX86RegTypeGpbLo, 9, 1);
REG(X86GpReg, r10b, kX86RegTypeGpbLo, 10, 1);
REG(X86GpReg, r11b, kX86RegTypeGpbLo, 11, 1);
REG(X86GpReg, r12b, kX86RegTypeGpbLo, 12, 1);
REG(X86GpReg, r13b, kX86RegTypeGpbLo, 13, 1);
REG(X86GpReg, r14b, kX86RegTypeGpbLo, 14, 1);
REG(X86GpReg, r15b, kX86RegTypeGpbLo, 15, 1);
// Segments.
{
REG(kX86RegTypeSeg, 0, 2), // Default.
REG(kX86RegTypeSeg, 1, 2), // ES.
REG(kX86RegTypeSeg, 2, 2), // CS.
REG(kX86RegTypeSeg, 3, 2), // SS.
REG(kX86RegTypeSeg, 4, 2), // DS.
REG(kX86RegTypeSeg, 5, 2), // FS.
REG(kX86RegTypeSeg, 6, 2) // GS.
},
REG(X86GpReg, ah, kX86RegTypeGpbHi, kX86RegIndexAx, 1);
REG(X86GpReg, ch, kX86RegTypeGpbHi, kX86RegIndexCx, 1);
REG(X86GpReg, dh, kX86RegTypeGpbHi, kX86RegIndexDx, 1);
REG(X86GpReg, bh, kX86RegTypeGpbHi, kX86RegIndexBx, 1);
// GpbLo.
{
REG(kX86RegTypeGpbLo, 0, 1),
REG(kX86RegTypeGpbLo, 1, 1),
REG(kX86RegTypeGpbLo, 2, 1),
REG(kX86RegTypeGpbLo, 3, 1),
REG(kX86RegTypeGpbLo, 4, 1),
REG(kX86RegTypeGpbLo, 5, 1),
REG(kX86RegTypeGpbLo, 6, 1),
REG(kX86RegTypeGpbLo, 7, 1),
REG(kX86RegTypeGpbLo, 8, 1),
REG(kX86RegTypeGpbLo, 9, 1),
REG(kX86RegTypeGpbLo, 10, 1),
REG(kX86RegTypeGpbLo, 11, 1),
REG(kX86RegTypeGpbLo, 12, 1),
REG(kX86RegTypeGpbLo, 13, 1),
REG(kX86RegTypeGpbLo, 14, 1),
REG(kX86RegTypeGpbLo, 15, 1)
},
REG(X86GpReg, ax, kX86RegTypeGpw, kX86RegIndexAx, 2);
REG(X86GpReg, cx, kX86RegTypeGpw, kX86RegIndexCx, 2);
REG(X86GpReg, dx, kX86RegTypeGpw, kX86RegIndexDx, 2);
REG(X86GpReg, bx, kX86RegTypeGpw, kX86RegIndexBx, 2);
REG(X86GpReg, sp, kX86RegTypeGpw, kX86RegIndexSp, 2);
REG(X86GpReg, bp, kX86RegTypeGpw, kX86RegIndexBp, 2);
REG(X86GpReg, si, kX86RegTypeGpw, kX86RegIndexSi, 2);
REG(X86GpReg, di, kX86RegTypeGpw, kX86RegIndexDi, 2);
REG(X86GpReg, r8w, kX86RegTypeGpw, 8, 2);
REG(X86GpReg, r9w, kX86RegTypeGpw, 9, 2);
REG(X86GpReg, r10w, kX86RegTypeGpw, 10, 2);
REG(X86GpReg, r11w, kX86RegTypeGpw, 11, 2);
REG(X86GpReg, r12w, kX86RegTypeGpw, 12, 2);
REG(X86GpReg, r13w, kX86RegTypeGpw, 13, 2);
REG(X86GpReg, r14w, kX86RegTypeGpw, 14, 2);
REG(X86GpReg, r15w, kX86RegTypeGpw, 15, 2);
// GpbHi.
{
REG(kX86RegTypeGpbHi, 0, 1),
REG(kX86RegTypeGpbHi, 1, 1),
REG(kX86RegTypeGpbHi, 2, 1),
REG(kX86RegTypeGpbHi, 3, 1)
},
REG(X86GpReg, eax, kX86RegTypeGpd, kX86RegIndexAx, 4);
REG(X86GpReg, ecx, kX86RegTypeGpd, kX86RegIndexCx, 4);
REG(X86GpReg, edx, kX86RegTypeGpd, kX86RegIndexDx, 4);
REG(X86GpReg, ebx, kX86RegTypeGpd, kX86RegIndexBx, 4);
REG(X86GpReg, esp, kX86RegTypeGpd, kX86RegIndexSp, 4);
REG(X86GpReg, ebp, kX86RegTypeGpd, kX86RegIndexBp, 4);
REG(X86GpReg, esi, kX86RegTypeGpd, kX86RegIndexSi, 4);
REG(X86GpReg, edi, kX86RegTypeGpd, kX86RegIndexDi, 4);
REG(X86GpReg, r8d, kX86RegTypeGpd, 8, 4);
REG(X86GpReg, r9d, kX86RegTypeGpd, 9, 4);
REG(X86GpReg, r10d, kX86RegTypeGpd, 10, 4);
REG(X86GpReg, r11d, kX86RegTypeGpd, 11, 4);
REG(X86GpReg, r12d, kX86RegTypeGpd, 12, 4);
REG(X86GpReg, r13d, kX86RegTypeGpd, 13, 4);
REG(X86GpReg, r14d, kX86RegTypeGpd, 14, 4);
REG(X86GpReg, r15d, kX86RegTypeGpd, 15, 4);
// Gpw.
{
REG(kX86RegTypeGpw, 0, 2),
REG(kX86RegTypeGpw, 1, 2),
REG(kX86RegTypeGpw, 2, 2),
REG(kX86RegTypeGpw, 3, 2),
REG(kX86RegTypeGpw, 4, 2),
REG(kX86RegTypeGpw, 5, 2),
REG(kX86RegTypeGpw, 6, 2),
REG(kX86RegTypeGpw, 7, 2),
REG(kX86RegTypeGpw, 8, 2),
REG(kX86RegTypeGpw, 9, 2),
REG(kX86RegTypeGpw, 10, 2),
REG(kX86RegTypeGpw, 11, 2),
REG(kX86RegTypeGpw, 12, 2),
REG(kX86RegTypeGpw, 13, 2),
REG(kX86RegTypeGpw, 14, 2),
REG(kX86RegTypeGpw, 15, 2)
},
REG(X86GpReg, rax, kX86RegTypeGpq, kX86RegIndexAx, 8);
REG(X86GpReg, rcx, kX86RegTypeGpq, kX86RegIndexCx, 8);
REG(X86GpReg, rdx, kX86RegTypeGpq, kX86RegIndexDx, 8);
REG(X86GpReg, rbx, kX86RegTypeGpq, kX86RegIndexBx, 8);
REG(X86GpReg, rsp, kX86RegTypeGpq, kX86RegIndexSp, 8);
REG(X86GpReg, rbp, kX86RegTypeGpq, kX86RegIndexBp, 8);
REG(X86GpReg, rsi, kX86RegTypeGpq, kX86RegIndexSi, 8);
REG(X86GpReg, rdi, kX86RegTypeGpq, kX86RegIndexDi, 8);
REG(X86GpReg, r8, kX86RegTypeGpq, 8, 8);
REG(X86GpReg, r9, kX86RegTypeGpq, 9, 8);
REG(X86GpReg, r10, kX86RegTypeGpq, 10, 8);
REG(X86GpReg, r11, kX86RegTypeGpq, 11, 8);
REG(X86GpReg, r12, kX86RegTypeGpq, 12, 8);
REG(X86GpReg, r13, kX86RegTypeGpq, 13, 8);
REG(X86GpReg, r14, kX86RegTypeGpq, 14, 8);
REG(X86GpReg, r15, kX86RegTypeGpq, 15, 8);
// Gpd.
{
REG(kX86RegTypeGpd, 0, 4),
REG(kX86RegTypeGpd, 1, 4),
REG(kX86RegTypeGpd, 2, 4),
REG(kX86RegTypeGpd, 3, 4),
REG(kX86RegTypeGpd, 4, 4),
REG(kX86RegTypeGpd, 5, 4),
REG(kX86RegTypeGpd, 6, 4),
REG(kX86RegTypeGpd, 7, 4),
REG(kX86RegTypeGpd, 8, 4),
REG(kX86RegTypeGpd, 9, 4),
REG(kX86RegTypeGpd, 10, 4),
REG(kX86RegTypeGpd, 11, 4),
REG(kX86RegTypeGpd, 12, 4),
REG(kX86RegTypeGpd, 13, 4),
REG(kX86RegTypeGpd, 14, 4),
REG(kX86RegTypeGpd, 15, 4)
},
REG(X86FpReg, fp0, kX86RegTypeFp, 0, 10);
REG(X86FpReg, fp1, kX86RegTypeFp, 1, 10);
REG(X86FpReg, fp2, kX86RegTypeFp, 2, 10);
REG(X86FpReg, fp3, kX86RegTypeFp, 3, 10);
REG(X86FpReg, fp4, kX86RegTypeFp, 4, 10);
REG(X86FpReg, fp5, kX86RegTypeFp, 5, 10);
REG(X86FpReg, fp6, kX86RegTypeFp, 6, 10);
REG(X86FpReg, fp7, kX86RegTypeFp, 7, 10);
// Gpq.
{
REG(kX86RegTypeGpq, 0, 8),
REG(kX86RegTypeGpq, 1, 8),
REG(kX86RegTypeGpq, 2, 8),
REG(kX86RegTypeGpq, 3, 8),
REG(kX86RegTypeGpq, 4, 8),
REG(kX86RegTypeGpq, 5, 8),
REG(kX86RegTypeGpq, 6, 8),
REG(kX86RegTypeGpq, 7, 8),
REG(kX86RegTypeGpq, 8, 8),
REG(kX86RegTypeGpq, 9, 8),
REG(kX86RegTypeGpq, 10, 8),
REG(kX86RegTypeGpq, 11, 8),
REG(kX86RegTypeGpq, 12, 8),
REG(kX86RegTypeGpq, 13, 8),
REG(kX86RegTypeGpq, 14, 8),
REG(kX86RegTypeGpq, 15, 8)
},
REG(X86MmReg, mm0, kX86RegTypeMm, 0, 8);
REG(X86MmReg, mm1, kX86RegTypeMm, 1, 8);
REG(X86MmReg, mm2, kX86RegTypeMm, 2, 8);
REG(X86MmReg, mm3, kX86RegTypeMm, 3, 8);
REG(X86MmReg, mm4, kX86RegTypeMm, 4, 8);
REG(X86MmReg, mm5, kX86RegTypeMm, 5, 8);
REG(X86MmReg, mm6, kX86RegTypeMm, 6, 8);
REG(X86MmReg, mm7, kX86RegTypeMm, 7, 8);
// Fp.
{
REG(kX86RegTypeFp, 0, 10),
REG(kX86RegTypeFp, 1, 10),
REG(kX86RegTypeFp, 2, 10),
REG(kX86RegTypeFp, 3, 10),
REG(kX86RegTypeFp, 4, 10),
REG(kX86RegTypeFp, 5, 10),
REG(kX86RegTypeFp, 6, 10),
REG(kX86RegTypeFp, 7, 10)
},
REG(X86XmmReg, xmm0, kX86RegTypeXmm, 0, 16);
REG(X86XmmReg, xmm1, kX86RegTypeXmm, 1, 16);
REG(X86XmmReg, xmm2, kX86RegTypeXmm, 2, 16);
REG(X86XmmReg, xmm3, kX86RegTypeXmm, 3, 16);
REG(X86XmmReg, xmm4, kX86RegTypeXmm, 4, 16);
REG(X86XmmReg, xmm5, kX86RegTypeXmm, 5, 16);
REG(X86XmmReg, xmm6, kX86RegTypeXmm, 6, 16);
REG(X86XmmReg, xmm7, kX86RegTypeXmm, 7, 16);
REG(X86XmmReg, xmm8, kX86RegTypeXmm, 8, 16);
REG(X86XmmReg, xmm9, kX86RegTypeXmm, 9, 16);
REG(X86XmmReg, xmm10, kX86RegTypeXmm, 10, 16);
REG(X86XmmReg, xmm11, kX86RegTypeXmm, 11, 16);
REG(X86XmmReg, xmm12, kX86RegTypeXmm, 12, 16);
REG(X86XmmReg, xmm13, kX86RegTypeXmm, 13, 16);
REG(X86XmmReg, xmm14, kX86RegTypeXmm, 14, 16);
REG(X86XmmReg, xmm15, kX86RegTypeXmm, 15, 16);
// Mm.
{
REG(kX86RegTypeMm, 0, 8),
REG(kX86RegTypeMm, 1, 8),
REG(kX86RegTypeMm, 2, 8),
REG(kX86RegTypeMm, 3, 8),
REG(kX86RegTypeMm, 4, 8),
REG(kX86RegTypeMm, 5, 8),
REG(kX86RegTypeMm, 6, 8),
REG(kX86RegTypeMm, 7, 8)
},
REG(X86YmmReg, ymm0, kX86RegTypeYmm, 0, 32);
REG(X86YmmReg, ymm1, kX86RegTypeYmm, 1, 32);
REG(X86YmmReg, ymm2, kX86RegTypeYmm, 2, 32);
REG(X86YmmReg, ymm3, kX86RegTypeYmm, 3, 32);
REG(X86YmmReg, ymm4, kX86RegTypeYmm, 4, 32);
REG(X86YmmReg, ymm5, kX86RegTypeYmm, 5, 32);
REG(X86YmmReg, ymm6, kX86RegTypeYmm, 6, 32);
REG(X86YmmReg, ymm7, kX86RegTypeYmm, 7, 32);
REG(X86YmmReg, ymm8, kX86RegTypeYmm, 8, 32);
REG(X86YmmReg, ymm9, kX86RegTypeYmm, 9, 32);
REG(X86YmmReg, ymm10, kX86RegTypeYmm, 10, 32);
REG(X86YmmReg, ymm11, kX86RegTypeYmm, 11, 32);
REG(X86YmmReg, ymm12, kX86RegTypeYmm, 12, 32);
REG(X86YmmReg, ymm13, kX86RegTypeYmm, 13, 32);
REG(X86YmmReg, ymm14, kX86RegTypeYmm, 14, 32);
REG(X86YmmReg, ymm15, kX86RegTypeYmm, 15, 32);
// K.
{
REG(kX86RegTypeK, 0, 8),
REG(kX86RegTypeK, 1, 8),
REG(kX86RegTypeK, 2, 8),
REG(kX86RegTypeK, 3, 8),
REG(kX86RegTypeK, 4, 8),
REG(kX86RegTypeK, 5, 8),
REG(kX86RegTypeK, 6, 8),
REG(kX86RegTypeK, 7, 8)
},
REG(X86SegReg, cs, kX86RegTypeSeg, kX86SegCs, 2);
REG(X86SegReg, ss, kX86RegTypeSeg, kX86SegSs, 2);
REG(X86SegReg, ds, kX86RegTypeSeg, kX86SegDs, 2);
REG(X86SegReg, es, kX86RegTypeSeg, kX86SegEs, 2);
REG(X86SegReg, fs, kX86RegTypeSeg, kX86SegFs, 2);
REG(X86SegReg, gs, kX86RegTypeSeg, kX86SegGs, 2);
// Xmm.
{
REG(kX86RegTypeXmm, 0, 16),
REG(kX86RegTypeXmm, 1, 16),
REG(kX86RegTypeXmm, 2, 16),
REG(kX86RegTypeXmm, 3, 16),
REG(kX86RegTypeXmm, 4, 16),
REG(kX86RegTypeXmm, 5, 16),
REG(kX86RegTypeXmm, 6, 16),
REG(kX86RegTypeXmm, 7, 16),
REG(kX86RegTypeXmm, 8, 16),
REG(kX86RegTypeXmm, 9, 16),
REG(kX86RegTypeXmm, 10, 16),
REG(kX86RegTypeXmm, 11, 16),
REG(kX86RegTypeXmm, 12, 16),
REG(kX86RegTypeXmm, 13, 16),
REG(kX86RegTypeXmm, 14, 16),
REG(kX86RegTypeXmm, 15, 16),
REG(kX86RegTypeXmm, 16, 16),
REG(kX86RegTypeXmm, 17, 16),
REG(kX86RegTypeXmm, 18, 16),
REG(kX86RegTypeXmm, 19, 16),
REG(kX86RegTypeXmm, 20, 16),
REG(kX86RegTypeXmm, 21, 16),
REG(kX86RegTypeXmm, 22, 16),
REG(kX86RegTypeXmm, 23, 16),
REG(kX86RegTypeXmm, 24, 16),
REG(kX86RegTypeXmm, 25, 16),
REG(kX86RegTypeXmm, 26, 16),
REG(kX86RegTypeXmm, 27, 16),
REG(kX86RegTypeXmm, 28, 16),
REG(kX86RegTypeXmm, 29, 16),
REG(kX86RegTypeXmm, 30, 16),
REG(kX86RegTypeXmm, 31, 16)
},
// Ymm.
{
REG(kX86RegTypeYmm, 0, 32),
REG(kX86RegTypeYmm, 1, 32),
REG(kX86RegTypeYmm, 2, 32),
REG(kX86RegTypeYmm, 3, 32),
REG(kX86RegTypeYmm, 4, 32),
REG(kX86RegTypeYmm, 5, 32),
REG(kX86RegTypeYmm, 6, 32),
REG(kX86RegTypeYmm, 7, 32),
REG(kX86RegTypeYmm, 8, 32),
REG(kX86RegTypeYmm, 9, 32),
REG(kX86RegTypeYmm, 10, 32),
REG(kX86RegTypeYmm, 11, 32),
REG(kX86RegTypeYmm, 12, 32),
REG(kX86RegTypeYmm, 13, 32),
REG(kX86RegTypeYmm, 14, 32),
REG(kX86RegTypeYmm, 15, 32),
REG(kX86RegTypeYmm, 16, 32),
REG(kX86RegTypeYmm, 17, 32),
REG(kX86RegTypeYmm, 18, 32),
REG(kX86RegTypeYmm, 19, 32),
REG(kX86RegTypeYmm, 20, 32),
REG(kX86RegTypeYmm, 21, 32),
REG(kX86RegTypeYmm, 22, 32),
REG(kX86RegTypeYmm, 23, 32),
REG(kX86RegTypeYmm, 24, 32),
REG(kX86RegTypeYmm, 25, 32),
REG(kX86RegTypeYmm, 26, 32),
REG(kX86RegTypeYmm, 27, 32),
REG(kX86RegTypeYmm, 28, 32),
REG(kX86RegTypeYmm, 29, 32),
REG(kX86RegTypeYmm, 30, 32),
REG(kX86RegTypeYmm, 31, 32)
},
// Zmm.
{
REG(kX86RegTypeZmm, 0, 64),
REG(kX86RegTypeZmm, 1, 64),
REG(kX86RegTypeZmm, 2, 64),
REG(kX86RegTypeZmm, 3, 64),
REG(kX86RegTypeZmm, 4, 64),
REG(kX86RegTypeZmm, 5, 64),
REG(kX86RegTypeZmm, 6, 64),
REG(kX86RegTypeZmm, 7, 64),
REG(kX86RegTypeZmm, 8, 64),
REG(kX86RegTypeZmm, 9, 64),
REG(kX86RegTypeZmm, 10, 64),
REG(kX86RegTypeZmm, 11, 64),
REG(kX86RegTypeZmm, 12, 64),
REG(kX86RegTypeZmm, 13, 64),
REG(kX86RegTypeZmm, 14, 64),
REG(kX86RegTypeZmm, 15, 64),
REG(kX86RegTypeZmm, 16, 64),
REG(kX86RegTypeZmm, 17, 64),
REG(kX86RegTypeZmm, 18, 64),
REG(kX86RegTypeZmm, 19, 64),
REG(kX86RegTypeZmm, 20, 64),
REG(kX86RegTypeZmm, 21, 64),
REG(kX86RegTypeZmm, 22, 64),
REG(kX86RegTypeZmm, 23, 64),
REG(kX86RegTypeZmm, 24, 64),
REG(kX86RegTypeZmm, 25, 64),
REG(kX86RegTypeZmm, 26, 64),
REG(kX86RegTypeZmm, 27, 64),
REG(kX86RegTypeZmm, 28, 64),
REG(kX86RegTypeZmm, 29, 64),
REG(kX86RegTypeZmm, 30, 64),
REG(kX86RegTypeZmm, 31, 64)
}
};
#undef REG
} // x86 namespace
} // asmjit namespace
// [Api-End]

View File

@@ -76,7 +76,7 @@ Error X86Scheduler::run(Node* start, Node* stop) {
Node* next = node_->getNext();
ASMJIT_ASSERT(node_->getType() == kNodeTypeInst);
printf(" %s\n", X86Util::getInstInfo(static_cast<InstNode*>(node_)->getCode()).getInstName());
printf(" %s\n", X86Util::getInstInfo(static_cast<InstNode*>(node_)->getInstId()).getInstName());
node_ = next;
}

View File

@@ -12,17 +12,23 @@ var fs = require("fs");
// [Utilities]
// ----------------------------------------------------------------------------
var upFirst = function(s) {
function upFirst(s) {
if (!s)
return s;
return s[0].toUpperCase() + s.substr(1);
};
}
var trimLeft = function(s) {
function trimLeft(s) {
return s.replace(/^\s+/, "");
}
var inject = function(s, start, end, code) {
function padLeft(s, n) {
while (s.length < n)
s += " ";
return s;
}
function inject(s, start, end, code) {
var iStart = s.indexOf(start);
var iEnd = s.indexOf(end);
@@ -33,7 +39,7 @@ var inject = function(s, start, end, code) {
throw new Error("Couldn't locate end mark.");
return s.substr(0, iStart + start.length) + code + s.substr(iEnd);
};
}
// ----------------------------------------------------------------------------
// [Database]
@@ -172,66 +178,100 @@ var generate = function(fileName, arch) {
var code = "";
var disclaimer = "// Automatically generated, do not edit.\n";
var instCount = 0;
var sizeof_X86InstInfo = 8;
var sizeof_X86InstExtendedInfo = 24;
// Create database.
var db = new Database();
var re = new RegExp(
"INST\\(([A-Za-z0-9_]+)\\s*," + // [01] Inst-Code.
"\\s*\\\"([A-Za-z0-9_ ]*)\\\"\\s*," + // [02] Inst-Name.
"([^,]+)," + // [03] Inst-Group.
"([^,]+)," + // [04] Inst-Flags.
"([^,]+)," + // [05] Move-Size.
"([^,]+)," + // [06] Operand-Flags[0].
"([^,]+)," + // [07] Operand-Flags[1].
"([^,]+)," + // [08] Operand-Flags[2].
"([^,]+)," + // [09] Operand-Flags[3].
"\\s*E\\(([A-Z_]+)\\)\\s*," + // [10] EFLAGS.
"(.{17}[^,]*)," + // [11] OpCode[0].
"(.{17}[^\\)]*)\\)", // [12] OpCode[1].
"INST\\(([A-Za-z0-9_]+)\\s*," + // [01] Id.
"\\s*\\\"([A-Za-z0-9_ ]*)\\\"\\s*," + // [02] Name.
"(.{20}[^,]*)," + // [03] Opcode[0].
"(.{20}[^,]*)," + // [04] Opcode[1].
"([^,]+)," + // [05] Encoding.
"([^,]+)," + // [06] IFLAGS.
"\\s*EF\\(([A-Z_]+)\\)\\s*," + // [07] EFLAGS.
"([^,]+)," + // [08] Write-Index.
"([^,]+)," + // [09] Write-Size.
"([^,]+)," + // [10] Operand-Flags[0].
"([^,]+)," + // [11] Operand-Flags[1].
"([^,]+)," + // [12] Operand-Flags[2].
"([^,]+)," + // [13] Operand-Flags[3].
"([^\\)]+)\\)", // [14] Operand-Flags[4].
"g");
var i, k, m;
var srcForm = "";
while (m = re.exec(data)) {
// Extract instruction ID and Name.
var id = m[1];
var name = m[2];
// Extract data that goes to the secondary table (ExtendedInfo).
var instGroup = trimLeft(m[3]);
var instFlags = trimLeft(m[4]);
var moveSize = trimLeft(m[5]);
var opFlags0 = trimLeft(m[6]);
var opFlags1 = trimLeft(m[7]);
var opFlags2 = trimLeft(m[8]);
var opFlags3 = trimLeft(m[9]);
var eflags = m[10];
var opCode1 = trimLeft(m[12]);
// Extract data that goes to the secondary table (X86InstExtendedInfo).
var opcode0 = trimLeft(m[3]);
var opcode1 = trimLeft(m[4]);
var encoding = trimLeft(m[5]);
var iflags = trimLeft(m[6]);
var eflags = m[7];
var writeIndex = trimLeft(m[8]);
var writeSize = trimLeft(m[9]);
var oflags0 = trimLeft(m[10]);
var oflags1 = trimLeft(m[11]);
var oflags2 = trimLeft(m[12]);
var oflags3 = trimLeft(m[13]);
var oflags4 = trimLeft(m[14]);
// Generate EFlags-In and EFlags-Out.
var eflagsIn = decToHex(getEFlagsMask(eflags, "RX" ), 2);
var eflagsOut = decToHex(getEFlagsMask(eflags, "WXU"), 2);
var extData = "" +
instGroup + ", " +
moveSize + ", " +
var extData =
encoding + ", " +
writeIndex + ", " +
writeSize + ", " +
eflagsIn + ", " +
eflagsOut + ", " +
instFlags + ", " +
"{ " + opFlags0 + ", " + opFlags1 + ", " + opFlags2 + ", " + opFlags3 + ", U }, " +
opCode1;
"0" + ", " +
"{ " + oflags0 + ", " + oflags1 + ", " + oflags2 + ", " + oflags3 + ", " + oflags4 + " }, " +
iflags + ", " +
opcode1;
srcForm += " INST(" +
padLeft(id, 27) + ", " +
padLeft('"' + name + '"', 19) + ", " +
opcode0 + ", " +
opcode1 + ", " +
encoding + ", " +
iflags + ", " +
"EF(" + eflags + "), " +
writeIndex + ", " +
writeSize + ", " +
oflags0 + ", " +
oflags1 + ", " +
oflags2 + ", " +
oflags3 + ", " +
oflags4 + "),\n";
db.add(name, id, extData);
instCount++;
}
// fs.writeFileSync("srcform.cpp", srcForm, "utf8");
db.index();
console.log("Number of instructions: " + db.instNames.array.length);
console.log("Instruction names size: " + db.instNames.getSize());
console.log("Extended-info length : " + db.extendedData.length);
var instDataSize = instCount * sizeof_X86InstInfo + db.extendedData.length * sizeof_X86InstExtendedInfo;
console.log("Number of Instructions : " + instCount);
console.log("Number of ExtInfo Rows : " + db.extendedData.length);
console.log("Instructions' Data Size: " + instDataSize);
console.log("Instructions' Names Size: " + db.instNames.getSize());
// Generate InstName[] string.
code += disclaimer;
code += "#if !defined(ASMJIT_DISABLE_INST_NAMES)\n";
code += "#if !defined(ASMJIT_DISABLE_NAMES)\n";
code += "const char _" + arch + "InstName[] =\n";
for (var k in db.instMap) {
for (k in db.instMap) {
var inst = db.instMap[k];
code += " \"" + k + "\\0\"\n";
}
@@ -248,7 +288,7 @@ var generate = function(fileName, arch) {
code += disclaimer;
code += "static const uint16_t _" + arch + "InstAlphaIndex[26] = {\n";
for (var i = 0; i < db.instAlpha.length; i++) {
for (i = 0; i < db.instAlpha.length; i++) {
var id = db.instAlpha[i];
code += " " + (id === undefined ? "0xFFFF" : id);
if (i !== db.instAlpha.length - 1)
@@ -260,18 +300,18 @@ var generate = function(fileName, arch) {
// Generate NameIndex.
code += disclaimer;
code += "enum k" + Arch + "InstData_NameIndex {\n";
for (var k in db.instMap) {
for (k in db.instMap) {
var inst = db.instMap[k];
code += " " + inst.id + "_NameIndex = " + inst.nameIndex + ",\n";
}
code = code.substr(0, code.length - 2) + "\n};\n";
code += "#endif // !ASMJIT_DISABLE_INST_NAMES\n"
code += "#endif // !ASMJIT_DISABLE_NAMES\n"
code += "\n";
// Generate ExtendedInfo.
code += disclaimer;
code += "const " + Arch + "InstExtendedInfo _" + arch + "InstExtendedInfo[] = {\n";
for (var i = 0; i < db.extendedData.length; i++) {
for (i = 0; i < db.extendedData.length; i++) {
code += " { " + db.extendedData[i] + " }";
if (i !== db.extendedData.length - 1)
code += ",";
@@ -282,7 +322,7 @@ var generate = function(fileName, arch) {
code += disclaimer;
code += "enum k" + Arch + "InstData_ExtendedIndex {\n";
for (var k in db.instMap) {
for (k in db.instMap) {
var inst = db.instMap[k];
code += " " + inst.id + "_ExtendedIndex = " + inst.extendedIndex + ",\n";
}