mirror of
https://github.com/asmjit/asmjit.git
synced 2025-12-17 20:44:37 +03:00
Slightly refactored some constants and operand handling in X86Assembler.
Refactored asmjit::x86 register definitions (now exported as a single symbol). Refactored bit utilities, now using proper naming like `or_`, `and_`, `andNot`. Refactored X86RegCount and X86RegMask to support K instead of Fp register. Refactored X86 instruction table (won't stay for long, new tool to export it is in development). Renamed instruction group to instruction encoding. Added XSAVE/OSXSAVE and other CPU features to X86CpuInfo. Added proper AVX and AVX-512 detection to X86CpuInfo. Added support to get content of XCR0 in X86CpuInfo (callXGetBV). Added XSAVE instruction set support (Assembler/Compiler). Added SSE4a instruction set support (Assembler/Compiler). Added X86KReg and X86KVar register/variable support (AVX-512). Added X86ZmmReg and X86ZmmVar register/variable support (AVX-512).
This commit is contained in:
@@ -13,7 +13,7 @@
|
|||||||
|
|
||||||
namespace asmgen {
|
namespace asmgen {
|
||||||
|
|
||||||
enum { kGenOpCodeInstCount = 2640 };
|
enum { kGenOpCodeInstCount = 2656 };
|
||||||
|
|
||||||
// Generate all instructions asmjit can emit.
|
// Generate all instructions asmjit can emit.
|
||||||
static void opcode(asmjit::X86Assembler& a) {
|
static void opcode(asmjit::X86Assembler& a) {
|
||||||
@@ -1275,9 +1275,25 @@ static void opcode(asmjit::X86Assembler& a) {
|
|||||||
a.pcmpistrm(xmm0, ptr_gp0, 0);
|
a.pcmpistrm(xmm0, ptr_gp0, 0);
|
||||||
a.pcmpgtq(xmm0, xmm7);
|
a.pcmpgtq(xmm0, xmm7);
|
||||||
a.pcmpgtq(xmm0, ptr_gp0);
|
a.pcmpgtq(xmm0, ptr_gp0);
|
||||||
|
|
||||||
|
// SSE4a.
|
||||||
|
a.nop();
|
||||||
|
|
||||||
|
a.extrq(xmm0, xmm1);
|
||||||
|
a.extrq(xmm0, 0x1, 0x2);
|
||||||
|
a.insertq(xmm0, xmm1);
|
||||||
|
a.insertq(xmm0, xmm1, 0x1, 0x2);
|
||||||
|
a.movntsd(ptr_gp0, xmm0);
|
||||||
|
a.movntss(ptr_gp0, xmm0);
|
||||||
|
|
||||||
|
// POPCNT.
|
||||||
|
a.nop();
|
||||||
|
|
||||||
a.popcnt(gp0, ptr_gp0);
|
a.popcnt(gp0, ptr_gp0);
|
||||||
|
|
||||||
// AESNI.
|
// AESNI.
|
||||||
|
a.nop();
|
||||||
|
|
||||||
a.aesdec(xmm0, xmm7);
|
a.aesdec(xmm0, xmm7);
|
||||||
a.aesdec(xmm0, ptr_gp0);
|
a.aesdec(xmm0, ptr_gp0);
|
||||||
a.aesdeclast(xmm0, xmm7);
|
a.aesdeclast(xmm0, xmm7);
|
||||||
@@ -1292,10 +1308,24 @@ static void opcode(asmjit::X86Assembler& a) {
|
|||||||
a.aeskeygenassist(xmm0, ptr_gp0, 0);
|
a.aeskeygenassist(xmm0, ptr_gp0, 0);
|
||||||
|
|
||||||
// PCLMULQDQ.
|
// PCLMULQDQ.
|
||||||
|
a.nop();
|
||||||
|
|
||||||
a.pclmulqdq(xmm0, xmm7, 0);
|
a.pclmulqdq(xmm0, xmm7, 0);
|
||||||
a.pclmulqdq(xmm0, ptr_gp0, 0);
|
a.pclmulqdq(xmm0, ptr_gp0, 0);
|
||||||
|
|
||||||
|
// XSAVE.
|
||||||
|
a.nop();
|
||||||
|
|
||||||
|
a.xgetbv();
|
||||||
|
a.xsetbv();
|
||||||
|
|
||||||
|
a.xsave(ptr_gp0);
|
||||||
|
a.xsaveopt(ptr_gp0);
|
||||||
|
a.xrstor(ptr_gp0);
|
||||||
|
|
||||||
// AVX.
|
// AVX.
|
||||||
|
a.nop();
|
||||||
|
|
||||||
a.vaddpd(xmm0, xmm1, xmm2);
|
a.vaddpd(xmm0, xmm1, xmm2);
|
||||||
a.vaddpd(xmm0, xmm1, ptr_gp0);
|
a.vaddpd(xmm0, xmm1, ptr_gp0);
|
||||||
a.vaddpd(ymm0, ymm1, ymm2);
|
a.vaddpd(ymm0, ymm1, ymm2);
|
||||||
@@ -1946,6 +1976,8 @@ static void opcode(asmjit::X86Assembler& a) {
|
|||||||
a.vzeroupper();
|
a.vzeroupper();
|
||||||
|
|
||||||
// AVX+AESNI.
|
// AVX+AESNI.
|
||||||
|
a.nop();
|
||||||
|
|
||||||
a.vaesdec(xmm0, xmm1, xmm2);
|
a.vaesdec(xmm0, xmm1, xmm2);
|
||||||
a.vaesdec(xmm0, xmm1, ptr_gp0);
|
a.vaesdec(xmm0, xmm1, ptr_gp0);
|
||||||
a.vaesdeclast(xmm0, xmm1, xmm2);
|
a.vaesdeclast(xmm0, xmm1, xmm2);
|
||||||
@@ -1960,10 +1992,14 @@ static void opcode(asmjit::X86Assembler& a) {
|
|||||||
a.vaeskeygenassist(xmm0, ptr_gp0, 0);
|
a.vaeskeygenassist(xmm0, ptr_gp0, 0);
|
||||||
|
|
||||||
// AVX+PCLMULQDQ.
|
// AVX+PCLMULQDQ.
|
||||||
|
a.nop();
|
||||||
|
|
||||||
a.vpclmulqdq(xmm0, xmm1, xmm2, 0);
|
a.vpclmulqdq(xmm0, xmm1, xmm2, 0);
|
||||||
a.vpclmulqdq(xmm0, xmm1, ptr_gp0, 0);
|
a.vpclmulqdq(xmm0, xmm1, ptr_gp0, 0);
|
||||||
|
|
||||||
// AVX2.
|
// AVX2.
|
||||||
|
a.nop();
|
||||||
|
|
||||||
a.vbroadcasti128(ymm0, ptr_gp0);
|
a.vbroadcasti128(ymm0, ptr_gp0);
|
||||||
a.vbroadcastsd(ymm0, xmm1);
|
a.vbroadcastsd(ymm0, xmm1);
|
||||||
a.vbroadcastss(xmm0, xmm1);
|
a.vbroadcastss(xmm0, xmm1);
|
||||||
@@ -2268,6 +2304,8 @@ static void opcode(asmjit::X86Assembler& a) {
|
|||||||
a.vpxor(ymm0, ymm1, ymm2);
|
a.vpxor(ymm0, ymm1, ymm2);
|
||||||
|
|
||||||
// FMA3.
|
// FMA3.
|
||||||
|
a.nop();
|
||||||
|
|
||||||
a.vfmadd132pd(xmm0, xmm1, ptr_gp0);
|
a.vfmadd132pd(xmm0, xmm1, ptr_gp0);
|
||||||
a.vfmadd132pd(xmm0, xmm1, xmm2);
|
a.vfmadd132pd(xmm0, xmm1, xmm2);
|
||||||
a.vfmadd132pd(ymm0, ymm1, ptr_gp0);
|
a.vfmadd132pd(ymm0, ymm1, ptr_gp0);
|
||||||
@@ -2462,6 +2500,8 @@ static void opcode(asmjit::X86Assembler& a) {
|
|||||||
a.vfnmsub231ss(xmm0, xmm1, xmm2);
|
a.vfnmsub231ss(xmm0, xmm1, xmm2);
|
||||||
|
|
||||||
// FMA4.
|
// FMA4.
|
||||||
|
a.nop();
|
||||||
|
|
||||||
a.vfmaddpd(xmm0, xmm1, xmm2, xmm3);
|
a.vfmaddpd(xmm0, xmm1, xmm2, xmm3);
|
||||||
a.vfmaddpd(xmm0, xmm1, ptr_gp0, xmm3);
|
a.vfmaddpd(xmm0, xmm1, ptr_gp0, xmm3);
|
||||||
a.vfmaddpd(xmm0, xmm1, xmm2, ptr_gp0);
|
a.vfmaddpd(xmm0, xmm1, xmm2, ptr_gp0);
|
||||||
@@ -2560,6 +2600,8 @@ static void opcode(asmjit::X86Assembler& a) {
|
|||||||
a.vfnmsubss(xmm0, xmm1, xmm2, ptr_gp0);
|
a.vfnmsubss(xmm0, xmm1, xmm2, ptr_gp0);
|
||||||
|
|
||||||
// XOP.
|
// XOP.
|
||||||
|
a.nop();
|
||||||
|
|
||||||
a.vfrczpd(xmm0, xmm1);
|
a.vfrczpd(xmm0, xmm1);
|
||||||
a.vfrczpd(xmm0, ptr_gp0);
|
a.vfrczpd(xmm0, ptr_gp0);
|
||||||
a.vfrczpd(ymm0, ymm1);
|
a.vfrczpd(ymm0, ymm1);
|
||||||
@@ -2709,6 +2751,8 @@ static void opcode(asmjit::X86Assembler& a) {
|
|||||||
a.vpshlw(xmm0, xmm1, ptr_gp0);
|
a.vpshlw(xmm0, xmm1, ptr_gp0);
|
||||||
|
|
||||||
// BMI.
|
// BMI.
|
||||||
|
a.nop();
|
||||||
|
|
||||||
a.andn(gp0, gp1, zcx);
|
a.andn(gp0, gp1, zcx);
|
||||||
a.andn(gp0, gp1, ptr_gp1);
|
a.andn(gp0, gp1, ptr_gp1);
|
||||||
a.bextr(gp0, gp1, zcx);
|
a.bextr(gp0, gp1, zcx);
|
||||||
@@ -2721,14 +2765,20 @@ static void opcode(asmjit::X86Assembler& a) {
|
|||||||
a.blsr(gp0, ptr_gp1);
|
a.blsr(gp0, ptr_gp1);
|
||||||
|
|
||||||
// LZCNT.
|
// LZCNT.
|
||||||
|
a.nop();
|
||||||
|
|
||||||
a.lzcnt(gp0, gp1);
|
a.lzcnt(gp0, gp1);
|
||||||
a.lzcnt(gp0, ptr_gp1);
|
a.lzcnt(gp0, ptr_gp1);
|
||||||
|
|
||||||
// TZCNT.
|
// TZCNT.
|
||||||
|
a.nop();
|
||||||
|
|
||||||
a.tzcnt(gp0, gp1);
|
a.tzcnt(gp0, gp1);
|
||||||
a.tzcnt(gp0, ptr_gp1);
|
a.tzcnt(gp0, ptr_gp1);
|
||||||
|
|
||||||
// BMI2.
|
// BMI2.
|
||||||
|
a.nop();
|
||||||
|
|
||||||
a.bzhi(gp0, gp1, zcx);
|
a.bzhi(gp0, gp1, zcx);
|
||||||
a.bzhi(gp0, ptr_gp1, zcx);
|
a.bzhi(gp0, ptr_gp1, zcx);
|
||||||
a.mulx(gp0, gp1, zcx);
|
a.mulx(gp0, gp1, zcx);
|
||||||
@@ -2747,9 +2797,13 @@ static void opcode(asmjit::X86Assembler& a) {
|
|||||||
a.shrx(gp0, ptr_gp1, zcx);
|
a.shrx(gp0, ptr_gp1, zcx);
|
||||||
|
|
||||||
// RDRAND.
|
// RDRAND.
|
||||||
|
a.nop();
|
||||||
|
|
||||||
a.rdrand(gp0);
|
a.rdrand(gp0);
|
||||||
|
|
||||||
// F16C.
|
// F16C.
|
||||||
|
a.nop();
|
||||||
|
|
||||||
a.vcvtph2ps(xmm0, xmm1);
|
a.vcvtph2ps(xmm0, xmm1);
|
||||||
a.vcvtph2ps(xmm0, ptr_gp1);
|
a.vcvtph2ps(xmm0, ptr_gp1);
|
||||||
a.vcvtph2ps(ymm0, xmm1);
|
a.vcvtph2ps(ymm0, xmm1);
|
||||||
@@ -2758,6 +2812,9 @@ static void opcode(asmjit::X86Assembler& a) {
|
|||||||
a.vcvtps2ph(ptr_gp0, xmm1, 0);
|
a.vcvtps2ph(ptr_gp0, xmm1, 0);
|
||||||
a.vcvtps2ph(xmm0, ymm1, 0);
|
a.vcvtps2ph(xmm0, ymm1, 0);
|
||||||
a.vcvtps2ph(ptr_gp0, ymm1, 0);
|
a.vcvtps2ph(ptr_gp0, ymm1, 0);
|
||||||
|
|
||||||
|
// Mark the end of the stream.
|
||||||
|
a.nop();
|
||||||
}
|
}
|
||||||
|
|
||||||
} // asmgen namespace
|
} // asmgen namespace
|
||||||
|
|||||||
@@ -235,15 +235,15 @@
|
|||||||
//! `BaseMem` class. These functions are used to make operands that represent
|
//! `BaseMem` class. These functions are used to make operands that represent
|
||||||
//! memory addresses:
|
//! memory addresses:
|
||||||
//!
|
//!
|
||||||
//! - `asmjit::ptr()`
|
//! - `asmjit::ptr()` - Address size not specified.
|
||||||
//! - `asmjit::byte_ptr()`
|
//! - `asmjit::byte_ptr()` - 1 byte.
|
||||||
//! - `asmjit::word_ptr()`
|
//! - `asmjit::word_ptr()` - 2 bytes (Gpw size).
|
||||||
//! - `asmjit::dword_ptr()`
|
//! - `asmjit::dword_ptr()` - 4 bytes (Gpd size).
|
||||||
//! - `asmjit::qword_ptr()`
|
//! - `asmjit::qword_ptr()` - 8 bytes (Gpq/Mm size).
|
||||||
//! - `asmjit::tword_ptr()`
|
//! - `asmjit::tword_ptr()` - 10 bytes (FPU).
|
||||||
//! - `asmjit::oword_ptr()`
|
//! - `asmjit::oword_ptr()` - 16 bytes (Xmm size).
|
||||||
//! - `asmjit::yword_ptr()`
|
//! - `asmjit::yword_ptr()` - 32 bytes (Ymm size).
|
||||||
//! - `asmjit::zword_ptr()`
|
//! - `asmjit::zword_ptr()` - 64 bytes (Zmm size).
|
||||||
//!
|
//!
|
||||||
//! Most useful function to make pointer should be `asmjit::ptr()`. It creates
|
//! Most useful function to make pointer should be `asmjit::ptr()`. It creates
|
||||||
//! pointer to the target with unspecified size. Unspecified size works in all
|
//! pointer to the target with unspecified size. Unspecified size works in all
|
||||||
@@ -298,10 +298,10 @@
|
|||||||
//! // Get `X86CpuInfo` global instance.
|
//! // Get `X86CpuInfo` global instance.
|
||||||
//! const X86CpuInfo* cpuInfo = X86CpuInfo::getHost();
|
//! const X86CpuInfo* cpuInfo = X86CpuInfo::getHost();
|
||||||
//!
|
//!
|
||||||
//! if (cpuInfo->hasFeature(kX86CpuFeatureSse2)) {
|
//! if (cpuInfo->hasFeature(kX86CpuFeatureSSE2)) {
|
||||||
//! // Processor has SSE2.
|
//! // Processor has SSE2.
|
||||||
//! }
|
//! }
|
||||||
//! else if (cpuInfo->hasFeature(kX86CpuFeatureMmx)) {
|
//! else if (cpuInfo->hasFeature(kX86CpuFeatureMMX)) {
|
||||||
//! // Processor doesn't have SSE2, but has MMX.
|
//! // Processor doesn't have SSE2, but has MMX.
|
||||||
//! }
|
//! }
|
||||||
//! else {
|
//! else {
|
||||||
|
|||||||
@@ -42,7 +42,7 @@ ASMJIT_ENUM(kInstId) {
|
|||||||
//! Instruction options (stub).
|
//! Instruction options (stub).
|
||||||
ASMJIT_ENUM(kInstOptions) {
|
ASMJIT_ENUM(kInstOptions) {
|
||||||
//! No instruction options.
|
//! No instruction options.
|
||||||
kInstOptionNone = 0x00,
|
kInstOptionNone = 0x00000000,
|
||||||
|
|
||||||
//! Emit short form of the instruction.
|
//! Emit short form of the instruction.
|
||||||
//!
|
//!
|
||||||
@@ -53,7 +53,8 @@ ASMJIT_ENUM(kInstOptions) {
|
|||||||
//! can be dangerous if the short jmp/jcc is required, but not encodable due
|
//! can be dangerous if the short jmp/jcc is required, but not encodable due
|
||||||
//! to large displacement, in such case an error happens and the whole
|
//! to large displacement, in such case an error happens and the whole
|
||||||
//! assembler/compiler stream is unusable.
|
//! assembler/compiler stream is unusable.
|
||||||
kInstOptionShortForm = 0x01,
|
kInstOptionShortForm = 0x00000001,
|
||||||
|
|
||||||
//! Emit long form of the instruction.
|
//! Emit long form of the instruction.
|
||||||
//!
|
//!
|
||||||
//! X86/X64:
|
//! X86/X64:
|
||||||
@@ -61,12 +62,13 @@ ASMJIT_ENUM(kInstOptions) {
|
|||||||
//! Long form is mostly related to jmp and jcc instructions, but like the
|
//! Long form is mostly related to jmp and jcc instructions, but like the
|
||||||
//! `kInstOptionShortForm` option it can be used by other instructions
|
//! `kInstOptionShortForm` option it can be used by other instructions
|
||||||
//! supporting both 8-bit and 32-bit immediates.
|
//! supporting both 8-bit and 32-bit immediates.
|
||||||
kInstOptionLongForm = 0x02,
|
kInstOptionLongForm = 0x00000002,
|
||||||
|
|
||||||
//! Condition is likely to be taken.
|
//! Condition is likely to be taken.
|
||||||
kInstOptionTaken = 0x04,
|
kInstOptionTaken = 0x00000004,
|
||||||
|
|
||||||
//! Condition is unlikely to be taken.
|
//! Condition is unlikely to be taken.
|
||||||
kInstOptionNotTaken = 0x08
|
kInstOptionNotTaken = 0x00000008
|
||||||
};
|
};
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
|
|||||||
@@ -993,11 +993,11 @@ struct VarAttr {
|
|||||||
//! Get whether `flag` is on.
|
//! Get whether `flag` is on.
|
||||||
ASMJIT_INLINE bool hasFlag(uint32_t flag) { return (_flags & flag) != 0; }
|
ASMJIT_INLINE bool hasFlag(uint32_t flag) { return (_flags & flag) != 0; }
|
||||||
//! Add `flags`.
|
//! Add `flags`.
|
||||||
ASMJIT_INLINE void addFlags(uint32_t flags) { _flags |= flags; }
|
ASMJIT_INLINE void orFlags(uint32_t flags) { _flags |= flags; }
|
||||||
//! Mask `flags`.
|
//! Mask `flags`.
|
||||||
ASMJIT_INLINE void andFlags(uint32_t flags) { _flags &= flags; }
|
ASMJIT_INLINE void andFlags(uint32_t flags) { _flags &= flags; }
|
||||||
//! Clear `flags`.
|
//! Clear `flags`.
|
||||||
ASMJIT_INLINE void delFlags(uint32_t flags) { _flags &= ~flags; }
|
ASMJIT_INLINE void andNotFlags(uint32_t flags) { _flags &= ~flags; }
|
||||||
|
|
||||||
//! Get how many times the variable is used by the instruction/node.
|
//! Get how many times the variable is used by the instruction/node.
|
||||||
ASMJIT_INLINE uint32_t getVarCount() const { return _varCount; }
|
ASMJIT_INLINE uint32_t getVarCount() const { return _varCount; }
|
||||||
@@ -1642,7 +1642,7 @@ struct Node {
|
|||||||
// [Accessors - Type and Flags]
|
// [Accessors - Type and Flags]
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
|
|
||||||
//! Get type of node, see `kNodeType`.
|
//! Get node type, see `kNodeType`.
|
||||||
ASMJIT_INLINE uint32_t getType() const {
|
ASMJIT_INLINE uint32_t getType() const {
|
||||||
return _type;
|
return _type;
|
||||||
}
|
}
|
||||||
@@ -1652,24 +1652,29 @@ struct Node {
|
|||||||
return _flags;
|
return _flags;
|
||||||
}
|
}
|
||||||
|
|
||||||
//! Set node flags to `flags`.
|
|
||||||
ASMJIT_INLINE void setFlags(uint32_t flags) {
|
|
||||||
_flags = static_cast<uint16_t>(flags);
|
|
||||||
}
|
|
||||||
|
|
||||||
//! Get whether the instruction has flag `flag`.
|
//! Get whether the instruction has flag `flag`.
|
||||||
ASMJIT_INLINE bool hasFlag(uint32_t flag) const {
|
ASMJIT_INLINE bool hasFlag(uint32_t flag) const {
|
||||||
return (static_cast<uint32_t>(_flags) & flag) != 0;
|
return (static_cast<uint32_t>(_flags) & flag) != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//! Set node flags to `flags`.
|
||||||
|
ASMJIT_INLINE void setFlags(uint32_t flags) {
|
||||||
|
_flags = static_cast<uint16_t>(flags);
|
||||||
|
}
|
||||||
|
|
||||||
//! Add instruction `flags`.
|
//! Add instruction `flags`.
|
||||||
ASMJIT_INLINE void addFlags(uint32_t flags) {
|
ASMJIT_INLINE void orFlags(uint32_t flags) {
|
||||||
_flags |= static_cast<uint16_t>(flags);
|
_flags |= static_cast<uint16_t>(flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//! And instruction `flags`.
|
||||||
|
ASMJIT_INLINE void andFlags(uint32_t flags) {
|
||||||
|
_flags &= static_cast<uint16_t>(flags);
|
||||||
|
}
|
||||||
|
|
||||||
//! Clear instruction `flags`.
|
//! Clear instruction `flags`.
|
||||||
ASMJIT_INLINE void delFlags(uint32_t flags) {
|
ASMJIT_INLINE void andNotFlags(uint32_t flags) {
|
||||||
_flags &= static_cast<uint16_t>(~flags);
|
_flags &= ~static_cast<uint16_t>(flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
//! Get whether the node has been fetched.
|
//! Get whether the node has been fetched.
|
||||||
@@ -1695,18 +1700,18 @@ struct Node {
|
|||||||
return hasFlag(kNodeFlagIsInformative);
|
return hasFlag(kNodeFlagIsInformative);
|
||||||
}
|
}
|
||||||
|
|
||||||
//! Whether the instruction is an unconditional jump.
|
//! Whether the node is `InstNode` and unconditional jump.
|
||||||
ASMJIT_INLINE bool isJmp() const { return hasFlag(kNodeFlagIsJmp); }
|
ASMJIT_INLINE bool isJmp() const { return hasFlag(kNodeFlagIsJmp); }
|
||||||
//! Whether the instruction is a conditional jump.
|
//! Whether the node is `InstNode` and conditional jump.
|
||||||
ASMJIT_INLINE bool isJcc() const { return hasFlag(kNodeFlagIsJcc); }
|
ASMJIT_INLINE bool isJcc() const { return hasFlag(kNodeFlagIsJcc); }
|
||||||
//! Whether the instruction is an unconditional or conditional jump.
|
//! Whether the node is `InstNode` and conditional/unconditional jump.
|
||||||
ASMJIT_INLINE bool isJmpOrJcc() const { return hasFlag(kNodeFlagIsJmp | kNodeFlagIsJcc); }
|
ASMJIT_INLINE bool isJmpOrJcc() const { return hasFlag(kNodeFlagIsJmp | kNodeFlagIsJcc); }
|
||||||
//! Whether the instruction is a return.
|
//! Whether the node is `InstNode` and return.
|
||||||
ASMJIT_INLINE bool isRet() const { return hasFlag(kNodeFlagIsRet); }
|
ASMJIT_INLINE bool isRet() const { return hasFlag(kNodeFlagIsRet); }
|
||||||
|
|
||||||
//! Get whether the instruction is special.
|
//! Get whether the node is `InstNode` and the instruction is special.
|
||||||
ASMJIT_INLINE bool isSpecial() const { return hasFlag(kNodeFlagIsSpecial); }
|
ASMJIT_INLINE bool isSpecial() const { return hasFlag(kNodeFlagIsSpecial); }
|
||||||
//! Get whether the instruction accesses FPU.
|
//! Get whether the node is `InstNode` and the instruction uses x87-FPU.
|
||||||
ASMJIT_INLINE bool isFp() const { return hasFlag(kNodeFlagIsFp); }
|
ASMJIT_INLINE bool isFp() const { return hasFlag(kNodeFlagIsFp); }
|
||||||
|
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
@@ -1897,7 +1902,9 @@ struct EmbedNode : public Node {
|
|||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
|
|
||||||
//! Create a new `EmbedNode` instance.
|
//! Create a new `EmbedNode` instance.
|
||||||
ASMJIT_INLINE EmbedNode(Compiler* compiler, void* data, uint32_t size) : Node(compiler, kNodeTypeEmbed) {
|
ASMJIT_INLINE EmbedNode(Compiler* compiler, void* data, uint32_t size) :
|
||||||
|
Node(compiler, kNodeTypeEmbed) {
|
||||||
|
|
||||||
_size = size;
|
_size = size;
|
||||||
if (size <= kInlineBufferSize) {
|
if (size <= kInlineBufferSize) {
|
||||||
if (data != NULL)
|
if (data != NULL)
|
||||||
@@ -1953,7 +1960,7 @@ struct CommentNode : public Node {
|
|||||||
|
|
||||||
//! Create a new `CommentNode` instance.
|
//! Create a new `CommentNode` instance.
|
||||||
ASMJIT_INLINE CommentNode(Compiler* compiler, const char* comment) : Node(compiler, kNodeTypeComment) {
|
ASMJIT_INLINE CommentNode(Compiler* compiler, const char* comment) : Node(compiler, kNodeTypeComment) {
|
||||||
addFlags(kNodeFlagIsInformative);
|
orFlags(kNodeFlagIsInformative);
|
||||||
_comment = comment;
|
_comment = comment;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1974,8 +1981,10 @@ struct HintNode : public Node {
|
|||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
|
|
||||||
//! Create a new `HintNode` instance.
|
//! Create a new `HintNode` instance.
|
||||||
ASMJIT_INLINE HintNode(Compiler* compiler, VarData* vd, uint32_t hint, uint32_t value) : Node(compiler, kNodeTypeHint) {
|
ASMJIT_INLINE HintNode(Compiler* compiler, VarData* vd, uint32_t hint, uint32_t value) :
|
||||||
addFlags(kNodeFlagIsInformative);
|
Node(compiler, kNodeTypeHint) {
|
||||||
|
|
||||||
|
orFlags(kNodeFlagIsInformative);
|
||||||
_vd = vd;
|
_vd = vd;
|
||||||
_hint = hint;
|
_hint = hint;
|
||||||
_value = value;
|
_value = value;
|
||||||
@@ -2101,9 +2110,12 @@ struct InstNode : public Node {
|
|||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
|
|
||||||
//! Create a new `InstNode` instance.
|
//! Create a new `InstNode` instance.
|
||||||
ASMJIT_INLINE InstNode(Compiler* compiler, uint32_t code, uint32_t options, Operand* opList, uint32_t opCount) : Node(compiler, kNodeTypeInst) {
|
ASMJIT_INLINE InstNode(Compiler* compiler, uint32_t instId, uint32_t instOptions, Operand* opList, uint32_t opCount) :
|
||||||
_code = static_cast<uint16_t>(code);
|
Node(compiler, kNodeTypeInst) {
|
||||||
_options = static_cast<uint8_t>(options);
|
|
||||||
|
_instId = static_cast<uint16_t>(instId);
|
||||||
|
_reserved = 0;
|
||||||
|
_instOptions = instOptions;
|
||||||
|
|
||||||
_opCount = static_cast<uint8_t>(opCount);
|
_opCount = static_cast<uint8_t>(opCount);
|
||||||
_opList = opList;
|
_opList = opList;
|
||||||
@@ -2118,18 +2130,17 @@ struct InstNode : public Node {
|
|||||||
// [Accessors]
|
// [Accessors]
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
|
|
||||||
//! Get instruction code, see `kX86InstId`.
|
//! Get instruction ID, see `kX86InstId`.
|
||||||
ASMJIT_INLINE uint32_t getCode() const {
|
ASMJIT_INLINE uint32_t getInstId() const {
|
||||||
return _code;
|
return _instId;
|
||||||
}
|
}
|
||||||
|
|
||||||
//! Set instruction code to `code`.
|
//! Set instruction ID to `instId`.
|
||||||
//!
|
//!
|
||||||
//! Please do not modify instruction code if you are not know what you are
|
//! Please do not modify instruction code if you don't know what you are
|
||||||
//! doing. Incorrect instruction code or operands can raise assertion() at
|
//! doing. Incorrect instruction code or operands can cause assertion failure.
|
||||||
//! runtime.
|
ASMJIT_INLINE void setInstId(uint32_t instId) {
|
||||||
ASMJIT_INLINE void setCode(uint32_t code) {
|
_instId = static_cast<uint16_t>(instId);
|
||||||
_code = static_cast<uint16_t>(code);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//! Whether the instruction is an unconditional jump or whether the
|
//! Whether the instruction is an unconditional jump or whether the
|
||||||
@@ -2140,23 +2151,23 @@ struct InstNode : public Node {
|
|||||||
|
|
||||||
//! Get emit options.
|
//! Get emit options.
|
||||||
ASMJIT_INLINE uint32_t getOptions() const {
|
ASMJIT_INLINE uint32_t getOptions() const {
|
||||||
return _options;
|
return _instOptions;
|
||||||
}
|
}
|
||||||
//! Set emit options.
|
//! Set emit options.
|
||||||
ASMJIT_INLINE void setOptions(uint32_t options) {
|
ASMJIT_INLINE void setOptions(uint32_t options) {
|
||||||
_options = static_cast<uint8_t>(options);
|
_instOptions = options;
|
||||||
}
|
}
|
||||||
//! Add emit options.
|
//! Add emit options.
|
||||||
ASMJIT_INLINE void addOptions(uint32_t options) {
|
ASMJIT_INLINE void addOptions(uint32_t options) {
|
||||||
_options |= static_cast<uint8_t>(options);
|
_instOptions |= options;
|
||||||
}
|
}
|
||||||
//! Mask emit options.
|
//! Mask emit options.
|
||||||
ASMJIT_INLINE void andOptions(uint32_t options) {
|
ASMJIT_INLINE void andOptions(uint32_t options) {
|
||||||
_options &= static_cast<uint8_t>(options);
|
_instOptions &= options;
|
||||||
}
|
}
|
||||||
//! Clear emit options.
|
//! Clear emit options.
|
||||||
ASMJIT_INLINE void delOptions(uint32_t options) {
|
ASMJIT_INLINE void delOptions(uint32_t options) {
|
||||||
_options &= static_cast<uint8_t>(~options);
|
_instOptions &= ~options;
|
||||||
}
|
}
|
||||||
|
|
||||||
//! Get operands list.
|
//! Get operands list.
|
||||||
@@ -2225,12 +2236,14 @@ _Update:
|
|||||||
// [Members]
|
// [Members]
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
|
|
||||||
//! Instruction code, see `kInstId`.
|
//! Instruction ID, see `kInstId`.
|
||||||
uint16_t _code;
|
uint16_t _instId;
|
||||||
//! Instruction options, see `kInstOptions`.
|
|
||||||
uint8_t _options;
|
|
||||||
//! \internal
|
//! \internal
|
||||||
uint8_t _memOpIndex;
|
uint8_t _memOpIndex;
|
||||||
|
//! \internal
|
||||||
|
uint8_t _reserved;
|
||||||
|
//! Instruction options, see `kInstOptions`.
|
||||||
|
uint32_t _instOptions;
|
||||||
|
|
||||||
//! Operands list.
|
//! Operands list.
|
||||||
Operand* _opList;
|
Operand* _opList;
|
||||||
@@ -2474,7 +2487,7 @@ struct FuncNode : public Node {
|
|||||||
//! The "Red Zone" size - count of bytes which might be accessed without
|
//! The "Red Zone" size - count of bytes which might be accessed without
|
||||||
//! adjusting the stack pointer.
|
//! adjusting the stack pointer.
|
||||||
uint16_t _redZoneSize;
|
uint16_t _redZoneSize;
|
||||||
//! Spill zone size (zone used by WIN64ABI).
|
//! Spill zone size (used by WIN64 ABI).
|
||||||
uint16_t _spillZoneSize;
|
uint16_t _spillZoneSize;
|
||||||
|
|
||||||
//! Stack size needed for function arguments.
|
//! Stack size needed for function arguments.
|
||||||
|
|||||||
@@ -396,7 +396,7 @@ UNIT(base_constpool) {
|
|||||||
EXPECT(prevOffset + 8 == curOffset,
|
EXPECT(prevOffset + 8 == curOffset,
|
||||||
"pool.add() - Returned incorrect curOffset.");
|
"pool.add() - Returned incorrect curOffset.");
|
||||||
EXPECT(pool.getSize() == (i + 1) * 8,
|
EXPECT(pool.getSize() == (i + 1) * 8,
|
||||||
"pool.getSize() - Reports incorrect size.");
|
"pool.getSize() - Reported incorrect size.");
|
||||||
prevOffset = curOffset;
|
prevOffset = curOffset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -566,6 +566,26 @@ union UInt64 {
|
|||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------
|
||||||
|
// [AndNot]
|
||||||
|
// --------------------------------------------------------------------------
|
||||||
|
|
||||||
|
ASMJIT_INLINE UInt64& andNot(uint64_t val) {
|
||||||
|
u64 &= ~val;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
ASMJIT_INLINE UInt64& andNot(const UInt64& val) {
|
||||||
|
if (kArchHost64Bit) {
|
||||||
|
u64 &= ~val.u64;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
u32[0] &= ~val.u32[0];
|
||||||
|
u32[1] &= ~val.u32[1];
|
||||||
|
}
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
// [Or]
|
// [Or]
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
@@ -606,26 +626,6 @@ union UInt64 {
|
|||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
// --------------------------------------------------------------------------
|
|
||||||
// [Del]
|
|
||||||
// --------------------------------------------------------------------------
|
|
||||||
|
|
||||||
ASMJIT_INLINE UInt64& del(uint64_t val) {
|
|
||||||
u64 &= ~val;
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
ASMJIT_INLINE UInt64& del(const UInt64& val) {
|
|
||||||
if (kArchHost64Bit) {
|
|
||||||
u64 &= ~val.u64;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
u32[0] &= ~val.u32[0];
|
|
||||||
u32[1] &= ~val.u32[1];
|
|
||||||
}
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
// [Eq]
|
// [Eq]
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
|
|||||||
@@ -44,49 +44,64 @@ static void dumpCpu(void) {
|
|||||||
const X86CpuInfo* x86Cpu = static_cast<const X86CpuInfo*>(cpu);
|
const X86CpuInfo* x86Cpu = static_cast<const X86CpuInfo*>(cpu);
|
||||||
|
|
||||||
static const DumpCpuFeature x86FeaturesList[] = {
|
static const DumpCpuFeature x86FeaturesList[] = {
|
||||||
{ kX86CpuFeatureMultithreading , "Multithreading" },
|
{ kX86CpuFeatureNX , "NX (Non-Execute Bit)" },
|
||||||
{ kX86CpuFeatureExecuteDisableBit , "Execute-Disable Bit" },
|
{ kX86CpuFeatureMT , "MT (Multi-Threading)" },
|
||||||
{ kX86CpuFeatureRdtsc , "Rdtsc" },
|
{ kX86CpuFeatureRDTSC , "RDTSC" },
|
||||||
{ kX86CpuFeatureRdtscp , "Rdtscp" },
|
{ kX86CpuFeatureRDTSCP , "RDTSCP" },
|
||||||
{ kX86CpuFeatureCmov , "Cmov" },
|
{ kX86CpuFeatureCMOV , "CMOV" },
|
||||||
{ kX86CpuFeatureCmpXchg8B , "Cmpxchg8b" },
|
{ kX86CpuFeatureCMPXCHG8B , "CMPXCHG8B" },
|
||||||
{ kX86CpuFeatureCmpXchg16B , "Cmpxchg16b" },
|
{ kX86CpuFeatureCMPXCHG16B , "CMPXCHG16B" },
|
||||||
{ kX86CpuFeatureClflush , "Clflush" },
|
{ kX86CpuFeatureCLFLUSH , "CLFLUSH" },
|
||||||
{ kX86CpuFeaturePrefetch , "Prefetch" },
|
{ kX86CpuFeatureCLFLUSHOpt , "CLFLUSH (Opt)" },
|
||||||
{ kX86CpuFeatureLahfSahf , "Lahf/Sahf" },
|
{ kX86CpuFeaturePREFETCH , "PREFETCH" },
|
||||||
{ kX86CpuFeatureFxsr , "Fxsave/Fxrstor" },
|
{ kX86CpuFeaturePREFETCHWT1 , "PREFETCHWT1" },
|
||||||
{ kX86CpuFeatureFfxsr , "Fxsave/Fxrstor Opt." },
|
{ kX86CpuFeatureLahfSahf , "LAHF/SAHF" },
|
||||||
{ kX86CpuFeatureMmx , "Mmx" },
|
{ kX86CpuFeatureFXSR , "FXSR" },
|
||||||
{ kX86CpuFeatureMmxExt , "MmxExt" },
|
{ kX86CpuFeatureFXSROpt , "FXSR (Opt)" },
|
||||||
{ kX86CpuFeature3dNow , "3dnow" },
|
{ kX86CpuFeatureMMX , "MMX" },
|
||||||
{ kX86CpuFeature3dNowExt , "3dnowExt" },
|
{ kX86CpuFeatureMMX2 , "MMX2" },
|
||||||
{ kX86CpuFeatureSse , "Sse" },
|
{ kX86CpuFeature3DNOW , "3DNOW" },
|
||||||
{ kX86CpuFeatureSse2 , "Sse2" },
|
{ kX86CpuFeature3DNOW2 , "3DNOW2" },
|
||||||
{ kX86CpuFeatureSse3 , "Sse3" },
|
{ kX86CpuFeatureSSE , "SSE" },
|
||||||
{ kX86CpuFeatureSsse3 , "Ssse3" },
|
{ kX86CpuFeatureSSE2 , "SSE2" },
|
||||||
{ kX86CpuFeatureSse4A , "Sse4a" },
|
{ kX86CpuFeatureSSE3 , "SSE3" },
|
||||||
{ kX86CpuFeatureSse41 , "Sse4.1" },
|
{ kX86CpuFeatureSSSE3 , "SSSE3" },
|
||||||
{ kX86CpuFeatureSse42 , "Sse4.2" },
|
{ kX86CpuFeatureSSE4A , "SSE4A" },
|
||||||
{ kX86CpuFeatureMsse , "Misaligned SSE" },
|
{ kX86CpuFeatureSSE4_1 , "SSE4.1" },
|
||||||
{ kX86CpuFeatureMonitorMWait , "Monitor/MWait" },
|
{ kX86CpuFeatureSSE4_2 , "SSE4.2" },
|
||||||
{ kX86CpuFeatureMovbe , "Movbe" },
|
{ kX86CpuFeatureMSSE , "Misaligned SSE" },
|
||||||
{ kX86CpuFeaturePopcnt , "Popcnt" },
|
{ kX86CpuFeatureMONITOR , "MONITOR/MWAIT" },
|
||||||
{ kX86CpuFeatureLzcnt , "Lzcnt" },
|
{ kX86CpuFeatureMOVBE , "MOVBE" },
|
||||||
{ kX86CpuFeatureAesni , "AesNI" },
|
{ kX86CpuFeaturePOPCNT , "POPCNT" },
|
||||||
{ kX86CpuFeaturePclmulqdq , "Pclmulqdq" },
|
{ kX86CpuFeatureLZCNT , "LZCNT" },
|
||||||
{ kX86CpuFeatureRdrand , "Rdrand" },
|
{ kX86CpuFeatureAESNI , "AESNI" },
|
||||||
{ kX86CpuFeatureAvx , "Avx" },
|
{ kX86CpuFeaturePCLMULQDQ , "PCLMULQDQ" },
|
||||||
{ kX86CpuFeatureAvx2 , "Avx2" },
|
{ kX86CpuFeatureRDRAND , "RDRAND" },
|
||||||
|
{ kX86CpuFeatureRDSEED , "RDSEED" },
|
||||||
|
{ kX86CpuFeatureSHA , "SHA" },
|
||||||
|
{ kX86CpuFeatureXSave , "XSAVE" },
|
||||||
|
{ kX86CpuFeatureXSaveOS , "XSAVE (OS)" },
|
||||||
|
{ kX86CpuFeatureAVX , "AVX" },
|
||||||
|
{ kX86CpuFeatureAVX2 , "AVX2" },
|
||||||
{ kX86CpuFeatureF16C , "F16C" },
|
{ kX86CpuFeatureF16C , "F16C" },
|
||||||
{ kX86CpuFeatureFma3 , "Fma3" },
|
{ kX86CpuFeatureFMA3 , "FMA3" },
|
||||||
{ kX86CpuFeatureFma4 , "Fma4" },
|
{ kX86CpuFeatureFMA4 , "FMA4" },
|
||||||
{ kX86CpuFeatureXop , "Xop" },
|
{ kX86CpuFeatureXOP , "XOP" },
|
||||||
{ kX86CpuFeatureBmi , "Bmi" },
|
{ kX86CpuFeatureBMI , "BMI" },
|
||||||
{ kX86CpuFeatureBmi2 , "Bmi2" },
|
{ kX86CpuFeatureBMI2 , "BMI2" },
|
||||||
{ kX86CpuFeatureHle , "Hle" },
|
{ kX86CpuFeatureHLE , "HLE" },
|
||||||
{ kX86CpuFeatureRtm , "Rtm" },
|
{ kX86CpuFeatureRTM , "RTM" },
|
||||||
{ kX86CpuFeatureFsGsBase , "FsGsBase" },
|
{ kX86CpuFeatureADX , "ADX" },
|
||||||
{ kX86CpuFeatureRepMovsbStosbExt , "RepMovsbStosbExt" }
|
{ kX86CpuFeatureMPX , "MPX" },
|
||||||
|
{ kX86CpuFeatureFSGSBase , "FS/GS Base" },
|
||||||
|
{ kX86CpuFeatureMOVSBSTOSBOpt , "REP MOVSB/STOSB (Opt)" },
|
||||||
|
{ kX86CpuFeatureAVX512F , "AVX512F" },
|
||||||
|
{ kX86CpuFeatureAVX512CD , "AVX512CD" },
|
||||||
|
{ kX86CpuFeatureAVX512PF , "AVX512PF" },
|
||||||
|
{ kX86CpuFeatureAVX512ER , "AVX512ER" },
|
||||||
|
{ kX86CpuFeatureAVX512DQ , "AVX512DQ" },
|
||||||
|
{ kX86CpuFeatureAVX512BW , "AVX512BW" },
|
||||||
|
{ kX86CpuFeatureAVX512VL , "AVX512VL" }
|
||||||
};
|
};
|
||||||
|
|
||||||
INFO("Host CPU Info (X86/X64):");
|
INFO("Host CPU Info (X86/X64):");
|
||||||
@@ -178,16 +193,19 @@ static void dumpSizeOf(void) {
|
|||||||
#if defined(ASMJIT_BUILD_X86) || defined(ASMJIT_BUILD_X64)
|
#if defined(ASMJIT_BUILD_X86) || defined(ASMJIT_BUILD_X64)
|
||||||
INFO("SizeOf X86/X64:");
|
INFO("SizeOf X86/X64:");
|
||||||
DUMP_TYPE(asmjit::X86Assembler);
|
DUMP_TYPE(asmjit::X86Assembler);
|
||||||
|
DUMP_TYPE(asmjit::X86InstInfo);
|
||||||
|
DUMP_TYPE(asmjit::X86InstExtendedInfo);
|
||||||
|
|
||||||
#if !defined(ASMJIT_DISABLE_COMPILER)
|
#if !defined(ASMJIT_DISABLE_COMPILER)
|
||||||
DUMP_TYPE(asmjit::X86Compiler);
|
DUMP_TYPE(asmjit::X86Compiler);
|
||||||
DUMP_TYPE(asmjit::X86CallNode);
|
DUMP_TYPE(asmjit::X86CallNode);
|
||||||
DUMP_TYPE(asmjit::X86FuncNode);
|
DUMP_TYPE(asmjit::X86FuncNode);
|
||||||
DUMP_TYPE(asmjit::X86FuncDecl);
|
DUMP_TYPE(asmjit::X86FuncDecl);
|
||||||
DUMP_TYPE(asmjit::X86InstInfo);
|
|
||||||
DUMP_TYPE(asmjit::X86VarMap);
|
DUMP_TYPE(asmjit::X86VarMap);
|
||||||
DUMP_TYPE(asmjit::X86VarInfo);
|
DUMP_TYPE(asmjit::X86VarInfo);
|
||||||
DUMP_TYPE(asmjit::X86VarState);
|
DUMP_TYPE(asmjit::X86VarState);
|
||||||
#endif // !ASMJIT_DISABLE_COMPILER
|
#endif // !ASMJIT_DISABLE_COMPILER
|
||||||
|
|
||||||
INFO("");
|
INFO("");
|
||||||
#endif // ASMJIT_BUILD_X86
|
#endif // ASMJIT_BUILD_X86
|
||||||
}
|
}
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -57,16 +57,64 @@ namespace asmjit {
|
|||||||
return *this; \
|
return *this; \
|
||||||
} \
|
} \
|
||||||
\
|
\
|
||||||
/*! Force REX prefix. */ \
|
/*! Force REX prefix (X64). */ \
|
||||||
ASMJIT_INLINE _Class_& rex() { \
|
ASMJIT_INLINE _Class_& rex() { \
|
||||||
_instOptions |= kX86InstOptionRex; \
|
_instOptions |= kX86InstOptionRex; \
|
||||||
return *this; \
|
return *this; \
|
||||||
} \
|
} \
|
||||||
\
|
\
|
||||||
/*! Force 3-byte VEX prefix. */ \
|
/*! Force 3-byte VEX prefix (AVX+). */ \
|
||||||
ASMJIT_INLINE _Class_& vex3() { \
|
ASMJIT_INLINE _Class_& vex3() { \
|
||||||
_instOptions |= kX86InstOptionVex3; \
|
_instOptions |= kX86InstOptionVex3; \
|
||||||
return *this; \
|
return *this; \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
/*! Force 4-byte EVEX prefix (AVX512+). */ \
|
||||||
|
ASMJIT_INLINE _Class_& evex() { \
|
||||||
|
_instOptions |= kX86InstOptionEvex; \
|
||||||
|
return *this; \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
/*! Use zeroing instead of merging (AVX512+). */ \
|
||||||
|
ASMJIT_INLINE _Class_& z() { \
|
||||||
|
_instOptions |= kX86InstOptionEvexZero; \
|
||||||
|
return *this; \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
/*! Broadcast one element to all other elements (AVX512+). */ \
|
||||||
|
ASMJIT_INLINE _Class_& _1ToN() { \
|
||||||
|
_instOptions |= kX86InstOptionEvexOneN; \
|
||||||
|
return *this; \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
/*! Suppress all exceptions (AVX512+). */ \
|
||||||
|
ASMJIT_INLINE _Class_& sae() { \
|
||||||
|
_instOptions |= kX86InstOptionEvexSae; \
|
||||||
|
return *this; \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
/*! Static rounding mode `round-to-nearest` (even) and `SAE` (AVX512+). */ \
|
||||||
|
ASMJIT_INLINE _Class_& rn_sae() { \
|
||||||
|
_instOptions |= kX86InstOptionEvexRnSae; \
|
||||||
|
return *this; \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
/*! Static rounding mode `round-down` (toward -inf) and `SAE` (AVX512+). */ \
|
||||||
|
ASMJIT_INLINE _Class_& rd_sae() { \
|
||||||
|
_instOptions |= kX86InstOptionEvexRdSae; \
|
||||||
|
return *this; \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
/*! Static rounding mode `round-up` (toward +inf) and `SAE` (AVX512+). */ \
|
||||||
|
ASMJIT_INLINE _Class_& ru_sae() { \
|
||||||
|
_instOptions |= kX86InstOptionEvexRuSae; \
|
||||||
|
return *this; \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
/*! Static rounding mode `round-toward-zero` (truncate) and `SAE` (AVX512+). */ \
|
||||||
|
ASMJIT_INLINE _Class_& rz_sae() { \
|
||||||
|
_instOptions |= kX86InstOptionEvexRzSae; \
|
||||||
|
return *this; \
|
||||||
}
|
}
|
||||||
|
|
||||||
//! X86/X64 assembler.
|
//! X86/X64 assembler.
|
||||||
@@ -94,10 +142,10 @@ namespace asmjit {
|
|||||||
//! ~~~
|
//! ~~~
|
||||||
//! // Use asmjit namespace.
|
//! // Use asmjit namespace.
|
||||||
//! using namespace asmjit;
|
//! using namespace asmjit;
|
||||||
//! using namespace asmjit::host;
|
//! using namespace asmjit::x86;
|
||||||
//!
|
//!
|
||||||
//! // Create Assembler instance.
|
//! // Create X86Assembler instance.
|
||||||
//! Assembler a;
|
//! X86Assembler a;
|
||||||
//!
|
//!
|
||||||
//! // Prolog.
|
//! // Prolog.
|
||||||
//! a.push(ebp);
|
//! a.push(ebp);
|
||||||
@@ -115,10 +163,10 @@ namespace asmjit {
|
|||||||
//! ~~~
|
//! ~~~
|
||||||
//!
|
//!
|
||||||
//! You can see that syntax is very close to Intel one. Only difference is that
|
//! You can see that syntax is very close to Intel one. Only difference is that
|
||||||
//! you are calling functions that emits the binary code for you. All registers
|
//! you are calling functions that emit binary code for you. All registers are
|
||||||
//! are in `asmjit` namespace, so it's very comfortable to use it (look at
|
//! in `asmjit::x86` namespace, so it's very comfortable to use it (look at the
|
||||||
//! first line). There is also used method `imm()` to create an immediate value.
|
//! `using namespace` section). Without importing `asmjit::x86` registers would
|
||||||
//! Use `imm_u()` to create unsigned immediate value.
|
//! have to be written as `x86::eax`, `x86::esp`, and so on.
|
||||||
//!
|
//!
|
||||||
//! There is also possibility to use memory addresses and immediates. Use
|
//! There is also possibility to use memory addresses and immediates. Use
|
||||||
//! `ptr()`, `byte_ptr()`, `word_ptr()`, `dword_ptr()` and similar functions to
|
//! `ptr()`, `byte_ptr()`, `word_ptr()`, `dword_ptr()` and similar functions to
|
||||||
@@ -126,14 +174,14 @@ namespace asmjit {
|
|||||||
//! information related to the operand size is needed only in rare cases, that
|
//! information related to the operand size is needed only in rare cases, that
|
||||||
//! is an instruction without having any register operands, such as `inc [mem]`.
|
//! is an instruction without having any register operands, such as `inc [mem]`.
|
||||||
//!
|
//!
|
||||||
//! for example, `a` is `x86::Assembler` instance:
|
//! For example, `a` is an `X86Assembler` instance:
|
||||||
//!
|
//!
|
||||||
//! ~~~
|
//! ~~~
|
||||||
//! a.mov(ptr(eax), 0); // mov ptr [eax], 0
|
//! a.mov(ptr(eax), 0); // mov ptr [eax], 0
|
||||||
//! a.mov(ptr(eax), edx); // mov ptr [eax], edx
|
//! a.mov(ptr(eax), edx); // mov ptr [eax], edx
|
||||||
//! ~~~
|
//! ~~~
|
||||||
//!
|
//!
|
||||||
//! But it's also possible to create complex addresses:
|
//! But it's also possible to create complex addresses offered by x86 architecture:
|
||||||
//!
|
//!
|
||||||
//! ~~~
|
//! ~~~
|
||||||
//! // eax + ecx*x addresses
|
//! // eax + ecx*x addresses
|
||||||
@@ -148,10 +196,12 @@ namespace asmjit {
|
|||||||
//! a.mov(ptr(eax, ecx, 3, 16), 0); // mov ptr [eax + ecx * 8 + 16], 0
|
//! a.mov(ptr(eax, ecx, 3, 16), 0); // mov ptr [eax + ecx * 8 + 16], 0
|
||||||
//! ~~~
|
//! ~~~
|
||||||
//!
|
//!
|
||||||
//! All addresses shown are using `ptr()` to make memory operand. Some assembler
|
//! All addresses shown are using `x86::ptr()` to make memory operand. Some
|
||||||
//! instructions (single operand ones) needs to have specified memory operand
|
//! assembler instructions using a single operand need to know the size of
|
||||||
//! size. For example `a.inc(ptr(eax))` can't be called, because the meaning is
|
//! the operand to avoid ambiguity. For example `a.inc(ptr(eax))` is ambiguous
|
||||||
//! ambiguous, see the code below.
|
//! and would cause a runtime error. This problem can be fixed by using memory
|
||||||
|
//! operand with size specified - `byte_ptr`, `word_ptr`, `dword_ptr`, see the
|
||||||
|
//! code below:
|
||||||
//!
|
//!
|
||||||
//! ~~~
|
//! ~~~
|
||||||
//! // [byte] address.
|
//! // [byte] address.
|
||||||
@@ -163,30 +213,34 @@ namespace asmjit {
|
|||||||
//! // [dword] address.
|
//! // [dword] address.
|
||||||
//! a.inc(dword_ptr(eax)); // Inc dword ptr [eax].
|
//! a.inc(dword_ptr(eax)); // Inc dword ptr [eax].
|
||||||
//! a.dec(dword_ptr(eax)); // Dec dword ptr [eax].
|
//! a.dec(dword_ptr(eax)); // Dec dword ptr [eax].
|
||||||
|
//! // [qword] address.
|
||||||
|
//! a.inc(qword_ptr(rax)); // Inc qword ptr [rax].
|
||||||
|
//! a.dec(qword_ptr(rax)); // Dec qword ptr [rax].
|
||||||
//! ~~~
|
//! ~~~
|
||||||
//!
|
//!
|
||||||
//! Calling JIT Code
|
//! Calling JIT Code
|
||||||
//! ----------------
|
//! ----------------
|
||||||
//!
|
//!
|
||||||
//! While you are over from emitting instructions, you can make your function
|
//! After you are finished with emitting instructions, you can make your function
|
||||||
//! by using `Assembler::make()` method. This method will use memory
|
//! callable by using `Assembler::make()` method. This method will use memory
|
||||||
//! manager to allocate virtual memory and relocates generated code to it. For
|
//! manager to allocate virtual memory and relocate generated code to it. The
|
||||||
//! memory allocation is used global memory manager by default and memory is
|
//! memory is allocated through `Runtime` instance provided to `X86Assembler`
|
||||||
//! freeable, but of course this default behavior can be overridden specifying
|
//! constructor.
|
||||||
//! your memory manager and allocation type. If you want to do with code
|
|
||||||
//! something else you can always override make() method and do what you want.
|
|
||||||
//!
|
//!
|
||||||
//! You can get size of generated code by `getCodeSize()` or `getOffset()`
|
//! The size of the code generated can be retrieved by `getCodeSize()` and
|
||||||
//! methods. These methods returns you code size or more precisely the current
|
//! `getOffset()` methods. The `getOffset()` method returns the current offset
|
||||||
//! code offset in bytes. The `takeCode()` function can be used to take the
|
//! (that is mostly equal to the final code size, if called after the code
|
||||||
//! internal buffer and reset the code generator, but the buffer taken has to
|
//! generation) and `getCodeSize()` returns the final code size with possible
|
||||||
//! be freed manually in such case.
|
//! trampolines. The `takeCode()` method can be used to take the internal buffer
|
||||||
|
//! and reset the code generator, but the buffer returned has to be freed manually
|
||||||
|
//! in such case.
|
||||||
//!
|
//!
|
||||||
//! Machine code can be executed only in memory that is marked executable. This
|
//! Machine code can be executed only in memory that is marked executable. This
|
||||||
//! mark is usually not set for memory returned by a C/C++ `malloc` function.
|
//! mark is usually not set for memory returned by a C/C++ `malloc()` function.
|
||||||
//! The `VMem::alloc()` function can be used allocate a memory where the code can
|
//! The `VMemUtil::alloc()` function can be used to allocate memory where the code
|
||||||
//! be executed or more preferably `VMemMgr` which has interface
|
//! can be executed. Please note that `VMemUtil` is a low-level class that works
|
||||||
//! similar to `malloc/free` and can allocate chunks of various sizes.
|
//! at memory page level. High level interface that is similar to malloc/free is
|
||||||
|
//! provided by `VMemMgr` class.
|
||||||
//!
|
//!
|
||||||
//! The next example shows how to allocate memory where the code can be executed:
|
//! The next example shows how to allocate memory where the code can be executed:
|
||||||
//!
|
//!
|
||||||
@@ -194,27 +248,28 @@ namespace asmjit {
|
|||||||
//! using namespace asmjit;
|
//! using namespace asmjit;
|
||||||
//!
|
//!
|
||||||
//! JitRuntime runtime;
|
//! JitRuntime runtime;
|
||||||
//! Assembler a(&runtime);
|
//! X86Assembler a(&runtime);
|
||||||
//!
|
//!
|
||||||
//! // ... Your code generation ...
|
//! ... Code generation ...
|
||||||
//!
|
//!
|
||||||
//! // The function prototype
|
//! // The function prototype.
|
||||||
//! typedef void (*MyFunc)();
|
//! typedef void (*MyFunc)();
|
||||||
//!
|
//!
|
||||||
//! // make your function
|
//! // Make the function.
|
||||||
//! MyFunc func = asmjit_cast<MyFunc>(a.make());
|
//! MyFunc func = asmjit_cast<MyFunc>(a.make());
|
||||||
//!
|
//!
|
||||||
//! // call your function
|
//! // Call the function.
|
||||||
//! func();
|
//! func();
|
||||||
//!
|
//!
|
||||||
//! // If you don't need your function again, free it.
|
//! // Release the function if not needed anymore.
|
||||||
//! runtime.release(func);
|
//! runtime.release(func);
|
||||||
//! ~~~
|
//! ~~~
|
||||||
//!
|
//!
|
||||||
//! This was a very primitive showing how the generated code can be executed.
|
//! This was a very primitive example showing how the generated code can be
|
||||||
//! In production noone will probably generate a function that is only called
|
//! executed by using the foundation of classes AsmJit offers. In production
|
||||||
//! once and nobody will probably free the function right after it was executed.
|
//! nobody is likely to generate a function that is only called once and freed
|
||||||
//! The code just shows the proper way of code generation and cleanup.
|
//! immediately after it's been called, however, the concept of releasing code
|
||||||
|
//! that is not needed anymore should be clear.
|
||||||
//!
|
//!
|
||||||
//! Labels
|
//! Labels
|
||||||
//! ------
|
//! ------
|
||||||
@@ -278,7 +333,7 @@ namespace asmjit {
|
|||||||
//! a.mov(esp, ebp);
|
//! a.mov(esp, ebp);
|
||||||
//! a.pop(ebp);
|
//! a.pop(ebp);
|
||||||
//!
|
//!
|
||||||
//! // Return: STDCALL convention is to pop stack in called function.
|
//! // Return: Pop the stack by `arg_size` as defined by `STDCALL` convention.
|
||||||
//! a.ret(arg_size);
|
//! a.ret(arg_size);
|
||||||
//! ~~~
|
//! ~~~
|
||||||
//!
|
//!
|
||||||
@@ -315,13 +370,13 @@ namespace asmjit {
|
|||||||
//! Next, more advanced, but often needed technique is that you can build your
|
//! Next, more advanced, but often needed technique is that you can build your
|
||||||
//! own registers allocator. X86 architecture contains 8 general purpose
|
//! own registers allocator. X86 architecture contains 8 general purpose
|
||||||
//! registers, 8 Mm registers and 8 Xmm/Ymm/Zmm registers. X64 architecture
|
//! registers, 8 Mm registers and 8 Xmm/Ymm/Zmm registers. X64 architecture
|
||||||
//! extends the count of Gp registers and Xmm/Ymm/Zmm registers to 16 or 32
|
//! extends the count of Gp registers and Xmm/Ymm/Zmm registers to 16. AVX-512
|
||||||
//! when AVX512 is available.
|
//! architecture extends Xmm/Ymm/Zmm SIMD registers to 32.
|
||||||
//!
|
//!
|
||||||
//! To create a general purpose register operand from register index use
|
//! To create a general purpose register operand from register index use
|
||||||
//! `gpb_lo()`, `gpb_hi()`, `gpw()`, `gpd()`, `gpq()`. To create registers of
|
//! `gpb_lo()`, `gpb_hi()`, `gpw()`, `gpd()`, `gpq()`. To create registers of
|
||||||
//! other types there are functions `fp()`, `mm()`, `xmm()`, `ymm()` and `zmm()`
|
//! other types there are `fp()`, `mm()`, `k()`, `xmm()`, `ymm()` and `zmm()`
|
||||||
//! available.
|
//! functions available that return a new register operand.
|
||||||
//!
|
//!
|
||||||
//! \sa X86Compiler.
|
//! \sa X86Compiler.
|
||||||
struct ASMJIT_VCLASS X86Assembler : public Assembler {
|
struct ASMJIT_VCLASS X86Assembler : public Assembler {
|
||||||
@@ -340,7 +395,7 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
|
|||||||
// [Arch]
|
// [Arch]
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
|
|
||||||
//! Get count of registers of the current architecture.
|
//! Get count of registers of the current architecture and mode.
|
||||||
ASMJIT_INLINE const X86RegCount& getRegCount() const {
|
ASMJIT_INLINE const X86RegCount& getRegCount() const {
|
||||||
return _regCount;
|
return _regCount;
|
||||||
}
|
}
|
||||||
@@ -478,7 +533,7 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
|
|||||||
X86GpReg zdi;
|
X86GpReg zdi;
|
||||||
|
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
// [Base Instructions]
|
// [Emit]
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
|
|
||||||
#define INST_0x(_Inst_, _Code_) \
|
#define INST_0x(_Inst_, _Code_) \
|
||||||
@@ -654,6 +709,30 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
|
|||||||
return emit(_Code_, o0, o1, o2); \
|
return emit(_Code_, o0, o1, o2); \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define INST_3ii(_Inst_, _Code_, _Op0_, _Op1_, _Op2_) \
|
||||||
|
ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2) { \
|
||||||
|
return emit(_Code_, o0, o1, o2); \
|
||||||
|
} \
|
||||||
|
/*! \overload */ \
|
||||||
|
ASMJIT_INLINE Error _Inst_(const _Op0_& o0, int o1, int o2) { \
|
||||||
|
Imm o1Imm(o1); \
|
||||||
|
return emit(_Code_, o0, o1Imm, o2); \
|
||||||
|
} \
|
||||||
|
ASMJIT_INLINE Error _Inst_(const _Op0_& o0, unsigned int o1, unsigned int o2) { \
|
||||||
|
Imm o1Imm(o1); \
|
||||||
|
return emit(_Code_, o0, o1Imm, static_cast<uint64_t>(o2)); \
|
||||||
|
} \
|
||||||
|
/*! \overload */ \
|
||||||
|
ASMJIT_INLINE Error _Inst_(const _Op0_& o0, int64_t o1, int64_t o2) { \
|
||||||
|
Imm o1Imm(o1); \
|
||||||
|
return emit(_Code_, o0, o1Imm, static_cast<uint64_t>(o2)); \
|
||||||
|
} \
|
||||||
|
/*! \overload */ \
|
||||||
|
ASMJIT_INLINE Error _Inst_(const _Op0_& o0, uint64_t o1, uint64_t o2) { \
|
||||||
|
Imm o1Imm(o1); \
|
||||||
|
return emit(_Code_, o0, o1Imm, o2); \
|
||||||
|
}
|
||||||
|
|
||||||
#define INST_4x(_Inst_, _Code_, _Op0_, _Op1_, _Op2_, _Op3_) \
|
#define INST_4x(_Inst_, _Code_, _Op0_, _Op1_, _Op2_, _Op3_) \
|
||||||
ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2, const _Op3_& o3) { \
|
ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2, const _Op3_& o3) { \
|
||||||
return emit(_Code_, o0, o1, o2, o3); \
|
return emit(_Code_, o0, o1, o2, o3); \
|
||||||
@@ -686,6 +765,35 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
|
|||||||
return emit(_Code_, o0, o1, o2, o3); \
|
return emit(_Code_, o0, o1, o2, o3); \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define INST_4ii(_Inst_, _Code_, _Op0_, _Op1_, _Op2_, _Op3_) \
|
||||||
|
ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2, const _Op3_& o3) { \
|
||||||
|
return emit(_Code_, o0, o1, o2, o3); \
|
||||||
|
} \
|
||||||
|
/*! \overload */ \
|
||||||
|
ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1, int o2, int o3) { \
|
||||||
|
Imm o2Imm(o2); \
|
||||||
|
return emit(_Code_, o0, o1, o2Imm, o3); \
|
||||||
|
} \
|
||||||
|
/*! \overload */ \
|
||||||
|
ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1, unsigned int o2, unsigned int o3) { \
|
||||||
|
Imm o2Imm(o2); \
|
||||||
|
return emit(_Code_, o0, o1, o2Imm, static_cast<uint64_t>(o3)); \
|
||||||
|
} \
|
||||||
|
/*! \overload */ \
|
||||||
|
ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1, int64_t o2, int64_t o3) { \
|
||||||
|
Imm o2Imm(o2); \
|
||||||
|
return emit(_Code_, o0, o1, o2Imm, static_cast<uint64_t>(o3)); \
|
||||||
|
} \
|
||||||
|
/*! \overload */ \
|
||||||
|
ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1, uint64_t o2, uint64_t o3) { \
|
||||||
|
Imm o2Imm(o2); \
|
||||||
|
return emit(_Code_, o0, o1, o2Imm, o3); \
|
||||||
|
}
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------
|
||||||
|
// [X86/X64]
|
||||||
|
// --------------------------------------------------------------------------
|
||||||
|
|
||||||
//! Add with Carry.
|
//! Add with Carry.
|
||||||
INST_2x(adc, kX86InstIdAdc, X86GpReg, X86GpReg)
|
INST_2x(adc, kX86InstIdAdc, X86GpReg, X86GpReg)
|
||||||
//! \overload
|
//! \overload
|
||||||
@@ -837,11 +945,6 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
|
|||||||
//! CPU identification (i486).
|
//! CPU identification (i486).
|
||||||
INST_0x(cpuid, kX86InstIdCpuid)
|
INST_0x(cpuid, kX86InstIdCpuid)
|
||||||
|
|
||||||
//! Accumulate crc32 value (polynomial 0x11EDC6F41) (SSE4.2).
|
|
||||||
INST_2x_(crc32, kX86InstIdCrc32, X86GpReg, X86GpReg, o0.isRegType(kX86RegTypeGpd) || o0.isRegType(kX86RegTypeGpq))
|
|
||||||
//! \overload
|
|
||||||
INST_2x_(crc32, kX86InstIdCrc32, X86GpReg, X86Mem, o0.isRegType(kX86RegTypeGpd) || o0.isRegType(kX86RegTypeGpq))
|
|
||||||
|
|
||||||
//! Decimal adjust AL after addition (X86 Only).
|
//! Decimal adjust AL after addition (X86 Only).
|
||||||
INST_0x(daa, kX86InstIdDaa)
|
INST_0x(daa, kX86InstIdDaa)
|
||||||
//! Decimal adjust AL after subtraction (X86 Only).
|
//! Decimal adjust AL after subtraction (X86 Only).
|
||||||
@@ -1037,11 +1140,6 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
|
|||||||
//! Pop stack into EFLAGS register (32-bit or 64-bit).
|
//! Pop stack into EFLAGS register (32-bit or 64-bit).
|
||||||
INST_0x(popf, kX86InstIdPopf)
|
INST_0x(popf, kX86InstIdPopf)
|
||||||
|
|
||||||
//! Return the count of number of bits set to 1 (SSE4.2).
|
|
||||||
INST_2x_(popcnt, kX86InstIdPopcnt, X86GpReg, X86GpReg, !o0.isGpb() && o0.getRegType() == o1.getRegType())
|
|
||||||
//! \overload
|
|
||||||
INST_2x_(popcnt, kX86InstIdPopcnt, X86GpReg, X86Mem, !o0.isGpb())
|
|
||||||
|
|
||||||
//! Push WORD or DWORD/QWORD on the stack.
|
//! Push WORD or DWORD/QWORD on the stack.
|
||||||
INST_1x_(push, kX86InstIdPush, X86GpReg, o0.getSize() == 2 || o0.getSize() == _regSize)
|
INST_1x_(push, kX86InstIdPush, X86GpReg, o0.getSize() == 2 || o0.getSize() == _regSize)
|
||||||
//! Push WORD or DWORD/QWORD on the stack.
|
//! Push WORD or DWORD/QWORD on the stack.
|
||||||
@@ -1329,273 +1427,277 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
|
|||||||
INST_2i(xor_, kX86InstIdXor, X86Mem, Imm)
|
INST_2i(xor_, kX86InstIdXor, X86Mem, Imm)
|
||||||
|
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
// [Fpu]
|
// [FPU]
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
|
|
||||||
//! Compute 2^x - 1 (FPU).
|
//! Compute `2^x - 1` - `fp0 = POW(2, fp0) - 1` (FPU).
|
||||||
INST_0x(f2xm1, kX86InstIdF2xm1)
|
INST_0x(f2xm1, kX86InstIdF2xm1)
|
||||||
//! Absolute value of fp0 (FPU).
|
//! Abs `fp0 = ABS(fp0)` (FPU).
|
||||||
INST_0x(fabs, kX86InstIdFabs)
|
INST_0x(fabs, kX86InstIdFabs)
|
||||||
|
|
||||||
//! Add `o1` to `o0` (one has to be `fp0`) and store result in `o0` (FPU).
|
//! Add `o0 = o0 + o1` (one operand has to be `fp0`) (FPU).
|
||||||
INST_2x_(fadd, kX86InstIdFadd, X86FpReg, X86FpReg, o0.getRegIndex() == 0 || o1.getRegIndex() == 0)
|
INST_2x_(fadd, kX86InstIdFadd, X86FpReg, X86FpReg, o0.getRegIndex() == 0 || o1.getRegIndex() == 0)
|
||||||
//! Add 4-byte or 8-byte FP `o0` to fp0 and store result in fp0 (FPU).
|
//! Add `fp0 = fp0 + float_or_double[o0]` (FPU).
|
||||||
INST_1x(fadd, kX86InstIdFadd, X86Mem)
|
INST_1x(fadd, kX86InstIdFadd, X86Mem)
|
||||||
//! Add fp0 to `o0` and pop the FPU stack (FPU).
|
//! Add `o0 = o0 + fp0` and POP (FPU).
|
||||||
INST_1x(faddp, kX86InstIdFaddp, X86FpReg)
|
INST_1x(faddp, kX86InstIdFaddp, X86FpReg)
|
||||||
//! \overload
|
//! Add `fp1 = fp1 + fp0` and POP (FPU).
|
||||||
INST_0x(faddp, kX86InstIdFaddp)
|
INST_0x(faddp, kX86InstIdFaddp)
|
||||||
|
|
||||||
//! Load binary coded decimal (FPU).
|
//! Load BCD from `[o0]` and PUSH (FPU).
|
||||||
INST_1x(fbld, kX86InstIdFbld, X86Mem)
|
INST_1x(fbld, kX86InstIdFbld, X86Mem)
|
||||||
//! Store BCD integer and Pop (FPU).
|
//! Store BCD-Integer to `[o0]` and POP (FPU).
|
||||||
INST_1x(fbstp, kX86InstIdFbstp, X86Mem)
|
INST_1x(fbstp, kX86InstIdFbstp, X86Mem)
|
||||||
//! Change fp0 sign (FPU).
|
|
||||||
|
//! Complement Sign `fp0 = -fp0` (FPU).
|
||||||
INST_0x(fchs, kX86InstIdFchs)
|
INST_0x(fchs, kX86InstIdFchs)
|
||||||
|
|
||||||
//! Clear exceptions (FPU).
|
//! Clear exceptions (FPU).
|
||||||
INST_0x(fclex, kX86InstIdFclex)
|
INST_0x(fclex, kX86InstIdFclex)
|
||||||
|
|
||||||
//! Conditional move (FPU).
|
//! Conditional move `if (CF=1) fp0 = o0` (FPU).
|
||||||
INST_1x(fcmovb, kX86InstIdFcmovb, X86FpReg)
|
INST_1x(fcmovb, kX86InstIdFcmovb, X86FpReg)
|
||||||
//! Conditional move (FPU).
|
//! Conditional move `if (CF|ZF=1) fp0 = o0` (FPU).
|
||||||
INST_1x(fcmovbe, kX86InstIdFcmovbe, X86FpReg)
|
INST_1x(fcmovbe, kX86InstIdFcmovbe, X86FpReg)
|
||||||
//! Conditional move (FPU).
|
//! Conditional move `if (ZF=1) fp0 = o0` (FPU).
|
||||||
INST_1x(fcmove, kX86InstIdFcmove, X86FpReg)
|
INST_1x(fcmove, kX86InstIdFcmove, X86FpReg)
|
||||||
//! Conditional move (FPU).
|
//! Conditional move `if (CF=0) fp0 = o0` (FPU).
|
||||||
INST_1x(fcmovnb, kX86InstIdFcmovnb, X86FpReg)
|
INST_1x(fcmovnb, kX86InstIdFcmovnb, X86FpReg)
|
||||||
//! Conditional move (FPU).
|
//! Conditional move `if (CF|ZF=0) fp0 = o0` (FPU).
|
||||||
INST_1x(fcmovnbe, kX86InstIdFcmovnbe, X86FpReg)
|
INST_1x(fcmovnbe, kX86InstIdFcmovnbe, X86FpReg)
|
||||||
//! Conditional move (FPU).
|
//! Conditional move `if (ZF=0) fp0 = o0` (FPU).
|
||||||
INST_1x(fcmovne, kX86InstIdFcmovne, X86FpReg)
|
INST_1x(fcmovne, kX86InstIdFcmovne, X86FpReg)
|
||||||
//! Conditional move (FPU).
|
//! Conditional move `if (PF=0) fp0 = o0` (FPU).
|
||||||
INST_1x(fcmovnu, kX86InstIdFcmovnu, X86FpReg)
|
INST_1x(fcmovnu, kX86InstIdFcmovnu, X86FpReg)
|
||||||
//! Conditional move (FPU).
|
//! Conditional move `if (PF=1) fp0 = o0` (FPU).
|
||||||
INST_1x(fcmovu, kX86InstIdFcmovu, X86FpReg)
|
INST_1x(fcmovu, kX86InstIdFcmovu, X86FpReg)
|
||||||
|
|
||||||
//! Compare fp0 with `o0` (FPU).
|
//! Compare `fp0` with `o0` (FPU).
|
||||||
INST_1x(fcom, kX86InstIdFcom, X86FpReg)
|
INST_1x(fcom, kX86InstIdFcom, X86FpReg)
|
||||||
//! Compare fp0 with fp1 (FPU).
|
//! Compare `fp0` with `fp1` (FPU).
|
||||||
INST_0x(fcom, kX86InstIdFcom)
|
INST_0x(fcom, kX86InstIdFcom)
|
||||||
//! Compare fp0 with 4-byte or 8-byte FP at `src` (FPU).
|
//! Compare `fp0` with `float_or_double[o0]` (FPU).
|
||||||
INST_1x(fcom, kX86InstIdFcom, X86Mem)
|
INST_1x(fcom, kX86InstIdFcom, X86Mem)
|
||||||
//! Compare fp0 with `o0` and pop the FPU stack (FPU).
|
//! Compare `fp0` with `o0` and POP (FPU).
|
||||||
INST_1x(fcomp, kX86InstIdFcomp, X86FpReg)
|
INST_1x(fcomp, kX86InstIdFcomp, X86FpReg)
|
||||||
//! Compare fp0 with fp1 and pop the FPU stack (FPU).
|
//! Compare `fp0` with `fp1` and POP (FPU).
|
||||||
INST_0x(fcomp, kX86InstIdFcomp)
|
INST_0x(fcomp, kX86InstIdFcomp)
|
||||||
//! Compare fp0 with 4-byte or 8-byte FP at `adr` and pop the FPU stack (FPU).
|
//! Compare `fp0` with `float_or_double[o0]` and POP (FPU).
|
||||||
INST_1x(fcomp, kX86InstIdFcomp, X86Mem)
|
INST_1x(fcomp, kX86InstIdFcomp, X86Mem)
|
||||||
//! Compare fp0 with fp1 and pop the FPU stack twice (FPU).
|
//! Compare `fp0` with `fp1` and POP twice (FPU).
|
||||||
INST_0x(fcompp, kX86InstIdFcompp)
|
INST_0x(fcompp, kX86InstIdFcompp)
|
||||||
//! Compare fp0 and `o0` and Set EFLAGS (FPU).
|
//! Compare `fp0` with `o0` and set EFLAGS (FPU).
|
||||||
INST_1x(fcomi, kX86InstIdFcomi, X86FpReg)
|
INST_1x(fcomi, kX86InstIdFcomi, X86FpReg)
|
||||||
//! Compare fp0 and `o0` and Set EFLAGS and pop the FPU stack (FPU).
|
//! Compare `fp0` with `o0` and set EFLAGS and POP (FPU).
|
||||||
INST_1x(fcomip, kX86InstIdFcomip, X86FpReg)
|
INST_1x(fcomip, kX86InstIdFcomip, X86FpReg)
|
||||||
|
|
||||||
//! Calculate cosine of fp0 and store result in fp0 (FPU).
|
//! Cos `fp0 = cos(fp0)` (FPU).
|
||||||
INST_0x(fcos, kX86InstIdFcos)
|
INST_0x(fcos, kX86InstIdFcos)
|
||||||
//! Decrement FPU stack-top pointer (FPU).
|
|
||||||
|
//! Decrement FPU stack pointer (FPU).
|
||||||
INST_0x(fdecstp, kX86InstIdFdecstp)
|
INST_0x(fdecstp, kX86InstIdFdecstp)
|
||||||
|
|
||||||
//! Divide `o0` by `o1` (one has to be `fp0`) (FPU).
|
//! Divide `o0 = o0 / o1` (one has to be `fp0`) (FPU).
|
||||||
INST_2x_(fdiv, kX86InstIdFdiv, X86FpReg, X86FpReg, o0.getRegIndex() == 0 || o1.getRegIndex() == 0)
|
INST_2x_(fdiv, kX86InstIdFdiv, X86FpReg, X86FpReg, o0.getRegIndex() == 0 || o1.getRegIndex() == 0)
|
||||||
//! Divide fp0 by 32-bit or 64-bit FP value (FPU).
|
//! Divide `fp0 = fp0 / float_or_double[o0]` (FPU).
|
||||||
INST_1x(fdiv, kX86InstIdFdiv, X86Mem)
|
INST_1x(fdiv, kX86InstIdFdiv, X86Mem)
|
||||||
//! Divide `o0` by fp0 (FPU).
|
//! Divide `o0 = o0 / fp0` and POP (FPU).
|
||||||
INST_1x(fdivp, kX86InstIdFdivp, X86FpReg)
|
INST_1x(fdivp, kX86InstIdFdivp, X86FpReg)
|
||||||
//! \overload
|
//! Divide `fp1 = fp1 / fp0` and POP (FPU).
|
||||||
INST_0x(fdivp, kX86InstIdFdivp)
|
INST_0x(fdivp, kX86InstIdFdivp)
|
||||||
|
|
||||||
//! Reverse divide `o0` by `o1` (one has to be `fp0`) (FPU).
|
//! Reverse divide `o0 = o1 / o0` (one has to be `fp0`) (FPU).
|
||||||
INST_2x_(fdivr, kX86InstIdFdivr, X86FpReg, X86FpReg, o0.getRegIndex() == 0 || o1.getRegIndex() == 0)
|
INST_2x_(fdivr, kX86InstIdFdivr, X86FpReg, X86FpReg, o0.getRegIndex() == 0 || o1.getRegIndex() == 0)
|
||||||
//! Reverse divide fp0 by 32-bit or 64-bit FP value (FPU).
|
//! Reverse divide `fp0 = float_or_double[o0] / fp0` (FPU).
|
||||||
INST_1x(fdivr, kX86InstIdFdivr, X86Mem)
|
INST_1x(fdivr, kX86InstIdFdivr, X86Mem)
|
||||||
//! Reverse divide `o0` by fp0 (FPU).
|
//! Reverse divide `o0 = fp0 / o0` and POP (FPU).
|
||||||
INST_1x(fdivrp, kX86InstIdFdivrp, X86FpReg)
|
INST_1x(fdivrp, kX86InstIdFdivrp, X86FpReg)
|
||||||
//! \overload
|
//! Reverse divide `fp1 = fp0 / fp1` and POP (FPU).
|
||||||
INST_0x(fdivrp, kX86InstIdFdivrp)
|
INST_0x(fdivrp, kX86InstIdFdivrp)
|
||||||
|
|
||||||
//! Free FP register (FPU).
|
//! Free FP register (FPU).
|
||||||
INST_1x(ffree, kX86InstIdFfree, X86FpReg)
|
INST_1x(ffree, kX86InstIdFfree, X86FpReg)
|
||||||
|
|
||||||
//! Add 16-bit or 32-bit integer to fp0 (FPU).
|
//! Add `fp0 = fp0 + short_or_int[o0]` (FPU).
|
||||||
INST_1x_(fiadd, kX86InstIdFiadd, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
|
INST_1x_(fiadd, kX86InstIdFiadd, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
|
||||||
//! Compare fp0 with 16-bit or 32-bit Integer (FPU).
|
//! Compare `fp0` with `short_or_int[o0]` (FPU).
|
||||||
INST_1x_(ficom, kX86InstIdFicom, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
|
INST_1x_(ficom, kX86InstIdFicom, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
|
||||||
//! Compare fp0 with 16-bit or 32-bit Integer and pop the FPU stack (FPU).
|
//! Compare `fp0` with `short_or_int[o0]` and POP (FPU).
|
||||||
INST_1x_(ficomp, kX86InstIdFicomp, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
|
INST_1x_(ficomp, kX86InstIdFicomp, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
|
||||||
//! Divide fp0 by 32-bit or 16-bit integer (`src`) (FPU).
|
//! Divide `fp0 = fp0 / short_or_int[o0]` (FPU).
|
||||||
INST_1x_(fidiv, kX86InstIdFidiv, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
|
INST_1x_(fidiv, kX86InstIdFidiv, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
|
||||||
//! Reverse divide fp0 by 32-bit or 16-bit integer (`src`) (FPU).
|
//! Reverse divide `fp0 = short_or_int[o0] / fp0` (FPU).
|
||||||
INST_1x_(fidivr, kX86InstIdFidivr, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
|
INST_1x_(fidivr, kX86InstIdFidivr, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
|
||||||
|
|
||||||
//! Load 16-bit, 32-bit or 64-bit Integer and push it to the FPU stack (FPU).
|
//! Load `short_or_int_or_long[o0]` and PUSH (FPU).
|
||||||
INST_1x_(fild, kX86InstIdFild, X86Mem, o0.getSize() == 2 || o0.getSize() == 4 || o0.getSize() == 8)
|
INST_1x_(fild, kX86InstIdFild, X86Mem, o0.getSize() == 2 || o0.getSize() == 4 || o0.getSize() == 8)
|
||||||
//! Multiply fp0 by 16-bit or 32-bit integer and store it to fp0 (FPU).
|
//! Multiply `fp0 *= short_or_int[o0]` (FPU).
|
||||||
INST_1x_(fimul, kX86InstIdFimul, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
|
INST_1x_(fimul, kX86InstIdFimul, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
|
||||||
|
|
||||||
//! Increment FPU stack-top pointer (FPU).
|
//! Increment FPU stack pointer (FPU).
|
||||||
INST_0x(fincstp, kX86InstIdFincstp)
|
INST_0x(fincstp, kX86InstIdFincstp)
|
||||||
//! Initialize FPU (FPU).
|
//! Initialize FPU (FPU).
|
||||||
INST_0x(finit, kX86InstIdFinit)
|
INST_0x(finit, kX86InstIdFinit)
|
||||||
|
|
||||||
//! Subtract 16-bit or 32-bit integer from fp0 and store result to fp0 (FPU).
|
//! Subtract `fp0 = fp0 - short_or_int[o0]` (FPU).
|
||||||
INST_1x_(fisub, kX86InstIdFisub, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
|
INST_1x_(fisub, kX86InstIdFisub, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
|
||||||
//! Reverse subtract 16-bit or 32-bit integer from fp0 and store result to fp0 (FPU).
|
//! Reverse subtract `fp0 = short_or_int[o0] - fp0` (FPU).
|
||||||
INST_1x_(fisubr, kX86InstIdFisubr, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
|
INST_1x_(fisubr, kX86InstIdFisubr, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
|
||||||
|
|
||||||
//! Initialize FPU without checking for pending unmasked exceptions (FPU).
|
//! Initialize FPU without checking for pending unmasked exceptions (FPU).
|
||||||
INST_0x(fninit, kX86InstIdFninit)
|
INST_0x(fninit, kX86InstIdFninit)
|
||||||
|
|
||||||
//! Store fp0 as 16-bit or 32-bit Integer to `o0` (FPU).
|
//! Store `fp0` as `short_or_int[o0]` (FPU).
|
||||||
INST_1x_(fist, kX86InstIdFist, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
|
INST_1x_(fist, kX86InstIdFist, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
|
||||||
//! Store fp0 as 16-bit, 32-bit or 64-bit Integer to `o0` and pop the FPU stack (FPU).
|
//! Store `fp0` as `short_or_int_or_long[o0]` and POP (FPU).
|
||||||
INST_1x_(fistp, kX86InstIdFistp, X86Mem, o0.getSize() == 2 || o0.getSize() == 4 || o0.getSize() == 8)
|
INST_1x_(fistp, kX86InstIdFistp, X86Mem, o0.getSize() == 2 || o0.getSize() == 4 || o0.getSize() == 8)
|
||||||
//! Push 32-bit, 64-bit or 80-bit floating point value on the FPU stack (FPU).
|
|
||||||
|
//! Load `float_or_double_or_extended[o0]` and PUSH (FPU).
|
||||||
INST_1x_(fld, kX86InstIdFld, X86Mem, o0.getSize() == 4 || o0.getSize() == 8 || o0.getSize() == 10)
|
INST_1x_(fld, kX86InstIdFld, X86Mem, o0.getSize() == 4 || o0.getSize() == 8 || o0.getSize() == 10)
|
||||||
//! Push `o0` on the FPU stack (FPU).
|
//! PUSH `o0` (FPU).
|
||||||
INST_1x(fld, kX86InstIdFld, X86FpReg)
|
INST_1x(fld, kX86InstIdFld, X86FpReg)
|
||||||
|
|
||||||
//! Push +1.0 on the FPU stack (FPU).
|
//! PUSH `1.0` (FPU).
|
||||||
INST_0x(fld1, kX86InstIdFld1)
|
INST_0x(fld1, kX86InstIdFld1)
|
||||||
//! Push log2(10) on the FPU stack (FPU).
|
//! PUSH `log2(10)` (FPU).
|
||||||
INST_0x(fldl2t, kX86InstIdFldl2t)
|
INST_0x(fldl2t, kX86InstIdFldl2t)
|
||||||
//! Push log2(e) on the FPU stack (FPU).
|
//! PUSH `log2(e)` (FPU).
|
||||||
INST_0x(fldl2e, kX86InstIdFldl2e)
|
INST_0x(fldl2e, kX86InstIdFldl2e)
|
||||||
//! Push pi on the FPU stack (FPU).
|
//! PUSH `pi` (FPU).
|
||||||
INST_0x(fldpi, kX86InstIdFldpi)
|
INST_0x(fldpi, kX86InstIdFldpi)
|
||||||
//! Push log10(2) on the FPU stack (FPU).
|
//! PUSH `log10(2)` (FPU).
|
||||||
INST_0x(fldlg2, kX86InstIdFldlg2)
|
INST_0x(fldlg2, kX86InstIdFldlg2)
|
||||||
//! Push ln(2) on the FPU stack (FPU).
|
//! PUSH `ln(2)` (FPU).
|
||||||
INST_0x(fldln2, kX86InstIdFldln2)
|
INST_0x(fldln2, kX86InstIdFldln2)
|
||||||
//! Push +0.0 on the FPU stack (FPU).
|
//! PUSH `+0.0` (FPU).
|
||||||
INST_0x(fldz, kX86InstIdFldz)
|
INST_0x(fldz, kX86InstIdFldz)
|
||||||
|
|
||||||
//! Load x87 FPU control word (2 bytes) (FPU).
|
//! Load x87 FPU control word from `word_ptr[o0]` (FPU).
|
||||||
INST_1x(fldcw, kX86InstIdFldcw, X86Mem)
|
INST_1x(fldcw, kX86InstIdFldcw, X86Mem)
|
||||||
//! Load x87 FPU environment (14 or 28 bytes) (FPU).
|
//! Load x87 FPU environment (14 or 28 bytes) from `[o0]` (FPU).
|
||||||
INST_1x(fldenv, kX86InstIdFldenv, X86Mem)
|
INST_1x(fldenv, kX86InstIdFldenv, X86Mem)
|
||||||
|
|
||||||
//! Multiply `o0` by `o1` (one has to be `fp0`) and store result in `o0` (FPU).
|
//! Multiply `o0 = o0 * o1` (one has to be `fp0`) (FPU).
|
||||||
INST_2x_(fmul, kX86InstIdFmul, X86FpReg, X86FpReg, o0.getRegIndex() == 0 || o1.getRegIndex() == 0)
|
INST_2x_(fmul, kX86InstIdFmul, X86FpReg, X86FpReg, o0.getRegIndex() == 0 || o1.getRegIndex() == 0)
|
||||||
//! Multiply fp0 by 32-bit or 64-bit `o0` and store result in fp0 (FPU).
|
//! Multiply `fp0 = fp0 * float_or_double[o0]` (FPU).
|
||||||
INST_1x(fmul, kX86InstIdFmul, X86Mem)
|
INST_1x(fmul, kX86InstIdFmul, X86Mem)
|
||||||
//! Multiply fp0 by `o0` and pop the FPU stack (FPU).
|
//! Multiply `o0 = o0 * fp0` and POP (FPU).
|
||||||
INST_1x(fmulp, kX86InstIdFmulp, X86FpReg)
|
INST_1x(fmulp, kX86InstIdFmulp, X86FpReg)
|
||||||
//! \overload
|
//! Multiply `fp1 = fp1 * fp0` and POP (FPU).
|
||||||
INST_0x(fmulp, kX86InstIdFmulp)
|
INST_0x(fmulp, kX86InstIdFmulp)
|
||||||
|
|
||||||
//! Clear exceptions (FPU).
|
//! Clear exceptions (FPU).
|
||||||
INST_0x(fnclex, kX86InstIdFnclex)
|
INST_0x(fnclex, kX86InstIdFnclex)
|
||||||
//! No operation (FPU).
|
//! No operation (FPU).
|
||||||
INST_0x(fnop, kX86InstIdFnop)
|
INST_0x(fnop, kX86InstIdFnop)
|
||||||
//! Save FPU state (FPU).
|
//! Save FPU state to `[o0]` (FPU).
|
||||||
INST_1x(fnsave, kX86InstIdFnsave, X86Mem)
|
INST_1x(fnsave, kX86InstIdFnsave, X86Mem)
|
||||||
//! Store x87 FPU environment (FPU).
|
//! Store x87 FPU environment to `[o0]` (FPU).
|
||||||
INST_1x(fnstenv, kX86InstIdFnstenv, X86Mem)
|
INST_1x(fnstenv, kX86InstIdFnstenv, X86Mem)
|
||||||
//! Store x87 FPU control word (FPU).
|
//! Store x87 FPU control word to `[o0]` (FPU).
|
||||||
INST_1x(fnstcw, kX86InstIdFnstcw, X86Mem)
|
INST_1x(fnstcw, kX86InstIdFnstcw, X86Mem)
|
||||||
|
|
||||||
//! Store x87 FPU status word to `o0` (AX) (FPU).
|
//! Store x87 FPU status word to `o0` (AX) (FPU).
|
||||||
INST_1x_(fnstsw, kX86InstIdFnstsw, X86GpReg, o0.isRegCode(kX86RegTypeGpw, kX86RegIndexAx))
|
INST_1x_(fnstsw, kX86InstIdFnstsw, X86GpReg, o0.isRegCode(kX86RegTypeGpw, kX86RegIndexAx))
|
||||||
//! Store x87 FPU status word to `o0` (2 bytes) (FPU).
|
//! Store x87 FPU status word to `word_ptr[o0]` (FPU).
|
||||||
INST_1x(fnstsw, kX86InstIdFnstsw, X86Mem)
|
INST_1x(fnstsw, kX86InstIdFnstsw, X86Mem)
|
||||||
|
|
||||||
//! Arctan(`fp1` / `fp0`) and pop the FPU stack (FPU).
|
//! Partial Arctan `fp1 = atan2(fp1, fp0)` and POP (FPU).
|
||||||
INST_0x(fpatan, kX86InstIdFpatan)
|
INST_0x(fpatan, kX86InstIdFpatan)
|
||||||
//! Fprem(`fp0`, `fp1`) and pop the FPU stack (FPU).
|
//! Partial Remainder[Trunc] `fp0 = fp0 % fp1` (FPU).
|
||||||
INST_0x(fprem, kX86InstIdFprem)
|
INST_0x(fprem, kX86InstIdFprem)
|
||||||
//! Fprem(`fp0`, `fp1`) and pop the FPU stack (FPU).
|
//! Partial Remainder[Round] `fp0 = fp0 % fp1` (FPU).
|
||||||
INST_0x(fprem1, kX86InstIdFprem1)
|
INST_0x(fprem1, kX86InstIdFprem1)
|
||||||
//! Arctan(`fp0`) and pop the FPU stack (FPU).
|
//! Partial Tan `fp0 = tan(fp0)` and PUSH `1.0` (FPU).
|
||||||
INST_0x(fptan, kX86InstIdFptan)
|
INST_0x(fptan, kX86InstIdFptan)
|
||||||
//! Round `fp0` to Integer (FPU).
|
//! Round `fp0 = round(fp0)` (FPU).
|
||||||
INST_0x(frndint, kX86InstIdFrndint)
|
INST_0x(frndint, kX86InstIdFrndint)
|
||||||
|
|
||||||
//! Restore FPU state from `o0` (94 or 108 bytes) (FPU).
|
//! Restore FPU state from `[o0]` (94 or 108 bytes) (FPU).
|
||||||
INST_1x(frstor, kX86InstIdFrstor, X86Mem)
|
INST_1x(frstor, kX86InstIdFrstor, X86Mem)
|
||||||
//! Save FPU state to `o0` (94 or 108 bytes) (FPU).
|
//! Save FPU state to `[o0]` (94 or 108 bytes) (FPU).
|
||||||
INST_1x(fsave, kX86InstIdFsave, X86Mem)
|
INST_1x(fsave, kX86InstIdFsave, X86Mem)
|
||||||
|
|
||||||
//! Scale `fp0` by `fp1` (FPU).
|
//! Scale `fp0 = fp0 * pow(2, RoundTowardsZero(fp1))` (FPU).
|
||||||
INST_0x(fscale, kX86InstIdFscale)
|
INST_0x(fscale, kX86InstIdFscale)
|
||||||
//! Sine of `fp0` and store result in `fp0` (FPU).
|
//! Sin `fp0 = sin(fp0)` (FPU).
|
||||||
INST_0x(fsin, kX86InstIdFsin)
|
INST_0x(fsin, kX86InstIdFsin)
|
||||||
//! Sine and cosine of `fp0`, store sine in `fp0` and push cosine on the FPU stack (FPU).
|
//! Sincos `fp0 = sin(fp0)` and PUSH `cos(fp0)` (FPU).
|
||||||
INST_0x(fsincos, kX86InstIdFsincos)
|
INST_0x(fsincos, kX86InstIdFsincos)
|
||||||
//! Square root of `fp0` and store it in `fp0` (FPU).
|
//! Square root `fp0 = sqrt(fp0)` (FPU).
|
||||||
INST_0x(fsqrt, kX86InstIdFsqrt)
|
INST_0x(fsqrt, kX86InstIdFsqrt)
|
||||||
|
|
||||||
//! Store floating point value to 32-bit or 64-bit memory location (FPU).
|
//! Store floating point value to `float_or_double[o0]` (FPU).
|
||||||
INST_1x_(fst, kX86InstIdFst, X86Mem, o0.getSize() == 4 || o0.getSize() == 8)
|
INST_1x_(fst, kX86InstIdFst, X86Mem, o0.getSize() == 4 || o0.getSize() == 8)
|
||||||
//! Store floating point value to `o0` (FPU).
|
//! Copy `o0 = fp0` (FPU).
|
||||||
INST_1x(fst, kX86InstIdFst, X86FpReg)
|
INST_1x(fst, kX86InstIdFst, X86FpReg)
|
||||||
//! Store floating point value to 32-bit or 64-bit memory location and pop the FPU stack (FPU).
|
//! Store floating point value to `float_or_double_or_extended[o0]` and POP (FPU).
|
||||||
INST_1x_(fstp, kX86InstIdFstp, X86Mem, o0.getSize() == 4 || o0.getSize() == 8 || o0.getSize() == 10)
|
INST_1x_(fstp, kX86InstIdFstp, X86Mem, o0.getSize() == 4 || o0.getSize() == 8 || o0.getSize() == 10)
|
||||||
//! Store floating point value to `o0` and pop the FPU stack (FPU).
|
//! Copy `o0 = fp0` and POP (FPU).
|
||||||
INST_1x(fstp, kX86InstIdFstp, X86FpReg)
|
INST_1x(fstp, kX86InstIdFstp, X86FpReg)
|
||||||
|
|
||||||
//! Store x87 FPU control word to `o0` (2 bytes) (FPU).
|
//! Store x87 FPU control word to `word_ptr[o0]` (FPU).
|
||||||
INST_1x(fstcw, kX86InstIdFstcw, X86Mem)
|
INST_1x(fstcw, kX86InstIdFstcw, X86Mem)
|
||||||
//! Store x87 FPU environment to `o0` (14 or 28 bytes) (FPU).
|
//! Store x87 FPU environment to `[o0]` (14 or 28 bytes) (FPU).
|
||||||
INST_1x(fstenv, kX86InstIdFstenv, X86Mem)
|
INST_1x(fstenv, kX86InstIdFstenv, X86Mem)
|
||||||
//! Store x87 FPU status word to AX (FPU).
|
//! Store x87 FPU status word to `o0` (AX) (FPU).
|
||||||
INST_1x_(fstsw, kX86InstIdFstsw, X86GpReg, o0.getRegIndex() == kX86RegIndexAx)
|
INST_1x_(fstsw, kX86InstIdFstsw, X86GpReg, o0.getRegIndex() == kX86RegIndexAx)
|
||||||
//! Store x87 FPU status word (2 bytes) (FPU).
|
//! Store x87 FPU status word to `word_ptr[o0]` (FPU).
|
||||||
INST_1x(fstsw, kX86InstIdFstsw, X86Mem)
|
INST_1x(fstsw, kX86InstIdFstsw, X86Mem)
|
||||||
|
|
||||||
//! Subtract `o0` from `o0` (one has to be `fp0`) and store result in `o0` (FPU).
|
//! Subtract `o0 = o0 - o1` (one has to be `fp0`) (FPU).
|
||||||
INST_2x_(fsub, kX86InstIdFsub, X86FpReg, X86FpReg, o0.getRegIndex() == 0 || o1.getRegIndex() == 0)
|
INST_2x_(fsub, kX86InstIdFsub, X86FpReg, X86FpReg, o0.getRegIndex() == 0 || o1.getRegIndex() == 0)
|
||||||
//! Subtract 32-bit or 64-bit `o0` from fp0 and store result in fp0 (FPU).
|
//! Subtract `fp0 = fp0 - float_or_double[o0]` (FPU).
|
||||||
INST_1x_(fsub, kX86InstIdFsub, X86Mem, o0.getSize() == 4 || o0.getSize() == 8)
|
INST_1x_(fsub, kX86InstIdFsub, X86Mem, o0.getSize() == 4 || o0.getSize() == 8)
|
||||||
//! Subtract fp0 from `o0` and pop FPU stack (FPU).
|
//! Subtract `o0 = o0 - fp0` and POP (FPU).
|
||||||
INST_1x(fsubp, kX86InstIdFsubp, X86FpReg)
|
INST_1x(fsubp, kX86InstIdFsubp, X86FpReg)
|
||||||
//! \overload
|
//! Subtract `fp1 = fp1 - fp0` and POP (FPU).
|
||||||
INST_0x(fsubp, kX86InstIdFsubp)
|
INST_0x(fsubp, kX86InstIdFsubp)
|
||||||
|
|
||||||
//! Reverse subtract `o1` from `o0` (one has to be `fp0`) and store result in `o0` (FPU).
|
//! Reverse subtract `o0 = o1 - o0` (one has to be `fp0`) (FPU).
|
||||||
INST_2x_(fsubr, kX86InstIdFsubr, X86FpReg, X86FpReg, o0.getRegIndex() == 0 || o1.getRegIndex() == 0)
|
INST_2x_(fsubr, kX86InstIdFsubr, X86FpReg, X86FpReg, o0.getRegIndex() == 0 || o1.getRegIndex() == 0)
|
||||||
//! Reverse subtract 32-bit or 64-bit `o0` from `fp0` and store result in `fp0` (FPU).
|
//! Reverse subtract `fp0 = float_or_double[o0] - fp0` (FPU).
|
||||||
INST_1x_(fsubr, kX86InstIdFsubr, X86Mem, o0.getSize() == 4 || o0.getSize() == 8)
|
INST_1x_(fsubr, kX86InstIdFsubr, X86Mem, o0.getSize() == 4 || o0.getSize() == 8)
|
||||||
//! Reverse subtract `fp0` from `o0` and pop FPU stack (FPU).
|
//! Reverse subtract `o0 = fp0 - o0` and POP (FPU).
|
||||||
INST_1x(fsubrp, kX86InstIdFsubrp, X86FpReg)
|
INST_1x(fsubrp, kX86InstIdFsubrp, X86FpReg)
|
||||||
//! \overload
|
//! Reverse subtract `fp1 = fp0 - fp1` and POP (FPU).
|
||||||
INST_0x(fsubrp, kX86InstIdFsubrp)
|
INST_0x(fsubrp, kX86InstIdFsubrp)
|
||||||
|
|
||||||
//! Floating point test - Compare `fp0` with 0.0. (FPU).
|
//! Compare `fp0` with `0.0` (FPU).
|
||||||
INST_0x(ftst, kX86InstIdFtst)
|
INST_0x(ftst, kX86InstIdFtst)
|
||||||
|
|
||||||
//! Unordered compare `fp0` with `o0` (FPU).
|
//! Unordered compare `fp0` with `o0` (FPU).
|
||||||
INST_1x(fucom, kX86InstIdFucom, X86FpReg)
|
INST_1x(fucom, kX86InstIdFucom, X86FpReg)
|
||||||
//! Unordered compare `fp0` with `fp1` (FPU).
|
//! Unordered compare `fp0` with `fp1` (FPU).
|
||||||
INST_0x(fucom, kX86InstIdFucom)
|
INST_0x(fucom, kX86InstIdFucom)
|
||||||
//! Unordered compare `fp0` and `o0`, check for ordered values and set EFLAGS (FPU).
|
//! Unordered compare `fp0` with `o0`, check for ordered values and set EFLAGS (FPU).
|
||||||
INST_1x(fucomi, kX86InstIdFucomi, X86FpReg)
|
INST_1x(fucomi, kX86InstIdFucomi, X86FpReg)
|
||||||
//! Unordered compare `fp0` and `o0`, check for ordered values and set EFLAGS and pop the FPU stack (FPU).
|
//! Unordered compare `fp0` with `o0`, check for ordered values and set EFLAGS and POP (FPU).
|
||||||
INST_1x(fucomip, kX86InstIdFucomip, X86FpReg)
|
INST_1x(fucomip, kX86InstIdFucomip, X86FpReg)
|
||||||
//! Unordered compare `fp0` with `o0` and pop the FPU stack (FPU).
|
//! Unordered compare `fp0` with `o0` and POP (FPU).
|
||||||
INST_1x(fucomp, kX86InstIdFucomp, X86FpReg)
|
INST_1x(fucomp, kX86InstIdFucomp, X86FpReg)
|
||||||
//! Unordered compare `fp0` with `fp1` and pop the FPU stack (FPU).
|
//! Unordered compare `fp0` with `fp1` and POP (FPU).
|
||||||
INST_0x(fucomp, kX86InstIdFucomp)
|
INST_0x(fucomp, kX86InstIdFucomp)
|
||||||
//! Unordered compare `fp0` with `fp1` and pop the FPU stack twice (FPU).
|
//! Unordered compare `fp0` with `fp1` and POP twice (FPU).
|
||||||
INST_0x(fucompp, kX86InstIdFucompp)
|
INST_0x(fucompp, kX86InstIdFucompp)
|
||||||
|
|
||||||
INST_0x(fwait, kX86InstIdFwait)
|
INST_0x(fwait, kX86InstIdFwait)
|
||||||
|
|
||||||
//! Examine fp0 (FPU).
|
//! Examine fp0 (FPU).
|
||||||
INST_0x(fxam, kX86InstIdFxam)
|
INST_0x(fxam, kX86InstIdFxam)
|
||||||
//! Exchange content of fp0 with `o0` (FPU).
|
//! Exchange `fp0` with `o0` (FPU).
|
||||||
INST_1x(fxch, kX86InstIdFxch, X86FpReg)
|
INST_1x(fxch, kX86InstIdFxch, X86FpReg)
|
||||||
|
|
||||||
//! Restore FP/MMX/SIMD extension states from `o0` (512 bytes) (FPU, MMX, SSE).
|
//! Restore FP/MMX/SIMD extension states from `o0` (512 bytes) (FPU, MMX, SSE).
|
||||||
INST_1x(fxrstor, kX86InstIdFxrstor, X86Mem)
|
INST_1x(fxrstor, kX86InstIdFxrstor, X86Mem)
|
||||||
//! Store FP/MMX/SIMD extension states to `o0` (512 bytes) (FPU, MMX, SSE).
|
//! Store FP/MMX/SIMD extension states to `o0` (512 bytes) (FPU, MMX, SSE).
|
||||||
INST_1x(fxsave, kX86InstIdFxsave, X86Mem)
|
INST_1x(fxsave, kX86InstIdFxsave, X86Mem)
|
||||||
//! Extract exponent and store to `fp0` and push significand on the FPU stack (FPU).
|
//! Extract `fp0 = exponent(fp0)` and PUSH `significand(fp0)` (FPU).
|
||||||
INST_0x(fxtract, kX86InstIdFxtract)
|
INST_0x(fxtract, kX86InstIdFxtract)
|
||||||
|
|
||||||
//! Compute `fp1 * log2(fp0)`, pop the FPU stack and store result in `fp0` (FPU).
|
//! Compute `fp1 = fp1 * log2(fp0)` and POP (FPU).
|
||||||
INST_0x(fyl2x, kX86InstIdFyl2x)
|
INST_0x(fyl2x, kX86InstIdFyl2x)
|
||||||
//! Compute `fp1 * log2(fp0 + 1)`, pop the FPU stack and store result in `fp0` (FPU).
|
//! Compute `fp1 = fp1 * log2(fp0 + 1)` and POP (FPU).
|
||||||
INST_0x(fyl2xp1, kX86InstIdFyl2xp1)
|
INST_0x(fyl2xp1, kX86InstIdFyl2xp1)
|
||||||
|
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
@@ -2356,12 +2458,12 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
|
|||||||
//! \overload
|
//! \overload
|
||||||
INST_2x(cvtdq2ps, kX86InstIdCvtdq2ps, X86XmmReg, X86Mem)
|
INST_2x(cvtdq2ps, kX86InstIdCvtdq2ps, X86XmmReg, X86Mem)
|
||||||
|
|
||||||
//! Convert packed DP-FP to packed QWORDs (SSE2).
|
//! Convert packed DP-FP to packed DWORDs (SSE2).
|
||||||
INST_2x(cvtpd2dq, kX86InstIdCvtpd2dq, X86XmmReg, X86XmmReg)
|
INST_2x(cvtpd2dq, kX86InstIdCvtpd2dq, X86XmmReg, X86XmmReg)
|
||||||
//! \overload
|
//! \overload
|
||||||
INST_2x(cvtpd2dq, kX86InstIdCvtpd2dq, X86XmmReg, X86Mem)
|
INST_2x(cvtpd2dq, kX86InstIdCvtpd2dq, X86XmmReg, X86Mem)
|
||||||
|
|
||||||
//! Convert packed DP-FP to packed QRODSs (SSE2).
|
//! Convert packed DP-FP to packed DWORDs (SSE2).
|
||||||
INST_2x(cvtpd2pi, kX86InstIdCvtpd2pi, X86MmReg, X86XmmReg)
|
INST_2x(cvtpd2pi, kX86InstIdCvtpd2pi, X86MmReg, X86XmmReg)
|
||||||
//! \overload
|
//! \overload
|
||||||
INST_2x(cvtpd2pi, kX86InstIdCvtpd2pi, X86MmReg, X86Mem)
|
INST_2x(cvtpd2pi, kX86InstIdCvtpd2pi, X86MmReg, X86Mem)
|
||||||
@@ -2376,7 +2478,7 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
|
|||||||
//! \overload
|
//! \overload
|
||||||
INST_2x(cvtpi2pd, kX86InstIdCvtpi2pd, X86XmmReg, X86Mem)
|
INST_2x(cvtpi2pd, kX86InstIdCvtpi2pd, X86XmmReg, X86Mem)
|
||||||
|
|
||||||
//! Convert packed SP-FP to packed QWORDs (SSE2).
|
//! Convert packed SP-FP to packed DWORDs (SSE2).
|
||||||
INST_2x(cvtps2dq, kX86InstIdCvtps2dq, X86XmmReg, X86XmmReg)
|
INST_2x(cvtps2dq, kX86InstIdCvtps2dq, X86XmmReg, X86XmmReg)
|
||||||
//! \overload
|
//! \overload
|
||||||
INST_2x(cvtps2dq, kX86InstIdCvtps2dq, X86XmmReg, X86Mem)
|
INST_2x(cvtps2dq, kX86InstIdCvtps2dq, X86XmmReg, X86Mem)
|
||||||
@@ -2411,12 +2513,12 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
|
|||||||
//! \overload
|
//! \overload
|
||||||
INST_2x(cvttpd2pi, kX86InstIdCvttpd2pi, X86MmReg, X86Mem)
|
INST_2x(cvttpd2pi, kX86InstIdCvttpd2pi, X86MmReg, X86Mem)
|
||||||
|
|
||||||
//! Convert with truncation packed DP-FP to packed QWORDs (SSE2).
|
//! Convert with truncation packed DP-FP to packed DWORDs (SSE2).
|
||||||
INST_2x(cvttpd2dq, kX86InstIdCvttpd2dq, X86XmmReg, X86XmmReg)
|
INST_2x(cvttpd2dq, kX86InstIdCvttpd2dq, X86XmmReg, X86XmmReg)
|
||||||
//! \overload
|
//! \overload
|
||||||
INST_2x(cvttpd2dq, kX86InstIdCvttpd2dq, X86XmmReg, X86Mem)
|
INST_2x(cvttpd2dq, kX86InstIdCvttpd2dq, X86XmmReg, X86Mem)
|
||||||
|
|
||||||
//! Convert with truncation packed SP-FP to packed QWORDs (SSE2).
|
//! Convert with truncation packed SP-FP to packed DWORDs (SSE2).
|
||||||
INST_2x(cvttps2dq, kX86InstIdCvttps2dq, X86XmmReg, X86XmmReg)
|
INST_2x(cvttps2dq, kX86InstIdCvttps2dq, X86XmmReg, X86XmmReg)
|
||||||
//! \overload
|
//! \overload
|
||||||
INST_2x(cvttps2dq, kX86InstIdCvttps2dq, X86XmmReg, X86Mem)
|
INST_2x(cvttps2dq, kX86InstIdCvttps2dq, X86XmmReg, X86Mem)
|
||||||
@@ -2951,8 +3053,7 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
|
|||||||
//! \overload
|
//! \overload
|
||||||
INST_2x(addsubps, kX86InstIdAddsubps, X86XmmReg, X86Mem)
|
INST_2x(addsubps, kX86InstIdAddsubps, X86XmmReg, X86Mem)
|
||||||
|
|
||||||
//! Store truncated `fp0` as 16-bit, 32-bit or 64-bit integer to `o0` and pop
|
//! Store truncated `fp0` to `short_or_int_or_long[o0]` and POP (FPU & SSE3).
|
||||||
//! the FPU stack (FPU / SSE3).
|
|
||||||
INST_1x(fisttp, kX86InstIdFisttp, X86Mem)
|
INST_1x(fisttp, kX86InstIdFisttp, X86Mem)
|
||||||
|
|
||||||
//! Packed DP-FP horizontal add (SSE3).
|
//! Packed DP-FP horizontal add (SSE3).
|
||||||
@@ -3412,6 +3513,11 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
|
|||||||
// [SSE4.2]
|
// [SSE4.2]
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
|
|
||||||
|
//! Accumulate crc32 value (polynomial 0x11EDC6F41) (SSE4.2).
|
||||||
|
INST_2x_(crc32, kX86InstIdCrc32, X86GpReg, X86GpReg, o0.isRegType(kX86RegTypeGpd) || o0.isRegType(kX86RegTypeGpq))
|
||||||
|
//! \overload
|
||||||
|
INST_2x_(crc32, kX86InstIdCrc32, X86GpReg, X86Mem, o0.isRegType(kX86RegTypeGpd) || o0.isRegType(kX86RegTypeGpq))
|
||||||
|
|
||||||
//! Packed compare explicit length strings, return index (SSE4.2).
|
//! Packed compare explicit length strings, return index (SSE4.2).
|
||||||
INST_3i(pcmpestri, kX86InstIdPcmpestri, X86XmmReg, X86XmmReg, Imm)
|
INST_3i(pcmpestri, kX86InstIdPcmpestri, X86XmmReg, X86XmmReg, Imm)
|
||||||
//! \overload
|
//! \overload
|
||||||
@@ -3437,6 +3543,43 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
|
|||||||
//! \overload
|
//! \overload
|
||||||
INST_2x(pcmpgtq, kX86InstIdPcmpgtq, X86XmmReg, X86Mem)
|
INST_2x(pcmpgtq, kX86InstIdPcmpgtq, X86XmmReg, X86Mem)
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------
|
||||||
|
// [SSE4a]
|
||||||
|
// --------------------------------------------------------------------------
|
||||||
|
|
||||||
|
//! Extract Field (SSE4a).
|
||||||
|
INST_2x(extrq, kX86InstIdExtrq, X86XmmReg, X86XmmReg)
|
||||||
|
//! Extract Field (SSE4a).
|
||||||
|
INST_3ii(extrq, kX86InstIdExtrq, X86XmmReg, Imm, Imm)
|
||||||
|
|
||||||
|
//! Insert Field (SSE4a).
|
||||||
|
INST_2x(insertq, kX86InstIdInsertq, X86XmmReg, X86XmmReg)
|
||||||
|
//! Insert Field (SSE4a).
|
||||||
|
INST_4ii(insertq, kX86InstIdInsertq, X86XmmReg, X86XmmReg, Imm, Imm)
|
||||||
|
|
||||||
|
//! Move Non-Temporal Scalar DP-FP (SSE4a).
|
||||||
|
INST_2x(movntsd, kX86InstIdMovntsd, X86Mem, X86XmmReg)
|
||||||
|
//! Move Non-Temporal Scalar SP-FP (SSE4a).
|
||||||
|
INST_2x(movntss, kX86InstIdMovntss, X86Mem, X86XmmReg)
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------
|
||||||
|
// [POPCNT]
|
||||||
|
// --------------------------------------------------------------------------
|
||||||
|
|
||||||
|
//! Return the count of number of bits set to 1 (POPCNT).
|
||||||
|
INST_2x_(popcnt, kX86InstIdPopcnt, X86GpReg, X86GpReg, !o0.isGpb() && o0.getRegType() == o1.getRegType())
|
||||||
|
//! \overload
|
||||||
|
INST_2x_(popcnt, kX86InstIdPopcnt, X86GpReg, X86Mem, !o0.isGpb())
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------
|
||||||
|
// [LZCNT]
|
||||||
|
// --------------------------------------------------------------------------
|
||||||
|
|
||||||
|
//! Count the number of leading zero bits (LZCNT).
|
||||||
|
INST_2x(lzcnt, kX86InstIdLzcnt, X86GpReg, X86GpReg)
|
||||||
|
//! \overload
|
||||||
|
INST_2x(lzcnt, kX86InstIdLzcnt, X86GpReg, X86Mem)
|
||||||
|
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
// [AESNI]
|
// [AESNI]
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
@@ -3480,6 +3623,30 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
|
|||||||
//! \overload
|
//! \overload
|
||||||
INST_3i(pclmulqdq, kX86InstIdPclmulqdq, X86XmmReg, X86Mem, Imm)
|
INST_3i(pclmulqdq, kX86InstIdPclmulqdq, X86XmmReg, X86Mem, Imm)
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------
|
||||||
|
// [XSAVE]
|
||||||
|
// --------------------------------------------------------------------------
|
||||||
|
|
||||||
|
//! Restore Processor Extended States specified by `EDX:EAX` (XSAVE).
|
||||||
|
INST_1x(xrstor, kX86InstIdXrstor, X86Mem)
|
||||||
|
//! Restore Processor Extended States specified by `EDX:EAX` (XSAVE&X64).
|
||||||
|
INST_1x(xrstor64, kX86InstIdXrstor64, X86Mem)
|
||||||
|
|
||||||
|
//! Save Processor Extended States specified by `EDX:EAX` (XSAVE).
|
||||||
|
INST_1x(xsave, kX86InstIdXsave, X86Mem)
|
||||||
|
//! Save Processor Extended States specified by `EDX:EAX` (XSAVE&X64).
|
||||||
|
INST_1x(xsave64, kX86InstIdXsave64, X86Mem)
|
||||||
|
|
||||||
|
//! Save Processor Extended States specified by `EDX:EAX` (Optimized) (XSAVEOPT).
|
||||||
|
// NOTE(review): this emitter is named `xsaveopt` but passes `kX86InstIdXsave`,
// the plain XSAVE instruction id. Presumably a dedicated XSAVEOPT id was
// intended - confirm against the instruction-id enum before changing.
INST_1x(xsaveopt, kX86InstIdXsave, X86Mem)
|
||||||
|
//! Save Processor Extended States specified by `EDX:EAX` (Optimized) (XSAVEOPT&X64).
|
||||||
|
// NOTE(review): this emitter is named `xsaveopt64` but passes
// `kX86InstIdXsave64`, the plain XSAVE64 instruction id. Presumably a dedicated
// XSAVEOPT64 id was intended - confirm against the instruction-id enum.
INST_1x(xsaveopt64, kX86InstIdXsave64, X86Mem)
|
||||||
|
|
||||||
|
//! Get XCR - `EDX:EAX <- XCR[ECX]` (XSAVE).
|
||||||
|
INST_0x(xgetbv, kX86InstIdXgetbv)
|
||||||
|
//! Set XCR - `XCR[ECX] <- EDX:EAX` (XSAVE).
|
||||||
|
INST_0x(xsetbv, kX86InstIdXsetbv)
|
||||||
|
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
// [AVX]
|
// [AVX]
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
@@ -3667,7 +3834,7 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
|
|||||||
//! \overload
|
//! \overload
|
||||||
INST_2x(vcvtdq2ps, kX86InstIdVcvtdq2ps, X86YmmReg, X86Mem)
|
INST_2x(vcvtdq2ps, kX86InstIdVcvtdq2ps, X86YmmReg, X86Mem)
|
||||||
|
|
||||||
//! Convert packed DP-FP to packed QWORDs (AVX).
|
//! Convert packed DP-FP to packed DWORDs (AVX).
|
||||||
INST_2x(vcvtpd2dq, kX86InstIdVcvtpd2dq, X86XmmReg, X86XmmReg)
|
INST_2x(vcvtpd2dq, kX86InstIdVcvtpd2dq, X86XmmReg, X86XmmReg)
|
||||||
//! \overload
|
//! \overload
|
||||||
INST_2x(vcvtpd2dq, kX86InstIdVcvtpd2dq, X86XmmReg, X86YmmReg)
|
INST_2x(vcvtpd2dq, kX86InstIdVcvtpd2dq, X86XmmReg, X86YmmReg)
|
||||||
@@ -3681,7 +3848,7 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
|
|||||||
//! \overload
|
//! \overload
|
||||||
INST_2x(vcvtpd2ps, kX86InstIdVcvtpd2ps, X86XmmReg, X86Mem)
|
INST_2x(vcvtpd2ps, kX86InstIdVcvtpd2ps, X86XmmReg, X86Mem)
|
||||||
|
|
||||||
//! Convert packed SP-FP to packed QWORDs (AVX).
|
//! Convert packed SP-FP to packed DWORDs (AVX).
|
||||||
INST_2x(vcvtps2dq, kX86InstIdVcvtps2dq, X86XmmReg, X86XmmReg)
|
INST_2x(vcvtps2dq, kX86InstIdVcvtps2dq, X86XmmReg, X86XmmReg)
|
||||||
//! \overload
|
//! \overload
|
||||||
INST_2x(vcvtps2dq, kX86InstIdVcvtps2dq, X86XmmReg, X86Mem)
|
INST_2x(vcvtps2dq, kX86InstIdVcvtps2dq, X86XmmReg, X86Mem)
|
||||||
@@ -3729,14 +3896,14 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
|
|||||||
//! \overload
|
//! \overload
|
||||||
INST_2x(vcvtss2si, kX86InstIdVcvtss2si, X86GpReg, X86Mem)
|
INST_2x(vcvtss2si, kX86InstIdVcvtss2si, X86GpReg, X86Mem)
|
||||||
|
|
||||||
//! Convert with truncation packed DP-FP to packed QWORDs (AVX).
|
//! Convert with truncation packed DP-FP to packed DWORDs (AVX).
|
||||||
INST_2x(vcvttpd2dq, kX86InstIdVcvttpd2dq, X86XmmReg, X86XmmReg)
|
INST_2x(vcvttpd2dq, kX86InstIdVcvttpd2dq, X86XmmReg, X86XmmReg)
|
||||||
//! \overload
|
//! \overload
|
||||||
INST_2x(vcvttpd2dq, kX86InstIdVcvttpd2dq, X86XmmReg, X86YmmReg)
|
INST_2x(vcvttpd2dq, kX86InstIdVcvttpd2dq, X86XmmReg, X86YmmReg)
|
||||||
//! \overload
|
//! \overload
|
||||||
INST_2x(vcvttpd2dq, kX86InstIdVcvttpd2dq, X86XmmReg, X86Mem)
|
INST_2x(vcvttpd2dq, kX86InstIdVcvttpd2dq, X86XmmReg, X86Mem)
|
||||||
|
|
||||||
//! Convert with truncation packed SP-FP to packed QWORDs (AVX).
|
//! Convert with truncation packed SP-FP to packed DWORDs (AVX).
|
||||||
INST_2x(vcvttps2dq, kX86InstIdVcvttps2dq, X86XmmReg, X86XmmReg)
|
INST_2x(vcvttps2dq, kX86InstIdVcvttps2dq, X86XmmReg, X86XmmReg)
|
||||||
//! \overload
|
//! \overload
|
||||||
INST_2x(vcvttps2dq, kX86InstIdVcvttps2dq, X86XmmReg, X86Mem)
|
INST_2x(vcvttps2dq, kX86InstIdVcvttps2dq, X86XmmReg, X86Mem)
|
||||||
@@ -3745,7 +3912,7 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
|
|||||||
//! \overload
|
//! \overload
|
||||||
INST_2x(vcvttps2dq, kX86InstIdVcvttps2dq, X86YmmReg, X86Mem)
|
INST_2x(vcvttps2dq, kX86InstIdVcvttps2dq, X86YmmReg, X86Mem)
|
||||||
|
|
||||||
//! Convert with truncation scalar DP-FP to DWORD (AVX).
|
//! Convert with truncation scalar DP-FP to INT32 (AVX).
|
||||||
INST_2x(vcvttsd2si, kX86InstIdVcvttsd2si, X86GpReg, X86XmmReg)
|
INST_2x(vcvttsd2si, kX86InstIdVcvttsd2si, X86GpReg, X86XmmReg)
|
||||||
//! \overload
|
//! \overload
|
||||||
INST_2x(vcvttsd2si, kX86InstIdVcvttsd2si, X86GpReg, X86Mem)
|
INST_2x(vcvttsd2si, kX86InstIdVcvttsd2si, X86GpReg, X86Mem)
|
||||||
@@ -6393,15 +6560,6 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
|
|||||||
//! \overload
|
//! \overload
|
||||||
INST_2x(tzcnt, kX86InstIdTzcnt, X86GpReg, X86Mem)
|
INST_2x(tzcnt, kX86InstIdTzcnt, X86GpReg, X86Mem)
|
||||||
|
|
||||||
// --------------------------------------------------------------------------
|
|
||||||
// [LZCNT]
|
|
||||||
// --------------------------------------------------------------------------
|
|
||||||
|
|
||||||
//! Count the number of leading zero bits (LZCNT).
|
|
||||||
INST_2x(lzcnt, kX86InstIdLzcnt, X86GpReg, X86GpReg)
|
|
||||||
//! \overload
|
|
||||||
INST_2x(lzcnt, kX86InstIdLzcnt, X86GpReg, X86Mem)
|
|
||||||
|
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
// [BMI2]
|
// [BMI2]
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
@@ -6450,11 +6608,11 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
|
|||||||
// [RDRAND]
|
// [RDRAND]
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
|
|
||||||
//! Store a random number in destination register.
|
//! Store a random number in destination register (RDRAND).
|
||||||
//!
|
//!
|
||||||
//! Please do not use this instruction in cryptographic software. The result
|
//! Please do not use this instruction in cryptographic software. The result
|
||||||
//! doesn't necessarily have to be random which may cause a major security
|
//! doesn't necessarily have to be random, which may cause a major security
|
||||||
//! issue in the software that relies on it.
|
//! hole in the software.
|
||||||
INST_1x(rdrand, kX86InstIdRdrand, X86GpReg)
|
INST_1x(rdrand, kX86InstIdRdrand, X86GpReg)
|
||||||
|
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
@@ -6503,10 +6661,12 @@ struct ASMJIT_VCLASS X86Assembler : public Assembler {
|
|||||||
#undef INST_3x
|
#undef INST_3x
|
||||||
#undef INST_3x_
|
#undef INST_3x_
|
||||||
#undef INST_3i
|
#undef INST_3i
|
||||||
|
#undef INST_3ii
|
||||||
|
|
||||||
#undef INST_4x
|
#undef INST_4x
|
||||||
#undef INST_4x_
|
#undef INST_4x_
|
||||||
#undef INST_4i
|
#undef INST_4i
|
||||||
|
#undef INST_4ii
|
||||||
};
|
};
|
||||||
|
|
||||||
//! \}
|
//! \}
|
||||||
|
|||||||
@@ -60,14 +60,18 @@ const X86VarInfo _x86VarInfo[] = {
|
|||||||
/* 10: kVarTypeFp32 */ { kX86RegTypeFp , 4 , C(Fp) , D(Sp) , "fp" },
|
/* 10: kVarTypeFp32 */ { kX86RegTypeFp , 4 , C(Fp) , D(Sp) , "fp" },
|
||||||
/* 11: kVarTypeFp64 */ { kX86RegTypeFp , 8 , C(Fp) , D(Dp) , "fp" },
|
/* 11: kVarTypeFp64 */ { kX86RegTypeFp , 8 , C(Fp) , D(Dp) , "fp" },
|
||||||
/* 12: kX86VarTypeMm */ { kX86RegTypeMm , 8 , C(Mm) , 0 , "mm" },
|
/* 12: kX86VarTypeMm */ { kX86RegTypeMm , 8 , C(Mm) , 0 , "mm" },
|
||||||
/* 13: kX86VarTypeXmm */ { kX86RegTypeXmm , 16, C(Xyz), 0 , "xmm" },
|
/* 13: kX86VarTypeK */ { kX86RegTypeK , 8 , C(K) , 0 , "k" },
|
||||||
/* 14: kX86VarTypeXmmSs */ { kX86RegTypeXmm , 4 , C(Xyz), D(Sp) , "xmm" },
|
/* 14: kX86VarTypeXmm */ { kX86RegTypeXmm , 16, C(Xyz), 0 , "xmm" },
|
||||||
/* 15: kX86VarTypeXmmPs */ { kX86RegTypeXmm , 16, C(Xyz), D(Sp) | D(Packed), "xmm" },
|
/* 15: kX86VarTypeXmmSs */ { kX86RegTypeXmm , 4 , C(Xyz), D(Sp) , "xmm" },
|
||||||
/* 16: kX86VarTypeXmmSd */ { kX86RegTypeXmm , 8 , C(Xyz), D(Dp) , "xmm" },
|
/* 16: kX86VarTypeXmmPs */ { kX86RegTypeXmm , 16, C(Xyz), D(Sp) | D(Packed), "xmm" },
|
||||||
/* 17: kX86VarTypeXmmPd */ { kX86RegTypeXmm , 16, C(Xyz), D(Dp) | D(Packed), "xmm" },
|
/* 17: kX86VarTypeXmmSd */ { kX86RegTypeXmm , 8 , C(Xyz), D(Dp) , "xmm" },
|
||||||
/* 18: kX86VarTypeYmm */ { kX86RegTypeYmm , 32, C(Xyz), 0 , "ymm" },
|
/* 18: kX86VarTypeXmmPd */ { kX86RegTypeXmm , 16, C(Xyz), D(Dp) | D(Packed), "xmm" },
|
||||||
/* 19: kX86VarTypeYmmPs */ { kX86RegTypeYmm , 32, C(Xyz), D(Sp) | D(Packed), "ymm" },
|
/* 19: kX86VarTypeYmm */ { kX86RegTypeYmm , 32, C(Xyz), 0 , "ymm" },
|
||||||
/* 20: kX86VarTypeYmmPd */ { kX86RegTypeYmm , 32, C(Xyz), D(Dp) | D(Packed), "ymm" }
|
/* 20: kX86VarTypeYmmPs */ { kX86RegTypeYmm , 32, C(Xyz), D(Sp) | D(Packed), "ymm" },
|
||||||
|
/* 21: kX86VarTypeYmmPd */ { kX86RegTypeYmm , 32, C(Xyz), D(Dp) | D(Packed), "ymm" },
|
||||||
|
/* 22: kX86VarTypeZmm */ { kX86RegTypeZmm , 64, C(Xyz), 0 , "zmm" },
|
||||||
|
/* 23: kX86VarTypeZmmPs */ { kX86RegTypeZmm , 64, C(Xyz), D(Sp) | D(Packed), "zmm" },
|
||||||
|
/* 24: kX86VarTypeZmmPd */ { kX86RegTypeZmm , 64, C(Xyz), D(Dp) | D(Packed), "zmm" }
|
||||||
};
|
};
|
||||||
|
|
||||||
#undef D
|
#undef D
|
||||||
@@ -88,14 +92,18 @@ const uint8_t _x86VarMapping[kX86VarTypeCount] = {
|
|||||||
/* 10: kVarTypeFp32 */ kVarTypeFp32,
|
/* 10: kVarTypeFp32 */ kVarTypeFp32,
|
||||||
/* 11: kVarTypeFp64 */ kVarTypeFp64,
|
/* 11: kVarTypeFp64 */ kVarTypeFp64,
|
||||||
/* 12: kX86VarTypeMm */ kX86VarTypeMm,
|
/* 12: kX86VarTypeMm */ kX86VarTypeMm,
|
||||||
/* 13: kX86VarTypeXmm */ kX86VarTypeXmm,
|
/* 13: kX86VarTypeK */ kX86VarTypeK,
|
||||||
/* 14: kX86VarTypeXmmSs */ kX86VarTypeXmmSs,
|
/* 14: kX86VarTypeXmm */ kX86VarTypeXmm,
|
||||||
/* 15: kX86VarTypeXmmPs */ kX86VarTypeXmmPs,
|
/* 15: kX86VarTypeXmmSs */ kX86VarTypeXmmSs,
|
||||||
/* 16: kX86VarTypeXmmSd */ kX86VarTypeXmmSd,
|
/* 16: kX86VarTypeXmmPs */ kX86VarTypeXmmPs,
|
||||||
/* 17: kX86VarTypeXmmPd */ kX86VarTypeXmmPd,
|
/* 17: kX86VarTypeXmmSd */ kX86VarTypeXmmSd,
|
||||||
/* 18: kX86VarTypeYmm */ kX86VarTypeYmm,
|
/* 18: kX86VarTypeXmmPd */ kX86VarTypeXmmPd,
|
||||||
/* 19: kX86VarTypeYmmPs */ kX86VarTypeYmmPs,
|
/* 19: kX86VarTypeYmm */ kX86VarTypeYmm,
|
||||||
/* 20: kX86VarTypeYmmPd */ kX86VarTypeYmmPd
|
/* 20: kX86VarTypeYmmPs */ kX86VarTypeYmmPs,
|
||||||
|
/* 21: kX86VarTypeYmmPd */ kX86VarTypeYmmPd,
|
||||||
|
/* 22: kX86VarTypeZmm */ kX86VarTypeZmm,
|
||||||
|
/* 23: kX86VarTypeZmmPs */ kX86VarTypeZmmPs,
|
||||||
|
/* 24: kX86VarTypeZmmPd */ kX86VarTypeZmmPd
|
||||||
};
|
};
|
||||||
#endif // ASMJIT_BUILD_X86
|
#endif // ASMJIT_BUILD_X86
|
||||||
|
|
||||||
@@ -114,14 +122,18 @@ const uint8_t _x64VarMapping[kX86VarTypeCount] = {
|
|||||||
/* 10: kVarTypeFp32 */ kVarTypeFp32,
|
/* 10: kVarTypeFp32 */ kVarTypeFp32,
|
||||||
/* 11: kVarTypeFp64 */ kVarTypeFp64,
|
/* 11: kVarTypeFp64 */ kVarTypeFp64,
|
||||||
/* 12: kX86VarTypeMm */ kX86VarTypeMm,
|
/* 12: kX86VarTypeMm */ kX86VarTypeMm,
|
||||||
/* 13: kX86VarTypeXmm */ kX86VarTypeXmm,
|
/* 13: kX86VarTypeK */ kX86VarTypeK,
|
||||||
/* 14: kX86VarTypeXmmSs */ kX86VarTypeXmmSs,
|
/* 14: kX86VarTypeXmm */ kX86VarTypeXmm,
|
||||||
/* 15: kX86VarTypeXmmPs */ kX86VarTypeXmmPs,
|
/* 15: kX86VarTypeXmmSs */ kX86VarTypeXmmSs,
|
||||||
/* 16: kX86VarTypeXmmSd */ kX86VarTypeXmmSd,
|
/* 16: kX86VarTypeXmmPs */ kX86VarTypeXmmPs,
|
||||||
/* 17: kX86VarTypeXmmPd */ kX86VarTypeXmmPd,
|
/* 17: kX86VarTypeXmmSd */ kX86VarTypeXmmSd,
|
||||||
/* 18: kX86VarTypeYmm */ kX86VarTypeYmm,
|
/* 18: kX86VarTypeXmmPd */ kX86VarTypeXmmPd,
|
||||||
/* 19: kX86VarTypeYmmPs */ kX86VarTypeYmmPs,
|
/* 19: kX86VarTypeYmm */ kX86VarTypeYmm,
|
||||||
/* 20: kX86VarTypeYmmPd */ kX86VarTypeYmmPd
|
/* 20: kX86VarTypeYmmPs */ kX86VarTypeYmmPs,
|
||||||
|
/* 21: kX86VarTypeYmmPd */ kX86VarTypeYmmPd,
|
||||||
|
/* 22: kX86VarTypeZmm */ kX86VarTypeZmm,
|
||||||
|
/* 23: kX86VarTypeZmmPs */ kX86VarTypeZmmPs,
|
||||||
|
/* 24: kX86VarTypeZmmPd */ kX86VarTypeZmmPd
|
||||||
};
|
};
|
||||||
#endif // ASMJIT_BUILD_X64
|
#endif // ASMJIT_BUILD_X64
|
||||||
|
|
||||||
@@ -482,14 +494,14 @@ static Error X86FuncDecl_initFunc(X86FuncDecl* self, uint32_t arch,
|
|||||||
|
|
||||||
if (x86ArgIsInt(varType) && i < ASMJIT_ARRAY_SIZE(self->_passedOrderGp)) {
|
if (x86ArgIsInt(varType) && i < ASMJIT_ARRAY_SIZE(self->_passedOrderGp)) {
|
||||||
arg._regIndex = self->_passedOrderGp[i];
|
arg._regIndex = self->_passedOrderGp[i];
|
||||||
self->_used.add(kX86RegClassGp, IntUtil::mask(arg.getRegIndex()));
|
self->_used.or_(kX86RegClassGp, IntUtil::mask(arg.getRegIndex()));
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (x86ArgIsFp(varType) && i < ASMJIT_ARRAY_SIZE(self->_passedOrderXmm)) {
|
if (x86ArgIsFp(varType) && i < ASMJIT_ARRAY_SIZE(self->_passedOrderXmm)) {
|
||||||
arg._varType = static_cast<uint8_t>(x86ArgTypeToXmmType(varType));
|
arg._varType = static_cast<uint8_t>(x86ArgTypeToXmmType(varType));
|
||||||
arg._regIndex = self->_passedOrderXmm[i];
|
arg._regIndex = self->_passedOrderXmm[i];
|
||||||
self->_used.add(kX86RegClassXyz, IntUtil::mask(arg.getRegIndex()));
|
self->_used.or_(kX86RegClassXyz, IntUtil::mask(arg.getRegIndex()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -527,7 +539,7 @@ static Error X86FuncDecl_initFunc(X86FuncDecl* self, uint32_t arch,
|
|||||||
continue;
|
continue;
|
||||||
|
|
||||||
arg._regIndex = self->_passedOrderGp[gpPos++];
|
arg._regIndex = self->_passedOrderGp[gpPos++];
|
||||||
self->_used.add(kX86RegClassGp, IntUtil::mask(arg.getRegIndex()));
|
self->_used.or_(kX86RegClassGp, IntUtil::mask(arg.getRegIndex()));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Register arguments (Xmm), always left-to-right.
|
// Register arguments (Xmm), always left-to-right.
|
||||||
@@ -538,7 +550,7 @@ static Error X86FuncDecl_initFunc(X86FuncDecl* self, uint32_t arch,
|
|||||||
if (x86ArgIsFp(varType)) {
|
if (x86ArgIsFp(varType)) {
|
||||||
arg._varType = static_cast<uint8_t>(x86ArgTypeToXmmType(varType));
|
arg._varType = static_cast<uint8_t>(x86ArgTypeToXmmType(varType));
|
||||||
arg._regIndex = self->_passedOrderXmm[xmmPos++];
|
arg._regIndex = self->_passedOrderXmm[xmmPos++];
|
||||||
self->_used.add(kX86RegClassXyz, IntUtil::mask(arg.getRegIndex()));
|
self->_used.or_(kX86RegClassXyz, IntUtil::mask(arg.getRegIndex()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -722,9 +734,9 @@ Error X86Compiler::setArch(uint32_t arch) {
|
|||||||
|
|
||||||
_regCount.reset();
|
_regCount.reset();
|
||||||
_regCount._gp = 8;
|
_regCount._gp = 8;
|
||||||
_regCount._fp = 8;
|
|
||||||
_regCount._mm = 8;
|
_regCount._mm = 8;
|
||||||
_regCount._xy = 8;
|
_regCount._k = 8;
|
||||||
|
_regCount._xyz = 8;
|
||||||
|
|
||||||
zax = x86::eax;
|
zax = x86::eax;
|
||||||
zcx = x86::ecx;
|
zcx = x86::ecx;
|
||||||
@@ -747,9 +759,9 @@ Error X86Compiler::setArch(uint32_t arch) {
|
|||||||
|
|
||||||
_regCount.reset();
|
_regCount.reset();
|
||||||
_regCount._gp = 16;
|
_regCount._gp = 16;
|
||||||
_regCount._fp = 8;
|
|
||||||
_regCount._mm = 8;
|
_regCount._mm = 8;
|
||||||
_regCount._xy = 16;
|
_regCount._k = 8;
|
||||||
|
_regCount._xyz = 16;
|
||||||
|
|
||||||
zax = x86::rax;
|
zax = x86::rax;
|
||||||
zcx = x86::rcx;
|
zcx = x86::rcx;
|
||||||
@@ -783,7 +795,7 @@ static InstNode* X86Compiler_newInst(X86Compiler* self, void* p, uint32_t code,
|
|||||||
JumpNode* node = new(p) JumpNode(self, code, options, opList, opCount);
|
JumpNode* node = new(p) JumpNode(self, code, options, opList, opCount);
|
||||||
TargetNode* jTarget = self->getTargetById(opList[0].getId());
|
TargetNode* jTarget = self->getTargetById(opList[0].getId());
|
||||||
|
|
||||||
node->addFlags(code == kX86InstIdJmp ? kNodeFlagIsJmp | kNodeFlagIsTaken : kNodeFlagIsJcc);
|
node->orFlags(code == kX86InstIdJmp ? kNodeFlagIsJmp | kNodeFlagIsTaken : kNodeFlagIsJcc);
|
||||||
node->_target = jTarget;
|
node->_target = jTarget;
|
||||||
node->_jumpNext = static_cast<JumpNode*>(jTarget->_from);
|
node->_jumpNext = static_cast<JumpNode*>(jTarget->_from);
|
||||||
|
|
||||||
@@ -792,9 +804,9 @@ static InstNode* X86Compiler_newInst(X86Compiler* self, void* p, uint32_t code,
|
|||||||
|
|
||||||
// The 'jmp' is always taken, conditional jump can contain hint, we detect it.
|
// The 'jmp' is always taken, conditional jump can contain hint, we detect it.
|
||||||
if (code == kX86InstIdJmp)
|
if (code == kX86InstIdJmp)
|
||||||
node->addFlags(kNodeFlagIsTaken);
|
node->orFlags(kNodeFlagIsTaken);
|
||||||
else if (options & kInstOptionTaken)
|
else if (options & kInstOptionTaken)
|
||||||
node->addFlags(kNodeFlagIsTaken);
|
node->orFlags(kNodeFlagIsTaken);
|
||||||
|
|
||||||
node->addOptions(options);
|
node->addOptions(options);
|
||||||
return node;
|
return node;
|
||||||
@@ -1025,6 +1037,22 @@ InstNode* X86Compiler::emit(uint32_t code, const Operand& o0, const Operand& o1,
|
|||||||
return static_cast<InstNode*>(addNode(node));
|
return static_cast<InstNode*>(addNode(node));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emit instruction `code` with three generic operands and a trailing `int`
// immediate.
//
// `o3_` is wrapped in an `Imm` operand, a node is created through `newInst()`
// and then handed to `addNode()`. Returns the result of `addNode()` cast to
// `InstNode*`, or NULL when `newInst()` did not produce a node.
InstNode* X86Compiler::emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, int o3_) {
|
||||||
|
Imm o3(o3_);
|
||||||
|
InstNode* node = newInst(code, o0, o1, o2, o3);
|
||||||
|
// No node was created; report that to the caller instead of adding it.
if (node == NULL)
|
||||||
|
return NULL;
|
||||||
|
return static_cast<InstNode*>(addNode(node));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emit instruction `code` with three generic operands and a trailing
// `uint64_t` immediate.
//
// `o3_` is wrapped in an `Imm` operand, a node is created through `newInst()`
// and then handed to `addNode()`. Returns the result of `addNode()` cast to
// `InstNode*`, or NULL when `newInst()` did not produce a node.
InstNode* X86Compiler::emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, uint64_t o3_) {
|
||||||
|
Imm o3(o3_);
|
||||||
|
InstNode* node = newInst(code, o0, o1, o2, o3);
|
||||||
|
// No node was created; report that to the caller instead of adding it.
if (node == NULL)
|
||||||
|
return NULL;
|
||||||
|
return static_cast<InstNode*>(addNode(node));
|
||||||
|
}
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// [asmjit::X86Compiler - Func]
|
// [asmjit::X86Compiler - Func]
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
|
|||||||
@@ -46,24 +46,34 @@ ASMJIT_ENUM(kX86VarType) {
|
|||||||
//! Variable is Mm (MMX).
|
//! Variable is Mm (MMX).
|
||||||
kX86VarTypeMm = 12,
|
kX86VarTypeMm = 12,
|
||||||
|
|
||||||
|
//! Variable is K (AVX512+).
|
||||||
|
kX86VarTypeK,
|
||||||
|
|
||||||
//! Variable is Xmm (SSE+).
|
//! Variable is Xmm (SSE+).
|
||||||
kX86VarTypeXmm,
|
kX86VarTypeXmm,
|
||||||
//! Variable is scalar Xmm SP-FP number.
|
//! Variable is a scalar Xmm SP-FP number.
|
||||||
kX86VarTypeXmmSs,
|
kX86VarTypeXmmSs,
|
||||||
//! Variable is packed Xmm SP-FP number (4 floats).
|
//! Variable is a packed Xmm SP-FP number (4 floats).
|
||||||
kX86VarTypeXmmPs,
|
kX86VarTypeXmmPs,
|
||||||
//! Variable is scalar Xmm DP-FP number.
|
//! Variable is a scalar Xmm DP-FP number.
|
||||||
kX86VarTypeXmmSd,
|
kX86VarTypeXmmSd,
|
||||||
//! Variable is packed Xmm DP-FP number (2 doubles).
|
//! Variable is a packed Xmm DP-FP number (2 doubles).
|
||||||
kX86VarTypeXmmPd,
|
kX86VarTypeXmmPd,
|
||||||
|
|
||||||
//! Variable is Ymm (AVX+).
|
//! Variable is Ymm (AVX+).
|
||||||
kX86VarTypeYmm,
|
kX86VarTypeYmm,
|
||||||
//! Variable is packed Ymm SP-FP number (8 floats).
|
//! Variable is a packed Ymm SP-FP number (8 floats).
|
||||||
kX86VarTypeYmmPs,
|
kX86VarTypeYmmPs,
|
||||||
//! Variable is packed Ymm DP-FP number (4 doubles).
|
//! Variable is a packed Ymm DP-FP number (4 doubles).
|
||||||
kX86VarTypeYmmPd,
|
kX86VarTypeYmmPd,
|
||||||
|
|
||||||
|
//! Variable is Zmm (AVX512+).
|
||||||
|
kX86VarTypeZmm,
|
||||||
|
//! Variable is a packed Zmm SP-FP number (16 floats).
|
||||||
|
kX86VarTypeZmmPs,
|
||||||
|
//! Variable is a packed Zmm DP-FP number (8 doubles).
|
||||||
|
kX86VarTypeZmmPd,
|
||||||
|
|
||||||
//! Count of variable types.
|
//! Count of variable types.
|
||||||
kX86VarTypeCount,
|
kX86VarTypeCount,
|
||||||
|
|
||||||
@@ -76,7 +86,10 @@ ASMJIT_ENUM(kX86VarType) {
|
|||||||
_kX86VarTypeXmmEnd = kX86VarTypeXmmPd,
|
_kX86VarTypeXmmEnd = kX86VarTypeXmmPd,
|
||||||
|
|
||||||
_kX86VarTypeYmmStart = kX86VarTypeYmm,
|
_kX86VarTypeYmmStart = kX86VarTypeYmm,
|
||||||
_kX86VarTypeYmmEnd = kX86VarTypeYmmPd
|
_kX86VarTypeYmmEnd = kX86VarTypeYmmPd,
|
||||||
|
|
||||||
|
_kX86VarTypeZmmStart = kX86VarTypeZmm,
|
||||||
|
_kX86VarTypeZmmEnd = kX86VarTypeZmmPd
|
||||||
//! \}
|
//! \}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -552,6 +565,9 @@ struct X86Var : public Var {
|
|||||||
|
|
||||||
//! Get whether the variable is Mm (64-bit) register.
|
//! Get whether the variable is Mm (64-bit) register.
|
||||||
ASMJIT_INLINE bool isMm() const { return _vreg.type == kX86RegTypeMm; }
|
ASMJIT_INLINE bool isMm() const { return _vreg.type == kX86RegTypeMm; }
|
||||||
|
//! Get whether the variable is K (64-bit) register.
|
||||||
|
ASMJIT_INLINE bool isK() const { return _vreg.type == kX86RegTypeK; }
|
||||||
|
|
||||||
//! Get whether the variable is Xmm (128-bit) register.
|
//! Get whether the variable is Xmm (128-bit) register.
|
||||||
ASMJIT_INLINE bool isXmm() const { return _vreg.type == kX86RegTypeXmm; }
|
ASMJIT_INLINE bool isXmm() const { return _vreg.type == kX86RegTypeXmm; }
|
||||||
//! Get whether the variable is Ymm (256-bit) register.
|
//! Get whether the variable is Ymm (256-bit) register.
|
||||||
@@ -2016,6 +2032,7 @@ struct ASMJIT_VCLASS X86Compiler : public Compiler {
|
|||||||
= kArchHost
|
= kArchHost
|
||||||
#endif // ASMJIT_HOST_X86 || ASMJIT_HOST_X64
|
#endif // ASMJIT_HOST_X86 || ASMJIT_HOST_X64
|
||||||
);
|
);
|
||||||
|
|
||||||
//! Destroy the `X86Compiler` instance.
|
//! Destroy the `X86Compiler` instance.
|
||||||
ASMJIT_API ~X86Compiler();
|
ASMJIT_API ~X86Compiler();
|
||||||
|
|
||||||
@@ -2023,7 +2040,12 @@ struct ASMJIT_VCLASS X86Compiler : public Compiler {
|
|||||||
// [Arch]
|
// [Arch]
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
|
|
||||||
//! Get count of registers of the current architecture.
|
//! \internal
|
||||||
|
//!
|
||||||
|
//! Set the architecture to `arch`.
|
||||||
|
ASMJIT_API Error setArch(uint32_t arch);
|
||||||
|
|
||||||
|
//! Get count of registers of the current architecture and mode.
|
||||||
ASMJIT_INLINE const X86RegCount& getRegCount() const {
|
ASMJIT_INLINE const X86RegCount& getRegCount() const {
|
||||||
return _regCount;
|
return _regCount;
|
||||||
}
|
}
|
||||||
@@ -2075,8 +2097,6 @@ struct ASMJIT_VCLASS X86Compiler : public Compiler {
|
|||||||
return x86::ptr_abs(pAbs, index, shift, disp, _regSize);
|
return x86::ptr_abs(pAbs, index, shift, disp, _regSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
ASMJIT_API Error setArch(uint32_t arch);
|
|
||||||
|
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
// [Inst / Emit]
|
// [Inst / Emit]
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
@@ -2119,6 +2139,10 @@ struct ASMJIT_VCLASS X86Compiler : public Compiler {
|
|||||||
ASMJIT_API InstNode* emit(uint32_t code, const Operand& o0, const Operand& o1, int o2);
|
ASMJIT_API InstNode* emit(uint32_t code, const Operand& o0, const Operand& o1, int o2);
|
||||||
//! \overload
|
//! \overload
|
||||||
ASMJIT_API InstNode* emit(uint32_t code, const Operand& o0, const Operand& o1, uint64_t o2);
|
ASMJIT_API InstNode* emit(uint32_t code, const Operand& o0, const Operand& o1, uint64_t o2);
|
||||||
|
//! \overload
|
||||||
|
ASMJIT_API InstNode* emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, int o3);
|
||||||
|
//! \overload
|
||||||
|
ASMJIT_API InstNode* emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2, uint64_t o3);
|
||||||
|
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
// [Func]
|
// [Func]
|
||||||
@@ -2438,7 +2462,7 @@ struct ASMJIT_VCLASS X86Compiler : public Compiler {
|
|||||||
X86GpReg zdi;
|
X86GpReg zdi;
|
||||||
|
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
// [X86 Instructions]
|
// [Emit]
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
|
|
||||||
#define INST_0x(_Inst_, _Code_) \
|
#define INST_0x(_Inst_, _Code_) \
|
||||||
@@ -2614,6 +2638,31 @@ struct ASMJIT_VCLASS X86Compiler : public Compiler {
|
|||||||
return emit(_Code_, o0, o1, o2); \
|
return emit(_Code_, o0, o1, o2); \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// INST_3ii - declare a three-operand instruction emitter `_Inst_` whose last
// two operands may be given as plain integers.
//
// Expands to five inline overloads that all forward to `emit(_Code_, ...)`:
//   - (_Op0_, _Op1_, _Op2_)          - operands passed through unchanged.
//   - (_Op0_, int, int)              - `o1` wrapped in `Imm`, `o2` passed as is.
//   - (_Op0_, unsigned int, unsigned int)
//   - (_Op0_, int64_t, int64_t)      - `o1` wrapped in `Imm`, `o2` cast to
//                                      `uint64_t` before the call.
//   - (_Op0_, uint64_t, uint64_t)    - `o1` wrapped in `Imm`, `o2` passed as is.
//
// NOTE(review): the `int64_t` overload forwards `o2` via
// `static_cast<uint64_t>`; confirm that the `uint64_t` overload of `emit()`
// keeps the intended value for negative inputs.
#define INST_3ii(_Inst_, _Code_, _Op0_, _Op1_, _Op2_) \
|
||||||
|
ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2) { \
|
||||||
|
return emit(_Code_, o0, o1, o2); \
|
||||||
|
} \
|
||||||
|
/*! \overload */ \
|
||||||
|
ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, int o1, int o2) { \
|
||||||
|
Imm o1Imm(o1); \
|
||||||
|
return emit(_Code_, o0, o1Imm, o2); \
|
||||||
|
} \
|
||||||
|
/*! \overload */ \
|
||||||
|
ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, unsigned int o1, unsigned int o2) { \
|
||||||
|
Imm o1Imm(o1); \
|
||||||
|
return emit(_Code_, o0, o1Imm, static_cast<uint64_t>(o2)); \
|
||||||
|
} \
|
||||||
|
/*! \overload */ \
|
||||||
|
ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, int64_t o1, int64_t o2) { \
|
||||||
|
Imm o1Imm(o1); \
|
||||||
|
return emit(_Code_, o0, o1Imm, static_cast<uint64_t>(o2)); \
|
||||||
|
} \
|
||||||
|
/*! \overload */ \
|
||||||
|
ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, uint64_t o1, uint64_t o2) { \
|
||||||
|
Imm o1Imm(o1); \
|
||||||
|
return emit(_Code_, o0, o1Imm, o2); \
|
||||||
|
}
|
||||||
|
|
||||||
#define INST_4x(_Inst_, _Code_, _Op0_, _Op1_, _Op2_) \
|
#define INST_4x(_Inst_, _Code_, _Op0_, _Op1_, _Op2_) \
|
||||||
ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2, const _Op3_& o3) { \
|
ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2, const _Op3_& o3) { \
|
||||||
return emit(_Code_, o0, o1, o2, o3); \
|
return emit(_Code_, o0, o1, o2, o3); \
|
||||||
@@ -2646,6 +2695,35 @@ struct ASMJIT_VCLASS X86Compiler : public Compiler {
|
|||||||
return emit(_Code_, o0, o1, o2, o3); \
|
return emit(_Code_, o0, o1, o2, o3); \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// INST_4ii - declare a four-operand instruction emitter `_Inst_` whose last
// two operands may be given as plain integers.
//
// Expands to five inline overloads that all forward to `emit(_Code_, ...)`:
//   - (_Op0_, _Op1_, _Op2_, _Op3_)   - operands passed through unchanged.
//   - (_Op0_, _Op1_, int, int)       - `o2` wrapped in `Imm`, `o3` passed as is.
//   - (_Op0_, _Op1_, unsigned int, unsigned int)
//   - (_Op0_, _Op1_, int64_t, int64_t)
//                                    - `o2` wrapped in `Imm`, `o3` cast to
//                                      `uint64_t` before the call.
//   - (_Op0_, _Op1_, uint64_t, uint64_t)
//                                    - `o2` wrapped in `Imm`, `o3` passed as is.
//
// NOTE(review): the `int64_t` overload forwards `o3` via
// `static_cast<uint64_t>`; confirm that the `uint64_t` overload of `emit()`
// keeps the intended value for negative inputs.
#define INST_4ii(_Inst_, _Code_, _Op0_, _Op1_, _Op2_, _Op3_) \
|
||||||
|
ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2, const _Op3_& o3) { \
|
||||||
|
return emit(_Code_, o0, o1, o2, o3); \
|
||||||
|
} \
|
||||||
|
/*! \overload */ \
|
||||||
|
ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, const _Op1_& o1, int o2, int o3) { \
|
||||||
|
Imm o2Imm(o2); \
|
||||||
|
return emit(_Code_, o0, o1, o2Imm, o3); \
|
||||||
|
} \
|
||||||
|
/*! \overload */ \
|
||||||
|
ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, const _Op1_& o1, unsigned int o2, unsigned int o3) { \
|
||||||
|
Imm o2Imm(o2); \
|
||||||
|
return emit(_Code_, o0, o1, o2Imm, static_cast<uint64_t>(o3)); \
|
||||||
|
} \
|
||||||
|
/*! \overload */ \
|
||||||
|
ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, const _Op1_& o1, int64_t o2, int64_t o3) { \
|
||||||
|
Imm o2Imm(o2); \
|
||||||
|
return emit(_Code_, o0, o1, o2Imm, static_cast<uint64_t>(o3)); \
|
||||||
|
} \
|
||||||
|
/*! \overload */ \
|
||||||
|
ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, const _Op1_& o1, uint64_t o2, uint64_t o3) { \
|
||||||
|
Imm o2Imm(o2); \
|
||||||
|
return emit(_Code_, o0, o1, o2Imm, o3); \
|
||||||
|
}
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------
|
||||||
|
// [X86/X64]
|
||||||
|
// --------------------------------------------------------------------------
|
||||||
|
|
||||||
//! Add with carry.
|
//! Add with carry.
|
||||||
INST_2x(adc, kX86InstIdAdc, X86GpVar, X86GpVar)
|
INST_2x(adc, kX86InstIdAdc, X86GpVar, X86GpVar)
|
||||||
//! \overload
|
//! \overload
|
||||||
@@ -2832,11 +2910,6 @@ struct ASMJIT_VCLASS X86Compiler : public Compiler {
|
|||||||
return emit(kX86InstIdCpuid, x_eax, w_ebx, x_ecx, w_edx);
|
return emit(kX86InstIdCpuid, x_eax, w_ebx, x_ecx, w_edx);
|
||||||
}
|
}
|
||||||
|
|
||||||
//! Accumulate crc32 value (polynomial 0x11EDC6F41) (SSE4.2).
|
|
||||||
INST_2x_(crc32, kX86InstIdCrc32, X86GpVar, X86GpVar, o0.isRegType(kX86RegTypeGpd) || o0.isRegType(kX86RegTypeGpq))
|
|
||||||
//! \overload
|
|
||||||
INST_2x_(crc32, kX86InstIdCrc32, X86GpVar, X86Mem, o0.isRegType(kX86RegTypeGpd) || o0.isRegType(kX86RegTypeGpq))
|
|
||||||
|
|
||||||
//! Decimal adjust AL after addition (X86 Only).
|
//! Decimal adjust AL after addition (X86 Only).
|
||||||
INST_1x(daa, kX86InstIdDaa, X86GpVar)
|
INST_1x(daa, kX86InstIdDaa, X86GpVar)
|
||||||
//! Decimal adjust AL after subtraction (X86 Only).
|
//! Decimal adjust AL after subtraction (X86 Only).
|
||||||
@@ -3024,11 +3097,6 @@ struct ASMJIT_VCLASS X86Compiler : public Compiler {
|
|||||||
//! Pop stack into EFLAGS Register (32-bit or 64-bit).
|
//! Pop stack into EFLAGS Register (32-bit or 64-bit).
|
||||||
INST_0x(popf, kX86InstIdPopf)
|
INST_0x(popf, kX86InstIdPopf)
|
||||||
|
|
||||||
//! Return the count of number of bits set to 1 (SSE4.2).
|
|
||||||
INST_2x_(popcnt, kX86InstIdPopcnt, X86GpVar, X86GpVar, !o0.isGpb() && o0.getSize() == o1.getSize())
|
|
||||||
//! \overload
|
|
||||||
INST_2x_(popcnt, kX86InstIdPopcnt, X86GpVar, X86Mem, !o0.isGpb())
|
|
||||||
|
|
||||||
//! Push WORD or DWORD/QWORD on the stack.
|
//! Push WORD or DWORD/QWORD on the stack.
|
||||||
INST_1x_(push, kX86InstIdPush, X86GpVar, o0.getSize() == 2 || o0.getSize() == _regSize)
|
INST_1x_(push, kX86InstIdPush, X86GpVar, o0.getSize() == 2 || o0.getSize() == _regSize)
|
||||||
//! Push WORD or DWORD/QWORD on the stack.
|
//! Push WORD or DWORD/QWORD on the stack.
|
||||||
@@ -3299,273 +3367,277 @@ struct ASMJIT_VCLASS X86Compiler : public Compiler {
|
|||||||
INST_2i(xor_, kX86InstIdXor, X86Mem, Imm)
|
INST_2i(xor_, kX86InstIdXor, X86Mem, Imm)
|
||||||
|
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
// [Fpu]
|
// [FPU]
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
|
|
||||||
//! Compute 2^x - 1 (FPU).
|
//! Compute `2^x - 1` - `fp0 = POW(2, fp0) - 1` (FPU).
|
||||||
INST_0x(f2xm1, kX86InstIdF2xm1)
|
INST_0x(f2xm1, kX86InstIdF2xm1)
|
||||||
//! Absolute value of fp0 (FPU).
|
//! Abs `fp0 = ABS(fp0)` (FPU).
|
||||||
INST_0x(fabs, kX86InstIdFabs)
|
INST_0x(fabs, kX86InstIdFabs)
|
||||||
|
|
||||||
//! Add `o1` to `o0` (one has to be `fp0`) and store result in `o0` (FPU).
|
//! Add `o0 = o0 + o1` (one operand has to be `fp0`) (FPU).
|
||||||
INST_2x_(fadd, kX86InstIdFadd, X86FpReg, X86FpReg, o0.getRegIndex() == 0 || o1.getRegIndex() == 0)
|
INST_2x_(fadd, kX86InstIdFadd, X86FpReg, X86FpReg, o0.getRegIndex() == 0 || o1.getRegIndex() == 0)
|
||||||
//! Add 4-byte or 8-byte FP `o0` to fp0 and store result in fp0 (FPU).
|
//! Add `fp0 = fp0 + float_or_double[o0]` (FPU).
|
||||||
INST_1x(fadd, kX86InstIdFadd, X86Mem)
|
INST_1x(fadd, kX86InstIdFadd, X86Mem)
|
||||||
//! Add fp0 to `o0` and pop the FPU stack (FPU).
|
//! Add `o0 = o0 + fp0` and POP (FPU).
|
||||||
INST_1x(faddp, kX86InstIdFaddp, X86FpReg)
|
INST_1x(faddp, kX86InstIdFaddp, X86FpReg)
|
||||||
//! \overload
|
//! Add `fp1 = fp1 + fp0` and POP (FPU).
|
||||||
INST_0x(faddp, kX86InstIdFaddp)
|
INST_0x(faddp, kX86InstIdFaddp)
|
||||||
|
|
||||||
//! Load binary coded decimal (FPU).
|
//! Load BCD from `[o0]` and PUSH (FPU).
|
||||||
INST_1x(fbld, kX86InstIdFbld, X86Mem)
|
INST_1x(fbld, kX86InstIdFbld, X86Mem)
|
||||||
//! Store BCD integer and Pop (FPU).
|
//! Store BCD-Integer to `[o0]` and POP (FPU).
|
||||||
INST_1x(fbstp, kX86InstIdFbstp, X86Mem)
|
INST_1x(fbstp, kX86InstIdFbstp, X86Mem)
|
||||||
//! Change fp0 sign (FPU).
|
|
||||||
|
//! Complement Sign `fp0 = -fp0` (FPU).
|
||||||
INST_0x(fchs, kX86InstIdFchs)
|
INST_0x(fchs, kX86InstIdFchs)
|
||||||
|
|
||||||
//! Clear exceptions (FPU).
|
//! Clear exceptions (FPU).
|
||||||
INST_0x(fclex, kX86InstIdFclex)
|
INST_0x(fclex, kX86InstIdFclex)
|
||||||
|
|
||||||
//! Conditional move (FPU).
|
//! Conditional move `if (CF=1) fp0 = o0` (FPU).
|
||||||
INST_1x(fcmovb, kX86InstIdFcmovb, X86FpReg)
|
INST_1x(fcmovb, kX86InstIdFcmovb, X86FpReg)
|
||||||
//! Conditional move (FPU).
|
//! Conditional move `if (CF|ZF=1) fp0 = o0` (FPU).
|
||||||
INST_1x(fcmovbe, kX86InstIdFcmovbe, X86FpReg)
|
INST_1x(fcmovbe, kX86InstIdFcmovbe, X86FpReg)
|
||||||
//! Conditional move (FPU).
|
//! Conditional move `if (ZF=1) fp0 = o0` (FPU).
|
||||||
INST_1x(fcmove, kX86InstIdFcmove, X86FpReg)
|
INST_1x(fcmove, kX86InstIdFcmove, X86FpReg)
|
||||||
//! Conditional move (FPU).
|
//! Conditional move `if (CF=0) fp0 = o0` (FPU).
|
||||||
INST_1x(fcmovnb, kX86InstIdFcmovnb, X86FpReg)
|
INST_1x(fcmovnb, kX86InstIdFcmovnb, X86FpReg)
|
||||||
//! Conditional move (FPU).
|
//! Conditional move `if (CF|ZF=0) fp0 = o0` (FPU).
|
||||||
INST_1x(fcmovnbe, kX86InstIdFcmovnbe, X86FpReg)
|
INST_1x(fcmovnbe, kX86InstIdFcmovnbe, X86FpReg)
|
||||||
//! Conditional move (FPU).
|
//! Conditional move `if (ZF=0) fp0 = o0` (FPU).
|
||||||
INST_1x(fcmovne, kX86InstIdFcmovne, X86FpReg)
|
INST_1x(fcmovne, kX86InstIdFcmovne, X86FpReg)
|
||||||
//! Conditional move (FPU).
|
//! Conditional move `if (PF=0) fp0 = o0` (FPU).
|
||||||
INST_1x(fcmovnu, kX86InstIdFcmovnu, X86FpReg)
|
INST_1x(fcmovnu, kX86InstIdFcmovnu, X86FpReg)
|
||||||
//! Conditional move (FPU).
|
//! Conditional move `if (PF=1) fp0 = o0` (FPU).
|
||||||
INST_1x(fcmovu, kX86InstIdFcmovu, X86FpReg)
|
INST_1x(fcmovu, kX86InstIdFcmovu, X86FpReg)
|
||||||
|
|
||||||
//! Compare fp0 with `o0` (FPU).
|
//! Compare `fp0` with `o0` (FPU).
|
||||||
INST_1x(fcom, kX86InstIdFcom, X86FpReg)
|
INST_1x(fcom, kX86InstIdFcom, X86FpReg)
|
||||||
//! Compare fp0 with fp1 (FPU).
|
//! Compare `fp0` with `fp1` (FPU).
|
||||||
INST_0x(fcom, kX86InstIdFcom)
|
INST_0x(fcom, kX86InstIdFcom)
|
||||||
//! Compare fp0 with 4-byte or 8-byte FP at `src` (FPU).
|
//! Compare `fp0` with `float_or_double[o0]` (FPU).
|
||||||
INST_1x(fcom, kX86InstIdFcom, X86Mem)
|
INST_1x(fcom, kX86InstIdFcom, X86Mem)
|
||||||
//! Compare fp0 with `o0` and pop the FPU stack (FPU).
|
//! Compare `fp0` with `o0` and POP (FPU).
|
||||||
INST_1x(fcomp, kX86InstIdFcomp, X86FpReg)
|
INST_1x(fcomp, kX86InstIdFcomp, X86FpReg)
|
||||||
//! Compare fp0 with fp1 and pop the FPU stack (FPU).
|
//! Compare `fp0` with `fp1` and POP (FPU).
|
||||||
INST_0x(fcomp, kX86InstIdFcomp)
|
INST_0x(fcomp, kX86InstIdFcomp)
|
||||||
//! Compare fp0 with 4-byte or 8-byte FP at `adr` and pop the FPU stack (FPU).
|
//! Compare `fp0` with `float_or_double[o0]` and POP (FPU).
|
||||||
INST_1x(fcomp, kX86InstIdFcomp, X86Mem)
|
INST_1x(fcomp, kX86InstIdFcomp, X86Mem)
|
||||||
//! Compare fp0 with fp1 and pop the FPU stack twice (FPU).
|
//! Compare `fp0` with `fp1` and POP twice (FPU).
|
||||||
INST_0x(fcompp, kX86InstIdFcompp)
|
INST_0x(fcompp, kX86InstIdFcompp)
|
||||||
//! Compare fp0 and `o0` and Set EFLAGS (FPU).
|
//! Compare `fp0` with `o0` and set EFLAGS (FPU).
|
||||||
INST_1x(fcomi, kX86InstIdFcomi, X86FpReg)
|
INST_1x(fcomi, kX86InstIdFcomi, X86FpReg)
|
||||||
//! Compare fp0 and `o0` and Set EFLAGS and pop the FPU stack (FPU).
|
//! Compare `fp0` with `o0` and set EFLAGS and POP (FPU).
|
||||||
INST_1x(fcomip, kX86InstIdFcomip, X86FpReg)
|
INST_1x(fcomip, kX86InstIdFcomip, X86FpReg)
|
||||||
|
|
||||||
//! Calculate cosine of fp0 and store result in fp0 (FPU).
|
//! Cos `fp0 = cos(fp0)` (FPU).
|
||||||
INST_0x(fcos, kX86InstIdFcos)
|
INST_0x(fcos, kX86InstIdFcos)
|
||||||
//! Decrement FPU stack-top pointer (FPU).
|
|
||||||
|
//! Decrement FPU stack pointer (FPU).
|
||||||
INST_0x(fdecstp, kX86InstIdFdecstp)
|
INST_0x(fdecstp, kX86InstIdFdecstp)
|
||||||
|
|
||||||
//! Divide `o0` by `o1` (one has to be `fp0`) (FPU).
|
//! Divide `o0 = o0 / o1` (one has to be `fp0`) (FPU).
|
||||||
INST_2x_(fdiv, kX86InstIdFdiv, X86FpReg, X86FpReg, o0.getRegIndex() == 0 || o1.getRegIndex() == 0)
|
INST_2x_(fdiv, kX86InstIdFdiv, X86FpReg, X86FpReg, o0.getRegIndex() == 0 || o1.getRegIndex() == 0)
|
||||||
//! Divide fp0 by 32-bit or 64-bit FP value (FPU).
|
//! Divide `fp0 = fp0 / float_or_double[o0]` (FPU).
|
||||||
INST_1x(fdiv, kX86InstIdFdiv, X86Mem)
|
INST_1x(fdiv, kX86InstIdFdiv, X86Mem)
|
||||||
//! Divide `o0` by fp0 (FPU).
|
//! Divide `o0 = o0 / fp0` and POP (FPU).
|
||||||
INST_1x(fdivp, kX86InstIdFdivp, X86FpReg)
|
INST_1x(fdivp, kX86InstIdFdivp, X86FpReg)
|
||||||
//! \overload
|
//! Divide `fp1 = fp1 / fp0` and POP (FPU).
|
||||||
INST_0x(fdivp, kX86InstIdFdivp)
|
INST_0x(fdivp, kX86InstIdFdivp)
|
||||||
|
|
||||||
//! Reverse divide `o0` by `o1` (one has to be `fp0`) (FPU).
|
//! Reverse divide `o0 = o1 / o0` (one has to be `fp0`) (FPU).
|
||||||
INST_2x_(fdivr, kX86InstIdFdivr, X86FpReg, X86FpReg, o0.getRegIndex() == 0 || o1.getRegIndex() == 0)
|
INST_2x_(fdivr, kX86InstIdFdivr, X86FpReg, X86FpReg, o0.getRegIndex() == 0 || o1.getRegIndex() == 0)
|
||||||
//! Reverse divide fp0 by 32-bit or 64-bit FP value (FPU).
|
//! Reverse divide `fp0 = float_or_double[o0] / fp0` (FPU).
|
||||||
INST_1x(fdivr, kX86InstIdFdivr, X86Mem)
|
INST_1x(fdivr, kX86InstIdFdivr, X86Mem)
|
||||||
//! Reverse divide `o0` by fp0 (FPU).
|
//! Reverse divide `o0 = fp0 / o0` and POP (FPU).
|
||||||
INST_1x(fdivrp, kX86InstIdFdivrp, X86FpReg)
|
INST_1x(fdivrp, kX86InstIdFdivrp, X86FpReg)
|
||||||
//! \overload
|
//! Reverse divide `fp1 = fp0 / fp1` and POP (FPU).
|
||||||
INST_0x(fdivrp, kX86InstIdFdivrp)
|
INST_0x(fdivrp, kX86InstIdFdivrp)
|
||||||
|
|
||||||
//! Free FP register (FPU).
|
//! Free FP register (FPU).
|
||||||
INST_1x(ffree, kX86InstIdFfree, X86FpReg)
|
INST_1x(ffree, kX86InstIdFfree, X86FpReg)
|
||||||
|
|
||||||
//! Add 16-bit or 32-bit integer to fp0 (FPU).
|
//! Add `fp0 = fp0 + short_or_int[o0]` (FPU).
|
||||||
INST_1x_(fiadd, kX86InstIdFiadd, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
|
INST_1x_(fiadd, kX86InstIdFiadd, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
|
||||||
//! Compare fp0 with 16-bit or 32-bit Integer (FPU).
|
//! Compare `fp0` with `short_or_int[o0]` (FPU).
|
||||||
INST_1x_(ficom, kX86InstIdFicom, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
|
INST_1x_(ficom, kX86InstIdFicom, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
|
||||||
//! Compare fp0 with 16-bit or 32-bit Integer and pop the FPU stack (FPU).
|
//! Compare `fp0` with `short_or_int[o0]` and POP (FPU).
|
||||||
INST_1x_(ficomp, kX86InstIdFicomp, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
|
INST_1x_(ficomp, kX86InstIdFicomp, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
|
||||||
//! Divide fp0 by 32-bit or 16-bit integer (`src`) (FPU).
|
//! Divide `fp0 = fp0 / short_or_int[o0]` (FPU).
|
||||||
INST_1x_(fidiv, kX86InstIdFidiv, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
|
INST_1x_(fidiv, kX86InstIdFidiv, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
|
||||||
//! Reverse divide fp0 by 32-bit or 16-bit integer (`src`) (FPU).
|
//! Reverse divide `fp0 = short_or_int[o0] / fp0` (FPU).
|
||||||
INST_1x_(fidivr, kX86InstIdFidivr, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
|
INST_1x_(fidivr, kX86InstIdFidivr, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
|
||||||
|
|
||||||
//! Load 16-bit, 32-bit or 64-bit Integer and push it to the FPU stack (FPU).
|
//! Load `short_or_int_or_long[o0]` and PUSH (FPU).
|
||||||
INST_1x_(fild, kX86InstIdFild, X86Mem, o0.getSize() == 2 || o0.getSize() == 4 || o0.getSize() == 8)
|
INST_1x_(fild, kX86InstIdFild, X86Mem, o0.getSize() == 2 || o0.getSize() == 4 || o0.getSize() == 8)
|
||||||
//! Multiply fp0 by 16-bit or 32-bit integer and store it to fp0 (FPU).
|
//! Multiply `fp0 *= short_or_int[o0]` (FPU).
|
||||||
INST_1x_(fimul, kX86InstIdFimul, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
|
INST_1x_(fimul, kX86InstIdFimul, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
|
||||||
|
|
||||||
//! Increment FPU stack-top pointer (FPU).
|
//! Increment FPU stack pointer (FPU).
|
||||||
INST_0x(fincstp, kX86InstIdFincstp)
|
INST_0x(fincstp, kX86InstIdFincstp)
|
||||||
//! Initialize FPU (FPU).
|
//! Initialize FPU (FPU).
|
||||||
INST_0x(finit, kX86InstIdFinit)
|
INST_0x(finit, kX86InstIdFinit)
|
||||||
|
|
||||||
//! Subtract 16-bit or 32-bit integer from fp0 and store result to fp0 (FPU).
|
//! Subtract `fp0 = fp0 - short_or_int[o0]` (FPU).
|
||||||
INST_1x_(fisub, kX86InstIdFisub, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
|
INST_1x_(fisub, kX86InstIdFisub, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
|
||||||
//! Reverse subtract 16-bit or 32-bit integer from fp0 and store result to fp0 (FPU).
|
//! Reverse subtract `fp0 = short_or_int[o0] - fp0` (FPU).
|
||||||
INST_1x_(fisubr, kX86InstIdFisubr, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
|
INST_1x_(fisubr, kX86InstIdFisubr, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
|
||||||
|
|
||||||
//! Initialize FPU without checking for pending unmasked exceptions (FPU).
|
//! Initialize FPU without checking for pending unmasked exceptions (FPU).
|
||||||
INST_0x(fninit, kX86InstIdFninit)
|
INST_0x(fninit, kX86InstIdFninit)
|
||||||
|
|
||||||
//! Store fp0 as 16-bit or 32-bit Integer to `o0` (FPU).
|
//! Store `fp0` as `short_or_int[o0]` (FPU).
|
||||||
INST_1x_(fist, kX86InstIdFist, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
|
INST_1x_(fist, kX86InstIdFist, X86Mem, o0.getSize() == 2 || o0.getSize() == 4)
|
||||||
//! Store fp0 as 16-bit, 32-bit or 64-bit Integer to `o0` and pop the FPU stack (FPU).
|
//! Store `fp0` as `short_or_int_or_long[o0]` and POP (FPU).
|
||||||
INST_1x_(fistp, kX86InstIdFistp, X86Mem, o0.getSize() == 2 || o0.getSize() == 4 || o0.getSize() == 8)
|
INST_1x_(fistp, kX86InstIdFistp, X86Mem, o0.getSize() == 2 || o0.getSize() == 4 || o0.getSize() == 8)
|
||||||
//! Push 32-bit, 64-bit or 80-bit floating point value on the FPU stack (FPU).
|
|
||||||
|
//! Load `float_or_double_or_extended[o0]` and PUSH (FPU).
|
||||||
INST_1x_(fld, kX86InstIdFld, X86Mem, o0.getSize() == 4 || o0.getSize() == 8 || o0.getSize() == 10)
|
INST_1x_(fld, kX86InstIdFld, X86Mem, o0.getSize() == 4 || o0.getSize() == 8 || o0.getSize() == 10)
|
||||||
//! Push `o0` on the FPU stack (FPU).
|
//! PUSH `o0` (FPU).
|
||||||
INST_1x(fld, kX86InstIdFld, X86FpReg)
|
INST_1x(fld, kX86InstIdFld, X86FpReg)
|
||||||
|
|
||||||
//! Push +1.0 on the FPU stack (FPU).
|
//! PUSH `1.0` (FPU).
|
||||||
INST_0x(fld1, kX86InstIdFld1)
|
INST_0x(fld1, kX86InstIdFld1)
|
||||||
//! Push log2(10) on the FPU stack (FPU).
|
//! PUSH `log2(10)` (FPU).
|
||||||
INST_0x(fldl2t, kX86InstIdFldl2t)
|
INST_0x(fldl2t, kX86InstIdFldl2t)
|
||||||
//! Push log2(e) on the FPU stack (FPU).
|
//! PUSH `log2(e)` (FPU).
|
||||||
INST_0x(fldl2e, kX86InstIdFldl2e)
|
INST_0x(fldl2e, kX86InstIdFldl2e)
|
||||||
//! Push pi on the FPU stack (FPU).
|
//! PUSH `pi` (FPU).
|
||||||
INST_0x(fldpi, kX86InstIdFldpi)
|
INST_0x(fldpi, kX86InstIdFldpi)
|
||||||
//! Push log10(2) on the FPU stack (FPU).
|
//! PUSH `log10(2)` (FPU).
|
||||||
INST_0x(fldlg2, kX86InstIdFldlg2)
|
INST_0x(fldlg2, kX86InstIdFldlg2)
|
||||||
//! Push ln(2) on the FPU stack (FPU).
|
//! PUSH `ln(2)` (FPU).
|
||||||
INST_0x(fldln2, kX86InstIdFldln2)
|
INST_0x(fldln2, kX86InstIdFldln2)
|
||||||
//! Push +0.0 on the FPU stack (FPU).
|
//! PUSH `+0.0` (FPU).
|
||||||
INST_0x(fldz, kX86InstIdFldz)
|
INST_0x(fldz, kX86InstIdFldz)
|
||||||
|
|
||||||
//! Load x87 FPU control word (2 bytes) (FPU).
|
//! Load x87 FPU control word from `word_ptr[o0]` (FPU).
|
||||||
INST_1x(fldcw, kX86InstIdFldcw, X86Mem)
|
INST_1x(fldcw, kX86InstIdFldcw, X86Mem)
|
||||||
//! Load x87 FPU environment (14 or 28 bytes) (FPU).
|
//! Load x87 FPU environment (14 or 28 bytes) from `[o0]` (FPU).
|
||||||
INST_1x(fldenv, kX86InstIdFldenv, X86Mem)
|
INST_1x(fldenv, kX86InstIdFldenv, X86Mem)
|
||||||
|
|
||||||
//! Multiply `o0` by `o1` (one has to be `fp0`) and store result in `o0` (FPU).
|
//! Multiply `o0 = o0 * o1` (one has to be `fp0`) (FPU).
|
||||||
INST_2x_(fmul, kX86InstIdFmul, X86FpReg, X86FpReg, o0.getRegIndex() == 0 || o1.getRegIndex() == 0)
|
INST_2x_(fmul, kX86InstIdFmul, X86FpReg, X86FpReg, o0.getRegIndex() == 0 || o1.getRegIndex() == 0)
|
||||||
//! Multiply fp0 by 32-bit or 64-bit `o0` and store result in fp0 (FPU).
|
//! Multiply `fp0 = fp0 * float_or_double[o0]` (FPU).
|
||||||
INST_1x(fmul, kX86InstIdFmul, X86Mem)
|
INST_1x(fmul, kX86InstIdFmul, X86Mem)
|
||||||
//! Multiply fp0 by `o0` and pop the FPU stack (FPU).
|
//! Multiply `o0 = o0 * fp0` and POP (FPU).
|
||||||
INST_1x(fmulp, kX86InstIdFmulp, X86FpReg)
|
INST_1x(fmulp, kX86InstIdFmulp, X86FpReg)
|
||||||
//! \overload
|
//! Multiply `fp1 = fp1 * fp0` and POP (FPU).
|
||||||
INST_0x(fmulp, kX86InstIdFmulp)
|
INST_0x(fmulp, kX86InstIdFmulp)
|
||||||
|
|
||||||
//! Clear exceptions (FPU).
|
//! Clear exceptions (FPU).
|
||||||
INST_0x(fnclex, kX86InstIdFnclex)
|
INST_0x(fnclex, kX86InstIdFnclex)
|
||||||
//! No operation (FPU).
|
//! No operation (FPU).
|
||||||
INST_0x(fnop, kX86InstIdFnop)
|
INST_0x(fnop, kX86InstIdFnop)
|
||||||
//! Save FPU state (FPU).
|
//! Save FPU state to `[o0]` (FPU).
|
||||||
INST_1x(fnsave, kX86InstIdFnsave, X86Mem)
|
INST_1x(fnsave, kX86InstIdFnsave, X86Mem)
|
||||||
//! Store x87 FPU environment (FPU).
|
//! Store x87 FPU environment to `[o0]` (FPU).
|
||||||
INST_1x(fnstenv, kX86InstIdFnstenv, X86Mem)
|
INST_1x(fnstenv, kX86InstIdFnstenv, X86Mem)
|
||||||
//! Store x87 FPU control word (FPU).
|
//! Store x87 FPU control word to `[o0]` (FPU).
|
||||||
INST_1x(fnstcw, kX86InstIdFnstcw, X86Mem)
|
INST_1x(fnstcw, kX86InstIdFnstcw, X86Mem)
|
||||||
|
|
||||||
//! Store x87 FPU status word to `o0` (AX) (FPU).
|
//! Store x87 FPU status word to `o0` (AX) (FPU).
|
||||||
INST_1x_(fnstsw, kX86InstIdFnstsw, X86GpReg, o0.isRegCode(kX86RegTypeGpw, kX86RegIndexAx))
|
INST_1x(fnstsw, kX86InstIdFnstsw, X86GpVar)
|
||||||
//! Store x87 FPU status word to `o0` (2 bytes) (FPU).
|
//! Store x87 FPU status word to `word_ptr[o0]` (FPU).
|
||||||
INST_1x(fnstsw, kX86InstIdFnstsw, X86Mem)
|
INST_1x(fnstsw, kX86InstIdFnstsw, X86Mem)
|
||||||
|
|
||||||
//! Arctan(`fp1` / `fp0`) and pop the FPU stack (FPU).
|
//! Partial Arctan `fp1 = atan2(fp1, fp0)` and POP (FPU).
|
||||||
INST_0x(fpatan, kX86InstIdFpatan)
|
INST_0x(fpatan, kX86InstIdFpatan)
|
||||||
//! Fprem(`fp0`, `fp1`) and pop the FPU stack (FPU).
|
//! Partial Remainder[Trunc] `fp0 = fp0 % fp1` (FPU).
|
||||||
INST_0x(fprem, kX86InstIdFprem)
|
INST_0x(fprem, kX86InstIdFprem)
|
||||||
//! Fprem(`fp0`, `fp1`) and pop the FPU stack (FPU).
|
//! Partial Remainder[Round] `fp0 = fp0 % fp1` (FPU).
|
||||||
INST_0x(fprem1, kX86InstIdFprem1)
|
INST_0x(fprem1, kX86InstIdFprem1)
|
||||||
//! Arctan(`fp0`) and pop the FPU stack (FPU).
|
//! Partial Tan `fp0 = tan(fp0)` and PUSH `1.0` (FPU).
|
||||||
INST_0x(fptan, kX86InstIdFptan)
|
INST_0x(fptan, kX86InstIdFptan)
|
||||||
//! Round `fp0` to Integer (FPU).
|
//! Round `fp0 = round(fp0)` (FPU).
|
||||||
INST_0x(frndint, kX86InstIdFrndint)
|
INST_0x(frndint, kX86InstIdFrndint)
|
||||||
|
|
||||||
//! Restore FPU state from `o0` (94 or 108 bytes) (FPU).
|
//! Restore FPU state from `[o0]` (94 or 108 bytes) (FPU).
|
||||||
INST_1x(frstor, kX86InstIdFrstor, X86Mem)
|
INST_1x(frstor, kX86InstIdFrstor, X86Mem)
|
||||||
//! Save FPU state to `o0` (94 or 108 bytes) (FPU).
|
//! Save FPU state to `[o0]` (94 or 108 bytes) (FPU).
|
||||||
INST_1x(fsave, kX86InstIdFsave, X86Mem)
|
INST_1x(fsave, kX86InstIdFsave, X86Mem)
|
||||||
|
|
||||||
//! Scale `fp0` by `fp1` (FPU).
|
//! Scale `fp0 = fp0 * pow(2, RoundTowardsZero(fp1))` (FPU).
|
||||||
INST_0x(fscale, kX86InstIdFscale)
|
INST_0x(fscale, kX86InstIdFscale)
|
||||||
//! Sine of `fp0` and store result in `fp0` (FPU).
|
//! Sin `fp0 = sin(fp0)` (FPU).
|
||||||
INST_0x(fsin, kX86InstIdFsin)
|
INST_0x(fsin, kX86InstIdFsin)
|
||||||
//! Sine and cosine of `fp0`, store sine in `fp0` and push cosine on the FPU stack (FPU).
|
//! Sincos `fp0 = sin(fp0)` and PUSH `cos(fp0)` (FPU).
|
||||||
INST_0x(fsincos, kX86InstIdFsincos)
|
INST_0x(fsincos, kX86InstIdFsincos)
|
||||||
//! Square root of `fp0` and store it in `fp0` (FPU).
|
//! Square root `fp0 = sqrt(fp0)` (FPU).
|
||||||
INST_0x(fsqrt, kX86InstIdFsqrt)
|
INST_0x(fsqrt, kX86InstIdFsqrt)
|
||||||
|
|
||||||
//! Store floating point value to 32-bit or 64-bit memory location (FPU).
|
//! Store floating point value to `float_or_double[o0]` (FPU).
|
||||||
INST_1x_(fst, kX86InstIdFst, X86Mem, o0.getSize() == 4 || o0.getSize() == 8)
|
INST_1x_(fst, kX86InstIdFst, X86Mem, o0.getSize() == 4 || o0.getSize() == 8)
|
||||||
//! Store floating point value to `o0` (FPU).
|
//! Copy `o0 = fp0` (FPU).
|
||||||
INST_1x(fst, kX86InstIdFst, X86FpReg)
|
INST_1x(fst, kX86InstIdFst, X86FpReg)
|
||||||
//! Store floating point value to 32-bit or 64-bit memory location and pop the FPU stack (FPU).
|
//! Store floating point value to `float_or_double_or_extended[o0]` and POP (FPU).
|
||||||
INST_1x_(fstp, kX86InstIdFstp, X86Mem, o0.getSize() == 4 || o0.getSize() == 8 || o0.getSize() == 10)
|
INST_1x_(fstp, kX86InstIdFstp, X86Mem, o0.getSize() == 4 || o0.getSize() == 8 || o0.getSize() == 10)
|
||||||
//! Store floating point value to `o0` and pop the FPU stack (FPU).
|
//! Copy `o0 = fp0` and POP (FPU).
|
||||||
INST_1x(fstp, kX86InstIdFstp, X86FpReg)
|
INST_1x(fstp, kX86InstIdFstp, X86FpReg)
|
||||||
|
|
||||||
//! Store x87 FPU control word to `o0` (2 bytes) (FPU).
|
//! Store x87 FPU control word to `word_ptr[o0]` (FPU).
|
||||||
INST_1x(fstcw, kX86InstIdFstcw, X86Mem)
|
INST_1x(fstcw, kX86InstIdFstcw, X86Mem)
|
||||||
//! Store x87 FPU environment to `o0` (14 or 28 bytes) (FPU).
|
//! Store x87 FPU environment to `[o0]` (14 or 28 bytes) (FPU).
|
||||||
INST_1x(fstenv, kX86InstIdFstenv, X86Mem)
|
INST_1x(fstenv, kX86InstIdFstenv, X86Mem)
|
||||||
//! Store x87 FPU status word to `o0` (allocated in AX) (FPU).
|
//! Store x87 FPU status word to `o0` (AX) (FPU).
|
||||||
INST_1x(fstsw, kX86InstIdFstsw, X86GpVar)
|
INST_1x(fstsw, kX86InstIdFstsw, X86GpVar)
|
||||||
//! Store x87 FPU status word (2 bytes) (FPU).
|
//! Store x87 FPU status word to `word_ptr[o0]` (FPU).
|
||||||
INST_1x(fstsw, kX86InstIdFstsw, X86Mem)
|
INST_1x(fstsw, kX86InstIdFstsw, X86Mem)
|
||||||
|
|
||||||
//! Subtract `o0` from `o0` (one has to be `fp0`) and store result in `o0` (FPU).
|
//! Subtract `o0 = o0 - o1` (one has to be `fp0`) (FPU).
|
||||||
INST_2x_(fsub, kX86InstIdFsub, X86FpReg, X86FpReg, o0.getRegIndex() == 0 || o1.getRegIndex() == 0)
|
INST_2x_(fsub, kX86InstIdFsub, X86FpReg, X86FpReg, o0.getRegIndex() == 0 || o1.getRegIndex() == 0)
|
||||||
//! Subtract 32-bit or 64-bit `o0` from fp0 and store result in fp0 (FPU).
|
//! Subtract `fp0 = fp0 - float_or_double[o0]` (FPU).
|
||||||
INST_1x_(fsub, kX86InstIdFsub, X86Mem, o0.getSize() == 4 || o0.getSize() == 8)
|
INST_1x_(fsub, kX86InstIdFsub, X86Mem, o0.getSize() == 4 || o0.getSize() == 8)
|
||||||
//! Subtract fp0 from `o0` and pop FPU stack (FPU).
|
//! Subtract `o0 = o0 - fp0` and POP (FPU).
|
||||||
INST_1x(fsubp, kX86InstIdFsubp, X86FpReg)
|
INST_1x(fsubp, kX86InstIdFsubp, X86FpReg)
|
||||||
//! \overload
|
//! Subtract `fp1 = fp1 - fp0` and POP (FPU).
|
||||||
INST_0x(fsubp, kX86InstIdFsubp)
|
INST_0x(fsubp, kX86InstIdFsubp)
|
||||||
|
|
||||||
//! Reverse subtract `o1` from `o0` (one has to be `fp0`) and store result in `o0` (FPU).
|
//! Reverse subtract `o0 = o1 - o0` (one has to be `fp0`) (FPU).
|
||||||
INST_2x_(fsubr, kX86InstIdFsubr, X86FpReg, X86FpReg, o0.getRegIndex() == 0 || o1.getRegIndex() == 0)
|
INST_2x_(fsubr, kX86InstIdFsubr, X86FpReg, X86FpReg, o0.getRegIndex() == 0 || o1.getRegIndex() == 0)
|
||||||
//! Reverse subtract 32-bit or 64-bit `o0` from `fp0` and store result in `fp0` (FPU).
|
//! Reverse subtract `fp0 = float_or_double[o0] - fp0` (FPU).
|
||||||
INST_1x_(fsubr, kX86InstIdFsubr, X86Mem, o0.getSize() == 4 || o0.getSize() == 8)
|
INST_1x_(fsubr, kX86InstIdFsubr, X86Mem, o0.getSize() == 4 || o0.getSize() == 8)
|
||||||
//! Reverse subtract `fp0` from `o0` and pop FPU stack (FPU).
|
//! Reverse subtract `o0 = fp0 - o0` and POP (FPU).
|
||||||
INST_1x(fsubrp, kX86InstIdFsubrp, X86FpReg)
|
INST_1x(fsubrp, kX86InstIdFsubrp, X86FpReg)
|
||||||
//! \overload
|
//! Reverse subtract `fp1 = fp0 - fp1` and POP (FPU).
|
||||||
INST_0x(fsubrp, kX86InstIdFsubrp)
|
INST_0x(fsubrp, kX86InstIdFsubrp)
|
||||||
|
|
||||||
//! Floating point test - Compare `fp0` with 0.0. (FPU).
|
//! Compare `fp0` with `0.0` (FPU).
|
||||||
INST_0x(ftst, kX86InstIdFtst)
|
INST_0x(ftst, kX86InstIdFtst)
|
||||||
|
|
||||||
//! Unordered compare `fp0` with `o0` (FPU).
|
//! Unordered compare `fp0` with `o0` (FPU).
|
||||||
INST_1x(fucom, kX86InstIdFucom, X86FpReg)
|
INST_1x(fucom, kX86InstIdFucom, X86FpReg)
|
||||||
//! Unordered compare `fp0` with `fp1` (FPU).
|
//! Unordered compare `fp0` with `fp1` (FPU).
|
||||||
INST_0x(fucom, kX86InstIdFucom)
|
INST_0x(fucom, kX86InstIdFucom)
|
||||||
//! Unordered compare `fp0` and `o0`, check for ordered values and set EFLAGS (FPU).
|
//! Unordered compare `fp0` with `o0`, check for ordered values and set EFLAGS (FPU).
|
||||||
INST_1x(fucomi, kX86InstIdFucomi, X86FpReg)
|
INST_1x(fucomi, kX86InstIdFucomi, X86FpReg)
|
||||||
//! Unordered compare `fp0` and `o0`, check for ordered values and set EFLAGS and pop the FPU stack (FPU).
|
//! Unordered compare `fp0` with `o0`, check for ordered values and set EFLAGS and POP (FPU).
|
||||||
INST_1x(fucomip, kX86InstIdFucomip, X86FpReg)
|
INST_1x(fucomip, kX86InstIdFucomip, X86FpReg)
|
||||||
//! Unordered compare `fp0` with `o0` and pop the FPU stack (FPU).
|
//! Unordered compare `fp0` with `o0` and POP (FPU).
|
||||||
INST_1x(fucomp, kX86InstIdFucomp, X86FpReg)
|
INST_1x(fucomp, kX86InstIdFucomp, X86FpReg)
|
||||||
//! Unordered compare `fp0` with `fp1` and pop the FPU stack (FPU).
|
//! Unordered compare `fp0` with `fp1` and POP (FPU).
|
||||||
INST_0x(fucomp, kX86InstIdFucomp)
|
INST_0x(fucomp, kX86InstIdFucomp)
|
||||||
//! Unordered compare `fp0` with `fp1` and pop the FPU stack twice (FPU).
|
//! Unordered compare `fp0` with `fp1` and POP twice (FPU).
|
||||||
INST_0x(fucompp, kX86InstIdFucompp)
|
INST_0x(fucompp, kX86InstIdFucompp)
|
||||||
|
|
||||||
INST_0x(fwait, kX86InstIdFwait)
|
INST_0x(fwait, kX86InstIdFwait)
|
||||||
|
|
||||||
//! Examine fp0 (FPU).
|
//! Examine fp0 (FPU).
|
||||||
INST_0x(fxam, kX86InstIdFxam)
|
INST_0x(fxam, kX86InstIdFxam)
|
||||||
//! Exchange content of fp0 with `o0` (FPU).
|
//! Exchange `fp0` with `o0` (FPU).
|
||||||
INST_1x(fxch, kX86InstIdFxch, X86FpReg)
|
INST_1x(fxch, kX86InstIdFxch, X86FpReg)
|
||||||
|
|
||||||
//! Restore FP/MMX/SIMD extension states to `o0` (512 bytes) (FPU, MMX, SSE).
|
//! Restore FP/MMX/SIMD extension states from `o0` (512 bytes) (FPU, MMX, SSE).
|
||||||
INST_1x(fxrstor, kX86InstIdFxrstor, X86Mem)
|
INST_1x(fxrstor, kX86InstIdFxrstor, X86Mem)
|
||||||
//! Store FP/MMX/SIMD extension states to `o0` (512 bytes) (FPU, MMX, SSE).
|
//! Store FP/MMX/SIMD extension states to `o0` (512 bytes) (FPU, MMX, SSE).
|
||||||
INST_1x(fxsave, kX86InstIdFxsave, X86Mem)
|
INST_1x(fxsave, kX86InstIdFxsave, X86Mem)
|
||||||
//! Extract exponent and store to `fp0` and push significand on the FPU stack (FPU).
|
//! Extract `fp0 = exponent(fp0)` and PUSH `significand(fp0)` (FPU).
|
||||||
INST_0x(fxtract, kX86InstIdFxtract)
|
INST_0x(fxtract, kX86InstIdFxtract)
|
||||||
|
|
||||||
//! Compute `fp1 * log2(fp0)`, pop the FPU stack and store result in `fp0` (FPU).
|
//! Compute `fp1 = fp1 * log2(fp0)` and POP (FPU).
|
||||||
INST_0x(fyl2x, kX86InstIdFyl2x)
|
INST_0x(fyl2x, kX86InstIdFyl2x)
|
||||||
//! Compute `fp1 * log2(fp0 + 1)`, pop the FPU stack and store result in `fp0` (FPU).
|
//! Compute `fp1 = fp1 * log2(fp0 + 1)` and POP (FPU).
|
||||||
INST_0x(fyl2xp1, kX86InstIdFyl2xp1)
|
INST_0x(fyl2xp1, kX86InstIdFyl2xp1)
|
||||||
|
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
@@ -3833,7 +3905,7 @@ struct ASMJIT_VCLASS X86Compiler : public Compiler {
|
|||||||
INST_0x(emms, kX86InstIdEmms)
|
INST_0x(emms, kX86InstIdEmms)
|
||||||
|
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
// [3dNow]
|
// [3DNOW]
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
|
|
||||||
//! Packed SP-FP to DWORD convert (3dNow!).
|
//! Packed SP-FP to DWORD convert (3dNow!).
|
||||||
@@ -4921,8 +4993,7 @@ struct ASMJIT_VCLASS X86Compiler : public Compiler {
|
|||||||
//! \overload
|
//! \overload
|
||||||
INST_2x(addsubps, kX86InstIdAddsubps, X86XmmVar, X86Mem)
|
INST_2x(addsubps, kX86InstIdAddsubps, X86XmmVar, X86Mem)
|
||||||
|
|
||||||
//! Store truncated `fp0` as 16-bit, 32-bit or 64-bit integer to `o0` and pop
|
//! Store truncated `fp0` to `short_or_int_or_long[o0]` and POP (FPU & SSE3).
|
||||||
//! the FPU stack (FPU / SSE3).
|
|
||||||
INST_1x(fisttp, kX86InstIdFisttp, X86Mem)
|
INST_1x(fisttp, kX86InstIdFisttp, X86Mem)
|
||||||
|
|
||||||
//! Packed DP-FP horizontal add (SSE3).
|
//! Packed DP-FP horizontal add (SSE3).
|
||||||
@@ -5382,6 +5453,11 @@ struct ASMJIT_VCLASS X86Compiler : public Compiler {
|
|||||||
// [SSE4.2]
|
// [SSE4.2]
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
|
|
||||||
|
//! Accumulate crc32 value (polynomial 0x11EDC6F41) (SSE4.2).
|
||||||
|
INST_2x_(crc32, kX86InstIdCrc32, X86GpVar, X86GpVar, o0.isRegType(kX86RegTypeGpd) || o0.isRegType(kX86RegTypeGpq))
|
||||||
|
//! \overload
|
||||||
|
INST_2x_(crc32, kX86InstIdCrc32, X86GpVar, X86Mem, o0.isRegType(kX86RegTypeGpd) || o0.isRegType(kX86RegTypeGpq))
|
||||||
|
|
||||||
//! Packed compare explicit length strings, return index (SSE4.2).
|
//! Packed compare explicit length strings, return index (SSE4.2).
|
||||||
INST_3i(pcmpestri, kX86InstIdPcmpestri, X86XmmVar, X86XmmVar, Imm)
|
INST_3i(pcmpestri, kX86InstIdPcmpestri, X86XmmVar, X86XmmVar, Imm)
|
||||||
//! \overload
|
//! \overload
|
||||||
@@ -5407,6 +5483,43 @@ struct ASMJIT_VCLASS X86Compiler : public Compiler {
|
|||||||
//! \overload
|
//! \overload
|
||||||
INST_2x(pcmpgtq, kX86InstIdPcmpgtq, X86XmmVar, X86Mem)
|
INST_2x(pcmpgtq, kX86InstIdPcmpgtq, X86XmmVar, X86Mem)
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------
|
||||||
|
// [SSE4a]
|
||||||
|
// --------------------------------------------------------------------------
|
||||||
|
|
||||||
|
//! Extract Field (SSE4a).
|
||||||
|
INST_2x(extrq, kX86InstIdExtrq, X86XmmVar, X86XmmVar)
|
||||||
|
//! Extract Field (SSE4a).
|
||||||
|
INST_3ii(extrq, kX86InstIdExtrq, X86XmmVar, Imm, Imm)
|
||||||
|
|
||||||
|
//! Insert Field (SSE4a).
|
||||||
|
INST_2x(insertq, kX86InstIdInsertq, X86XmmVar, X86XmmVar)
|
||||||
|
//! Insert Field (SSE4a).
|
||||||
|
INST_4ii(insertq, kX86InstIdInsertq, X86XmmVar, X86XmmVar, Imm, Imm)
|
||||||
|
|
||||||
|
//! Move Non-Temporal Scalar DP-FP (SSE4a).
|
||||||
|
INST_2x(movntsd, kX86InstIdMovntsd, X86Mem, X86XmmVar)
|
||||||
|
//! Move Non-Temporal Scalar SP-FP (SSE4a).
|
||||||
|
INST_2x(movntss, kX86InstIdMovntss, X86Mem, X86XmmVar)
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------
|
||||||
|
// [POPCNT]
|
||||||
|
// --------------------------------------------------------------------------
|
||||||
|
|
||||||
|
//! Count the number of bits set to 1 (POPCNT).
|
||||||
|
INST_2x_(popcnt, kX86InstIdPopcnt, X86GpVar, X86GpVar, !o0.isGpb() && o0.getSize() == o1.getSize())
|
||||||
|
//! \overload
|
||||||
|
INST_2x_(popcnt, kX86InstIdPopcnt, X86GpVar, X86Mem, !o0.isGpb())
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------
|
||||||
|
// [LZCNT]
|
||||||
|
// --------------------------------------------------------------------------
|
||||||
|
|
||||||
|
//! Count the number of leading zero bits (LZCNT).
|
||||||
|
INST_2x(lzcnt, kX86InstIdLzcnt, X86GpVar, X86GpVar)
|
||||||
|
//! \overload
|
||||||
|
INST_2x(lzcnt, kX86InstIdLzcnt, X86GpVar, X86Mem)
|
||||||
|
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
// [AESNI]
|
// [AESNI]
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
@@ -5450,6 +5563,34 @@ struct ASMJIT_VCLASS X86Compiler : public Compiler {
|
|||||||
//! \overload
|
//! \overload
|
||||||
INST_3i(pclmulqdq, kX86InstIdPclmulqdq, X86XmmVar, X86Mem, Imm);
|
INST_3i(pclmulqdq, kX86InstIdPclmulqdq, X86XmmVar, X86Mem, Imm);
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------
|
||||||
|
// [XSAVE]
|
||||||
|
// --------------------------------------------------------------------------
|
||||||
|
|
||||||
|
//! Restore Processor Extended States specified by `o1:o2` (XSAVE).
|
||||||
|
INST_3x(xrstor, kX86InstIdXrstor, X86Mem, X86GpVar, X86GpVar)
|
||||||
|
//! Restore Processor Extended States specified by `o1:o2` (XSAVE&X64).
|
||||||
|
INST_3x(xrstor64, kX86InstIdXrstor64, X86Mem, X86GpVar, X86GpVar)
|
||||||
|
|
||||||
|
//! Save Processor Extended States specified by `o1:o2` (XSAVE).
|
||||||
|
INST_3x(xsave, kX86InstIdXsave, X86Mem, X86GpVar, X86GpVar)
|
||||||
|
//! Save Processor Extended States specified by `o1:o2` (XSAVE&X64).
|
||||||
|
INST_3x(xsave64, kX86InstIdXsave64, X86Mem, X86GpVar, X86GpVar)
|
||||||
|
|
||||||
|
//! Save Processor Extended States specified by `o1:o2` (Optimized) (XSAVEOPT).
|
||||||
|
INST_3x(xsaveopt, kX86InstIdXsaveopt, X86Mem, X86GpVar, X86GpVar)
|
||||||
|
//! Save Processor Extended States specified by `o1:o2` (Optimized) (XSAVEOPT&X64).
|
||||||
|
INST_3x(xsaveopt64, kX86InstIdXsaveopt64, X86Mem, X86GpVar, X86GpVar)
|
||||||
|
|
||||||
|
//! Get XCR - `o1:o2 <- XCR[o0]` (`EDX:EAX <- XCR[ECX]`) (XSAVE).
|
||||||
|
INST_3x(xgetbv, kX86InstIdXgetbv, X86GpVar, X86GpVar, X86GpVar)
|
||||||
|
//! Set XCR - `XCR[o0] <- o1:o2` (`XCR[ECX] <- EDX:EAX`) (XSAVE).
|
||||||
|
INST_3x(xsetbv, kX86InstIdXsetbv, X86GpVar, X86GpVar, X86GpVar)
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------
|
||||||
|
// [Cleanup]
|
||||||
|
// --------------------------------------------------------------------------
|
||||||
|
|
||||||
#undef INST_0x
|
#undef INST_0x
|
||||||
|
|
||||||
#undef INST_1x
|
#undef INST_1x
|
||||||
@@ -5465,10 +5606,12 @@ struct ASMJIT_VCLASS X86Compiler : public Compiler {
|
|||||||
#undef INST_3x
|
#undef INST_3x
|
||||||
#undef INST_3x_
|
#undef INST_3x_
|
||||||
#undef INST_3i
|
#undef INST_3i
|
||||||
|
#undef INST_3ii
|
||||||
|
|
||||||
#undef INST_4x
|
#undef INST_4x
|
||||||
#undef INST_4x_
|
#undef INST_4x_
|
||||||
#undef INST_4i
|
#undef INST_4i
|
||||||
|
#undef INST_4ii
|
||||||
};
|
};
|
||||||
|
|
||||||
//! \}
|
//! \}
|
||||||
|
|||||||
@@ -143,9 +143,9 @@ static void X86Context_annotateOperand(X86Context* self,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static bool X86Context_annotateInstruction(X86Context* self,
|
static bool X86Context_annotateInstruction(X86Context* self,
|
||||||
StringBuilder& sb, uint32_t code, const Operand* opList, uint32_t opCount) {
|
StringBuilder& sb, uint32_t instId, const Operand* opList, uint32_t opCount) {
|
||||||
|
|
||||||
sb.appendString(_x86InstInfo[code].getInstName());
|
sb.appendString(_x86InstInfo[instId].getInstName());
|
||||||
for (uint32_t i = 0; i < opCount; i++) {
|
for (uint32_t i = 0; i < opCount; i++) {
|
||||||
if (i == 0)
|
if (i == 0)
|
||||||
sb.appendChar(' ');
|
sb.appendChar(' ');
|
||||||
@@ -207,7 +207,7 @@ static void X86Context_traceNode(X86Context* self, Node* node_) {
|
|||||||
case kNodeTypeInst: {
|
case kNodeTypeInst: {
|
||||||
InstNode* node = static_cast<InstNode*>(node_);
|
InstNode* node = static_cast<InstNode*>(node_);
|
||||||
X86Context_annotateInstruction(self, sb,
|
X86Context_annotateInstruction(self, sb,
|
||||||
node->getCode(), node->getOpList(), node->getOpCount());
|
node->getInstId(), node->getOpList(), node->getOpCount());
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -287,8 +287,8 @@ void X86Context::reset() {
|
|||||||
|
|
||||||
_stackFrameCell = NULL;
|
_stackFrameCell = NULL;
|
||||||
_gaRegs[kX86RegClassGp ] = IntUtil::bits(_regCount.getGp()) & ~IntUtil::mask(kX86RegIndexSp);
|
_gaRegs[kX86RegClassGp ] = IntUtil::bits(_regCount.getGp()) & ~IntUtil::mask(kX86RegIndexSp);
|
||||||
_gaRegs[kX86RegClassFp ] = IntUtil::bits(_regCount.getFp());
|
|
||||||
_gaRegs[kX86RegClassMm ] = IntUtil::bits(_regCount.getMm());
|
_gaRegs[kX86RegClassMm ] = IntUtil::bits(_regCount.getMm());
|
||||||
|
_gaRegs[kX86RegClassK ] = IntUtil::bits(_regCount.getK());
|
||||||
_gaRegs[kX86RegClassXyz] = IntUtil::bits(_regCount.getXyz());
|
_gaRegs[kX86RegClassXyz] = IntUtil::bits(_regCount.getXyz());
|
||||||
|
|
||||||
_argBaseReg = kInvalidReg; // Used by patcher.
|
_argBaseReg = kInvalidReg; // Used by patcher.
|
||||||
@@ -426,8 +426,26 @@ static const X86SpecialInst x86SpecialInstBlend[] = {
|
|||||||
{ 0 , kInvalidReg , kVarAttrInReg }
|
{ 0 , kInvalidReg , kVarAttrInReg }
|
||||||
};
|
};
|
||||||
|
|
||||||
static ASMJIT_INLINE const X86SpecialInst* X86SpecialInst_get(uint32_t code, const Operand* opList, uint32_t opCount) {
|
static const X86SpecialInst x86SpecialInstXsaveXrstor[] = {
|
||||||
switch (code) {
|
{ kInvalidReg , kInvalidReg , 0 },
|
||||||
|
{ kX86RegIndexDx, kInvalidReg , kVarAttrInReg },
|
||||||
|
{ kX86RegIndexAx, kInvalidReg , kVarAttrInReg }
|
||||||
|
};
|
||||||
|
|
||||||
|
// Special-operand table returned by X86SpecialInst_get() for XGETBV, one
// entry per operand: the first entry names CX with kVarAttrInReg, the second
// and third name DX and AX with kVarAttrOutReg (`EDX:EAX <- XCR[ECX]`).
// NOTE(review): columns appear to be { input reg, output reg, attributes } -
// confirm against the X86SpecialInst declaration.
static const X86SpecialInst x86SpecialInstXgetbv[] = {
|
||||||
|
{ kX86RegIndexCx, kInvalidReg , kVarAttrInReg },
|
||||||
|
{ kInvalidReg , kX86RegIndexDx, kVarAttrOutReg },
|
||||||
|
{ kInvalidReg , kX86RegIndexAx, kVarAttrOutReg }
|
||||||
|
};
|
||||||
|
|
||||||
|
// Special-operand table returned by X86SpecialInst_get() for XSETBV, one
// entry per operand: all three entries carry kVarAttrInReg and name CX, DX
// and AX respectively (`XCR[ECX] <- EDX:EAX`).
// NOTE(review): columns appear to be { input reg, output reg, attributes } -
// confirm against the X86SpecialInst declaration.
static const X86SpecialInst x86SpecialInstXsetbv[] = {
|
||||||
|
{ kX86RegIndexCx, kInvalidReg , kVarAttrInReg },
|
||||||
|
{ kX86RegIndexDx, kInvalidReg , kVarAttrInReg },
|
||||||
|
{ kX86RegIndexAx, kInvalidReg , kVarAttrInReg }
|
||||||
|
};
|
||||||
|
|
||||||
|
static ASMJIT_INLINE const X86SpecialInst* X86SpecialInst_get(uint32_t instId, const Operand* opList, uint32_t opCount) {
|
||||||
|
switch (instId) {
|
||||||
case kX86InstIdCpuid:
|
case kX86InstIdCpuid:
|
||||||
return x86SpecialInstCpuid;
|
return x86SpecialInstCpuid;
|
||||||
|
|
||||||
@@ -600,6 +618,20 @@ static ASMJIT_INLINE const X86SpecialInst* X86SpecialInst_get(uint32_t code, con
|
|||||||
case kX86InstIdPblendvb:
|
case kX86InstIdPblendvb:
|
||||||
return x86SpecialInstBlend;
|
return x86SpecialInstBlend;
|
||||||
|
|
||||||
|
case kX86InstIdXrstor:
|
||||||
|
case kX86InstIdXrstor64:
|
||||||
|
case kX86InstIdXsave:
|
||||||
|
case kX86InstIdXsave64:
|
||||||
|
case kX86InstIdXsaveopt:
|
||||||
|
case kX86InstIdXsaveopt64:
|
||||||
|
return x86SpecialInstXsaveXrstor;
|
||||||
|
|
||||||
|
case kX86InstIdXgetbv:
|
||||||
|
return x86SpecialInstXgetbv;
|
||||||
|
|
||||||
|
case kX86InstIdXsetbv:
|
||||||
|
return x86SpecialInstXsetbv;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
@@ -976,7 +1008,7 @@ void X86Context::emitMoveVarOnStack(
|
|||||||
X86Reg r0, r1;
|
X86Reg r0, r1;
|
||||||
|
|
||||||
uint32_t regSize = compiler->getRegSize();
|
uint32_t regSize = compiler->getRegSize();
|
||||||
uint32_t instCode;
|
uint32_t instId;
|
||||||
|
|
||||||
switch (dstType) {
|
switch (dstType) {
|
||||||
case kVarTypeInt8:
|
case kVarTypeInt8:
|
||||||
@@ -1002,7 +1034,7 @@ void X86Context::emitMoveVarOnStack(
|
|||||||
r1.setSize(1);
|
r1.setSize(1);
|
||||||
r1.setCode(kX86RegTypeGpbLo, srcIndex);
|
r1.setCode(kX86RegTypeGpbLo, srcIndex);
|
||||||
|
|
||||||
instCode = (dstType == kVarTypeInt16 && srcType == kVarTypeInt8) ? kX86InstIdMovsx : kX86InstIdMovzx;
|
instId = (dstType == kVarTypeInt16 && srcType == kVarTypeInt8) ? kX86InstIdMovsx : kX86InstIdMovzx;
|
||||||
goto _ExtendMovGpD;
|
goto _ExtendMovGpD;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1027,7 +1059,7 @@ void X86Context::emitMoveVarOnStack(
|
|||||||
r1.setSize(1);
|
r1.setSize(1);
|
||||||
r1.setCode(kX86RegTypeGpbLo, srcIndex);
|
r1.setCode(kX86RegTypeGpbLo, srcIndex);
|
||||||
|
|
||||||
instCode = (dstType == kVarTypeInt32 && srcType == kVarTypeInt8) ? kX86InstIdMovsx : kX86InstIdMovzx;
|
instId = (dstType == kVarTypeInt32 && srcType == kVarTypeInt8) ? kX86InstIdMovsx : kX86InstIdMovzx;
|
||||||
goto _ExtendMovGpD;
|
goto _ExtendMovGpD;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1036,7 +1068,7 @@ void X86Context::emitMoveVarOnStack(
|
|||||||
r1.setSize(2);
|
r1.setSize(2);
|
||||||
r1.setCode(kX86RegTypeGpw, srcIndex);
|
r1.setCode(kX86RegTypeGpw, srcIndex);
|
||||||
|
|
||||||
instCode = (dstType == kVarTypeInt32 && srcType == kVarTypeInt16) ? kX86InstIdMovsx : kX86InstIdMovzx;
|
instId = (dstType == kVarTypeInt32 && srcType == kVarTypeInt16) ? kX86InstIdMovsx : kX86InstIdMovzx;
|
||||||
goto _ExtendMovGpD;
|
goto _ExtendMovGpD;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1060,7 +1092,7 @@ void X86Context::emitMoveVarOnStack(
|
|||||||
r1.setSize(1);
|
r1.setSize(1);
|
||||||
r1.setCode(kX86RegTypeGpbLo, srcIndex);
|
r1.setCode(kX86RegTypeGpbLo, srcIndex);
|
||||||
|
|
||||||
instCode = (dstType == kVarTypeInt64 && srcType == kVarTypeInt8) ? kX86InstIdMovsx : kX86InstIdMovzx;
|
instId = (dstType == kVarTypeInt64 && srcType == kVarTypeInt8) ? kX86InstIdMovsx : kX86InstIdMovzx;
|
||||||
goto _ExtendMovGpXQ;
|
goto _ExtendMovGpXQ;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1069,7 +1101,7 @@ void X86Context::emitMoveVarOnStack(
|
|||||||
r1.setSize(2);
|
r1.setSize(2);
|
||||||
r1.setCode(kX86RegTypeGpw, srcIndex);
|
r1.setCode(kX86RegTypeGpw, srcIndex);
|
||||||
|
|
||||||
instCode = (dstType == kVarTypeInt64 && srcType == kVarTypeInt16) ? kX86InstIdMovsx : kX86InstIdMovzx;
|
instId = (dstType == kVarTypeInt64 && srcType == kVarTypeInt16) ? kX86InstIdMovsx : kX86InstIdMovzx;
|
||||||
goto _ExtendMovGpXQ;
|
goto _ExtendMovGpXQ;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1078,7 +1110,7 @@ void X86Context::emitMoveVarOnStack(
|
|||||||
r1.setSize(4);
|
r1.setSize(4);
|
||||||
r1.setCode(kX86RegTypeGpd, srcIndex);
|
r1.setCode(kX86RegTypeGpd, srcIndex);
|
||||||
|
|
||||||
instCode = kX86InstIdMovsxd;
|
instId = kX86InstIdMovsxd;
|
||||||
if (dstType == kVarTypeInt64 && srcType == kVarTypeInt32)
|
if (dstType == kVarTypeInt64 && srcType == kVarTypeInt32)
|
||||||
goto _ExtendMovGpXQ;
|
goto _ExtendMovGpXQ;
|
||||||
else
|
else
|
||||||
@@ -1104,7 +1136,7 @@ void X86Context::emitMoveVarOnStack(
|
|||||||
r1.setSize(1);
|
r1.setSize(1);
|
||||||
r1.setCode(kX86RegTypeGpbLo, srcIndex);
|
r1.setCode(kX86RegTypeGpbLo, srcIndex);
|
||||||
|
|
||||||
instCode = kX86InstIdMovzx;
|
instId = kX86InstIdMovzx;
|
||||||
goto _ExtendMovGpXQ;
|
goto _ExtendMovGpXQ;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1113,7 +1145,7 @@ void X86Context::emitMoveVarOnStack(
|
|||||||
r1.setSize(2);
|
r1.setSize(2);
|
||||||
r1.setCode(kX86RegTypeGpw, srcIndex);
|
r1.setCode(kX86RegTypeGpw, srcIndex);
|
||||||
|
|
||||||
instCode = kX86InstIdMovzx;
|
instId = kX86InstIdMovzx;
|
||||||
goto _ExtendMovGpXQ;
|
goto _ExtendMovGpXQ;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1175,7 +1207,7 @@ _ExtendMovGpD:
|
|||||||
r0.setSize(4);
|
r0.setSize(4);
|
||||||
r0.setCode(kX86RegTypeGpd, srcIndex);
|
r0.setCode(kX86RegTypeGpd, srcIndex);
|
||||||
|
|
||||||
compiler->emit(instCode, r0, r1);
|
compiler->emit(instId, r0, r1);
|
||||||
compiler->emit(kX86InstIdMov, m0, r0);
|
compiler->emit(kX86InstIdMov, m0, r0);
|
||||||
return;
|
return;
|
||||||
|
|
||||||
@@ -1185,7 +1217,7 @@ _ExtendMovGpXQ:
|
|||||||
r0.setSize(8);
|
r0.setSize(8);
|
||||||
r0.setCode(kX86RegTypeGpq, srcIndex);
|
r0.setCode(kX86RegTypeGpq, srcIndex);
|
||||||
|
|
||||||
compiler->emit(instCode, r0, r1);
|
compiler->emit(instId, r0, r1);
|
||||||
compiler->emit(kX86InstIdMov, m0, r0);
|
compiler->emit(kX86InstIdMov, m0, r0);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
@@ -1193,7 +1225,7 @@ _ExtendMovGpXQ:
|
|||||||
r0.setSize(4);
|
r0.setSize(4);
|
||||||
r0.setCode(kX86RegTypeGpd, srcIndex);
|
r0.setCode(kX86RegTypeGpd, srcIndex);
|
||||||
|
|
||||||
compiler->emit(instCode, r0, r1);
|
compiler->emit(instId, r0, r1);
|
||||||
|
|
||||||
_ExtendMovGpDQ:
|
_ExtendMovGpDQ:
|
||||||
compiler->emit(kX86InstIdMov, m0, r0);
|
compiler->emit(kX86InstIdMov, m0, r0);
|
||||||
@@ -1749,8 +1781,8 @@ static ASMJIT_INLINE Node* X86Context_getOppositeJccFlow(JumpNode* jNode) {
|
|||||||
// ============================================================================
|
// ============================================================================
|
||||||
|
|
||||||
//! \internal
|
//! \internal
|
||||||
static void X86Context_prepareSingleVarInst(uint32_t code, VarAttr* va) {
|
static void X86Context_prepareSingleVarInst(uint32_t instId, VarAttr* va) {
|
||||||
switch (code) {
|
switch (instId) {
|
||||||
// - andn reg, reg ; Set all bits in reg to 0.
|
// - andn reg, reg ; Set all bits in reg to 0.
|
||||||
// - xor/pxor reg, reg ; Set all bits in reg to 0.
|
// - xor/pxor reg, reg ; Set all bits in reg to 0.
|
||||||
// - sub/psub reg, reg ; Set all bits in reg to 0.
|
// - sub/psub reg, reg ; Set all bits in reg to 0.
|
||||||
@@ -1763,7 +1795,7 @@ static void X86Context_prepareSingleVarInst(uint32_t code, VarAttr* va) {
|
|||||||
case kX86InstIdPsubsb : case kX86InstIdPsubsw : case kX86InstIdPsubusb : case kX86InstIdPsubusw :
|
case kX86InstIdPsubsb : case kX86InstIdPsubsw : case kX86InstIdPsubusb : case kX86InstIdPsubusw :
|
||||||
case kX86InstIdPcmpeqb : case kX86InstIdPcmpeqw : case kX86InstIdPcmpeqd : case kX86InstIdPcmpeqq :
|
case kX86InstIdPcmpeqb : case kX86InstIdPcmpeqw : case kX86InstIdPcmpeqd : case kX86InstIdPcmpeqq :
|
||||||
case kX86InstIdPcmpgtb : case kX86InstIdPcmpgtw : case kX86InstIdPcmpgtd : case kX86InstIdPcmpgtq :
|
case kX86InstIdPcmpgtb : case kX86InstIdPcmpgtw : case kX86InstIdPcmpgtd : case kX86InstIdPcmpgtq :
|
||||||
va->delFlags(kVarAttrInReg);
|
va->andNotFlags(kVarAttrInReg);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// - and reg, reg ; Nop.
|
// - and reg, reg ; Nop.
|
||||||
@@ -1772,7 +1804,7 @@ static void X86Context_prepareSingleVarInst(uint32_t code, VarAttr* va) {
|
|||||||
case kX86InstIdAnd : case kX86InstIdAndpd : case kX86InstIdAndps : case kX86InstIdPand :
|
case kX86InstIdAnd : case kX86InstIdAndpd : case kX86InstIdAndps : case kX86InstIdPand :
|
||||||
case kX86InstIdOr : case kX86InstIdOrpd : case kX86InstIdOrps : case kX86InstIdPor :
|
case kX86InstIdOr : case kX86InstIdOrpd : case kX86InstIdOrps : case kX86InstIdPor :
|
||||||
case kX86InstIdXchg :
|
case kX86InstIdXchg :
|
||||||
va->delFlags(kVarAttrOutReg);
|
va->andNotFlags(kVarAttrOutReg);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1824,7 +1856,7 @@ static ASMJIT_INLINE X86RegMask X86Context_getUsedArgs(X86Context* self, X86Call
|
|||||||
const FuncInOut& arg = decl->getArg(i);
|
const FuncInOut& arg = decl->getArg(i);
|
||||||
if (!arg.hasRegIndex())
|
if (!arg.hasRegIndex())
|
||||||
continue;
|
continue;
|
||||||
regs.add(x86VarTypeToClass(arg.getVarType()), IntUtil::mask(arg.getRegIndex()));
|
regs.or_(x86VarTypeToClass(arg.getVarType()), IntUtil::mask(arg.getRegIndex()));
|
||||||
}
|
}
|
||||||
|
|
||||||
return regs;
|
return regs;
|
||||||
@@ -2117,7 +2149,7 @@ Error X86Context::fetch() {
|
|||||||
goto _NoMemory; \
|
goto _NoMemory; \
|
||||||
\
|
\
|
||||||
X86RegCount vaIndex; \
|
X86RegCount vaIndex; \
|
||||||
vaIndex.makeIndex(regCount); \
|
vaIndex.indexFromRegCount(regCount); \
|
||||||
\
|
\
|
||||||
map->_vaCount = vaCount; \
|
map->_vaCount = vaCount; \
|
||||||
map->_count = regCount; \
|
map->_count = regCount; \
|
||||||
@@ -2181,7 +2213,7 @@ Error X86Context::fetch() {
|
|||||||
regCount.add(_Vd_->getClass()); \
|
regCount.add(_Vd_->getClass()); \
|
||||||
} \
|
} \
|
||||||
\
|
\
|
||||||
_Va_->addFlags(_Flags_); \
|
_Va_->orFlags(_Flags_); \
|
||||||
_Va_->addVarCount(1); \
|
_Va_->addVarCount(1); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
@@ -2230,18 +2262,13 @@ _NextGroup:
|
|||||||
VI_BEGIN();
|
VI_BEGIN();
|
||||||
|
|
||||||
if (node->getHint() == kVarHintAlloc) {
|
if (node->getHint() == kVarHintAlloc) {
|
||||||
uint32_t remain[kX86RegClassCount];
|
uint32_t remain[_kX86RegClassManagedCount];
|
||||||
HintNode* cur = node;
|
HintNode* cur = node;
|
||||||
|
|
||||||
remain[kX86RegClassGp ] = _regCount.getGp() - 1 - func->hasFuncFlag(kFuncFlagIsNaked);
|
remain[kX86RegClassGp ] = _regCount.getGp() - 1 - func->hasFuncFlag(kFuncFlagIsNaked);
|
||||||
remain[kX86RegClassFp ] = _regCount.getFp();
|
|
||||||
remain[kX86RegClassMm ] = _regCount.getMm();
|
remain[kX86RegClassMm ] = _regCount.getMm();
|
||||||
|
remain[kX86RegClassK ] = _regCount.getK();
|
||||||
// Correct. Instead of using `getXyz()` which may be 32 in 64-bit
|
remain[kX86RegClassXyz] = _regCount.getXyz();
|
||||||
// mode we use `getGp()`. The reason is that not all registers are
|
|
||||||
// accessible by all instructions when using AVX512, this makes the
|
|
||||||
// algorithm safe.
|
|
||||||
remain[kX86RegClassXyz] = _regCount.getGp();
|
|
||||||
|
|
||||||
// Merge as many alloc-hints as possible.
|
// Merge as many alloc-hints as possible.
|
||||||
for (;;) {
|
for (;;) {
|
||||||
@@ -2333,14 +2360,14 @@ _NextGroup:
|
|||||||
case kNodeTypeInst: {
|
case kNodeTypeInst: {
|
||||||
InstNode* node = static_cast<InstNode*>(node_);
|
InstNode* node = static_cast<InstNode*>(node_);
|
||||||
|
|
||||||
uint32_t code = node->getCode();
|
uint32_t instId = node->getInstId();
|
||||||
uint32_t flags = node->getFlags();
|
uint32_t flags = node->getFlags();
|
||||||
|
|
||||||
Operand* opList = node->getOpList();
|
Operand* opList = node->getOpList();
|
||||||
uint32_t opCount = node->getOpCount();
|
uint32_t opCount = node->getOpCount();
|
||||||
|
|
||||||
if (opCount) {
|
if (opCount) {
|
||||||
const X86InstExtendedInfo& extendedInfo = _x86InstInfo[code].getExtendedInfo();
|
const X86InstExtendedInfo& extendedInfo = _x86InstInfo[instId].getExtendedInfo();
|
||||||
const X86SpecialInst* special = NULL;
|
const X86SpecialInst* special = NULL;
|
||||||
VI_BEGIN();
|
VI_BEGIN();
|
||||||
|
|
||||||
@@ -2348,7 +2375,7 @@ _NextGroup:
|
|||||||
if (extendedInfo.isFp())
|
if (extendedInfo.isFp())
|
||||||
flags |= kNodeFlagIsFp;
|
flags |= kNodeFlagIsFp;
|
||||||
|
|
||||||
if (extendedInfo.isSpecial() && (special = X86SpecialInst_get(code, opList, opCount)) != NULL)
|
if (extendedInfo.isSpecial() && (special = X86SpecialInst_get(instId, opList, opCount)) != NULL)
|
||||||
flags |= kNodeFlagIsSpecial;
|
flags |= kNodeFlagIsSpecial;
|
||||||
|
|
||||||
uint32_t gpAllowedMask = 0xFFFFFFFF;
|
uint32_t gpAllowedMask = 0xFFFFFFFF;
|
||||||
@@ -2363,7 +2390,7 @@ _NextGroup:
|
|||||||
VI_MERGE_VAR(vd, va, 0, gaRegs[vd->getClass()] & gpAllowedMask);
|
VI_MERGE_VAR(vd, va, 0, gaRegs[vd->getClass()] & gpAllowedMask);
|
||||||
|
|
||||||
if (static_cast<X86Var*>(op)->isGpb()) {
|
if (static_cast<X86Var*>(op)->isGpb()) {
|
||||||
va->addFlags(static_cast<X86GpVar*>(op)->isGpbLo() ? kX86VarAttrGpbLo : kX86VarAttrGpbHi);
|
va->orFlags(static_cast<X86GpVar*>(op)->isGpbLo() ? kX86VarAttrGpbLo : kX86VarAttrGpbHi);
|
||||||
if (arch == kArchX86) {
|
if (arch == kArchX86) {
|
||||||
// If a byte register is accessed in 32-bit mode we have to limit
|
// If a byte register is accessed in 32-bit mode we have to limit
|
||||||
// all allocable registers for that variable to eax/ebx/ecx/edx.
|
// all allocable registers for that variable to eax/ebx/ecx/edx.
|
||||||
@@ -2401,17 +2428,17 @@ _NextGroup:
|
|||||||
|
|
||||||
if (inReg != kInvalidReg) {
|
if (inReg != kInvalidReg) {
|
||||||
uint32_t mask = IntUtil::mask(inReg);
|
uint32_t mask = IntUtil::mask(inReg);
|
||||||
inRegs.add(c, mask);
|
inRegs.or_(c, mask);
|
||||||
va->addInRegs(mask);
|
va->addInRegs(mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (outReg != kInvalidReg) {
|
if (outReg != kInvalidReg) {
|
||||||
uint32_t mask = IntUtil::mask(outReg);
|
uint32_t mask = IntUtil::mask(outReg);
|
||||||
outRegs.add(c, mask);
|
outRegs.or_(c, mask);
|
||||||
va->setOutRegIndex(outReg);
|
va->setOutRegIndex(outReg);
|
||||||
}
|
}
|
||||||
|
|
||||||
va->addFlags(special[i].flags);
|
va->orFlags(special[i].flags);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
uint32_t inFlags = kVarAttrInReg;
|
uint32_t inFlags = kVarAttrInReg;
|
||||||
@@ -2426,7 +2453,7 @@ _NextGroup:
|
|||||||
// but there are some exceptions based on the operands' size
|
// but there are some exceptions based on the operands' size
|
||||||
// and type.
|
// and type.
|
||||||
if (extendedInfo.isMove()) {
|
if (extendedInfo.isMove()) {
|
||||||
uint32_t movSize = extendedInfo.getMoveSize();
|
uint32_t movSize = extendedInfo.getWriteSize();
|
||||||
uint32_t varSize = vd->getSize();
|
uint32_t varSize = vd->getSize();
|
||||||
|
|
||||||
// Exception - If the source operand is a memory location
|
// Exception - If the source operand is a memory location
|
||||||
@@ -2461,7 +2488,7 @@ _NextGroup:
|
|||||||
combinedFlags = inFlags;
|
combinedFlags = inFlags;
|
||||||
}
|
}
|
||||||
// Imul.
|
// Imul.
|
||||||
else if (code == kX86InstIdImul && opCount == 3) {
|
else if (instId == kX86InstIdImul && opCount == 3) {
|
||||||
combinedFlags = outFlags;
|
combinedFlags = outFlags;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -2470,13 +2497,13 @@ _NextGroup:
|
|||||||
combinedFlags = inFlags;
|
combinedFlags = inFlags;
|
||||||
|
|
||||||
// Idiv is a special instruction, never handled here.
|
// Idiv is a special instruction, never handled here.
|
||||||
ASMJIT_ASSERT(code != kX86InstIdIdiv);
|
ASMJIT_ASSERT(instId != kX86InstIdIdiv);
|
||||||
|
|
||||||
// Xchg/Xadd/Imul.
|
// Xchg/Xadd/Imul.
|
||||||
if (extendedInfo.isXchg() || (code == kX86InstIdImul && opCount == 3 && i == 1))
|
if (extendedInfo.isXchg() || (instId == kX86InstIdImul && opCount == 3 && i == 1))
|
||||||
combinedFlags = inFlags | outFlags;
|
combinedFlags = inFlags | outFlags;
|
||||||
}
|
}
|
||||||
va->addFlags(combinedFlags);
|
va->orFlags(combinedFlags);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (op->isMem()) {
|
else if (op->isMem()) {
|
||||||
@@ -2488,7 +2515,7 @@ _NextGroup:
|
|||||||
if (!vd->isStack()) {
|
if (!vd->isStack()) {
|
||||||
VI_MERGE_VAR(vd, va, 0, gaRegs[vd->getClass()] & gpAllowedMask);
|
VI_MERGE_VAR(vd, va, 0, gaRegs[vd->getClass()] & gpAllowedMask);
|
||||||
if (m->getMemType() == kMemTypeBaseIndex) {
|
if (m->getMemType() == kMemTypeBaseIndex) {
|
||||||
va->addFlags(kVarAttrInReg);
|
va->orFlags(kVarAttrInReg);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
uint32_t inFlags = kVarAttrInMem;
|
uint32_t inFlags = kVarAttrInMem;
|
||||||
@@ -2503,7 +2530,7 @@ _NextGroup:
|
|||||||
// as if it's just move to the register. It's just a bit
|
// as if it's just move to the register. It's just a bit
|
||||||
// simpler as there are no special cases.
|
// simpler as there are no special cases.
|
||||||
if (extendedInfo.isMove()) {
|
if (extendedInfo.isMove()) {
|
||||||
uint32_t movSize = IntUtil::iMax<uint32_t>(extendedInfo.getMoveSize(), m->getSize());
|
uint32_t movSize = IntUtil::iMax<uint32_t>(extendedInfo.getWriteSize(), m->getSize());
|
||||||
uint32_t varSize = vd->getSize();
|
uint32_t varSize = vd->getSize();
|
||||||
|
|
||||||
if (movSize >= varSize)
|
if (movSize >= varSize)
|
||||||
@@ -2523,7 +2550,7 @@ _NextGroup:
|
|||||||
combinedFlags = inFlags | outFlags;
|
combinedFlags = inFlags | outFlags;
|
||||||
}
|
}
|
||||||
|
|
||||||
va->addFlags(combinedFlags);
|
va->orFlags(combinedFlags);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -2533,7 +2560,7 @@ _NextGroup:
|
|||||||
vd = compiler->getVdById(m->getIndex());
|
vd = compiler->getVdById(m->getIndex());
|
||||||
VI_MERGE_VAR(vd, va, 0, gaRegs[kX86RegClassGp] & gpAllowedMask);
|
VI_MERGE_VAR(vd, va, 0, gaRegs[kX86RegClassGp] & gpAllowedMask);
|
||||||
va->andAllocableRegs(indexMask);
|
va->andAllocableRegs(indexMask);
|
||||||
va->addFlags(kVarAttrInReg);
|
va->orFlags(kVarAttrInReg);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -2543,7 +2570,7 @@ _NextGroup:
|
|||||||
// Handle instructions which result in zeros/ones or nop if used with the
|
// Handle instructions which result in zeros/ones or nop if used with the
|
||||||
// same destination and source operand.
|
// same destination and source operand.
|
||||||
if (vaCount == 1 && opCount >= 2 && opList[0].isVar() && opList[1].isVar() && !node->hasMemOp())
|
if (vaCount == 1 && opCount >= 2 && opList[0].isVar() && opList[1].isVar() && !node->hasMemOp())
|
||||||
X86Context_prepareSingleVarInst(code, &vaTmpList[0]);
|
X86Context_prepareSingleVarInst(instId, &vaTmpList[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
VI_END(node_);
|
VI_END(node_);
|
||||||
@@ -2577,7 +2604,7 @@ _NextGroup:
|
|||||||
// backward jump. This behavior can be overridden by using
|
// backward jump. This behavior can be overridden by using
|
||||||
// `kInstOptionTaken` when the instruction is created.
|
// `kInstOptionTaken` when the instruction is created.
|
||||||
if (!jNode->isTaken() && opCount == 1 && jTargetFlowId <= flowId) {
|
if (!jNode->isTaken() && opCount == 1 && jTargetFlowId <= flowId) {
|
||||||
jNode->addFlags(kNodeFlagIsTaken);
|
jNode->orFlags(kNodeFlagIsTaken);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (jNext->isFetched()) {
|
else if (jNext->isFetched()) {
|
||||||
@@ -2623,18 +2650,18 @@ _NextGroup:
|
|||||||
|
|
||||||
if (arg.hasRegIndex()) {
|
if (arg.hasRegIndex()) {
|
||||||
if (x86VarTypeToClass(aType) == vd->getClass()) {
|
if (x86VarTypeToClass(aType) == vd->getClass()) {
|
||||||
va->addFlags(kVarAttrOutReg);
|
va->orFlags(kVarAttrOutReg);
|
||||||
va->setOutRegIndex(arg.getRegIndex());
|
va->setOutRegIndex(arg.getRegIndex());
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
va->addFlags(kVarAttrOutConv);
|
va->orFlags(kVarAttrOutConv);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if ((x86VarTypeToClass(aType) == vd->getClass()) ||
|
if ((x86VarTypeToClass(aType) == vd->getClass()) ||
|
||||||
(vType == kX86VarTypeXmmSs && aType == kVarTypeFp32) ||
|
(vType == kX86VarTypeXmmSs && aType == kVarTypeFp32) ||
|
||||||
(vType == kX86VarTypeXmmSd && aType == kVarTypeFp64)) {
|
(vType == kX86VarTypeXmmSd && aType == kVarTypeFp64)) {
|
||||||
va->addFlags(kVarAttrOutMem);
|
va->orFlags(kVarAttrOutMem);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
// TODO: [COMPILER] Not implemented.
|
// TODO: [COMPILER] Not implemented.
|
||||||
@@ -2678,8 +2705,8 @@ _NextGroup:
|
|||||||
// TODO: [COMPILER] Fix RetNode fetch.
|
// TODO: [COMPILER] Fix RetNode fetch.
|
||||||
VI_MERGE_VAR(vd, va, 0, 0);
|
VI_MERGE_VAR(vd, va, 0, 0);
|
||||||
va->setInRegs(i == 0 ? IntUtil::mask(kX86RegIndexAx) : IntUtil::mask(kX86RegIndexDx));
|
va->setInRegs(i == 0 ? IntUtil::mask(kX86RegIndexAx) : IntUtil::mask(kX86RegIndexDx));
|
||||||
va->addFlags(kVarAttrInReg);
|
va->orFlags(kVarAttrInReg);
|
||||||
inRegs.add(retClass, va->getInRegs());
|
inRegs.or_(retClass, va->getInRegs());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -2719,7 +2746,7 @@ _NextGroup:
|
|||||||
vd = compiler->getVdById(target->getId());
|
vd = compiler->getVdById(target->getId());
|
||||||
VI_MERGE_VAR(vd, va, 0, 0);
|
VI_MERGE_VAR(vd, va, 0, 0);
|
||||||
|
|
||||||
va->addFlags(kVarAttrInReg | kVarAttrInCall);
|
va->orFlags(kVarAttrInReg | kVarAttrInCall);
|
||||||
if (va->getInRegs() == 0)
|
if (va->getInRegs() == 0)
|
||||||
va->addAllocableRegs(gpAllocableMask);
|
va->addAllocableRegs(gpAllocableMask);
|
||||||
}
|
}
|
||||||
@@ -2731,12 +2758,12 @@ _NextGroup:
|
|||||||
if (!vd->isStack()) {
|
if (!vd->isStack()) {
|
||||||
VI_MERGE_VAR(vd, va, 0, 0);
|
VI_MERGE_VAR(vd, va, 0, 0);
|
||||||
if (m->getMemType() == kMemTypeBaseIndex) {
|
if (m->getMemType() == kMemTypeBaseIndex) {
|
||||||
va->addFlags(kVarAttrInReg | kVarAttrInCall);
|
va->orFlags(kVarAttrInReg | kVarAttrInCall);
|
||||||
if (va->getInRegs() == 0)
|
if (va->getInRegs() == 0)
|
||||||
va->addAllocableRegs(gpAllocableMask);
|
va->addAllocableRegs(gpAllocableMask);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
va->addFlags(kVarAttrInMem | kVarAttrInCall);
|
va->orFlags(kVarAttrInMem | kVarAttrInCall);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -2746,7 +2773,7 @@ _NextGroup:
|
|||||||
vd = compiler->getVdById(m->getIndex());
|
vd = compiler->getVdById(m->getIndex());
|
||||||
VI_MERGE_VAR(vd, va, 0, 0);
|
VI_MERGE_VAR(vd, va, 0, 0);
|
||||||
|
|
||||||
va->addFlags(kVarAttrInReg | kVarAttrInCall);
|
va->orFlags(kVarAttrInReg | kVarAttrInCall);
|
||||||
if ((va->getInRegs() & ~indexMask) == 0)
|
if ((va->getInRegs() & ~indexMask) == 0)
|
||||||
va->andAllocableRegs(gpAllocableMask & indexMask);
|
va->andAllocableRegs(gpAllocableMask & indexMask);
|
||||||
}
|
}
|
||||||
@@ -2769,10 +2796,10 @@ _NextGroup:
|
|||||||
|
|
||||||
if (vd->getClass() == argClass) {
|
if (vd->getClass() == argClass) {
|
||||||
va->addInRegs(IntUtil::mask(arg.getRegIndex()));
|
va->addInRegs(IntUtil::mask(arg.getRegIndex()));
|
||||||
va->addFlags(kVarAttrInReg | kVarAttrInArg);
|
va->orFlags(kVarAttrInReg | kVarAttrInArg);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
va->addFlags(kVarAttrInConv | kVarAttrInArg);
|
va->orFlags(kVarAttrInConv | kVarAttrInArg);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// If this is a stack-based argument we insert SArgNode instead of
|
// If this is a stack-based argument we insert SArgNode instead of
|
||||||
@@ -2803,18 +2830,18 @@ _NextGroup:
|
|||||||
|
|
||||||
if (vd->getClass() == retClass) {
|
if (vd->getClass() == retClass) {
|
||||||
va->setOutRegIndex(ret.getRegIndex());
|
va->setOutRegIndex(ret.getRegIndex());
|
||||||
va->addFlags(kVarAttrOutReg | kVarAttrOutRet);
|
va->orFlags(kVarAttrOutReg | kVarAttrOutRet);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
va->addFlags(kVarAttrOutConv | kVarAttrOutRet);
|
va->orFlags(kVarAttrOutConv | kVarAttrOutRet);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Init clobbered.
|
// Init clobbered.
|
||||||
clobberedRegs.set(kX86RegClassGp , IntUtil::bits(_regCount.getGp()) & (~decl->getPreserved(kX86RegClassGp)));
|
clobberedRegs.set(kX86RegClassGp , IntUtil::bits(_regCount.getGp()) & (~decl->getPreserved(kX86RegClassGp )));
|
||||||
clobberedRegs.set(kX86RegClassFp , IntUtil::bits(_regCount.getFp()));
|
clobberedRegs.set(kX86RegClassMm , IntUtil::bits(_regCount.getMm()) & (~decl->getPreserved(kX86RegClassMm )));
|
||||||
clobberedRegs.set(kX86RegClassMm , IntUtil::bits(_regCount.getMm()) & (~decl->getPreserved(kX86RegClassMm)));
|
clobberedRegs.set(kX86RegClassK , IntUtil::bits(_regCount.getK()) & (~decl->getPreserved(kX86RegClassK )));
|
||||||
clobberedRegs.set(kX86RegClassXyz, IntUtil::bits(_regCount.getXyz()) & (~decl->getPreserved(kX86RegClassXyz)));
|
clobberedRegs.set(kX86RegClassXyz, IntUtil::bits(_regCount.getXyz()) & (~decl->getPreserved(kX86RegClassXyz)));
|
||||||
|
|
||||||
VI_END(node_);
|
VI_END(node_);
|
||||||
@@ -2860,7 +2887,7 @@ Error X86Context::annotate() {
|
|||||||
if (node_->getComment() == NULL) {
|
if (node_->getComment() == NULL) {
|
||||||
if (node_->getType() == kNodeTypeInst) {
|
if (node_->getType() == kNodeTypeInst) {
|
||||||
InstNode* node = static_cast<InstNode*>(node_);
|
InstNode* node = static_cast<InstNode*>(node_);
|
||||||
X86Context_annotateInstruction(this, sb, node->getCode(), node->getOpList(), node->getOpCount());
|
X86Context_annotateInstruction(this, sb, node->getInstId(), node->getOpList(), node->getOpCount());
|
||||||
|
|
||||||
node_->setComment(static_cast<char*>(sa.dup(sb.getData(), sb.getLength() + 1)));
|
node_->setComment(static_cast<char*>(sa.dup(sb.getData(), sb.getLength() + 1)));
|
||||||
maxLen = IntUtil::iMax<uint32_t>(maxLen, static_cast<uint32_t>(sb.getLength()));
|
maxLen = IntUtil::iMax<uint32_t>(maxLen, static_cast<uint32_t>(sb.getLength()));
|
||||||
@@ -2962,7 +2989,7 @@ protected:
|
|||||||
//! Variable map.
|
//! Variable map.
|
||||||
X86VarMap* _map;
|
X86VarMap* _map;
|
||||||
//! VarAttr list (per register class).
|
//! VarAttr list (per register class).
|
||||||
VarAttr* _vaList[4];
|
VarAttr* _vaList[_kX86RegClassManagedCount];
|
||||||
|
|
||||||
//! Count of all VarAttr's.
|
//! Count of all VarAttr's.
|
||||||
uint32_t _vaCount;
|
uint32_t _vaCount;
|
||||||
@@ -2990,8 +3017,8 @@ ASMJIT_INLINE void X86BaseAlloc::init(Node* node, X86VarMap* map) {
|
|||||||
{
|
{
|
||||||
VarAttr* va = map->getVaList();
|
VarAttr* va = map->getVaList();
|
||||||
_vaList[kX86RegClassGp ] = va;
|
_vaList[kX86RegClassGp ] = va;
|
||||||
_vaList[kX86RegClassFp ] = va + map->getVaStart(kX86RegClassFp );
|
|
||||||
_vaList[kX86RegClassMm ] = va + map->getVaStart(kX86RegClassMm );
|
_vaList[kX86RegClassMm ] = va + map->getVaStart(kX86RegClassMm );
|
||||||
|
_vaList[kX86RegClassK ] = va + map->getVaStart(kX86RegClassK );
|
||||||
_vaList[kX86RegClassXyz] = va + map->getVaStart(kX86RegClassXyz);
|
_vaList[kX86RegClassXyz] = va + map->getVaStart(kX86RegClassXyz);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3223,8 +3250,8 @@ ASMJIT_INLINE Error X86VarAlloc::run(Node* node_) {
|
|||||||
cleanup();
|
cleanup();
|
||||||
|
|
||||||
// Update clobbered mask.
|
// Update clobbered mask.
|
||||||
_context->_clobberedRegs.add(_willAlloc);
|
_context->_clobberedRegs.or_(_willAlloc);
|
||||||
_context->_clobberedRegs.add(map->_clobberedRegs);
|
_context->_clobberedRegs.or_(map->_clobberedRegs);
|
||||||
|
|
||||||
// Unuse.
|
// Unuse.
|
||||||
unuseAfter<kX86RegClassGp >();
|
unuseAfter<kX86RegClassGp >();
|
||||||
@@ -3245,7 +3272,7 @@ ASMJIT_INLINE void X86VarAlloc::init(Node* node, X86VarMap* map) {
|
|||||||
// add more registers when assigning registers to variables that don't need
|
// add more registers when assigning registers to variables that don't need
|
||||||
// any specific register.
|
// any specific register.
|
||||||
_willAlloc = map->_inRegs;
|
_willAlloc = map->_inRegs;
|
||||||
_willAlloc.add(map->_outRegs);
|
_willAlloc.or_(map->_outRegs);
|
||||||
_willSpill.reset();
|
_willSpill.reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3308,7 +3335,7 @@ ASMJIT_INLINE void X86VarAlloc::plan() {
|
|||||||
|
|
||||||
if ((mandatoryRegs | allocableRegs) & regMask) {
|
if ((mandatoryRegs | allocableRegs) & regMask) {
|
||||||
va->setOutRegIndex(regIndex);
|
va->setOutRegIndex(regIndex);
|
||||||
va->addFlags(kVarAttrAllocOutDone);
|
va->orFlags(kVarAttrAllocOutDone);
|
||||||
|
|
||||||
if (mandatoryRegs & regMask) {
|
if (mandatoryRegs & regMask) {
|
||||||
// Case 'a' - 'willAlloc' contains initially all inRegs from all VarAttr's.
|
// Case 'a' - 'willAlloc' contains initially all inRegs from all VarAttr's.
|
||||||
@@ -3329,7 +3356,7 @@ ASMJIT_INLINE void X86VarAlloc::plan() {
|
|||||||
else {
|
else {
|
||||||
if ((mandatoryRegs | allocableRegs) & regMask) {
|
if ((mandatoryRegs | allocableRegs) & regMask) {
|
||||||
va->setInRegIndex(regIndex);
|
va->setInRegIndex(regIndex);
|
||||||
va->addFlags(kVarAttrAllocInDone);
|
va->orFlags(kVarAttrAllocInDone);
|
||||||
|
|
||||||
if (mandatoryRegs & regMask) {
|
if (mandatoryRegs & regMask) {
|
||||||
// Case 'a' - 'willAlloc' contains initially all inRegs from all VarAttr's.
|
// Case 'a' - 'willAlloc' contains initially all inRegs from all VarAttr's.
|
||||||
@@ -3377,7 +3404,7 @@ ASMJIT_INLINE void X86VarAlloc::plan() {
|
|||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
ASMJIT_TLOG("[RA-PLAN ] Done\n");
|
ASMJIT_TLOG("[RA-PLAN ] Done\n");
|
||||||
va->addFlags(kVarAttrAllocInDone);
|
va->orFlags(kVarAttrAllocInDone);
|
||||||
addVaDone(C);
|
addVaDone(C);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -3563,12 +3590,12 @@ ASMJIT_INLINE void X86VarAlloc::alloc() {
|
|||||||
VarAttr* bVa = bVd->getVa();
|
VarAttr* bVa = bVd->getVa();
|
||||||
_context->swapGp(aVd, bVd);
|
_context->swapGp(aVd, bVd);
|
||||||
|
|
||||||
aVa->addFlags(kVarAttrAllocInDone);
|
aVa->orFlags(kVarAttrAllocInDone);
|
||||||
addVaDone(C);
|
addVaDone(C);
|
||||||
|
|
||||||
// Doublehit, two registers allocated by a single swap.
|
// Doublehit, two registers allocated by a single swap.
|
||||||
if (bVa != NULL && bVa->getInRegIndex() == aIndex) {
|
if (bVa != NULL && bVa->getInRegIndex() == aIndex) {
|
||||||
bVa->addFlags(kVarAttrAllocInDone);
|
bVa->orFlags(kVarAttrAllocInDone);
|
||||||
addVaDone(C);
|
addVaDone(C);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3579,7 +3606,7 @@ ASMJIT_INLINE void X86VarAlloc::alloc() {
|
|||||||
else if (aIndex != kInvalidReg) {
|
else if (aIndex != kInvalidReg) {
|
||||||
_context->move<C>(aVd, bIndex);
|
_context->move<C>(aVd, bIndex);
|
||||||
|
|
||||||
aVa->addFlags(kVarAttrAllocInDone);
|
aVa->orFlags(kVarAttrAllocInDone);
|
||||||
addVaDone(C);
|
addVaDone(C);
|
||||||
|
|
||||||
didWork = true;
|
didWork = true;
|
||||||
@@ -3588,7 +3615,7 @@ ASMJIT_INLINE void X86VarAlloc::alloc() {
|
|||||||
else {
|
else {
|
||||||
_context->alloc<C>(aVd, bIndex);
|
_context->alloc<C>(aVd, bIndex);
|
||||||
|
|
||||||
aVa->addFlags(kVarAttrAllocInDone);
|
aVa->orFlags(kVarAttrAllocInDone);
|
||||||
addVaDone(C);
|
addVaDone(C);
|
||||||
|
|
||||||
didWork = true;
|
didWork = true;
|
||||||
@@ -3613,7 +3640,7 @@ ASMJIT_INLINE void X86VarAlloc::alloc() {
|
|||||||
_context->attach<C>(vd, regIndex, false);
|
_context->attach<C>(vd, regIndex, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
va->addFlags(kVarAttrAllocOutDone);
|
va->orFlags(kVarAttrAllocOutDone);
|
||||||
addVaDone(C);
|
addVaDone(C);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -3730,7 +3757,7 @@ ASMJIT_INLINE void X86VarAlloc::modified() {
|
|||||||
uint32_t regMask = IntUtil::mask(regIndex);
|
uint32_t regMask = IntUtil::mask(regIndex);
|
||||||
|
|
||||||
vd->setModified(true);
|
vd->setModified(true);
|
||||||
_context->_x86State._modified.add(C, regMask);
|
_context->_x86State._modified.or_(C, regMask);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -3972,7 +3999,7 @@ ASMJIT_INLINE void X86CallAlloc::plan() {
|
|||||||
// is not clobbered (i.e. it will survive function call).
|
// is not clobbered (i.e. it will survive function call).
|
||||||
if ((regMask & inRegs) != 0 || ((regMask & ~clobbered) != 0 && (vaFlags & kVarAttrUnuse) == 0)) {
|
if ((regMask & inRegs) != 0 || ((regMask & ~clobbered) != 0 && (vaFlags & kVarAttrUnuse) == 0)) {
|
||||||
va->setInRegIndex(regIndex);
|
va->setInRegIndex(regIndex);
|
||||||
va->addFlags(kVarAttrAllocInDone);
|
va->orFlags(kVarAttrAllocInDone);
|
||||||
addVaDone(C);
|
addVaDone(C);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
@@ -3985,7 +4012,7 @@ ASMJIT_INLINE void X86CallAlloc::plan() {
|
|||||||
willFree |= regMask;
|
willFree |= regMask;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
va->addFlags(kVarAttrAllocInDone);
|
va->orFlags(kVarAttrAllocInDone);
|
||||||
addVaDone(C);
|
addVaDone(C);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -4131,12 +4158,12 @@ ASMJIT_INLINE void X86CallAlloc::alloc() {
|
|||||||
if (C == kX86RegClassGp) {
|
if (C == kX86RegClassGp) {
|
||||||
_context->swapGp(aVd, bVd);
|
_context->swapGp(aVd, bVd);
|
||||||
|
|
||||||
aVa->addFlags(kVarAttrAllocInDone);
|
aVa->orFlags(kVarAttrAllocInDone);
|
||||||
addVaDone(C);
|
addVaDone(C);
|
||||||
|
|
||||||
// Doublehit, two registers allocated by a single swap.
|
// Doublehit, two registers allocated by a single swap.
|
||||||
if (bVa != NULL && bVa->getInRegIndex() == aIndex) {
|
if (bVa != NULL && bVa->getInRegIndex() == aIndex) {
|
||||||
bVa->addFlags(kVarAttrAllocInDone);
|
bVa->orFlags(kVarAttrAllocInDone);
|
||||||
addVaDone(C);
|
addVaDone(C);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -4147,7 +4174,7 @@ ASMJIT_INLINE void X86CallAlloc::alloc() {
|
|||||||
else if (aIndex != kInvalidReg) {
|
else if (aIndex != kInvalidReg) {
|
||||||
_context->move<C>(aVd, bIndex);
|
_context->move<C>(aVd, bIndex);
|
||||||
|
|
||||||
aVa->addFlags(kVarAttrAllocInDone);
|
aVa->orFlags(kVarAttrAllocInDone);
|
||||||
addVaDone(C);
|
addVaDone(C);
|
||||||
|
|
||||||
didWork = true;
|
didWork = true;
|
||||||
@@ -4156,7 +4183,7 @@ ASMJIT_INLINE void X86CallAlloc::alloc() {
|
|||||||
else {
|
else {
|
||||||
_context->alloc<C>(aVd, bIndex);
|
_context->alloc<C>(aVd, bIndex);
|
||||||
|
|
||||||
aVa->addFlags(kVarAttrAllocInDone);
|
aVa->orFlags(kVarAttrAllocInDone);
|
||||||
addVaDone(C);
|
addVaDone(C);
|
||||||
|
|
||||||
didWork = true;
|
didWork = true;
|
||||||
@@ -4227,7 +4254,7 @@ ASMJIT_INLINE void X86CallAlloc::duplicate() {
|
|||||||
for (uint32_t dupIndex = 0; inRegs != 0; dupIndex++, inRegs >>= 1) {
|
for (uint32_t dupIndex = 0; inRegs != 0; dupIndex++, inRegs >>= 1) {
|
||||||
if (inRegs & 0x1) {
|
if (inRegs & 0x1) {
|
||||||
_context->emitMove(vd, dupIndex, regIndex, "Duplicate");
|
_context->emitMove(vd, dupIndex, regIndex, "Duplicate");
|
||||||
_context->_clobberedRegs.add(C, IntUtil::mask(dupIndex));
|
_context->_clobberedRegs.or_(C, IntUtil::mask(dupIndex));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -4467,8 +4494,8 @@ static Error X86Context_initFunc(X86Context* self, X86FuncNode* func) {
|
|||||||
|
|
||||||
// Setup "Save-Restore" registers.
|
// Setup "Save-Restore" registers.
|
||||||
func->_saveRestoreRegs.set(kX86RegClassGp , clobberedRegs.get(kX86RegClassGp ) & decl->getPreserved(kX86RegClassGp ));
|
func->_saveRestoreRegs.set(kX86RegClassGp , clobberedRegs.get(kX86RegClassGp ) & decl->getPreserved(kX86RegClassGp ));
|
||||||
func->_saveRestoreRegs.set(kX86RegClassFp , 0);
|
|
||||||
func->_saveRestoreRegs.set(kX86RegClassMm , clobberedRegs.get(kX86RegClassMm ) & decl->getPreserved(kX86RegClassMm ));
|
func->_saveRestoreRegs.set(kX86RegClassMm , clobberedRegs.get(kX86RegClassMm ) & decl->getPreserved(kX86RegClassMm ));
|
||||||
|
func->_saveRestoreRegs.set(kX86RegClassK , 0);
|
||||||
func->_saveRestoreRegs.set(kX86RegClassXyz, clobberedRegs.get(kX86RegClassXyz) & decl->getPreserved(kX86RegClassXyz));
|
func->_saveRestoreRegs.set(kX86RegClassXyz, clobberedRegs.get(kX86RegClassXyz) & decl->getPreserved(kX86RegClassXyz));
|
||||||
|
|
||||||
ASMJIT_ASSERT(!func->_saveRestoreRegs.has(kX86RegClassGp, IntUtil::mask(kX86RegIndexSp)));
|
ASMJIT_ASSERT(!func->_saveRestoreRegs.has(kX86RegClassGp, IntUtil::mask(kX86RegIndexSp)));
|
||||||
@@ -4540,7 +4567,7 @@ static Error X86Context_initFunc(X86Context* self, X86FuncNode* func) {
|
|||||||
// from '_saveRestoreRegs' in case that it is preserved.
|
// from '_saveRestoreRegs' in case that it is preserved.
|
||||||
fRegMask = IntUtil::mask(fRegIndex);
|
fRegMask = IntUtil::mask(fRegIndex);
|
||||||
if ((fRegMask & decl->getPreserved(kX86RegClassGp)) != 0) {
|
if ((fRegMask & decl->getPreserved(kX86RegClassGp)) != 0) {
|
||||||
func->_saveRestoreRegs.del(kX86RegClassGp, fRegMask);
|
func->_saveRestoreRegs.andNot(kX86RegClassGp, fRegMask);
|
||||||
func->_isStackFrameRegPreserved = true;
|
func->_isStackFrameRegPreserved = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -4556,7 +4583,7 @@ static Error X86Context_initFunc(X86Context* self, X86FuncNode* func) {
|
|||||||
else
|
else
|
||||||
stackFrameCopyRegs = IntUtil::keepNOnesFromRight(stackFrameCopyRegs, IntUtil::iMin<uint32_t>(maxRegs, 2));
|
stackFrameCopyRegs = IntUtil::keepNOnesFromRight(stackFrameCopyRegs, IntUtil::iMin<uint32_t>(maxRegs, 2));
|
||||||
|
|
||||||
func->_saveRestoreRegs.add(kX86RegClassGp, stackFrameCopyRegs & decl->getPreserved(kX86RegClassGp));
|
func->_saveRestoreRegs.or_(kX86RegClassGp, stackFrameCopyRegs & decl->getPreserved(kX86RegClassGp));
|
||||||
IntUtil::indexNOnesFromRight(func->_stackFrameCopyGpIndex, stackFrameCopyRegs, maxRegs);
|
IntUtil::indexNOnesFromRight(func->_stackFrameCopyGpIndex, stackFrameCopyRegs, maxRegs);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -5096,7 +5123,7 @@ _NextGroup:
|
|||||||
}
|
}
|
||||||
|
|
||||||
next = node_->getNext();
|
next = node_->getNext();
|
||||||
node_->addFlags(kNodeFlagIsTranslated);
|
node_->orFlags(kNodeFlagIsTranslated);
|
||||||
|
|
||||||
ASMJIT_TSEC({
|
ASMJIT_TSEC({
|
||||||
X86Context_traceNode(this, node_);
|
X86Context_traceNode(this, node_);
|
||||||
@@ -5143,7 +5170,7 @@ _NextGroup:
|
|||||||
VarData* vd = va->getVd();
|
VarData* vd = va->getVd();
|
||||||
|
|
||||||
if (!liveness->getBit(vd->getContextId()))
|
if (!liveness->getBit(vd->getContextId()))
|
||||||
va->addFlags(kVarAttrUnuse);
|
va->orFlags(kVarAttrUnuse);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -5321,7 +5348,7 @@ _NextGroup:
|
|||||||
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
Node* next = node_->getNext();
|
Node* next = node_->getNext();
|
||||||
node_->addFlags(kNodeFlagIsScheduled);
|
node_->orFlags(kNodeFlagIsScheduled);
|
||||||
|
|
||||||
// Shouldn't happen here, investigate if hit.
|
// Shouldn't happen here, investigate if hit.
|
||||||
ASMJIT_ASSERT(node_ != stop);
|
ASMJIT_ASSERT(node_ != stop);
|
||||||
@@ -5508,7 +5535,7 @@ static ASMJIT_INLINE Error X86Context_serialize(X86Context* self, X86Assembler*
|
|||||||
case kNodeTypeInst: {
|
case kNodeTypeInst: {
|
||||||
InstNode* node = static_cast<InstNode*>(node_);
|
InstNode* node = static_cast<InstNode*>(node_);
|
||||||
|
|
||||||
uint32_t code = node->getCode();
|
uint32_t instId = node->getInstId();
|
||||||
uint32_t opCount = node->getOpCount();
|
uint32_t opCount = node->getOpCount();
|
||||||
|
|
||||||
const Operand* opList = node->getOpList();
|
const Operand* opList = node->getOpList();
|
||||||
@@ -5517,9 +5544,10 @@ static ASMJIT_INLINE Error X86Context_serialize(X86Context* self, X86Assembler*
|
|||||||
const Operand* o0 = &noOperand;
|
const Operand* o0 = &noOperand;
|
||||||
const Operand* o1 = &noOperand;
|
const Operand* o1 = &noOperand;
|
||||||
const Operand* o2 = &noOperand;
|
const Operand* o2 = &noOperand;
|
||||||
|
const Operand* o3 = &noOperand;
|
||||||
|
|
||||||
if (node->isSpecial()) {
|
if (node->isSpecial()) {
|
||||||
switch (code) {
|
switch (instId) {
|
||||||
case kX86InstIdCpuid:
|
case kX86InstIdCpuid:
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@@ -5632,6 +5660,19 @@ static ASMJIT_INLINE Error X86Context_serialize(X86Context* self, X86Assembler*
|
|||||||
case kX86InstIdRepneScasB: case kX86InstIdRepneScasD: case kX86InstIdRepneScasQ: case kX86InstIdRepneScasW:
|
case kX86InstIdRepneScasB: case kX86InstIdRepneScasD: case kX86InstIdRepneScasQ: case kX86InstIdRepneScasW:
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case kX86InstIdXrstor:
|
||||||
|
case kX86InstIdXrstor64:
|
||||||
|
case kX86InstIdXsave:
|
||||||
|
case kX86InstIdXsave64:
|
||||||
|
case kX86InstIdXsaveopt:
|
||||||
|
case kX86InstIdXsaveopt64:
|
||||||
|
o0 = &opList[0];
|
||||||
|
break;
|
||||||
|
|
||||||
|
case kX86InstIdXgetbv:
|
||||||
|
case kX86InstIdXsetbv:
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
ASMJIT_ASSERT(!"Reached");
|
ASMJIT_ASSERT(!"Reached");
|
||||||
}
|
}
|
||||||
@@ -5640,10 +5681,11 @@ static ASMJIT_INLINE Error X86Context_serialize(X86Context* self, X86Assembler*
|
|||||||
if (opCount > 0) o0 = &opList[0];
|
if (opCount > 0) o0 = &opList[0];
|
||||||
if (opCount > 1) o1 = &opList[1];
|
if (opCount > 1) o1 = &opList[1];
|
||||||
if (opCount > 2) o2 = &opList[2];
|
if (opCount > 2) o2 = &opList[2];
|
||||||
|
if (opCount > 3) o3 = &opList[3];
|
||||||
}
|
}
|
||||||
|
|
||||||
// We use this form, because it is the main one.
|
// Should call _emit() directly as 4 operand form is the main form.
|
||||||
assembler->emit(code, *o0, *o1, *o2);
|
assembler->emit(instId, *o0, *o1, *o2, *o3);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -150,8 +150,8 @@ struct X86Context : public Context {
|
|||||||
vd->setModified(modified);
|
vd->setModified(modified);
|
||||||
|
|
||||||
_x86State.getListByClass(C)[regIndex] = vd;
|
_x86State.getListByClass(C)[regIndex] = vd;
|
||||||
_x86State._occupied.add(C, regMask);
|
_x86State._occupied.or_(C, regMask);
|
||||||
_x86State._modified.add(C, static_cast<uint32_t>(modified) << regIndex);
|
_x86State._modified.or_(C, static_cast<uint32_t>(modified) << regIndex);
|
||||||
|
|
||||||
ASMJIT_X86_CHECK_STATE
|
ASMJIT_X86_CHECK_STATE
|
||||||
}
|
}
|
||||||
@@ -174,8 +174,8 @@ struct X86Context : public Context {
|
|||||||
vd->setModified(false);
|
vd->setModified(false);
|
||||||
|
|
||||||
_x86State.getListByClass(C)[regIndex] = NULL;
|
_x86State.getListByClass(C)[regIndex] = NULL;
|
||||||
_x86State._occupied.del(C, regMask);
|
_x86State._occupied.andNot(C, regMask);
|
||||||
_x86State._modified.del(C, regMask);
|
_x86State._modified.andNot(C, regMask);
|
||||||
|
|
||||||
ASMJIT_X86_CHECK_STATE
|
ASMJIT_X86_CHECK_STATE
|
||||||
}
|
}
|
||||||
@@ -244,7 +244,7 @@ struct X86Context : public Context {
|
|||||||
emitSave(vd, regIndex, "Save");
|
emitSave(vd, regIndex, "Save");
|
||||||
|
|
||||||
vd->setModified(false);
|
vd->setModified(false);
|
||||||
_x86State._modified.del(C, regMask);
|
_x86State._modified.andNot(C, regMask);
|
||||||
|
|
||||||
ASMJIT_X86_CHECK_STATE
|
ASMJIT_X86_CHECK_STATE
|
||||||
}
|
}
|
||||||
@@ -381,7 +381,7 @@ struct X86Context : public Context {
|
|||||||
uint32_t regMask = IntUtil::mask(regIndex);
|
uint32_t regMask = IntUtil::mask(regIndex);
|
||||||
|
|
||||||
vd->setModified(true);
|
vd->setModified(true);
|
||||||
_x86State._modified.add(C, regMask);
|
_x86State._modified.or_(C, regMask);
|
||||||
|
|
||||||
ASMJIT_X86_CHECK_STATE
|
ASMJIT_X86_CHECK_STATE
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -88,20 +88,29 @@ _Skip:
|
|||||||
// in 64-bit mode not allows to use inline assembler, so we need intrinsic and
|
// in 64-bit mode not allows to use inline assembler, so we need intrinsic and
|
||||||
// we need also asm version.
|
// we need also asm version.
|
||||||
|
|
||||||
|
union X86XCR {
|
||||||
|
uint64_t value;
|
||||||
|
|
||||||
|
struct {
|
||||||
|
uint32_t eax;
|
||||||
|
uint32_t edx;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
// callCpuId() and detectCpuInfo() for x86 and x64 platforms begins here.
|
// callCpuId() and detectCpuInfo() for x86 and x64 platforms begins here.
|
||||||
#if defined(ASMJIT_HOST_X86) || defined(ASMJIT_HOST_X64)
|
#if defined(ASMJIT_HOST_X86) || defined(ASMJIT_HOST_X64)
|
||||||
void X86CpuUtil::callCpuId(uint32_t inEax, uint32_t inEcx, X86CpuId* outResult) {
|
void X86CpuUtil::callCpuId(uint32_t inEax, uint32_t inEcx, X86CpuId* result) {
|
||||||
|
|
||||||
#if defined(_MSC_VER)
|
#if defined(_MSC_VER)
|
||||||
// 2009-02-05: Thanks to Mike Tajmajer for supporting VC7.1 compiler.
|
// 2009-02-05: Thanks to Mike Tajmajer for supporting VC7.1 compiler.
|
||||||
// ASMJIT_HOST_X64 is here only for readability, only VS2005 can compile 64-bit code.
|
// ASMJIT_HOST_X64 is here only for readability, only VS2005 can compile 64-bit code.
|
||||||
# if _MSC_VER >= 1400 || defined(ASMJIT_HOST_X64)
|
# if _MSC_VER >= 1400 || defined(ASMJIT_HOST_X64)
|
||||||
// Done by intrinsics.
|
// Done by intrinsics.
|
||||||
__cpuidex(reinterpret_cast<int*>(outResult->i), inEax, inEcx);
|
__cpuidex(reinterpret_cast<int*>(result->i), inEax, inEcx);
|
||||||
# else // _MSC_VER < 1400
|
# else // _MSC_VER < 1400
|
||||||
uint32_t cpuid_eax = inEax;
|
uint32_t cpuid_eax = inEax;
|
||||||
uint32_t cpuid_ecx = inCax;
|
uint32_t cpuid_ecx = inCax;
|
||||||
uint32_t* cpuid_out = outResult->i;
|
uint32_t* cpuid_out = result->i;
|
||||||
|
|
||||||
__asm {
|
__asm {
|
||||||
mov eax, cpuid_eax
|
mov eax, cpuid_eax
|
||||||
@@ -119,18 +128,50 @@ void X86CpuUtil::callCpuId(uint32_t inEax, uint32_t inEcx, X86CpuId* outResult)
|
|||||||
// Note, patched to preserve ebx/rbx register which is used by GCC.
|
// Note, patched to preserve ebx/rbx register which is used by GCC.
|
||||||
# if defined(ASMJIT_HOST_X86)
|
# if defined(ASMJIT_HOST_X86)
|
||||||
# define __myCpuId(inEax, inEcx, outEax, outEbx, outEcx, outEdx) \
|
# define __myCpuId(inEax, inEcx, outEax, outEbx, outEcx, outEdx) \
|
||||||
asm ("mov %%ebx, %%edi\n" \
|
__asm__ __volatile__( \
|
||||||
|
"mov %%ebx, %%edi\n" \
|
||||||
"cpuid\n" \
|
"cpuid\n" \
|
||||||
"xchg %%edi, %%ebx\n" \
|
"xchg %%edi, %%ebx\n" \
|
||||||
: "=a" (outEax), "=D" (outEbx), "=c" (outEcx), "=d" (outEdx) : "a" (inEax), "c" (inEcx))
|
: "=a" (outEax), "=D" (outEbx), "=c" (outEcx), "=d" (outEdx) \
|
||||||
|
: "a" (inEax), "c" (inEcx))
|
||||||
# else
|
# else
|
||||||
# define __myCpuId(inEax, inEcx, outEax, outEbx, outEcx, outEdx) \
|
# define __myCpuId(inEax, inEcx, outEax, outEbx, outEcx, outEdx) \
|
||||||
asm ("mov %%rbx, %%rdi\n" \
|
__asm__ __volatile__( \
|
||||||
|
"mov %%rbx, %%rdi\n" \
|
||||||
"cpuid\n" \
|
"cpuid\n" \
|
||||||
"xchg %%rdi, %%rbx\n" \
|
"xchg %%rdi, %%rbx\n" \
|
||||||
: "=a" (outEax), "=D" (outEbx), "=c" (outEcx), "=d" (outEdx) : "a" (inEax), "c" (inEcx))
|
: "=a" (outEax), "=D" (outEbx), "=c" (outEcx), "=d" (outEdx) \
|
||||||
|
: "a" (inEax), "c" (inEcx))
|
||||||
# endif
|
# endif
|
||||||
__myCpuId(inEax, inEcx, outResult->eax, outResult->ebx, outResult->ecx, outResult->edx);
|
__myCpuId(inEax, inEcx, result->eax, result->ebx, result->ecx, result->edx);
|
||||||
|
#endif // COMPILER
|
||||||
|
}
|
||||||
|
|
||||||
|
static void callXGetBV(uint32_t inEcx, X86XCR* result) {
|
||||||
|
|
||||||
|
#if defined(_MSC_VER)
|
||||||
|
|
||||||
|
# if (_MSC_FULL_VER >= 160040219) // 2010SP1+
|
||||||
|
result->value = _xgetbv(inEcx);
|
||||||
|
# else
|
||||||
|
result->value = 0;
|
||||||
|
# endif
|
||||||
|
|
||||||
|
#elif defined(__GNUC__)
|
||||||
|
|
||||||
|
unsigned int eax, edx;
|
||||||
|
# if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)
|
||||||
|
__asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(inEcx));
|
||||||
|
# else
|
||||||
|
__asm__ __volatile__(".byte 0x0F, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(inEcx));
|
||||||
|
# endif
|
||||||
|
result->eax = eax;
|
||||||
|
result->edx = edx;
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
result->value = 0;
|
||||||
|
|
||||||
#endif // COMPILER
|
#endif // COMPILER
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -138,7 +179,11 @@ void X86CpuUtil::detect(X86CpuInfo* cpuInfo) {
|
|||||||
X86CpuId regs;
|
X86CpuId regs;
|
||||||
|
|
||||||
uint32_t i;
|
uint32_t i;
|
||||||
uint32_t maxId;
|
uint32_t maxBaseId;
|
||||||
|
|
||||||
|
bool maybeMPX = false;
|
||||||
|
X86XCR xcr0;
|
||||||
|
xcr0.value = 0;
|
||||||
|
|
||||||
// Clear everything except the '_size' member.
|
// Clear everything except the '_size' member.
|
||||||
::memset(reinterpret_cast<uint8_t*>(cpuInfo) + sizeof(uint32_t),
|
::memset(reinterpret_cast<uint8_t*>(cpuInfo) + sizeof(uint32_t),
|
||||||
@@ -148,14 +193,13 @@ void X86CpuUtil::detect(X86CpuInfo* cpuInfo) {
|
|||||||
cpuInfo->_hwThreadsCount = CpuInfo::detectHwThreadsCount();
|
cpuInfo->_hwThreadsCount = CpuInfo::detectHwThreadsCount();
|
||||||
|
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
// [CPUID EAX=0x00000000]
|
// [CPUID EAX=0x0]
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
|
|
||||||
// Get vendor string/id.
|
// Get vendor string/id.
|
||||||
callCpuId(0, 0, ®s);
|
callCpuId(0x0, 0x0, ®s);
|
||||||
|
|
||||||
maxId = regs.eax;
|
|
||||||
|
|
||||||
|
maxBaseId = regs.eax;
|
||||||
::memcpy(cpuInfo->_vendorString, ®s.ebx, 4);
|
::memcpy(cpuInfo->_vendorString, ®s.ebx, 4);
|
||||||
::memcpy(cpuInfo->_vendorString + 4, ®s.edx, 4);
|
::memcpy(cpuInfo->_vendorString + 4, ®s.edx, 4);
|
||||||
::memcpy(cpuInfo->_vendorString + 8, ®s.ecx, 4);
|
::memcpy(cpuInfo->_vendorString + 8, ®s.ecx, 4);
|
||||||
@@ -168,11 +212,12 @@ void X86CpuUtil::detect(X86CpuInfo* cpuInfo) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
// [CPUID EAX=0x00000001]
|
// [CPUID EAX=0x1]
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
|
|
||||||
// Get feature flags in ecx/edx and family/model in eax.
|
if (maxBaseId >= 0x1) {
|
||||||
callCpuId(1, 0, ®s);
|
// Get feature flags in ECX/EDX and family/model in EAX.
|
||||||
|
callCpuId(0x1, 0x0, ®s);
|
||||||
|
|
||||||
// Fill family and model fields.
|
// Fill family and model fields.
|
||||||
cpuInfo->_family = (regs.eax >> 8) & 0x0F;
|
cpuInfo->_family = (regs.eax >> 8) & 0x0F;
|
||||||
@@ -190,57 +235,111 @@ void X86CpuUtil::detect(X86CpuInfo* cpuInfo) {
|
|||||||
cpuInfo->_flushCacheLineSize = ((regs.ebx >> 8) & 0xFF) * 8;
|
cpuInfo->_flushCacheLineSize = ((regs.ebx >> 8) & 0xFF) * 8;
|
||||||
cpuInfo->_maxLogicalProcessors = ((regs.ebx >> 16) & 0xFF);
|
cpuInfo->_maxLogicalProcessors = ((regs.ebx >> 16) & 0xFF);
|
||||||
|
|
||||||
if (regs.ecx & 0x00000001U) cpuInfo->addFeature(kX86CpuFeatureSse3);
|
if (regs.ecx & 0x00000001U) cpuInfo->addFeature(kX86CpuFeatureSSE3);
|
||||||
if (regs.ecx & 0x00000002U) cpuInfo->addFeature(kX86CpuFeaturePclmulqdq);
|
if (regs.ecx & 0x00000002U) cpuInfo->addFeature(kX86CpuFeaturePCLMULQDQ);
|
||||||
if (regs.ecx & 0x00000008U) cpuInfo->addFeature(kX86CpuFeatureMonitorMWait);
|
if (regs.ecx & 0x00000008U) cpuInfo->addFeature(kX86CpuFeatureMONITOR);
|
||||||
if (regs.ecx & 0x00000200U) cpuInfo->addFeature(kX86CpuFeatureSsse3);
|
if (regs.ecx & 0x00000200U) cpuInfo->addFeature(kX86CpuFeatureSSSE3);
|
||||||
if (regs.ecx & 0x00002000U) cpuInfo->addFeature(kX86CpuFeatureCmpXchg16B);
|
if (regs.ecx & 0x00002000U) cpuInfo->addFeature(kX86CpuFeatureCMPXCHG16B);
|
||||||
if (regs.ecx & 0x00080000U) cpuInfo->addFeature(kX86CpuFeatureSse41);
|
if (regs.ecx & 0x00080000U) cpuInfo->addFeature(kX86CpuFeatureSSE4_1);
|
||||||
if (regs.ecx & 0x00100000U) cpuInfo->addFeature(kX86CpuFeatureSse42);
|
if (regs.ecx & 0x00100000U) cpuInfo->addFeature(kX86CpuFeatureSSE4_2);
|
||||||
if (regs.ecx & 0x00400000U) cpuInfo->addFeature(kX86CpuFeatureMovbe);
|
if (regs.ecx & 0x00400000U) cpuInfo->addFeature(kX86CpuFeatureMOVBE);
|
||||||
if (regs.ecx & 0x00800000U) cpuInfo->addFeature(kX86CpuFeaturePopcnt);
|
if (regs.ecx & 0x00800000U) cpuInfo->addFeature(kX86CpuFeaturePOPCNT);
|
||||||
if (regs.ecx & 0x02000000U) cpuInfo->addFeature(kX86CpuFeatureAesni);
|
if (regs.ecx & 0x02000000U) cpuInfo->addFeature(kX86CpuFeatureAESNI);
|
||||||
if (regs.ecx & 0x40000000U) cpuInfo->addFeature(kX86CpuFeatureRdrand);
|
if (regs.ecx & 0x04000000U) cpuInfo->addFeature(kX86CpuFeatureXSave);
|
||||||
|
if (regs.ecx & 0x08000000U) cpuInfo->addFeature(kX86CpuFeatureXSaveOS);
|
||||||
|
if (regs.ecx & 0x40000000U) cpuInfo->addFeature(kX86CpuFeatureRDRAND);
|
||||||
|
|
||||||
if (regs.edx & 0x00000010U) cpuInfo->addFeature(kX86CpuFeatureRdtsc);
|
if (regs.edx & 0x00000010U) cpuInfo->addFeature(kX86CpuFeatureRDTSC);
|
||||||
if (regs.edx & 0x00000100U) cpuInfo->addFeature(kX86CpuFeatureCmpXchg8B);
|
if (regs.edx & 0x00000100U) cpuInfo->addFeature(kX86CpuFeatureCMPXCHG8B);
|
||||||
if (regs.edx & 0x00008000U) cpuInfo->addFeature(kX86CpuFeatureCmov);
|
if (regs.edx & 0x00008000U) cpuInfo->addFeature(kX86CpuFeatureCMOV);
|
||||||
if (regs.edx & 0x00800000U) cpuInfo->addFeature(kX86CpuFeatureMmx);
|
if (regs.edx & 0x00080000U) cpuInfo->addFeature(kX86CpuFeatureCLFLUSH);
|
||||||
if (regs.edx & 0x01000000U) cpuInfo->addFeature(kX86CpuFeatureFxsr);
|
if (regs.edx & 0x00800000U) cpuInfo->addFeature(kX86CpuFeatureMMX);
|
||||||
if (regs.edx & 0x02000000U) cpuInfo->addFeature(kX86CpuFeatureSse).addFeature(kX86CpuFeatureMmxExt);
|
if (regs.edx & 0x01000000U) cpuInfo->addFeature(kX86CpuFeatureFXSR);
|
||||||
if (regs.edx & 0x04000000U) cpuInfo->addFeature(kX86CpuFeatureSse).addFeature(kX86CpuFeatureSse2);
|
if (regs.edx & 0x02000000U) cpuInfo->addFeature(kX86CpuFeatureSSE).addFeature(kX86CpuFeatureMMX2);
|
||||||
if (regs.edx & 0x10000000U) cpuInfo->addFeature(kX86CpuFeatureMultithreading);
|
if (regs.edx & 0x04000000U) cpuInfo->addFeature(kX86CpuFeatureSSE).addFeature(kX86CpuFeatureSSE2);
|
||||||
|
if (regs.edx & 0x10000000U) cpuInfo->addFeature(kX86CpuFeatureMT);
|
||||||
|
|
||||||
if (cpuInfo->_vendorId == kCpuVendorAmd && (regs.edx & 0x10000000U)) {
|
// AMD sets Multithreading to ON if it has two or more cores.
|
||||||
// AMD sets Multithreading to ON if it has more cores.
|
if (cpuInfo->_hwThreadsCount == 1 && cpuInfo->_vendorId == kCpuVendorAmd && (regs.edx & 0x10000000U)) {
|
||||||
if (cpuInfo->_hwThreadsCount == 1)
|
|
||||||
cpuInfo->_hwThreadsCount = 2;
|
cpuInfo->_hwThreadsCount = 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Detect AVX.
|
// Get the content of XCR0 if supported by CPU and enabled by OS.
|
||||||
if (regs.ecx & 0x10000000U) {
|
if ((regs.ecx & 0x0C000000U) == 0x0C000000U) {
|
||||||
cpuInfo->addFeature(kX86CpuFeatureAvx);
|
callXGetBV(0, &xcr0);
|
||||||
|
|
||||||
if (regs.ecx & 0x00000800U) cpuInfo->addFeature(kX86CpuFeatureXop);
|
|
||||||
if (regs.ecx & 0x00004000U) cpuInfo->addFeature(kX86CpuFeatureFma3);
|
|
||||||
if (regs.ecx & 0x00010000U) cpuInfo->addFeature(kX86CpuFeatureFma4);
|
|
||||||
if (regs.ecx & 0x20000000U) cpuInfo->addFeature(kX86CpuFeatureF16C);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Detect AVX+.
|
||||||
|
if (regs.ecx & 0x10000000U) {
|
||||||
|
// - XCR0[2:1] == 11b
|
||||||
|
// XMM & YMM states are enabled by OS.
|
||||||
|
if ((xcr0.eax & 0x00000006U) == 0x00000006U) {
|
||||||
|
cpuInfo->addFeature(kX86CpuFeatureAVX);
|
||||||
|
|
||||||
|
if (regs.ecx & 0x00000800U) cpuInfo->addFeature(kX86CpuFeatureXOP);
|
||||||
|
if (regs.ecx & 0x00004000U) cpuInfo->addFeature(kX86CpuFeatureFMA3);
|
||||||
|
if (regs.ecx & 0x00010000U) cpuInfo->addFeature(kX86CpuFeatureFMA4);
|
||||||
|
if (regs.ecx & 0x20000000U) cpuInfo->addFeature(kX86CpuFeatureF16C);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------
|
||||||
|
// [CPUID EAX=0x7 ECX=0x0]
|
||||||
|
// --------------------------------------------------------------------------
|
||||||
|
|
||||||
// Detect new features if the processor supports CPUID-07.
|
// Detect new features if the processor supports CPUID-07.
|
||||||
if (maxId >= 7) {
|
if (maxBaseId >= 0x7) {
|
||||||
callCpuId(7, 0, ®s);
|
callCpuId(0x7, 0x0, ®s);
|
||||||
|
|
||||||
if (regs.ebx & 0x00000001) cpuInfo->addFeature(kX86CpuFeatureFsGsBase);
|
if (regs.ebx & 0x00000001U) cpuInfo->addFeature(kX86CpuFeatureFSGSBase);
|
||||||
if (regs.ebx & 0x00000008) cpuInfo->addFeature(kX86CpuFeatureBmi);
|
if (regs.ebx & 0x00000008U) cpuInfo->addFeature(kX86CpuFeatureBMI);
|
||||||
if (regs.ebx & 0x00000010) cpuInfo->addFeature(kX86CpuFeatureHle);
|
if (regs.ebx & 0x00000010U) cpuInfo->addFeature(kX86CpuFeatureHLE);
|
||||||
if (regs.ebx & 0x00000100) cpuInfo->addFeature(kX86CpuFeatureBmi2);
|
if (regs.ebx & 0x00000100U) cpuInfo->addFeature(kX86CpuFeatureBMI2);
|
||||||
if (regs.ebx & 0x00000200) cpuInfo->addFeature(kX86CpuFeatureRepMovsbStosbExt);
|
if (regs.ebx & 0x00000200U) cpuInfo->addFeature(kX86CpuFeatureMOVSBSTOSBOpt);
|
||||||
if (regs.ebx & 0x00000800) cpuInfo->addFeature(kX86CpuFeatureRtm);
|
if (regs.ebx & 0x00000800U) cpuInfo->addFeature(kX86CpuFeatureRTM);
|
||||||
|
if (regs.ebx & 0x00004000U) maybeMPX = true;
|
||||||
|
if (regs.ebx & 0x00040000U) cpuInfo->addFeature(kX86CpuFeatureRDSEED);
|
||||||
|
if (regs.ebx & 0x00080000U) cpuInfo->addFeature(kX86CpuFeatureADX);
|
||||||
|
if (regs.ebx & 0x00800000U) cpuInfo->addFeature(kX86CpuFeatureCLFLUSHOpt);
|
||||||
|
if (regs.ebx & 0x20000000U) cpuInfo->addFeature(kX86CpuFeatureSHA);
|
||||||
|
|
||||||
// AVX2 depends on AVX.
|
if (regs.ecx & 0x00000001U) cpuInfo->addFeature(kX86CpuFeaturePREFETCHWT1);
|
||||||
if (cpuInfo->hasFeature(kX86CpuFeatureAvx)) {
|
|
||||||
if (regs.ebx & 0x00000020) cpuInfo->addFeature(kX86CpuFeatureAvx2);
|
// Detect AVX2.
|
||||||
|
if (cpuInfo->hasFeature(kX86CpuFeatureAVX)) {
|
||||||
|
if (regs.ebx & 0x00000020U) cpuInfo->addFeature(kX86CpuFeatureAVX2);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Detect AVX-512+.
|
||||||
|
if (regs.ebx & 0x00010000U) {
|
||||||
|
// - XCR0[2:1] == 11b
|
||||||
|
// XMM & YMM states are enabled by OS.
|
||||||
|
// - XCR0[7:5] == 111b
|
||||||
|
// Upper 256-bit of ZMM0-ZMM15 and ZMM16-ZMM31 state are enabled by OS.
|
||||||
|
if ((xcr0.eax & 0x00000076U) == 0x00000076U) {
|
||||||
|
cpuInfo->addFeature(kX86CpuFeatureAVX512F);
|
||||||
|
|
||||||
|
if (regs.ebx & 0x00020000U) cpuInfo->addFeature(kX86CpuFeatureAVX512DQ);
|
||||||
|
if (regs.ebx & 0x04000000U) cpuInfo->addFeature(kX86CpuFeatureAVX512PF);
|
||||||
|
if (regs.ebx & 0x08000000U) cpuInfo->addFeature(kX86CpuFeatureAVX512ER);
|
||||||
|
if (regs.ebx & 0x10000000U) cpuInfo->addFeature(kX86CpuFeatureAVX512CD);
|
||||||
|
if (regs.ebx & 0x40000000U) cpuInfo->addFeature(kX86CpuFeatureAVX512BW);
|
||||||
|
if (regs.ebx & 0x80000000U) cpuInfo->addFeature(kX86CpuFeatureAVX512VL);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------
|
||||||
|
// [CPUID EAX=0xD, ECX=0x0]
|
||||||
|
// --------------------------------------------------------------------------
|
||||||
|
|
||||||
|
if (maxBaseId >= 0xD && maybeMPX) {
|
||||||
|
callCpuId(0xD, 0x0, ®s);
|
||||||
|
|
||||||
|
// Both CPUID result and XCR0 has to be enabled to have support for MPX.
|
||||||
|
if (((regs.eax & xcr0.eax) & 0x00000018U) == 0x00000018U) {
|
||||||
|
cpuInfo->addFeature(kX86CpuFeatureMPX);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -250,28 +349,28 @@ void X86CpuUtil::detect(X86CpuInfo* cpuInfo) {
|
|||||||
|
|
||||||
// Calling cpuid with 0x80000000 as the in argument gets the number of valid
|
// Calling cpuid with 0x80000000 as the in argument gets the number of valid
|
||||||
// extended IDs.
|
// extended IDs.
|
||||||
callCpuId(0x80000000, 0, ®s);
|
callCpuId(0x80000000, 0x0, ®s);
|
||||||
|
|
||||||
uint32_t maxExtId = IntUtil::iMin<uint32_t>(regs.eax, 0x80000004);
|
uint32_t maxExtId = IntUtil::iMin<uint32_t>(regs.eax, 0x80000004);
|
||||||
uint32_t* brand = reinterpret_cast<uint32_t*>(cpuInfo->_brandString);
|
uint32_t* brand = reinterpret_cast<uint32_t*>(cpuInfo->_brandString);
|
||||||
|
|
||||||
for (i = 0x80000001; i <= maxExtId; i++) {
|
for (i = 0x80000001; i <= maxExtId; i++) {
|
||||||
callCpuId(i, 0, ®s);
|
callCpuId(i, 0x0, ®s);
|
||||||
|
|
||||||
switch (i) {
|
switch (i) {
|
||||||
case 0x80000001:
|
case 0x80000001:
|
||||||
if (regs.ecx & 0x00000001U) cpuInfo->addFeature(kX86CpuFeatureLahfSahf);
|
if (regs.ecx & 0x00000001U) cpuInfo->addFeature(kX86CpuFeatureLahfSahf);
|
||||||
if (regs.ecx & 0x00000020U) cpuInfo->addFeature(kX86CpuFeatureLzcnt);
|
if (regs.ecx & 0x00000020U) cpuInfo->addFeature(kX86CpuFeatureLZCNT);
|
||||||
if (regs.ecx & 0x00000040U) cpuInfo->addFeature(kX86CpuFeatureSse4A);
|
if (regs.ecx & 0x00000040U) cpuInfo->addFeature(kX86CpuFeatureSSE4A);
|
||||||
if (regs.ecx & 0x00000080U) cpuInfo->addFeature(kX86CpuFeatureMsse);
|
if (regs.ecx & 0x00000080U) cpuInfo->addFeature(kX86CpuFeatureMSSE);
|
||||||
if (regs.ecx & 0x00000100U) cpuInfo->addFeature(kX86CpuFeaturePrefetch);
|
if (regs.ecx & 0x00000100U) cpuInfo->addFeature(kX86CpuFeaturePREFETCH);
|
||||||
|
|
||||||
if (regs.edx & 0x00100000U) cpuInfo->addFeature(kX86CpuFeatureExecuteDisableBit);
|
if (regs.edx & 0x00100000U) cpuInfo->addFeature(kX86CpuFeatureNX);
|
||||||
if (regs.edx & 0x00200000U) cpuInfo->addFeature(kX86CpuFeatureFfxsr);
|
if (regs.edx & 0x00200000U) cpuInfo->addFeature(kX86CpuFeatureFXSROpt);
|
||||||
if (regs.edx & 0x00400000U) cpuInfo->addFeature(kX86CpuFeatureMmxExt);
|
if (regs.edx & 0x00400000U) cpuInfo->addFeature(kX86CpuFeatureMMX2);
|
||||||
if (regs.edx & 0x08000000U) cpuInfo->addFeature(kX86CpuFeatureRdtscp);
|
if (regs.edx & 0x08000000U) cpuInfo->addFeature(kX86CpuFeatureRDTSCP);
|
||||||
if (regs.edx & 0x40000000U) cpuInfo->addFeature(kX86CpuFeature3dNowExt).addFeature(kX86CpuFeatureMmxExt);
|
if (regs.edx & 0x40000000U) cpuInfo->addFeature(kX86CpuFeature3DNOW2).addFeature(kX86CpuFeatureMMX2);
|
||||||
if (regs.edx & 0x80000000U) cpuInfo->addFeature(kX86CpuFeature3dNow);
|
if (regs.edx & 0x80000000U) cpuInfo->addFeature(kX86CpuFeature3DNOW);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x80000002:
|
case 0x80000002:
|
||||||
|
|||||||
@@ -31,92 +31,123 @@ struct X86CpuInfo;
|
|||||||
|
|
||||||
//! X86 CPU features.
|
//! X86 CPU features.
|
||||||
ASMJIT_ENUM(kX86CpuFeature) {
|
ASMJIT_ENUM(kX86CpuFeature) {
|
||||||
|
//! Cpu has Not-Execute-Bit.
|
||||||
|
kX86CpuFeatureNX = 0,
|
||||||
//! Cpu has multithreading.
|
//! Cpu has multithreading.
|
||||||
kX86CpuFeatureMultithreading = 1,
|
kX86CpuFeatureMT,
|
||||||
//! Cpu has execute disable bit.
|
|
||||||
kX86CpuFeatureExecuteDisableBit,
|
|
||||||
//! Cpu has RDTSC.
|
//! Cpu has RDTSC.
|
||||||
kX86CpuFeatureRdtsc,
|
kX86CpuFeatureRDTSC,
|
||||||
//! Cpu has RDTSCP.
|
//! Cpu has RDTSCP.
|
||||||
kX86CpuFeatureRdtscp,
|
kX86CpuFeatureRDTSCP,
|
||||||
//! Cpu has CMOV.
|
//! Cpu has CMOV.
|
||||||
kX86CpuFeatureCmov,
|
kX86CpuFeatureCMOV,
|
||||||
//! Cpu has CMPXCHG8B.
|
//! Cpu has CMPXCHG8B.
|
||||||
kX86CpuFeatureCmpXchg8B,
|
kX86CpuFeatureCMPXCHG8B,
|
||||||
//! Cpu has CMPXCHG16B (x64).
|
//! Cpu has CMPXCHG16B (X64).
|
||||||
kX86CpuFeatureCmpXchg16B,
|
kX86CpuFeatureCMPXCHG16B,
|
||||||
//! Cpu has CLFLUSH.
|
//! Cpu has CLFLUSH.
|
||||||
kX86CpuFeatureClflush,
|
kX86CpuFeatureCLFLUSH,
|
||||||
|
//! Cpu has CLFLUSH (Optimized).
|
||||||
|
kX86CpuFeatureCLFLUSHOpt,
|
||||||
//! Cpu has PREFETCH.
|
//! Cpu has PREFETCH.
|
||||||
kX86CpuFeaturePrefetch,
|
kX86CpuFeaturePREFETCH,
|
||||||
|
//! Cpu has PREFETCHWT1.
|
||||||
|
kX86CpuFeaturePREFETCHWT1,
|
||||||
//! Cpu has LAHF/SAHF.
|
//! Cpu has LAHF/SAHF.
|
||||||
kX86CpuFeatureLahfSahf,
|
kX86CpuFeatureLahfSahf,
|
||||||
//! Cpu has FXSAVE/FXRSTOR.
|
//! Cpu has FXSAVE/FXRSTOR.
|
||||||
kX86CpuFeatureFxsr,
|
kX86CpuFeatureFXSR,
|
||||||
//! Cpu has FXSAVE/FXRSTOR optimizations.
|
//! Cpu has FXSAVE/FXRSTOR (Optimized).
|
||||||
kX86CpuFeatureFfxsr,
|
kX86CpuFeatureFXSROpt,
|
||||||
//! Cpu has MMX.
|
//! Cpu has MMX.
|
||||||
kX86CpuFeatureMmx,
|
kX86CpuFeatureMMX,
|
||||||
//! Cpu has extended MMX.
|
//! Cpu has extended MMX.
|
||||||
kX86CpuFeatureMmxExt,
|
kX86CpuFeatureMMX2,
|
||||||
//! Cpu has 3dNow!
|
//! Cpu has 3dNow!
|
||||||
kX86CpuFeature3dNow,
|
kX86CpuFeature3DNOW,
|
||||||
//! Cpu has enhanced 3dNow!
|
//! Cpu has enhanced 3dNow!
|
||||||
kX86CpuFeature3dNowExt,
|
kX86CpuFeature3DNOW2,
|
||||||
//! Cpu has SSE.
|
//! Cpu has SSE.
|
||||||
kX86CpuFeatureSse,
|
kX86CpuFeatureSSE,
|
||||||
//! Cpu has SSE2.
|
//! Cpu has SSE2.
|
||||||
kX86CpuFeatureSse2,
|
kX86CpuFeatureSSE2,
|
||||||
//! Cpu has SSE3.
|
//! Cpu has SSE3.
|
||||||
kX86CpuFeatureSse3,
|
kX86CpuFeatureSSE3,
|
||||||
//! Cpu has Supplemental SSE3 (SSSE3).
|
//! Cpu has SSSE3.
|
||||||
kX86CpuFeatureSsse3,
|
kX86CpuFeatureSSSE3,
|
||||||
//! Cpu has SSE4.A.
|
//! Cpu has SSE4.A.
|
||||||
kX86CpuFeatureSse4A,
|
kX86CpuFeatureSSE4A,
|
||||||
//! Cpu has SSE4.1.
|
//! Cpu has SSE4.1.
|
||||||
kX86CpuFeatureSse41,
|
kX86CpuFeatureSSE4_1,
|
||||||
//! Cpu has SSE4.2.
|
//! Cpu has SSE4.2.
|
||||||
kX86CpuFeatureSse42,
|
kX86CpuFeatureSSE4_2,
|
||||||
//! Cpu has Misaligned SSE (MSSE).
|
//! Cpu has Misaligned SSE (MSSE).
|
||||||
kX86CpuFeatureMsse,
|
kX86CpuFeatureMSSE,
|
||||||
//! Cpu has MONITOR and MWAIT.
|
//! Cpu has MONITOR and MWAIT.
|
||||||
kX86CpuFeatureMonitorMWait,
|
kX86CpuFeatureMONITOR,
|
||||||
//! Cpu has MOVBE.
|
//! Cpu has MOVBE.
|
||||||
kX86CpuFeatureMovbe,
|
kX86CpuFeatureMOVBE,
|
||||||
//! Cpu has POPCNT.
|
//! Cpu has POPCNT.
|
||||||
kX86CpuFeaturePopcnt,
|
kX86CpuFeaturePOPCNT,
|
||||||
//! Cpu has LZCNT.
|
//! Cpu has LZCNT.
|
||||||
kX86CpuFeatureLzcnt,
|
kX86CpuFeatureLZCNT,
|
||||||
//! Cpu has AESNI.
|
//! Cpu has AESNI.
|
||||||
kX86CpuFeatureAesni,
|
kX86CpuFeatureAESNI,
|
||||||
//! Cpu has PCLMULQDQ.
|
//! Cpu has PCLMULQDQ.
|
||||||
kX86CpuFeaturePclmulqdq,
|
kX86CpuFeaturePCLMULQDQ,
|
||||||
//! Cpu has RDRAND.
|
//! Cpu has RDRAND.
|
||||||
kX86CpuFeatureRdrand,
|
kX86CpuFeatureRDRAND,
|
||||||
|
//! Cpu has RDSEED.
|
||||||
|
kX86CpuFeatureRDSEED,
|
||||||
|
//! Cpu has SHA-1 and SHA-256.
|
||||||
|
kX86CpuFeatureSHA,
|
||||||
|
//! Cpu has XSAVE support - XSAVE/XRSTOR, XSETBV/XGETBV, and XCR0.
|
||||||
|
kX86CpuFeatureXSave,
|
||||||
|
//! OS has enabled XSAVE, you can call XGETBV to get value of XCR0.
|
||||||
|
kX86CpuFeatureXSaveOS,
|
||||||
//! Cpu has AVX.
|
//! Cpu has AVX.
|
||||||
kX86CpuFeatureAvx,
|
kX86CpuFeatureAVX,
|
||||||
//! Cpu has AVX2.
|
//! Cpu has AVX2.
|
||||||
kX86CpuFeatureAvx2,
|
kX86CpuFeatureAVX2,
|
||||||
//! Cpu has F16C.
|
//! Cpu has F16C.
|
||||||
kX86CpuFeatureF16C,
|
kX86CpuFeatureF16C,
|
||||||
//! Cpu has FMA3.
|
//! Cpu has FMA3.
|
||||||
kX86CpuFeatureFma3,
|
kX86CpuFeatureFMA3,
|
||||||
//! Cpu has FMA4.
|
//! Cpu has FMA4.
|
||||||
kX86CpuFeatureFma4,
|
kX86CpuFeatureFMA4,
|
||||||
//! Cpu has XOP.
|
//! Cpu has XOP.
|
||||||
kX86CpuFeatureXop,
|
kX86CpuFeatureXOP,
|
||||||
//! Cpu has BMI.
|
//! Cpu has BMI.
|
||||||
kX86CpuFeatureBmi,
|
kX86CpuFeatureBMI,
|
||||||
//! Cpu has BMI2.
|
//! Cpu has BMI2.
|
||||||
kX86CpuFeatureBmi2,
|
kX86CpuFeatureBMI2,
|
||||||
//! Cpu has HLE.
|
//! Cpu has HLE.
|
||||||
kX86CpuFeatureHle,
|
kX86CpuFeatureHLE,
|
||||||
//! Cpu has RTM.
|
//! Cpu has RTM.
|
||||||
kX86CpuFeatureRtm,
|
kX86CpuFeatureRTM,
|
||||||
|
//! Cpu has ADX.
|
||||||
|
kX86CpuFeatureADX,
|
||||||
|
//! Cpu has MPX (Memory Protection Extensions).
|
||||||
|
kX86CpuFeatureMPX,
|
||||||
//! Cpu has FSGSBASE.
|
//! Cpu has FSGSBASE.
|
||||||
kX86CpuFeatureFsGsBase,
|
kX86CpuFeatureFSGSBase,
|
||||||
//! Cpu has enhanced REP MOVSB/STOSB.
|
//! Cpu has optimized REP MOVSB/STOSB.
|
||||||
kX86CpuFeatureRepMovsbStosbExt,
|
kX86CpuFeatureMOVSBSTOSBOpt,
|
||||||
|
|
||||||
|
//! Cpu has AVX-512F (Foundation).
|
||||||
|
kX86CpuFeatureAVX512F,
|
||||||
|
//! Cpu has AVX-512CD (Conflict Detection).
|
||||||
|
kX86CpuFeatureAVX512CD,
|
||||||
|
//! Cpu has AVX-512PF (Prefetch Instructions).
|
||||||
|
kX86CpuFeatureAVX512PF,
|
||||||
|
//! Cpu has AVX-512ER (Exponential and Reciprocal Instructions).
|
||||||
|
kX86CpuFeatureAVX512ER,
|
||||||
|
//! Cpu has AVX-512DQ (DWord/QWord).
|
||||||
|
kX86CpuFeatureAVX512DQ,
|
||||||
|
//! Cpu has AVX-512BW (Byte/Word).
|
||||||
|
kX86CpuFeatureAVX512BW,
|
||||||
|
//! Cpu has AVX-512VL (Vector Length Extensions).
|
||||||
|
kX86CpuFeatureAVX512VL,
|
||||||
|
|
||||||
//! Count of X86/X64 Cpu features.
|
//! Count of X86/X64 Cpu features.
|
||||||
kX86CpuFeatureCount
|
kX86CpuFeatureCount
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -20,165 +20,262 @@
|
|||||||
|
|
||||||
namespace asmjit {
|
namespace asmjit {
|
||||||
|
|
||||||
// Prevent static initialization.
|
#define REG(_Type_, _Index_, _Size_) {{ \
|
||||||
//
|
|
||||||
// Remap all classes to POD structs so they can be statically initialized
|
|
||||||
// without calling a constructor. Compiler will store these in data section.
|
|
||||||
struct X86GpReg { Operand::VRegOp data; };
|
|
||||||
struct X86FpReg { Operand::VRegOp data; };
|
|
||||||
struct X86MmReg { Operand::VRegOp data; };
|
|
||||||
struct X86XmmReg { Operand::VRegOp data; };
|
|
||||||
struct X86YmmReg { Operand::VRegOp data; };
|
|
||||||
struct X86SegReg { Operand::VRegOp data; };
|
|
||||||
|
|
||||||
namespace x86 {
|
|
||||||
|
|
||||||
// ============================================================================
|
|
||||||
// [asmjit::x86::Registers]
|
|
||||||
// ============================================================================
|
|
||||||
|
|
||||||
#define REG(_Class_, _Name_, _Type_, _Index_, _Size_) \
|
|
||||||
const _Class_ _Name_ = {{ \
|
|
||||||
kOperandTypeReg, _Size_, { ((_Type_) << 8) + _Index_ }, kInvalidValue, {{ kInvalidVar, 0 }} \
|
kOperandTypeReg, _Size_, { ((_Type_) << 8) + _Index_ }, kInvalidValue, {{ kInvalidVar, 0 }} \
|
||||||
}}
|
}}
|
||||||
|
|
||||||
REG(X86GpReg, noGpReg, kInvalidReg, kInvalidReg, 0);
|
const X86RegData x86RegData = {
|
||||||
|
// RIP.
|
||||||
|
REG(kX86RegTypeRip, 0, 0),
|
||||||
|
// NoGp.
|
||||||
|
REG(kInvalidReg, kInvalidReg, 0),
|
||||||
|
|
||||||
REG(X86GpReg, al, kX86RegTypeGpbLo, kX86RegIndexAx, 1);
|
// Segments.
|
||||||
REG(X86GpReg, cl, kX86RegTypeGpbLo, kX86RegIndexCx, 1);
|
{
|
||||||
REG(X86GpReg, dl, kX86RegTypeGpbLo, kX86RegIndexDx, 1);
|
REG(kX86RegTypeSeg, 0, 2), // Default.
|
||||||
REG(X86GpReg, bl, kX86RegTypeGpbLo, kX86RegIndexBx, 1);
|
REG(kX86RegTypeSeg, 1, 2), // ES.
|
||||||
REG(X86GpReg, spl, kX86RegTypeGpbLo, kX86RegIndexSp, 1);
|
REG(kX86RegTypeSeg, 2, 2), // CS.
|
||||||
REG(X86GpReg, bpl, kX86RegTypeGpbLo, kX86RegIndexBp, 1);
|
REG(kX86RegTypeSeg, 3, 2), // SS.
|
||||||
REG(X86GpReg, sil, kX86RegTypeGpbLo, kX86RegIndexSi, 1);
|
REG(kX86RegTypeSeg, 4, 2), // DS.
|
||||||
REG(X86GpReg, dil, kX86RegTypeGpbLo, kX86RegIndexDi, 1);
|
REG(kX86RegTypeSeg, 5, 2), // FS.
|
||||||
REG(X86GpReg, r8b, kX86RegTypeGpbLo, 8, 1);
|
REG(kX86RegTypeSeg, 6, 2) // GS.
|
||||||
REG(X86GpReg, r9b, kX86RegTypeGpbLo, 9, 1);
|
},
|
||||||
REG(X86GpReg, r10b, kX86RegTypeGpbLo, 10, 1);
|
|
||||||
REG(X86GpReg, r11b, kX86RegTypeGpbLo, 11, 1);
|
|
||||||
REG(X86GpReg, r12b, kX86RegTypeGpbLo, 12, 1);
|
|
||||||
REG(X86GpReg, r13b, kX86RegTypeGpbLo, 13, 1);
|
|
||||||
REG(X86GpReg, r14b, kX86RegTypeGpbLo, 14, 1);
|
|
||||||
REG(X86GpReg, r15b, kX86RegTypeGpbLo, 15, 1);
|
|
||||||
|
|
||||||
REG(X86GpReg, ah, kX86RegTypeGpbHi, kX86RegIndexAx, 1);
|
// GpbLo.
|
||||||
REG(X86GpReg, ch, kX86RegTypeGpbHi, kX86RegIndexCx, 1);
|
{
|
||||||
REG(X86GpReg, dh, kX86RegTypeGpbHi, kX86RegIndexDx, 1);
|
REG(kX86RegTypeGpbLo, 0, 1),
|
||||||
REG(X86GpReg, bh, kX86RegTypeGpbHi, kX86RegIndexBx, 1);
|
REG(kX86RegTypeGpbLo, 1, 1),
|
||||||
|
REG(kX86RegTypeGpbLo, 2, 1),
|
||||||
|
REG(kX86RegTypeGpbLo, 3, 1),
|
||||||
|
REG(kX86RegTypeGpbLo, 4, 1),
|
||||||
|
REG(kX86RegTypeGpbLo, 5, 1),
|
||||||
|
REG(kX86RegTypeGpbLo, 6, 1),
|
||||||
|
REG(kX86RegTypeGpbLo, 7, 1),
|
||||||
|
REG(kX86RegTypeGpbLo, 8, 1),
|
||||||
|
REG(kX86RegTypeGpbLo, 9, 1),
|
||||||
|
REG(kX86RegTypeGpbLo, 10, 1),
|
||||||
|
REG(kX86RegTypeGpbLo, 11, 1),
|
||||||
|
REG(kX86RegTypeGpbLo, 12, 1),
|
||||||
|
REG(kX86RegTypeGpbLo, 13, 1),
|
||||||
|
REG(kX86RegTypeGpbLo, 14, 1),
|
||||||
|
REG(kX86RegTypeGpbLo, 15, 1)
|
||||||
|
},
|
||||||
|
|
||||||
REG(X86GpReg, ax, kX86RegTypeGpw, kX86RegIndexAx, 2);
|
// GpbHi.
|
||||||
REG(X86GpReg, cx, kX86RegTypeGpw, kX86RegIndexCx, 2);
|
{
|
||||||
REG(X86GpReg, dx, kX86RegTypeGpw, kX86RegIndexDx, 2);
|
REG(kX86RegTypeGpbHi, 0, 1),
|
||||||
REG(X86GpReg, bx, kX86RegTypeGpw, kX86RegIndexBx, 2);
|
REG(kX86RegTypeGpbHi, 1, 1),
|
||||||
REG(X86GpReg, sp, kX86RegTypeGpw, kX86RegIndexSp, 2);
|
REG(kX86RegTypeGpbHi, 2, 1),
|
||||||
REG(X86GpReg, bp, kX86RegTypeGpw, kX86RegIndexBp, 2);
|
REG(kX86RegTypeGpbHi, 3, 1)
|
||||||
REG(X86GpReg, si, kX86RegTypeGpw, kX86RegIndexSi, 2);
|
},
|
||||||
REG(X86GpReg, di, kX86RegTypeGpw, kX86RegIndexDi, 2);
|
|
||||||
REG(X86GpReg, r8w, kX86RegTypeGpw, 8, 2);
|
|
||||||
REG(X86GpReg, r9w, kX86RegTypeGpw, 9, 2);
|
|
||||||
REG(X86GpReg, r10w, kX86RegTypeGpw, 10, 2);
|
|
||||||
REG(X86GpReg, r11w, kX86RegTypeGpw, 11, 2);
|
|
||||||
REG(X86GpReg, r12w, kX86RegTypeGpw, 12, 2);
|
|
||||||
REG(X86GpReg, r13w, kX86RegTypeGpw, 13, 2);
|
|
||||||
REG(X86GpReg, r14w, kX86RegTypeGpw, 14, 2);
|
|
||||||
REG(X86GpReg, r15w, kX86RegTypeGpw, 15, 2);
|
|
||||||
|
|
||||||
REG(X86GpReg, eax, kX86RegTypeGpd, kX86RegIndexAx, 4);
|
// Gpw.
|
||||||
REG(X86GpReg, ecx, kX86RegTypeGpd, kX86RegIndexCx, 4);
|
{
|
||||||
REG(X86GpReg, edx, kX86RegTypeGpd, kX86RegIndexDx, 4);
|
REG(kX86RegTypeGpw, 0, 2),
|
||||||
REG(X86GpReg, ebx, kX86RegTypeGpd, kX86RegIndexBx, 4);
|
REG(kX86RegTypeGpw, 1, 2),
|
||||||
REG(X86GpReg, esp, kX86RegTypeGpd, kX86RegIndexSp, 4);
|
REG(kX86RegTypeGpw, 2, 2),
|
||||||
REG(X86GpReg, ebp, kX86RegTypeGpd, kX86RegIndexBp, 4);
|
REG(kX86RegTypeGpw, 3, 2),
|
||||||
REG(X86GpReg, esi, kX86RegTypeGpd, kX86RegIndexSi, 4);
|
REG(kX86RegTypeGpw, 4, 2),
|
||||||
REG(X86GpReg, edi, kX86RegTypeGpd, kX86RegIndexDi, 4);
|
REG(kX86RegTypeGpw, 5, 2),
|
||||||
REG(X86GpReg, r8d, kX86RegTypeGpd, 8, 4);
|
REG(kX86RegTypeGpw, 6, 2),
|
||||||
REG(X86GpReg, r9d, kX86RegTypeGpd, 9, 4);
|
REG(kX86RegTypeGpw, 7, 2),
|
||||||
REG(X86GpReg, r10d, kX86RegTypeGpd, 10, 4);
|
REG(kX86RegTypeGpw, 8, 2),
|
||||||
REG(X86GpReg, r11d, kX86RegTypeGpd, 11, 4);
|
REG(kX86RegTypeGpw, 9, 2),
|
||||||
REG(X86GpReg, r12d, kX86RegTypeGpd, 12, 4);
|
REG(kX86RegTypeGpw, 10, 2),
|
||||||
REG(X86GpReg, r13d, kX86RegTypeGpd, 13, 4);
|
REG(kX86RegTypeGpw, 11, 2),
|
||||||
REG(X86GpReg, r14d, kX86RegTypeGpd, 14, 4);
|
REG(kX86RegTypeGpw, 12, 2),
|
||||||
REG(X86GpReg, r15d, kX86RegTypeGpd, 15, 4);
|
REG(kX86RegTypeGpw, 13, 2),
|
||||||
|
REG(kX86RegTypeGpw, 14, 2),
|
||||||
|
REG(kX86RegTypeGpw, 15, 2)
|
||||||
|
},
|
||||||
|
|
||||||
REG(X86GpReg, rax, kX86RegTypeGpq, kX86RegIndexAx, 8);
|
// Gpd.
|
||||||
REG(X86GpReg, rcx, kX86RegTypeGpq, kX86RegIndexCx, 8);
|
{
|
||||||
REG(X86GpReg, rdx, kX86RegTypeGpq, kX86RegIndexDx, 8);
|
REG(kX86RegTypeGpd, 0, 4),
|
||||||
REG(X86GpReg, rbx, kX86RegTypeGpq, kX86RegIndexBx, 8);
|
REG(kX86RegTypeGpd, 1, 4),
|
||||||
REG(X86GpReg, rsp, kX86RegTypeGpq, kX86RegIndexSp, 8);
|
REG(kX86RegTypeGpd, 2, 4),
|
||||||
REG(X86GpReg, rbp, kX86RegTypeGpq, kX86RegIndexBp, 8);
|
REG(kX86RegTypeGpd, 3, 4),
|
||||||
REG(X86GpReg, rsi, kX86RegTypeGpq, kX86RegIndexSi, 8);
|
REG(kX86RegTypeGpd, 4, 4),
|
||||||
REG(X86GpReg, rdi, kX86RegTypeGpq, kX86RegIndexDi, 8);
|
REG(kX86RegTypeGpd, 5, 4),
|
||||||
REG(X86GpReg, r8, kX86RegTypeGpq, 8, 8);
|
REG(kX86RegTypeGpd, 6, 4),
|
||||||
REG(X86GpReg, r9, kX86RegTypeGpq, 9, 8);
|
REG(kX86RegTypeGpd, 7, 4),
|
||||||
REG(X86GpReg, r10, kX86RegTypeGpq, 10, 8);
|
REG(kX86RegTypeGpd, 8, 4),
|
||||||
REG(X86GpReg, r11, kX86RegTypeGpq, 11, 8);
|
REG(kX86RegTypeGpd, 9, 4),
|
||||||
REG(X86GpReg, r12, kX86RegTypeGpq, 12, 8);
|
REG(kX86RegTypeGpd, 10, 4),
|
||||||
REG(X86GpReg, r13, kX86RegTypeGpq, 13, 8);
|
REG(kX86RegTypeGpd, 11, 4),
|
||||||
REG(X86GpReg, r14, kX86RegTypeGpq, 14, 8);
|
REG(kX86RegTypeGpd, 12, 4),
|
||||||
REG(X86GpReg, r15, kX86RegTypeGpq, 15, 8);
|
REG(kX86RegTypeGpd, 13, 4),
|
||||||
|
REG(kX86RegTypeGpd, 14, 4),
|
||||||
|
REG(kX86RegTypeGpd, 15, 4)
|
||||||
|
},
|
||||||
|
|
||||||
REG(X86FpReg, fp0, kX86RegTypeFp, 0, 10);
|
// Gpq.
|
||||||
REG(X86FpReg, fp1, kX86RegTypeFp, 1, 10);
|
{
|
||||||
REG(X86FpReg, fp2, kX86RegTypeFp, 2, 10);
|
REG(kX86RegTypeGpq, 0, 8),
|
||||||
REG(X86FpReg, fp3, kX86RegTypeFp, 3, 10);
|
REG(kX86RegTypeGpq, 1, 8),
|
||||||
REG(X86FpReg, fp4, kX86RegTypeFp, 4, 10);
|
REG(kX86RegTypeGpq, 2, 8),
|
||||||
REG(X86FpReg, fp5, kX86RegTypeFp, 5, 10);
|
REG(kX86RegTypeGpq, 3, 8),
|
||||||
REG(X86FpReg, fp6, kX86RegTypeFp, 6, 10);
|
REG(kX86RegTypeGpq, 4, 8),
|
||||||
REG(X86FpReg, fp7, kX86RegTypeFp, 7, 10);
|
REG(kX86RegTypeGpq, 5, 8),
|
||||||
|
REG(kX86RegTypeGpq, 6, 8),
|
||||||
|
REG(kX86RegTypeGpq, 7, 8),
|
||||||
|
REG(kX86RegTypeGpq, 8, 8),
|
||||||
|
REG(kX86RegTypeGpq, 9, 8),
|
||||||
|
REG(kX86RegTypeGpq, 10, 8),
|
||||||
|
REG(kX86RegTypeGpq, 11, 8),
|
||||||
|
REG(kX86RegTypeGpq, 12, 8),
|
||||||
|
REG(kX86RegTypeGpq, 13, 8),
|
||||||
|
REG(kX86RegTypeGpq, 14, 8),
|
||||||
|
REG(kX86RegTypeGpq, 15, 8)
|
||||||
|
},
|
||||||
|
|
||||||
REG(X86MmReg, mm0, kX86RegTypeMm, 0, 8);
|
// Fp.
|
||||||
REG(X86MmReg, mm1, kX86RegTypeMm, 1, 8);
|
{
|
||||||
REG(X86MmReg, mm2, kX86RegTypeMm, 2, 8);
|
REG(kX86RegTypeFp, 0, 10),
|
||||||
REG(X86MmReg, mm3, kX86RegTypeMm, 3, 8);
|
REG(kX86RegTypeFp, 1, 10),
|
||||||
REG(X86MmReg, mm4, kX86RegTypeMm, 4, 8);
|
REG(kX86RegTypeFp, 2, 10),
|
||||||
REG(X86MmReg, mm5, kX86RegTypeMm, 5, 8);
|
REG(kX86RegTypeFp, 3, 10),
|
||||||
REG(X86MmReg, mm6, kX86RegTypeMm, 6, 8);
|
REG(kX86RegTypeFp, 4, 10),
|
||||||
REG(X86MmReg, mm7, kX86RegTypeMm, 7, 8);
|
REG(kX86RegTypeFp, 5, 10),
|
||||||
|
REG(kX86RegTypeFp, 6, 10),
|
||||||
|
REG(kX86RegTypeFp, 7, 10)
|
||||||
|
},
|
||||||
|
|
||||||
REG(X86XmmReg, xmm0, kX86RegTypeXmm, 0, 16);
|
// Mm.
|
||||||
REG(X86XmmReg, xmm1, kX86RegTypeXmm, 1, 16);
|
{
|
||||||
REG(X86XmmReg, xmm2, kX86RegTypeXmm, 2, 16);
|
REG(kX86RegTypeMm, 0, 8),
|
||||||
REG(X86XmmReg, xmm3, kX86RegTypeXmm, 3, 16);
|
REG(kX86RegTypeMm, 1, 8),
|
||||||
REG(X86XmmReg, xmm4, kX86RegTypeXmm, 4, 16);
|
REG(kX86RegTypeMm, 2, 8),
|
||||||
REG(X86XmmReg, xmm5, kX86RegTypeXmm, 5, 16);
|
REG(kX86RegTypeMm, 3, 8),
|
||||||
REG(X86XmmReg, xmm6, kX86RegTypeXmm, 6, 16);
|
REG(kX86RegTypeMm, 4, 8),
|
||||||
REG(X86XmmReg, xmm7, kX86RegTypeXmm, 7, 16);
|
REG(kX86RegTypeMm, 5, 8),
|
||||||
REG(X86XmmReg, xmm8, kX86RegTypeXmm, 8, 16);
|
REG(kX86RegTypeMm, 6, 8),
|
||||||
REG(X86XmmReg, xmm9, kX86RegTypeXmm, 9, 16);
|
REG(kX86RegTypeMm, 7, 8)
|
||||||
REG(X86XmmReg, xmm10, kX86RegTypeXmm, 10, 16);
|
},
|
||||||
REG(X86XmmReg, xmm11, kX86RegTypeXmm, 11, 16);
|
|
||||||
REG(X86XmmReg, xmm12, kX86RegTypeXmm, 12, 16);
|
|
||||||
REG(X86XmmReg, xmm13, kX86RegTypeXmm, 13, 16);
|
|
||||||
REG(X86XmmReg, xmm14, kX86RegTypeXmm, 14, 16);
|
|
||||||
REG(X86XmmReg, xmm15, kX86RegTypeXmm, 15, 16);
|
|
||||||
|
|
||||||
REG(X86YmmReg, ymm0, kX86RegTypeYmm, 0, 32);
|
// K.
|
||||||
REG(X86YmmReg, ymm1, kX86RegTypeYmm, 1, 32);
|
{
|
||||||
REG(X86YmmReg, ymm2, kX86RegTypeYmm, 2, 32);
|
REG(kX86RegTypeK, 0, 8),
|
||||||
REG(X86YmmReg, ymm3, kX86RegTypeYmm, 3, 32);
|
REG(kX86RegTypeK, 1, 8),
|
||||||
REG(X86YmmReg, ymm4, kX86RegTypeYmm, 4, 32);
|
REG(kX86RegTypeK, 2, 8),
|
||||||
REG(X86YmmReg, ymm5, kX86RegTypeYmm, 5, 32);
|
REG(kX86RegTypeK, 3, 8),
|
||||||
REG(X86YmmReg, ymm6, kX86RegTypeYmm, 6, 32);
|
REG(kX86RegTypeK, 4, 8),
|
||||||
REG(X86YmmReg, ymm7, kX86RegTypeYmm, 7, 32);
|
REG(kX86RegTypeK, 5, 8),
|
||||||
REG(X86YmmReg, ymm8, kX86RegTypeYmm, 8, 32);
|
REG(kX86RegTypeK, 6, 8),
|
||||||
REG(X86YmmReg, ymm9, kX86RegTypeYmm, 9, 32);
|
REG(kX86RegTypeK, 7, 8)
|
||||||
REG(X86YmmReg, ymm10, kX86RegTypeYmm, 10, 32);
|
},
|
||||||
REG(X86YmmReg, ymm11, kX86RegTypeYmm, 11, 32);
|
|
||||||
REG(X86YmmReg, ymm12, kX86RegTypeYmm, 12, 32);
|
|
||||||
REG(X86YmmReg, ymm13, kX86RegTypeYmm, 13, 32);
|
|
||||||
REG(X86YmmReg, ymm14, kX86RegTypeYmm, 14, 32);
|
|
||||||
REG(X86YmmReg, ymm15, kX86RegTypeYmm, 15, 32);
|
|
||||||
|
|
||||||
REG(X86SegReg, cs, kX86RegTypeSeg, kX86SegCs, 2);
|
// Xmm.
|
||||||
REG(X86SegReg, ss, kX86RegTypeSeg, kX86SegSs, 2);
|
{
|
||||||
REG(X86SegReg, ds, kX86RegTypeSeg, kX86SegDs, 2);
|
REG(kX86RegTypeXmm, 0, 16),
|
||||||
REG(X86SegReg, es, kX86RegTypeSeg, kX86SegEs, 2);
|
REG(kX86RegTypeXmm, 1, 16),
|
||||||
REG(X86SegReg, fs, kX86RegTypeSeg, kX86SegFs, 2);
|
REG(kX86RegTypeXmm, 2, 16),
|
||||||
REG(X86SegReg, gs, kX86RegTypeSeg, kX86SegGs, 2);
|
REG(kX86RegTypeXmm, 3, 16),
|
||||||
|
REG(kX86RegTypeXmm, 4, 16),
|
||||||
|
REG(kX86RegTypeXmm, 5, 16),
|
||||||
|
REG(kX86RegTypeXmm, 6, 16),
|
||||||
|
REG(kX86RegTypeXmm, 7, 16),
|
||||||
|
REG(kX86RegTypeXmm, 8, 16),
|
||||||
|
REG(kX86RegTypeXmm, 9, 16),
|
||||||
|
REG(kX86RegTypeXmm, 10, 16),
|
||||||
|
REG(kX86RegTypeXmm, 11, 16),
|
||||||
|
REG(kX86RegTypeXmm, 12, 16),
|
||||||
|
REG(kX86RegTypeXmm, 13, 16),
|
||||||
|
REG(kX86RegTypeXmm, 14, 16),
|
||||||
|
REG(kX86RegTypeXmm, 15, 16),
|
||||||
|
REG(kX86RegTypeXmm, 16, 16),
|
||||||
|
REG(kX86RegTypeXmm, 17, 16),
|
||||||
|
REG(kX86RegTypeXmm, 18, 16),
|
||||||
|
REG(kX86RegTypeXmm, 19, 16),
|
||||||
|
REG(kX86RegTypeXmm, 20, 16),
|
||||||
|
REG(kX86RegTypeXmm, 21, 16),
|
||||||
|
REG(kX86RegTypeXmm, 22, 16),
|
||||||
|
REG(kX86RegTypeXmm, 23, 16),
|
||||||
|
REG(kX86RegTypeXmm, 24, 16),
|
||||||
|
REG(kX86RegTypeXmm, 25, 16),
|
||||||
|
REG(kX86RegTypeXmm, 26, 16),
|
||||||
|
REG(kX86RegTypeXmm, 27, 16),
|
||||||
|
REG(kX86RegTypeXmm, 28, 16),
|
||||||
|
REG(kX86RegTypeXmm, 29, 16),
|
||||||
|
REG(kX86RegTypeXmm, 30, 16),
|
||||||
|
REG(kX86RegTypeXmm, 31, 16)
|
||||||
|
},
|
||||||
|
|
||||||
|
// Ymm.
|
||||||
|
{
|
||||||
|
REG(kX86RegTypeYmm, 0, 32),
|
||||||
|
REG(kX86RegTypeYmm, 1, 32),
|
||||||
|
REG(kX86RegTypeYmm, 2, 32),
|
||||||
|
REG(kX86RegTypeYmm, 3, 32),
|
||||||
|
REG(kX86RegTypeYmm, 4, 32),
|
||||||
|
REG(kX86RegTypeYmm, 5, 32),
|
||||||
|
REG(kX86RegTypeYmm, 6, 32),
|
||||||
|
REG(kX86RegTypeYmm, 7, 32),
|
||||||
|
REG(kX86RegTypeYmm, 8, 32),
|
||||||
|
REG(kX86RegTypeYmm, 9, 32),
|
||||||
|
REG(kX86RegTypeYmm, 10, 32),
|
||||||
|
REG(kX86RegTypeYmm, 11, 32),
|
||||||
|
REG(kX86RegTypeYmm, 12, 32),
|
||||||
|
REG(kX86RegTypeYmm, 13, 32),
|
||||||
|
REG(kX86RegTypeYmm, 14, 32),
|
||||||
|
REG(kX86RegTypeYmm, 15, 32),
|
||||||
|
REG(kX86RegTypeYmm, 16, 32),
|
||||||
|
REG(kX86RegTypeYmm, 17, 32),
|
||||||
|
REG(kX86RegTypeYmm, 18, 32),
|
||||||
|
REG(kX86RegTypeYmm, 19, 32),
|
||||||
|
REG(kX86RegTypeYmm, 20, 32),
|
||||||
|
REG(kX86RegTypeYmm, 21, 32),
|
||||||
|
REG(kX86RegTypeYmm, 22, 32),
|
||||||
|
REG(kX86RegTypeYmm, 23, 32),
|
||||||
|
REG(kX86RegTypeYmm, 24, 32),
|
||||||
|
REG(kX86RegTypeYmm, 25, 32),
|
||||||
|
REG(kX86RegTypeYmm, 26, 32),
|
||||||
|
REG(kX86RegTypeYmm, 27, 32),
|
||||||
|
REG(kX86RegTypeYmm, 28, 32),
|
||||||
|
REG(kX86RegTypeYmm, 29, 32),
|
||||||
|
REG(kX86RegTypeYmm, 30, 32),
|
||||||
|
REG(kX86RegTypeYmm, 31, 32)
|
||||||
|
},
|
||||||
|
|
||||||
|
// Zmm.
|
||||||
|
{
|
||||||
|
REG(kX86RegTypeZmm, 0, 64),
|
||||||
|
REG(kX86RegTypeZmm, 1, 64),
|
||||||
|
REG(kX86RegTypeZmm, 2, 64),
|
||||||
|
REG(kX86RegTypeZmm, 3, 64),
|
||||||
|
REG(kX86RegTypeZmm, 4, 64),
|
||||||
|
REG(kX86RegTypeZmm, 5, 64),
|
||||||
|
REG(kX86RegTypeZmm, 6, 64),
|
||||||
|
REG(kX86RegTypeZmm, 7, 64),
|
||||||
|
REG(kX86RegTypeZmm, 8, 64),
|
||||||
|
REG(kX86RegTypeZmm, 9, 64),
|
||||||
|
REG(kX86RegTypeZmm, 10, 64),
|
||||||
|
REG(kX86RegTypeZmm, 11, 64),
|
||||||
|
REG(kX86RegTypeZmm, 12, 64),
|
||||||
|
REG(kX86RegTypeZmm, 13, 64),
|
||||||
|
REG(kX86RegTypeZmm, 14, 64),
|
||||||
|
REG(kX86RegTypeZmm, 15, 64),
|
||||||
|
REG(kX86RegTypeZmm, 16, 64),
|
||||||
|
REG(kX86RegTypeZmm, 17, 64),
|
||||||
|
REG(kX86RegTypeZmm, 18, 64),
|
||||||
|
REG(kX86RegTypeZmm, 19, 64),
|
||||||
|
REG(kX86RegTypeZmm, 20, 64),
|
||||||
|
REG(kX86RegTypeZmm, 21, 64),
|
||||||
|
REG(kX86RegTypeZmm, 22, 64),
|
||||||
|
REG(kX86RegTypeZmm, 23, 64),
|
||||||
|
REG(kX86RegTypeZmm, 24, 64),
|
||||||
|
REG(kX86RegTypeZmm, 25, 64),
|
||||||
|
REG(kX86RegTypeZmm, 26, 64),
|
||||||
|
REG(kX86RegTypeZmm, 27, 64),
|
||||||
|
REG(kX86RegTypeZmm, 28, 64),
|
||||||
|
REG(kX86RegTypeZmm, 29, 64),
|
||||||
|
REG(kX86RegTypeZmm, 30, 64),
|
||||||
|
REG(kX86RegTypeZmm, 31, 64)
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
#undef REG
|
#undef REG
|
||||||
|
|
||||||
} // x86 namespace
|
|
||||||
} // asmjit namespace
|
} // asmjit namespace
|
||||||
|
|
||||||
// [Api-End]
|
// [Api-End]
|
||||||
|
|||||||
@@ -76,7 +76,7 @@ Error X86Scheduler::run(Node* start, Node* stop) {
|
|||||||
Node* next = node_->getNext();
|
Node* next = node_->getNext();
|
||||||
ASMJIT_ASSERT(node_->getType() == kNodeTypeInst);
|
ASMJIT_ASSERT(node_->getType() == kNodeTypeInst);
|
||||||
|
|
||||||
printf(" %s\n", X86Util::getInstInfo(static_cast<InstNode*>(node_)->getCode()).getInstName());
|
printf(" %s\n", X86Util::getInstInfo(static_cast<InstNode*>(node_)->getInstId()).getInstName());
|
||||||
node_ = next;
|
node_ = next;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -12,17 +12,23 @@ var fs = require("fs");
|
|||||||
// [Utilities]
|
// [Utilities]
|
||||||
// ----------------------------------------------------------------------------
|
// ----------------------------------------------------------------------------
|
||||||
|
|
||||||
var upFirst = function(s) {
|
function upFirst(s) {
|
||||||
if (!s)
|
if (!s)
|
||||||
return s;
|
return s;
|
||||||
return s[0].toUpperCase() + s.substr(1);
|
return s[0].toUpperCase() + s.substr(1);
|
||||||
};
|
}
|
||||||
|
|
||||||
var trimLeft = function(s) {
|
function trimLeft(s) {
|
||||||
return s.replace(/^\s+/, "");
|
return s.replace(/^\s+/, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
var inject = function(s, start, end, code) {
|
function padLeft(s, n) {
|
||||||
|
while (s.length < n)
|
||||||
|
s += " ";
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
function inject(s, start, end, code) {
|
||||||
var iStart = s.indexOf(start);
|
var iStart = s.indexOf(start);
|
||||||
var iEnd = s.indexOf(end);
|
var iEnd = s.indexOf(end);
|
||||||
|
|
||||||
@@ -33,7 +39,7 @@ var inject = function(s, start, end, code) {
|
|||||||
throw new Error("Couldn't locate end mark.");
|
throw new Error("Couldn't locate end mark.");
|
||||||
|
|
||||||
return s.substr(0, iStart + start.length) + code + s.substr(iEnd);
|
return s.substr(0, iStart + start.length) + code + s.substr(iEnd);
|
||||||
};
|
}
|
||||||
|
|
||||||
// ----------------------------------------------------------------------------
|
// ----------------------------------------------------------------------------
|
||||||
// [Database]
|
// [Database]
|
||||||
@@ -172,66 +178,100 @@ var generate = function(fileName, arch) {
|
|||||||
var code = "";
|
var code = "";
|
||||||
var disclaimer = "// Automatically generated, do not edit.\n";
|
var disclaimer = "// Automatically generated, do not edit.\n";
|
||||||
|
|
||||||
|
var instCount = 0;
|
||||||
|
var sizeof_X86InstInfo = 8;
|
||||||
|
var sizeof_X86InstExtendedInfo = 24;
|
||||||
|
|
||||||
// Create database.
|
// Create database.
|
||||||
var db = new Database();
|
var db = new Database();
|
||||||
var re = new RegExp(
|
var re = new RegExp(
|
||||||
"INST\\(([A-Za-z0-9_]+)\\s*," + // [01] Inst-Code.
|
"INST\\(([A-Za-z0-9_]+)\\s*," + // [01] Id.
|
||||||
"\\s*\\\"([A-Za-z0-9_ ]*)\\\"\\s*," + // [02] Inst-Name.
|
"\\s*\\\"([A-Za-z0-9_ ]*)\\\"\\s*," + // [02] Name.
|
||||||
"([^,]+)," + // [03] Inst-Group.
|
"(.{20}[^,]*)," + // [03] Opcode[0].
|
||||||
"([^,]+)," + // [04] Inst-Flags.
|
"(.{20}[^,]*)," + // [04] Opcode[1].
|
||||||
"([^,]+)," + // [05] Move-Size.
|
"([^,]+)," + // [05] Encoding.
|
||||||
"([^,]+)," + // [06] Operand-Flags[0].
|
"([^,]+)," + // [06] IFLAGS.
|
||||||
"([^,]+)," + // [07] Operand-Flags[1].
|
"\\s*EF\\(([A-Z_]+)\\)\\s*," + // [07] EFLAGS.
|
||||||
"([^,]+)," + // [08] Operand-Flags[2].
|
"([^,]+)," + // [08] Write-Index.
|
||||||
"([^,]+)," + // [09] Operand-Flags[3].
|
"([^,]+)," + // [09] Write-Size.
|
||||||
"\\s*E\\(([A-Z_]+)\\)\\s*," + // [10] EFLAGS.
|
"([^,]+)," + // [10] Operand-Flags[0].
|
||||||
"(.{17}[^,]*)," + // [11] OpCode[0].
|
"([^,]+)," + // [11] Operand-Flags[1].
|
||||||
"(.{17}[^\\)]*)\\)", // [12] OpCode[1].
|
"([^,]+)," + // [12] Operand-Flags[2].
|
||||||
|
"([^,]+)," + // [13] Operand-Flags[3].
|
||||||
|
"([^\\)]+)\\)", // [14] Operand-Flags[4].
|
||||||
"g");
|
"g");
|
||||||
|
|
||||||
|
var i, k, m;
|
||||||
|
var srcForm = "";
|
||||||
|
|
||||||
while (m = re.exec(data)) {
|
while (m = re.exec(data)) {
|
||||||
// Extract instruction ID and Name.
|
// Extract instruction ID and Name.
|
||||||
var id = m[1];
|
var id = m[1];
|
||||||
var name = m[2];
|
var name = m[2];
|
||||||
|
|
||||||
// Extract data that goes to the secondary table (ExtendedInfo).
|
// Extract data that goes to the secondary table (X86InstExtendedInfo).
|
||||||
var instGroup = trimLeft(m[3]);
|
var opcode0 = trimLeft(m[3]);
|
||||||
var instFlags = trimLeft(m[4]);
|
var opcode1 = trimLeft(m[4]);
|
||||||
var moveSize = trimLeft(m[5]);
|
var encoding = trimLeft(m[5]);
|
||||||
|
var iflags = trimLeft(m[6]);
|
||||||
var opFlags0 = trimLeft(m[6]);
|
var eflags = m[7];
|
||||||
var opFlags1 = trimLeft(m[7]);
|
var writeIndex = trimLeft(m[8]);
|
||||||
var opFlags2 = trimLeft(m[8]);
|
var writeSize = trimLeft(m[9]);
|
||||||
var opFlags3 = trimLeft(m[9]);
|
var oflags0 = trimLeft(m[10]);
|
||||||
var eflags = m[10];
|
var oflags1 = trimLeft(m[11]);
|
||||||
var opCode1 = trimLeft(m[12]);
|
var oflags2 = trimLeft(m[12]);
|
||||||
|
var oflags3 = trimLeft(m[13]);
|
||||||
|
var oflags4 = trimLeft(m[14]);
|
||||||
|
|
||||||
// Generate EFlags-In and EFlags-Out.
|
// Generate EFlags-In and EFlags-Out.
|
||||||
var eflagsIn = decToHex(getEFlagsMask(eflags, "RX"), 2);
|
var eflagsIn = decToHex(getEFlagsMask(eflags, "RX" ), 2);
|
||||||
var eflagsOut = decToHex(getEFlagsMask(eflags, "WXU"), 2);
|
var eflagsOut = decToHex(getEFlagsMask(eflags, "WXU"), 2);
|
||||||
|
|
||||||
var extData = "" +
|
var extData =
|
||||||
instGroup + ", " +
|
encoding + ", " +
|
||||||
moveSize + ", " +
|
writeIndex + ", " +
|
||||||
|
writeSize + ", " +
|
||||||
eflagsIn + ", " +
|
eflagsIn + ", " +
|
||||||
eflagsOut + ", " +
|
eflagsOut + ", " +
|
||||||
instFlags + ", " +
|
"0" + ", " +
|
||||||
"{ " + opFlags0 + ", " + opFlags1 + ", " + opFlags2 + ", " + opFlags3 + ", U }, " +
|
"{ " + oflags0 + ", " + oflags1 + ", " + oflags2 + ", " + oflags3 + ", " + oflags4 + " }, " +
|
||||||
opCode1;
|
iflags + ", " +
|
||||||
|
opcode1;
|
||||||
|
|
||||||
|
srcForm += " INST(" +
|
||||||
|
padLeft(id, 27) + ", " +
|
||||||
|
padLeft('"' + name + '"', 19) + ", " +
|
||||||
|
opcode0 + ", " +
|
||||||
|
opcode1 + ", " +
|
||||||
|
encoding + ", " +
|
||||||
|
iflags + ", " +
|
||||||
|
"EF(" + eflags + "), " +
|
||||||
|
writeIndex + ", " +
|
||||||
|
writeSize + ", " +
|
||||||
|
oflags0 + ", " +
|
||||||
|
oflags1 + ", " +
|
||||||
|
oflags2 + ", " +
|
||||||
|
oflags3 + ", " +
|
||||||
|
oflags4 + "),\n";
|
||||||
|
|
||||||
db.add(name, id, extData);
|
db.add(name, id, extData);
|
||||||
|
instCount++;
|
||||||
}
|
}
|
||||||
|
// fs.writeFileSync("srcform.cpp", srcForm, "utf8");
|
||||||
db.index();
|
db.index();
|
||||||
|
|
||||||
console.log("Number of instructions: " + db.instNames.array.length);
|
var instDataSize = instCount * sizeof_X86InstInfo + db.extendedData.length * sizeof_X86InstExtendedInfo;
|
||||||
console.log("Instruction names size: " + db.instNames.getSize());
|
|
||||||
console.log("Extended-info length : " + db.extendedData.length);
|
console.log("Number of Instructions : " + instCount);
|
||||||
|
console.log("Number of ExtInfo Rows : " + db.extendedData.length);
|
||||||
|
console.log("Instructions' Data Size: " + instDataSize);
|
||||||
|
console.log("Instructions' Names Size: " + db.instNames.getSize());
|
||||||
|
|
||||||
// Generate InstName[] string.
|
// Generate InstName[] string.
|
||||||
code += disclaimer;
|
code += disclaimer;
|
||||||
code += "#if !defined(ASMJIT_DISABLE_INST_NAMES)\n";
|
code += "#if !defined(ASMJIT_DISABLE_NAMES)\n";
|
||||||
code += "const char _" + arch + "InstName[] =\n";
|
code += "const char _" + arch + "InstName[] =\n";
|
||||||
for (var k in db.instMap) {
|
for (k in db.instMap) {
|
||||||
var inst = db.instMap[k];
|
var inst = db.instMap[k];
|
||||||
code += " \"" + k + "\\0\"\n";
|
code += " \"" + k + "\\0\"\n";
|
||||||
}
|
}
|
||||||
@@ -248,7 +288,7 @@ var generate = function(fileName, arch) {
|
|||||||
|
|
||||||
code += disclaimer;
|
code += disclaimer;
|
||||||
code += "static const uint16_t _" + arch + "InstAlphaIndex[26] = {\n";
|
code += "static const uint16_t _" + arch + "InstAlphaIndex[26] = {\n";
|
||||||
for (var i = 0; i < db.instAlpha.length; i++) {
|
for (i = 0; i < db.instAlpha.length; i++) {
|
||||||
var id = db.instAlpha[i];
|
var id = db.instAlpha[i];
|
||||||
code += " " + (id === undefined ? "0xFFFF" : id);
|
code += " " + (id === undefined ? "0xFFFF" : id);
|
||||||
if (i !== db.instAlpha.length - 1)
|
if (i !== db.instAlpha.length - 1)
|
||||||
@@ -260,18 +300,18 @@ var generate = function(fileName, arch) {
|
|||||||
// Generate NameIndex.
|
// Generate NameIndex.
|
||||||
code += disclaimer;
|
code += disclaimer;
|
||||||
code += "enum k" + Arch + "InstData_NameIndex {\n";
|
code += "enum k" + Arch + "InstData_NameIndex {\n";
|
||||||
for (var k in db.instMap) {
|
for (k in db.instMap) {
|
||||||
var inst = db.instMap[k];
|
var inst = db.instMap[k];
|
||||||
code += " " + inst.id + "_NameIndex = " + inst.nameIndex + ",\n";
|
code += " " + inst.id + "_NameIndex = " + inst.nameIndex + ",\n";
|
||||||
}
|
}
|
||||||
code = code.substr(0, code.length - 2) + "\n};\n";
|
code = code.substr(0, code.length - 2) + "\n};\n";
|
||||||
code += "#endif // !ASMJIT_DISABLE_INST_NAMES\n"
|
code += "#endif // !ASMJIT_DISABLE_NAMES\n"
|
||||||
code += "\n";
|
code += "\n";
|
||||||
|
|
||||||
// Generate ExtendedInfo.
|
// Generate ExtendedInfo.
|
||||||
code += disclaimer;
|
code += disclaimer;
|
||||||
code += "const " + Arch + "InstExtendedInfo _" + arch + "InstExtendedInfo[] = {\n";
|
code += "const " + Arch + "InstExtendedInfo _" + arch + "InstExtendedInfo[] = {\n";
|
||||||
for (var i = 0; i < db.extendedData.length; i++) {
|
for (i = 0; i < db.extendedData.length; i++) {
|
||||||
code += " { " + db.extendedData[i] + " }";
|
code += " { " + db.extendedData[i] + " }";
|
||||||
if (i !== db.extendedData.length - 1)
|
if (i !== db.extendedData.length - 1)
|
||||||
code += ",";
|
code += ",";
|
||||||
@@ -282,7 +322,7 @@ var generate = function(fileName, arch) {
|
|||||||
|
|
||||||
code += disclaimer;
|
code += disclaimer;
|
||||||
code += "enum k" + Arch + "InstData_ExtendedIndex {\n";
|
code += "enum k" + Arch + "InstData_ExtendedIndex {\n";
|
||||||
for (var k in db.instMap) {
|
for (k in db.instMap) {
|
||||||
var inst = db.instMap[k];
|
var inst = db.instMap[k];
|
||||||
code += " " + inst.id + "_ExtendedIndex = " + inst.extendedIndex + ",\n";
|
code += " " + inst.id + "_ExtendedIndex = " + inst.extendedIndex + ",\n";
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user