diff --git a/src/asmjit/base/assembler.h b/src/asmjit/base/assembler.h index 43a6d45..a2ed9f4 100644 --- a/src/asmjit/base/assembler.h +++ b/src/asmjit/base/assembler.h @@ -123,10 +123,50 @@ ASMJIT_ENUM(InstOptions) { //! conditional hints after P4 and AMD has never supported them. kInstOptionNotTaken = 0x00000008, - //! Don't follow the jump (Compiler-only). + //! Don't follow the jump (Compiler only). //! //! Prevents following the jump during compilation. - kInstOptionUnfollow = 0x00000010 + kInstOptionUnfollow = 0x00000010, + + //! Overwrite the destination operand (Compiler only). + //! + //! Hint that is important for variable liveness analysis. It tells the + //! compiler that the destination operand will be overwritten now or by + //! adjacent instructions. Compiler knows when a variable is overwritten by + //! a single instruction, for example you don't have to mark "movaps" or + //! "pxor x, x" instructions, however, if a pair of instructions is used, + //! and the first of them doesn't completely overwrite the content of the + //! destination, then the compiler fails to mark that variable as dead in. + //! + //! X86/X64 Specific + //! ---------------- + //! + //! - All instructions that always overwrite at least the size of the + //! register that the variable uses, for example "mov", "movq", "movaps" + //! don't need the overwrite modifier to be used - conversion, shuffle, + //! and other miscellaneous instructions included. + //! + //! - All instructions that clear the destination register if all operands + //! are the same, for example "xor x, x", "pcmpeqb", etc... + //! + //! - Consecutive instructions that partially overwrite the variable until + //! there is no old content require the `overwrite()` to be used. Some + //! examples (not always the best use cases though): + //! + //! - `movlps xmm0, ?` followed by `movhps xmm0, ?` and vice versa + //! - `movlpd xmm0, ?` followed by `movhpd xmm0, ?` and vice versa + //! 
- `mov al, ?` followed by `and ax, 0xFF` + //! - `mov al, ?` followed by `mov ah, al` + //! - `pinsrq xmm0, ?, 0` followed by `pinsrq xmm0, ?, 1` + //! + //! - If allocated variable is used temporarily for scalar operations. For + //! example if you allocate a full vector like `X86Compiler::newXmm()` + //! and then use that vector for scalar operations you should use + //! `overwrite()` directive: + //! + //! - `sqrtss x, y` - only LO element of `x` is changed, if you don't use + //! HI elements, use `X86Compiler.overwrite().sqrtss(x, y)`. + kInstOptionOverwrite = 0x00000020 }; // ============================================================================ diff --git a/src/asmjit/x86/x86assembler.h b/src/asmjit/x86/x86assembler.h index db0a8c3..ec235f2 100644 --- a/src/asmjit/x86/x86assembler.h +++ b/src/asmjit/x86/x86assembler.h @@ -26,93 +26,93 @@ namespace asmjit { // ============================================================================ // \internal -#define ASMJIT_X86_EMIT_OPTIONS(_Class_) \ +#define ASMJIT_X86_EMIT_OPTIONS(T) \ /*! Force short form of jmp/jcc instruction. */ \ - ASMJIT_INLINE _Class_& short_() { \ + ASMJIT_INLINE T& short_() { \ _instOptions |= kInstOptionShortForm; \ return *this; \ } \ \ /*! Force long form of jmp/jcc instruction. */ \ - ASMJIT_INLINE _Class_& long_() { \ + ASMJIT_INLINE T& long_() { \ _instOptions |= kInstOptionLongForm; \ return *this; \ } \ \ /*! Condition is likely to be taken (has only benefit on P4). */ \ - ASMJIT_INLINE _Class_& taken() { \ + ASMJIT_INLINE T& taken() { \ _instOptions |= kInstOptionTaken; \ return *this; \ } \ \ /*! Condition is unlikely to be taken (has only benefit on P4). */ \ - ASMJIT_INLINE _Class_& notTaken() { \ + ASMJIT_INLINE T& notTaken() { \ _instOptions |= kInstOptionNotTaken; \ return *this; \ } \ \ /*! Use LOCK prefix. */ \ - ASMJIT_INLINE _Class_& lock() { \ + ASMJIT_INLINE T& lock() { \ _instOptions |= kX86InstOptionLock; \ return *this; \ } \ \ /*! Force REX prefix (X64). 
*/ \ - ASMJIT_INLINE _Class_& rex() { \ + ASMJIT_INLINE T& rex() { \ _instOptions |= kX86InstOptionRex; \ return *this; \ } \ \ /*! Force 3-byte VEX prefix (AVX+). */ \ - ASMJIT_INLINE _Class_& vex3() { \ + ASMJIT_INLINE T& vex3() { \ _instOptions |= kX86InstOptionVex3; \ return *this; \ } \ \ /*! Force 4-byte EVEX prefix (AVX512+). */ \ - ASMJIT_INLINE _Class_& evex() { \ + ASMJIT_INLINE T& evex() { \ _instOptions |= kX86InstOptionEvex; \ return *this; \ } \ \ /*! Use zeroing instead of merging (AVX512+). */ \ - ASMJIT_INLINE _Class_& z() { \ + ASMJIT_INLINE T& z() { \ _instOptions |= kX86InstOptionEvexZero; \ return *this; \ } \ \ /*! Broadcast one element to all other elements (AVX512+). */ \ - ASMJIT_INLINE _Class_& _1ToN() { \ + ASMJIT_INLINE T& _1ToN() { \ _instOptions |= kX86InstOptionEvexOneN; \ return *this; \ } \ \ /*! Suppress all exceptions (AVX512+). */ \ - ASMJIT_INLINE _Class_& sae() { \ + ASMJIT_INLINE T& sae() { \ _instOptions |= kX86InstOptionEvexSae; \ return *this; \ } \ \ /*! Static rounding mode `round-to-nearest` (even) and `SAE` (AVX512+). */ \ - ASMJIT_INLINE _Class_& rn_sae() { \ + ASMJIT_INLINE T& rn_sae() { \ _instOptions |= kX86InstOptionEvexRnSae; \ return *this; \ } \ \ /*! Static rounding mode `round-down` (toward -inf) and `SAE` (AVX512+). */ \ - ASMJIT_INLINE _Class_& rd_sae() { \ + ASMJIT_INLINE T& rd_sae() { \ _instOptions |= kX86InstOptionEvexRdSae; \ return *this; \ } \ \ /*! Static rounding mode `round-up` (toward +inf) and `SAE` (AVX512+). */ \ - ASMJIT_INLINE _Class_& ru_sae() { \ + ASMJIT_INLINE T& ru_sae() { \ _instOptions |= kX86InstOptionEvexRuSae; \ return *this; \ } \ \ /*! Static rounding mode `round-toward-zero` (truncate) and `SAE` (AVX512+). */ \ - ASMJIT_INLINE _Class_& rz_sae() { \ + ASMJIT_INLINE T& rz_sae() { \ _instOptions |= kX86InstOptionEvexRzSae; \ return *this; \ } @@ -3558,9 +3558,9 @@ struct ASMJIT_VIRTAPI X86Assembler : public Assembler { INST_1x(xsave64, kX86InstIdXsave64, X86Mem) //! 
Save Processor Extended States specified by `EDX:EAX` (Optimized) (XSAVEOPT). - INST_1x(xsaveopt, kX86InstIdXsave, X86Mem) + INST_1x(xsaveopt, kX86InstIdXsaveopt, X86Mem) //! Save Processor Extended States specified by `EDX:EAX` (Optimized) (XSAVEOPT&X64). - INST_1x(xsaveopt64, kX86InstIdXsave64, X86Mem) + INST_1x(xsaveopt64, kX86InstIdXsaveopt64, X86Mem) //! Get XCR - `EDX:EAX <- XCR[ECX]` (XSAVE). INST_0x(xgetbv, kX86InstIdXgetbv) diff --git a/src/asmjit/x86/x86compiler.h b/src/asmjit/x86/x86compiler.h index c1ad410..fb9ca12 100644 --- a/src/asmjit/x86/x86compiler.h +++ b/src/asmjit/x86/x86compiler.h @@ -1226,6 +1226,12 @@ struct ASMJIT_VIRTAPI X86Compiler : public Compiler { return *this; } + //! Tell the compiler that the destination variable will be overwritten. + ASMJIT_INLINE X86Compiler& overwrite() { + _instOptions |= kInstOptionOverwrite; + return *this; + } + // -------------------------------------------------------------------------- // [Members] // -------------------------------------------------------------------------- @@ -4367,9 +4373,9 @@ struct ASMJIT_VIRTAPI X86Compiler : public Compiler { INST_3x(xsave64, kX86InstIdXsave64, X86Mem, X86GpVar, X86GpVar) //! Save Processor Extended States specified by `o1:o2` (Optimized) (XSAVEOPT). - INST_3x(xsaveopt, kX86InstIdXsave, X86Mem, X86GpVar, X86GpVar) + INST_3x(xsaveopt, kX86InstIdXsaveopt, X86Mem, X86GpVar, X86GpVar) //! Save Processor Extended States specified by `o1:o2` (Optimized) (XSAVEOPT&X64). - INST_3x(xsaveopt64, kX86InstIdXsave64, X86Mem, X86GpVar, X86GpVar) + INST_3x(xsaveopt64, kX86InstIdXsaveopt64, X86Mem, X86GpVar, X86GpVar) //! Get XCR - `o1:o2 <- XCR[o0]` (`EDX:EAX <- XCR[ECX]`) (XSAVE). 
INST_3x(xgetbv, kX86InstIdXgetbv, X86GpVar, X86GpVar, X86GpVar) diff --git a/src/asmjit/x86/x86compilercontext.cpp b/src/asmjit/x86/x86compilercontext.cpp index cae0d70..9e5f581 100644 --- a/src/asmjit/x86/x86compilercontext.cpp +++ b/src/asmjit/x86/x86compilercontext.cpp @@ -1923,38 +1923,40 @@ struct SArgData { uint32_t aType; }; -#define SARG(_Dst_, S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20) \ - (S0 << 0) | (S1 << 1) | (S2 << 2) | (S3 << 3) | \ - (S4 << 4) | (S5 << 5) | (S6 << 6) | (S7 << 7) | \ - (S8 << 8) | (S9 << 9) | (S10 << 10) | (S11 << 11) | \ - (S12 << 12) | (S13 << 13) | (S14 << 14) | (S15 << 15) | \ - (S16 << 16) | (S17 << 17) | (S18 << 18) | (S19 << 19) | \ - (S20 << 20) -#define A 0 /* Auto-convert (doesn't need conversion step). */ +#define SARG(dst, s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24) \ + (s0 << 0) | (s1 << 1) | (s2 << 2) | (s3 << 3) | (s4 << 4) | (s5 << 5) | (s6 << 6) | (s7 << 7) | \ + (s8 << 8) | (s9 << 9) | (s10 << 10) | (s11 << 11) | (s12 << 12) | (s13 << 13) | (s14 << 14) | (s15 << 15) | \ + (s16 << 16) | (s17 << 17) | (s18 << 18) | (s19 << 19) | (s20 << 20) | (s21 << 21) | (s22 << 22) | (s23 << 23) | \ + (s24 << 24) +#define A 0 // Auto-convert (doesn't need conversion step). 
static const uint32_t X86Context_sArgConvTable[kX86VarTypeCount] = { - // dst <- | i8| u8|i16|u16|i32|u32|i64|u64| iP| uP|f32|f64|mmx|xmm|xSs|xPs|xSd|xPd|ymm|yPs|yPd| - //--------+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ - SARG(i8 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , A , A , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 1 , 1 ), - SARG(u8 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , A , A , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 1 , 1 ), - SARG(i16 , A , A , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , A , A , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 1 , 1 ), - SARG(u16 , A , A , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , A , A , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 1 , 1 ), - SARG(i32 , A , A , A , A , 0 , 0 , 0 , 0 , 0 , 0 , A , A , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 1 , 1 ), - SARG(u32 , A , A , A , A , 0 , 0 , 0 , 0 , 0 , 0 , A , A , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 1 , 1 ), - SARG(i64 , A , A , A , A , A , A , 0 , 0 , A , A , A , A , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 1 , 1 ), - SARG(u64 , A , A , A , A , A , A , 0 , 0 , A , A , A , A , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 1 , 1 ), - SARG(iPtr , A , A , A , A , A , A , A , A , 0 , 0 , A , A , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 1 , 1 ), - SARG(uPtr , A , A , A , A , A , A , A , A , 0 , 0 , A , A , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 1 , 1 ), - SARG(f32 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 , A , 0 , 0 , 0 , 0 , 1 , 1 , 0 , 0 , 1 ), - SARG(f64 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , A , 0 , 0 , 0 , 1 , 1 , 0 , 0 , 0 , 1 , 0 ), - SARG(mmx , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ), - SARG(xmm , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ), - SARG(xSs , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 1 , 1 , 0 , 0 , 1 ), - SARG(xPs , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 1 , 1 , 0 , 0 , 1 ), - SARG(xSd , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 , 1 , 1 , 0 , 0 , 0 , 1 , 0 ), - SARG(xPd , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 , 
1 , 1 , 0 , 0 , 0 , 1 , 0 ), - SARG(ymm , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ), - SARG(yPs , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 1 , 1 , 0 , 0 , 1 ), - SARG(yPd , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 , 1 , 1 , 0 , 0 , 0 , 1 , 0 ) + // dst <- | i8| u8|i16|u16|i32|u32|i64|u64| iP| uP|f32|f64|mmx| k |xmm|xSs|xPs|xSd|xPd|ymm|yPs|yPd|zmm|zPs|zPd| + //--------+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ + SARG(i8 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , A , A , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 1 , 1 , 0 , 1 , 1 ), + SARG(u8 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , A , A , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 1 , 1 , 0 , 1 , 1 ), + SARG(i16 , A , A , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , A , A , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 1 , 1 , 0 , 1 , 1 ), + SARG(u16 , A , A , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , A , A , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 1 , 1 , 0 , 1 , 1 ), + SARG(i32 , A , A , A , A , 0 , 0 , 0 , 0 , 0 , 0 , A , A , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 1 , 1 , 0 , 1 , 1 ), + SARG(u32 , A , A , A , A , 0 , 0 , 0 , 0 , 0 , 0 , A , A , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 1 , 1 , 0 , 1 , 1 ), + SARG(i64 , A , A , A , A , A , A , 0 , 0 , A , A , A , A , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 1 , 1 , 0 , 1 , 1 ), + SARG(u64 , A , A , A , A , A , A , 0 , 0 , A , A , A , A , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 1 , 1 , 0 , 1 , 1 ), + SARG(iPtr , A , A , A , A , A , A , A , A , 0 , 0 , A , A , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 1 , 1 , 0 , 1 , 1 ), + SARG(uPtr , A , A , A , A , A , A , A , A , 0 , 0 , A , A , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 1 , 1 , 0 , 1 , 1 ), + SARG(f32 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 , A , 0 , 0 , 0 , 0 , 0 , 1 , 1 , 0 , 0 , 1 , 0 , 0 , 1 ), + SARG(f64 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , A , 0 , 0 , 0 , 0 , 1 , 1 , 0 , 0 , 0 , 1 , 0 , 0 , 1 , 0 ), + SARG(mmx , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ), + SARG(k , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ), + SARG(xmm , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ), + SARG(xSs , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 1 , 1 , 0 , 0 , 1 , 0 , 0 , 1 ), + SARG(xPs , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 1 , 1 , 0 , 0 , 1 , 0 , 0 , 1 ), + SARG(xSd , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 , 0 , 1 , 1 , 0 , 0 , 0 , 1 , 0 , 0 , 1 , 0 ), + SARG(xPd , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 , 0 , 1 , 1 , 0 , 0 , 0 , 1 , 0 , 0 , 1 , 0 ), + SARG(ymm , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ), + SARG(yPs , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 1 , 1 , 0 , 0 , 1 , 0 , 0 , 1 ), + SARG(yPd , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 , 0 , 1 , 1 , 0 , 0 , 0 , 1 , 0 , 0 , 1 , 0 ), + SARG(zmm , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ), + SARG(zPs , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 1 , 1 , 0 , 0 , 1 , 0 , 0 , 1 ), + SARG(zPd , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 , 0 , 1 , 1 , 0 , 0 , 0 , 1 , 0 , 0 , 1 , 0 ) }; #undef A #undef SARG @@ -2490,10 +2492,14 @@ _NextGroup: // Read/Write is usualy the combination of the first operand. combinedFlags = inFlags | outFlags; + // Handle overwrite option. + if (node->getOptions() & kInstOptionOverwrite) { + combinedFlags = outFlags; + } // Move instructions typically overwrite the first operand, // but there are some exceptions based on the operands' size // and type. 
- if (extendedInfo.isMove()) { + else if (extendedInfo.isMove()) { uint32_t movSize = extendedInfo.getWriteSize(); uint32_t varSize = vd->getSize(); diff --git a/src/asmjit/x86/x86inst.cpp b/src/asmjit/x86/x86inst.cpp index d23c641..6499fc5 100644 --- a/src/asmjit/x86/x86inst.cpp +++ b/src/asmjit/x86/x86inst.cpp @@ -3774,8 +3774,8 @@ const X86InstInfo _x86InstInfo[] = { INST(kX86InstIdPextrd , "pextrd" , O_000F3A(16,U,_,_,_), U , Enc(ExtExtract) , F(Move) , EF(________), 0 , 8 , O(GdMem) , O(Xmm) , U , U , U ), INST(kX86InstIdPextrq , "pextrq" , O_000F3A(16,U,_,W,_), U , Enc(ExtExtract) , F(Move) , EF(________), 0 , 8 , O(GqdMem) , O(Xmm) , U , U , U ), INST(kX86InstIdPextrw , "pextrw" , O_000F00(C5,U,_,_,_), O_000F3A(15,U,_,_,_), Enc(ExtExtrW) , F(Move) , EF(________), 0 , 8 , O(GdMem) , O(MmXmm) , U , U , U ), - INST(kX86InstIdPf2id , "pf2id" , O_000F0F(1D,U,_,_,_), U , Enc(3dNow) , F(None) , EF(________), 0 , 0 , O(Mm) , O(MmMem) , U , U , U ), - INST(kX86InstIdPf2iw , "pf2iw" , O_000F0F(1C,U,_,_,_), U , Enc(3dNow) , F(None) , EF(________), 0 , 0 , O(Mm) , O(MmMem) , U , U , U ), + INST(kX86InstIdPf2id , "pf2id" , O_000F0F(1D,U,_,_,_), U , Enc(3dNow) , F(Move) , EF(________), 0 , 8 , O(Mm) , O(MmMem) , U , U , U ), + INST(kX86InstIdPf2iw , "pf2iw" , O_000F0F(1C,U,_,_,_), U , Enc(3dNow) , F(Move) , EF(________), 0 , 8 , O(Mm) , O(MmMem) , U , U , U ), INST(kX86InstIdPfacc , "pfacc" , O_000F0F(AE,U,_,_,_), U , Enc(3dNow) , F(None) , EF(________), 0 , 0 , O(Mm) , O(MmMem) , U , U , U ), INST(kX86InstIdPfadd , "pfadd" , O_000F0F(9E,U,_,_,_), U , Enc(3dNow) , F(None) , EF(________), 0 , 0 , O(Mm) , O(MmMem) , U , U , U ), INST(kX86InstIdPfcmpeq , "pfcmpeq" , O_000F0F(B0,U,_,_,_), U , Enc(3dNow) , F(None) , EF(________), 0 , 0 , O(Mm) , O(MmMem) , U , U , U ), @@ -3786,7 +3786,7 @@ const X86InstInfo _x86InstInfo[] = { INST(kX86InstIdPfmul , "pfmul" , O_000F0F(B4,U,_,_,_), U , Enc(3dNow) , F(None) , EF(________), 0 , 0 , O(Mm) , O(MmMem) , U , U , U ), 
INST(kX86InstIdPfnacc , "pfnacc" , O_000F0F(8A,U,_,_,_), U , Enc(3dNow) , F(None) , EF(________), 0 , 0 , O(Mm) , O(MmMem) , U , U , U ), INST(kX86InstIdPfpnacc , "pfpnacc" , O_000F0F(8E,U,_,_,_), U , Enc(3dNow) , F(None) , EF(________), 0 , 0 , O(Mm) , O(MmMem) , U , U , U ), - INST(kX86InstIdPfrcp , "pfrcp" , O_000F0F(96,U,_,_,_), U , Enc(3dNow) , F(None) , EF(________), 0 , 0 , O(Mm) , O(MmMem) , U , U , U ), + INST(kX86InstIdPfrcp , "pfrcp" , O_000F0F(96,U,_,_,_), U , Enc(3dNow) , F(Move) , EF(________), 0 , 8 , O(Mm) , O(MmMem) , U , U , U ), INST(kX86InstIdPfrcpit1 , "pfrcpit1" , O_000F0F(A6,U,_,_,_), U , Enc(3dNow) , F(None) , EF(________), 0 , 0 , O(Mm) , O(MmMem) , U , U , U ), INST(kX86InstIdPfrcpit2 , "pfrcpit2" , O_000F0F(B6,U,_,_,_), U , Enc(3dNow) , F(None) , EF(________), 0 , 0 , O(Mm) , O(MmMem) , U , U , U ), INST(kX86InstIdPfrsqit1 , "pfrsqit1" , O_000F0F(A7,U,_,_,_), U , Enc(3dNow) , F(None) , EF(________), 0 , 0 , O(Mm) , O(MmMem) , U , U , U ), @@ -3800,8 +3800,8 @@ const X86InstInfo _x86InstInfo[] = { INST(kX86InstIdPhsubd , "phsubd" , O_000F38(06,U,_,_,_), U , Enc(ExtRm_P) , F(None) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), INST(kX86InstIdPhsubsw , "phsubsw" , O_000F38(07,U,_,_,_), U , Enc(ExtRm_P) , F(None) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), INST(kX86InstIdPhsubw , "phsubw" , O_000F38(05,U,_,_,_), U , Enc(ExtRm_P) , F(None) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), - INST(kX86InstIdPi2fd , "pi2fd" , O_000F0F(0D,U,_,_,_), U , Enc(3dNow) , F(None) , EF(________), 0 , 0 , O(Mm) , O(MmMem) , U , U , U ), - INST(kX86InstIdPi2fw , "pi2fw" , O_000F0F(0C,U,_,_,_), U , Enc(3dNow) , F(None) , EF(________), 0 , 0 , O(Mm) , O(MmMem) , U , U , U ), + INST(kX86InstIdPi2fd , "pi2fd" , O_000F0F(0D,U,_,_,_), U , Enc(3dNow) , F(Move) , EF(________), 0 , 8 , O(Mm) , O(MmMem) , U , U , U ), + INST(kX86InstIdPi2fw , "pi2fw" , O_000F0F(0C,U,_,_,_), U , Enc(3dNow) , F(Move) , EF(________), 0 , 8 , 
O(Mm) , O(MmMem) , U , U , U ), INST(kX86InstIdPinsrb , "pinsrb" , O_660F3A(20,U,_,_,_), U , Enc(ExtRmi) , F(None) , EF(________), 0 , 0 , O(Xmm) , O(GdMem) , O(Imm) , U , U ), INST(kX86InstIdPinsrd , "pinsrd" , O_660F3A(22,U,_,_,_), U , Enc(ExtRmi) , F(None) , EF(________), 0 , 0 , O(Xmm) , O(GdMem) , O(Imm) , U , U ), INST(kX86InstIdPinsrq , "pinsrq" , O_660F3A(22,U,_,W,_), U , Enc(ExtRmi) , F(None) , EF(________), 0 , 0 , O(Xmm) , O(GqMem) , O(Imm) , U , U ), @@ -3875,7 +3875,7 @@ const X86InstInfo _x86InstInfo[] = { INST(kX86InstIdPsubusb , "psubusb" , O_000F00(D8,U,_,_,_), U , Enc(ExtRm_P) , F(None) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), INST(kX86InstIdPsubusw , "psubusw" , O_000F00(D9,U,_,_,_), U , Enc(ExtRm_P) , F(None) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), INST(kX86InstIdPsubw , "psubw" , O_000F00(F9,U,_,_,_), U , Enc(ExtRm_P) , F(None) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), - INST(kX86InstIdPswapd , "pswapd" , O_000F0F(BB,U,_,_,_), U , Enc(3dNow) , F(None) , EF(________), 0 , 0 , O(Mm) , O(MmMem) , U , U , U ), + INST(kX86InstIdPswapd , "pswapd" , O_000F0F(BB,U,_,_,_), U , Enc(3dNow) , F(Move) , EF(________), 0 , 8 , O(Mm) , O(MmMem) , U , U , U ), INST(kX86InstIdPtest , "ptest" , O_660F38(17,U,_,_,_), U , Enc(ExtRm) , F(Test) , EF(WWWWWW__), 0 , 0 , O(Xmm) , O(XmmMem) , U , U , U ), INST(kX86InstIdPunpckhbw , "punpckhbw" , O_000F00(68,U,_,_,_), U , Enc(ExtRm_P) , F(None) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), INST(kX86InstIdPunpckhdq , "punpckhdq" , O_000F00(6A,U,_,_,_), U , Enc(ExtRm_P) , F(None) , EF(________), 0 , 0 , O(MmXmm) , O(MmXmmMem) , U , U , U ), diff --git a/src/asmjit/x86/x86inst.h b/src/asmjit/x86/x86inst.h index feb4096..86a5876 100644 --- a/src/asmjit/x86/x86inst.h +++ b/src/asmjit/x86/x86inst.h @@ -1186,7 +1186,7 @@ ASMJIT_ENUM(X86InstOptions) { //! VEX prefix (AVX). //! //! Ignored if the instruction is not AVX or `kX86InstOptionEVEX` is used. 
- kX86InstOptionVex3 = 0x00001000, + kX86InstOptionVex3 = 0x00000200, //! Force 4-byte EVEX prefix even if the instruction is encodable by using //! VEX prefix. Please note that all higher bits from `kX86InstOptionEvex` diff --git a/src/asmjit/x86/x86operand.h b/src/asmjit/x86/x86operand.h index 807bb69..7711c20 100644 --- a/src/asmjit/x86/x86operand.h +++ b/src/asmjit/x86/x86operand.h @@ -2224,21 +2224,21 @@ ASMJIT_DEF_REG(X86GpReg , r14d , gpd[14]) //!< 32-bit Gpd register (X64). ASMJIT_DEF_REG(X86GpReg , r15d , gpd[15]) //!< 32-bit Gpd register (X64). ASMJIT_DEF_REG(X86GpReg , rax , gpq[0]) //!< 64-bit Gpq register (X64). -ASMJIT_DEF_REG(X86GpReg , rcx , gpq[1]) //!< 64-bit Gpq register (X64) -ASMJIT_DEF_REG(X86GpReg , rdx , gpq[2]) //!< 64-bit Gpq register (X64) -ASMJIT_DEF_REG(X86GpReg , rbx , gpq[3]) //!< 64-bit Gpq register (X64) -ASMJIT_DEF_REG(X86GpReg , rsp , gpq[4]) //!< 64-bit Gpq register (X64) -ASMJIT_DEF_REG(X86GpReg , rbp , gpq[5]) //!< 64-bit Gpq register (X64) -ASMJIT_DEF_REG(X86GpReg , rsi , gpq[6]) //!< 64-bit Gpq register (X64) -ASMJIT_DEF_REG(X86GpReg , rdi , gpq[7]) //!< 64-bit Gpq register (X64) -ASMJIT_DEF_REG(X86GpReg , r8 , gpq[8]) //!< 64-bit Gpq register (X64) -ASMJIT_DEF_REG(X86GpReg , r9 , gpq[9]) //!< 64-bit Gpq register (X64) -ASMJIT_DEF_REG(X86GpReg , r10 , gpq[10]) //!< 64-bit Gpq register (X64) -ASMJIT_DEF_REG(X86GpReg , r11 , gpq[11]) //!< 64-bit Gpq register (X64) -ASMJIT_DEF_REG(X86GpReg , r12 , gpq[12]) //!< 64-bit Gpq register (X64) -ASMJIT_DEF_REG(X86GpReg , r13 , gpq[13]) //!< 64-bit Gpq register (X64) -ASMJIT_DEF_REG(X86GpReg , r14 , gpq[14]) //!< 64-bit Gpq register (X64) -ASMJIT_DEF_REG(X86GpReg , r15 , gpq[15]) //!< 64-bit Gpq register (X64) +ASMJIT_DEF_REG(X86GpReg , rcx , gpq[1]) //!< 64-bit Gpq register (X64). +ASMJIT_DEF_REG(X86GpReg , rdx , gpq[2]) //!< 64-bit Gpq register (X64). +ASMJIT_DEF_REG(X86GpReg , rbx , gpq[3]) //!< 64-bit Gpq register (X64). 
+ASMJIT_DEF_REG(X86GpReg , rsp , gpq[4]) //!< 64-bit Gpq register (X64). +ASMJIT_DEF_REG(X86GpReg , rbp , gpq[5]) //!< 64-bit Gpq register (X64). +ASMJIT_DEF_REG(X86GpReg , rsi , gpq[6]) //!< 64-bit Gpq register (X64). +ASMJIT_DEF_REG(X86GpReg , rdi , gpq[7]) //!< 64-bit Gpq register (X64). +ASMJIT_DEF_REG(X86GpReg , r8 , gpq[8]) //!< 64-bit Gpq register (X64). +ASMJIT_DEF_REG(X86GpReg , r9 , gpq[9]) //!< 64-bit Gpq register (X64). +ASMJIT_DEF_REG(X86GpReg , r10 , gpq[10]) //!< 64-bit Gpq register (X64). +ASMJIT_DEF_REG(X86GpReg , r11 , gpq[11]) //!< 64-bit Gpq register (X64). +ASMJIT_DEF_REG(X86GpReg , r12 , gpq[12]) //!< 64-bit Gpq register (X64). +ASMJIT_DEF_REG(X86GpReg , r13 , gpq[13]) //!< 64-bit Gpq register (X64). +ASMJIT_DEF_REG(X86GpReg , r14 , gpq[14]) //!< 64-bit Gpq register (X64). +ASMJIT_DEF_REG(X86GpReg , r15 , gpq[15]) //!< 64-bit Gpq register (X64). ASMJIT_DEF_REG(X86FpReg , fp0 , fp[0]) //!< 80-bit Fp register. ASMJIT_DEF_REG(X86FpReg , fp1 , fp[1]) //!< 80-bit Fp register.