From c6f12c272083b79d62103a488e8440418a842519 Mon Sep 17 00:00:00 2001 From: kobalicek Date: Mon, 26 May 2014 21:26:45 +0200 Subject: [PATCH] Reworked trampolines (64-bit), results in space saving and patchable jmp/call (Issue #33). Added int64_t overload to prevent truncation of immediates passed to Assembler/Compiler (Issue #34). --- src/app/test/testx86.cpp | 2 +- src/asmjit/x86/x86assembler.cpp | 109 +++++++++++++++++--------------- src/asmjit/x86/x86assembler.h | 45 ++++++++++--- src/asmjit/x86/x86compiler.h | 28 +++++++- src/asmjit/x86/x86inst.cpp | 4 +- src/asmjit/x86/x86util.h | 8 +-- 6 files changed, 129 insertions(+), 67 deletions(-) diff --git a/src/app/test/testx86.cpp b/src/app/test/testx86.cpp index e440257..3687af7 100644 --- a/src/app/test/testx86.cpp +++ b/src/app/test/testx86.cpp @@ -592,7 +592,7 @@ struct X86Test_AllocMany2 : public X86Test { c.setArg(0, a); - uint32_t i; + int i; for (i = 0; i < ASMJIT_ARRAY_SIZE(var); i++) { var[i] = c.newGpVar(kVarTypeInt32); } diff --git a/src/asmjit/x86/x86assembler.cpp b/src/asmjit/x86/x86assembler.cpp index b6bfda5..faa1b75 100644 --- a/src/asmjit/x86/x86assembler.cpp +++ b/src/asmjit/x86/x86assembler.cpp @@ -106,29 +106,15 @@ static const uint8_t x86SegmentPrefix[8] = { 0x00, 0x26, 0x2E, 0x36, 0x3E, 0x64, static const uint8_t x86OpCodePushSeg[8] = { 0x00, 0x06, 0x0E, 0x16, 0x1E, 0xA0, 0xA8 }; static const uint8_t x86OpCodePopSeg[8] = { 0x00, 0x07, 0x00, 0x17, 0x1F, 0xA1, 0xA9 }; -// ============================================================================ -// [asmjit::X64TrampolineWriter] -// ============================================================================ +//! Encode MODR/M. +static ASMJIT_INLINE uint32_t x86EncodeMod(uint32_t m, uint32_t o, uint32_t rm) { + return (m << 6) + (o << 3) + rm; +} -//! \internal -//! -//! Trampoline writer. -struct X64TrampolineWriter { - // Size of trampoline - enum { - kSizeJmp = 6, - kSizeAddr = 8, - kSizeTotal = kSizeJmp + kSizeAddr - }; - - // Write trampoline into code at address `code` that will jump to `target`. - static void writeTrampoline(uint8_t* code, uint64_t target) { - code[0] = 0xFF; // Jmp OpCode. - code[1] = 0x25; // ModM (RIP addressing). - ((uint32_t*)(code + 2))[0] = 0; // Offset (zero). - ((uint64_t*)(code + kSizeJmp))[0] = (uint64_t)target; // Absolute address. - } -}; +//! Encode SIB. +static ASMJIT_INLINE uint32_t x86EncodeSib(uint32_t s, uint32_t i, uint32_t b) { + return (s << 6) + (i << 3) + b; +} // ============================================================================ // [asmjit::x86x64::Emit] @@ -482,8 +468,7 @@ static ASMJIT_INLINE size_t X86X64Assembler_relocCode(const X86X64Assembler* sel tramp = dst + codeOffset; // Relocate all recorded locations. - size_t i; - size_t len = self->_relocData.getLength(); + size_t i, len = self->_relocData.getLength(); for (i = 0; i < len; i++) { const RelocData& r = self->_relocData[i]; @@ -493,7 +478,8 @@ static ASMJIT_INLINE size_t X86X64Assembler_relocCode(const X86X64Assembler* sel bool useTrampoline = false; // Be sure that reloc data structure is correct. 
-    ASMJIT_ASSERT(r.from + r.size <= static_cast<size_t>(codeSize));
+    size_t offset = static_cast<size_t>(r.from);
+    ASMJIT_ASSERT(offset + r.size <= static_cast<size_t>(codeSize));
 
     switch (r.type) {
       case kRelocAbsToAbs:
@@ -520,23 +506,44 @@ static ASMJIT_INLINE size_t X86X64Assembler_relocCode(const X86X64Assembler* sel
 
     switch (r.size) {
       case 4:
-        *reinterpret_cast<uint32_t*>(dst + static_cast<size_t>(r.from)) = static_cast<uint32_t>(ptr);
+        *reinterpret_cast<uint32_t*>(dst + offset) = static_cast<uint32_t>(ptr);
         break;
 
       case 8:
-        *reinterpret_cast<uint64_t*>(dst + static_cast<size_t>(r.from)) = static_cast<uint64_t>(ptr);
+        *reinterpret_cast<uint64_t*>(dst + offset) = static_cast<uint64_t>(ptr);
         break;
 
       default:
         ASMJIT_ASSERT(!"Reached");
     }
 
+    // Patch `jmp/call` to use a trampoline.
     if (Arch == kArchX64 && useTrampoline) {
-      if (self->_logger)
-        self->_logger->logFormat(kLoggerStyleComment, "; Trampoline from %llX -> %llX\n", base + r.from, r.data);
+      uint32_t byte0 = 0xFF;
+      uint32_t byte1 = dst[offset - 1];
 
-      X64TrampolineWriter::writeTrampoline(tramp, (uint64_t)r.data);
-      tramp += X64TrampolineWriter::kSizeTotal;
+      if (byte1 == 0xE8) {
+        // Call, patch to FF/2 (-> 0x15).
+        byte1 = x86EncodeMod(0, 2, 5);
+      }
+      else if (byte1 == 0xE9) {
+        // Jmp, patch to FF/4 (-> 0x25).
+        byte1 = x86EncodeMod(0, 4, 5);
+      }
+
+      // Patch the `jmp/call` instruction.
+      ASMJIT_ASSERT(offset >= 2);
+      dst[offset - 2] = byte0;
+      dst[offset - 1] = byte1;
+
+      // Absolute address.
+      ((uint64_t*)tramp)[0] = static_cast<uint64_t>(r.data);
+
+      // Advance trampoline pointer.
+      tramp += 8;
+
+      if (self->_logger)
+        self->_logger->logFormat(kLoggerStyleComment, "; Trampoline %llX\n", r.data);
     }
   }
 
@@ -882,16 +889,6 @@ static bool X86Assembler_dumpComment(StringBuilder& sb, size_t len, const uint8_
 // [asmjit::x86x64::Assembler - Emit]
 // ============================================================================
 
-//! Encode MODR/M.
-static ASMJIT_INLINE uint32_t x86EncodeMod(uint32_t m, uint32_t o, uint32_t rm) {
-  return (m << 6) + (o << 3) + rm;
-}
-
-//! Encode SIB.
-static ASMJIT_INLINE uint32_t x86EncodeSib(uint32_t s, uint32_t i, uint32_t b) {
-  return (s << 6) + (i << 3) + b;
-}
-
 //! \internal
 static const Operand::VRegOp x86PatchedHiRegs[4] = {
 // --------------+---+--------------------------------+--------------+------+
@@ -4001,13 +3998,26 @@ _EmitXopM:
   // [Emit - Jump/Call to an Immediate]
   // --------------------------------------------------------------------------
 
-  // Emit relative relocation to absolute pointer `target`. It's needed to add
-  // what instruction is emitting this, because in x64 mode the relative
-  // displacement can be impossible to calculate and in this case the trampoline
-  // is used.
+  // 64-bit mode requires a trampoline if the relative displacement doesn't fit
+  // into a 32-bit integer. Older versions of AsmJit emitted a jump to a section
+  // that contained another jump followed by an absolute address (this worked
+  // well for both `jmp` and `call`), but it required reserving 14 bytes for a
+  // possible trampoline.
+  //
+  // Instead of emitting a 5-byte `jmp/call` and reserving the 14 bytes required
+  // by such a trampoline, it's better to emit a 6-byte `jmp/call` (padded with
+  // a REX prefix) and to patch the `jmp/call` instruction itself.
_EmitJmpOrCallImm:
  {
-    // The jmp and call instructions have single-byte opcode.
+    // Emit REX prefix (64-bit).
+    //
+    // It does nothing, but allows the instruction to be patched in case a
+    // trampoline is needed.
+    if (Arch == kArchX64) {
+      EMIT_OP(0x40);
+    }
+
+    // Both `jmp` and `call` instructions have a single-byte opcode.
EMIT_OP(opCode); RelocData rd; @@ -4019,13 +4029,12 @@ _EmitJmpOrCallImm: if (self->_relocData.append(rd) != kErrorOk) return self->setError(kErrorNoHeapMemory); - // Emit dummy 32-bit integer; will be overwritten by relocCode(). + // Emit dummy 32-bit integer; will be overwritten by `relocCode()`. EMIT_DWORD(0); + // Trampoline has to be reserved, even if it's not used. if (Arch == kArchX64) { - // If we are compiling in 64-bit mode, we can use trampoline if relative jump - // is not possible. - self->_trampolineSize += X64TrampolineWriter::kSizeTotal; + self->_trampolineSize += 8; } } goto _EmitDone; diff --git a/src/asmjit/x86/x86assembler.h b/src/asmjit/x86/x86assembler.h index 6fbd469..b4528d3 100644 --- a/src/asmjit/x86/x86assembler.h +++ b/src/asmjit/x86/x86assembler.h @@ -44,10 +44,13 @@ namespace x86x64 { ASMJIT_INLINE Error _Inst_(const _Op0_& o0) { \ return emit(_Code_, o0); \ } \ - \ /*! \overload */ \ ASMJIT_INLINE Error _Inst_(int o0) { \ return emit(_Code_, o0); \ + } \ + /*! \overload */ \ + ASMJIT_INLINE Error _Inst_(int64_t o0) { \ + return emit(_Code_, Imm(o0)); \ } #define INST_1i_(_Inst_, _Code_, _Op0_, _Cond_) \ @@ -55,11 +58,15 @@ namespace x86x64 { ASMJIT_ASSERT(_Cond_); \ return emit(_Code_, o0); \ } \ - \ /*! \overload */ \ ASMJIT_INLINE Error _Inst_(int o0) { \ ASMJIT_ASSERT(_Cond_); \ return emit(_Code_, o0); \ + } \ + /*! \overload */ \ + ASMJIT_INLINE Error _Inst_(int64_t o0) { \ + ASMJIT_ASSERT(_Cond_); \ + return emit(_Code_, Imm(o0)); \ } #define INST_1cc(_Inst_, _Code_, _Translate_, _Op0_) \ @@ -113,10 +120,13 @@ namespace x86x64 { ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1) { \ return emit(_Code_, o0, o1); \ } \ - \ /*! \overload */ \ ASMJIT_INLINE Error _Inst_(const _Op0_& o0, int o1) { \ return emit(_Code_, o0, o1); \ + } \ + /*! \overload */ \ + ASMJIT_INLINE Error _Inst_(const _Op0_& o0, int64_t o1) { \ + return emit(_Code_, o0, Imm(o1)); \ } #define INST_2i_(_Inst_, _Code_, _Op0_, _Op1_, _Cond_) \ @@ -124,11 +134,15 @@ namespace x86x64 { ASMJIT_ASSERT(_Cond_); \ return emit(_Code_, o0, o1); \ } \ - \ /*! \overload */ \ ASMJIT_INLINE Error _Inst_(const _Op0_& o0, int o1) { \ ASMJIT_ASSERT(_Cond_); \ return emit(_Code_, o0, o1); \ + } \ + /*! \overload */ \ + ASMJIT_INLINE Error _Inst_(const _Op0_& o0, int64_t o1) { \ + ASMJIT_ASSERT(_Cond_); \ + return emit(_Code_, o0, Imm(o1)); \ } #define INST_2cc(_Inst_, _Code_, _Translate_, _Op0_, _Op1_) \ @@ -182,10 +196,13 @@ namespace x86x64 { ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2) { \ return emit(_Code_, o0, o1, o2); \ } \ - \ /*! \overload */ \ ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1, int o2) { \ return emit(_Code_, o0, o1, o2); \ + } \ + /*! \overload */ \ + ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1, int64_t o2) { \ + return emit(_Code_, o0, o1, Imm(o2)); \ } #define INST_3i_(_Inst_, _Code_, _Op0_, _Op1_, _Op2_, _Cond_) \ @@ -193,14 +210,17 @@ namespace x86x64 { ASMJIT_ASSERT(_Cond_); \ return emit(_Code_, o0, o1, o2); \ } \ - \ /*! \overload */ \ ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1, int o2) { \ ASMJIT_ASSERT(_Cond_); \ return emit(_Code_, o0, o1, o2); \ + } \ + /*! 
\overload */ \ + ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1, int64_t o2) { \ + ASMJIT_ASSERT(_Cond_); \ + return emit(_Code_, o0, o1, Imm(o2)); \ } - #define INST_4x(_Inst_, _Code_, _Op0_, _Op1_, _Op2_, _Op3_) \ ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2, const _Op3_& o3) { \ return emit(_Code_, o0, o1, o2, o3); \ @@ -216,10 +236,13 @@ namespace x86x64 { ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2, const _Op3_& o3) { \ return emit(_Code_, o0, o1, o2, o3); \ } \ - \ /*! \overload */ \ ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2, int o3) { \ return emit(_Code_, o0, o1, o2, o3); \ + } \ + /*! \overload */ \ + ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2, int64_t o3) { \ + return emit(_Code_, o0, o1, o2, Imm(o3)); \ } #define INST_4i_(_Inst_, _Code_, _Op0_, _Op1_, _Op2_, _Op3_, _Cond_) \ @@ -227,11 +250,15 @@ namespace x86x64 { ASMJIT_ASSERT(_Cond_); \ return emit(_Code_, o0, o1, o2, o3); \ } \ - \ /*! \overload */ \ ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2, int o3) { \ ASMJIT_ASSERT(_Cond_); \ return emit(_Code_, o0, o1, o2, o3); \ + } \ + /*! \overload */ \ + ASMJIT_INLINE Error _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2, int64_t o3) { \ + ASMJIT_ASSERT(_Cond_); \ + return emit(_Code_, o0, o1, o2, Imm(o3)); \ } #define ASMJIT_X86X64_EMIT_OPTIONS(_Class_) \ diff --git a/src/asmjit/x86/x86compiler.h b/src/asmjit/x86/x86compiler.h index edce31c..cf5cec5 100644 --- a/src/asmjit/x86/x86compiler.h +++ b/src/asmjit/x86/x86compiler.h @@ -48,6 +48,10 @@ namespace x86x64 { /*! \overload */ \ ASMJIT_INLINE InstNode* _Inst_(int o0) { \ return emit(_Code_, o0); \ + } \ + /*! \overload */ \ + ASMJIT_INLINE InstNode* _Inst_(int64_t o0) { \ + return emit(_Code_, Imm(o0)); \ } #define INST_1i_(_Inst_, _Code_, _Op0_, _Cond_) \ @@ -59,6 +63,11 @@ namespace x86x64 { ASMJIT_INLINE InstNode* _Inst_(int o0) { \ ASMJIT_ASSERT(_Cond_); \ return emit(_Code_, o0); \ + } \ + /*! \overload */ \ + ASMJIT_INLINE InstNode* _Inst_(int64_t o0) { \ + ASMJIT_ASSERT(_Cond_); \ + return emit(_Code_, Imm(o0)); \ } #define INST_1cc(_Inst_, _Code_, _Translate_, _Op0_) \ @@ -115,6 +124,10 @@ namespace x86x64 { /*! \overload */ \ ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, int o1) { \ return emit(_Code_, o0, o1); \ + } \ + /*! \overload */ \ + ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, int64_t o1) { \ + return emit(_Code_, o0, Imm(o1)); \ } #define INST_2i_(_Inst_, _Code_, _Op0_, _Op1_, _Cond_) \ @@ -126,6 +139,11 @@ namespace x86x64 { ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, int o1) { \ ASMJIT_ASSERT(_Cond_); \ return emit(_Code_, o0, o1); \ + } \ + /*! \overload */ \ + ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, int64_t o1) { \ + ASMJIT_ASSERT(_Cond_); \ + return emit(_Code_, o0, Imm(o1)); \ } #define INST_2cc(_Inst_, _Code_, _Translate_, _Op0_, _Op1_) \ @@ -179,10 +197,13 @@ namespace x86x64 { ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, const _Op1_& o1, const _Op2_& o2) { \ return emit(_Code_, o0, o1, o2); \ } \ - \ /*! \overload */ \ ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, const _Op1_& o1, int o2) { \ return emit(_Code_, o0, o1, o2); \ + } \ + /*! 
\overload */ \
+  ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, const _Op1_& o1, int64_t o2) { \
+    return emit(_Code_, o0, o1, Imm(o2)); \
  }
 
 #define INST_3i_(_Inst_, _Code_, _Op0_, _Op1_, _Op2_, _Cond_) \
@@ -194,6 +215,11 @@ namespace x86x64 {
   ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, const _Op1_& o1, int o2) { \
     ASMJIT_ASSERT(_Cond_); \
     return emit(_Code_, o0, o1, o2); \
+  } \
+  /*! \overload */ \
+  ASMJIT_INLINE InstNode* _Inst_(const _Op0_& o0, const _Op1_& o1, int64_t o2) { \
+    ASMJIT_ASSERT(_Cond_); \
+    return emit(_Code_, o0, o1, Imm(o2)); \
  }
 
 // ============================================================================
diff --git a/src/asmjit/x86/x86inst.cpp b/src/asmjit/x86/x86inst.cpp
index 8ebed3d..5f45a3f 100644
--- a/src/asmjit/x86/x86inst.cpp
+++ b/src/asmjit/x86/x86inst.cpp
@@ -2173,7 +2173,7 @@ const InstInfo _instInfo[] = {
 INST(kInstBtr , "btr" , G(X86BTest) , F(Lock) , 0 , O(GqdwMem) , O(Gqdw)|O(Imm) , U , U , O_000F00(B3,U) , O_000F00(BA,6) ),
 INST(kInstBts , "bts" , G(X86BTest) , F(Lock) , 0 , O(GqdwMem) , O(Gqdw)|O(Imm) , U , U , O_000F00(AB,U) , O_000F00(BA,5) ),
 INST(kInstBzhi , "bzhi" , G(AvxRmv) , F(None) , 0 , O(Gqd) , O(GqdMem) , O(Gqd) , U , O_000F38(F5,U) , U ),
-INST(kInstCall , "call" , G(X86Call) , F(Flow) , 0 , O(GqdMem)|O(Label) , U , U , U , O_000000(FF,2) , O_000000(E8,U) ),
+INST(kInstCall , "call" , G(X86Call) , F(Flow) , 0 , O(GqdMem)|O(Imm)|O(Label), U , U , U , O_000000(FF,2) , O_000000(E8,U) ),
 INST(kInstCbw , "cbw" , G(X86Op) , F(None)|F(Special) , 0 , U , U , U , U , O_660000(98,U) , U ),
 INST(kInstCdq , "cdq" , G(X86Op) , F(None)|F(Special) , 0 , U , U , U , U , O_000000(99,U) , U ),
 INST(kInstCdqe , "cdqe" , G(X86Op) , F(None)|F(Special)|F(W), 0 , U , U , U , U , O_000000(98,U) , U ),
@@ -2397,7 +2397,7 @@ const InstInfo _instInfo[] = {
 INST(kInstJs , "js" , G(X86Jcc) , F(Flow) , 0 , O(Label) , U , U , U , O_000000(78,U) , U ),
 INST(kInstJz , "jz" , G(X86Jcc) , F(Flow) , 0 , O(Label) , U , U , U , O_000000(74,U) , U ),
 INST(kInstJecxz , "jecxz" , G(X86Jecxz) , F(Flow)|F(Special) , 0 , O(Gqdw) , O(Label) , U , U , O_000000(E3,U) , U ),
-INST(kInstJmp , "jmp" , G(X86Jmp) , F(Flow) , 0 , O(Label) , U , U , U , O_000000(FF,4) , U ),
+INST(kInstJmp , "jmp" , G(X86Jmp) , F(Flow) , 0 , O(Imm)|O(Label) , U , U , U , O_000000(FF,4) , O_000000(E9,U) ),
 INST(kInstLahf , "lahf" , G(X86Op) , F(None)|F(Special) , 0 , U , U , U , U , O_000000(9F,U) , U ),
 INST(kInstLddqu , "lddqu" , G(ExtRm) , F(Move) , 16, O(Xmm) , O(Mem) , U , U , O_F20F00(F0,U) , U ),
 INST(kInstLdmxcsr , "ldmxcsr" , G(X86M) , F(None) , 0 , O(Mem) , U , U , U , O_000F00(AE,2) , U ),
diff --git a/src/asmjit/x86/x86util.h b/src/asmjit/x86/x86util.h
index eb5c0e1..f3f21be 100644
--- a/src/asmjit/x86/x86util.h
+++ b/src/asmjit/x86/x86util.h
@@ -348,8 +348,8 @@ struct X86Util {
   //!
   //! Shuffle constants can be used to make an immediate value for these intrinsics:
   //!   - `X86X64Assembler::shufpd()` and `X86X64Compiler::shufpd()`
-  static ASMJIT_INLINE uint32_t mmShuffle(uint32_t x, uint32_t y) {
-    return (x << 1) | y;
+  static ASMJIT_INLINE int mmShuffle(uint32_t x, uint32_t y) {
+    return static_cast<int>((x << 1) | y);
   }
 
   //! Pack a shuffle constant to be used with multimedia instructions (4 values).
@@ -365,8 +365,8 @@ struct X86Util {
   //!   - `X86X64Assembler::pshufhw()` and `X86X64Compiler::pshufhw()`
   //!   - `X86X64Assembler::pshuflw()` and `X86X64Compiler::pshuflw()`
   //!   - `X86X64Assembler::shufps()` and `X86X64Compiler::shufps()`
-  static ASMJIT_INLINE uint32_t mmShuffle(uint32_t z, uint32_t y, uint32_t x, uint32_t w) {
-    return (z << 6) | (y << 4) | (x << 2) | w;
+  static ASMJIT_INLINE int mmShuffle(uint32_t z, uint32_t y, uint32_t x, uint32_t w) {
+    return static_cast<int>((z << 6) | (y << 4) | (x << 2) | w);
   }
 };
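
Below, a standalone C++ sketch (not part of the patch) of the patching scheme relocCode() applies above. The helper name `patchToTrampoline` is hypothetical and the layout is consolidated for clarity; in the patch itself the disp32 is written by the size-4 relocation case before the opcode bytes are rewritten. It assumes a little-endian target and a trampoline area that directly follows the code, so the 32-bit displacement always fits:

#include <cstddef>
#include <cstdint>
#include <cstring>

// Same MODR/M encoder as in x86assembler.cpp.
static inline uint32_t x86EncodeMod(uint32_t m, uint32_t o, uint32_t rm) {
  return (m << 6) + (o << 3) + rm;
}

// `offset` is the position of the 4-byte displacement (RelocData::from): the
// REX pad (0x40) sits at offset - 2 and the E8/E9 opcode at offset - 1.
static void patchToTrampoline(uint8_t* code, size_t offset,
                              uint8_t* tramp, uint64_t target) {
  uint8_t op = code[offset - 1];        // 0xE8 = call rel32, 0xE9 = jmp rel32.

  code[offset - 2] = 0xFF;              // The REX pad becomes the FF opcode.
  code[offset - 1] = (op == 0xE8)
    ? uint8_t(x86EncodeMod(0, 2, 5))    // call -> FF /2, ModRM 0x15.
    : uint8_t(x86EncodeMod(0, 4, 5));   // jmp  -> FF /4, ModRM 0x25.

  // RIP-relative disp32; RIP points just past the 4-byte displacement.
  int32_t disp = int32_t(tramp - (code + offset + 4));
  std::memcpy(code + offset, &disp, 4);

  // The trampoline itself is now just the 8-byte absolute target address.
  std::memcpy(tramp, &target, 8);
}

Since the trampoline stores only the 8-byte absolute address, each emitted `jmp/call` reserves 8 bytes instead of the 14 needed by the old jmp+address pair, and the instruction itself stays patchable in place.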
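
Similarly, a minimal sketch of the Issue #34 pitfall addressed by the new int64_t overloads; `emitImm` is a hypothetical stand-in for the generated instruction methods:

#include <cstdint>
#include <cstdio>

// With only an `int` overload, a 64-bit immediate is silently narrowed before
// it ever reaches the emitter. The added `int64_t` overload (which wraps the
// value in `Imm`) preserves all 64 bits.
static void emitImm(int v) {
  std::printf("int overload:     0x%08X\n", unsigned(v));
}
static void emitImm(int64_t v) {
  std::printf("int64_t overload: 0x%016llX\n", (unsigned long long)v);
}

int main() {
  int64_t imm = INT64_C(0x1122334455667788);
  emitImm(int(imm)); // What the old implicit conversion produced: 0x55667788.
  emitImm(imm);      // With the overload present: the full 64-bit value.
  return 0;
}

Without the int64_t overload the call binds to int and the immediate is truncated; with it, the value reaches Imm() intact.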