mirror of
https://github.com/asmjit/asmjit.git
synced 2025-12-17 12:34:35 +03:00
Added FastEval calling convention - AsmJit specific/experimental feature
This commit is contained in:
@@ -45,56 +45,89 @@ struct CallConv {
|
|||||||
//! None or invalid (can't be used).
|
//! None or invalid (can't be used).
|
||||||
kIdNone = 0,
|
kIdNone = 0,
|
||||||
|
|
||||||
|
// ------------------------------------------------------------------------
|
||||||
|
// [Universal]
|
||||||
|
// ------------------------------------------------------------------------
|
||||||
|
|
||||||
|
// TODO: To make this possible we need to know target ARCH and ABI.
|
||||||
|
|
||||||
|
/*
|
||||||
|
|
||||||
|
// Universal calling conventions are applicable to any target and are
|
||||||
|
// converted to target dependent conventions at runtime. The purpose of
|
||||||
|
// these conventions is to make using functions less target dependent.
|
||||||
|
|
||||||
|
kIdCDecl = 1,
|
||||||
|
kIdStdCall = 2,
|
||||||
|
kIdFastCall = 3,
|
||||||
|
|
||||||
|
//! AsmJit-specific calling convention designed for calling helper functions
|
||||||
|
//! from multimedia code. Such functions don't use many registers internally,
|
||||||
|
//! but are long enough that they are called rather than inlined. They are
|
||||||
|
//! typically used to calculate trigonometric functions, logarithms, etc.
|
||||||
|
kIdFastEval2 = 10,
|
||||||
|
kIdFastEval3 = 11,
|
||||||
|
kIdFastEval4 = 12,
|
||||||
|
*/
|
||||||
|
|
||||||
// ------------------------------------------------------------------------
|
// ------------------------------------------------------------------------
|
||||||
// [X86]
|
// [X86]
|
||||||
// ------------------------------------------------------------------------
|
// ------------------------------------------------------------------------
|
||||||
|
|
||||||
//! X86 `__cdecl` calling convention (used by C runtime and libraries).
|
//! X86 `__cdecl` calling convention (used by C runtime and libraries).
|
||||||
kIdX86CDecl = 1,
|
kIdX86CDecl = 16,
|
||||||
//! X86 `__stdcall` calling convention (used mostly by WinAPI).
|
//! X86 `__stdcall` calling convention (used mostly by WinAPI).
|
||||||
kIdX86StdCall = 2,
|
kIdX86StdCall = 17,
|
||||||
//! X86 `__thiscall` calling convention (MSVC/Intel).
|
//! X86 `__thiscall` calling convention (MSVC/Intel).
|
||||||
kIdX86MsThisCall = 3,
|
kIdX86MsThisCall = 18,
|
||||||
//! X86 `__fastcall` convention (MSVC/Intel).
|
//! X86 `__fastcall` convention (MSVC/Intel).
|
||||||
kIdX86MsFastCall = 4,
|
kIdX86MsFastCall = 19,
|
||||||
//! X86 `__fastcall` convention (GCC and Clang).
|
//! X86 `__fastcall` convention (GCC and Clang).
|
||||||
kIdX86GccFastCall = 5,
|
kIdX86GccFastCall = 20,
|
||||||
//! X86 `regparm(1)` convention (GCC and Clang).
|
//! X86 `regparm(1)` convention (GCC and Clang).
|
||||||
kIdX86GccRegParm1 = 6,
|
kIdX86GccRegParm1 = 21,
|
||||||
//! X86 `regparm(2)` convention (GCC and Clang).
|
//! X86 `regparm(2)` convention (GCC and Clang).
|
||||||
kIdX86GccRegParm2 = 7,
|
kIdX86GccRegParm2 = 22,
|
||||||
//! X86 `regparm(3)` convention (GCC and Clang).
|
//! X86 `regparm(3)` convention (GCC and Clang).
|
||||||
kIdX86GccRegParm3 = 8,
|
kIdX86GccRegParm3 = 23,
|
||||||
|
|
||||||
|
kIdX86FastEval2 = 29,
|
||||||
|
kIdX86FastEval3 = 30,
|
||||||
|
kIdX86FastEval4 = 31,
|
||||||
|
|
||||||
//! X64 calling convention defined by WIN64-ABI.
|
//! X64 calling convention defined by WIN64-ABI.
|
||||||
//!
|
//!
|
||||||
//! Links:
|
//! Links:
|
||||||
//! * <http://msdn.microsoft.com/en-us/library/9b372w95.aspx>.
|
//! * <http://msdn.microsoft.com/en-us/library/9b372w95.aspx>.
|
||||||
kIdX86Win64 = 16,
|
kIdX86Win64 = 32,
|
||||||
//! X64 calling convention used by Unix platforms (SYSV/AMD64-ABI).
|
//! X64 calling convention used by Unix platforms (SYSV/AMD64-ABI).
|
||||||
kIdX86SysV64 = 17,
|
kIdX86SysV64 = 33,
|
||||||
|
|
||||||
|
kIdX64FastEval2 = 45,
|
||||||
|
kIdX64FastEval3 = 46,
|
||||||
|
kIdX64FastEval4 = 47,
|
||||||
|
|
||||||
// ------------------------------------------------------------------------
|
// ------------------------------------------------------------------------
|
||||||
// [ARM]
|
// [ARM]
|
||||||
// ------------------------------------------------------------------------
|
// ------------------------------------------------------------------------
|
||||||
|
|
||||||
//! Legacy calling convention, floating point arguments are passed via GP registers.
|
//! Legacy calling convention, floating point arguments are passed via GP registers.
|
||||||
kIdArm32SoftFP = 32,
|
kIdArm32SoftFP = 48,
|
||||||
//! Modern calling convention, uses VFP registers to pass floating point arguments.
|
//! Modern calling convention, uses VFP registers to pass floating point arguments.
|
||||||
kIdArm32HardFP = 33,
|
kIdArm32HardFP = 49,
|
||||||
|
|
||||||
// ------------------------------------------------------------------------
|
// ------------------------------------------------------------------------
|
||||||
// [Internal]
|
// [Internal]
|
||||||
// ------------------------------------------------------------------------
|
// ------------------------------------------------------------------------
|
||||||
|
|
||||||
_kIdX86Start = 1, //!< \internal
|
_kIdX86Start = 16, //!< \internal
|
||||||
_kIdX86End = 8, //!< \internal
|
_kIdX86End = 31, //!< \internal
|
||||||
|
|
||||||
_kIdX64Start = 16, //!< \internal
|
_kIdX64Start = 32, //!< \internal
|
||||||
_kIdX64End = 17, //!< \internal
|
_kIdX64End = 47, //!< \internal
|
||||||
|
|
||||||
_kIdArmStart = 32, //!< \internal
|
_kIdArmStart = 48, //!< \internal
|
||||||
_kIdArmEnd = 33, //!< \internal
|
_kIdArmEnd = 49, //!< \internal
|
||||||
|
|
||||||
// ------------------------------------------------------------------------
|
// ------------------------------------------------------------------------
|
||||||
// [Host]
|
// [Host]
|
||||||
@@ -106,42 +139,48 @@ struct CallConv {
|
|||||||
//! NOTE: This should be always the same as `kIdHostCDecl`, but some
|
//! NOTE: This should be always the same as `kIdHostCDecl`, but some
|
||||||
//! compilers allow to override the default calling convention. Overriding
|
//! compilers allow to override the default calling convention. Overriding
|
||||||
//! is not detected at the moment.
|
//! is not detected at the moment.
|
||||||
kIdHost = DETECTED_AT_COMPILE_TIME,
|
kIdHost = DETECTED_AT_COMPILE_TIME,
|
||||||
|
|
||||||
//! Default CDECL calling convention based on the current C++ compiler's settings.
|
//! Default CDECL calling convention based on the current C++ compiler's settings.
|
||||||
kIdHostCDecl = DETECTED_AT_COMPILE_TIME,
|
kIdHostCDecl = DETECTED_AT_COMPILE_TIME,
|
||||||
|
|
||||||
//! Default STDCALL calling convention based on the current C++ compiler's settings.
|
//! Default STDCALL calling convention based on the current C++ compiler's settings.
|
||||||
//!
|
//!
|
||||||
//! NOTE: If not defined by the host then it's the same as `kIdHostCDecl`.
|
//! NOTE: If not defined by the host then it's the same as `kIdHostCDecl`.
|
||||||
kIdHostStdCall = DETECTED_AT_COMPILE_TIME,
|
kIdHostStdCall = DETECTED_AT_COMPILE_TIME,
|
||||||
|
|
||||||
//! Compatibility for `__fastcall` calling convention.
|
//! Compatibility for `__fastcall` calling convention.
|
||||||
//!
|
//!
|
||||||
//! NOTE: If not defined by the host then it's the same as `kIdHostCDecl`.
|
//! NOTE: If not defined by the host then it's the same as `kIdHostCDecl`.
|
||||||
kIdHostFastCall = DETECTED_AT_COMPILE_TIME
|
kIdHostFastCall = DETECTED_AT_COMPILE_TIME
|
||||||
#elif ASMJIT_ARCH_X86
|
#elif ASMJIT_ARCH_X86
|
||||||
kIdHost = kIdX86CDecl,
|
kIdHost = kIdX86CDecl,
|
||||||
kIdHostCDecl = kIdX86CDecl,
|
kIdHostCDecl = kIdX86CDecl,
|
||||||
kIdHostStdCall = kIdX86StdCall,
|
kIdHostStdCall = kIdX86StdCall,
|
||||||
kIdHostFastCall = ASMJIT_CC_MSC ? kIdX86MsFastCall :
|
kIdHostFastCall = ASMJIT_CC_MSC ? kIdX86MsFastCall :
|
||||||
ASMJIT_CC_GCC ? kIdX86GccFastCall :
|
ASMJIT_CC_GCC ? kIdX86GccFastCall :
|
||||||
ASMJIT_CC_CLANG ? kIdX86GccFastCall : kIdNone
|
ASMJIT_CC_CLANG ? kIdX86GccFastCall : kIdNone,
|
||||||
|
kIdHostFastEval2 = kIdX86FastEval2,
|
||||||
|
kIdHostFastEval3 = kIdX86FastEval3,
|
||||||
|
kIdHostFastEval4 = kIdX86FastEval4
|
||||||
#elif ASMJIT_ARCH_X64
|
#elif ASMJIT_ARCH_X64
|
||||||
kIdHost = ASMJIT_OS_WINDOWS ? kIdX86Win64 : kIdX86SysV64,
|
kIdHost = ASMJIT_OS_WINDOWS ? kIdX86Win64 : kIdX86SysV64,
|
||||||
kIdHostCDecl = kIdHost, // Doesn't exist, redirected to host.
|
kIdHostCDecl = kIdHost, // Doesn't exist, redirected to host.
|
||||||
kIdHostStdCall = kIdHost, // Doesn't exist, redirected to host.
|
kIdHostStdCall = kIdHost, // Doesn't exist, redirected to host.
|
||||||
kIdHostFastCall = kIdHost // Doesn't exist, redirected to host.
|
kIdHostFastCall = kIdHost, // Doesn't exist, redirected to host.
|
||||||
|
kIdHostFastEval2 = kIdX64FastEval2,
|
||||||
|
kIdHostFastEval3 = kIdX64FastEval3,
|
||||||
|
kIdHostFastEval4 = kIdX64FastEval4
|
||||||
#elif ASMJIT_ARCH_ARM32
|
#elif ASMJIT_ARCH_ARM32
|
||||||
# if defined(__SOFTFP__)
|
# if defined(__SOFTFP__)
|
||||||
kIdHost = kIdArm32SoftFP,
|
kIdHost = kIdArm32SoftFP,
|
||||||
# else
|
# else
|
||||||
kIdHost = kIdArm32HardFP,
|
kIdHost = kIdArm32HardFP,
|
||||||
# endif
|
# endif
|
||||||
// These don't exist on ARM.
|
// These don't exist on ARM.
|
||||||
kIdHostCDecl = kIdHost, // Doesn't exist, redirected to host.
|
kIdHostCDecl = kIdHost, // Doesn't exist, redirected to host.
|
||||||
kIdHostStdCall = kIdHost, // Doesn't exist, redirected to host.
|
kIdHostStdCall = kIdHost, // Doesn't exist, redirected to host.
|
||||||
kIdHostFastCall = kIdHost // Doesn't exist, redirected to host.
|
kIdHostFastCall = kIdHost // Doesn't exist, redirected to host.
|
||||||
#else
|
#else
|
||||||
# error "[asmjit] Couldn't determine the target's calling convention."
|
# error "[asmjit] Couldn't determine the target's calling convention."
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -224,7 +224,7 @@ public:
|
|||||||
//! Check for equality with other `str` of length `len`.
|
//! Check for equality with other `str` of length `len`.
|
||||||
ASMJIT_API bool eq(const char* str, size_t len = Globals::kInvalidIndex) const noexcept;
|
ASMJIT_API bool eq(const char* str, size_t len = Globals::kInvalidIndex) const noexcept;
|
||||||
//! Check for equality with `other`.
|
//! Check for equality with `other`.
|
||||||
ASMJIT_INLINE bool eq(const StringBuilder& other) const noexcept { return eq(other._data); }
|
ASMJIT_INLINE bool eq(const StringBuilder& other) const noexcept { return eq(other._data, other._length); }
|
||||||
|
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
// [Operator Overload]
|
// [Operator Overload]
|
||||||
|
|||||||
@@ -345,7 +345,7 @@ struct Utils {
|
|||||||
// [Bits]
|
// [Bits]
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
|
|
||||||
//! Generate a bit-mask that has `x` most significant bits set.
|
//! Generate a bit-mask that has `x` least significant bits set.
|
||||||
static ASMJIT_INLINE uint32_t bits(uint32_t x) noexcept {
|
static ASMJIT_INLINE uint32_t bits(uint32_t x) noexcept {
|
||||||
// Shifting more bits than the type has results in undefined behavior. In
|
// Shifting more bits than the type has results in undefined behavior. In
|
||||||
// such case asmjit trashes the result by ORing with `overflow` mask, which
|
// such case asmjit trashes the result by ORing with `overflow` mask, which
|
||||||
|
|||||||
@@ -275,15 +275,17 @@ ASMJIT_FAVOR_SIZE Error X86FuncArgsContext::markStackArgsReg(FuncFrameInfo& ffi)
|
|||||||
ASMJIT_FAVOR_SIZE Error X86Internal::initCallConv(CallConv& cc, uint32_t ccId) noexcept {
|
ASMJIT_FAVOR_SIZE Error X86Internal::initCallConv(CallConv& cc, uint32_t ccId) noexcept {
|
||||||
const uint32_t kKindGp = X86Reg::kKindGp;
|
const uint32_t kKindGp = X86Reg::kKindGp;
|
||||||
const uint32_t kKindVec = X86Reg::kKindVec;
|
const uint32_t kKindVec = X86Reg::kKindVec;
|
||||||
|
const uint32_t kKindMm = X86Reg::kKindMm;
|
||||||
|
const uint32_t kKindK = X86Reg::kKindK;
|
||||||
|
|
||||||
const uint32_t kAx = X86Gp::kIdAx;
|
const uint32_t kZax = X86Gp::kIdAx;
|
||||||
const uint32_t kBx = X86Gp::kIdBx;
|
const uint32_t kZbx = X86Gp::kIdBx;
|
||||||
const uint32_t kCx = X86Gp::kIdCx;
|
const uint32_t kZcx = X86Gp::kIdCx;
|
||||||
const uint32_t kDx = X86Gp::kIdDx;
|
const uint32_t kZdx = X86Gp::kIdDx;
|
||||||
const uint32_t kSp = X86Gp::kIdSp;
|
const uint32_t kZsp = X86Gp::kIdSp;
|
||||||
const uint32_t kBp = X86Gp::kIdBp;
|
const uint32_t kZbp = X86Gp::kIdBp;
|
||||||
const uint32_t kSi = X86Gp::kIdSi;
|
const uint32_t kZsi = X86Gp::kIdSi;
|
||||||
const uint32_t kDi = X86Gp::kIdDi;
|
const uint32_t kZdi = X86Gp::kIdDi;
|
||||||
|
|
||||||
switch (ccId) {
|
switch (ccId) {
|
||||||
case CallConv::kIdX86StdCall:
|
case CallConv::kIdX86StdCall:
|
||||||
@@ -292,32 +294,32 @@ ASMJIT_FAVOR_SIZE Error X86Internal::initCallConv(CallConv& cc, uint32_t ccId) n
|
|||||||
|
|
||||||
case CallConv::kIdX86MsThisCall:
|
case CallConv::kIdX86MsThisCall:
|
||||||
cc.setFlags(CallConv::kFlagCalleePopsStack);
|
cc.setFlags(CallConv::kFlagCalleePopsStack);
|
||||||
cc.setPassedOrder(kKindGp, kCx);
|
cc.setPassedOrder(kKindGp, kZcx);
|
||||||
goto X86CallConv;
|
goto X86CallConv;
|
||||||
|
|
||||||
case CallConv::kIdX86MsFastCall:
|
case CallConv::kIdX86MsFastCall:
|
||||||
case CallConv::kIdX86GccFastCall:
|
case CallConv::kIdX86GccFastCall:
|
||||||
cc.setFlags(CallConv::kFlagCalleePopsStack);
|
cc.setFlags(CallConv::kFlagCalleePopsStack);
|
||||||
cc.setPassedOrder(kKindGp, kCx, kDx);
|
cc.setPassedOrder(kKindGp, kZcx, kZdx);
|
||||||
goto X86CallConv;
|
goto X86CallConv;
|
||||||
|
|
||||||
case CallConv::kIdX86GccRegParm1:
|
case CallConv::kIdX86GccRegParm1:
|
||||||
cc.setPassedOrder(kKindGp, kAx);
|
cc.setPassedOrder(kKindGp, kZax);
|
||||||
goto X86CallConv;
|
goto X86CallConv;
|
||||||
|
|
||||||
case CallConv::kIdX86GccRegParm2:
|
case CallConv::kIdX86GccRegParm2:
|
||||||
cc.setPassedOrder(kKindGp, kAx, kDx);
|
cc.setPassedOrder(kKindGp, kZax, kZdx);
|
||||||
goto X86CallConv;
|
goto X86CallConv;
|
||||||
|
|
||||||
case CallConv::kIdX86GccRegParm3:
|
case CallConv::kIdX86GccRegParm3:
|
||||||
cc.setPassedOrder(kKindGp, kAx, kDx, kCx);
|
cc.setPassedOrder(kKindGp, kZax, kZdx, kZcx);
|
||||||
goto X86CallConv;
|
goto X86CallConv;
|
||||||
|
|
||||||
case CallConv::kIdX86CDecl:
|
case CallConv::kIdX86CDecl:
|
||||||
X86CallConv:
|
X86CallConv:
|
||||||
cc.setNaturalStackAlignment(4);
|
cc.setNaturalStackAlignment(4);
|
||||||
cc.setArchType(ArchInfo::kTypeX86);
|
cc.setArchType(ArchInfo::kTypeX86);
|
||||||
cc.setPreservedRegs(kKindGp, Utils::mask(kBx, kSp, kBp, kSi, kDi));
|
cc.setPreservedRegs(kKindGp, Utils::mask(kZbx, kZsp, kZbp, kZsi, kZdi));
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case CallConv::kIdX86Win64:
|
case CallConv::kIdX86Win64:
|
||||||
@@ -326,9 +328,9 @@ X86CallConv:
|
|||||||
cc.setFlags(CallConv::kFlagPassFloatsByVec | CallConv::kFlagIndirectVecArgs);
|
cc.setFlags(CallConv::kFlagPassFloatsByVec | CallConv::kFlagIndirectVecArgs);
|
||||||
cc.setNaturalStackAlignment(16);
|
cc.setNaturalStackAlignment(16);
|
||||||
cc.setSpillZoneSize(32);
|
cc.setSpillZoneSize(32);
|
||||||
cc.setPassedOrder(kKindGp, kCx, kDx, 8, 9);
|
cc.setPassedOrder(kKindGp, kZcx, kZdx, 8, 9);
|
||||||
cc.setPassedOrder(kKindVec, 0, 1, 2, 3);
|
cc.setPassedOrder(kKindVec, 0, 1, 2, 3);
|
||||||
cc.setPreservedRegs(kKindGp, Utils::mask(kBx, kSp, kBp, kSi, kDi, 12, 13, 14, 15));
|
cc.setPreservedRegs(kKindGp, Utils::mask(kZbx, kZsp, kZbp, kZsi, kZdi, 12, 13, 14, 15));
|
||||||
cc.setPreservedRegs(kKindVec, Utils::mask(6, 7, 8, 9, 10, 11, 12, 13, 14, 15));
|
cc.setPreservedRegs(kKindVec, Utils::mask(6, 7, 8, 9, 10, 11, 12, 13, 14, 15));
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@@ -337,11 +339,49 @@ X86CallConv:
|
|||||||
cc.setFlags(CallConv::kFlagPassFloatsByVec);
|
cc.setFlags(CallConv::kFlagPassFloatsByVec);
|
||||||
cc.setNaturalStackAlignment(16);
|
cc.setNaturalStackAlignment(16);
|
||||||
cc.setRedZoneSize(128);
|
cc.setRedZoneSize(128);
|
||||||
cc.setPassedOrder(kKindGp, kDi, kSi, kDx, kCx, 8, 9);
|
cc.setPassedOrder(kKindGp, kZdi, kZsi, kZdx, kZcx, 8, 9);
|
||||||
cc.setPassedOrder(kKindVec, 0, 1, 2, 3, 4, 5, 6, 7);
|
cc.setPassedOrder(kKindVec, 0, 1, 2, 3, 4, 5, 6, 7);
|
||||||
cc.setPreservedRegs(kKindGp, Utils::mask(kBx, kSp, kBp, 12, 13, 14, 15));
|
cc.setPreservedRegs(kKindGp, Utils::mask(kZbx, kZsp, kZbp, 12, 13, 14, 15));
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case CallConv::kIdX86FastEval2:
|
||||||
|
case CallConv::kIdX86FastEval3:
|
||||||
|
case CallConv::kIdX86FastEval4: {
|
||||||
|
uint32_t n = ccId - CallConv::kIdX86FastEval2;
|
||||||
|
|
||||||
|
cc.setArchType(ArchInfo::kTypeX86);
|
||||||
|
cc.setFlags(CallConv::kFlagPassFloatsByVec);
|
||||||
|
cc.setNaturalStackAlignment(16);
|
||||||
|
cc.setPassedOrder(kKindGp, kZax, kZdx, kZcx, kZsi, kZdi);
|
||||||
|
cc.setPassedOrder(kKindMm, 0, 1, 2, 3, 4, 5, 6, 7);
|
||||||
|
cc.setPassedOrder(kKindVec, 0, 1, 2, 3, 4, 5, 6, 7);
|
||||||
|
|
||||||
|
cc.setPreservedRegs(kKindGp , Utils::bits(8));
|
||||||
|
cc.setPreservedRegs(kKindVec, Utils::bits(8) & ~Utils::bits(n));
|
||||||
|
cc.setPreservedRegs(kKindMm , Utils::bits(8));
|
||||||
|
cc.setPreservedRegs(kKindK , Utils::bits(8));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case CallConv::kIdX64FastEval2:
|
||||||
|
case CallConv::kIdX64FastEval3:
|
||||||
|
case CallConv::kIdX64FastEval4: {
|
||||||
|
uint32_t n = ccId - CallConv::kIdX64FastEval2;
|
||||||
|
|
||||||
|
cc.setArchType(ArchInfo::kTypeX64);
|
||||||
|
cc.setFlags(CallConv::kFlagPassFloatsByVec);
|
||||||
|
cc.setNaturalStackAlignment(16);
|
||||||
|
cc.setPassedOrder(kKindGp, kZax, kZdx, kZcx, kZsi, kZdi);
|
||||||
|
cc.setPassedOrder(kKindMm, 0, 1, 2, 3, 4, 5, 6, 7);
|
||||||
|
cc.setPassedOrder(kKindVec, 0, 1, 2, 3, 4, 5, 6, 7);
|
||||||
|
|
||||||
|
cc.setPreservedRegs(kKindGp , Utils::bits(16));
|
||||||
|
cc.setPreservedRegs(kKindVec,~Utils::bits(n));
|
||||||
|
cc.setPreservedRegs(kKindMm , Utils::bits(8));
|
||||||
|
cc.setPreservedRegs(kKindK , Utils::bits(8));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return DebugUtils::errored(kErrorInvalidArgument);
|
return DebugUtils::errored(kErrorInvalidArgument);
|
||||||
}
|
}
|
||||||
@@ -532,7 +572,7 @@ ASMJIT_FAVOR_SIZE Error X86Internal::initFrameLayout(FuncFrameLayout& layout, co
|
|||||||
|
|
||||||
// Calculate a bit-mask of all registers that must be saved & restored.
|
// Calculate a bit-mask of all registers that must be saved & restored.
|
||||||
for (kind = 0; kind < Globals::kMaxVRegKinds; kind++)
|
for (kind = 0; kind < Globals::kMaxVRegKinds; kind++)
|
||||||
layout._savedRegs[kind] = ffi.getDirtyRegs(kind) & func.getPreservedRegs(kind);
|
layout._savedRegs[kind] = (ffi.getDirtyRegs(kind) & ~func.getPassedRegs(kind)) & func.getPreservedRegs(kind);
|
||||||
|
|
||||||
// Include EBP|RBP if the function preserves the frame-pointer.
|
// Include EBP|RBP if the function preserves the frame-pointer.
|
||||||
if (ffi.hasPreservedFP()) {
|
if (ffi.hasPreservedFP()) {
|
||||||
|
|||||||
@@ -2050,10 +2050,10 @@ _NextGroup:
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Init clobbered.
|
// Init clobbered.
|
||||||
clobberedRegs.set(X86Reg::kKindGp , Utils::bits(_regCount.getGp()) & (~fd.getPreservedRegs(X86Reg::kKindGp )));
|
clobberedRegs.set(X86Reg::kKindGp , Utils::bits(_regCount.getGp()) & (fd.getPassedRegs(X86Reg::kKindGp ) | ~fd.getPreservedRegs(X86Reg::kKindGp )));
|
||||||
clobberedRegs.set(X86Reg::kKindMm , Utils::bits(_regCount.getMm()) & (~fd.getPreservedRegs(X86Reg::kKindMm )));
|
clobberedRegs.set(X86Reg::kKindMm , Utils::bits(_regCount.getMm()) & (fd.getPassedRegs(X86Reg::kKindMm ) | ~fd.getPreservedRegs(X86Reg::kKindMm )));
|
||||||
clobberedRegs.set(X86Reg::kKindK , Utils::bits(_regCount.getK()) & (~fd.getPreservedRegs(X86Reg::kKindK )));
|
clobberedRegs.set(X86Reg::kKindK , Utils::bits(_regCount.getK()) & (fd.getPassedRegs(X86Reg::kKindK ) | ~fd.getPreservedRegs(X86Reg::kKindK )));
|
||||||
clobberedRegs.set(X86Reg::kKindVec, Utils::bits(_regCount.getVec()) & (~fd.getPreservedRegs(X86Reg::kKindVec)));
|
clobberedRegs.set(X86Reg::kKindVec, Utils::bits(_regCount.getVec()) & (fd.getPassedRegs(X86Reg::kKindVec) | ~fd.getPreservedRegs(X86Reg::kKindVec)));
|
||||||
|
|
||||||
RA_FINALIZE(node_);
|
RA_FINALIZE(node_);
|
||||||
break;
|
break;
|
||||||
|
|||||||
@@ -3260,6 +3260,104 @@ public:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// [X86Test_MiscFastEval]
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
class X86Test_MiscFastEval : public X86Test {
|
||||||
|
public:
|
||||||
|
X86Test_MiscFastEval() : X86Test("[Misc] FastEval (CConv)") {}
|
||||||
|
|
||||||
|
static void add(X86TestManager& mgr) {
|
||||||
|
mgr.add(new X86Test_MiscFastEval());
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual void compile(X86Compiler& cc) {
|
||||||
|
FuncSignature5<void, const void*, const void*, const void*, const void*, void*> funcSig(CallConv::kIdHostCDecl);
|
||||||
|
FuncSignature2<X86Xmm, X86Xmm, X86Xmm> fastSig(CallConv::kIdHostFastEval2);
|
||||||
|
|
||||||
|
CCFunc* func = cc.newFunc(funcSig);
|
||||||
|
CCFunc* fast = cc.newFunc(fastSig);
|
||||||
|
|
||||||
|
{
|
||||||
|
X86Gp aPtr = cc.newIntPtr("aPtr");
|
||||||
|
X86Gp bPtr = cc.newIntPtr("bPtr");
|
||||||
|
X86Gp cPtr = cc.newIntPtr("cPtr");
|
||||||
|
X86Gp dPtr = cc.newIntPtr("dPtr");
|
||||||
|
X86Gp pOut = cc.newIntPtr("pOut");
|
||||||
|
|
||||||
|
X86Xmm aXmm = cc.newXmm("aXmm");
|
||||||
|
X86Xmm bXmm = cc.newXmm("bXmm");
|
||||||
|
X86Xmm cXmm = cc.newXmm("cXmm");
|
||||||
|
X86Xmm dXmm = cc.newXmm("dXmm");
|
||||||
|
|
||||||
|
cc.addFunc(func);
|
||||||
|
|
||||||
|
cc.setArg(0, aPtr);
|
||||||
|
cc.setArg(1, bPtr);
|
||||||
|
cc.setArg(2, cPtr);
|
||||||
|
cc.setArg(3, dPtr);
|
||||||
|
cc.setArg(4, pOut);
|
||||||
|
|
||||||
|
cc.movups(aXmm, x86::ptr(aPtr));
|
||||||
|
cc.movups(bXmm, x86::ptr(bPtr));
|
||||||
|
cc.movups(cXmm, x86::ptr(cPtr));
|
||||||
|
cc.movups(dXmm, x86::ptr(dPtr));
|
||||||
|
|
||||||
|
X86Xmm xXmm = cc.newXmm("xXmm");
|
||||||
|
X86Xmm yXmm = cc.newXmm("yXmm");
|
||||||
|
|
||||||
|
CCFuncCall* call1 = cc.call(fast->getLabel(), fastSig);
|
||||||
|
call1->setArg(0, aXmm);
|
||||||
|
call1->setArg(1, bXmm);
|
||||||
|
call1->setRet(0, xXmm);
|
||||||
|
|
||||||
|
CCFuncCall* call2 = cc.call(fast->getLabel(), fastSig);
|
||||||
|
call2->setArg(0, cXmm);
|
||||||
|
call2->setArg(1, dXmm);
|
||||||
|
call2->setRet(0, yXmm);
|
||||||
|
|
||||||
|
cc.pmullw(xXmm, yXmm);
|
||||||
|
cc.movups(x86::ptr(pOut), xXmm);
|
||||||
|
|
||||||
|
cc.endFunc();
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
X86Xmm aXmm = cc.newXmm("aXmm");
|
||||||
|
X86Xmm bXmm = cc.newXmm("bXmm");
|
||||||
|
|
||||||
|
cc.addFunc(fast);
|
||||||
|
cc.setArg(0, aXmm);
|
||||||
|
cc.setArg(1, bXmm);
|
||||||
|
cc.paddw(aXmm, bXmm);
|
||||||
|
cc.ret(aXmm);
|
||||||
|
cc.endFunc();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual bool run(void* _func, StringBuilder& result, StringBuilder& expect) {
|
||||||
|
typedef void (*Func)(const void*, const void*, const void*, const void*, void*);
|
||||||
|
|
||||||
|
Func func = ptr_as_func<Func>(_func);
|
||||||
|
|
||||||
|
int16_t a[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
|
||||||
|
int16_t b[8] = { 7, 6, 5, 4, 3, 2, 1, 0 };
|
||||||
|
int16_t c[8] = { 1, 3, 9, 7, 5, 4, 2, 1 };
|
||||||
|
int16_t d[8] = { 2, 0,-6,-4,-2,-1, 1, 2 };
|
||||||
|
|
||||||
|
int16_t o[8];
|
||||||
|
int oExp = 7 * 3;
|
||||||
|
|
||||||
|
func(a, b, c, d, o);
|
||||||
|
|
||||||
|
result.setFormat("ret={%02X %02X %02X %02X %02X %02X %02X %02X}", o[0], o[1], o[2], o[3], o[4], o[5], o[6], o[7]);
|
||||||
|
expect.setFormat("ret={%02X %02X %02X %02X %02X %02X %02X %02X}", oExp, oExp, oExp, oExp, oExp, oExp, oExp, oExp);
|
||||||
|
|
||||||
|
return result == expect;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// [X86Test_MiscUnfollow]
|
// [X86Test_MiscUnfollow]
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
@@ -3423,6 +3521,7 @@ int main(int argc, char* argv[]) {
|
|||||||
ADD_TEST(X86Test_MiscConstPool);
|
ADD_TEST(X86Test_MiscConstPool);
|
||||||
ADD_TEST(X86Test_MiscMultiRet);
|
ADD_TEST(X86Test_MiscMultiRet);
|
||||||
ADD_TEST(X86Test_MiscMultiFunc);
|
ADD_TEST(X86Test_MiscMultiFunc);
|
||||||
|
ADD_TEST(X86Test_MiscFastEval);
|
||||||
ADD_TEST(X86Test_MiscUnfollow);
|
ADD_TEST(X86Test_MiscUnfollow);
|
||||||
|
|
||||||
return testMgr.run();
|
return testMgr.run();
|
||||||
|
|||||||
Reference in New Issue
Block a user