Added FastEval calling convention - AsmJit specific/experimental feature

This commit is contained in:
kobalicek
2017-02-11 02:34:09 +01:00
parent 43dbe48afc
commit 216fb5a281
6 changed files with 240 additions and 62 deletions

View File

@@ -45,56 +45,89 @@ struct CallConv {
//! None or invalid (can't be used). //! None or invalid (can't be used).
kIdNone = 0, kIdNone = 0,
// ------------------------------------------------------------------------
// [Universal]
// ------------------------------------------------------------------------
// TODO: To make this possible we need to know target ARCH and ABI.
/*
// Universal calling conventions are applicable to any target and are
// converted to target dependent conventions at runtime. The purpose of
// these conventions is to make using functions less target dependent.
kIdCDecl = 1,
kIdStdCall = 2,
kIdFastCall = 3,
//! AsmJit specific calling convention designed for calling functions
//! inside multimedia code that don't use many registers internally, but
//! are long enough to be called rather than inlined. Such functions are
//! typically used to calculate trigonometric functions, logarithms, etc.
kIdFastEval2 = 10,
kIdFastEval3 = 11,
kIdFastEval4 = 12,
*/
// ------------------------------------------------------------------------ // ------------------------------------------------------------------------
// [X86] // [X86]
// ------------------------------------------------------------------------ // ------------------------------------------------------------------------
//! X86 `__cdecl` calling convention (used by C runtime and libraries). //! X86 `__cdecl` calling convention (used by C runtime and libraries).
kIdX86CDecl = 1, kIdX86CDecl = 16,
//! X86 `__stdcall` calling convention (used mostly by WinAPI). //! X86 `__stdcall` calling convention (used mostly by WinAPI).
kIdX86StdCall = 2, kIdX86StdCall = 17,
//! X86 `__thiscall` calling convention (MSVC/Intel). //! X86 `__thiscall` calling convention (MSVC/Intel).
kIdX86MsThisCall = 3, kIdX86MsThisCall = 18,
//! X86 `__fastcall` convention (MSVC/Intel). //! X86 `__fastcall` convention (MSVC/Intel).
kIdX86MsFastCall = 4, kIdX86MsFastCall = 19,
//! X86 `__fastcall` convention (GCC and Clang). //! X86 `__fastcall` convention (GCC and Clang).
kIdX86GccFastCall = 5, kIdX86GccFastCall = 20,
//! X86 `regparm(1)` convention (GCC and Clang). //! X86 `regparm(1)` convention (GCC and Clang).
kIdX86GccRegParm1 = 6, kIdX86GccRegParm1 = 21,
//! X86 `regparm(2)` convention (GCC and Clang). //! X86 `regparm(2)` convention (GCC and Clang).
kIdX86GccRegParm2 = 7, kIdX86GccRegParm2 = 22,
//! X86 `regparm(3)` convention (GCC and Clang). //! X86 `regparm(3)` convention (GCC and Clang).
kIdX86GccRegParm3 = 8, kIdX86GccRegParm3 = 23,
kIdX86FastEval2 = 29,
kIdX86FastEval3 = 30,
kIdX86FastEval4 = 31,
//! X64 calling convention defined by WIN64-ABI. //! X64 calling convention defined by WIN64-ABI.
//! //!
//! Links: //! Links:
//! * <http://msdn.microsoft.com/en-us/library/9b372w95.aspx>. //! * <http://msdn.microsoft.com/en-us/library/9b372w95.aspx>.
kIdX86Win64 = 16, kIdX86Win64 = 32,
//! X64 calling convention used by Unix platforms (SYSV/AMD64-ABI). //! X64 calling convention used by Unix platforms (SYSV/AMD64-ABI).
kIdX86SysV64 = 17, kIdX86SysV64 = 33,
kIdX64FastEval2 = 45,
kIdX64FastEval3 = 46,
kIdX64FastEval4 = 47,
// ------------------------------------------------------------------------ // ------------------------------------------------------------------------
// [ARM] // [ARM]
// ------------------------------------------------------------------------ // ------------------------------------------------------------------------
//! Legacy calling convention, floating point arguments are passed via GP registers. //! Legacy calling convention, floating point arguments are passed via GP registers.
kIdArm32SoftFP = 32, kIdArm32SoftFP = 48,
//! Modern calling convention, uses VFP registers to pass floating point arguments. //! Modern calling convention, uses VFP registers to pass floating point arguments.
kIdArm32HardFP = 33, kIdArm32HardFP = 49,
// ------------------------------------------------------------------------ // ------------------------------------------------------------------------
// [Internal] // [Internal]
// ------------------------------------------------------------------------ // ------------------------------------------------------------------------
_kIdX86Start = 1, //!< \internal _kIdX86Start = 16, //!< \internal
_kIdX86End = 8, //!< \internal _kIdX86End = 31, //!< \internal
_kIdX64Start = 16, //!< \internal _kIdX64Start = 32, //!< \internal
_kIdX64End = 17, //!< \internal _kIdX64End = 47, //!< \internal
_kIdArmStart = 32, //!< \internal _kIdArmStart = 48, //!< \internal
_kIdArmEnd = 33, //!< \internal _kIdArmEnd = 49, //!< \internal
// ------------------------------------------------------------------------ // ------------------------------------------------------------------------
// [Host] // [Host]
@@ -106,42 +139,48 @@ struct CallConv {
//! NOTE: This should be always the same as `kIdHostCDecl`, but some //! NOTE: This should be always the same as `kIdHostCDecl`, but some
//! compilers allow to override the default calling convention. Overriding //! compilers allow to override the default calling convention. Overriding
//! is not detected at the moment. //! is not detected at the moment.
kIdHost = DETECTED_AT_COMPILE_TIME, kIdHost = DETECTED_AT_COMPILE_TIME,
//! Default CDECL calling convention based on the current C++ compiler's settings. //! Default CDECL calling convention based on the current C++ compiler's settings.
kIdHostCDecl = DETECTED_AT_COMPILE_TIME, kIdHostCDecl = DETECTED_AT_COMPILE_TIME,
//! Default STDCALL calling convention based on the current C++ compiler's settings. //! Default STDCALL calling convention based on the current C++ compiler's settings.
//! //!
//! NOTE: If not defined by the host then it's the same as `kIdHostCDecl`. //! NOTE: If not defined by the host then it's the same as `kIdHostCDecl`.
kIdHostStdCall = DETECTED_AT_COMPILE_TIME, kIdHostStdCall = DETECTED_AT_COMPILE_TIME,
//! Compatibility for `__fastcall` calling convention. //! Compatibility for `__fastcall` calling convention.
//! //!
//! NOTE: If not defined by the host then it's the same as `kIdHostCDecl`. //! NOTE: If not defined by the host then it's the same as `kIdHostCDecl`.
kIdHostFastCall = DETECTED_AT_COMPILE_TIME kIdHostFastCall = DETECTED_AT_COMPILE_TIME
#elif ASMJIT_ARCH_X86 #elif ASMJIT_ARCH_X86
kIdHost = kIdX86CDecl, kIdHost = kIdX86CDecl,
kIdHostCDecl = kIdX86CDecl, kIdHostCDecl = kIdX86CDecl,
kIdHostStdCall = kIdX86StdCall, kIdHostStdCall = kIdX86StdCall,
kIdHostFastCall = ASMJIT_CC_MSC ? kIdX86MsFastCall : kIdHostFastCall = ASMJIT_CC_MSC ? kIdX86MsFastCall :
ASMJIT_CC_GCC ? kIdX86GccFastCall : ASMJIT_CC_GCC ? kIdX86GccFastCall :
ASMJIT_CC_CLANG ? kIdX86GccFastCall : kIdNone ASMJIT_CC_CLANG ? kIdX86GccFastCall : kIdNone,
kIdHostFastEval2 = kIdX86FastEval2,
kIdHostFastEval3 = kIdX86FastEval3,
kIdHostFastEval4 = kIdX86FastEval4
#elif ASMJIT_ARCH_X64 #elif ASMJIT_ARCH_X64
kIdHost = ASMJIT_OS_WINDOWS ? kIdX86Win64 : kIdX86SysV64, kIdHost = ASMJIT_OS_WINDOWS ? kIdX86Win64 : kIdX86SysV64,
kIdHostCDecl = kIdHost, // Doesn't exist, redirected to host. kIdHostCDecl = kIdHost, // Doesn't exist, redirected to host.
kIdHostStdCall = kIdHost, // Doesn't exist, redirected to host. kIdHostStdCall = kIdHost, // Doesn't exist, redirected to host.
kIdHostFastCall = kIdHost // Doesn't exist, redirected to host. kIdHostFastCall = kIdHost, // Doesn't exist, redirected to host.
kIdHostFastEval2 = kIdX64FastEval2,
kIdHostFastEval3 = kIdX64FastEval3,
kIdHostFastEval4 = kIdX64FastEval4
#elif ASMJIT_ARCH_ARM32 #elif ASMJIT_ARCH_ARM32
# if defined(__SOFTFP__) # if defined(__SOFTFP__)
kIdHost = kIdArm32SoftFP, kIdHost = kIdArm32SoftFP,
# else # else
kIdHost = kIdArm32HardFP, kIdHost = kIdArm32HardFP,
# endif # endif
// These don't exist on ARM. // These don't exist on ARM.
kIdHostCDecl = kIdHost, // Doesn't exist, redirected to host. kIdHostCDecl = kIdHost, // Doesn't exist, redirected to host.
kIdHostStdCall = kIdHost, // Doesn't exist, redirected to host. kIdHostStdCall = kIdHost, // Doesn't exist, redirected to host.
kIdHostFastCall = kIdHost // Doesn't exist, redirected to host. kIdHostFastCall = kIdHost // Doesn't exist, redirected to host.
#else #else
# error "[asmjit] Couldn't determine the target's calling convention." # error "[asmjit] Couldn't determine the target's calling convention."
#endif #endif

View File

@@ -224,7 +224,7 @@ public:
//! Check for equality with other `str` of length `len`. //! Check for equality with other `str` of length `len`.
ASMJIT_API bool eq(const char* str, size_t len = Globals::kInvalidIndex) const noexcept; ASMJIT_API bool eq(const char* str, size_t len = Globals::kInvalidIndex) const noexcept;
//! Check for equality with `other`. //! Check for equality with `other`.
ASMJIT_INLINE bool eq(const StringBuilder& other) const noexcept { return eq(other._data); } ASMJIT_INLINE bool eq(const StringBuilder& other) const noexcept { return eq(other._data, other._length); }
// -------------------------------------------------------------------------- // --------------------------------------------------------------------------
// [Operator Overload] // [Operator Overload]

View File

@@ -345,7 +345,7 @@ struct Utils {
// [Bits] // [Bits]
// -------------------------------------------------------------------------- // --------------------------------------------------------------------------
//! Generate a bit-mask that has `x` most significant bits set. //! Generate a bit-mask that has `x` least significant bits set.
static ASMJIT_INLINE uint32_t bits(uint32_t x) noexcept { static ASMJIT_INLINE uint32_t bits(uint32_t x) noexcept {
// Shifting more bits than the type has results in undefined behavior. In // Shifting more bits than the type has results in undefined behavior. In
// such case asmjit trashes the result by ORing with `overflow` mask, which // such case asmjit trashes the result by ORing with `overflow` mask, which

View File

@@ -275,15 +275,17 @@ ASMJIT_FAVOR_SIZE Error X86FuncArgsContext::markStackArgsReg(FuncFrameInfo& ffi)
ASMJIT_FAVOR_SIZE Error X86Internal::initCallConv(CallConv& cc, uint32_t ccId) noexcept { ASMJIT_FAVOR_SIZE Error X86Internal::initCallConv(CallConv& cc, uint32_t ccId) noexcept {
const uint32_t kKindGp = X86Reg::kKindGp; const uint32_t kKindGp = X86Reg::kKindGp;
const uint32_t kKindVec = X86Reg::kKindVec; const uint32_t kKindVec = X86Reg::kKindVec;
const uint32_t kKindMm = X86Reg::kKindMm;
const uint32_t kKindK = X86Reg::kKindK;
const uint32_t kAx = X86Gp::kIdAx; const uint32_t kZax = X86Gp::kIdAx;
const uint32_t kBx = X86Gp::kIdBx; const uint32_t kZbx = X86Gp::kIdBx;
const uint32_t kCx = X86Gp::kIdCx; const uint32_t kZcx = X86Gp::kIdCx;
const uint32_t kDx = X86Gp::kIdDx; const uint32_t kZdx = X86Gp::kIdDx;
const uint32_t kSp = X86Gp::kIdSp; const uint32_t kZsp = X86Gp::kIdSp;
const uint32_t kBp = X86Gp::kIdBp; const uint32_t kZbp = X86Gp::kIdBp;
const uint32_t kSi = X86Gp::kIdSi; const uint32_t kZsi = X86Gp::kIdSi;
const uint32_t kDi = X86Gp::kIdDi; const uint32_t kZdi = X86Gp::kIdDi;
switch (ccId) { switch (ccId) {
case CallConv::kIdX86StdCall: case CallConv::kIdX86StdCall:
@@ -292,32 +294,32 @@ ASMJIT_FAVOR_SIZE Error X86Internal::initCallConv(CallConv& cc, uint32_t ccId) n
case CallConv::kIdX86MsThisCall: case CallConv::kIdX86MsThisCall:
cc.setFlags(CallConv::kFlagCalleePopsStack); cc.setFlags(CallConv::kFlagCalleePopsStack);
cc.setPassedOrder(kKindGp, kCx); cc.setPassedOrder(kKindGp, kZcx);
goto X86CallConv; goto X86CallConv;
case CallConv::kIdX86MsFastCall: case CallConv::kIdX86MsFastCall:
case CallConv::kIdX86GccFastCall: case CallConv::kIdX86GccFastCall:
cc.setFlags(CallConv::kFlagCalleePopsStack); cc.setFlags(CallConv::kFlagCalleePopsStack);
cc.setPassedOrder(kKindGp, kCx, kDx); cc.setPassedOrder(kKindGp, kZcx, kZdx);
goto X86CallConv; goto X86CallConv;
case CallConv::kIdX86GccRegParm1: case CallConv::kIdX86GccRegParm1:
cc.setPassedOrder(kKindGp, kAx); cc.setPassedOrder(kKindGp, kZax);
goto X86CallConv; goto X86CallConv;
case CallConv::kIdX86GccRegParm2: case CallConv::kIdX86GccRegParm2:
cc.setPassedOrder(kKindGp, kAx, kDx); cc.setPassedOrder(kKindGp, kZax, kZdx);
goto X86CallConv; goto X86CallConv;
case CallConv::kIdX86GccRegParm3: case CallConv::kIdX86GccRegParm3:
cc.setPassedOrder(kKindGp, kAx, kDx, kCx); cc.setPassedOrder(kKindGp, kZax, kZdx, kZcx);
goto X86CallConv; goto X86CallConv;
case CallConv::kIdX86CDecl: case CallConv::kIdX86CDecl:
X86CallConv: X86CallConv:
cc.setNaturalStackAlignment(4); cc.setNaturalStackAlignment(4);
cc.setArchType(ArchInfo::kTypeX86); cc.setArchType(ArchInfo::kTypeX86);
cc.setPreservedRegs(kKindGp, Utils::mask(kBx, kSp, kBp, kSi, kDi)); cc.setPreservedRegs(kKindGp, Utils::mask(kZbx, kZsp, kZbp, kZsi, kZdi));
break; break;
case CallConv::kIdX86Win64: case CallConv::kIdX86Win64:
@@ -326,9 +328,9 @@ X86CallConv:
cc.setFlags(CallConv::kFlagPassFloatsByVec | CallConv::kFlagIndirectVecArgs); cc.setFlags(CallConv::kFlagPassFloatsByVec | CallConv::kFlagIndirectVecArgs);
cc.setNaturalStackAlignment(16); cc.setNaturalStackAlignment(16);
cc.setSpillZoneSize(32); cc.setSpillZoneSize(32);
cc.setPassedOrder(kKindGp, kCx, kDx, 8, 9); cc.setPassedOrder(kKindGp, kZcx, kZdx, 8, 9);
cc.setPassedOrder(kKindVec, 0, 1, 2, 3); cc.setPassedOrder(kKindVec, 0, 1, 2, 3);
cc.setPreservedRegs(kKindGp, Utils::mask(kBx, kSp, kBp, kSi, kDi, 12, 13, 14, 15)); cc.setPreservedRegs(kKindGp, Utils::mask(kZbx, kZsp, kZbp, kZsi, kZdi, 12, 13, 14, 15));
cc.setPreservedRegs(kKindVec, Utils::mask(6, 7, 8, 9, 10, 11, 12, 13, 14, 15)); cc.setPreservedRegs(kKindVec, Utils::mask(6, 7, 8, 9, 10, 11, 12, 13, 14, 15));
break; break;
@@ -337,11 +339,49 @@ X86CallConv:
cc.setFlags(CallConv::kFlagPassFloatsByVec); cc.setFlags(CallConv::kFlagPassFloatsByVec);
cc.setNaturalStackAlignment(16); cc.setNaturalStackAlignment(16);
cc.setRedZoneSize(128); cc.setRedZoneSize(128);
cc.setPassedOrder(kKindGp, kDi, kSi, kDx, kCx, 8, 9); cc.setPassedOrder(kKindGp, kZdi, kZsi, kZdx, kZcx, 8, 9);
cc.setPassedOrder(kKindVec, 0, 1, 2, 3, 4, 5, 6, 7); cc.setPassedOrder(kKindVec, 0, 1, 2, 3, 4, 5, 6, 7);
cc.setPreservedRegs(kKindGp, Utils::mask(kBx, kSp, kBp, 12, 13, 14, 15)); cc.setPreservedRegs(kKindGp, Utils::mask(kZbx, kZsp, kZbp, 12, 13, 14, 15));
break; break;
case CallConv::kIdX86FastEval2:
case CallConv::kIdX86FastEval3:
case CallConv::kIdX86FastEval4: {
  // The FastEval ids are consecutive, so the offset from FastEval2 is 0..2.
  // The numeric suffix (2/3/4) is taken to be the number of low vector
  // registers the callee may clobber, hence the `+ 2` bias. Without it
  // FastEval2 would leave no vector register unpreserved and every variant
  // would be off by two relative to its name.
  uint32_t n = (ccId - CallConv::kIdX86FastEval2) + 2;

  cc.setArchType(ArchInfo::kTypeX86);
  cc.setFlags(CallConv::kFlagPassFloatsByVec);
  cc.setNaturalStackAlignment(16);

  // Argument passing order per register kind.
  cc.setPassedOrder(kKindGp, kZax, kZdx, kZcx, kZsi, kZdi);
  cc.setPassedOrder(kKindMm, 0, 1, 2, 3, 4, 5, 6, 7);
  cc.setPassedOrder(kKindVec, 0, 1, 2, 3, 4, 5, 6, 7);

  // All 8 registers of each kind are preserved, except the `n` lowest
  // vector registers.
  cc.setPreservedRegs(kKindGp, Utils::bits(8));
  cc.setPreservedRegs(kKindVec, Utils::bits(8) & ~Utils::bits(n));
  cc.setPreservedRegs(kKindMm, Utils::bits(8));
  cc.setPreservedRegs(kKindK, Utils::bits(8));
  break;
}

case CallConv::kIdX64FastEval2:
case CallConv::kIdX64FastEval3:
case CallConv::kIdX64FastEval4: {
  // Same id-to-count mapping as the 32-bit variant above: FastEvalN leaves
  // the `n` lowest vector registers unpreserved.
  uint32_t n = (ccId - CallConv::kIdX64FastEval2) + 2;

  cc.setArchType(ArchInfo::kTypeX64);
  cc.setFlags(CallConv::kFlagPassFloatsByVec);
  cc.setNaturalStackAlignment(16);

  // Argument passing order per register kind.
  cc.setPassedOrder(kKindGp, kZax, kZdx, kZcx, kZsi, kZdi);
  cc.setPassedOrder(kKindMm, 0, 1, 2, 3, 4, 5, 6, 7);
  cc.setPassedOrder(kKindVec, 0, 1, 2, 3, 4, 5, 6, 7);

  // 16 GP registers are preserved; every vector register bit except the
  // `n` lowest is marked preserved.
  cc.setPreservedRegs(kKindGp, Utils::bits(16));
  cc.setPreservedRegs(kKindVec, ~Utils::bits(n));
  cc.setPreservedRegs(kKindMm, Utils::bits(8));
  cc.setPreservedRegs(kKindK, Utils::bits(8));
  break;
}
default: default:
return DebugUtils::errored(kErrorInvalidArgument); return DebugUtils::errored(kErrorInvalidArgument);
} }
@@ -532,7 +572,7 @@ ASMJIT_FAVOR_SIZE Error X86Internal::initFrameLayout(FuncFrameLayout& layout, co
// Calculate a bit-mask of all registers that must be saved & restored. // Calculate a bit-mask of all registers that must be saved & restored.
for (kind = 0; kind < Globals::kMaxVRegKinds; kind++) for (kind = 0; kind < Globals::kMaxVRegKinds; kind++)
layout._savedRegs[kind] = ffi.getDirtyRegs(kind) & func.getPreservedRegs(kind); layout._savedRegs[kind] = (ffi.getDirtyRegs(kind) & ~func.getPassedRegs(kind)) & func.getPreservedRegs(kind);
// Include EBP|RBP if the function preserves the frame-pointer. // Include EBP|RBP if the function preserves the frame-pointer.
if (ffi.hasPreservedFP()) { if (ffi.hasPreservedFP()) {

View File

@@ -2050,10 +2050,10 @@ _NextGroup:
} }
// Init clobbered. // Init clobbered.
clobberedRegs.set(X86Reg::kKindGp , Utils::bits(_regCount.getGp()) & (~fd.getPreservedRegs(X86Reg::kKindGp ))); clobberedRegs.set(X86Reg::kKindGp , Utils::bits(_regCount.getGp()) & (fd.getPassedRegs(X86Reg::kKindGp ) | ~fd.getPreservedRegs(X86Reg::kKindGp )));
clobberedRegs.set(X86Reg::kKindMm , Utils::bits(_regCount.getMm()) & (~fd.getPreservedRegs(X86Reg::kKindMm ))); clobberedRegs.set(X86Reg::kKindMm , Utils::bits(_regCount.getMm()) & (fd.getPassedRegs(X86Reg::kKindMm ) | ~fd.getPreservedRegs(X86Reg::kKindMm )));
clobberedRegs.set(X86Reg::kKindK , Utils::bits(_regCount.getK()) & (~fd.getPreservedRegs(X86Reg::kKindK ))); clobberedRegs.set(X86Reg::kKindK , Utils::bits(_regCount.getK()) & (fd.getPassedRegs(X86Reg::kKindK ) | ~fd.getPreservedRegs(X86Reg::kKindK )));
clobberedRegs.set(X86Reg::kKindVec, Utils::bits(_regCount.getVec()) & (~fd.getPreservedRegs(X86Reg::kKindVec))); clobberedRegs.set(X86Reg::kKindVec, Utils::bits(_regCount.getVec()) & (fd.getPassedRegs(X86Reg::kKindVec) | ~fd.getPreservedRegs(X86Reg::kKindVec)));
RA_FINALIZE(node_); RA_FINALIZE(node_);
break; break;

View File

@@ -3260,6 +3260,104 @@ public:
} }
}; };
// ============================================================================
// [X86Test_MiscFastEval]
// ============================================================================
class X86Test_MiscFastEval : public X86Test {
public:
X86Test_MiscFastEval() : X86Test("[Misc] FastEval (CConv)") {}
static void add(X86TestManager& mgr) {
mgr.add(new X86Test_MiscFastEval());
}
virtual void compile(X86Compiler& cc) {
FuncSignature5<void, const void*, const void*, const void*, const void*, void*> funcSig(CallConv::kIdHostCDecl);
FuncSignature2<X86Xmm, X86Xmm, X86Xmm> fastSig(CallConv::kIdHostFastEval2);
CCFunc* func = cc.newFunc(funcSig);
CCFunc* fast = cc.newFunc(fastSig);
{
X86Gp aPtr = cc.newIntPtr("aPtr");
X86Gp bPtr = cc.newIntPtr("bPtr");
X86Gp cPtr = cc.newIntPtr("cPtr");
X86Gp dPtr = cc.newIntPtr("dPtr");
X86Gp pOut = cc.newIntPtr("pOut");
X86Xmm aXmm = cc.newXmm("aXmm");
X86Xmm bXmm = cc.newXmm("bXmm");
X86Xmm cXmm = cc.newXmm("cXmm");
X86Xmm dXmm = cc.newXmm("dXmm");
cc.addFunc(func);
cc.setArg(0, aPtr);
cc.setArg(1, bPtr);
cc.setArg(2, cPtr);
cc.setArg(3, dPtr);
cc.setArg(4, pOut);
cc.movups(aXmm, x86::ptr(aPtr));
cc.movups(bXmm, x86::ptr(bPtr));
cc.movups(cXmm, x86::ptr(cPtr));
cc.movups(dXmm, x86::ptr(dPtr));
X86Xmm xXmm = cc.newXmm("xXmm");
X86Xmm yXmm = cc.newXmm("yXmm");
CCFuncCall* call1 = cc.call(fast->getLabel(), fastSig);
call1->setArg(0, aXmm);
call1->setArg(1, bXmm);
call1->setRet(0, xXmm);
CCFuncCall* call2 = cc.call(fast->getLabel(), fastSig);
call2->setArg(0, cXmm);
call2->setArg(1, dXmm);
call2->setRet(0, yXmm);
cc.pmullw(xXmm, yXmm);
cc.movups(x86::ptr(pOut), xXmm);
cc.endFunc();
}
{
X86Xmm aXmm = cc.newXmm("aXmm");
X86Xmm bXmm = cc.newXmm("bXmm");
cc.addFunc(fast);
cc.setArg(0, aXmm);
cc.setArg(1, bXmm);
cc.paddw(aXmm, bXmm);
cc.ret(aXmm);
cc.endFunc();
}
}
virtual bool run(void* _func, StringBuilder& result, StringBuilder& expect) {
typedef void (*Func)(const void*, const void*, const void*, const void*, void*);
Func func = ptr_as_func<Func>(_func);
int16_t a[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
int16_t b[8] = { 7, 6, 5, 4, 3, 2, 1, 0 };
int16_t c[8] = { 1, 3, 9, 7, 5, 4, 2, 1 };
int16_t d[8] = { 2, 0,-6,-4,-2,-1, 1, 2 };
int16_t o[8];
int oExp = 7 * 3;
func(a, b, c, d, o);
result.setFormat("ret={%02X %02X %02X %02X %02X %02X %02X %02X}", o[0], o[1], o[2], o[3], o[4], o[5], o[6], o[7]);
expect.setFormat("ret={%02X %02X %02X %02X %02X %02X %02X %02X}", oExp, oExp, oExp, oExp, oExp, oExp, oExp, oExp);
return result == expect;
}
};
// ============================================================================ // ============================================================================
// [X86Test_MiscUnfollow] // [X86Test_MiscUnfollow]
// ============================================================================ // ============================================================================
@@ -3423,6 +3521,7 @@ int main(int argc, char* argv[]) {
ADD_TEST(X86Test_MiscConstPool); ADD_TEST(X86Test_MiscConstPool);
ADD_TEST(X86Test_MiscMultiRet); ADD_TEST(X86Test_MiscMultiRet);
ADD_TEST(X86Test_MiscMultiFunc); ADD_TEST(X86Test_MiscMultiFunc);
ADD_TEST(X86Test_MiscFastEval);
ADD_TEST(X86Test_MiscUnfollow); ADD_TEST(X86Test_MiscUnfollow);
return testMgr.run(); return testMgr.run();