asmjit/test/asmjit_test_misc.h
Petr Kobalicek 5d40561d14 Refactored register allocator and Compiler. (#249)
Refactored build system macros (ASMJIT_BUILD_STATIC -> ASMJIT_STATIC).
Refactored AVX512 broadcast {1toN} - moved from the instruction to the operand.
Refactored naming - renamed getters to drop the 'get' prefix.
Refactored code structure - moved arch-specific code into the x86 namespace.
Refactored some compiler/arch-specific macros; mov REG, [ADDR] now respects the rel/abs option.
Refactored StringBuilder (renamed to String, added small string optimization).
Refactored LabelId<->LabelEntry mapping; label offsets are now 64-bit on all archs.
Renamed Runtime to Target (JitRuntime kept for now).
Renamed VirtMemManager to JitAllocator.
Renamed VirtMem to JitUtils.
Renamed FuncSignatureX to FuncSignatureBuilder.
Fixed xchg [mem], rex-lo; refactored RelocEntry.
Fixed Logger to always show abs|rel when formatting a memory operand.
Fixed Logger to prefix hex numbers with 0x.
Fixed Support::ctzGeneric to always return uint32_t regardless of T.
Fixed LightCall to not save MMX and K registers.
Fixed CpuInfo constructor to propagate NoInit (#243).
Added VAES, AVX512_VBMI2, AVX512_VNNI, and AVX512_BITALG cpu-features and instructions.
Added Emscripten support (asmjit can now be compiled with Emscripten).
Added asmjit.natvis for a better MSVC debugging experience.
Added x86::ptr_abs|ptr_rel.
Added support for multibyte nop r/m (#135).
Added support for 32-bit to 64-bit zero-extended addresses, improved validation of memory addresses, and removed the wrt address type, which will be reworked.
Added support for multiple sections and reworked address-table support (previously known as trampolines).
Added the following x86 modifiers to x86::Emitter: xacquire(), xrelease(), and k(kreg).
Added the ability to use the REP prefix with the RET instruction.
Added the ability to relocate [rel addr] during relocate().
Added variadic function-call support (Compiler), argument duplication (Compiler), and better /dev/shm vs /tmp shared-memory handling (VirtMem).
Removed the imm_u and imm_ptr helpers; imm() now accepts any integer or pointer.
Changed instruction optimization to be disabled by default; it can be enabled through kOptionOptimizedForSize.
Use default copy construction/assignment to prevent a new kind of warning introduced by GCC 9.
2019-07-16 01:24:22 +02:00
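
For illustration, a minimal sketch of two of the reworked APIs noted above - FuncSignatureBuilder (formerly FuncSignatureX) and the generalized imm(). The snippet is an assumption based on the notes, not code from the commit:

    using namespace asmjit;

    // FuncSignatureX is now FuncSignatureBuilder (hypothetical usage):
    FuncSignatureBuilder sig(CallConv::kIdHost);
    sig.setRetT<int>();
    sig.addArgT<void*>();

    // imm() now accepts any integer or pointer (imm_u/imm_ptr were removed):
    Imm a = imm(42);
    Imm b = imm(reinterpret_cast<void*>(0x1000));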


// [AsmJit]
// Machine Code Generation for C++.
//
// [License]
// Zlib - See LICENSE.md file in the package.

#ifndef _ASMJIT_TEST_MISC_H
#define _ASMJIT_TEST_MISC_H

#include "./asmjit.h"

namespace asmtest {

// Generate a typical alpha blend function using SSE2 instruction set. Used
// for benchmarking and also in test86. The generated code should be stable
// and fully functional.
static void generateAlphaBlend(asmjit::x86::Compiler& cc) {
  using namespace asmjit;
  using namespace asmjit::x86;

  Gp dst = cc.newIntPtr("dst");
  Gp src = cc.newIntPtr("src");
  Gp i = cc.newIntPtr("i");
  Gp j = cc.newIntPtr("j");
  Gp t = cc.newIntPtr("t");

  Xmm vzero = cc.newXmm("vzero");
  Xmm v0080 = cc.newXmm("v0080");
  Xmm v0101 = cc.newXmm("v0101");

  Label L_SmallLoop = cc.newLabel();
  Label L_SmallEnd = cc.newLabel();
  Label L_LargeLoop = cc.newLabel();
  Label L_LargeEnd = cc.newLabel();
  Label L_DataPool = cc.newLabel();
  cc.addFunc(FuncSignatureT<void, void*, const void*, size_t>(cc.codeInfo().cdeclCallConv()));
  cc.setArg(0, dst);
  cc.setArg(1, src);
  cc.setArg(2, i);
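
  // Load the blend constants from the data pool (bound at L_DataPool below).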
  cc.lea(t, x86::ptr(L_DataPool));
  cc.xorps(vzero, vzero);
  cc.movaps(v0080, x86::ptr(t, 0));
  cc.movaps(v0101, x86::ptr(t, 16));

  // How many pixels have to be processed to make the loop aligned.
  cc.xor_(j, j);
  cc.sub(j, dst);
  cc.and_(j, 15);
  cc.shr(j, 2);
  cc.jz(L_SmallEnd);

  cc.cmp(j, i);
  cc.cmovg(j, i); // j = min(i, j).
  cc.sub(i, j);   // i -= j.

  // Small loop.
  cc.bind(L_SmallLoop);
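  // Blend one ARGB32 pixel: dst = src + dst * (255 - src.alpha) / 255, where
  // x / 255 is approximated as ((x + 0x80) * 0x0101) >> 16.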
  {
    Xmm x0 = cc.newXmm("x0");
    Xmm y0 = cc.newXmm("y0");
    Xmm a0 = cc.newXmm("a0");

    cc.movd(y0, x86::ptr(src));
    cc.movd(x0, x86::ptr(dst));

    cc.pcmpeqb(a0, a0);
    cc.pxor(a0, y0);
    cc.psrlw(a0, 8);
    cc.punpcklbw(x0, vzero);

    cc.pshuflw(a0, a0, x86::Predicate::shuf(1, 1, 1, 1));
    cc.punpcklbw(y0, vzero);

    cc.pmullw(x0, a0);
    cc.paddsw(x0, v0080);
    cc.pmulhuw(x0, v0101);

    cc.paddw(x0, y0);
    cc.packuswb(x0, x0);
    cc.movd(x86::ptr(dst), x0);

    cc.add(dst, 4);
    cc.add(src, 4);
    cc.dec(j);
    cc.jnz(L_SmallLoop);
  }
  // Second section - prepare for the aligned loop. After this block j holds
  // the tail pixel count (i & 3) and i the number of 4-pixel iterations.
  cc.bind(L_SmallEnd);
  cc.test(i, i);
  cc.mov(j, i);
  cc.jz(cc.func()->exitLabel());

  cc.and_(j, 3);
  cc.shr(i, 2);
  cc.jz(L_LargeEnd);
  // Aligned loop.
  cc.bind(L_LargeLoop);
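  // Same blend as the small loop, vectorized over 4 pixels: x0/a0 process the
  // low two pixels and x1/a1 the high two.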
  {
    Xmm x0 = cc.newXmm("x0");
    Xmm x1 = cc.newXmm("x1");
    Xmm y0 = cc.newXmm("y0");
    Xmm a0 = cc.newXmm("a0");
    Xmm a1 = cc.newXmm("a1");

    cc.movups(y0, x86::ptr(src));
    cc.movaps(x0, x86::ptr(dst));

    cc.pcmpeqb(a0, a0);
    cc.xorps(a0, y0);
    cc.movaps(x1, x0);

    cc.psrlw(a0, 8);
    cc.punpcklbw(x0, vzero);

    cc.movaps(a1, a0);
    cc.punpcklwd(a0, a0);

    cc.punpckhbw(x1, vzero);
    cc.punpckhwd(a1, a1);

    cc.pshufd(a0, a0, x86::Predicate::shuf(3, 3, 1, 1));
    cc.pshufd(a1, a1, x86::Predicate::shuf(3, 3, 1, 1));

    cc.pmullw(x0, a0);
    cc.pmullw(x1, a1);

    cc.paddsw(x0, v0080);
    cc.paddsw(x1, v0080);

    cc.pmulhuw(x0, v0101);
    cc.pmulhuw(x1, v0101);

    cc.add(src, 16);
    cc.packuswb(x0, x1);
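
    // paddw on the packed bytes is safe here: with a premultiplied src the
    // per-byte sums cannot overflow, so no carry crosses into a neighboring byte.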
    cc.paddw(x0, y0);
    cc.movaps(x86::ptr(dst), x0);

    cc.add(dst, 16);
    cc.dec(i);
    cc.jnz(L_LargeLoop);
  }
  cc.bind(L_LargeEnd);
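
  // Process the remaining 0..3 tail pixels through the small loop.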
  cc.test(j, j);
  cc.jnz(L_SmallLoop);

  cc.endFunc();

  // Data.
  cc.align(kAlignData, 16);
  cc.bind(L_DataPool);
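  // 0x0080 words: rounding bias; 0x0101 words: multiplier of the x/255 approximation.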
  cc.dxmm(Data128::fromI16(0x0080));
  cc.dxmm(Data128::fromI16(0x0101));
}

} // {asmtest}

#endif // _ASMJIT_TEST_MISC_H
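
A minimal sketch of how this generator might be driven (assumes the JitRuntime/CodeHolder API of this asmjit era; error handling mostly omitted):

    #include <asmjit/asmjit.h>
    #include "./asmjit_test_misc.h"

    typedef void (*BlendFunc)(void* dst, const void* src, size_t n);

    int main() {
      asmjit::JitRuntime rt;

      asmjit::CodeHolder code;
      code.init(rt.codeInfo());

      // Generate the function into `code`, then finalize (runs the register
      // allocator and serializes to the assembler).
      asmjit::x86::Compiler cc(&code);
      asmtest::generateAlphaBlend(cc);
      cc.finalize();

      BlendFunc fn;
      if (rt.add(&fn, &code) != asmjit::kErrorOk)
        return 1;

      // ... call fn(dstPixels, srcPixels, pixelCount) on ARGB32 buffers ...
      rt.release(fn);
      return 0;
    }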