mirror of
https://github.com/asmjit/asmjit.git
synced 2025-12-17 20:44:37 +03:00
Added asmjit_test_perf, which replaces asmjit_bench and provides a much better performance overview
Removed asmjit_test_opcode (no longer needed now that we have asmjit_test_assembler and asmjit_test_perf)
This commit is contained in:
10
.github/workflows/build-config.json
vendored
10
.github/workflows/build-config.json
vendored
@@ -15,10 +15,6 @@
|
||||
"cmd": ["asmjit_test_unit", "--quick"],
|
||||
"optional": true
|
||||
},
|
||||
{
|
||||
"cmd": ["asmjit_test_opcode", "--quiet"],
|
||||
"optional": true
|
||||
},
|
||||
{
|
||||
"cmd": ["asmjit_test_assembler", "--quiet"],
|
||||
"optional": true
|
||||
@@ -27,6 +23,10 @@
|
||||
"cmd": ["asmjit_test_emitters"],
|
||||
"optional": true
|
||||
},
|
||||
{
|
||||
"cmd": ["asmjit_test_compiler"],
|
||||
"optional": true
|
||||
},
|
||||
{
|
||||
"cmd": ["asmjit_test_instinfo"],
|
||||
"optional": true
|
||||
@@ -36,7 +36,7 @@
|
||||
"optional": true
|
||||
},
|
||||
{
|
||||
"cmd": ["asmjit_test_compiler"],
|
||||
"cmd": ["asmjit_test_perf", "--quick"],
|
||||
"optional": true
|
||||
}
|
||||
]
|
||||
|
||||
@@ -497,17 +497,6 @@ if (NOT ASMJIT_EMBED)
|
||||
CFLAGS_REL ${ASMJIT_PRIVATE_CFLAGS_REL})
|
||||
target_include_directories(asmjit_test_unit BEFORE PRIVATE ${ASMJIT_INCLUDE_DIRS})
|
||||
|
||||
foreach(_target asmjit_test_opcode
|
||||
asmjit_test_emitters
|
||||
asmjit_test_x86_sections)
|
||||
asmjit_add_target(${_target} TEST
|
||||
SOURCES test/${_target}.cpp
|
||||
LIBRARIES asmjit::asmjit
|
||||
CFLAGS ${ASMJIT_PRIVATE_CFLAGS}
|
||||
CFLAGS_DBG ${ASMJIT_PRIVATE_CFLAGS_DBG}
|
||||
CFLAGS_REL ${ASMJIT_PRIVATE_CFLAGS_REL})
|
||||
endforeach()
|
||||
|
||||
asmjit_add_target(asmjit_test_assembler TEST
|
||||
SOURCES test/asmjit_test_assembler.cpp
|
||||
test/asmjit_test_assembler_x64.cpp
|
||||
@@ -518,6 +507,25 @@ if (NOT ASMJIT_EMBED)
|
||||
CFLAGS_DBG ${ASMJIT_PRIVATE_CFLAGS_DBG}
|
||||
CFLAGS_REL ${ASMJIT_PRIVATE_CFLAGS_REL})
|
||||
|
||||
asmjit_add_target(asmjit_test_perf EXECUTABLE
|
||||
SOURCES test/asmjit_test_perf.cpp
|
||||
test/asmjit_test_perf_x86.cpp
|
||||
SOURCES test/asmjit_test_perf.h
|
||||
LIBRARIES asmjit::asmjit
|
||||
CFLAGS ${ASMJIT_PRIVATE_CFLAGS}
|
||||
CFLAGS_DBG ${ASMJIT_PRIVATE_CFLAGS_DBG}
|
||||
CFLAGS_REL ${ASMJIT_PRIVATE_CFLAGS_REL})
|
||||
|
||||
foreach(_target asmjit_test_emitters
|
||||
asmjit_test_x86_sections)
|
||||
asmjit_add_target(${_target} TEST
|
||||
SOURCES test/${_target}.cpp
|
||||
LIBRARIES asmjit::asmjit
|
||||
CFLAGS ${ASMJIT_PRIVATE_CFLAGS}
|
||||
CFLAGS_DBG ${ASMJIT_PRIVATE_CFLAGS_DBG}
|
||||
CFLAGS_REL ${ASMJIT_PRIVATE_CFLAGS_REL})
|
||||
endforeach()
|
||||
|
||||
if (NOT ASMJIT_NO_INTROSPECTION)
|
||||
asmjit_add_target(asmjit_test_instinfo TEST
|
||||
SOURCES test/asmjit_test_instinfo.cpp
|
||||
@@ -545,14 +553,6 @@ if (NOT ASMJIT_EMBED)
|
||||
CFLAGS_REL ${ASMJIT_PRIVATE_CFLAGS_REL})
|
||||
endif()
|
||||
|
||||
foreach(_target asmjit_bench_x86)
|
||||
asmjit_add_target(${_target} EXECUTABLE
|
||||
SOURCES test/${_target}.cpp
|
||||
LIBRARIES asmjit::asmjit
|
||||
CFLAGS ${ASMJIT_PRIVATE_CFLAGS}
|
||||
CFLAGS_DBG ${ASMJIT_PRIVATE_CFLAGS_DBG}
|
||||
CFLAGS_REL ${ASMJIT_PRIVATE_CFLAGS_REL})
|
||||
endforeach()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
||||
@@ -1,177 +0,0 @@
|
||||
// AsmJit - Machine code generation for C++
|
||||
//
|
||||
// * Official AsmJit Home Page: https://asmjit.com
|
||||
// * Official Github Repository: https://github.com/asmjit/asmjit
|
||||
//
|
||||
// Copyright (c) 2008-2020 The AsmJit Authors
|
||||
//
|
||||
// This software is provided 'as-is', without any express or implied
|
||||
// warranty. In no event will the authors be held liable for any damages
|
||||
// arising from the use of this software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any purpose,
|
||||
// including commercial applications, and to alter it and redistribute it
|
||||
// freely, subject to the following restrictions:
|
||||
//
|
||||
// 1. The origin of this software must not be misrepresented; you must not
|
||||
// claim that you wrote the original software. If you use this software
|
||||
// in a product, an acknowledgment in the product documentation would be
|
||||
// appreciated but is not required.
|
||||
// 2. Altered source versions must be plainly marked as such, and must not be
|
||||
// misrepresented as being the original software.
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
#include <asmjit/core.h>
|
||||
|
||||
#ifdef ASMJIT_BUILD_X86
|
||||
#include <asmjit/x86.h>
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "asmjit_test_opcode.h"
|
||||
|
||||
#ifndef ASMJIT_NO_COMPILER
|
||||
#include "asmjit_test_misc.h"
|
||||
#endif
|
||||
|
||||
using namespace asmjit;
|
||||
|
||||
// ============================================================================
|
||||
// [Configuration]
|
||||
// ============================================================================
|
||||
|
||||
static constexpr uint32_t kNumRepeats = 20;
|
||||
static constexpr uint32_t kNumIterations = 1000;
|
||||
|
||||
// ============================================================================
|
||||
// [BenchUtils]
|
||||
// ============================================================================
|
||||
|
||||
namespace BenchUtils {
|
||||
class Performance {
|
||||
public:
|
||||
inline Performance() noexcept { reset(); }
|
||||
|
||||
inline void reset() noexcept {
|
||||
tick = 0u;
|
||||
best = 0xFFFFFFFFu;
|
||||
}
|
||||
|
||||
inline uint32_t start() noexcept { return (tick = now()); }
|
||||
inline uint32_t diff() const noexcept { return now() - tick; }
|
||||
|
||||
inline uint32_t end() noexcept {
|
||||
tick = diff();
|
||||
if (best > tick)
|
||||
best = tick;
|
||||
return tick;
|
||||
}
|
||||
|
||||
static inline uint32_t now() noexcept {
|
||||
return OSUtils::getTickCount();
|
||||
}
|
||||
|
||||
uint32_t tick;
|
||||
uint32_t best;
|
||||
};
|
||||
|
||||
// Converts `outputSize` bytes produced in `time` milliseconds into MiB per
// second. A zero `time` yields 0.0 instead of dividing by zero.
static double mbps(uint32_t time, uint64_t outputSize) noexcept {
  if (time == 0u)
    return 0.0;

  const double scaledBytes = double(outputSize) * 1000;
  const double divisor = double(time) * 1024 * 1024;
  return scaledBytes / divisor;
}
|
||||
|
||||
template<typename EmitterT, typename FuncT>
|
||||
static void bench(CodeHolder& code, uint32_t arch, const char* testName, const FuncT& func) noexcept {
|
||||
EmitterT emitter;
|
||||
|
||||
const char* archName =
|
||||
arch == Environment::kArchX86 ? "X86" :
|
||||
arch == Environment::kArchX64 ? "X64" : "???";
|
||||
|
||||
const char* emitterName =
|
||||
emitter.isAssembler() ? "Assembler" :
|
||||
emitter.isCompiler() ? "Compiler" :
|
||||
emitter.isBuilder() ? "Builder" : "Unknown";
|
||||
|
||||
Performance perf;
|
||||
uint64_t codeSize = 0;
|
||||
|
||||
Environment env(arch);
|
||||
|
||||
for (uint32_t r = 0; r < kNumRepeats; r++) {
|
||||
perf.start();
|
||||
codeSize = 0;
|
||||
for (uint32_t i = 0; i < kNumIterations; i++) {
|
||||
code.init(env);
|
||||
code.attach(&emitter);
|
||||
|
||||
func(emitter);
|
||||
codeSize += code.codeSize();
|
||||
|
||||
code.reset();
|
||||
}
|
||||
perf.end();
|
||||
}
|
||||
|
||||
printf("[%s] %-9s %-10s | Time:%6u [ms] | ", archName, emitterName, testName, perf.best);
|
||||
if (codeSize)
|
||||
printf("Speed: %7.3f [MB/s]", mbps(perf.best, codeSize));
|
||||
else
|
||||
printf("Speed: N/A");
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// [Main]
|
||||
// ============================================================================
|
||||
|
||||
#ifdef ASMJIT_BUILD_X86
|
||||
static void benchX86(uint32_t arch) noexcept {
|
||||
CodeHolder code;
|
||||
|
||||
BenchUtils::bench<x86::Assembler>(code, arch, "[fast]", [](x86::Assembler& a) {
|
||||
asmtest::generateOpcodes(a.as<x86::Emitter>());
|
||||
});
|
||||
|
||||
BenchUtils::bench<x86::Assembler>(code, arch, "[validate]", [](x86::Assembler& a) {
|
||||
a.addValidationOptions(BaseEmitter::kValidationOptionAssembler);
|
||||
asmtest::generateOpcodes(a.as<x86::Emitter>());
|
||||
});
|
||||
|
||||
#ifndef ASMJIT_NO_BUILDER
|
||||
BenchUtils::bench<x86::Builder>(code, arch, "[no-asm]", [](x86::Builder& cb) {
|
||||
asmtest::generateOpcodes(cb.as<x86::Emitter>());
|
||||
});
|
||||
|
||||
BenchUtils::bench<x86::Builder>(code, arch, "[asm]", [](x86::Builder& cb) {
|
||||
asmtest::generateOpcodes(cb.as<x86::Emitter>());
|
||||
cb.finalize();
|
||||
});
|
||||
#endif
|
||||
|
||||
#ifndef ASMJIT_NO_COMPILER
|
||||
BenchUtils::bench<x86::Compiler>(code, arch, "[no-asm]", [](x86::Compiler& cc) {
|
||||
asmtest::generateAlphaBlend(cc);
|
||||
});
|
||||
|
||||
BenchUtils::bench<x86::Compiler>(code, arch, "[asm]", [](x86::Compiler& cc) {
|
||||
asmtest::generateAlphaBlend(cc);
|
||||
cc.finalize();
|
||||
});
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
// Entry point: benchmarks X86 first and X64 second when the X86 backend is
// compiled in; otherwise does nothing. Always returns 0.
int main() {
#ifdef ASMJIT_BUILD_X86
  const uint32_t archs[] = {
    Environment::kArchX86,
    Environment::kArchX64
  };

  for (uint32_t arch : archs)
    benchX86(arch);
#endif

  return 0;
}
|
||||
@@ -32,6 +32,8 @@
|
||||
#include <chrono>
|
||||
|
||||
#include "cmdline.h"
|
||||
#include "performancetimer.h"
|
||||
|
||||
#include "asmjit_test_compiler.h"
|
||||
|
||||
#if defined(ASMJIT_BUILD_X86) && ASMJIT_ARCH_X86
|
||||
@@ -54,27 +56,6 @@ void compiler_add_a64_tests(TestApp& app);
|
||||
|
||||
using namespace asmjit;
|
||||
|
||||
// Simple start/stop timer that reports the measured interval in milliseconds.
//
// The timer is based on std::chrono::steady_clock, which is monotonic, so a
// measured interval cannot become negative or jump when the system clock is
// adjusted (std::chrono::high_resolution_clock gives no such guarantee).
class PerformanceTimer {
public:
  using Clock = std::chrono::steady_clock;
  using TimePoint = Clock::time_point;

  TimePoint _startTime {};
  TimePoint _endTime {};

  // Records the beginning of the measured interval.
  inline void start() {
    _startTime = Clock::now();
  }

  // Records the end of the measured interval.
  inline void stop() {
    _endTime = Clock::now();
  }

  // Returns the time between the last start() and stop() in milliseconds
  // (0 when neither has been called yet).
  inline double duration() const {
    std::chrono::duration<double> elapsed = _endTime - _startTime;
    return elapsed.count() * 1000;
  }
};
|
||||
|
||||
// ============================================================================
|
||||
// [TestApp]
|
||||
// ============================================================================
|
||||
|
||||
@@ -2291,7 +2291,7 @@ public:
|
||||
}
|
||||
|
||||
virtual void compile(x86::Compiler& cc) {
|
||||
asmtest::generateAlphaBlend(cc);
|
||||
asmtest::generateSseAlphaBlend(cc, true);
|
||||
}
|
||||
|
||||
virtual bool run(void* _func, String& result, String& expect) {
|
||||
|
||||
@@ -28,42 +28,42 @@
|
||||
|
||||
namespace asmtest {
|
||||
|
||||
// Generate a typical alpha blend function using SSE2 instruction set. Used
|
||||
// for benchmarking and also in test86. The generated code should be stable
|
||||
// and fully functional.
|
||||
static void generateAlphaBlend(asmjit::x86::Compiler& cc) {
|
||||
using namespace asmjit;
|
||||
using namespace asmjit::x86;
|
||||
using namespace asmjit;
|
||||
|
||||
Gp dst = cc.newIntPtr("dst");
|
||||
Gp src = cc.newIntPtr("src");
|
||||
// Generates a typical alpha blend function that uses SSE2 instruction set.
|
||||
// This function combines emitting instructions with control flow constructs
|
||||
// like binding Labels and jumping to them. This should be pretty representative.
|
||||
template<typename Emitter>
|
||||
static void generateSseAlphaBlendInternal(
|
||||
Emitter& cc,
|
||||
const x86::Gp& dst, const x86::Gp& src, const x86::Gp& n,
|
||||
const x86::Gp& gp0,
|
||||
const x86::Xmm& simd0, const x86::Xmm& simd1, const x86::Xmm& simd2, const x86::Xmm& simd3,
|
||||
const x86::Xmm& simd4, const x86::Xmm& simd5, const x86::Xmm& simd6, const x86::Xmm& simd7) {
|
||||
|
||||
Gp i = cc.newIntPtr("i");
|
||||
Gp j = cc.newIntPtr("j");
|
||||
Gp t = cc.newIntPtr("t");
|
||||
x86::Gp i = n;
|
||||
x86::Gp j = gp0;
|
||||
|
||||
Xmm vzero = cc.newXmm("vzero");
|
||||
Xmm v0080 = cc.newXmm("v0080");
|
||||
Xmm v0101 = cc.newXmm("v0101");
|
||||
x86::Xmm vzero = simd0;
|
||||
x86::Xmm v0080 = simd1;
|
||||
x86::Xmm v0101 = simd2;
|
||||
|
||||
Label L_SmallLoop = cc.newLabel();
|
||||
Label L_SmallEnd = cc.newLabel();
|
||||
Label L_LargeLoop = cc.newLabel();
|
||||
Label L_LargeEnd = cc.newLabel();
|
||||
Label L_DataPool = cc.newLabel();
|
||||
Label L_Done = cc.newLabel();
|
||||
|
||||
cc.addFunc(FuncSignatureT<void, void*, const void*, size_t>(CallConv::kIdHost));
|
||||
|
||||
cc.setArg(0, dst);
|
||||
cc.setArg(1, src);
|
||||
cc.setArg(2, i);
|
||||
// Load SIMD Constants.
|
||||
cc.xorps(vzero, vzero);
|
||||
cc.mov(gp0.r32(), 0x00800080);
|
||||
cc.movd(v0080, gp0.r32());
|
||||
cc.mov(gp0.r32(), 0x01010101);
|
||||
cc.movd(v0101, gp0.r32());
|
||||
cc.pshufd(v0080, v0080, x86::Predicate::shuf(0, 0, 0, 0));
|
||||
cc.pshufd(v0101, v0101, x86::Predicate::shuf(0, 0, 0, 0));
|
||||
|
||||
// How many pixels have to be processed to make the loop aligned.
|
||||
cc.lea(t, x86::ptr(L_DataPool));
|
||||
cc.xorps(vzero, vzero);
|
||||
cc.movaps(v0080, x86::ptr(t, 0));
|
||||
cc.movaps(v0101, x86::ptr(t, 16));
|
||||
|
||||
cc.xor_(j, j);
|
||||
cc.sub(j, dst);
|
||||
cc.and_(j, 15);
|
||||
@@ -71,15 +71,15 @@ static void generateAlphaBlend(asmjit::x86::Compiler& cc) {
|
||||
cc.jz(L_SmallEnd);
|
||||
|
||||
cc.cmp(j, i);
|
||||
cc.cmovg(j, i); // j = min(i, j).
|
||||
cc.sub(i, j); // i -= j.
|
||||
cc.cmovg(j, i); // j = min(i, j)
|
||||
cc.sub(i, j); // i -= j
|
||||
|
||||
// Small loop.
|
||||
cc.bind(L_SmallLoop);
|
||||
{
|
||||
Xmm x0 = cc.newXmm("x0");
|
||||
Xmm y0 = cc.newXmm("y0");
|
||||
Xmm a0 = cc.newXmm("a0");
|
||||
x86::Xmm x0 = simd3;
|
||||
x86::Xmm y0 = simd4;
|
||||
x86::Xmm a0 = simd5;
|
||||
|
||||
cc.movd(y0, x86::ptr(src));
|
||||
cc.movd(x0, x86::ptr(dst));
|
||||
@@ -113,7 +113,7 @@ static void generateAlphaBlend(asmjit::x86::Compiler& cc) {
|
||||
|
||||
cc.test(i, i);
|
||||
cc.mov(j, i);
|
||||
cc.jz(cc.func()->exitLabel());
|
||||
cc.jz(L_Done);
|
||||
|
||||
cc.and_(j, 3);
|
||||
cc.shr(i, 2);
|
||||
@@ -122,11 +122,11 @@ static void generateAlphaBlend(asmjit::x86::Compiler& cc) {
|
||||
// Aligned loop.
|
||||
cc.bind(L_LargeLoop);
|
||||
{
|
||||
Xmm x0 = cc.newXmm("x0");
|
||||
Xmm x1 = cc.newXmm("x1");
|
||||
Xmm y0 = cc.newXmm("y0");
|
||||
Xmm a0 = cc.newXmm("a0");
|
||||
Xmm a1 = cc.newXmm("a1");
|
||||
x86::Xmm x0 = simd3;
|
||||
x86::Xmm x1 = simd4;
|
||||
x86::Xmm y0 = simd5;
|
||||
x86::Xmm a0 = simd6;
|
||||
x86::Xmm a1 = simd7;
|
||||
|
||||
cc.movups(y0, x86::ptr(src));
|
||||
cc.movaps(x0, x86::ptr(dst));
|
||||
@@ -172,13 +172,102 @@ static void generateAlphaBlend(asmjit::x86::Compiler& cc) {
|
||||
cc.test(j, j);
|
||||
cc.jnz(L_SmallLoop);
|
||||
|
||||
cc.endFunc();
|
||||
cc.bind(L_Done);
|
||||
}
|
||||
|
||||
// Data.
|
||||
cc.align(kAlignData, 16);
|
||||
cc.bind(L_DataPool);
|
||||
cc.embedUInt16(uint16_t(0x0080u), 8);
|
||||
cc.embedUInt16(uint16_t(0x0101u), 8);
|
||||
static void generateSseAlphaBlend(asmjit::BaseEmitter& emitter, bool emitPrologEpilog) {
|
||||
using namespace asmjit::x86;
|
||||
|
||||
if (emitter.isAssembler()) {
|
||||
Assembler& cc = *emitter.as<Assembler>();
|
||||
|
||||
x86::Gp dst = cc.zax();
|
||||
x86::Gp src = cc.zcx();
|
||||
x86::Gp i = cc.zdx();
|
||||
x86::Gp j = cc.zdi();
|
||||
|
||||
if (emitPrologEpilog) {
|
||||
FuncDetail func;
|
||||
func.init(FuncSignatureT<void, void*, const void*, size_t>(CallConv::kIdHost), cc.environment());
|
||||
|
||||
FuncFrame frame;
|
||||
frame.init(func);
|
||||
frame.addDirtyRegs(dst, src, i, j);
|
||||
frame.addDirtyRegs(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
|
||||
|
||||
FuncArgsAssignment args(&func);
|
||||
args.assignAll(dst, src, i);
|
||||
args.updateFuncFrame(frame);
|
||||
frame.finalize();
|
||||
|
||||
cc.emitProlog(frame);
|
||||
cc.emitArgsAssignment(frame, args);
|
||||
generateSseAlphaBlendInternal(cc, dst, src, i, j, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
|
||||
cc.emitEpilog(frame);
|
||||
}
|
||||
else {
|
||||
generateSseAlphaBlendInternal(cc, dst, src, i, j, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
|
||||
}
|
||||
}
|
||||
#ifndef ASMJIT_NO_BUILDER
|
||||
else if (emitter.isBuilder()) {
|
||||
Builder& cc = *emitter.as<Builder>();
|
||||
|
||||
x86::Gp dst = cc.zax();
|
||||
x86::Gp src = cc.zcx();
|
||||
x86::Gp i = cc.zdx();
|
||||
x86::Gp j = cc.zdi();
|
||||
|
||||
if (emitPrologEpilog) {
|
||||
FuncDetail func;
|
||||
func.init(FuncSignatureT<void, void*, const void*, size_t>(CallConv::kIdHost), cc.environment());
|
||||
|
||||
FuncFrame frame;
|
||||
frame.init(func);
|
||||
frame.addDirtyRegs(dst, src, i, j);
|
||||
frame.addDirtyRegs(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
|
||||
|
||||
FuncArgsAssignment args(&func);
|
||||
args.assignAll(dst, src, i);
|
||||
args.updateFuncFrame(frame);
|
||||
frame.finalize();
|
||||
|
||||
cc.emitProlog(frame);
|
||||
cc.emitArgsAssignment(frame, args);
|
||||
generateSseAlphaBlendInternal(cc, dst, src, i, j, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
|
||||
cc.emitEpilog(frame);
|
||||
}
|
||||
else {
|
||||
generateSseAlphaBlendInternal(cc, dst, src, i, j, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#ifndef ASMJIT_NO_COMPILER
|
||||
else if (emitter.isCompiler()) {
|
||||
Compiler& cc = *emitter.as<Compiler>();
|
||||
|
||||
Gp dst = cc.newIntPtr("dst");
|
||||
Gp src = cc.newIntPtr("src");
|
||||
Gp i = cc.newIntPtr("i");
|
||||
Gp j = cc.newIntPtr("j");
|
||||
|
||||
Xmm v0 = cc.newXmm("v0");
|
||||
Xmm v1 = cc.newXmm("v1");
|
||||
Xmm v2 = cc.newXmm("v2");
|
||||
Xmm v3 = cc.newXmm("v3");
|
||||
Xmm v4 = cc.newXmm("v4");
|
||||
Xmm v5 = cc.newXmm("v5");
|
||||
Xmm v6 = cc.newXmm("v6");
|
||||
Xmm v7 = cc.newXmm("v7");
|
||||
|
||||
cc.addFunc(FuncSignatureT<void, void*, const void*, size_t>(CallConv::kIdHost));
|
||||
cc.setArg(0, dst);
|
||||
cc.setArg(1, src);
|
||||
cc.setArg(2, i);
|
||||
generateSseAlphaBlendInternal(cc, dst, src, i, j, v0, v1, v2, v3, v4, v5, v6, v7);
|
||||
cc.endFunc();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
} // {asmtest}
|
||||
|
||||
@@ -1,115 +0,0 @@
|
||||
// AsmJit - Machine code generation for C++
|
||||
//
|
||||
// * Official AsmJit Home Page: https://asmjit.com
|
||||
// * Official Github Repository: https://github.com/asmjit/asmjit
|
||||
//
|
||||
// Copyright (c) 2008-2020 The AsmJit Authors
|
||||
//
|
||||
// This software is provided 'as-is', without any express or implied
|
||||
// warranty. In no event will the authors be held liable for any damages
|
||||
// arising from the use of this software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any purpose,
|
||||
// including commercial applications, and to alter it and redistribute it
|
||||
// freely, subject to the following restrictions:
|
||||
//
|
||||
// 1. The origin of this software must not be misrepresented; you must not
|
||||
// claim that you wrote the original software. If you use this software
|
||||
// in a product, an acknowledgment in the product documentation would be
|
||||
// appreciated but is not required.
|
||||
// 2. Altered source versions must be plainly marked as such, and must not be
|
||||
// misrepresented as being the original software.
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// This file is used to test opcodes generated by AsmJit. Output can be
|
||||
// disassembled in your IDE or by your favorite disassembler. Instructions
|
||||
// are grouped by category and then sorted alphabetically.
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
#include <asmjit/x86.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "cmdline.h"
|
||||
#include "asmjit_test_opcode.h"
|
||||
|
||||
using namespace asmjit;
|
||||
|
||||
// One configuration of the opcode dump. Kept a plain aggregate because the
// table in main() brace-initializes it in member order.
struct OpcodeDumpInfo {
  // Architecture id used to construct the Environment of the CodeHolder.
  uint32_t arch;
  // Forwarded to asmtest::generateOpcodes() as its second argument.
  bool useRex1;
  // Forwarded to asmtest::generateOpcodes() as its third argument.
  bool useRex2;
};
|
||||
|
||||
static const char* archToString(uint32_t arch) noexcept {
|
||||
switch (arch & ~Environment::kArchBigEndianMask) {
|
||||
case Environment::kArchX86 : return "X86";
|
||||
case Environment::kArchX64 : return "X64";
|
||||
case Environment::kArchARM : return "ARM";
|
||||
case Environment::kArchThumb : return "Thumb";
|
||||
case Environment::kArchAArch64 : return "AArch64";
|
||||
case Environment::kArchMIPS32_LE: return "MIPS32";
|
||||
case Environment::kArchMIPS64_LE: return "MIPS64";
|
||||
default: return "Unknown";
|
||||
}
|
||||
}
|
||||
|
||||
struct TestErrorHandler : public ErrorHandler {
|
||||
virtual void handleError(Error err, const char* message, BaseEmitter* origin) {
|
||||
(void)origin;
|
||||
printf("ERROR 0x%08X: %s\n", err, message);
|
||||
}
|
||||
};
|
||||
|
||||
typedef void (*VoidFunc)(void);
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
CmdLine cmdLine(argc, argv);
|
||||
TestErrorHandler eh;
|
||||
|
||||
OpcodeDumpInfo infoList[] = {
|
||||
{ Environment::kArchX86, false, false },
|
||||
{ Environment::kArchX64, false, false },
|
||||
{ Environment::kArchX64, false, true },
|
||||
{ Environment::kArchX64, true , false },
|
||||
{ Environment::kArchX64, true , true }
|
||||
};
|
||||
|
||||
bool quiet = cmdLine.hasArg("--quiet");
|
||||
|
||||
for (uint32_t i = 0; i < ASMJIT_ARRAY_SIZE(infoList); i++) {
|
||||
const OpcodeDumpInfo& info = infoList[i];
|
||||
|
||||
printf("Opcodes [ARCH=%s REX1=%s REX2=%s]\n",
|
||||
archToString(info.arch),
|
||||
info.useRex1 ? "true" : "false",
|
||||
info.useRex2 ? "true" : "false");
|
||||
|
||||
CodeHolder code;
|
||||
code.init(Environment(info.arch));
|
||||
code.setErrorHandler(&eh);
|
||||
|
||||
#ifndef ASMJIT_NO_LOGGING
|
||||
FileLogger logger(stdout);
|
||||
logger.addFlags(FormatOptions::kFlagMachineCode);
|
||||
if (!quiet)
|
||||
code.setLogger(&logger);
|
||||
#endif
|
||||
|
||||
x86::Assembler a(&code);
|
||||
asmtest::generateOpcodes(a.as<x86::Emitter>(), info.useRex1, info.useRex2);
|
||||
|
||||
// If this is the host architecture the code generated can be executed
|
||||
// for debugging purposes (the first instruction is ret anyway).
|
||||
if (code.arch() == Environment::kArchHost) {
|
||||
JitRuntime runtime;
|
||||
VoidFunc p;
|
||||
|
||||
Error err = runtime.add(&p, &code);
|
||||
if (err == kErrorOk) p();
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
69
test/asmjit_test_perf.cpp
Normal file
69
test/asmjit_test_perf.cpp
Normal file
@@ -0,0 +1,69 @@
|
||||
// AsmJit - Machine code generation for C++
|
||||
//
|
||||
// * Official AsmJit Home Page: https://asmjit.com
|
||||
// * Official Github Repository: https://github.com/asmjit/asmjit
|
||||
//
|
||||
// Copyright (c) 2008-2020 The AsmJit Authors
|
||||
//
|
||||
// This software is provided 'as-is', without any express or implied
|
||||
// warranty. In no event will the authors be held liable for any damages
|
||||
// arising from the use of this software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any purpose,
|
||||
// including commercial applications, and to alter it and redistribute it
|
||||
// freely, subject to the following restrictions:
|
||||
//
|
||||
// 1. The origin of this software must not be misrepresented; you must not
|
||||
// claim that you wrote the original software. If you use this software
|
||||
// in a product, an acknowledgment in the product documentation would be
|
||||
// appreciated but is not required.
|
||||
// 2. Altered source versions must be plainly marked as such, and must not be
|
||||
// misrepresented as being the original software.
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
#include <asmjit/core.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "cmdline.h"
|
||||
|
||||
using namespace asmjit;
|
||||
|
||||
#if defined(ASMJIT_BUILD_X86)
|
||||
void benchmarkX86Emitters(uint32_t numIterations, bool testX86, bool testX64) noexcept;
|
||||
#endif
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
CmdLine cmdLine(argc, argv);
|
||||
uint32_t numIterations = 20000;
|
||||
|
||||
printf("AsmJit Performance Suite v%u.%u.%u:\n\n",
|
||||
unsigned((ASMJIT_LIBRARY_VERSION >> 16) ),
|
||||
unsigned((ASMJIT_LIBRARY_VERSION >> 8) & 0xFF),
|
||||
unsigned((ASMJIT_LIBRARY_VERSION ) & 0xFF));
|
||||
|
||||
printf("Usage:\n");
|
||||
printf(" --help Show usage only\n");
|
||||
printf(" --quick Decrease the number of iterations to make tests quicker\n");
|
||||
printf(" --arch=<ARCH> Select architecture to run ('all' by default)\n");
|
||||
printf("\n");
|
||||
|
||||
if (cmdLine.hasArg("--help"))
|
||||
return 0;
|
||||
|
||||
if (cmdLine.hasArg("--quick"))
|
||||
numIterations = 1000;
|
||||
|
||||
const char* arch = cmdLine.valueOf("--arch", "all");
|
||||
|
||||
#if defined(ASMJIT_BUILD_X86)
|
||||
bool testX86 = strcmp(arch, "all") == 0 || strcmp(arch, "x86") == 0;
|
||||
bool testX64 = strcmp(arch, "all") == 0 || strcmp(arch, "x64") == 0;
|
||||
|
||||
if (testX86 || testX64)
|
||||
benchmarkX86Emitters(numIterations, testX86, testX64);
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
81
test/asmjit_test_perf.h
Normal file
81
test/asmjit_test_perf.h
Normal file
@@ -0,0 +1,81 @@
|
||||
// AsmJit - Machine code generation for C++
|
||||
//
|
||||
// * Official AsmJit Home Page: https://asmjit.com
|
||||
// * Official Github Repository: https://github.com/asmjit/asmjit
|
||||
//
|
||||
// Copyright (c) 2008-2020 The AsmJit Authors
|
||||
//
|
||||
// This software is provided 'as-is', without any express or implied
|
||||
// warranty. In no event will the authors be held liable for any damages
|
||||
// arising from the use of this software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any purpose,
|
||||
// including commercial applications, and to alter it and redistribute it
|
||||
// freely, subject to the following restrictions:
|
||||
//
|
||||
// 1. The origin of this software must not be misrepresented; you must not
|
||||
// claim that you wrote the original software. If you use this software
|
||||
// in a product, an acknowledgment in the product documentation would be
|
||||
// appreciated but is not required.
|
||||
// 2. Altered source versions must be plainly marked as such, and must not be
|
||||
// misrepresented as being the original software.
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
#ifndef ASMJIT_TEST_PERF_H_INCLUDED
|
||||
#define ASMJIT_TEST_PERF_H_INCLUDED
|
||||
|
||||
#include <asmjit/core.h>
|
||||
#include "performancetimer.h"
|
||||
|
||||
class MyErrorHandler : public asmjit::ErrorHandler {
|
||||
void handleError(asmjit::Error err, const char* message, asmjit::BaseEmitter* origin) {
|
||||
(void)err;
|
||||
(void)origin;
|
||||
printf("ERROR: %s\n", message);
|
||||
abort();
|
||||
}
|
||||
};
|
||||
|
||||
template<typename EmitterT, typename FuncT>
|
||||
static void bench(asmjit::CodeHolder& code, uint32_t arch, uint32_t numIterations, const char* testName, const FuncT& func) noexcept {
|
||||
EmitterT emitter;
|
||||
MyErrorHandler eh;
|
||||
|
||||
const char* archName =
|
||||
arch == asmjit::Environment::kArchX86 ? "X86" :
|
||||
arch == asmjit::Environment::kArchX64 ? "X64" : "???";
|
||||
|
||||
const char* emitterName =
|
||||
emitter.isAssembler() ? "Assembler" :
|
||||
emitter.isCompiler() ? "Compiler" :
|
||||
emitter.isBuilder() ? "Builder" : "Unknown";
|
||||
|
||||
uint64_t codeSize = 0;
|
||||
asmjit::Environment env(arch);
|
||||
|
||||
PerformanceTimer timer;
|
||||
double duration = std::numeric_limits<double>::infinity();
|
||||
|
||||
for (uint32_t r = 0; r < numIterations; r++) {
|
||||
codeSize = 0;
|
||||
code.init(env);
|
||||
code.setErrorHandler(&eh);
|
||||
code.attach(&emitter);
|
||||
|
||||
timer.start();
|
||||
func(emitter);
|
||||
timer.stop();
|
||||
|
||||
codeSize += code.codeSize();
|
||||
|
||||
code.reset();
|
||||
duration = asmjit::Support::min(duration, timer.duration());
|
||||
}
|
||||
|
||||
printf(" [%s] %-9s %-16s | CodeSize:%5llu [B] | Time:%8.4f [ms]", archName, emitterName, testName, (unsigned long long)codeSize, duration);
|
||||
if (codeSize)
|
||||
printf(" | Speed:%8.3f [MB/s]", mbps(duration, codeSize));
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
#endif // ASMJIT_TEST_PERF_H_INCLUDED
|
||||
5049
test/asmjit_test_perf_x86.cpp
Normal file
5049
test/asmjit_test_perf_x86.cpp
Normal file
File diff suppressed because it is too large
Load Diff
@@ -21,8 +21,8 @@
|
||||
// misrepresented as being the original software.
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
#ifndef ASMJIT_TEST_CMDLINE_H_INCLUDED
|
||||
#define ASMJIT_TEST_CMDLINE_H_INCLUDED
|
||||
#ifndef CMDLINE_H_INCLUDED
|
||||
#define CMDLINE_H_INCLUDED
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
@@ -80,4 +80,4 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
#endif // ASMJIT_TEST_CMDLINE_H_INCLUDED
|
||||
#endif // CMDLINE_H_INCLUDED
|
||||
|
||||
59
test/performancetimer.h
Normal file
59
test/performancetimer.h
Normal file
@@ -0,0 +1,59 @@
|
||||
// AsmJit - Machine code generation for C++
|
||||
//
|
||||
// * Official AsmJit Home Page: https://asmjit.com
|
||||
// * Official Github Repository: https://github.com/asmjit/asmjit
|
||||
//
|
||||
// Copyright (c) 2008-2020 The AsmJit Authors
|
||||
//
|
||||
// This software is provided 'as-is', without any express or implied
|
||||
// warranty. In no event will the authors be held liable for any damages
|
||||
// arising from the use of this software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any purpose,
|
||||
// including commercial applications, and to alter it and redistribute it
|
||||
// freely, subject to the following restrictions:
|
||||
//
|
||||
// 1. The origin of this software must not be misrepresented; you must not
|
||||
// claim that you wrote the original software. If you use this software
|
||||
// in a product, an acknowledgment in the product documentation would be
|
||||
// appreciated but is not required.
|
||||
// 2. Altered source versions must be plainly marked as such, and must not be
|
||||
// misrepresented as being the original software.
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
#ifndef PERFORMANCETIMER_H_INCLUDED
|
||||
#define PERFORMANCETIMER_H_INCLUDED
|
||||
|
||||
#include <asmjit/core.h>
|
||||
#include <chrono>
|
||||
|
||||
// Simple start/stop timer that reports the measured interval in milliseconds.
//
// The timer is based on std::chrono::steady_clock, which is monotonic, so a
// measured interval cannot become negative or jump when the system clock is
// adjusted (std::chrono::high_resolution_clock gives no such guarantee).
class PerformanceTimer {
public:
  using Clock = std::chrono::steady_clock;
  using TimePoint = Clock::time_point;

  TimePoint _startTime {};
  TimePoint _endTime {};

  // Records the beginning of the measured interval.
  inline void start() {
    _startTime = Clock::now();
  }

  // Records the end of the measured interval.
  inline void stop() {
    _endTime = Clock::now();
  }

  // Returns the time between the last start() and stop() in milliseconds
  // (0 when neither has been called yet).
  inline double duration() const {
    std::chrono::duration<double> elapsed = _endTime - _startTime;
    return elapsed.count() * 1000;
  }
};
|
||||
|
||||
// Converts `outputSize` bytes produced in `duration` milliseconds into MiB per
// second.
//
// Returns 0.0 when `duration` is not a positive number (zero, negative or
// NaN), so that a degenerate measurement never shows up as an infinite,
// negative or NaN throughput.
static inline double mbps(double duration, uint64_t outputSize) noexcept {
  // Written as a negated comparison so that NaN is rejected as well.
  if (!(duration > 0.0))
    return 0.0;

  double bytesTotal = double(outputSize);
  return (bytesTotal * 1000) / (duration * 1024 * 1024);
}
|
||||
|
||||
#endif // PERFORMANCETIMER_H_INCLUDED
|
||||
Reference in New Issue
Block a user