[ABI] Added CpuFeatures to Target and CodeHolder, improved test_perf

This commit is contained in:
kobalicek
2023-01-15 16:12:52 +01:00
parent a9ac13536e
commit 1ed8b77f5b
18 changed files with 155 additions and 55 deletions

View File

@@ -91,7 +91,7 @@ int TestApp::run() {
CodeHolder code;
SimpleErrorHandler errorHandler;
code.init(runtime.environment());
code.init(runtime.environment(), runtime.cpuFeatures());
code.setErrorHandler(&errorHandler);
if (pass != 0) {

View File

@@ -90,7 +90,7 @@ static uint32_t testFunc(JitRuntime& rt, EmitterType emitterType) noexcept {
#endif
CodeHolder code;
code.init(rt.environment());
code.init(rt.environment(), rt.cpuFeatures());
#ifndef ASMJIT_NO_LOGGING
code.setLogger(&logger);

View File

@@ -10,7 +10,9 @@
#include "asmjitutils.h"
#include "performancetimer.h"
class MyErrorHandler : public asmjit::ErrorHandler {
namespace asmjit_perf_utils {
class TestErrorHandler : public asmjit::ErrorHandler {
void handleError(asmjit::Error err, const char* message, asmjit::BaseEmitter* origin) {
(void)err;
(void)origin;
@@ -19,10 +21,49 @@ class MyErrorHandler : public asmjit::ErrorHandler {
}
};
#ifndef ASMJIT_NO_BUILDER
// Counts the instruction nodes produced when `func` emits into a temporary
// `BuilderT` attached to `code`.
//
// Parameters:
//   code - CodeHolder used as scratch storage; it is initialized for `arch`
//          here and reset again before the function returns.
//   arch - architecture used to construct the `asmjit::Environment`.
//   func - callable invoked exactly once with the temporary builder.
//
// Returns the number of nodes in the builder's node list for which
// `isInst()` is true.
template<typename BuilderT, typename FuncT>
static uint32_t calculateInstructionCount(asmjit::CodeHolder& code, asmjit::Arch arch, const FuncT& func) noexcept {
  BuilderT builder;
  TestErrorHandler eh;

  asmjit::Environment env(arch);
  code.init(env);
  code.setErrorHandler(&eh);
  code.attach(&builder);
  func(builder);

  // Walk the builder's node list and tally only the instruction nodes.
  uint32_t count = 0;
  for (asmjit::BaseNode* node = builder.firstNode(); node; node = node->next()) {
    count += uint32_t(node->isInst());
  }

  // `eh` and `builder` are locals that die when this function returns. Reset
  // the holder so it does not keep referring to them and is handed back to
  // the caller in a clean state.
  code.reset();
  return count;
}
#endif
// Throughput helper: turns a byte count and an elapsed time into a rate that
// callers print as [MB/s]. The byte count is scaled by 1000 and the duration
// by 1024 * 1024 before dividing. A duration of exactly zero yields 0.0
// instead of dividing by zero.
static inline double mbps(double duration, uint64_t outputSize) noexcept {
  if (duration != 0) {
    const double scaledBytes = double(outputSize) * 1000;
    const double scaledTime = duration * 1024 * 1024;
    return scaledBytes / scaledTime;
  }

  return 0.0;
}
// Instruction-rate helper: turns an instruction count and an elapsed time
// into a rate that callers print as [MI/s]. The count is scaled by 1000 and
// the duration by 1e6 before dividing. A duration of exactly zero yields 0.0
// instead of dividing by zero.
static inline double mips(double duration, uint64_t instCount) noexcept {
  if (duration != 0) {
    const double scaledCount = double(instCount) * 1000.0;
    const double scaledTime = duration * 1e6;
    return scaledCount / scaledTime;
  }

  return 0.0;
}
template<typename EmitterT, typename FuncT>
static void bench(asmjit::CodeHolder& code, asmjit::Arch arch, uint32_t numIterations, const char* testName, const FuncT& func) noexcept {
static void bench(asmjit::CodeHolder& code, asmjit::Arch arch, uint32_t numIterations, const char* testName, uint32_t instCount, const FuncT& func) noexcept {
EmitterT emitter;
MyErrorHandler eh;
TestErrorHandler eh;
const char* archName = asmjitArchAsString(arch);
const char* emitterName =
@@ -53,9 +94,20 @@ static void bench(asmjit::CodeHolder& code, asmjit::Arch arch, uint32_t numItera
}
printf(" [%s] %-9s %-16s | CodeSize:%5llu [B] | Time:%8.4f [ms]", archName, emitterName, testName, (unsigned long long)codeSize, duration);
if (codeSize)
if (codeSize) {
printf(" | Speed:%8.3f [MB/s]", mbps(duration, codeSize));
}
else {
printf(" | Speed: N/A ");
}
if (instCount) {
printf(", %8.3f [MI/s]", mips(duration, instCount));
}
printf("\n");
}
} // {asmjit_perf_utils}
#endif // ASMJIT_TEST_PERF_H_INCLUDED

View File

@@ -645,42 +645,50 @@ static void benchmarkA64Function(Arch arch, uint32_t numIterations, const char*
CodeHolder code;
printf("%s:\n", description);
bench<a64::Assembler>(code, arch, numIterations, "[raw]", [&](a64::Assembler& cc) {
uint32_t instCount = 0;
#ifndef ASMJIT_NO_BUILDER
instCount = asmjit_perf_utils::calculateInstructionCount<a64::Builder>(code, arch, [&](a64::Builder& cc) {
emitterFn(cc, false);
});
#endif
asmjit_perf_utils::bench<a64::Assembler>(code, arch, numIterations, "[raw]", instCount, [&](a64::Assembler& cc) {
emitterFn(cc, false);
});
bench<a64::Assembler>(code, arch, numIterations, "[validated]", [&](a64::Assembler& cc) {
asmjit_perf_utils::bench<a64::Assembler>(code, arch, numIterations, "[validated]", instCount, [&](a64::Assembler& cc) {
cc.addDiagnosticOptions(DiagnosticOptions::kValidateAssembler);
emitterFn(cc, false);
});
bench<a64::Assembler>(code, arch, numIterations, "[prolog/epilog]", [&](a64::Assembler& cc) {
asmjit_perf_utils::bench<a64::Assembler>(code, arch, numIterations, "[prolog/epilog]", instCount, [&](a64::Assembler& cc) {
cc.addDiagnosticOptions(DiagnosticOptions::kValidateAssembler);
emitterFn(cc, true);
});
#ifndef ASMJIT_NO_BUILDER
bench<a64::Builder>(code, arch, numIterations, "[no-asm]", [&](a64::Builder& cc) {
asmjit_perf_utils::bench<a64::Builder>(code, arch, numIterations, "[no-asm]", instCount, [&](a64::Builder& cc) {
emitterFn(cc, false);
});
bench<a64::Builder>(code, arch, numIterations, "[finalized]", [&](a64::Builder& cc) {
asmjit_perf_utils::bench<a64::Builder>(code, arch, numIterations, "[finalized]", instCount, [&](a64::Builder& cc) {
emitterFn(cc, false);
cc.finalize();
});
bench<a64::Builder>(code, arch, numIterations, "[prolog/epilog]", [&](a64::Builder& cc) {
asmjit_perf_utils::bench<a64::Builder>(code, arch, numIterations, "[prolog/epilog]", instCount, [&](a64::Builder& cc) {
emitterFn(cc, true);
cc.finalize();
});
#endif
#ifndef ASMJIT_NO_COMPILER
bench<a64::Compiler>(code, arch, numIterations, "[no-asm]", [&](a64::Compiler& cc) {
asmjit_perf_utils::bench<a64::Compiler>(code, arch, numIterations, "[no-asm]", instCount, [&](a64::Compiler& cc) {
emitterFn(cc, true);
});
bench<a64::Compiler>(code, arch, numIterations, "[finalized]", [&](a64::Compiler& cc) {
asmjit_perf_utils::bench<a64::Compiler>(code, arch, numIterations, "[finalized]", instCount, [&](a64::Compiler& cc) {
emitterFn(cc, true);
cc.finalize();
});

View File

@@ -4912,42 +4912,49 @@ static void benchmarkX86Function(Arch arch, uint32_t numIterations, const char*
CodeHolder code;
printf("%s:\n", description);
bench<x86::Assembler>(code, arch, numIterations, "[raw]", [&](x86::Assembler& cc) {
uint32_t instCount = 0;
#ifndef ASMJIT_NO_BUILDER
instCount = asmjit_perf_utils::calculateInstructionCount<x86::Builder>(code, arch, [&](x86::Builder& cc) {
emitterFn(cc, false);
});
#endif
asmjit_perf_utils::bench<x86::Assembler>(code, arch, numIterations, "[raw]", instCount, [&](x86::Assembler& cc) {
emitterFn(cc, false);
});
bench<x86::Assembler>(code, arch, numIterations, "[validated]", [&](x86::Assembler& cc) {
asmjit_perf_utils::bench<x86::Assembler>(code, arch, numIterations, "[validated]", instCount, [&](x86::Assembler& cc) {
cc.addDiagnosticOptions(DiagnosticOptions::kValidateAssembler);
emitterFn(cc, false);
});
bench<x86::Assembler>(code, arch, numIterations, "[prolog/epilog]", [&](x86::Assembler& cc) {
cc.addDiagnosticOptions(DiagnosticOptions::kValidateAssembler);
asmjit_perf_utils::bench<x86::Assembler>(code, arch, numIterations, "[prolog/epilog]", instCount, [&](x86::Assembler& cc) {
emitterFn(cc, true);
});
#ifndef ASMJIT_NO_BUILDER
bench<x86::Builder>(code, arch, numIterations, "[no-asm]", [&](x86::Builder& cc) {
asmjit_perf_utils::bench<x86::Builder>(code, arch, numIterations, "[no-asm]", instCount, [&](x86::Builder& cc) {
emitterFn(cc, false);
});
bench<x86::Builder>(code, arch, numIterations, "[finalized]", [&](x86::Builder& cc) {
asmjit_perf_utils::bench<x86::Builder>(code, arch, numIterations, "[finalized]", instCount, [&](x86::Builder& cc) {
emitterFn(cc, false);
cc.finalize();
});
bench<x86::Builder>(code, arch, numIterations, "[prolog/epilog]", [&](x86::Builder& cc) {
asmjit_perf_utils::bench<x86::Builder>(code, arch, numIterations, "[prolog/epilog]", instCount, [&](x86::Builder& cc) {
emitterFn(cc, true);
cc.finalize();
});
#endif
#ifndef ASMJIT_NO_COMPILER
bench<x86::Compiler>(code, arch, numIterations, "[no-asm]", [&](x86::Compiler& cc) {
asmjit_perf_utils::bench<x86::Compiler>(code, arch, numIterations, "[no-asm]", instCount, [&](x86::Compiler& cc) {
emitterFn(cc, true);
});
bench<x86::Compiler>(code, arch, numIterations, "[finalized]", [&](x86::Compiler& cc) {
asmjit_perf_utils::bench<x86::Compiler>(code, arch, numIterations, "[finalized]", instCount, [&](x86::Compiler& cc) {
emitterFn(cc, true);
cc.finalize();
});

View File

@@ -30,12 +30,4 @@ public:
}
};
// Computes a throughput figure from an elapsed `duration` and an
// `outputSize` in bytes: (bytes * 1000) / (duration * 1024 * 1024).
// Returns 0.0 when `duration` is exactly zero so no division by zero occurs.
static inline double mbps(double duration, uint64_t outputSize) noexcept {
  if (duration != 0) {
    const double numerator = double(outputSize) * 1000;
    const double denominator = duration * 1024 * 1024;
    return numerator / denominator;
  }

  return 0.0;
}
#endif // PERFORMANCETIMER_H_INCLUDED