Files
asmjit/test/asmjit_bench_codegen.h
kobalicek 2ff454d415 [abi] AsmJit v1.17 - cumulative & breaking changes
* Reworked register operands - all vector registers are now
    platform::Vec deriving from UniVec (universal vector operand),
    additionally, there is no platform::Reg, instead asmjit::Reg
    provides all necessary features to make it a base register for
    each target architecture
  * Reworked casting between registers - now architecture agnostic
    names are preferred - use Gp32 instead of Gpd or GpW, Gp64
    instead of Gpq and GpX, etc...
  * Reworked vector registers and their names - architecture
    agnostic naming is now preferred Vec32, Vec64, Vec128, etc...
  * Reworked naming conventions used across AsmJit - for clarity
    Identifiers are now prefixed with the type, like sectionId(),
    labelId(), etc...
  * Reworked how Zone and ZoneAllocator are used across AsmJit,
    preferring Zone in most cases and ZoneAllocator only for
    containers - this change alone achieves around 5% better
    performance of Builder and Compiler
  * Reworked LabelEntry - decreased the size of the base entry
    to 16 bytes for anonymous and unnamed labels. Avoided an
    indirection when using labelEntries() - LabelEntry is now
    a value and not a pointer
  * Renamed LabelLink to Fixup
  * Added a new header <asmjit/host.h> which would include
    <asmjit/core.h> + target tools for the host architecture,
    if enabled and supported
  * Added new AArch64 instructions (BTI, CSSC, CHKFEAT)
  * Added a mvn_ alternative of mvn instruction (fix for Windows
    ARM64 SDK)
  * Added more AArch64 CPU features to CpuInfo
  * Added better support for Apple CPU detection (Apple M3, M4)
  * Added a new benchmarking tool asmjit_bench_overhead, which
    benchmarks the overhead of CodeHolder::init()/reset() and
    creating/attaching emitters to it. Thanks to the benchmark the
    most common code-paths were optimized
  * Added a new benchmarking tool asmjit_bench_regalloc, which
    aims to benchmark the cost and complexity of register allocation.
  * Renamed asmjit_test_perf to asmjit_bench_codegen to make it
    clear what is a test and what is a benchmark
2025-06-15 16:45:37 +02:00

116 lines
2.9 KiB
C++

// This file is part of AsmJit project <https://asmjit.com>
//
// See <asmjit/core.h> or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#ifndef ASMJIT_TEST_PERF_H_INCLUDED
#define ASMJIT_TEST_PERF_H_INCLUDED
#include <asmjit/core.h>
#include "asmjitutils.h"
#include "performancetimer.h"
namespace asmjit_perf_utils {
class TestErrorHandler : public asmjit::ErrorHandler {
void handleError(asmjit::Error err, const char* message, asmjit::BaseEmitter* origin) {
(void)err;
(void)origin;
printf("ERROR: %s\n", message);
abort();
}
};
#ifndef ASMJIT_NO_BUILDER
// Runs `func` on a temporary `BuilderT` attached to `code` and returns how many of the nodes it produced
// report `isInst()`.
//
// `code` is initialized for `arch` on entry and reset again before returning, errors are routed to a local
// `TestErrorHandler`.
template<typename BuilderT, typename FuncT>
static uint32_t calculateInstructionCount(asmjit::CodeHolder& code, asmjit::Arch arch, const FuncT& func) noexcept {
  BuilderT builder;
  TestErrorHandler eh;

  asmjit::Environment env(arch);
  code.init(env);
  code.setErrorHandler(&eh);
  code.attach(&builder);

  // Let the caller emit whatever should be counted.
  func(builder);

  // Walk the builder's node list from the first node and count the instruction nodes.
  uint32_t instCount = 0;
  for (asmjit::BaseNode* cur = builder.firstNode(); cur; cur = cur->next()) {
    if (cur->isInst()) {
      instCount++;
    }
  }

  code.reset();
  return instCount;
}
#endif
// Converts `outputSize` bytes produced in `duration_us` microseconds into a throughput in MiB per second.
//
// Returns 0.0 when `duration_us` is zero so the caller never divides by zero.
static inline double calculateMBPS(double duration_us, uint64_t outputSize) noexcept {
  if (duration_us != 0) {
    // bytes * 1e6 / us == bytes per second; dividing by 1024 * 1024 turns bytes into MiB.
    const double scaledBytes = double(outputSize) * 1000000;
    const double scaledTime = duration_us * 1024 * 1024;
    return scaledBytes / scaledTime;
  }

  return 0.0;
}
// Computes an instruction rate from `instCount` instructions processed in `duration`.
//
// Both operands are scaled by one million, so the result equals `instCount / duration` up to rounding; `bench()`
// passes the same microsecond duration it prints as `[us]`, which makes the result millions of instructions per
// second. Returns 0.0 when `duration` is zero.
static inline double calculateMIPS(double duration, uint64_t instCount) noexcept {
  if (duration != 0) {
    const double scaledInsts = double(instCount) * 1000000.0;
    const double scaledTime = duration * 1e6;
    return scaledInsts / scaledTime;
  }

  return 0.0;
}
// Benchmarks `func` with an emitter of type `EmitterT` and prints a single result line to stdout.
//
// `code` is initialized for `arch`, `func(emitter)` is executed `numIterations` times, and the shortest measured
// iteration is reported together with the code size of the last iteration. Each timed iteration covers both the
// emission done by `func` and the following `code.reinit()`.
//
//   - `testName`  - label printed in the result line.
//   - `instCount` - number of instructions `func` emits; when non-zero an instruction rate is printed as well.
//
// `code` is reset before returning, so it holds no reference to the local emitter or error handler afterwards.
template<typename EmitterT, typename FuncT>
static void bench(asmjit::CodeHolder& code, asmjit::Arch arch, uint32_t numIterations, const char* testName, uint32_t instCount, const FuncT& func) noexcept {
  EmitterT emitter;
  TestErrorHandler eh;

  const char* archName = asmjitArchAsString(arch);
  const char* emitterName =
    emitter.isAssembler() ? "Assembler" :
    emitter.isCompiler()  ? "Compiler"  :
    emitter.isBuilder()   ? "Builder"   : "Unknown";

  uint64_t codeSize = 0;

  asmjit::Environment env(arch);
  PerformanceTimer timer;

  // Start at infinity so the first measured iteration always becomes the minimum.
  double duration = std::numeric_limits<double>::infinity();

  code.init(env);
  code.setErrorHandler(&eh);
  code.attach(&emitter);

  for (uint32_t r = 0; r < numIterations; r++) {
    timer.start();
    func(emitter);

    // Must be read before reinit(), only the size of the last iteration is reported.
    codeSize = code.codeSize();

    // The emitter is reused by the next iteration without being attached again.
    code.reinit();
    timer.stop();

    // NOTE(review): `timer.duration()` is presumably in milliseconds - the scaled value is printed as [us].
    duration = asmjit::Support::min(duration, timer.duration() * 1000);
  }

  // `eh` and `emitter` are locals - reset `code` (like calculateInstructionCount() does) so the caller's
  // CodeHolder is not left initialized with a pointer to an error handler that is about to be destroyed.
  code.reset();

  printf(" [%-7s] %-9s %-16s | CodeSize:%5llu [B] | Time:%7.3f [us]", archName, emitterName, testName, (unsigned long long)codeSize, duration);

  if (codeSize) {
    printf(" | Speed:%7.1f [MiB/s]", calculateMBPS(duration, codeSize));
  }
  else {
    printf(" | Speed: N/A ");
  }

  if (instCount) {
    printf(", %8.1f [MInst/s]", calculateMIPS(duration, instCount));
  }

  printf("\n");
}
} // {asmjit_perf_utils}
#endif // ASMJIT_TEST_PERF_H_INCLUDED