mirror of
https://github.com/asmjit/asmjit.git
synced 2025-12-18 13:04:36 +03:00
[abi] AsmJit v1.18 - performance and memory footprint improvements
* Refactored the whole codebase to use snake_case convention to
name functions and variables, including member variables.
Class naming is unchanged and each starts with upper-case
character. The intention of this change is to make the source
code more readable and consistent across multiple projects
where AsmJit is currently used.
* Refactored support.h to make it more shareable across projects.
* x86::Vec now inherits from UniVec
* minor changes in JitAllocator and WriteScope in order to make
the size of WriteScope smaller
* added ZoneStatistics and Zone::statistics() getter
* improved x86::EmitHelper to use tables instead of choose() and
other mechanisms to pick between SSE and AVX instructions
* Refactored the whole codebase to use snake_case convention for
function names, function parameter names, struct members,
and variables
* Added a non-owning asmjit::Span<T> type and used it in the public API
to hide the usage of ZoneVector in CodeHolder, Builder, and
Compiler. Users now only get Span (with data and size), which
doesn't require users to know about ZoneVector
* Removed RAWorkId from RATiedReg in favor of RAWorkReg*
* Removed GEN from LiveInfo to save memory, as it's not needed by CFG
construction (GEN was merged with LIVE-IN bits). The remaining
LIVE-IN, LIVE-OUT, and KILL bits are enough; however, KILL bits may
be removed in the future as they are not needed after LIVE-IN
and LIVE-OUT have converged
* Optimized the representation of LIVE-IN, LIVE-OUT, and KILL bits
per block. Now only registers that live across multiple basic
blocks are included here, which means that virtual registers that
only live in a single block are not included and won't be overhead
during liveness analysis. This optimization alone can make liveness
analysis 90% faster depending on the code generated (more virtual
registers that only live in a single basic block -> more gains)
* Optimized building liveness information bits per block. The new
code uses an optimized algorithm to prevent too many traversals
and uses more optimized code for the case in which not too many
registers are used (it avoids array operations if the number of
all virtual registers within the function fits a single BitWord)
* Optimized code that computes which virtual registers are only used
in a single basic block - this aims to optimize the register allocator
in the future by using a dedicated code path for allocating regs
only used in a single basic block
* Reduced the information required for each live-span, which is used
by bin-packing. Now the struct is 8 bytes, which is good for a lot
of optimizations a C++ compiler can do
* Added UniCompiler (ujit) which can be used to share code paths
between X86, X86_64, and AArch64 code generation (experimental).
This commit is contained in:
@@ -31,13 +31,13 @@
|
||||
|
||||
using namespace asmjit;
|
||||
|
||||
static void printAppInfo() {
|
||||
static void print_app_info() {
|
||||
printf("AsmJit Benchmark RegAlloc v%u.%u.%u [Arch=%s] [Mode=%s]\n\n",
|
||||
unsigned((ASMJIT_LIBRARY_VERSION >> 16) ),
|
||||
unsigned((ASMJIT_LIBRARY_VERSION >> 8) & 0xFF),
|
||||
unsigned((ASMJIT_LIBRARY_VERSION ) & 0xFF),
|
||||
asmjitArchAsString(Arch::kHost),
|
||||
asmjitBuildType()
|
||||
asmjit_arch_as_string(Arch::kHost),
|
||||
asmjit_build_type()
|
||||
);
|
||||
}
|
||||
|
||||
@@ -46,48 +46,48 @@ static void printAppInfo() {
|
||||
class BenchRegAllocApp {
|
||||
public:
|
||||
const char* _arch = nullptr;
|
||||
bool _helpOnly = false;
|
||||
bool _help_only = false;
|
||||
bool _verbose = false;
|
||||
uint32_t _maximumComplexity = 65536;
|
||||
uint32_t _maximum_complexity = 65536;
|
||||
|
||||
BenchRegAllocApp() noexcept
|
||||
: _arch("all") {}
|
||||
~BenchRegAllocApp() noexcept {}
|
||||
|
||||
template<class T>
|
||||
inline void addT() { T::add(*this); }
|
||||
inline void add_t() { T::add(*this); }
|
||||
|
||||
int handleArgs(int argc, const char* const* argv);
|
||||
void showInfo();
|
||||
int handle_args(int argc, const char* const* argv);
|
||||
void show_info();
|
||||
|
||||
bool shouldRunArch(Arch arch) const noexcept;
|
||||
void emitCode(BaseCompiler* cc, uint32_t complexity, uint32_t regCount);
|
||||
bool should_run_arch(Arch arch) const noexcept;
|
||||
void emit_code(BaseCompiler* cc, uint32_t complexity, uint32_t reg_count);
|
||||
|
||||
#if !defined(ASMJIT_NO_X86)
|
||||
void emitCode_x86(x86::Compiler* cc, uint32_t complexity, uint32_t regCount);
|
||||
void emit_code_x86(x86::Compiler* cc, uint32_t complexity, uint32_t reg_count);
|
||||
#endif // !ASMJIT_NO_X86
|
||||
|
||||
#if !defined(ASMJIT_NO_AARCH64)
|
||||
void emitCode_a64(a64::Compiler* cc, uint32_t complexity, uint32_t regCount);
|
||||
void emit_code_aarch64(a64::Compiler* cc, uint32_t complexity, uint32_t reg_count);
|
||||
#endif // !ASMJIT_NO_AARCH64
|
||||
|
||||
int run();
|
||||
bool runArch(Arch arch);
|
||||
bool run_arch(Arch arch);
|
||||
};
|
||||
|
||||
int BenchRegAllocApp::handleArgs(int argc, const char* const* argv) {
|
||||
int BenchRegAllocApp::handle_args(int argc, const char* const* argv) {
|
||||
CmdLine cmd(argc, argv);
|
||||
_arch = cmd.valueOf("--arch", "all");
|
||||
_maximumComplexity = cmd.valueAsUInt("--complexity", _maximumComplexity);
|
||||
_arch = cmd.value_of("--arch", "all");
|
||||
_maximum_complexity = cmd.value_as_uint("--complexity", _maximum_complexity);
|
||||
|
||||
if (cmd.hasArg("--help")) _helpOnly = true;
|
||||
if (cmd.hasArg("--verbose")) _verbose = true;
|
||||
if (cmd.has_arg("--help")) _help_only = true;
|
||||
if (cmd.has_arg("--verbose")) _verbose = true;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void BenchRegAllocApp::showInfo() {
|
||||
printAppInfo();
|
||||
void BenchRegAllocApp::show_info() {
|
||||
print_app_info();
|
||||
|
||||
printf("Usage:\n");
|
||||
printf(" asmjit_bench_regalloc [arguments]\n");
|
||||
@@ -97,7 +97,7 @@ void BenchRegAllocApp::showInfo() {
|
||||
printf(" --help Show usage only\n");
|
||||
printf(" --arch=<NAME> Select architecture to run ('all' by default)\n");
|
||||
printf(" --verbose Verbose output\n");
|
||||
printf(" --complexity=<n> Maximum complexity to test (%u)\n", _maximumComplexity);
|
||||
printf(" --complexity=<n> Maximum complexity to test (%u)\n", _maximum_complexity);
|
||||
printf("\n");
|
||||
|
||||
printf("Architectures:\n");
|
||||
@@ -111,7 +111,7 @@ void BenchRegAllocApp::showInfo() {
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
bool BenchRegAllocApp::shouldRunArch(Arch arch) const noexcept {
|
||||
bool BenchRegAllocApp::should_run_arch(Arch arch) const noexcept {
|
||||
if (strcmp(_arch, "all") == 0) {
|
||||
return true;
|
||||
}
|
||||
@@ -131,54 +131,54 @@ bool BenchRegAllocApp::shouldRunArch(Arch arch) const noexcept {
|
||||
return false;
|
||||
}
|
||||
|
||||
void BenchRegAllocApp::emitCode(BaseCompiler* cc, uint32_t complexity, uint32_t regCount) {
|
||||
void BenchRegAllocApp::emit_code(BaseCompiler* cc, uint32_t complexity, uint32_t reg_count) {
|
||||
#if !defined(ASMJIT_NO_X86)
|
||||
if (cc->arch() == Arch::kX86 || cc->arch() == Arch::kX64) {
|
||||
emitCode_x86(cc->as<x86::Compiler>(), complexity, regCount);
|
||||
emit_code_x86(cc->as<x86::Compiler>(), complexity, reg_count);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if !defined(ASMJIT_NO_AARCH64)
|
||||
if (cc->arch() == Arch::kAArch64) {
|
||||
emitCode_a64(cc->as<a64::Compiler>(), complexity, regCount);
|
||||
emit_code_aarch64(cc->as<a64::Compiler>(), complexity, reg_count);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#if !defined(ASMJIT_NO_X86)
|
||||
void BenchRegAllocApp::emitCode_x86(x86::Compiler* cc, uint32_t complexity, uint32_t regCount) {
|
||||
constexpr size_t kLocalRegCount = 3;
|
||||
constexpr size_t kLocalRegCount = 3;
|
||||
constexpr size_t kLocalOpCount = 15;
|
||||
|
||||
#if !defined(ASMJIT_NO_X86)
|
||||
void BenchRegAllocApp::emit_code_x86(x86::Compiler* cc, uint32_t complexity, uint32_t reg_count) {
|
||||
TestUtils::Random rnd(0x1234);
|
||||
size_t localOpCount = 15;
|
||||
|
||||
std::vector<Label> labels;
|
||||
std::vector<uint32_t> used_labels;
|
||||
std::vector<x86::Vec> vRegs;
|
||||
std::vector<x86::Vec> virt_regs;
|
||||
|
||||
x86::Gp arg_ptr = cc->newIntPtr("arg_ptr");
|
||||
x86::Gp counter = cc->newIntPtr("counter");
|
||||
x86::Gp arg_ptr = cc->new_gp_ptr("arg_ptr");
|
||||
x86::Gp counter = cc->new_gp_ptr("counter");
|
||||
|
||||
for (size_t i = 0; i < complexity; i++) {
|
||||
labels.push_back(cc->newLabel());
|
||||
labels.push_back(cc->new_label());
|
||||
used_labels.push_back(0u);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < regCount; i++) {
|
||||
vRegs.push_back(cc->newXmmSd("v%u", unsigned(i)));
|
||||
for (size_t i = 0; i < reg_count; i++) {
|
||||
virt_regs.push_back(cc->new_xmm_sd("v%u", unsigned(i)));
|
||||
}
|
||||
|
||||
FuncNode* func = cc->addFunc(FuncSignature::build<void, size_t, void*>());
|
||||
func->addAttributes(FuncAttributes::kX86_AVXEnabled);
|
||||
func->setArg(0, counter);
|
||||
func->setArg(1, arg_ptr);
|
||||
FuncNode* func = cc->add_func(FuncSignature::build<void, size_t, void*>());
|
||||
func->add_attributes(FuncAttributes::kX86_AVXEnabled);
|
||||
func->set_arg(0, counter);
|
||||
func->set_arg(1, arg_ptr);
|
||||
|
||||
for (size_t i = 0; i < regCount; i++) {
|
||||
cc->vmovsd(vRegs[i], x86::ptr_64(arg_ptr, int32_t(i * 8)));
|
||||
for (size_t i = 0; i < reg_count; i++) {
|
||||
cc->vmovsd(virt_regs[i], x86::ptr_64(arg_ptr, int32_t(i * 8)));
|
||||
}
|
||||
|
||||
auto next_label = [&]() {
|
||||
uint32_t id = rnd.nextUInt32() % complexity;
|
||||
uint32_t id = rnd.next_uint32() % complexity;
|
||||
if (used_labels[id] > 1) {
|
||||
id = 0;
|
||||
do {
|
||||
@@ -197,26 +197,26 @@ void BenchRegAllocApp::emitCode_x86(x86::Compiler* cc, uint32_t complexity, uint
|
||||
|
||||
x86::Vec locals[kLocalRegCount];
|
||||
for (size_t j = 0; j < kLocalRegCount; j++) {
|
||||
locals[j] = cc->newXmmSd("local%u", unsigned(j));
|
||||
locals[j] = cc->new_xmm_sd("local%u", unsigned(j));
|
||||
}
|
||||
|
||||
size_t localOpThreshold = localOpCount - kLocalRegCount;
|
||||
size_t local_op_threshold = kLocalOpCount - kLocalRegCount;
|
||||
|
||||
for (size_t j = 0; j < 15; j++) {
|
||||
uint32_t op = rnd.nextUInt32() % 6u;
|
||||
uint32_t id1 = rnd.nextUInt32() % regCount;
|
||||
uint32_t id2 = rnd.nextUInt32() % regCount;
|
||||
uint32_t op = rnd.next_uint32() % 6u;
|
||||
uint32_t id1 = rnd.next_uint32() % reg_count;
|
||||
uint32_t id2 = rnd.next_uint32() % reg_count;
|
||||
|
||||
x86::Vec v0 = vRegs[id1];
|
||||
x86::Vec v1 = vRegs[id1];
|
||||
x86::Vec v2 = vRegs[id2];
|
||||
x86::Vec v0 = virt_regs[id1];
|
||||
x86::Vec v1 = virt_regs[id1];
|
||||
x86::Vec v2 = virt_regs[id2];
|
||||
|
||||
if (j < kLocalRegCount) {
|
||||
v0 = locals[j];
|
||||
}
|
||||
|
||||
if (j >= localOpThreshold) {
|
||||
v2 = locals[j - localOpThreshold];
|
||||
if (j >= local_op_threshold) {
|
||||
v2 = locals[j - local_op_threshold];
|
||||
}
|
||||
|
||||
switch (op) {
|
||||
@@ -233,48 +233,45 @@ void BenchRegAllocApp::emitCode_x86(x86::Compiler* cc, uint32_t complexity, uint
|
||||
cc->jns(next_label());
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < regCount; i++) {
|
||||
cc->vmovsd(x86::ptr_64(arg_ptr, int32_t(i * 8)), vRegs[i]);
|
||||
for (size_t i = 0; i < reg_count; i++) {
|
||||
cc->vmovsd(x86::ptr_64(arg_ptr, int32_t(i * 8)), virt_regs[i]);
|
||||
}
|
||||
|
||||
cc->endFunc();
|
||||
cc->end_func();
|
||||
}
|
||||
#endif // !ASMJIT_NO_X86
|
||||
|
||||
#if !defined(ASMJIT_NO_AARCH64)
|
||||
void BenchRegAllocApp::emitCode_a64(a64::Compiler* cc, uint32_t complexity, uint32_t regCount) {
|
||||
void BenchRegAllocApp::emit_code_aarch64(a64::Compiler* cc, uint32_t complexity, uint32_t reg_count) {
|
||||
TestUtils::Random rnd(0x1234);
|
||||
|
||||
constexpr size_t kLocalRegCount = 3;
|
||||
size_t localOpCount = 15;
|
||||
|
||||
std::vector<Label> labels;
|
||||
std::vector<uint32_t> used_labels;
|
||||
std::vector<a64::Vec> vRegs;
|
||||
std::vector<a64::Vec> virt_regs;
|
||||
|
||||
a64::Gp arg_ptr = cc->newIntPtr("arg_ptr");
|
||||
a64::Gp counter = cc->newIntPtr("counter");
|
||||
a64::Gp arg_ptr = cc->new_gp_ptr("arg_ptr");
|
||||
a64::Gp counter = cc->new_gp_ptr("counter");
|
||||
|
||||
for (size_t i = 0; i < complexity; i++) {
|
||||
labels.push_back(cc->newLabel());
|
||||
labels.push_back(cc->new_label());
|
||||
used_labels.push_back(0u);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < regCount; i++) {
|
||||
vRegs.push_back(cc->newVecD("v%u", unsigned(i)));
|
||||
for (size_t i = 0; i < reg_count; i++) {
|
||||
virt_regs.push_back(cc->new_vec_d("v%u", unsigned(i)));
|
||||
}
|
||||
|
||||
FuncNode* func = cc->addFunc(FuncSignature::build<void, size_t, void*>());
|
||||
func->addAttributes(FuncAttributes::kX86_AVXEnabled);
|
||||
func->setArg(0, counter);
|
||||
func->setArg(1, arg_ptr);
|
||||
FuncNode* func = cc->add_func(FuncSignature::build<void, size_t, void*>());
|
||||
func->add_attributes(FuncAttributes::kX86_AVXEnabled);
|
||||
func->set_arg(0, counter);
|
||||
func->set_arg(1, arg_ptr);
|
||||
|
||||
for (size_t i = 0; i < regCount; i++) {
|
||||
cc->ldr(vRegs[i].d(), a64::ptr(arg_ptr, int32_t(i * 8) & 1023));
|
||||
for (size_t i = 0; i < reg_count; i++) {
|
||||
cc->ldr(virt_regs[i].d(), a64::ptr(arg_ptr, int32_t(i * 8) & 1023));
|
||||
}
|
||||
|
||||
auto next_label = [&]() {
|
||||
uint32_t id = rnd.nextUInt32() % complexity;
|
||||
uint32_t id = rnd.next_uint32() % complexity;
|
||||
if (used_labels[id] > 1) {
|
||||
id = 0;
|
||||
do {
|
||||
@@ -293,26 +290,26 @@ void BenchRegAllocApp::emitCode_a64(a64::Compiler* cc, uint32_t complexity, uint
|
||||
|
||||
a64::Vec locals[kLocalRegCount];
|
||||
for (size_t j = 0; j < kLocalRegCount; j++) {
|
||||
locals[j] = cc->newVecD("local%u", unsigned(j));
|
||||
locals[j] = cc->new_vec_d("local%u", unsigned(j));
|
||||
}
|
||||
|
||||
size_t localOpThreshold = localOpCount - kLocalRegCount;
|
||||
size_t local_op_threshold = kLocalOpCount - kLocalRegCount;
|
||||
|
||||
for (size_t j = 0; j < 15; j++) {
|
||||
uint32_t op = rnd.nextUInt32() % 6;
|
||||
uint32_t id1 = rnd.nextUInt32() % regCount;
|
||||
uint32_t id2 = rnd.nextUInt32() % regCount;
|
||||
uint32_t op = rnd.next_uint32() % 6;
|
||||
uint32_t id1 = rnd.next_uint32() % reg_count;
|
||||
uint32_t id2 = rnd.next_uint32() % reg_count;
|
||||
|
||||
a64::Vec v0 = vRegs[id1];
|
||||
a64::Vec v1 = vRegs[id1];
|
||||
a64::Vec v2 = vRegs[id2];
|
||||
a64::Vec v0 = virt_regs[id1];
|
||||
a64::Vec v1 = virt_regs[id1];
|
||||
a64::Vec v2 = virt_regs[id2];
|
||||
|
||||
if (j < kLocalRegCount) {
|
||||
v0 = locals[j];
|
||||
}
|
||||
|
||||
if (j >= localOpThreshold) {
|
||||
v2 = locals[j - localOpThreshold];
|
||||
if (j >= local_op_threshold) {
|
||||
v2 = locals[j - local_op_threshold];
|
||||
}
|
||||
|
||||
switch (op) {
|
||||
@@ -329,28 +326,28 @@ void BenchRegAllocApp::emitCode_a64(a64::Compiler* cc, uint32_t complexity, uint
|
||||
cc->b_hi(next_label());
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < regCount; i++) {
|
||||
cc->str(vRegs[i].d(), a64::ptr(arg_ptr, int32_t(i * 8) & 1023));
|
||||
for (size_t i = 0; i < reg_count; i++) {
|
||||
cc->str(virt_regs[i].d(), a64::ptr(arg_ptr, int32_t(i * 8) & 1023));
|
||||
}
|
||||
|
||||
cc->endFunc();
|
||||
cc->end_func();
|
||||
}
|
||||
#endif // !ASMJIT_NO_AARCH64
|
||||
|
||||
int BenchRegAllocApp::run() {
|
||||
if (shouldRunArch(Arch::kX64) && !runArch(Arch::kX64)) {
|
||||
if (should_run_arch(Arch::kX64) && !run_arch(Arch::kX64)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (shouldRunArch(Arch::kAArch64) && !runArch(Arch::kAArch64)) {
|
||||
if (should_run_arch(Arch::kAArch64) && !run_arch(Arch::kAArch64)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool BenchRegAllocApp::runArch(Arch arch) {
|
||||
Environment customEnv;
|
||||
bool BenchRegAllocApp::run_arch(Arch arch) {
|
||||
Environment custom_env;
|
||||
CpuFeatures features;
|
||||
|
||||
switch (arch) {
|
||||
@@ -394,8 +391,8 @@ bool BenchRegAllocApp::runArch(Arch arch) {
|
||||
|
||||
CodeHolder code;
|
||||
|
||||
customEnv.init(arch);
|
||||
code.init(customEnv, features);
|
||||
custom_env.init(arch);
|
||||
code.init(custom_env, features);
|
||||
|
||||
std::unique_ptr<BaseCompiler> cc;
|
||||
|
||||
@@ -414,54 +411,77 @@ bool BenchRegAllocApp::runArch(Arch arch) {
|
||||
if (!cc)
|
||||
return false;
|
||||
|
||||
PerformanceTimer emitTimer;
|
||||
PerformanceTimer finalizeTimer;
|
||||
PerformanceTimer emit_timer;
|
||||
PerformanceTimer finalize_timer;
|
||||
|
||||
uint32_t regCount = 35;
|
||||
uint32_t reg_count = 35;
|
||||
|
||||
code.reinit();
|
||||
code.attach(cc.get());
|
||||
|
||||
// Dry run to not benchmark allocs on the first run.
|
||||
emitCode(cc.get(), 0, regCount);
|
||||
emit_code(cc.get(), 0, reg_count);
|
||||
cc->finalize();
|
||||
code.reinit();
|
||||
|
||||
printf("Arch | Complexity | Labels | RegCount | CodeSize | EmitTime [ms]| RA Time [ms]\n");
|
||||
printf("-------+------------+--------+----------+-----------+--------------+-------------\n");
|
||||
#if !defined(ASMJIT_NO_LOGGING)
|
||||
StringLogger logger;
|
||||
if (_verbose) {
|
||||
code.set_logger(&logger);
|
||||
cc->add_diagnostic_options(DiagnosticOptions::kRAAnnotate | DiagnosticOptions::kRADebugAll);
|
||||
}
|
||||
#endif // !ASMJIT_NO_LOGGING
|
||||
|
||||
for (uint32_t complexity = 1u; complexity <= _maximumComplexity; complexity *= 2u) {
|
||||
emitTimer.start();
|
||||
emitCode(cc.get(), complexity + 1, regCount);
|
||||
emitTimer.stop();
|
||||
printf("+-----------------------------------------+-----------+-----------------------------------+--------------+--------------+\n");
|
||||
printf("| Input Configuration | Output | Reserved Memory [KiB] | Time Elapsed [ms] |\n");
|
||||
printf("+--------+------------+--------+----------+-----------+-----------+-----------+-----------+--------------+--------------+\n");
|
||||
printf("| Arch | Complexity | Labels | RegCount | CodeSize | Code Hold.| Compiler | Pass Temp.| Emit Time | Reg. Alloc |\n");
|
||||
printf("+--------+------------+--------+----------+-----------+-----------+-----------+-----------+--------------+--------------+\n");
|
||||
|
||||
for (uint32_t complexity = 1u; complexity <= _maximum_complexity; complexity *= 2u) {
|
||||
emit_timer.start();
|
||||
emit_code(cc.get(), complexity + 1, reg_count);
|
||||
emit_timer.stop();
|
||||
|
||||
finalize_timer.start();
|
||||
Error err = cc->finalize();
|
||||
finalize_timer.stop();
|
||||
|
||||
#if !defined(ASMJIT_NO_LOGGING)
|
||||
if (_verbose) {
|
||||
String sb;
|
||||
FormatOptions fmtOptions;
|
||||
Formatter::formatNodeList(sb, fmtOptions, cc.get());
|
||||
printf("[Complexity: %u Assembly]\n", complexity);
|
||||
printIndented(sb.data(), 4);
|
||||
printf("%s\n", logger.data());
|
||||
logger.clear();
|
||||
}
|
||||
#endif // ASMJIT_NO_LOGGING
|
||||
|
||||
finalizeTimer.start();
|
||||
Error err = cc->finalize();
|
||||
finalizeTimer.stop();
|
||||
#endif
|
||||
|
||||
code.flatten();
|
||||
|
||||
double emitTime = emitTimer.duration();
|
||||
double finalizeTime = finalizeTimer.duration();
|
||||
size_t codeSize = code.codeSize();
|
||||
size_t labelCount = code.labelCount();
|
||||
size_t vRegCount = cc->virtRegs().size();
|
||||
double emit_time = emit_timer.duration();
|
||||
double finalize_time = finalize_timer.duration();
|
||||
size_t code_size = code.code_size();
|
||||
size_t label_count = code.label_count();
|
||||
size_t virt_reg_count = cc->virt_regs().size();
|
||||
|
||||
printf("%-7s| %10u | %6zu | %8zu | %9zu | %12.3f | %12.3f",
|
||||
asmjitArchAsString(arch), complexity, labelCount, vRegCount, codeSize, emitTime, finalizeTime);
|
||||
ArenaStatistics code_holder_stats = code._arena.statistics();
|
||||
ArenaStatistics compiler_stats = cc->_builder_arena.statistics();
|
||||
ArenaStatistics pass_stats = cc->_pass_arena.statistics();
|
||||
|
||||
if (err) {
|
||||
printf(" (err: %s)", DebugUtils::errorAsString(err));
|
||||
printf(
|
||||
"| %-7s| %10u | %6zu | %8zu | %9zu | %9zu | %9zu | %9zu | %12.3f | %12.3f |",
|
||||
asmjit_arch_as_string(arch),
|
||||
complexity,
|
||||
label_count,
|
||||
virt_reg_count,
|
||||
code_size,
|
||||
(code_holder_stats.reserved_size() + 1023) / 1024,
|
||||
(compiler_stats.reserved_size() + 1023) / 1024,
|
||||
(pass_stats.reserved_size() + 1023) / 1024,
|
||||
emit_time,
|
||||
finalize_time
|
||||
);
|
||||
|
||||
if (err != Error::kOk) {
|
||||
printf(" (err: %s)", DebugUtils::error_as_string(err));
|
||||
}
|
||||
|
||||
printf("\n");
|
||||
@@ -469,6 +489,7 @@ bool BenchRegAllocApp::runArch(Arch arch) {
|
||||
code.reinit();
|
||||
}
|
||||
|
||||
printf("+--------+------------+--------+----------+-----------+-----------+-----------+-----------+--------------+--------------+\n");
|
||||
printf("\n");
|
||||
|
||||
return true;
|
||||
@@ -477,10 +498,10 @@ bool BenchRegAllocApp::runArch(Arch arch) {
|
||||
int main(int argc, char* argv[]) {
|
||||
BenchRegAllocApp app;
|
||||
|
||||
app.handleArgs(argc, argv);
|
||||
app.showInfo();
|
||||
app.handle_args(argc, argv);
|
||||
app.show_info();
|
||||
|
||||
if (app._helpOnly)
|
||||
if (app._help_only)
|
||||
return 0;
|
||||
|
||||
return app.run();
|
||||
@@ -489,7 +510,7 @@ int main(int argc, char* argv[]) {
|
||||
#else
|
||||
|
||||
int main() {
|
||||
printAppInfo();
|
||||
print_app_info();
|
||||
printf("!! This Benchmark is disabled: <ASMJIT_NO_JIT> or unsuitable target architecture !!\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user