Files
asmjit/test/asmjit_test_x86_sections.cpp
kobalicek 7596c6d035 [abi] AsmJit v1.18 - performance and memory footprint improvements
* Refactored the whole codebase to use snake_case convention to
    name functions and variables, including member variables.
    Class naming is unchanged and each starts with upper-case
    character. The intention of this change is to make the source
    code more readable and consistent across multiple projects
    where AsmJit is currently used.

  * Refactored support.h to make it more shareable across projects.

  * x86::Vec now inherits from UniVec

  * minor changes in JitAllocator and WriteScope in order to make
    the size of WriteScope smaller

  * added ZoneStatistics and Zone::statistics() getter

  * improved x86::EmitHelper to use tables instead of choose() and
    other mechanisms to pick between SSE and AVX instructions

  * Refactored the whole codebase to use snake_case convention for
    function names, function parameter names, struct members,
    and variables

  * Added a non-owning asmjit::Span<T> type and used it in the public
    API to hide the usage of ZoneVector in CodeHolder, Builder, and
    Compiler. Users now only get Span (with data and size), which
    doesn't require users to know about ZoneVector

  * Removed RAWorkId from RATiedReg in favor of RAWorkReg*

  * Removed GEN from LiveInfo as it's not needed by CFG construction
    to save memory (GEN was merged with LIVE-IN bits). The remaining
    LIVE-IN, LIVE-OUT, and KILL bits are enough, however KILL bits may
    be removed in the future as KILL bits are not needed after LIVE-IN
    and LIVE-OUT converged

  * Optimized the representation of LIVE-IN, LIVE-OUT, and KILL bits
    per block. Now only registers that live across multiple basic
    blocks are included here, which means that virtual registers that
    only live in a single block are not included and won't be overhead
    during liveness analysis. This optimization alone can make liveness
    analysis 90% faster depending on the code generated (more virtual
    registers that only live in a single basic block -> more gains)

  * Optimized building liveness information bits per block. The new
    code uses an optimized algorithm to prevent too many traversals
    and uses a more optimized code for a case in which not too many
    registers are used (it avoids array operations if the number of
    all virtual registers within the function fits a single BitWord)

  * Optimized code that computes which virtual register is only used
    in a single basic block - this aims to optimize register allocator
    in the future by using a designed code path for allocating regs
    only used in a single basic block

  * Reduced the information required for each live-span, which is used
    by bin-packing. Now the struct is 8 bytes, which is good for a lot
    of optimizations C++ compiler can do

  * Added UniCompiler (ujit) which can be used to share code paths
    between X86, X86_64, and AArch64 code generation (experimental).
2025-09-06 13:44:34 +02:00

170 lines
5.5 KiB
C++

// This file is part of AsmJit project <https://asmjit.com>
//
// See <asmjit/core.h> or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
// ----------------------------------------------------------------------------
// This is a working example that demonstrates how multiple sections can be
// used in a JIT-based code generator. It also shows the additional work that
// the user is expected to do when the feature is used. It's important to
// handle the following cases:
//
// - Assign offsets to sections when the code generation is finished.
// - Tell the CodeHolder to resolve unresolved fixups and check whether
// all fixups were resolved.
// - Relocate the code
// - Copy the code to the destination address.
// ----------------------------------------------------------------------------
#include <asmjit/core.h>
#if ASMJIT_ARCH_X86 && !defined(ASMJIT_NO_X86) && !defined(ASMJIT_NO_JIT)
#include <asmjit/x86.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
using namespace asmjit;
// Lookup table used by the test. The generated function embeds a copy of this
// table in its `.data` section and returns the byte stored at the index passed
// as its first argument, so the same table also serves as the reference when
// verifying the values the function returns.
static constexpr uint8_t data_array[] = {
  2, 9, 4, 7, 1,
  3, 8, 5, 6, 0
};
static void fail(const char* message, Error err) {
printf("** FAILURE: %s (%s) **\n", message, DebugUtils::error_as_string(err));
exit(1);
}
int main() {
printf("AsmJit X86 Sections Test\n\n");
Environment env = Environment::host();
JitAllocator allocator;
#ifndef ASMJIT_NO_LOGGING
FileLogger logger(stdout);
logger.set_indentation(FormatIndentationGroup::kCode, 2);
#endif
CodeHolder code;
code.init(env);
#ifndef ASMJIT_NO_LOGGING
code.set_logger(&logger);
#endif
Section* data_section;
Error err = code.new_section(Out(data_section), ".data", SIZE_MAX, SectionFlags::kNone, 8);
if (err != Error::kOk) {
fail("Failed to create a .data section", err);
}
else {
printf("Generating code:\n");
x86::Assembler a(&code);
x86::Gp idx = a.zax();
x86::Gp addr = a.zcx();
Label data = a.new_label();
FuncDetail func;
func.init(FuncSignature::build<size_t, size_t>(), code.environment());
FuncFrame frame;
frame.init(func);
frame.add_dirty_regs(idx, addr);
FuncArgsAssignment args(&func);
args.assign_all(idx);
args.update_func_frame(frame);
frame.finalize();
a.emit_prolog(frame);
a.emit_args_assignment(frame, args);
a.lea(addr, x86::ptr(data));
a.movzx(idx, x86::byte_ptr(addr, idx));
a.emit_epilog(frame);
a.section(data_section);
a.bind(data);
a.embed(data_array, sizeof(data_array));
}
// Manually change he offsets of each section, start at 0. This code is very similar to
// what `CodeHolder::flatten()` does, however, it's shown here how to do it explicitly.
printf("\nCalculating section offsets:\n");
uint64_t offset = 0;
for (Section* section : code.sections_by_order()) {
offset = Support::align_up(offset, section->alignment());
section->set_offset(offset);
offset += section->real_size();
printf(" [0x%08X %s] {Id=%u Size=%u}\n",
uint32_t(section->offset()),
section->name(),
section->section_id(),
uint32_t(section->real_size()));
}
size_t code_size = size_t(offset);
printf(" Final code size: %zu\n", code_size);
// Resolve cross-section fixups (if any). On 32-bit X86 this is not necessary
// as this is handled through relocations as the addressing is different.
if (code.has_unresolved_fixups()) {
printf("\nResolving cross-section fixups:\n");
printf(" Before 'resolve_cross_section_fixups()': %zu\n", code.unresolved_fixup_count());
err = code.resolve_cross_section_fixups();
if (err != Error::kOk) {
fail("Failed to resolve cross-section fixups", err);
}
printf(" After 'resolve_cross_section_fixups()': %zu\n", code.unresolved_fixup_count());
}
// Allocate memory for the function and relocate it there.
JitAllocator::Span span;
err = allocator.alloc(Out(span), code_size);
if (err != Error::kOk)
fail("Failed to allocate executable memory", err);
// Relocate to the base-address of the allocated memory.
code.relocate_to_base(uint64_t(uintptr_t(span.rx())));
allocator.write(span, [&](JitAllocator::Span& span) noexcept -> Error {
// Copy the flattened code into `mem.rw`. There are two ways. You can either copy
// everything manually by iterating over all sections or use `copy_flattened_data`.
// This code is similar to what `copy_flattened_data(p, code_size, 0)` would do:
for (Section* section : code.sections_by_order())
memcpy(static_cast<uint8_t*>(span.rw()) + size_t(section->offset()), section->data(), section->buffer_size());
return Error::kOk;
});
// Execute the function and test whether it works.
using Func = size_t (*)(size_t idx);
Func fn = (Func)span.rx();
printf("\n");
if (fn(0) != data_array[0] ||
fn(3) != data_array[3] ||
fn(6) != data_array[6] ||
fn(9) != data_array[9] ) {
printf("** FAILURE: The generated function returned incorrect result(s) **\n");
return 1;
}
printf("** SUCCESS **\n");
return 0;
}
#else
// Fallback entry point compiled when the `#if` condition guarding the real test
// is not met (non-X86 target, or X86/JIT support disabled). It only reports
// that the test is disabled and exits successfully.
int main() {
  fputs("!! This test is disabled: ASMJIT_NO_JIT or unsuitable target architecture !!\n\n", stdout);
  return 0;
}
#endif // ASMJIT_ARCH_X86 && !ASMJIT_NO_X86 && !ASMJIT_NO_JIT